On a recent project, I had to provide export functionality for a web grid’s data source that contained wildly varying data structures. The XML structure contained anywhere from 1 to 10 levels of nesting, and we needed to be able to export this data to an Excel or comma-separated values (CSV) file.
This code will show you how to turn a complex XML tree structure into a list of key-value pairs.
For example, here is a relatively simple data structure:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
<Customer> <Name>John Smith</Name> <Address>123 Main St</Address> <City>Baltimore</City> <Phones> <Phone>4105551234</Phone> <Phone>4151134443</Phone> </Phones> <Contacts> <Contact> <Name>Jiminy Cricket</Name> <Title>CricketMaster</Title> </Contact> <Contact> <Name>Hans Solo</Name> <Title>Smuggler</Title> </Contact> </Contacts> </Customer> |
To get the structure ready for data export, this code turns the XML into a list of key-value pairs.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Xml;
using System.Xml.XPath;
using System.Xml.Linq;

namespace Neuroticode.Examples
{
    /// <remarks>
    /// 2016-04-20 - Matt Cullinan
    /// </remarks>
    /// <summary>
    /// Takes an <see cref="XmlDocument"/> and "flattens" it into an ordered list of key/value pairs.
    /// Exporting to Excel is a common use case for this.
    /// </summary>
    /// <remarks>
    /// A key is the '/'-delimited path of qualified element names from the root down to the element,
    /// e.g. <c>/Customer/Phones/Phone1</c>. When several sibling elements share a name, each one is
    /// suffixed with its zero-based position among those same-named siblings; the suffix for position 0
    /// is only written when <c>showZeroIndexIdentifier</c> is true.
    /// </remarks>
    public class XmlFlattener
    {
        private const string UnsupportedNodeTypeMessage =
            "The target node type must be of NodeType.Element or NodeType.Text. The type is {0}";

        private readonly char _delim = '/';
        private readonly bool _showZeroIndexIdentifier;
        private readonly XmlDocument _xmlDocument;
        private readonly List<KeyValuePair<string, string>> _flattenedXMLDoc;

        /// <summary>
        /// Creates a flattener for the given document. Nothing is parsed until <see cref="Flatten"/> is called.
        /// </summary>
        /// <param name="showZeroIndexIdentifier">If TRUE, elements that are first of their kind in the XML tree structure will be appended with a "0".</param>
        /// <param name="xmlDocument">The document to parse</param>
        /// <exception cref="ArgumentNullException"><paramref name="xmlDocument"/> is null.</exception>
        public XmlFlattener(bool showZeroIndexIdentifier, XmlDocument xmlDocument)
        {
            if (xmlDocument == null)
            {
                throw new ArgumentNullException("xmlDocument");
            }

            _showZeroIndexIdentifier = showZeroIndexIdentifier;
            _xmlDocument = xmlDocument;
            _flattenedXMLDoc = new List<KeyValuePair<string, string>>();
        }

        /// <summary>
        /// The key/value pairs produced by the most recent call to <see cref="Flatten"/>, in document order.
        /// Empty until <see cref="Flatten"/> has been called.
        /// </summary>
        public List<KeyValuePair<string, string>> FlattenedXMLDoc
        {
            get { return _flattenedXMLDoc; }
        }

        /// <remarks>
        /// 2016-04-20 - Matt Cullinan - Created
        /// </remarks>
        /// <summary>
        /// Performs the flattening of the data. Any previous result is discarded first, so calling
        /// this method repeatedly does not accumulate duplicate rows.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// A node type other than element, text, CDATA, comment, processing instruction, XML declaration,
        /// document type or whitespace was found between elements.
        /// </exception>
        public void Flatten()
        {
            _flattenedXMLDoc.Clear();
            flattenChildren(_xmlDocument, string.Empty);
        }

        /// <summary>
        /// Walks the direct children of <paramref name="parent"/> once, in document order, emitting
        /// rows for text content and descending into child elements.
        /// </summary>
        /// <param name="parent">The document node or an element that has at least one child element.</param>
        /// <param name="parentKey">The key already built for <paramref name="parent"/> (empty for the document).</param>
        private void flattenChildren(XmlNode parent, string parentKey)
        {
            // Number of same-named element siblings seen so far; the count before increment is the
            // element's index. Matching on the qualified name keeps generated keys unique and works
            // for prefixed and default-namespace elements alike, without any XPath lookup.
            var seenByName = new Dictionary<string, int>(StringComparer.Ordinal);

            for (var child = parent.FirstChild; child != null; child = child.NextSibling)
            {
                switch (child.NodeType)
                {
                    case XmlNodeType.Element:
                        int index;
                        seenByName.TryGetValue(child.Name, out index);
                        seenByName[child.Name] = index + 1;
                        flattenElement(child, buildKey(parentKey, child.Name, index));
                        break;

                    case XmlNodeType.Text:
                    case XmlNodeType.CDATA:
                        // Mixed content: text that sits next to child elements is reported under the parent's key.
                        _flattenedXMLDoc.Add(new KeyValuePair<string, string>(parentKey, child.Value));
                        break;

                    case XmlNodeType.XmlDeclaration:
                    case XmlNodeType.DocumentType:
                    case XmlNodeType.Comment:
                    case XmlNodeType.ProcessingInstruction:
                    case XmlNodeType.Whitespace:
                    case XmlNodeType.SignificantWhitespace:
                        // Carries no exportable data.
                        break;

                    default:
                        throw new ArgumentException(
                            string.Format(CultureInfo.InvariantCulture, UnsupportedNodeTypeMessage, child.NodeType));
                }
            }
        }

        /// <summary>
        /// Emits a single row for a leaf element, or descends into an element that contains child elements.
        /// Recursion depth therefore follows the nesting depth of the XML, not the number of elements.
        /// </summary>
        /// <param name="element">The element to flatten.</param>
        /// <param name="key">The fully built key for <paramref name="element"/>.</param>
        private void flattenElement(XmlNode element, string key)
        {
            if (hasChildElement(element))
            {
                flattenChildren(element, key);
                return;
            }

            // Leaf element: exactly one row. InnerText is the empty string for an element with no
            // children and the concatenated text/CDATA content otherwise.
            _flattenedXMLDoc.Add(new KeyValuePair<string, string>(key, element.InnerText));
        }

        /// <summary>
        /// Returns true when at least one direct child of <paramref name="node"/> is an element.
        /// </summary>
        private static bool hasChildElement(XmlNode node)
        {
            for (var child = node.FirstChild; child != null; child = child.NextSibling)
            {
                if (child.NodeType == XmlNodeType.Element)
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Appends one path segment (delimiter, element name, optional index suffix) to a parent key.
        /// </summary>
        /// <param name="parentKey">Key of the containing node.</param>
        /// <param name="name">Qualified name of the element being added.</param>
        /// <param name="index">Zero-based position of the element among its same-named siblings.</param>
        private string buildKey(string parentKey, string name, int index)
        {
            var suffix = (!_showZeroIndexIdentifier && index == 0)
                ? string.Empty
                : index.ToString(CultureInfo.InvariantCulture);

            return parentKey + _delim + name + suffix;
        }
    }

    /// <remarks>
    /// 2016-04-22 - Matt Cullinan
    /// </remarks>
    /// <summary>
    /// Returns the XML namespaces that are in scope at the first element of an XML document.
    /// </summary>
    public class XmlNamespaceRetriever
    {
        /// <summary>
        /// Re-parses the document's markup, moves to the first element and returns the
        /// prefix-to-URI mappings in scope there.
        /// </summary>
        /// <param name="xmlDocument">The document whose namespaces are wanted.</param>
        /// <param name="xmlNamespaceScope">Which in-scope namespaces to include.</param>
        /// <returns>A dictionary keyed by namespace prefix with the namespace URI as value.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="xmlDocument"/> is null.</exception>
        public static IDictionary<string, string> GetNamespacedictionary(XmlDocument xmlDocument, XmlNamespaceScope xmlNamespaceScope)
        {
            if (xmlDocument == null)
            {
                throw new ArgumentNullException("xmlDocument");
            }

            var namespaceRetrievalDocument = XDocument.Parse(xmlDocument.InnerXml);
            var namespaceRetrievalNavigator = namespaceRetrievalDocument.CreateNavigator();
            namespaceRetrievalNavigator.MoveToFollowing(XPathNodeType.Element);

            return namespaceRetrievalNavigator.GetNamespacesInScope(xmlNamespaceScope);
        }
    }
}
And the console app…
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
using System;
using System.Xml;

namespace Neuroticode.Examples
{
    /// <summary>
    /// Console demo: flattens a small customer document and prints every key/value pair.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the sample XML, runs the flattener without the zero index identifier,
        /// writes one line per pair, then waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const string customerXml =
                "<Customer> " +
                "<Name>John Smith</Name> " +
                "<Address>123 Main St</Address> " +
                "<City>Baltimore</City> " +
                "<Phones> <Phone>4105551234</Phone> <Phone>4151134443</Phone> </Phones> " +
                "<Contacts> " +
                "<Contact> <Name>Jiminy Cricket</Name> <Title>CricketMaster</Title> </Contact> " +
                "<Contact> <Name>Hans Solo</Name> <Title>Smuggler</Title> </Contact> " +
                "</Contacts></Customer>";

            var document = new XmlDocument();
            document.LoadXml(customerXml);

            var xmlFlattener = new XmlFlattener(false, document);
            xmlFlattener.Flatten();

            var pairs = xmlFlattener.FlattenedXMLDoc;
            for (var row = 0; row < pairs.Count; row++)
            {
                Console.WriteLine("Key: {0}, Value: {1}", pairs[row].Key, pairs[row].Value);
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
    }
}
Here is the output: