From ec0ba1cbb201bb3672fde89b0d0e9a17ec118fa5 Mon Sep 17 00:00:00 2001 From: Stephane Royer Date: Sun, 24 Nov 2024 15:37:42 +0100 Subject: [PATCH] redo xml parser --- .../Core/IXmlObjectReader.cs | 10 + .../Core/Mapping/XmlFieldDefinition.cs | 2 - .../Core/XmlFileDefinition.cs | 2 - .../Core/XmlNodeDefinition.cs | 4 +- .../Core/XmlNodeParsed.cs | 22 +- .../Core/XmlObjectReader.cs | 261 +++++++++--------- .../Core/XmlObjectReaderV2.cs | 165 +++++++++++ .../XmlFileValuesProvider.cs | 4 +- .../XmlNodeOfTypeStreamNode.cs | 2 +- .../Paillave.Etl.XmlFileTests.csproj | 24 ++ .../XmlObjectReaderV2Test.cs | 172 ++++++++++++ src/Paillave.Etl.sln | 14 + 12 files changed, 534 insertions(+), 148 deletions(-) create mode 100644 src/Paillave.Etl.XmlFile/Core/IXmlObjectReader.cs create mode 100644 src/Paillave.Etl.XmlFile/Core/XmlObjectReaderV2.cs create mode 100644 src/Paillave.Etl.XmlFileTests/Paillave.Etl.XmlFileTests.csproj create mode 100644 src/Paillave.Etl.XmlFileTests/XmlObjectReaderV2Test.cs diff --git a/src/Paillave.Etl.XmlFile/Core/IXmlObjectReader.cs b/src/Paillave.Etl.XmlFile/Core/IXmlObjectReader.cs new file mode 100644 index 00000000..4bc16509 --- /dev/null +++ b/src/Paillave.Etl.XmlFile/Core/IXmlObjectReader.cs @@ -0,0 +1,10 @@ +using System; +using System.IO; +using System.Threading; + +namespace Paillave.Etl.XmlFile.Core; + +public interface IXmlObjectReader +{ + void Read(Stream fileStream, CancellationToken cancellationToken); +} diff --git a/src/Paillave.Etl.XmlFile/Core/Mapping/XmlFieldDefinition.cs b/src/Paillave.Etl.XmlFile/Core/Mapping/XmlFieldDefinition.cs index 8cbd36be..0dd7d69a 100644 --- a/src/Paillave.Etl.XmlFile/Core/Mapping/XmlFieldDefinition.cs +++ b/src/Paillave.Etl.XmlFile/Core/Mapping/XmlFieldDefinition.cs @@ -1,9 +1,7 @@ using System; using System.Collections.Generic; -using System.Globalization; using System.Linq; using System.Reflection; -using System.Text; using System.Xml; namespace Paillave.Etl.XmlFile.Core.Mapping diff --git a/src/Paillave.Etl.XmlFile/Core/XmlFileDefinition.cs b/src/Paillave.Etl.XmlFile/Core/XmlFileDefinition.cs index aecb3329..46e55d0f 100644 --- a/src/Paillave.Etl.XmlFile/Core/XmlFileDefinition.cs +++ b/src/Paillave.Etl.XmlFile/Core/XmlFileDefinition.cs @@ -1,8 +1,6 @@ using System; using System.Collections.Generic; using System.Linq.Expressions; -using System.Text; -using System.Xml; using Paillave.Etl.XmlFile.Core.Mapping; namespace Paillave.Etl.XmlFile.Core diff --git a/src/Paillave.Etl.XmlFile/Core/XmlNodeDefinition.cs b/src/Paillave.Etl.XmlFile/Core/XmlNodeDefinition.cs index de58dbd5..ec499fed 100644 --- a/src/Paillave.Etl.XmlFile/Core/XmlNodeDefinition.cs +++ b/src/Paillave.Etl.XmlFile/Core/XmlNodeDefinition.cs @@ -40,8 +40,8 @@ private void SetFieldDefinition(XmlFieldDefinition xmlFieldDefinition) var existingFieldDefinition = _xmlFieldDefinitions.FirstOrDefault(i => i.TargetPropertyInfo.Name == xmlFieldDefinition.TargetPropertyInfo.Name); if (existingFieldDefinition == null) _xmlFieldDefinitions.Add(xmlFieldDefinition); - else - if (xmlFieldDefinition.NodePath != null) existingFieldDefinition.NodePath = xmlFieldDefinition.NodePath; + else if (xmlFieldDefinition.NodePath != null) + existingFieldDefinition.NodePath = xmlFieldDefinition.NodePath; } // public XmlNodeDefinition MapXPathToProperty(string valueXPathQuery, Expression> memberLambda) // { diff --git a/src/Paillave.Etl.XmlFile/Core/XmlNodeParsed.cs b/src/Paillave.Etl.XmlFile/Core/XmlNodeParsed.cs index 78f267df..e55bdb21 100644 --- a/src/Paillave.Etl.XmlFile/Core/XmlNodeParsed.cs +++ b/src/Paillave.Etl.XmlFile/Core/XmlNodeParsed.cs @@ -1,18 +1,28 @@ using System; using System.Collections.Generic; +using System.Collections.ObjectModel; namespace Paillave.Etl.XmlFile.Core { public class XmlNodeParsed { - public string SourceName { get; internal set; } - public string NodeDefinitionName { get; internal set; } - public string NodePath { get; internal set; } - public Type Type { get; internal set; } - public object Value { get; internal set; } + public XmlNodeParsed(string sourceName, string nodeDefinitionName, string nodePath, Type type, object value, IDictionary correlationKeys) + { + SourceName = sourceName; + NodeDefinitionName = nodeDefinitionName; + NodePath = nodePath; + Type = type; + Value = value; + CorrelationKeys = new ReadOnlyDictionary(correlationKeys); + } + public string SourceName { get; } + public string NodeDefinitionName { get; } + public string NodePath { get; } + public Type Type { get; } + public object Value { get; } public T GetValue() => (T)Value; // public object[] ParentValues { get; internal set; } // public T GetValue(int level = 0) => (T)(level == 0 ? Value : ParentValues[level - 1]); - public HashSet CorrelationKeys { get; set; } = new HashSet(); + public ReadOnlyDictionary CorrelationKeys { get; } } } diff --git a/src/Paillave.Etl.XmlFile/Core/XmlObjectReader.cs b/src/Paillave.Etl.XmlFile/Core/XmlObjectReader.cs index 2efb5413..c1c23ac0 100644 --- a/src/Paillave.Etl.XmlFile/Core/XmlObjectReader.cs +++ b/src/Paillave.Etl.XmlFile/Core/XmlObjectReader.cs @@ -7,162 +7,157 @@ using System.Threading; using System.Xml; -namespace Paillave.Etl.XmlFile.Core +namespace Paillave.Etl.XmlFile.Core; +[Obsolete] +public class XmlObjectReader : IXmlObjectReader { - public class XmlObjectReader + private class XmlReadField { - private class XmlReadField - { - public XmlFieldDefinition Definition { get; set; } - public IXmlNodeDefinition NodeDefinition { get; set; } - public int Depth { get; set; } - public object Value { get; set; } - } + public XmlFieldDefinition Definition { get; set; } + public IXmlNodeDefinition NodeDefinition { get; set; } + public int Depth { get; set; } + public object Value { get; set; } + } - private HashSet _xmlFieldsDefinitionSearch; - private HashSet _xmlNodesDefinitionSearch; + private HashSet _xmlFieldsDefinitionSearch; + private HashSet _xmlNodesDefinitionSearch; - private readonly List _inScopeReadFields = new List(); - private readonly XmlFileDefinition _xmlFileDefinition; + private readonly List _inScopeReadFields = new List(); + private readonly XmlFileDefinition _xmlFileDefinition; + private readonly string _sourceName; + private readonly Action _pushResult; - public XmlObjectReader(XmlFileDefinition xmlFileDefinition) - { - _xmlFileDefinition = xmlFileDefinition; - _xmlNodesDefinitionSearch = new HashSet(xmlFileDefinition.XmlNodeDefinitions.Select(i => i.NodePath).Distinct()); - _xmlFieldsDefinitionSearch = new HashSet(xmlFileDefinition.XmlNodeDefinitions.SelectMany(nd => nd.GetXmlFieldDefinitions().Select(fd => fd.NodePath)).Distinct()); - } - private bool XmlReadFieldShouldBeCleanedUp(XmlReadField xmlReadField, int depth) - { - var depthScope = xmlReadField.Definition.DepthScope; - int depthLimit; - if (depthScope > 0) - depthLimit = depthScope; - else - depthLimit = xmlReadField.Depth + depthScope; - return depth < depthLimit; - } - private void ProcessEndOfAnyNode(Stack nodes) - { - foreach (var item in _inScopeReadFields.Where(i => XmlReadFieldShouldBeCleanedUp(i, nodes.Count - 1)).ToList()) - _inScopeReadFields.Remove(item); - } - private void ProcessAttributeValue(string key, Stack nodes, string stringContent) + public XmlObjectReader(XmlFileDefinition xmlFileDefinition, string sourceName, Action pushResult) + { + _xmlFileDefinition = xmlFileDefinition; + this._sourceName = sourceName; + this._pushResult = pushResult; + _xmlNodesDefinitionSearch = new HashSet(xmlFileDefinition.XmlNodeDefinitions.Select(i => i.NodePath).Distinct()); + _xmlFieldsDefinitionSearch = new HashSet(xmlFileDefinition.XmlNodeDefinitions.SelectMany(nd => nd.GetXmlFieldDefinitions().Select(fd => fd.NodePath)).Distinct()); + } + private bool XmlReadFieldShouldBeCleanedUp(XmlReadField xmlReadField, int depth) + { + var depthScope = xmlReadField.Definition.DepthScope; + int depthLimit; + if (depthScope > 0) + depthLimit = depthScope; + else + depthLimit = xmlReadField.Depth + depthScope; + return depth < depthLimit; + } + private void ProcessEndOfAnyNode(Stack nodes) + { + foreach (var item in _inScopeReadFields.Where(i => XmlReadFieldShouldBeCleanedUp(i, nodes.Count - 1)).ToList()) + _inScopeReadFields.Remove(item); + } + private void ProcessAttributeValue(string key, Stack nodes, string stringContent) + { + // string key = $"/{string.Join("/", nodes.Reverse())}"; + if (!_xmlFieldsDefinitionSearch.Contains(key)) return; + var fds = _xmlFileDefinition.XmlNodeDefinitions.SelectMany(nd => nd.GetXmlFieldDefinitions().Select(fd => new { Fd = fd, Nd = nd })).Where(i => i.Fd.NodePath == key).ToList(); + if (string.IsNullOrWhiteSpace(stringContent)) { - // string key = $"/{string.Join("/", nodes.Reverse())}"; - if (!_xmlFieldsDefinitionSearch.Contains(key)) return; - var fds = _xmlFileDefinition.XmlNodeDefinitions.SelectMany(nd => nd.GetXmlFieldDefinitions().Select(fd => new { Fd = fd, Nd = nd })).Where(i => i.Fd.NodePath == key).ToList(); - if (string.IsNullOrWhiteSpace(stringContent)) + foreach (var fd in fds) { - foreach (var fd in fds) + _inScopeReadFields.Add(new XmlReadField { - _inScopeReadFields.Add(new XmlReadField - { - Depth = nodes.Count - 1, - Definition = fd.Fd, - NodeDefinition = fd.Nd, - Value = null - }); - } - } - else - { - foreach (var fd in fds) - { - _inScopeReadFields.Add(new XmlReadField - { - Depth = nodes.Count - 1, - Definition = fd.Fd, - NodeDefinition = fd.Nd, - Value = fd.Fd.Convert(stringContent) - }); - } + Depth = nodes.Count - 1, + Definition = fd.Fd, + NodeDefinition = fd.Nd, + Value = null + }); } } - private string ComputeKey(Stack nodes) => $"/{string.Join("/", nodes.Select(i => i.Name).Reverse())}"; - private void ProcessEndOfNode(Stack nodes, string text, Action pushResult, string sourceName) + else { - string key = ComputeKey(nodes); - if (_xmlFieldsDefinitionSearch.Contains(key)) + foreach (var fd in fds) { - ProcessAttributeValue(key, nodes, text); - } - else if (_xmlNodesDefinitionSearch.Contains(key)) - { - var (value, nd) = CreateValue(sourceName, key); - pushResult(new XmlNodeParsed + _inScopeReadFields.Add(new XmlReadField { - NodeDefinitionName = nd.Name, - SourceName = sourceName, - NodePath = nd.NodePath, - Type = nd.Type, - Value = value, - CorrelationKeys = nodes.Select(i => i.Guid).Where(i => i.HasValue).Select(i => i.Value).ToHashSet() + Depth = nodes.Count - 1, + Definition = fd.Fd, + NodeDefinition = fd.Nd, + Value = fd.Fd.Convert(stringContent) }); } - ProcessEndOfAnyNode(nodes); } - - private (object value, IXmlNodeDefinition nd) CreateValue(string sourceName, string key) + } + private string ComputeKey(Stack nodes) => $"/{string.Join("/", nodes.Select(i => i.Name).Reverse())}"; + private void ProcessEndOfNode(Stack nodes, string text, Action pushResult, string sourceName) + { + string key = ComputeKey(nodes); + if (_xmlFieldsDefinitionSearch.Contains(key)) { - var nd = _xmlFileDefinition.XmlNodeDefinitions.FirstOrDefault(i => i.NodePath == key); - var objectBuilder = new ObjectBuilder(nd.Type); - foreach (var inScopeReadField in _inScopeReadFields.Where(rf => rf.NodeDefinition.NodePath == key)) - objectBuilder.Values[inScopeReadField.Definition.TargetPropertyInfo.Name] = inScopeReadField.Value; - foreach (var propName in nd.GetXmlFieldDefinitions().Where(i => i.ForRowGuid).Select(i => i.TargetPropertyInfo.Name).ToList()) - objectBuilder.Values[propName] = Guid.NewGuid(); - foreach (var propName in nd.GetXmlFieldDefinitions().Where(i => i.ForSourceName).Select(i => i.TargetPropertyInfo.Name).ToList()) - objectBuilder.Values[propName] = sourceName; - return (objectBuilder.CreateInstance(), nd); + ProcessAttributeValue(key, nodes, text); } - - public void Read(Stream fileStream, string sourceName, Action pushResult, CancellationToken cancellationToken) + else if (_xmlNodesDefinitionSearch.Contains(key)) { - XmlReaderSettings xrs = new XmlReaderSettings(); - foreach (var item in _xmlFileDefinition.PrefixToUriNameSpacesDictionary) - xrs.Schemas.Add(item.Key, item.Value); - xrs.IgnoreWhitespace = true; - xrs.IgnoreComments = true; - xrs.IgnoreProcessingInstructions = true; + var (value, nd) = CreateValue(sourceName, key); + pushResult(new XmlNodeParsed(sourceName, nd.Name, nd.NodePath, nd.Type, value, new Dictionary())); + } + ProcessEndOfAnyNode(nodes); + } + + private (object value, IXmlNodeDefinition nd) CreateValue(string sourceName, string key) + { + var nd = _xmlFileDefinition.XmlNodeDefinitions.FirstOrDefault(i => i.NodePath == key); + var objectBuilder = new ObjectBuilder(nd.Type); + foreach (var inScopeReadField in _inScopeReadFields.Where(rf => rf.NodeDefinition.NodePath == key)) + objectBuilder.Values[inScopeReadField.Definition.TargetPropertyInfo.Name] = inScopeReadField.Value; + foreach (var propName in nd.GetXmlFieldDefinitions().Where(i => i.ForRowGuid).Select(i => i.TargetPropertyInfo.Name).ToList()) + objectBuilder.Values[propName] = Guid.NewGuid(); + foreach (var propName in nd.GetXmlFieldDefinitions().Where(i => i.ForSourceName).Select(i => i.TargetPropertyInfo.Name).ToList()) + objectBuilder.Values[propName] = sourceName; + return (objectBuilder.CreateInstance(), nd); + } - var xmlReader = XmlReader.Create(fileStream, xrs); - Stack nodes = new Stack(); - string lastTextValue = null; - while (xmlReader.Read()) + public void Read(Stream fileStream, CancellationToken cancellationToken) + { + XmlReaderSettings xrs = new XmlReaderSettings(); + foreach (var item in _xmlFileDefinition.PrefixToUriNameSpacesDictionary) + xrs.Schemas.Add(item.Key, item.Value); + xrs.IgnoreWhitespace = true; + xrs.IgnoreComments = true; + xrs.IgnoreProcessingInstructions = true; + + var xmlReader = XmlReader.Create(fileStream, xrs); + Stack nodes = new Stack(); + string lastTextValue = null; + while (xmlReader.Read()) + { + if (cancellationToken.IsCancellationRequested) break; + switch (xmlReader.NodeType) { - if (cancellationToken.IsCancellationRequested) break; - switch (xmlReader.NodeType) - { - case XmlNodeType.Element: - bool isEmptyElement = xmlReader.IsEmptyElement; - lastTextValue = null; - nodes.Push(new NodeLevel { Name = xmlReader.Name, Guid = Guid.NewGuid() }); - while (xmlReader.MoveToNextAttribute()) - { - nodes.Push(new NodeLevel { Name = $"@{xmlReader.Name}", Guid = null }); - ProcessAttributeValue(ComputeKey(nodes), nodes, xmlReader.Value); - nodes.Pop(); - } - if (isEmptyElement) - { - ProcessEndOfNode(nodes, null, pushResult, sourceName); - nodes.Pop(); - } - break; - case XmlNodeType.EndElement: - ProcessEndOfNode(nodes, lastTextValue, pushResult, sourceName); - lastTextValue = null; + case XmlNodeType.Element: + bool isEmptyElement = xmlReader.IsEmptyElement; + lastTextValue = null; + nodes.Push(new NodeLevel { Name = xmlReader.Name, Guid = Guid.NewGuid() }); + while (xmlReader.MoveToNextAttribute()) + { + nodes.Push(new NodeLevel { Name = $"@{xmlReader.Name}", Guid = null }); + ProcessAttributeValue(ComputeKey(nodes), nodes, xmlReader.Value); nodes.Pop(); - break; - case XmlNodeType.Text: - lastTextValue = xmlReader.Value; - break; - } + } + if (isEmptyElement) + { + ProcessEndOfNode(nodes, null, _pushResult, _sourceName); + nodes.Pop(); + } + break; + case XmlNodeType.EndElement: + ProcessEndOfNode(nodes, lastTextValue, _pushResult, _sourceName); + lastTextValue = null; + nodes.Pop(); + break; + case XmlNodeType.Text: + lastTextValue = xmlReader.Value; + break; } } - private struct NodeLevel - { - public string Name { get; set; } - public Guid? Guid { get; set; } - } + } + private struct NodeLevel + { + public string Name { get; set; } + public Guid? Guid { get; set; } } } diff --git a/src/Paillave.Etl.XmlFile/Core/XmlObjectReaderV2.cs b/src/Paillave.Etl.XmlFile/Core/XmlObjectReaderV2.cs new file mode 100644 index 00000000..fde3f7a9 --- /dev/null +++ b/src/Paillave.Etl.XmlFile/Core/XmlObjectReaderV2.cs @@ -0,0 +1,165 @@ +using Paillave.Etl.Core; +using Paillave.Etl.XmlFile.Core.Mapping; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Xml; + +namespace Paillave.Etl.XmlFile.Core; +public class XmlObjectReaderV2 : IXmlObjectReader +{ + private readonly NodePropertyBags _nodePropertyBags; + private readonly XmlFileDefinition _xmlFileDefinition; + + public XmlObjectReaderV2(XmlFileDefinition xmlFileDefinition, string sourceName, Action pushResult) + { + _nodePropertyBags = new NodePropertyBags(sourceName, xmlFileDefinition, pushResult); + _xmlFileDefinition = xmlFileDefinition; + } + private class XmlPath + { + private readonly struct NodeLevel + { + public NodeLevel(string node, Guid correlationId) + => (Node, CorrelationId) = (node, correlationId); + public string Node { get; } + public Guid CorrelationId { get; } + } + private readonly Stack _nodes = new(); + private string? _attribute = null; + public void UnStackAttribute() => _attribute = null; + public void StackAttribute(string attribute) => _attribute = attribute; + public void StackNode(string node) => _nodes.Push(new NodeLevel(node, Guid.NewGuid())); + public void UnStackNode() => _nodes.Pop(); + public string GetPath() => $"/{string.Join("/", _nodes.Select((i) => i.Node).Reverse())}{(_attribute == null ? "" : $"/@{_attribute}")}"; + public HashSet GetCorrelationKeys() => _nodes.Select(i => i.CorrelationId).ToHashSet(); + public override string ToString() => GetPath(); + } + private class NodePropertyBags + { + private readonly Dictionary _propertyBags; + private readonly Action _pushResult; + public NodePropertyBags(string sourceName, XmlFileDefinition xmlFileDefinition, Action pushResult) + { + _propertyBags = xmlFileDefinition.XmlNodeDefinitions.ToDictionary(i => i.NodePath, i => new PropertyBag(sourceName, i)); + _pushResult = pushResult; + } + public void SetValue(string key, string? value) + { + foreach (var propertyBag in _propertyBags) + propertyBag.Value.SetValue(key, value); + } + public void StartNewNode(string key) + { + if (_propertyBags.TryGetValue(key, out var propertyBag)) + propertyBag.ResetValues(); + } + public void EndNode(string key) + { + if (_propertyBags.TryGetValue(key, out var propertyBag)) + { + var value = propertyBag.CreateRow(); + _pushResult(new XmlNodeParsed( + propertyBag.SourceName, + propertyBag.XmlNodeDefinition.Name, + propertyBag.XmlNodeDefinition.NodePath, + propertyBag.XmlNodeDefinition.Type, + value, + new Dictionary())); + } + } + } + private class PropertyBag + { + public string SourceName { get; } + public IXmlNodeDefinition XmlNodeDefinition { get; } + private readonly List _xmlFieldDefinitions; + private readonly HashSet _valuesPath; + private readonly Dictionary _xmlValues = new Dictionary(); + + public PropertyBag(string sourceName, IXmlNodeDefinition xmlNodeDefinition) + { + SourceName = sourceName; + this.XmlNodeDefinition = xmlNodeDefinition; + _xmlFieldDefinitions = xmlNodeDefinition.GetXmlFieldDefinitions().ToList(); + this._valuesPath = _xmlFieldDefinitions.Select(i => i.NodePath).ToHashSet(); + } + + public void SetValue(string key, string? value) + { + if (string.IsNullOrWhiteSpace(value)) + return; + + if (_valuesPath.Contains(key)) + _xmlValues[key] = value; + } + public object CreateRow() + { + var objectBuilder = new ObjectBuilder(XmlNodeDefinition.Type); + var matchingProperties = _xmlFieldDefinitions.Join(_xmlValues, i => i.NodePath, i => i.Key, (xmlFieldDefinition, xmlFieldDefinitionValue) => new { xmlFieldDefinition, xmlFieldDefinitionValue }).ToList(); + foreach (var matchingProperty in matchingProperties) + objectBuilder.Values[matchingProperty.xmlFieldDefinition.TargetPropertyInfo.Name] = matchingProperty.xmlFieldDefinition.Convert(matchingProperty.xmlFieldDefinitionValue.Value); + foreach (var propName in _xmlFieldDefinitions.Where(i => i.ForRowGuid).Select(i => i.TargetPropertyInfo.Name).ToList()) + objectBuilder.Values[propName] = Guid.NewGuid(); + foreach (var propName in _xmlFieldDefinitions.Where(i => i.ForSourceName).Select(i => i.TargetPropertyInfo.Name).ToList()) + objectBuilder.Values[propName] = SourceName; + return objectBuilder.CreateInstance(); + } + public void ResetValues() + { + foreach (var item in _xmlValues.Where(i => i.Key.StartsWith(XmlNodeDefinition.NodePath)).ToList()) + _xmlValues.Remove(item.Key); + } + } + + public void Read(Stream fileStream, CancellationToken cancellationToken) + { + XmlReaderSettings xrs = new XmlReaderSettings(); + foreach (var item in _xmlFileDefinition.PrefixToUriNameSpacesDictionary) + xrs.Schemas.Add(item.Key, item.Value); + xrs.IgnoreWhitespace = true; + xrs.IgnoreComments = true; + xrs.IgnoreProcessingInstructions = true; + + var xmlPath = new XmlPath(); + + var xmlReader = XmlReader.Create(fileStream, xrs); + string? lastTextValue = null; + while (xmlReader.Read()) + { + if (cancellationToken.IsCancellationRequested) break; + switch (xmlReader.NodeType) + { + case XmlNodeType.Element: + lastTextValue = null; + bool isEmptyElement = xmlReader.IsEmptyElement; + xmlPath.StackNode(xmlReader.Name); + _nodePropertyBags.StartNewNode(xmlPath.ToString()); + while (xmlReader.MoveToNextAttribute()) + { + if (cancellationToken.IsCancellationRequested) break; + xmlPath.StackAttribute(xmlReader.Name); + _nodePropertyBags.SetValue(xmlPath.ToString(), xmlReader.Value); + xmlPath.UnStackAttribute(); + } + if (isEmptyElement) + { + _nodePropertyBags.EndNode(xmlPath.ToString()); + xmlPath.UnStackNode(); + } + break; + case XmlNodeType.EndElement: + _nodePropertyBags.SetValue(xmlPath.ToString(), lastTextValue); + _nodePropertyBags.EndNode(xmlPath.ToString()); + lastTextValue = null; + xmlPath.UnStackNode(); + break; + case XmlNodeType.Text: + lastTextValue = xmlReader.Value; + break; + } + } + } +} diff --git a/src/Paillave.Etl.XmlFile/XmlFileValuesProvider.cs b/src/Paillave.Etl.XmlFile/XmlFileValuesProvider.cs index 9efd150f..0f9fa472 100644 --- a/src/Paillave.Etl.XmlFile/XmlFileValuesProvider.cs +++ b/src/Paillave.Etl.XmlFile/XmlFileValuesProvider.cs @@ -21,8 +21,8 @@ public class XmlFileValuesProvider : ValuesProviderBase push, CancellationToken cancellationToken, IExecutionContext context) { using var stream = input.Get(_args.UseStreamCopy); - XmlObjectReader xmlObjectReader = new XmlObjectReader(_args.XmlFileDefinition); - xmlObjectReader.Read(stream, input.Name, push, cancellationToken); + IXmlObjectReader xmlObjectReader = new XmlObjectReaderV2(_args.XmlFileDefinition, input.Name, push); + xmlObjectReader.Read(stream, cancellationToken); } } } \ No newline at end of file diff --git a/src/Paillave.Etl.XmlFile/XmlNodeOfTypeStreamNode.cs b/src/Paillave.Etl.XmlFile/XmlNodeOfTypeStreamNode.cs index dfae3af8..8e252b4b 100644 --- a/src/Paillave.Etl.XmlFile/XmlNodeOfTypeStreamNode.cs +++ b/src/Paillave.Etl.XmlFile/XmlNodeOfTypeStreamNode.cs @@ -38,7 +38,7 @@ protected override IStream> CreateOutputStream(XmlNodeOfTypeFil obs = obs.Filter(i => i.NodeDefinitionName == args.NodeDefinitionName); return CreateUnsortedStream(obs.Map(i => new Correlated { - CorrelationKeys = i.CorrelationKeys, + CorrelationKeys = default, Row = (TOut)i.Value })); } diff --git a/src/Paillave.Etl.XmlFileTests/Paillave.Etl.XmlFileTests.csproj b/src/Paillave.Etl.XmlFileTests/Paillave.Etl.XmlFileTests.csproj new file mode 100644 index 00000000..7e4fc41d --- /dev/null +++ b/src/Paillave.Etl.XmlFileTests/Paillave.Etl.XmlFileTests.csproj @@ -0,0 +1,24 @@ + + + + net7.0 + latest + enable + + false + + + + + + + + + + + + + + + + diff --git a/src/Paillave.Etl.XmlFileTests/XmlObjectReaderV2Test.cs b/src/Paillave.Etl.XmlFileTests/XmlObjectReaderV2Test.cs new file mode 100644 index 00000000..ef079784 --- /dev/null +++ b/src/Paillave.Etl.XmlFileTests/XmlObjectReaderV2Test.cs @@ -0,0 +1,172 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading; +using Paillave.Etl.XmlFile.Core; +using Xunit; + +namespace Paillave.Etl.XmlFileTests +{ + public class XmlObjectReaderV2Tests + { + [Fact] + public void Read_SimpleXmlWithAttributes_ParsesCorrectly2() + { + // Arrange + var xml = @" + + root data + + 30 +
+ 123 Main St + Springfield +
+
+ + 32 + + + 32 +
+ Big City +
+
+
"; + + var definition = new XmlFileDefinition(); + definition.AddNodeDefinition( + "person", + "/root/person", + i => new TestPerson + { + Id = i.ToXPathQuery("/root/person/@id"), + FirstName = i.ToXPathQuery("/root/person/@firstName"), + LastName = i.ToXPathQuery("/root/person/@lastName"), + Age = i.ToXPathQuery("/root/person/age"), + Street = i.ToXPathQuery("/root/person/address/street"), + City = i.ToXPathQuery("/root/person/address/city"), + RootId = i.ToXPathQuery("/root/@rootId"), + RootData = i.ToXPathQuery("/root/data"), + }); + definition.AddNodeDefinition( + "company", + "/root/company", + i => new TestCompany + { + Id = i.ToXPathQuery("/root/company/@id"), + Name = i.ToXPathQuery("/root/company/@name"), + Street = i.ToXPathQuery("/root/company/address/street"), + City = i.ToXPathQuery("/root/company/address/city"), + RootId = i.ToXPathQuery("/root/@rootId"), + RootData = i.ToXPathQuery("/root/data"), + }); + + var results = new List(); + var reader = new XmlObjectReaderV2(definition, "test", results.Add); + + // Act + using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml))) + { + reader.Read(stream, CancellationToken.None); + } + + var people = results.FindAll(i => i.NodeDefinitionName == "person").ToList(); + var person1 = people[0].Value as TestPerson; + Assert.Equal(1, person1.Id); + Assert.Equal("John", person1.FirstName); + Assert.Equal("Doe", person1.LastName); + Assert.Equal(30, person1.Age); + Assert.Equal("123 Main St", person1.Street); + Assert.Equal("Springfield", person1.City); + Assert.Equal("a root id", person1.RootId); + Assert.Equal("root data", person1.RootData); + var person2 = results[1].Value as TestPerson; + Assert.Equal(2, person2.Id); + Assert.Equal("Coucou", person2.FirstName); + Assert.Null(person2.LastName); + Assert.Equal(32, person2.Age); + Assert.Null(person2.Street); + Assert.Null(person2.City); + Assert.Equal("a root id", person2.RootId); + Assert.Equal("root data", person2.RootData); + + var companies = results.FindAll(i => i.NodeDefinitionName == "company").ToList(); + var company1 = companies[0].Value as TestCompany; + Assert.Equal(3, company1.Id); + Assert.Equal("MyCompany", company1.Name); + Assert.Null(company1.Street); + Assert.Equal("Big City", company1.City); + Assert.Equal("a root id", company1.RootId); + Assert.Equal("root data", company1.RootData); + } + [Fact] + public void Read_SimpleXmlWithAttributes_ParsesCorrectly1() + { + // Arrange + var xml = @" + + root data + + 30 + + "; + + var definition = new XmlFileDefinition(); + definition.AddNodeDefinition( + "person", + "/root/person", + i => new TestPerson + { + Id = i.ToXPathQuery("/root/person/@id"), + FirstName = i.ToXPathQuery("/root/person/@firstName"), + LastName = i.ToXPathQuery("/root/person/@lastName"), + Age = i.ToXPathQuery("/root/person/age"), + RootId = i.ToXPathQuery("/root/@rootId"), + RootData = i.ToXPathQuery("/root/data"), + }); + + var results = new List(); + var reader = new XmlObjectReaderV2(definition, "test", results.Add); + + // Act + using (var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml))) + { + reader.Read(stream, CancellationToken.None); + } + + // Assert + Assert.Single(results); + var person = results[0].Value as TestPerson; + Assert.Equal(1, person.Id); + Assert.Equal("John", person.FirstName); + Assert.Equal("Doe", person.LastName); + Assert.Equal(30, person.Age); + Assert.Equal("a root id", person.RootId); + Assert.Equal("root data", person.RootData); + } + + private class TestPerson + { + public int Id { get; set; } + public string FirstName { get; set; } + public string? LastName { get; set; } + public int Age { get; set; } + public string? Street { get; set; } + public string? City { get; set; } + public string RootId { get; set; } + public string RootData { get; set; } + } + private class TestCompany + { + public int Id { get; set; } + public string Name { get; set; } + public string? Street { get; set; } + public string? City { get; set; } + public string RootId { get; set; } + public string RootData { get; set; } + } + } +} \ No newline at end of file diff --git a/src/Paillave.Etl.sln b/src/Paillave.Etl.sln index e4337d61..f9e0a6e0 100644 --- a/src/Paillave.Etl.sln +++ b/src/Paillave.Etl.sln @@ -52,6 +52,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Paillave.Etl.S3", "Paillave EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Paillave.Etl.Http", "Paillave.Etl.Http\Paillave.Etl.Http.csproj", "{2D3EFDF5-6DF6-455A-81D3-6ADE97261AFE}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Paillave.Etl.XmlFileTests", "Paillave.Etl.XmlFileTests\Paillave.Etl.XmlFileTests.csproj", "{D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -362,6 +364,18 @@ Global {2D3EFDF5-6DF6-455A-81D3-6ADE97261AFE}.Release|x64.Build.0 = Release|Any CPU {2D3EFDF5-6DF6-455A-81D3-6ADE97261AFE}.Release|x86.ActiveCfg = Release|Any CPU {2D3EFDF5-6DF6-455A-81D3-6ADE97261AFE}.Release|x86.Build.0 = Release|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Debug|x64.ActiveCfg = Debug|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Debug|x64.Build.0 = Debug|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Debug|x86.ActiveCfg = Debug|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Debug|x86.Build.0 = Debug|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Release|Any CPU.Build.0 = Release|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Release|x64.ActiveCfg = Release|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Release|x64.Build.0 = Release|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Release|x86.ActiveCfg = Release|Any CPU + {D3F4F6F9-F964-436D-8C16-4FEFD97F2EEE}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE