From c7b108f831fa2a5483532058cdc6fb67cf3afdaa Mon Sep 17 00:00:00 2001 From: Mikayla Hutchinson Date: Wed, 4 Sep 2024 03:07:15 -0400 Subject: [PATCH] Fix nasty bug in spine recovery The tests were accidentally "cheating" by recovering from the position before the actual target and catching up, so did not trigger this bug. Add a couple more granualar recovery tests to make it easier to debug these kinds of issues. --- Core.Tests/Parser/ParsingTests.cs | 15 ++++++++++++--- Core/Parser/XmlProcessingInstructionState.cs | 5 +++++ Core/Parser/XmlSpineParser.cs | 15 ++++++++++----- Core/Parser/XmlTagState.cs | 2 +- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Core.Tests/Parser/ParsingTests.cs b/Core.Tests/Parser/ParsingTests.cs index f3098f8..615d415 100644 --- a/Core.Tests/Parser/ParsingTests.cs +++ b/Core.Tests/Parser/ParsingTests.cs @@ -515,6 +515,13 @@ public void SpineParserRecoveryXhtmlStrictSchema () using var sr = new StreamReader (ResourceManager.GetXhtmlStrictSchema ()); var docTxt = sr.ReadToEnd (); + SpineParserRecovery (docTxt, 1127391); + } + + [TestCase("foo\\", 25)] + [TestCase("", 130)] + public void SpineParserRecovery (string docTxt, int expectedDelta) + { var rootState = CreateRootState (); var treeParser = new XmlTreeParser (rootState); foreach (char c in docTxt) { @@ -530,7 +537,7 @@ public void SpineParserRecoveryXhtmlStrictSchema () char c = docTxt[i]; spineParser.Push (c); - var recoveredParser = XmlSpineParser.FromDocumentPosition (rootState, doc, i).AssertNotNull (); + var recoveredParser = XmlSpineParser.FromDocumentPosition (rootState, doc, spineParser.Position).AssertNotNull (); var delta = i - recoveredParser.Position; totalNotRecovered += delta; @@ -542,12 +549,14 @@ public void SpineParserRecoveryXhtmlStrictSchema () AssertEqual (spineParser.GetContext (), recoveredParser.GetContext ()); } + /* int total = docTxt.Length * docTxt.Length / 2; float recoveryRate = 1f - totalNotRecovered / (float)total; TestContext.WriteLine ($"Recovered {(recoveryRate * 100f):F2}%"); + */ // check it never regresses - Assert.LessOrEqual (totalNotRecovered, 1118088); + Assert.LessOrEqual (totalNotRecovered, expectedDelta); } [TestCase ("\r\n\r\n", "\r\n")] @@ -574,7 +583,7 @@ public void SpineParserRecoverFromError (string docTxt, string recoverFromDoc) char c = docTxt[i]; spineParser.Push (c); - var recoveredParser = XmlSpineParser.FromDocumentPosition (rootState, doc, Math.Min (i, maxCompat)).AssertNotNull (); + var recoveredParser = XmlSpineParser.FromDocumentPosition (rootState, doc, Math.Min (i, maxCompat)); var end = Math.Min (i + 1, docTxt.Length); for (int j = recoveredParser.Position; j < end; j++) { diff --git a/Core/Parser/XmlProcessingInstructionState.cs b/Core/Parser/XmlProcessingInstructionState.cs index d4ebbe0..546ba30 100644 --- a/Core/Parser/XmlProcessingInstructionState.cs +++ b/Core/Parser/XmlProcessingInstructionState.cs @@ -82,6 +82,11 @@ public class XmlProcessingInstructionState : XmlParserState parents.Push (new XProcessingInstruction (pi.Span.Start)); } + // still in RootState + if (length == 1) { + return null; + } + return new ( currentState: this, position: position, diff --git a/Core/Parser/XmlSpineParser.cs b/Core/Parser/XmlSpineParser.cs index 5db89c0..854a286 100644 --- a/Core/Parser/XmlSpineParser.cs +++ b/Core/Parser/XmlSpineParser.cs @@ -58,10 +58,15 @@ public XmlSpineParser (XmlParserContext context, XmlRootState rootState) : base /// the parser is not guaranteed but will not exceed . /// /// - public static XmlSpineParser? FromDocumentPosition (XmlRootState stateMachine, XDocument xdocument, int maximumPosition) - => xdocument.FindAtOrBeforeOffset (maximumPosition) is XObject obj - && stateMachine.TryRecreateState (ref obj, maximumPosition) is XmlParserContext ctx - ? new XmlSpineParser (ctx, stateMachine) - : null; + public static XmlSpineParser FromDocumentPosition (XmlRootState stateMachine, XDocument xdocument, int maximumPosition) + { + // Recovery must be based on the node before the target position. + // The state for the node at the position won't be entered until the character at (i.e. after) the position is processed. + var node = maximumPosition == 0? xdocument : xdocument.FindAtOrBeforeOffset (maximumPosition - 1); + if (node is XObject obj && stateMachine.TryRecreateState (ref obj, maximumPosition) is XmlParserContext ctx) { + return new XmlSpineParser (ctx, stateMachine); + } + return new XmlSpineParser (stateMachine); ; + } } } diff --git a/Core/Parser/XmlTagState.cs b/Core/Parser/XmlTagState.cs index d9c98e3..4fd8331 100644 --- a/Core/Parser/XmlTagState.cs +++ b/Core/Parser/XmlTagState.cs @@ -226,7 +226,7 @@ public XmlTagState (XmlAttributeState attributeState, XmlNameState nameState) newEl.Attributes.AddAttribute ((XAttribute)att.ShallowCopy ()); continue; } - if (att.Span.End > position) { + if (att.Span.End >= position) { foundPosition = Math.Min (position, att.Span.Start); break; }