From f8bff9009a28e20ef2a74625d33f45382e57e645 Mon Sep 17 00:00:00 2001 From: nietras Date: Sun, 10 Mar 2024 15:34:00 +0100 Subject: [PATCH] Fix SepParser*s buffer overrun bug when column count is equal to internal capacity and new row after (#109) Fixes #108 Bug likely present since 0.4.0 when parsing multiple rows at a time was introduced. --- src/Sep.Test/SepReaderTest.cs | 42 +++++++++++++--- .../SepParserAvx2PackCmpOrMoveMaskTzcnt.cs | 7 ++- .../SepParserAvx512PackCmpOrMoveMaskTzcnt.cs | 7 ++- src/Sep/Internals/SepParserIndexOfAny.cs | 16 +++--- .../SepParserSse2PackCmpOrMoveMaskTzcnt.cs | 7 ++- .../SepParserVector128NrwCmpExtMsbTzcnt.cs | 7 ++- .../SepParserVector256NrwCmpExtMsbTzcnt.cs | 7 ++- .../SepParserVector512NrwCmpExtMsbTzcnt.cs | 7 ++- .../SepParserVector64NrwCmpExtMsbTzcnt.cs | 7 ++- src/Sep/SepReader.cs | 49 ++++++++++++------- src/Sep/SepReaderState.cs | 2 + 11 files changed, 109 insertions(+), 49 deletions(-) diff --git a/src/Sep.Test/SepReaderTest.cs b/src/Sep.Test/SepReaderTest.cs index bee0d32a..ed4f3140 100644 --- a/src/Sep.Test/SepReaderTest.cs +++ b/src/Sep.Test/SepReaderTest.cs @@ -3,6 +3,7 @@ using System.Diagnostics.Contracts; using System.IO; using System.Linq; +using System.Numerics; using System.Text; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -444,24 +445,51 @@ public void SepReaderTest_CarriageReturnLineFeedEvenOrOdd_ToEnsureLineFeedReadAf [TestMethod] public void SepReaderTest_ColsInitialLength() { - var initialColCountCapacity = SepReader.ColEndsInitialLength - 1; // -1 since col ends is 1 longer due to having row start - var text = "A" + Environment.NewLine + new string(';', initialColCountCapacity - 1); + var colCount = SepReader.ColEndsInitialLength - 1; // -1 since col ends is 1 longer due to having row start + var text = "A" + Environment.NewLine + new string(';', colCount - 1); using var reader = Sep.Reader(o => o with { DisableColCountCheck = true }).FromText(text); Assert.IsTrue(reader.MoveNext()); var row = reader.Current; - Assert.AreEqual(initialColCountCapacity, row.ColCount); + Assert.AreEqual(colCount, row.ColCount); } [TestMethod] public void SepReaderTest_ExceedingColsInitialLength_WorksByDoublingCapacity() { - var initialColCountCapacity = SepReader.ColEndsInitialLength; - var text = "A" + Environment.NewLine + new string(';', initialColCountCapacity - 1); + var colCount = SepReader.ColEndsInitialLength; + var text = "A" + Environment.NewLine + new string(';', colCount - 1); using var reader = Sep.Reader(o => o with { DisableColCountCheck = true }).FromText(text); Assert.IsTrue(reader.MoveNext()); var row = reader.Current; - Assert.AreEqual(initialColCountCapacity, row.ColCount); - Assert.AreEqual(initialColCountCapacity * 2, reader._colEndsOrColInfos.Length); + Assert.AreEqual(colCount, row.ColCount); + Assert.AreEqual(colCount * 2, reader._colEndsOrColInfos.Length); + } + + [TestMethod] + public void SepReaderTest_ColInfosLength_ArgumentOutOfRangeException_Issue_108() + { + // At any time during parsing there may be an incomplete row e.g. a + // parsing row, when then new rows are about to be parsed e.g. in + // ParseNewRows(). The col ends/infos for that row need to be copied to + // beginning before new rows are found. At any time these col infos + // should never exceed the end of the array of col infos. However, a bug + // was present <= 0.4.3 as reported in issue #108 + // https://github.com/nietras/Sep/issues/108 where this was the case and + // an `ArgumentOutOfRangeException` would occur on the slicing that + // happens when these col infos are to be copied to beginning. This test + // triggers that issue. + var colCounts = Enumerable.Range(SepReader.ColEndsInitialLength - 1, 1); + var charsLength = (int)BitOperations.RoundUpToPowerOf2(SepReader.CharsMinimumLength); + foreach (var colCount in colCounts) + { + var text = new string('A', Math.Max(1, charsLength - colCount + 1)) + + new string(';', colCount - 1) + Environment.NewLine + + new string(';', colCount * 2); + using var reader = Sep + .Reader(o => o with { HasHeader = false, DisableColCountCheck = true }) + .FromText(text); + while (reader.MoveNext()) { } + } } #if !SEPREADERTRACE // Causes OOMs in Debug due to tracing diff --git a/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs b/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs index 42a35c77..3a06e27c 100644 --- a/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs +++ b/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs @@ -81,7 +81,9 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); charsIndex -= VecUI8.Count; LOOPSTEP: @@ -89,7 +91,7 @@ void Parse(SepReaderState s) LOOPNOSTEP: if (charsIndex < charsEnd && // If current is greater than or equal than "stop", then there is no - // longer guaranteed space enough for next VecUI8.Count. + // longer guaranteed space enough for next VecUI8.Count + next row start. !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex); @@ -159,6 +161,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; diff --git a/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs b/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs index e1b9b40d..a149e917 100644 --- a/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs +++ b/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs @@ -85,7 +85,9 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); charsIndex -= VecUI8.Count; LOOPSTEP: @@ -93,7 +95,7 @@ void Parse(SepReaderState s) LOOPNOSTEP: if (charsIndex < charsEnd && // If current is greater than or equal than "stop", then there is no - // longer guaranteed space enough for next VecUI8.Count. + // longer guaranteed space enough for next VecUI8.Count + next row start. !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex); @@ -164,6 +166,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; diff --git a/src/Sep/Internals/SepParserIndexOfAny.cs b/src/Sep/Internals/SepParserIndexOfAny.cs index bac520a8..f7e7859d 100644 --- a/src/Sep/Internals/SepParserIndexOfAny.cs +++ b/src/Sep/Internals/SepParserIndexOfAny.cs @@ -18,7 +18,7 @@ public unsafe SepParserIndexOfAny(Sep sep) _specialChars = new[] { sep.Separator, CarriageReturn, LineFeed, Quote }; } - public int PaddingLength => 0; + public int PaddingLength => 4; public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] @@ -65,11 +65,14 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - 3); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - 3 - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); var span = chars.AsSpan(0, charsEnd); var specialCharsSpan = _specialChars.AsSpan(); - while ((uint)charsIndex < (uint)charsEnd) + while ((uint)charsIndex < (uint)charsEnd && + !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // https://github.com/dotnet/runtime/blob/942ce9af6e4858b74cc3a1429e9a64065ffb207a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs#L1926-L2045 var relativeIndex = span.Slice(charsIndex).IndexOfAny(specialCharsSpan); @@ -96,6 +99,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; @@ -107,12 +111,6 @@ void Parse(SepReaderState s) break; } } - // If current is greater than or equal than "stop", then break. - // There is no longer guaranteed space enough for next. - if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) - { - break; - } } else { diff --git a/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs b/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs index 0e3d261b..87272c75 100644 --- a/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs +++ b/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs @@ -80,7 +80,9 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); charsIndex -= VecUI8.Count; LOOPSTEP: @@ -88,7 +90,7 @@ void Parse(SepReaderState s) LOOPNOSTEP: if (charsIndex < charsEnd && // If current is greater than or equal than "stop", then there is no - // longer guaranteed space enough for next VecUI8.Count. + // longer guaranteed space enough for next VecUI8.Count + next row start. !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex); @@ -156,6 +158,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; diff --git a/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs index 0a0562bb..c3ecdb56 100644 --- a/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs @@ -82,7 +82,9 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); charsIndex -= VecUI8.Count; LOOPSTEP: @@ -90,7 +92,7 @@ void Parse(SepReaderState s) LOOPNOSTEP: if (charsIndex < charsEnd && // If current is greater than or equal than "stop", then there is no - // longer guaranteed space enough for next VecUI8.Count. + // longer guaranteed space enough for next VecUI8.Count + next row start. !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex); @@ -160,6 +162,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; diff --git a/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs index 8e7879b9..cff62eff 100644 --- a/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs @@ -81,7 +81,9 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); charsIndex -= VecUI8.Count; LOOPSTEP: @@ -89,7 +91,7 @@ void Parse(SepReaderState s) LOOPNOSTEP: if (charsIndex < charsEnd && // If current is greater than or equal than "stop", then there is no - // longer guaranteed space enough for next VecUI8.Count. + // longer guaranteed space enough for next VecUI8.Count + next row start. !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex); @@ -159,6 +161,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; diff --git a/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs index 6d525750..435c55c0 100644 --- a/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs @@ -83,7 +83,9 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); charsIndex -= VecUI8.Count; LOOPSTEP: @@ -91,7 +93,7 @@ void Parse(SepReaderState s) LOOPNOSTEP: if (charsIndex < charsEnd && // If current is greater than or equal than "stop", then there is no - // longer guaranteed space enough for next VecUI8.Count. + // longer guaranteed space enough for next VecUI8.Count + next row start. !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex); @@ -161,6 +163,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; diff --git a/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs index fc75f2e6..8f6ebd64 100644 --- a/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs @@ -81,7 +81,9 @@ void Parse(SepReaderState s) ref var colInfosRefOrigin = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex); ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex); - ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count); + ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength); + var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount; + ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength); charsIndex -= VecUI8.Count; LOOPSTEP: @@ -89,7 +91,7 @@ void Parse(SepReaderState s) LOOPNOSTEP: if (charsIndex < charsEnd && // If current is greater than or equal than "stop", then there is no - // longer guaranteed space enough for next VecUI8.Count. + // longer guaranteed space enough for next VecUI8.Count + next row start. !IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex); @@ -159,6 +161,7 @@ void Parse(SepReaderState s) ++s._parsedRowsCount; // Next row start (one before) colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); + A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd)); colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0); // Update for next row colInfosRef = ref colInfosRefCurrent; diff --git a/src/Sep/SepReader.cs b/src/Sep/SepReader.cs index 05029eac..278377c7 100644 --- a/src/Sep/SepReader.cs +++ b/src/Sep/SepReader.cs @@ -191,6 +191,8 @@ internal bool ParseNewRows() _currentRowColCount = -1; _currentRowColEndsOrInfosOffset = 0; + CheckPoint($"{nameof(ParseNewRows)} BEGINNING"); + // Move data to start if (_parsingRowCharsStartIndex > 0) { @@ -201,6 +203,8 @@ internal bool ParseNewRows() A.Assert(_charsParseStart >= offset); _charsParseStart -= offset; + A.Assert((_parsingRowColEndsOrInfosStartIndex + _parsingRowColCount + 1) * GetIntegersPerColInfo() <= _colEndsOrColInfos.Length); + // Adjust found current row col infos, note includes col count since +1 if (_colUnquoteUnescape == 0) { @@ -226,8 +230,9 @@ internal bool ParseNewRows() var intsPerColInfo = GetIntegersPerColInfo(); var colInfosSpan = _colEndsOrColInfos.AsSpan(); var length = (_parsingRowColCount + 1) * intsPerColInfo; - colInfosSpan.Slice(_parsingRowColEndsOrInfosStartIndex * intsPerColInfo, length) - .CopyTo(colInfosSpan.Slice(0, length)); + var source = colInfosSpan.Slice(_parsingRowColEndsOrInfosStartIndex * intsPerColInfo, length); + var destination = colInfosSpan.Slice(0, length); + source.CopyTo(destination); _parsingRowColEndsOrInfosStartIndex = 0; } @@ -309,30 +314,35 @@ bool EnsureInitializeAndReadData(bool endOfFile) CheckPoint($"{nameof(TryDetectSeparatorInitializeParser)} AFTER"); } - if (_parser != null && _charsParseStart < _charsDataEnd) - { - // + 1 - must be room for one more col always - if ((_parsingRowColEndsOrInfosStartIndex + _parsingRowColCount + 1) >= (GetColInfosLength() - _parser.PaddingLength)) - { - DoubleColInfosCapacityCopyState(); - } - } - else + if (_parser == null || _charsParseStart >= _charsDataEnd) { if (nothingLeftToRead) { - // + 1 - must be room for one more col always - if ((_parsingRowColEndsOrInfosStartIndex + _parsingRowColCount + 1) >= GetColInfosLength()) - { - DoubleColInfosCapacityCopyState(); - } + // Make sure room for any col at end of file + CheckColInfosCapacityMaybeDouble(paddingLength: 0); // If nothing has been read, then at end of file. endOfFile = true; } } + else + { + CheckColInfosCapacityMaybeDouble(_parser.PaddingLength); + } return endOfFile; } + void CheckColInfosCapacityMaybeDouble(int paddingLength) + { + // Potential end is current parsing end plus maximum col infos for next parse loop + var parsingRowColInfosEnd = _parsingRowColEndsOrInfosStartIndex + _parsingRowColCount; + var colInfosPotentialEnd = parsingRowColInfosEnd + paddingLength + ColEndsOrInfosExtraEndCount; + var colInfosLength = GetColInfosLength(); + if (colInfosLength < colInfosPotentialEnd) + { + DoubleColInfosCapacityCopyState(); + } + } + void DoubleColInfosCapacityCopyState() { var previousColEnds = _colEndsOrColInfos; @@ -444,7 +454,7 @@ void TryDetectSeparatorInitializeParser(bool nothingLeftToRead) } [ExcludeFromCodeCoverage] - [Conditional(TraceCondition), Conditional("SEPREADERCHECKPOINT")] + [Conditional(TraceCondition), Conditional(AssertCondition)] void CheckPoint(string name, [CallerFilePath] string filePath = "", [CallerLineNumber] int lineNumber = 0) { TraceState(name, filePath, lineNumber); @@ -488,12 +498,13 @@ void AssertState(string name, [CallerFilePath] string filePath = "", [CallerLine A.Assert(0 <= _charsDataEnd && _charsDataEnd <= _chars.Length, $"{name}", filePath, lineNumber); A.Assert(_charsDataStart <= _charsDataEnd, $"{name}", filePath, lineNumber); A.Assert(_charsDataStart <= _parsingRowCharsStartIndex && _parsingRowCharsStartIndex <= _charsDataEnd, $"{name}", filePath, lineNumber); + A.Assert((_parsingRowColEndsOrInfosStartIndex + _parsingRowColCount + 1) * GetIntegersPerColInfo() <= _colEndsOrColInfos.Length); if (_colUnquoteUnescape == 0) { var colEnds = GetColsEntireSpanAs(); A.Assert(colEnds.Length > 0, $"{name}", filePath, lineNumber); - A.Assert(0 <= _currentRowColCount && _currentRowColCount <= colEnds.Length, $"{name}", filePath, lineNumber); + A.Assert(-1 <= _currentRowColCount && _currentRowColCount <= colEnds.Length, $"{name}", filePath, lineNumber); for (var i = 0; i < _currentRowColCount; i++) { var colEnd = colEnds[i]; @@ -506,7 +517,7 @@ void AssertState(string name, [CallerFilePath] string filePath = "", [CallerLine { var colInfos = GetColsEntireSpanAs(); A.Assert(colInfos.Length > 0, $"{name}", filePath, lineNumber); - A.Assert(0 <= _currentRowColCount && _currentRowColCount <= colInfos.Length, $"{name}", filePath, lineNumber); + A.Assert(-1 <= _currentRowColCount && _currentRowColCount <= colInfos.Length, $"{name}", filePath, lineNumber); for (var i = 0; i < _currentRowColCount; i++) { var (colEnd, _) = colInfos[i]; diff --git a/src/Sep/SepReaderState.cs b/src/Sep/SepReaderState.cs index 90a4459c..b1d970c9 100644 --- a/src/Sep/SepReaderState.cs +++ b/src/Sep/SepReaderState.cs @@ -41,6 +41,8 @@ public class SepReaderState : IDisposable #else internal const int ColEndsInitialLength = 8 * 1024; #endif + // 1 for first col end and since pre-increment, 1 for next row start + internal const int ColEndsOrInfosExtraEndCount = 2; // Multiple rows of format // [0] = Previous row/col end e.g. one before row/first col start // [1..ColCount] = Col ends/infos e.g. [1] = first col end/info