Skip to content

Commit

Permalink
Fix SepParser*s buffer overrun bug when column count is equal to inte…
Browse files Browse the repository at this point in the history
…rnal capacity and new row after (#109)

Fixes #108 
Bug likely present since 0.4.0 when parsing multiple rows at a time was
introduced.
  • Loading branch information
nietras authored Mar 10, 2024
1 parent da18795 commit f8bff90
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 49 deletions.
42 changes: 35 additions & 7 deletions src/Sep.Test/SepReaderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Diagnostics.Contracts;
using System.IO;
using System.Linq;
using System.Numerics;
using System.Text;
using Microsoft.VisualStudio.TestTools.UnitTesting;

Expand Down Expand Up @@ -444,24 +445,51 @@ public void SepReaderTest_CarriageReturnLineFeedEvenOrOdd_ToEnsureLineFeedReadAf
[TestMethod]
public void SepReaderTest_ColsInitialLength()
{
var initialColCountCapacity = SepReader.ColEndsInitialLength - 1; // -1 since col ends is 1 longer due to having row start
var text = "A" + Environment.NewLine + new string(';', initialColCountCapacity - 1);
var colCount = SepReader.ColEndsInitialLength - 1; // -1 since col ends is 1 longer due to having row start
var text = "A" + Environment.NewLine + new string(';', colCount - 1);
using var reader = Sep.Reader(o => o with { DisableColCountCheck = true }).FromText(text);
Assert.IsTrue(reader.MoveNext());
var row = reader.Current;
Assert.AreEqual(initialColCountCapacity, row.ColCount);
Assert.AreEqual(colCount, row.ColCount);
}

[TestMethod]
public void SepReaderTest_ExceedingColsInitialLength_WorksByDoublingCapacity()
{
var initialColCountCapacity = SepReader.ColEndsInitialLength;
var text = "A" + Environment.NewLine + new string(';', initialColCountCapacity - 1);
var colCount = SepReader.ColEndsInitialLength;
var text = "A" + Environment.NewLine + new string(';', colCount - 1);
using var reader = Sep.Reader(o => o with { DisableColCountCheck = true }).FromText(text);
Assert.IsTrue(reader.MoveNext());
var row = reader.Current;
Assert.AreEqual(initialColCountCapacity, row.ColCount);
Assert.AreEqual(initialColCountCapacity * 2, reader._colEndsOrColInfos.Length);
Assert.AreEqual(colCount, row.ColCount);
Assert.AreEqual(colCount * 2, reader._colEndsOrColInfos.Length);
}

[TestMethod]
public void SepReaderTest_ColInfosLength_ArgumentOutOfRangeException_Issue_108()
{
// At any time during parsing there may be an incomplete row e.g. a
// parsing row, when then new rows are about to be parsed e.g. in
// ParseNewRows(). The col ends/infos for that row need to be copied to
// beginning before new rows are found. At any time these col infos
// should never exceed the end of the array of col infos. However, a bug
// was present <= 0.4.3 as reported in issue #108
// https://github.com/nietras/Sep/issues/108 where this was the case and
// an `ArgumentOutOfRangeException` would occur on the slicing that
// happens when these col infos are to be copied to beginning. This test
// triggers that issue.
var colCounts = Enumerable.Range(SepReader.ColEndsInitialLength - 1, 1);
var charsLength = (int)BitOperations.RoundUpToPowerOf2(SepReader.CharsMinimumLength);
foreach (var colCount in colCounts)
{
var text = new string('A', Math.Max(1, charsLength - colCount + 1))
+ new string(';', colCount - 1) + Environment.NewLine
+ new string(';', colCount * 2);
using var reader = Sep
.Reader(o => o with { HasHeader = false, DisableColCountCheck = true })
.FromText(text);
while (reader.MoveNext()) { }
}
}

#if !SEPREADERTRACE // Causes OOMs in Debug due to tracing
Expand Down
7 changes: 5 additions & 2 deletions src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,17 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count.
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
Expand Down Expand Up @@ -159,6 +161,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand Down
7 changes: 5 additions & 2 deletions src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,17 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count.
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
Expand Down Expand Up @@ -164,6 +166,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand Down
16 changes: 7 additions & 9 deletions src/Sep/Internals/SepParserIndexOfAny.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public unsafe SepParserIndexOfAny(Sep sep)
_specialChars = new[] { sep.Separator, CarriageReturn, LineFeed, Quote };
}

public int PaddingLength => 0;
public int PaddingLength => 4;
public int QuoteCount => (int)_quoteCount;

[SkipLocalsInit]
Expand Down Expand Up @@ -65,11 +65,14 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - 3);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - 3 - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

var span = chars.AsSpan(0, charsEnd);
var specialCharsSpan = _specialChars.AsSpan();
while ((uint)charsIndex < (uint)charsEnd)
while ((uint)charsIndex < (uint)charsEnd &&
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
// https://github.com/dotnet/runtime/blob/942ce9af6e4858b74cc3a1429e9a64065ffb207a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs#L1926-L2045
var relativeIndex = span.Slice(charsIndex).IndexOfAny(specialCharsSpan);
Expand All @@ -96,6 +99,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand All @@ -107,12 +111,6 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
break;
}
}
// If current is greater than or equal than "stop", then break.
// There is no longer guaranteed space enough for next.
if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
break;
}
}
else
{
Expand Down
7 changes: 5 additions & 2 deletions src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,17 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count.
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
Expand Down Expand Up @@ -156,6 +158,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand Down
7 changes: 5 additions & 2 deletions src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,17 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count.
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
Expand Down Expand Up @@ -160,6 +162,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand Down
7 changes: 5 additions & 2 deletions src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,17 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count.
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
Expand Down Expand Up @@ -159,6 +161,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand Down
7 changes: 5 additions & 2 deletions src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,17 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count.
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
Expand Down Expand Up @@ -161,6 +163,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand Down
7 changes: 5 additions & 2 deletions src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,17 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
ref var colInfosRefOrigin = ref As<int, TColInfo>(ref MemoryMarshal.GetArrayDataReference(colInfos));
ref var colInfosRef = ref Add(ref colInfosRefOrigin, s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefCurrent = ref Add(ref colInfosRefOrigin, s._parsingRowColCount + s._parsingRowColEndsOrInfosStartIndex);
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosLength - VecUI8.Count);
ref var colInfosRefEnd = ref Add(ref colInfosRefOrigin, colInfosLength);
var colInfosStopLength = colInfosLength - VecUI8.Count - SepReaderState.ColEndsOrInfosExtraEndCount;
ref var colInfosRefStop = ref Add(ref colInfosRefOrigin, colInfosStopLength);

charsIndex -= VecUI8.Count;
LOOPSTEP:
charsIndex += VecUI8.Count;
LOOPNOSTEP:
if (charsIndex < charsEnd &&
// If current is greater than or equal than "stop", then there is no
// longer guaranteed space enough for next VecUI8.Count.
// longer guaranteed space enough for next VecUI8.Count + next row start.
!IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent))
{
ref var charsRef = ref Add(ref charsOriginRef, (uint)charsIndex);
Expand Down Expand Up @@ -159,6 +161,7 @@ void Parse<TColInfo, TColInfoMethods>(SepReaderState s)
++s._parsedRowsCount;
// Next row start (one before)
colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1);
A.Assert(IsAddressLessThan(ref colInfosRefCurrent, ref colInfosRefEnd));
colInfosRefCurrent = TColInfoMethods.Create(charsIndex - 1, 0);
// Update for next row
colInfosRef = ref colInfosRefCurrent;
Expand Down
Loading

0 comments on commit f8bff90

Please sign in to comment.