From 30962ef73d248d699fe687bf5fd22b462097745c Mon Sep 17 00:00:00 2001 From: nietras Date: Thu, 16 Nov 2023 12:27:40 +0100 Subject: [PATCH] Add Unescape Support (#61) * Add `SepReaderOptions.Unescape` option, set to `true` to enable automatic unescaping of columns. That is, removing outer quotes and every second inner quote. Default is `false` and hence not to unescape. * Update benchmarks to incl. performance when `Unescape = true` as separate `Sep_Unescape` method. * Internally parsers have been made generic to support unescaping without any performance impact if not used. * Fixes #19 --- README.md | 259 +++++++++++------- .../AMD.Ryzen.9.5950X/FloatsReaderBench.md | 54 ++-- .../AMD.Ryzen.9.5950X/PackageAssetsBench.md | 68 ++--- .../PackageAssetsBenchQuotes.md | 68 ++--- benchmarks/AMD.Ryzen.9.5950X/Versions.txt | 2 +- global.json | 2 +- src/Sep.Benchmarks/Program.cs | 4 +- .../SepParseSeparatorsMaskBench.cs | 7 - src/Sep.Benchmarks/SepParserBench.cs | 6 +- .../PackageAssetsBench.cs | 44 +++ src/Sep.ComparisonBenchmarks/Program.cs | 8 + .../UnescapeCompare.cs | 149 ++++++++++ src/Sep.Test/PackageAssetsTest.cs | 49 ++-- src/Sep.Test/ReadMeTest.cs | 3 + src/Sep.Test/SepColInfoTest.cs | 16 ++ .../SepParseMaskTest_ParseSeparatorsMask.cs | 1 - src/Sep.Test/SepParserTest.cs | 43 ++- src/Sep.Test/SepReaderColTest.cs | 43 +++ src/Sep.Test/SepReaderFuzzTest.cs | 197 +++++++++++++ src/Sep.Test/SepReaderNoHeaderTest.cs | 2 +- src/Sep.Test/SepReaderRowTest.cs | 31 ++- src/Sep.Test/SepReaderTest.cs | 4 +- src/Sep.Test/SepUnescapeTest.cs | 47 ++++ src/Sep/Internals/ISepParser.cs | 4 +- src/Sep/Internals/SepArrayExtensions.cs | 12 +- src/Sep/Internals/SepColInfo.cs | 24 ++ src/Sep/Internals/SepParseMask.cs | 168 ++++++------ .../SepParserAvx2PackCmpOrMoveMaskTzcnt.cs | 62 +++-- .../SepParserAvx512PackCmpOrMoveMaskTzcnt.cs | 64 +++-- src/Sep/Internals/SepParserIndexOfAny.cs | 55 ++-- .../SepParserSse2PackCmpOrMoveMaskTzcnt.cs | 62 +++-- .../SepParserVector128NrwCmpExtMsbTzcnt.cs 
| 62 +++-- .../SepParserVector256NrwCmpExtMsbTzcnt.cs | 62 +++-- .../SepParserVector512NrwCmpExtMsbTzcnt.cs | 62 +++-- .../SepParserVector64NrwCmpExtMsbTzcnt.cs | 62 +++-- src/Sep/Internals/SepUnescape.cs | 95 +++++++ src/Sep/SepReader.cs | 125 ++++++--- src/Sep/SepReaderOptions.cs | 25 +- src/Sep/SepReaderState.cs | 115 ++++++-- 39 files changed, 1608 insertions(+), 558 deletions(-) create mode 100644 src/Sep.ComparisonBenchmarks/UnescapeCompare.cs create mode 100644 src/Sep.Test/SepColInfoTest.cs create mode 100644 src/Sep.Test/SepReaderFuzzTest.cs create mode 100644 src/Sep.Test/SepUnescapeTest.cs create mode 100644 src/Sep/Internals/SepColInfo.cs create mode 100644 src/Sep/Internals/SepUnescape.cs diff --git a/README.md b/README.md index af7f1bc9..20e8c9b1 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,9 @@ and similar from [.NET 7+ and C# 11+](https://nietras.com/2022/11/26/dotnet-and-csharp-versions/) for a modern and highly efficient implementation. * **🔎 Minimal** - a succinct yet expressive API with few options and no hidden -changes to input or output. What you read/write is what you get. This means -there is no "automatic" escaping/unescaping of quotes, for example. +changes to input or output. What you read/write is what you get. E.g. by default +there is no "automatic" escaping/unescaping of quotes. For automatic unescaping +of quotes see [SepReaderOptions](#sepreaderoptions) and [Unescaping](#unescaping). * **🚀 Fast** - blazing fast with both architecture specific and cross-platform SIMD vectorized parsing incl. 64/128/256/512-bit paths e.g. AVX2, AVX-512 (.NET 8.0+), NEON. Uses [csFastFloat](https://github.com/CarlVerret/csFastFloat) for @@ -39,6 +40,8 @@ with [detailed benchmarks](#comparison-benchmarks) to prove it. * **🗑️ Zero allocation** - intelligent and efficient memory management allowing for zero allocations after warmup incl. supporting use cases of reading or writing arrays of values (e.g. 
features) easily without repeated allocations. +* **✅ Thoroughly tested** with great code coverage and focus on testing + edge cases incl. randomized [fuzz testing](https://en.wikipedia.org/wiki/Fuzzing). * **🌐 Cross-platform** - works on any platform, any architecture supported by .NET. 100% managed and written in beautiful modern C#. * **✂️ Trimmable and AOT/NativeAOT compatible** - no problematic reflection or @@ -260,32 +263,89 @@ The following options are available: /// Specifies the separator used, if `null` then automatic detection /// is used based on first row in source. /// -public Sep? Sep { get; init; } +public Sep? Sep { get; init; } = null; /// /// Specifies the culture used for parsing. /// May be `null` for default culture. /// -public CultureInfo? CultureInfo { get; init; } +public CultureInfo? CultureInfo { get; init; } = SepDefaults.CultureInfo; /// /// Indicates whether the first row is a header row. /// -public bool HasHeader { get; init; } +public bool HasHeader { get; init; } = true; /// /// Specifies the method factory used to convert a column span /// of `char`s to a `string`. /// -public SepCreateToString CreateToString { get; init; } +public SepCreateToString CreateToString { get; init; } = SepToString.Direct; /// /// Disables using [csFastFloat](https://github.com/CarlVerret/csFastFloat) /// for parsing `float` and `double`. /// -public bool DisableFastFloat { get; init; } +public bool DisableFastFloat { get; init; } = false; /// /// Disables checking if column count is the same for all rows. /// -public bool DisableColCountCheck { get; init; } +public bool DisableColCountCheck { get; init; } = false; +/// +/// Unescape quotes on column access. +/// +/// +/// When true, if a column starts with a quote then the two outermost quotes +/// are removed and every second inner quote is removed. 
Note that +/// unquote/unescape happens in-place, which means the `SepReader.Row.Span` will be modified and contain "garbage" +/// state after unescaped cols before next col. This is for efficiency to +/// avoid allocating secondary memory for unescaped columns. Header +/// columns/names will also be unescaped. +/// +public bool Unescape { get; init; } = false; ``` +#### Unescaping +While great care has been taken to ensure Sep unescaping of quotes is both +correct and fast, there is always the question of how one should respond to +invalid input. + +The below table tries to summarize the behavior of Sep vs CsvHelper and Sylvan. +Note that all do the same for valid input. There are differences in how invalid +input is handled. For Sep the design choice has been based on not wanting to +throw exceptions and to use a principle that is both reasonably fast and simple. + +| Input | Valid | CsvHelper | CsvHelper¹ | Sylvan | Sep² | +|-|-|-|-|-|-| +| `a` | True | `a` | `a` | `a` | `a` | +| `""` | True | | | | | +| `""""` | True | `"` | `"` | `"` | `"` | +| `""""""` | True | `""` | `""` | `""` | `""` | +| `"a"` | True | `a` | `a` | `a` | `a` | +| `"a""a"` | True | `a"a` | `a"a` | `a"a` | `a"a` | +| `"a""a""a"` | True | `a"a"a` | `a"a"a` | `a"a"a` | `a"a"a` | +| `a""a` | False | EXCEPTION | `a""a` | `a""a` | `a""a` | +| `a"a"a` | False | EXCEPTION | `a"a"a` | `a"a"a` | `a"a"a` | +| `·""·` | False | EXCEPTION | `·""·` | `·""·` | `·""·` | +| `·"a"·` | False | EXCEPTION | `·"a"·` | `·"a"·` | `·"a"·` | +| `·""` | False | EXCEPTION | `·""` | `·""` | `·""` | +| `·"a"` | False | EXCEPTION | `·"a"` | `·"a"` | `·"a"` | +| `a"""a` | False | EXCEPTION | `a"""a` | `a"""a` | `a"""a` | +| `"a"a"a"` | False | EXCEPTION | `aa"a"` | `a"a"a` | `aa"a` | +| `""·` | False | EXCEPTION | `·` | `"` | `·` | +| `"a"·` | False | EXCEPTION | `a·` | `a"` | `a·` | +| `"a"""a` | False | EXCEPTION | `aa` | EXCEPTION | `a"a` | +| `"a"""a"` | False | EXCEPTION | `aa"` | `a"a` | `a"a"` | +| `""a"` | False | EXCEPTION | `a"` | 
`"a` | `a"` | +| `"a"a"` | False | EXCEPTION | `aa"` | `a"a` | `aa"` | +| `""a"a""` | False | EXCEPTION | `a"a""` | `"a"a"` | `a"a"` | +| `"""` | False | | | EXCEPTION | `"` | +| `"""""` | False | `"` | `"` | EXCEPTION | `""` | + +`·` (middle dot) is whitespace to make this visible + +¹ CsvHelper with `BadDataFound = null` + +² Sep with `Unescape = true` in `SepReaderOptions` + + #### SepReader Debuggability Debuggability is an important part of any library and while this is still a work in progress for Sep, `SepReader` does have a unique feature when looking at it @@ -771,36 +831,42 @@ optimized hashing of `ReadOnlySpan`, and thus not really due the the csv-parsing itself, since that is not a big part of the time consumed. At least not for a decently fast csv-parser. -###### AMD.Ryzen.9.5950X - PackageAssets Benchmark Results (Sep 0.2.8.0, Sylvan 1.3.3.0, CsvHelper 30.0.1.0) - -| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | -|---------- |--------- |------ |------ |-----------:|------:|---:|--------:|-------:|-------------:|------------:| -| Sep______ | .NET 7.0 | Row | 50000 | 2.481 ms | 1.00 | 29 | 11761.3 | 49.6 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Row | 50000 | 3.117 ms | 1.26 | 29 | 9360.8 | 62.3 | 7.17 KB | 6.34 | -| ReadLine_ | .NET 7.0 | Row | 50000 | 13.023 ms | 5.20 | 29 | 2240.8 | 260.5 | 88608.25 KB | 78,287.19 | -| CsvHelper | .NET 7.0 | Row | 50000 | 51.579 ms | 20.76 | 29 | 565.8 | 1031.6 | 20.65 KB | 18.25 | -| Sep______ | .NET 8.0 | Row | 50000 | 2.436 ms | 0.98 | 29 | 11978.3 | 48.7 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Row | 50000 | 2.929 ms | 1.18 | 29 | 9962.0 | 58.6 | 7.17 KB | 6.33 | -| ReadLine_ | .NET 8.0 | Row | 50000 | 11.788 ms | 4.76 | 29 | 2475.5 | 235.8 | 88608.24 KB | 78,287.18 | -| CsvHelper | .NET 8.0 | Row | 50000 | 42.562 ms | 17.15 | 29 | 685.6 | 851.2 | 20.59 KB | 18.19 | -| | | | | | | | | | | | -| Sep______ | .NET 7.0 | Cols | 50000 | 3.166 ms | 1.00 | 29 | 
9218.1 | 63.3 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Cols | 50000 | 5.460 ms | 1.72 | 29 | 5344.6 | 109.2 | 7.18 KB | 6.33 | -| ReadLine_ | .NET 7.0 | Cols | 50000 | 13.603 ms | 4.26 | 29 | 2145.1 | 272.1 | 88608.25 KB | 78,152.32 | -| CsvHelper | .NET 7.0 | Cols | 50000 | 83.833 ms | 26.47 | 29 | 348.1 | 1676.7 | 446.31 KB | 393.65 | -| Sep______ | .NET 8.0 | Cols | 50000 | 3.142 ms | 0.99 | 29 | 9288.8 | 62.8 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Cols | 50000 | 5.181 ms | 1.64 | 29 | 5632.6 | 103.6 | 7.18 KB | 6.33 | -| ReadLine_ | .NET 8.0 | Cols | 50000 | 12.208 ms | 3.85 | 29 | 2390.4 | 244.2 | 88608.24 KB | 78,152.32 | -| CsvHelper | .NET 8.0 | Cols | 50000 | 70.302 ms | 22.22 | 29 | 415.1 | 1406.0 | 446.35 KB | 393.68 | -| | | | | | | | | | | | -| Sep______ | .NET 7.0 | Asset | 50000 | 38.120 ms | 1.00 | 29 | 765.5 | 762.4 | 13800.21 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Asset | 50000 | 44.675 ms | 1.17 | 29 | 653.2 | 893.5 | 14025 KB | 1.02 | -| ReadLine_ | .NET 7.0 | Asset | 50000 | 113.648 ms | 2.98 | 29 | 256.8 | 2273.0 | 102133.41 KB | 7.40 | -| CsvHelper | .NET 7.0 | Asset | 50000 | 105.184 ms | 2.77 | 29 | 277.4 | 2103.7 | 13971.28 KB | 1.01 | -| Sep______ | .NET 8.0 | Asset | 50000 | 30.393 ms | 0.80 | 29 | 960.1 | 607.9 | 13799.66 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Asset | 50000 | 38.855 ms | 1.02 | 29 | 751.0 | 777.1 | 14025.03 KB | 1.02 | -| ReadLine_ | .NET 8.0 | Asset | 50000 | 121.473 ms | 3.19 | 29 | 240.2 | 2429.5 | 102133.36 KB | 7.40 | -| CsvHelper | .NET 8.0 | Asset | 50000 | 93.300 ms | 2.45 | 29 | 312.8 | 1866.0 | 13972.05 KB | 1.01 | +###### AMD.Ryzen.9.5950X - PackageAssets Benchmark Results (Sep 0.3.0.0, Sylvan 1.3.5.0, CsvHelper 30.0.1.0) + +| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | +|------------ |--------- |------ |------ |-----------:|------:|---:|--------:|-------:|------------:|------------:| +| Sep______ | .NET 7.0 | Row | 50000 | 2.537 ms | 1.00 | 29 | 
11503.7 | 50.7 | 935 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Row | 50000 | 2.569 ms | 1.01 | 29 | 11360.9 | 51.4 | 935 B | 1.00 | +| Sylvan___ | .NET 7.0 | Row | 50000 | 3.197 ms | 1.26 | 29 | 9128.8 | 63.9 | 7383 B | 7.90 | +| ReadLine_ | .NET 7.0 | Row | 50000 | 13.549 ms | 5.27 | 29 | 2153.8 | 271.0 | 90734847 B | 97,042.62 | +| CsvHelper | .NET 7.0 | Row | 50000 | 61.317 ms | 24.28 | 29 | 475.9 | 1226.3 | 21150 B | 22.62 | +| Sep______ | .NET 8.0 | Row | 50000 | 2.446 ms | 0.96 | 29 | 11931.8 | 48.9 | 934 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Row | 50000 | 2.521 ms | 0.99 | 29 | 11576.6 | 50.4 | 934 B | 1.00 | +| Sylvan___ | .NET 8.0 | Row | 50000 | 2.965 ms | 1.17 | 29 | 9842.7 | 59.3 | 7382 B | 7.90 | +| ReadLine_ | .NET 8.0 | Row | 50000 | 12.622 ms | 5.00 | 29 | 2311.9 | 252.4 | 90734841 B | 97,042.61 | +| CsvHelper | .NET 8.0 | Row | 50000 | 43.642 ms | 17.19 | 29 | 668.6 | 872.8 | 21081 B | 22.55 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | Cols | 50000 | 3.705 ms | 1.00 | 29 | 7875.9 | 74.1 | 938 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Cols | 50000 | 4.391 ms | 1.19 | 29 | 6645.3 | 87.8 | 941 B | 1.00 | +| Sylvan___ | .NET 7.0 | Cols | 50000 | 5.598 ms | 1.51 | 29 | 5213.0 | 112.0 | 7389 B | 7.88 | +| ReadLine_ | .NET 7.0 | Cols | 50000 | 13.541 ms | 3.61 | 29 | 2155.1 | 270.8 | 90734847 B | 96,732.25 | +| CsvHelper | .NET 7.0 | Cols | 50000 | 77.553 ms | 20.93 | 29 | 376.3 | 1551.1 | 457022 B | 487.23 | +| Sep______ | .NET 8.0 | Cols | 50000 | 3.628 ms | 0.98 | 29 | 8043.9 | 72.6 | 937 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Cols | 50000 | 3.984 ms | 1.08 | 29 | 7325.4 | 79.7 | 938 B | 1.00 | +| Sylvan___ | .NET 8.0 | Cols | 50000 | 5.157 ms | 1.39 | 29 | 5658.3 | 103.1 | 7386 B | 7.87 | +| ReadLine_ | .NET 8.0 | Cols | 50000 | 13.149 ms | 3.54 | 29 | 2219.3 | 263.0 | 90734841 B | 96,732.24 | +| CsvHelper | .NET 8.0 | Cols | 50000 | 70.761 ms | 19.10 | 29 | 412.4 | 1415.2 | 457060 B | 487.27 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | 
Asset | 50000 | 34.415 ms | 1.00 | 29 | 847.9 | 688.3 | 14130898 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Asset | 50000 | 34.273 ms | 1.00 | 29 | 851.4 | 685.5 | 14130898 B | 1.00 | +| Sylvan___ | .NET 7.0 | Asset | 50000 | 42.214 ms | 1.22 | 29 | 691.3 | 844.3 | 14296698 B | 1.01 | +| ReadLine_ | .NET 7.0 | Asset | 50000 | 113.604 ms | 3.29 | 29 | 256.9 | 2272.1 | 104584612 B | 7.40 | +| CsvHelper | .NET 7.0 | Asset | 50000 | 103.655 ms | 3.02 | 29 | 281.5 | 2073.1 | 14307286 B | 1.01 | +| Sep______ | .NET 8.0 | Asset | 50000 | 30.453 ms | 0.88 | 29 | 958.3 | 609.1 | 14130846 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Asset | 50000 | 30.480 ms | 0.89 | 29 | 957.4 | 609.6 | 14130886 B | 1.00 | +| Sylvan___ | .NET 8.0 | Asset | 50000 | 38.244 ms | 1.11 | 29 | 763.0 | 764.9 | 14296692 B | 1.01 | +| ReadLine_ | .NET 8.0 | Asset | 50000 | 105.568 ms | 2.96 | 29 | 276.4 | 2111.4 | 104584668 B | 7.40 | +| CsvHelper | .NET 8.0 | Asset | 50000 | 85.633 ms | 2.49 | 29 | 340.8 | 1712.7 | 14306936 B | 1.01 | ###### Intel.Xeon.Silver.4316.2.30GHz - PackageAssets Benchmark Results (Sep 0.2.3, Sylvan 1.3.2.0, CsvHelper 30.0.1.0) @@ -878,36 +944,42 @@ looking at the numbers. For each row of 25 columns, there are 24 separators Adding quotes around each of the 25 columns will add 50 characters or almost triple the total to 76. 
-###### AMD.Ryzen.9.5950X - PackageAssets with Quotes Benchmark Results (Sep 0.2.8.0, Sylvan 1.3.3.0, CsvHelper 30.0.1.0) - -| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | -|---------- |--------- |------ |------ |-----------:|------:|---:|-------:|-------:|-------------:|------------:| -| Sep______ | .NET 7.0 | Row | 50000 | 7.243 ms | 1.00 | 33 | 4608.3 | 144.9 | 1.14 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Row | 50000 | 21.395 ms | 2.96 | 33 | 1560.0 | 427.9 | 7.33 KB | 6.41 | -| ReadLine_ | .NET 7.0 | Row | 50000 | 16.510 ms | 2.27 | 33 | 2021.6 | 330.2 | 108778.76 KB | 95,042.19 | -| CsvHelper | .NET 7.0 | Row | 50000 | 67.351 ms | 9.26 | 33 | 495.6 | 1347.0 | 20.65 KB | 18.05 | -| Sep______ | .NET 8.0 | Row | 50000 | 6.567 ms | 0.90 | 33 | 5082.8 | 131.3 | 1.14 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Row | 50000 | 17.739 ms | 2.43 | 33 | 1881.6 | 354.8 | 7.2 KB | 6.29 | -| ReadLine_ | .NET 8.0 | Row | 50000 | 14.818 ms | 2.06 | 33 | 2252.6 | 296.4 | 108778.75 KB | 95,042.18 | -| CsvHelper | .NET 8.0 | Row | 50000 | 52.127 ms | 7.16 | 33 | 640.3 | 1042.5 | 20.6 KB | 18.00 | -| | | | | | | | | | | | -| Sep______ | .NET 7.0 | Cols | 50000 | 7.380 ms | 1.00 | 33 | 4522.8 | 147.6 | 1.15 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Cols | 50000 | 23.992 ms | 3.25 | 33 | 1391.2 | 479.8 | 7.22 KB | 6.29 | -| ReadLine_ | .NET 7.0 | Cols | 50000 | 16.336 ms | 2.19 | 33 | 2043.2 | 326.7 | 108778.75 KB | 94,799.53 | -| CsvHelper | .NET 7.0 | Cols | 50000 | 88.119 ms | 11.94 | 33 | 378.8 | 1762.4 | 446.31 KB | 388.95 | -| Sep______ | .NET 8.0 | Cols | 50000 | 7.068 ms | 0.96 | 33 | 4722.2 | 141.4 | 1.14 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Cols | 50000 | 19.952 ms | 2.70 | 33 | 1672.9 | 399.0 | 7.21 KB | 6.29 | -| ReadLine_ | .NET 8.0 | Cols | 50000 | 14.506 ms | 1.96 | 33 | 2301.0 | 290.1 | 108778.75 KB | 94,799.52 | -| CsvHelper | .NET 8.0 | Cols | 50000 | 82.632 ms | 11.20 | 33 | 403.9 | 1652.6 | 446.35 KB | 388.99 | -| | | | | | 
| | | | | | -| Sep______ | .NET 7.0 | Asset | 50000 | 38.018 ms | 1.00 | 33 | 877.9 | 760.4 | 13808.03 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Asset | 50000 | 60.345 ms | 1.65 | 33 | 553.1 | 1206.9 | 14026.44 KB | 1.02 | -| ReadLine_ | .NET 7.0 | Asset | 50000 | 126.464 ms | 3.20 | 33 | 263.9 | 2529.3 | 122303.92 KB | 8.86 | -| CsvHelper | .NET 7.0 | Asset | 50000 | 111.102 ms | 2.92 | 33 | 300.4 | 2222.0 | 13970.78 KB | 1.01 | -| Sep______ | .NET 8.0 | Asset | 50000 | 34.347 ms | 0.91 | 33 | 971.8 | 686.9 | 13808.08 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Asset | 50000 | 52.851 ms | 1.38 | 33 | 631.5 | 1057.0 | 14026.01 KB | 1.02 | -| ReadLine_ | .NET 8.0 | Asset | 50000 | 122.265 ms | 3.19 | 33 | 273.0 | 2445.3 | 122303.85 KB | 8.86 | -| CsvHelper | .NET 8.0 | Asset | 50000 | 96.526 ms | 2.54 | 33 | 345.8 | 1930.5 | 13971.86 KB | 1.01 | +###### AMD.Ryzen.9.5950X - PackageAssets with Quotes Benchmark Results (Sep 0.3.0.0, Sylvan 1.3.5.0, CsvHelper 30.0.1.0) + +| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | +|------------ |--------- |------ |------ |-----------:|------:|---:|-------:|-------:|------------:|------------:| +| Sep______ | .NET 7.0 | Row | 50000 | 6.926 ms | 1.00 | 33 | 4819.0 | 138.5 | 948 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Row | 50000 | 7.226 ms | 1.04 | 33 | 4618.8 | 144.5 | 948 B | 1.00 | +| Sylvan___ | .NET 7.0 | Row | 50000 | 20.511 ms | 2.97 | 33 | 1627.3 | 410.2 | 7426 B | 7.83 | +| ReadLine_ | .NET 7.0 | Row | 50000 | 15.893 ms | 2.26 | 33 | 2100.1 | 317.9 | 111389450 B | 117,499.42 | +| CsvHelper | .NET 7.0 | Row | 50000 | 61.713 ms | 8.91 | 33 | 540.8 | 1234.3 | 24234 B | 25.56 | +| Sep______ | .NET 8.0 | Row | 50000 | 6.838 ms | 0.99 | 33 | 4881.5 | 136.8 | 945 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Row | 50000 | 6.963 ms | 1.01 | 33 | 4793.3 | 139.3 | 946 B | 1.00 | +| Sylvan___ | .NET 8.0 | Row | 50000 | 18.535 ms | 2.60 | 33 | 1800.8 | 370.7 | 7411 B | 7.82 | +| ReadLine_ | .NET 8.0 | 
Row | 50000 | 16.519 ms | 2.38 | 33 | 2020.5 | 330.4 | 111389436 B | 117,499.41 | +| CsvHelper | .NET 8.0 | Row | 50000 | 53.122 ms | 7.67 | 33 | 628.3 | 1062.4 | 21091 B | 22.25 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | Cols | 50000 | 8.502 ms | 1.00 | 33 | 3926.0 | 170.0 | 951 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Cols | 50000 | 8.784 ms | 1.03 | 33 | 3799.6 | 175.7 | 953 B | 1.00 | +| Sylvan___ | .NET 7.0 | Cols | 50000 | 23.206 ms | 2.73 | 33 | 1438.3 | 464.1 | 7430 B | 7.81 | +| ReadLine_ | .NET 7.0 | Cols | 50000 | 16.651 ms | 1.94 | 33 | 2004.6 | 333.0 | 111389446 B | 117,128.75 | +| CsvHelper | .NET 7.0 | Cols | 50000 | 98.855 ms | 11.63 | 33 | 337.6 | 1977.1 | 457022 B | 480.57 | +| Sep______ | .NET 8.0 | Cols | 50000 | 7.616 ms | 0.90 | 33 | 4382.5 | 152.3 | 947 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Cols | 50000 | 8.560 ms | 1.01 | 33 | 3899.3 | 171.2 | 950 B | 1.00 | +| Sylvan___ | .NET 8.0 | Cols | 50000 | 20.405 ms | 2.40 | 33 | 1635.8 | 408.1 | 7419 B | 7.80 | +| ReadLine_ | .NET 8.0 | Cols | 50000 | 15.310 ms | 1.80 | 33 | 2180.1 | 306.2 | 111389441 B | 117,128.75 | +| CsvHelper | .NET 8.0 | Cols | 50000 | 85.567 ms | 10.05 | 33 | 390.1 | 1711.3 | 457060 B | 480.61 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | Asset | 50000 | 41.325 ms | 1.00 | 33 | 807.7 | 826.5 | 14139450 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Asset | 50000 | 38.076 ms | 0.93 | 33 | 876.6 | 761.5 | 14130898 B | 1.00 | +| Sylvan___ | .NET 7.0 | Asset | 50000 | 61.446 ms | 1.49 | 33 | 543.2 | 1228.9 | 14298344 B | 1.01 | +| ReadLine_ | .NET 7.0 | Asset | 50000 | 125.874 ms | 3.05 | 33 | 265.2 | 2517.5 | 125239164 B | 8.86 | +| CsvHelper | .NET 7.0 | Asset | 50000 | 115.031 ms | 2.77 | 33 | 290.2 | 2300.6 | 14307054 B | 1.01 | +| Sep______ | .NET 8.0 | Asset | 50000 | 37.937 ms | 0.92 | 33 | 879.8 | 758.7 | 14139438 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Asset | 50000 | 34.323 ms | 0.83 | 33 | 972.5 | 686.5 | 14130926 B | 1.00 | +| Sylvan___ | .NET 8.0 | Asset | 
50000 | 53.940 ms | 1.30 | 33 | 618.8 | 1078.8 | 14296672 B | 1.01 | +| ReadLine_ | .NET 8.0 | Asset | 50000 | 118.606 ms | 2.88 | 33 | 281.4 | 2372.1 | 125239072 B | 8.86 | +| CsvHelper | .NET 8.0 | Asset | 50000 | 99.200 ms | 2.38 | 33 | 336.5 | 1984.0 | 14306154 B | 1.01 | ###### Intel.Xeon.Silver.4316.2.30GHz - PackageAssets with Quotes Benchmark Results (Sep 0.2.3, Sylvan 1.3.2.0, CsvHelper 30.0.1.0) @@ -1036,36 +1108,36 @@ naive `ReadLine` approach. With Sep being **>3.8x faster than CsvHelper**. It is a testament to how good the .NET and the .NET GC is that the ReadLine is pretty good compared to CsvHelper regardless of allocating a lot of strings. -##### AMD.Ryzen.9.5950X - FloatsReader Benchmark Results (Sep 0.2.8.0, Sylvan 1.3.3.0, CsvHelper 30.0.1.0) +##### AMD.Ryzen.9.5950X - FloatsReader Benchmark Results (Sep 0.3.0.0, Sylvan 1.3.5.0, CsvHelper 30.0.1.0) | Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | |---------- |--------- |------- |------ |-----------:|------:|---:|--------:|-------:|------------:|------------:| -| Sep______ | .NET 7.0 | Row | 25000 | 2.642 ms | 1.00 | 27 | 10318.8 | 105.7 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Row | 25000 | 3.025 ms | 1.14 | 27 | 9013.6 | 121.0 | 10.55 KB | 6.78 | -| ReadLine_ | .NET 7.0 | Row | 25000 | 14.068 ms | 5.31 | 27 | 1938.0 | 562.7 | 89986.82 KB | 57,808.35 | -| CsvHelper | .NET 7.0 | Row | 25000 | 47.926 ms | 18.16 | 27 | 568.9 | 1917.1 | 20.74 KB | 13.32 | -| Sep______ | .NET 8.0 | Row | 25000 | 2.571 ms | 0.97 | 27 | 10604.7 | 102.8 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Row | 25000 | 2.925 ms | 1.11 | 27 | 9320.4 | 117.0 | 10.55 KB | 6.78 | -| ReadLine_ | .NET 8.0 | Row | 25000 | 13.122 ms | 5.01 | 27 | 2077.7 | 524.9 | 89986.83 KB | 57,808.35 | -| CsvHelper | .NET 8.0 | Row | 25000 | 33.886 ms | 12.83 | 27 | 804.6 | 1355.4 | 20.61 KB | 13.24 | +| Sep______ | .NET 7.0 | Row | 25000 | 2.649 ms | 1.00 | 27 | 10291.1 | 106.0 | 1.2 KB | 1.00 | 
+| Sylvan___ | .NET 7.0 | Row | 25000 | 3.073 ms | 1.16 | 27 | 8873.0 | 122.9 | 10.59 KB | 8.84 | +| ReadLine_ | .NET 7.0 | Row | 25000 | 13.823 ms | 5.09 | 27 | 1972.3 | 552.9 | 89986.84 KB | 75,160.30 | +| CsvHelper | .NET 7.0 | Row | 25000 | 40.128 ms | 15.14 | 27 | 679.4 | 1605.1 | 20.74 KB | 17.32 | +| Sep______ | .NET 8.0 | Row | 25000 | 2.579 ms | 0.98 | 27 | 10571.6 | 103.2 | 1.2 KB | 1.00 | +| Sylvan___ | .NET 8.0 | Row | 25000 | 3.059 ms | 1.15 | 27 | 8911.7 | 122.4 | 10.59 KB | 8.84 | +| ReadLine_ | .NET 8.0 | Row | 25000 | 13.625 ms | 5.13 | 27 | 2000.9 | 545.0 | 89986.83 KB | 75,160.29 | +| CsvHelper | .NET 8.0 | Row | 25000 | 34.168 ms | 12.90 | 27 | 797.9 | 1366.7 | 20.61 KB | 17.22 | | | | | | | | | | | | | -| Sep______ | .NET 7.0 | Cols | 25000 | 3.077 ms | 1.00 | 27 | 8860.0 | 123.1 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Cols | 25000 | 4.852 ms | 1.58 | 27 | 5618.9 | 194.1 | 10.55 KB | 6.77 | -| ReadLine_ | .NET 7.0 | Cols | 25000 | 14.240 ms | 4.58 | 27 | 1914.5 | 569.6 | 89986.84 KB | 57,735.92 | -| CsvHelper | .NET 7.0 | Cols | 25000 | 42.056 ms | 13.67 | 27 | 648.3 | 1682.2 | 28451.27 KB | 18,254.45 | -| Sep______ | .NET 8.0 | Cols | 25000 | 3.095 ms | 1.00 | 27 | 8809.6 | 123.8 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Cols | 25000 | 4.609 ms | 1.50 | 27 | 5915.6 | 184.3 | 10.55 KB | 6.77 | -| ReadLine_ | .NET 8.0 | Cols | 25000 | 13.020 ms | 4.22 | 27 | 2093.9 | 520.8 | 89986.83 KB | 57,735.91 | -| CsvHelper | .NET 8.0 | Cols | 25000 | 36.009 ms | 11.70 | 27 | 757.1 | 1440.4 | 28451.15 KB | 18,254.37 | +| Sep______ | .NET 7.0 | Cols | 25000 | 3.574 ms | 1.00 | 27 | 7628.8 | 142.9 | 1.2 KB | 1.00 | +| Sylvan___ | .NET 7.0 | Cols | 25000 | 4.968 ms | 1.39 | 27 | 5488.1 | 198.7 | 10.59 KB | 8.83 | +| ReadLine_ | .NET 7.0 | Cols | 25000 | 14.102 ms | 3.90 | 27 | 1933.3 | 564.1 | 89986.84 KB | 75,037.89 | +| CsvHelper | .NET 7.0 | Cols | 25000 | 42.826 ms | 11.98 | 27 | 636.6 | 1713.0 | 28451.27 KB | 23,724.84 | +| Sep______ | .NET 8.0 | 
Cols | 25000 | 3.294 ms | 0.92 | 27 | 8275.5 | 131.8 | 1.2 KB | 1.00 | +| Sylvan___ | .NET 8.0 | Cols | 25000 | 4.717 ms | 1.32 | 27 | 5779.3 | 188.7 | 10.59 KB | 8.83 | +| ReadLine_ | .NET 8.0 | Cols | 25000 | 13.991 ms | 3.92 | 27 | 1948.7 | 559.6 | 89986.83 KB | 75,037.88 | +| CsvHelper | .NET 8.0 | Cols | 25000 | 39.277 ms | 10.94 | 27 | 694.1 | 1571.1 | 28451.15 KB | 23,724.74 | | | | | | | | | | | | | -| Sep______ | .NET 7.0 | Floats | 25000 | 32.440 ms | 1.00 | 27 | 840.4 | 1297.6 | 8.89 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Floats | 25000 | 68.701 ms | 2.12 | 27 | 396.8 | 2748.1 | 18.86 KB | 2.12 | -| ReadLine_ | .NET 7.0 | Floats | 25000 | 79.471 ms | 2.45 | 27 | 343.1 | 3178.8 | 89993.42 KB | 10,122.28 | -| CsvHelper | .NET 7.0 | Floats | 25000 | 133.372 ms | 4.13 | 27 | 204.4 | 5334.9 | 22039.48 KB | 2,478.96 | -| Sep______ | .NET 8.0 | Floats | 25000 | 21.978 ms | 0.68 | 27 | 1240.5 | 879.1 | 9.11 KB | 1.02 | -| Sylvan___ | .NET 8.0 | Floats | 25000 | 65.359 ms | 2.01 | 27 | 417.1 | 2614.4 | 18.84 KB | 2.12 | -| ReadLine_ | .NET 8.0 | Floats | 25000 | 72.653 ms | 2.24 | 27 | 375.3 | 2906.1 | 89990.3 KB | 10,121.93 | -| CsvHelper | .NET 8.0 | Floats | 25000 | 110.129 ms | 3.39 | 27 | 247.6 | 4405.2 | 22036.58 KB | 2,478.63 | +| Sep______ | .NET 7.0 | Floats | 25000 | 33.288 ms | 1.00 | 27 | 819.0 | 1331.5 | 8.18 KB | 1.00 | +| Sylvan___ | .NET 7.0 | Floats | 25000 | 78.853 ms | 2.37 | 27 | 345.8 | 3154.1 | 18.89 KB | 2.31 | +| ReadLine_ | .NET 7.0 | Floats | 25000 | 87.688 ms | 2.62 | 27 | 310.9 | 3507.5 | 89993.42 KB | 11,002.06 | +| CsvHelper | .NET 7.0 | Floats | 25000 | 143.571 ms | 4.29 | 27 | 189.9 | 5742.8 | 22039.48 KB | 2,694.42 | +| Sep______ | .NET 8.0 | Floats | 25000 | 23.568 ms | 0.71 | 27 | 1156.8 | 942.7 | 8.13 KB | 0.99 | +| Sylvan___ | .NET 8.0 | Floats | 25000 | 70.200 ms | 2.10 | 27 | 388.4 | 2808.0 | 18.87 KB | 2.31 | +| ReadLine_ | .NET 8.0 | Floats | 25000 | 81.667 ms | 2.45 | 27 | 333.8 | 3266.7 | 89990.3 KB | 11,001.68 | +| 
CsvHelper | .NET 8.0 | Floats | 25000 | 121.367 ms | 3.65 | 27 | 224.6 | 4854.7 | 22035.94 KB | 2,693.98 | ##### Intel.Xeon.Silver.4316.2.30GHz - FloatsReader Benchmark Results (Sep 0.2.3, Sylvan 1.3.2.0, CsvHelper 30.0.1.0) @@ -1366,6 +1438,7 @@ namespace nietras.SeparatedValues public bool DisableFastFloat { get; init; } public bool HasHeader { get; init; } public nietras.SeparatedValues.Sep? Sep { get; init; } + public bool Unescape { get; init; } } public class SepReaderState : System.IDisposable { diff --git a/benchmarks/AMD.Ryzen.9.5950X/FloatsReaderBench.md b/benchmarks/AMD.Ryzen.9.5950X/FloatsReaderBench.md index 19b55a70..22075b3d 100644 --- a/benchmarks/AMD.Ryzen.9.5950X/FloatsReaderBench.md +++ b/benchmarks/AMD.Ryzen.9.5950X/FloatsReaderBench.md @@ -1,11 +1,11 @@ ``` -BenchmarkDotNet v0.13.9+228a464e8be6c580ad9408e98f18813f6407fb5a, Windows 10 (10.0.19044.3086/21H2/November2021Update) +BenchmarkDotNet v0.13.10, Windows 10 (10.0.19044.3086/21H2/November2021Update) AMD Ryzen 9 5950X, 1 CPU, 32 logical and 16 physical cores .NET SDK 8.0.100-rc.2.23502.2 [Host] : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 - Job-VQBAPT : .NET 7.0.12 (7.0.1223.47720), X64 RyuJIT AVX2 - Job-QUTMRR : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 + Job-WYCHAH : .NET 7.0.13 (7.0.1323.51816), X64 RyuJIT AVX2 + Job-OXIFBK : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 InvocationCount=Default IterationTime=300.0000 ms MaxIterationCount=15 MinIterationCount=5 WarmupCount=6 Reader=String @@ -13,29 +13,29 @@ MinIterationCount=5 WarmupCount=6 Reader=String ``` | Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | |---------- |--------- |------- |------ |-----------:|------:|---:|--------:|-------:|------------:|------------:| -| Sep______ | .NET 7.0 | Row | 25000 | 2.642 ms | 1.00 | 27 | 10318.8 | 105.7 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Row | 25000 | 3.025 ms | 1.14 | 27 | 9013.6 | 121.0 | 10.55 KB | 6.78 | -| ReadLine_ | .NET 7.0 | Row | 
25000 | 14.068 ms | 5.31 | 27 | 1938.0 | 562.7 | 89986.82 KB | 57,808.35 | -| CsvHelper | .NET 7.0 | Row | 25000 | 47.926 ms | 18.16 | 27 | 568.9 | 1917.1 | 20.74 KB | 13.32 | -| Sep______ | .NET 8.0 | Row | 25000 | 2.571 ms | 0.97 | 27 | 10604.7 | 102.8 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Row | 25000 | 2.925 ms | 1.11 | 27 | 9320.4 | 117.0 | 10.55 KB | 6.78 | -| ReadLine_ | .NET 8.0 | Row | 25000 | 13.122 ms | 5.01 | 27 | 2077.7 | 524.9 | 89986.83 KB | 57,808.35 | -| CsvHelper | .NET 8.0 | Row | 25000 | 33.886 ms | 12.83 | 27 | 804.6 | 1355.4 | 20.61 KB | 13.24 | +| Sep______ | .NET 7.0 | Row | 25000 | 2.649 ms | 1.00 | 27 | 10291.1 | 106.0 | 1.2 KB | 1.00 | +| Sylvan___ | .NET 7.0 | Row | 25000 | 3.073 ms | 1.16 | 27 | 8873.0 | 122.9 | 10.59 KB | 8.84 | +| ReadLine_ | .NET 7.0 | Row | 25000 | 13.823 ms | 5.09 | 27 | 1972.3 | 552.9 | 89986.84 KB | 75,160.30 | +| CsvHelper | .NET 7.0 | Row | 25000 | 40.128 ms | 15.14 | 27 | 679.4 | 1605.1 | 20.74 KB | 17.32 | +| Sep______ | .NET 8.0 | Row | 25000 | 2.579 ms | 0.98 | 27 | 10571.6 | 103.2 | 1.2 KB | 1.00 | +| Sylvan___ | .NET 8.0 | Row | 25000 | 3.059 ms | 1.15 | 27 | 8911.7 | 122.4 | 10.59 KB | 8.84 | +| ReadLine_ | .NET 8.0 | Row | 25000 | 13.625 ms | 5.13 | 27 | 2000.9 | 545.0 | 89986.83 KB | 75,160.29 | +| CsvHelper | .NET 8.0 | Row | 25000 | 34.168 ms | 12.90 | 27 | 797.9 | 1366.7 | 20.61 KB | 17.22 | | | | | | | | | | | | | -| Sep______ | .NET 7.0 | Cols | 25000 | 3.077 ms | 1.00 | 27 | 8860.0 | 123.1 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Cols | 25000 | 4.852 ms | 1.58 | 27 | 5618.9 | 194.1 | 10.55 KB | 6.77 | -| ReadLine_ | .NET 7.0 | Cols | 25000 | 14.240 ms | 4.58 | 27 | 1914.5 | 569.6 | 89986.84 KB | 57,735.92 | -| CsvHelper | .NET 7.0 | Cols | 25000 | 42.056 ms | 13.67 | 27 | 648.3 | 1682.2 | 28451.27 KB | 18,254.45 | -| Sep______ | .NET 8.0 | Cols | 25000 | 3.095 ms | 1.00 | 27 | 8809.6 | 123.8 | 1.56 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Cols | 25000 | 4.609 ms | 1.50 | 27 | 5915.6 | 
184.3 | 10.55 KB | 6.77 | -| ReadLine_ | .NET 8.0 | Cols | 25000 | 13.020 ms | 4.22 | 27 | 2093.9 | 520.8 | 89986.83 KB | 57,735.91 | -| CsvHelper | .NET 8.0 | Cols | 25000 | 36.009 ms | 11.70 | 27 | 757.1 | 1440.4 | 28451.15 KB | 18,254.37 | +| Sep______ | .NET 7.0 | Cols | 25000 | 3.574 ms | 1.00 | 27 | 7628.8 | 142.9 | 1.2 KB | 1.00 | +| Sylvan___ | .NET 7.0 | Cols | 25000 | 4.968 ms | 1.39 | 27 | 5488.1 | 198.7 | 10.59 KB | 8.83 | +| ReadLine_ | .NET 7.0 | Cols | 25000 | 14.102 ms | 3.90 | 27 | 1933.3 | 564.1 | 89986.84 KB | 75,037.89 | +| CsvHelper | .NET 7.0 | Cols | 25000 | 42.826 ms | 11.98 | 27 | 636.6 | 1713.0 | 28451.27 KB | 23,724.84 | +| Sep______ | .NET 8.0 | Cols | 25000 | 3.294 ms | 0.92 | 27 | 8275.5 | 131.8 | 1.2 KB | 1.00 | +| Sylvan___ | .NET 8.0 | Cols | 25000 | 4.717 ms | 1.32 | 27 | 5779.3 | 188.7 | 10.59 KB | 8.83 | +| ReadLine_ | .NET 8.0 | Cols | 25000 | 13.991 ms | 3.92 | 27 | 1948.7 | 559.6 | 89986.83 KB | 75,037.88 | +| CsvHelper | .NET 8.0 | Cols | 25000 | 39.277 ms | 10.94 | 27 | 694.1 | 1571.1 | 28451.15 KB | 23,724.74 | | | | | | | | | | | | | -| Sep______ | .NET 7.0 | Floats | 25000 | 32.440 ms | 1.00 | 27 | 840.4 | 1297.6 | 8.89 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Floats | 25000 | 68.701 ms | 2.12 | 27 | 396.8 | 2748.1 | 18.86 KB | 2.12 | -| ReadLine_ | .NET 7.0 | Floats | 25000 | 79.471 ms | 2.45 | 27 | 343.1 | 3178.8 | 89993.42 KB | 10,122.28 | -| CsvHelper | .NET 7.0 | Floats | 25000 | 133.372 ms | 4.13 | 27 | 204.4 | 5334.9 | 22039.48 KB | 2,478.96 | -| Sep______ | .NET 8.0 | Floats | 25000 | 21.978 ms | 0.68 | 27 | 1240.5 | 879.1 | 9.11 KB | 1.02 | -| Sylvan___ | .NET 8.0 | Floats | 25000 | 65.359 ms | 2.01 | 27 | 417.1 | 2614.4 | 18.84 KB | 2.12 | -| ReadLine_ | .NET 8.0 | Floats | 25000 | 72.653 ms | 2.24 | 27 | 375.3 | 2906.1 | 89990.3 KB | 10,121.93 | -| CsvHelper | .NET 8.0 | Floats | 25000 | 110.129 ms | 3.39 | 27 | 247.6 | 4405.2 | 22036.58 KB | 2,478.63 | +| Sep______ | .NET 7.0 | Floats | 25000 | 33.288 ms | 1.00 | 
27 | 819.0 | 1331.5 | 8.18 KB | 1.00 | +| Sylvan___ | .NET 7.0 | Floats | 25000 | 78.853 ms | 2.37 | 27 | 345.8 | 3154.1 | 18.89 KB | 2.31 | +| ReadLine_ | .NET 7.0 | Floats | 25000 | 87.688 ms | 2.62 | 27 | 310.9 | 3507.5 | 89993.42 KB | 11,002.06 | +| CsvHelper | .NET 7.0 | Floats | 25000 | 143.571 ms | 4.29 | 27 | 189.9 | 5742.8 | 22039.48 KB | 2,694.42 | +| Sep______ | .NET 8.0 | Floats | 25000 | 23.568 ms | 0.71 | 27 | 1156.8 | 942.7 | 8.13 KB | 0.99 | +| Sylvan___ | .NET 8.0 | Floats | 25000 | 70.200 ms | 2.10 | 27 | 388.4 | 2808.0 | 18.87 KB | 2.31 | +| ReadLine_ | .NET 8.0 | Floats | 25000 | 81.667 ms | 2.45 | 27 | 333.8 | 3266.7 | 89990.3 KB | 11,001.68 | +| CsvHelper | .NET 8.0 | Floats | 25000 | 121.367 ms | 3.65 | 27 | 224.6 | 4854.7 | 22035.94 KB | 2,693.98 | diff --git a/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBench.md b/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBench.md index da8a8ba9..fc1961df 100644 --- a/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBench.md +++ b/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBench.md @@ -1,42 +1,48 @@ ``` -BenchmarkDotNet v0.13.9+228a464e8be6c580ad9408e98f18813f6407fb5a, Windows 10 (10.0.19044.3086/21H2/November2021Update) +BenchmarkDotNet v0.13.10, Windows 10 (10.0.19044.3086/21H2/November2021Update) AMD Ryzen 9 5950X, 1 CPU, 32 logical and 16 physical cores .NET SDK 8.0.100-rc.2.23502.2 [Host] : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 - Job-VQBAPT : .NET 7.0.12 (7.0.1223.47720), X64 RyuJIT AVX2 - Job-QUTMRR : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 + Job-WYCHAH : .NET 7.0.13 (7.0.1323.51816), X64 RyuJIT AVX2 + Job-OXIFBK : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 InvocationCount=Default IterationTime=300.0000 ms MaxIterationCount=15 MinIterationCount=5 WarmupCount=6 Quotes=False Reader=String ``` -| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | -|---------- |--------- |------ |------ |-----------:|------:|---:|--------:|-------:|-------------:|------------:| 
-| Sep______ | .NET 7.0 | Row | 50000 | 2.481 ms | 1.00 | 29 | 11761.3 | 49.6 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Row | 50000 | 3.117 ms | 1.26 | 29 | 9360.8 | 62.3 | 7.17 KB | 6.34 | -| ReadLine_ | .NET 7.0 | Row | 50000 | 13.023 ms | 5.20 | 29 | 2240.8 | 260.5 | 88608.25 KB | 78,287.19 | -| CsvHelper | .NET 7.0 | Row | 50000 | 51.579 ms | 20.76 | 29 | 565.8 | 1031.6 | 20.65 KB | 18.25 | -| Sep______ | .NET 8.0 | Row | 50000 | 2.436 ms | 0.98 | 29 | 11978.3 | 48.7 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Row | 50000 | 2.929 ms | 1.18 | 29 | 9962.0 | 58.6 | 7.17 KB | 6.33 | -| ReadLine_ | .NET 8.0 | Row | 50000 | 11.788 ms | 4.76 | 29 | 2475.5 | 235.8 | 88608.24 KB | 78,287.18 | -| CsvHelper | .NET 8.0 | Row | 50000 | 42.562 ms | 17.15 | 29 | 685.6 | 851.2 | 20.59 KB | 18.19 | -| | | | | | | | | | | | -| Sep______ | .NET 7.0 | Cols | 50000 | 3.166 ms | 1.00 | 29 | 9218.1 | 63.3 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Cols | 50000 | 5.460 ms | 1.72 | 29 | 5344.6 | 109.2 | 7.18 KB | 6.33 | -| ReadLine_ | .NET 7.0 | Cols | 50000 | 13.603 ms | 4.26 | 29 | 2145.1 | 272.1 | 88608.25 KB | 78,152.32 | -| CsvHelper | .NET 7.0 | Cols | 50000 | 83.833 ms | 26.47 | 29 | 348.1 | 1676.7 | 446.31 KB | 393.65 | -| Sep______ | .NET 8.0 | Cols | 50000 | 3.142 ms | 0.99 | 29 | 9288.8 | 62.8 | 1.13 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Cols | 50000 | 5.181 ms | 1.64 | 29 | 5632.6 | 103.6 | 7.18 KB | 6.33 | -| ReadLine_ | .NET 8.0 | Cols | 50000 | 12.208 ms | 3.85 | 29 | 2390.4 | 244.2 | 88608.24 KB | 78,152.32 | -| CsvHelper | .NET 8.0 | Cols | 50000 | 70.302 ms | 22.22 | 29 | 415.1 | 1406.0 | 446.35 KB | 393.68 | -| | | | | | | | | | | | -| Sep______ | .NET 7.0 | Asset | 50000 | 38.120 ms | 1.00 | 29 | 765.5 | 762.4 | 13800.21 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Asset | 50000 | 44.675 ms | 1.17 | 29 | 653.2 | 893.5 | 14025 KB | 1.02 | -| ReadLine_ | .NET 7.0 | Asset | 50000 | 113.648 ms | 2.98 | 29 | 256.8 | 2273.0 | 102133.41 KB | 7.40 | -| CsvHelper | .NET 7.0 | 
Asset | 50000 | 105.184 ms | 2.77 | 29 | 277.4 | 2103.7 | 13971.28 KB | 1.01 | -| Sep______ | .NET 8.0 | Asset | 50000 | 30.393 ms | 0.80 | 29 | 960.1 | 607.9 | 13799.66 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Asset | 50000 | 38.855 ms | 1.02 | 29 | 751.0 | 777.1 | 14025.03 KB | 1.02 | -| ReadLine_ | .NET 8.0 | Asset | 50000 | 121.473 ms | 3.19 | 29 | 240.2 | 2429.5 | 102133.36 KB | 7.40 | -| CsvHelper | .NET 8.0 | Asset | 50000 | 93.300 ms | 2.45 | 29 | 312.8 | 1866.0 | 13972.05 KB | 1.01 | +| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | +|------------ |--------- |------ |------ |-----------:|------:|---:|--------:|-------:|------------:|------------:| +| Sep______ | .NET 7.0 | Row | 50000 | 2.537 ms | 1.00 | 29 | 11503.7 | 50.7 | 935 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Row | 50000 | 2.569 ms | 1.01 | 29 | 11360.9 | 51.4 | 935 B | 1.00 | +| Sylvan___ | .NET 7.0 | Row | 50000 | 3.197 ms | 1.26 | 29 | 9128.8 | 63.9 | 7383 B | 7.90 | +| ReadLine_ | .NET 7.0 | Row | 50000 | 13.549 ms | 5.27 | 29 | 2153.8 | 271.0 | 90734847 B | 97,042.62 | +| CsvHelper | .NET 7.0 | Row | 50000 | 61.317 ms | 24.28 | 29 | 475.9 | 1226.3 | 21150 B | 22.62 | +| Sep______ | .NET 8.0 | Row | 50000 | 2.446 ms | 0.96 | 29 | 11931.8 | 48.9 | 934 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Row | 50000 | 2.521 ms | 0.99 | 29 | 11576.6 | 50.4 | 934 B | 1.00 | +| Sylvan___ | .NET 8.0 | Row | 50000 | 2.965 ms | 1.17 | 29 | 9842.7 | 59.3 | 7382 B | 7.90 | +| ReadLine_ | .NET 8.0 | Row | 50000 | 12.622 ms | 5.00 | 29 | 2311.9 | 252.4 | 90734841 B | 97,042.61 | +| CsvHelper | .NET 8.0 | Row | 50000 | 43.642 ms | 17.19 | 29 | 668.6 | 872.8 | 21081 B | 22.55 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | Cols | 50000 | 3.705 ms | 1.00 | 29 | 7875.9 | 74.1 | 938 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Cols | 50000 | 4.391 ms | 1.19 | 29 | 6645.3 | 87.8 | 941 B | 1.00 | +| Sylvan___ | .NET 7.0 | Cols | 50000 | 5.598 ms | 1.51 | 29 | 5213.0 | 112.0 | 7389 
B | 7.88 | +| ReadLine_ | .NET 7.0 | Cols | 50000 | 13.541 ms | 3.61 | 29 | 2155.1 | 270.8 | 90734847 B | 96,732.25 | +| CsvHelper | .NET 7.0 | Cols | 50000 | 77.553 ms | 20.93 | 29 | 376.3 | 1551.1 | 457022 B | 487.23 | +| Sep______ | .NET 8.0 | Cols | 50000 | 3.628 ms | 0.98 | 29 | 8043.9 | 72.6 | 937 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Cols | 50000 | 3.984 ms | 1.08 | 29 | 7325.4 | 79.7 | 938 B | 1.00 | +| Sylvan___ | .NET 8.0 | Cols | 50000 | 5.157 ms | 1.39 | 29 | 5658.3 | 103.1 | 7386 B | 7.87 | +| ReadLine_ | .NET 8.0 | Cols | 50000 | 13.149 ms | 3.54 | 29 | 2219.3 | 263.0 | 90734841 B | 96,732.24 | +| CsvHelper | .NET 8.0 | Cols | 50000 | 70.761 ms | 19.10 | 29 | 412.4 | 1415.2 | 457060 B | 487.27 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | Asset | 50000 | 34.415 ms | 1.00 | 29 | 847.9 | 688.3 | 14130898 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Asset | 50000 | 34.273 ms | 1.00 | 29 | 851.4 | 685.5 | 14130898 B | 1.00 | +| Sylvan___ | .NET 7.0 | Asset | 50000 | 42.214 ms | 1.22 | 29 | 691.3 | 844.3 | 14296698 B | 1.01 | +| ReadLine_ | .NET 7.0 | Asset | 50000 | 113.604 ms | 3.29 | 29 | 256.9 | 2272.1 | 104584612 B | 7.40 | +| CsvHelper | .NET 7.0 | Asset | 50000 | 103.655 ms | 3.02 | 29 | 281.5 | 2073.1 | 14307286 B | 1.01 | +| Sep______ | .NET 8.0 | Asset | 50000 | 30.453 ms | 0.88 | 29 | 958.3 | 609.1 | 14130846 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Asset | 50000 | 30.480 ms | 0.89 | 29 | 957.4 | 609.6 | 14130886 B | 1.00 | +| Sylvan___ | .NET 8.0 | Asset | 50000 | 38.244 ms | 1.11 | 29 | 763.0 | 764.9 | 14296692 B | 1.01 | +| ReadLine_ | .NET 8.0 | Asset | 50000 | 105.568 ms | 2.96 | 29 | 276.4 | 2111.4 | 104584668 B | 7.40 | +| CsvHelper | .NET 8.0 | Asset | 50000 | 85.633 ms | 2.49 | 29 | 340.8 | 1712.7 | 14306936 B | 1.01 | diff --git a/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBenchQuotes.md b/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBenchQuotes.md index f35e6311..e5a595e5 100644 --- 
a/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBenchQuotes.md +++ b/benchmarks/AMD.Ryzen.9.5950X/PackageAssetsBenchQuotes.md @@ -1,42 +1,48 @@ ``` -BenchmarkDotNet v0.13.9+228a464e8be6c580ad9408e98f18813f6407fb5a, Windows 10 (10.0.19044.3086/21H2/November2021Update) +BenchmarkDotNet v0.13.10, Windows 10 (10.0.19044.3086/21H2/November2021Update) AMD Ryzen 9 5950X, 1 CPU, 32 logical and 16 physical cores .NET SDK 8.0.100-rc.2.23502.2 [Host] : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 - Job-VQBAPT : .NET 7.0.12 (7.0.1223.47720), X64 RyuJIT AVX2 - Job-QUTMRR : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 + Job-WYCHAH : .NET 7.0.13 (7.0.1323.51816), X64 RyuJIT AVX2 + Job-OXIFBK : .NET 8.0.0 (8.0.23.47906), X64 RyuJIT AVX2 InvocationCount=Default IterationTime=300.0000 ms MaxIterationCount=15 MinIterationCount=5 WarmupCount=6 Quotes=True Reader=String ``` -| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | -|---------- |--------- |------ |------ |-----------:|------:|---:|-------:|-------:|-------------:|------------:| -| Sep______ | .NET 7.0 | Row | 50000 | 7.243 ms | 1.00 | 33 | 4608.3 | 144.9 | 1.14 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Row | 50000 | 21.395 ms | 2.96 | 33 | 1560.0 | 427.9 | 7.33 KB | 6.41 | -| ReadLine_ | .NET 7.0 | Row | 50000 | 16.510 ms | 2.27 | 33 | 2021.6 | 330.2 | 108778.76 KB | 95,042.19 | -| CsvHelper | .NET 7.0 | Row | 50000 | 67.351 ms | 9.26 | 33 | 495.6 | 1347.0 | 20.65 KB | 18.05 | -| Sep______ | .NET 8.0 | Row | 50000 | 6.567 ms | 0.90 | 33 | 5082.8 | 131.3 | 1.14 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Row | 50000 | 17.739 ms | 2.43 | 33 | 1881.6 | 354.8 | 7.2 KB | 6.29 | -| ReadLine_ | .NET 8.0 | Row | 50000 | 14.818 ms | 2.06 | 33 | 2252.6 | 296.4 | 108778.75 KB | 95,042.18 | -| CsvHelper | .NET 8.0 | Row | 50000 | 52.127 ms | 7.16 | 33 | 640.3 | 1042.5 | 20.6 KB | 18.00 | -| | | | | | | | | | | | -| Sep______ | .NET 7.0 | Cols | 50000 | 7.380 ms | 1.00 | 33 | 4522.8 | 147.6 | 1.15 KB | 1.00 | 
-| Sylvan___ | .NET 7.0 | Cols | 50000 | 23.992 ms | 3.25 | 33 | 1391.2 | 479.8 | 7.22 KB | 6.29 | -| ReadLine_ | .NET 7.0 | Cols | 50000 | 16.336 ms | 2.19 | 33 | 2043.2 | 326.7 | 108778.75 KB | 94,799.53 | -| CsvHelper | .NET 7.0 | Cols | 50000 | 88.119 ms | 11.94 | 33 | 378.8 | 1762.4 | 446.31 KB | 388.95 | -| Sep______ | .NET 8.0 | Cols | 50000 | 7.068 ms | 0.96 | 33 | 4722.2 | 141.4 | 1.14 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Cols | 50000 | 19.952 ms | 2.70 | 33 | 1672.9 | 399.0 | 7.21 KB | 6.29 | -| ReadLine_ | .NET 8.0 | Cols | 50000 | 14.506 ms | 1.96 | 33 | 2301.0 | 290.1 | 108778.75 KB | 94,799.52 | -| CsvHelper | .NET 8.0 | Cols | 50000 | 82.632 ms | 11.20 | 33 | 403.9 | 1652.6 | 446.35 KB | 388.99 | -| | | | | | | | | | | | -| Sep______ | .NET 7.0 | Asset | 50000 | 38.018 ms | 1.00 | 33 | 877.9 | 760.4 | 13808.03 KB | 1.00 | -| Sylvan___ | .NET 7.0 | Asset | 50000 | 60.345 ms | 1.65 | 33 | 553.1 | 1206.9 | 14026.44 KB | 1.02 | -| ReadLine_ | .NET 7.0 | Asset | 50000 | 126.464 ms | 3.20 | 33 | 263.9 | 2529.3 | 122303.92 KB | 8.86 | -| CsvHelper | .NET 7.0 | Asset | 50000 | 111.102 ms | 2.92 | 33 | 300.4 | 2222.0 | 13970.78 KB | 1.01 | -| Sep______ | .NET 8.0 | Asset | 50000 | 34.347 ms | 0.91 | 33 | 971.8 | 686.9 | 13808.08 KB | 1.00 | -| Sylvan___ | .NET 8.0 | Asset | 50000 | 52.851 ms | 1.38 | 33 | 631.5 | 1057.0 | 14026.01 KB | 1.02 | -| ReadLine_ | .NET 8.0 | Asset | 50000 | 122.265 ms | 3.19 | 33 | 273.0 | 2445.3 | 122303.85 KB | 8.86 | -| CsvHelper | .NET 8.0 | Asset | 50000 | 96.526 ms | 2.54 | 33 | 345.8 | 1930.5 | 13971.86 KB | 1.01 | +| Method | Runtime | Scope | Rows | Mean | Ratio | MB | MB/s | ns/row | Allocated | Alloc Ratio | +|------------ |--------- |------ |------ |-----------:|------:|---:|-------:|-------:|------------:|------------:| +| Sep______ | .NET 7.0 | Row | 50000 | 6.926 ms | 1.00 | 33 | 4819.0 | 138.5 | 948 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Row | 50000 | 7.226 ms | 1.04 | 33 | 4618.8 | 144.5 | 948 B | 1.00 | +| 
Sylvan___ | .NET 7.0 | Row | 50000 | 20.511 ms | 2.97 | 33 | 1627.3 | 410.2 | 7426 B | 7.83 | +| ReadLine_ | .NET 7.0 | Row | 50000 | 15.893 ms | 2.26 | 33 | 2100.1 | 317.9 | 111389450 B | 117,499.42 | +| CsvHelper | .NET 7.0 | Row | 50000 | 61.713 ms | 8.91 | 33 | 540.8 | 1234.3 | 24234 B | 25.56 | +| Sep______ | .NET 8.0 | Row | 50000 | 6.838 ms | 0.99 | 33 | 4881.5 | 136.8 | 945 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Row | 50000 | 6.963 ms | 1.01 | 33 | 4793.3 | 139.3 | 946 B | 1.00 | +| Sylvan___ | .NET 8.0 | Row | 50000 | 18.535 ms | 2.60 | 33 | 1800.8 | 370.7 | 7411 B | 7.82 | +| ReadLine_ | .NET 8.0 | Row | 50000 | 16.519 ms | 2.38 | 33 | 2020.5 | 330.4 | 111389436 B | 117,499.41 | +| CsvHelper | .NET 8.0 | Row | 50000 | 53.122 ms | 7.67 | 33 | 628.3 | 1062.4 | 21091 B | 22.25 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | Cols | 50000 | 8.502 ms | 1.00 | 33 | 3926.0 | 170.0 | 951 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Cols | 50000 | 8.784 ms | 1.03 | 33 | 3799.6 | 175.7 | 953 B | 1.00 | +| Sylvan___ | .NET 7.0 | Cols | 50000 | 23.206 ms | 2.73 | 33 | 1438.3 | 464.1 | 7430 B | 7.81 | +| ReadLine_ | .NET 7.0 | Cols | 50000 | 16.651 ms | 1.94 | 33 | 2004.6 | 333.0 | 111389446 B | 117,128.75 | +| CsvHelper | .NET 7.0 | Cols | 50000 | 98.855 ms | 11.63 | 33 | 337.6 | 1977.1 | 457022 B | 480.57 | +| Sep______ | .NET 8.0 | Cols | 50000 | 7.616 ms | 0.90 | 33 | 4382.5 | 152.3 | 947 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Cols | 50000 | 8.560 ms | 1.01 | 33 | 3899.3 | 171.2 | 950 B | 1.00 | +| Sylvan___ | .NET 8.0 | Cols | 50000 | 20.405 ms | 2.40 | 33 | 1635.8 | 408.1 | 7419 B | 7.80 | +| ReadLine_ | .NET 8.0 | Cols | 50000 | 15.310 ms | 1.80 | 33 | 2180.1 | 306.2 | 111389441 B | 117,128.75 | +| CsvHelper | .NET 8.0 | Cols | 50000 | 85.567 ms | 10.05 | 33 | 390.1 | 1711.3 | 457060 B | 480.61 | +| | | | | | | | | | | | +| Sep______ | .NET 7.0 | Asset | 50000 | 41.325 ms | 1.00 | 33 | 807.7 | 826.5 | 14139450 B | 1.00 | +| Sep_Unescape| .NET 7.0 | Asset | 50000 
| 38.076 ms | 0.93 | 33 | 876.6 | 761.5 | 14130898 B | 1.00 | +| Sylvan___ | .NET 7.0 | Asset | 50000 | 61.446 ms | 1.49 | 33 | 543.2 | 1228.9 | 14298344 B | 1.01 | +| ReadLine_ | .NET 7.0 | Asset | 50000 | 125.874 ms | 3.05 | 33 | 265.2 | 2517.5 | 125239164 B | 8.86 | +| CsvHelper | .NET 7.0 | Asset | 50000 | 115.031 ms | 2.77 | 33 | 290.2 | 2300.6 | 14307054 B | 1.01 | +| Sep______ | .NET 8.0 | Asset | 50000 | 37.937 ms | 0.92 | 33 | 879.8 | 758.7 | 14139438 B | 1.00 | +| Sep_Unescape| .NET 8.0 | Asset | 50000 | 34.323 ms | 0.83 | 33 | 972.5 | 686.5 | 14130926 B | 1.00 | +| Sylvan___ | .NET 8.0 | Asset | 50000 | 53.940 ms | 1.30 | 33 | 618.8 | 1078.8 | 14296672 B | 1.01 | +| ReadLine_ | .NET 8.0 | Asset | 50000 | 118.606 ms | 2.88 | 33 | 281.4 | 2372.1 | 125239072 B | 8.86 | +| CsvHelper | .NET 8.0 | Asset | 50000 | 99.200 ms | 2.38 | 33 | 336.5 | 1984.0 | 14306154 B | 1.01 | diff --git a/benchmarks/AMD.Ryzen.9.5950X/Versions.txt b/benchmarks/AMD.Ryzen.9.5950X/Versions.txt index 70d6cc35..b8465b92 100644 --- a/benchmarks/AMD.Ryzen.9.5950X/Versions.txt +++ b/benchmarks/AMD.Ryzen.9.5950X/Versions.txt @@ -1 +1 @@ -Sep 0.2.8.0, Sylvan 1.3.3.0, CsvHelper 30.0.1.0 \ No newline at end of file +Sep 0.3.0.0, Sylvan 1.3.5.0, CsvHelper 30.0.1.0 \ No newline at end of file diff --git a/global.json b/global.json index d67aac54..17a23f42 100644 --- a/global.json +++ b/global.json @@ -1,6 +1,6 @@ { "sdk": { - "version": "8.0.0", + "version": "8.0.100", "rollForward": "latestFeature", "allowPrerelease": true } diff --git a/src/Sep.Benchmarks/Program.cs b/src/Sep.Benchmarks/Program.cs index 59b65320..0166c91e 100644 --- a/src/Sep.Benchmarks/Program.cs +++ b/src/Sep.Benchmarks/Program.cs @@ -38,11 +38,11 @@ { var b = new SepParserBench(); b.GlobalSetup(); - b.Parse(); + b.ParseColEnds(); Thread.Sleep(200); for (var i = 0; i < 200000000; i++) { - b.Parse(); + b.ParseColEnds(); } } diff --git a/src/Sep.Benchmarks/SepParseSeparatorsMaskBench.cs 
b/src/Sep.Benchmarks/SepParseSeparatorsMaskBench.cs index 790278c5..a342abab 100644 --- a/src/Sep.Benchmarks/SepParseSeparatorsMaskBench.cs +++ b/src/Sep.Benchmarks/SepParseSeparatorsMaskBench.cs @@ -53,11 +53,4 @@ public unsafe ref int ParseSeparatorsMask() return ref SepParseMask.ParseSeparatorsMask( _maskValue, _dataIndex, ref *_colEnds); } - - [Benchmark] - public unsafe ref int ParseSeparatorsMaskLong() - { - return ref SepParseMask.ParseSeparatorsMaskLong( - _maskValue, _dataIndex, ref *_colEnds); - } } diff --git a/src/Sep.Benchmarks/SepParserBench.cs b/src/Sep.Benchmarks/SepParserBench.cs index 5a5d9121..4ad0f590 100644 --- a/src/Sep.Benchmarks/SepParserBench.cs +++ b/src/Sep.Benchmarks/SepParserBench.cs @@ -77,16 +77,16 @@ public void GlobalSetup() _state._chars = ArrayPool.Shared.Rent(text.Length + _parser.PaddingLength); text.AsSpan().CopyTo(_state._chars.AsSpan().Slice(0, text.Length)); _state._charsDataEnd = text.Length; - _state._colEnds = ArrayPool.Shared.Rent(SepReaderState.ColEndsInitialLength); + _state._colEndsOrColInfos = ArrayPool.Shared.Rent(SepReaderState.ColEndsInitialLength); } [Benchmark(Baseline = true)] - public void Parse() + public void ParseColEnds() { _state!._colCount = 0; _state!._lineNumber = 0; _state!._charsParseStart = 0; - _parser!.Parse(_state!); + _parser!.ParseColEnds(_state!); } //[Benchmark] diff --git a/src/Sep.ComparisonBenchmarks/PackageAssetsBench.cs b/src/Sep.ComparisonBenchmarks/PackageAssetsBench.cs index 488517a7..66b4b853 100644 --- a/src/Sep.ComparisonBenchmarks/PackageAssetsBench.cs +++ b/src/Sep.ComparisonBenchmarks/PackageAssetsBench.cs @@ -77,6 +77,14 @@ public void Sep______() foreach (var row in reader) { } } + [Benchmark] + public void Sep_Unescape() + { + using var reader = Sep.Reader(o => o with { HasHeader = false, Unescape = true }) + .From(Reader.CreateReader()); + foreach (var row in reader) { } + } + #if !SEPBENCHSEPONLY [Benchmark] #endif @@ -158,6 +166,20 @@ public void Sep______() } } + 
[Benchmark()] + public void Sep_Unescape() + { + using var reader = Sep.Reader(o => o with { HasHeader = false, Unescape = true }) + .From(Reader.CreateReader()); + foreach (var row in reader) + { + for (var i = 0; i < row.ColCount; i++) + { + var span = row[i].Span; + } + } + } + #if !SEPBENCHSEPONLY [Benchmark] #endif @@ -266,6 +288,28 @@ public void Sep______() } } + [Benchmark] + public void Sep_Unescape() + { + var assets = new List(); + + using var reader = Sep.Reader(o => o with + { + HasHeader = false, + Unescape = true, +#if USE_STRING_POOLING + CreateToString = SepToString.PoolPerCol(maximumStringLength: 128), +#endif + }) + .From(Reader.CreateReader()); + + foreach (var row in reader) + { + var asset = PackageAsset.Read(reader, static (r, i) => r.ToString(i)); + assets.Add(asset); + } + } + #if !SEPBENCHSEPONLY [Benchmark] #endif diff --git a/src/Sep.ComparisonBenchmarks/Program.cs b/src/Sep.ComparisonBenchmarks/Program.cs index 002dc434..c2161ecf 100644 --- a/src/Sep.ComparisonBenchmarks/Program.cs +++ b/src/Sep.ComparisonBenchmarks/Program.cs @@ -32,6 +32,14 @@ log($"{Environment.Version} args: {args.Length} versions: {GetVersions()}"); +#if DEBUG +// Consider where to move this perhaps a new ComparisonTest project +if (Debugger.IsAttached) +{ + UnescapeCompare.CompareUnescape(); +} +#endif + await PackageAssetsTestData.EnsurePackageAssets().ConfigureAwait(true); // Use args as switch to run BDN or not e.g. 
BDN only run when using script diff --git a/src/Sep.ComparisonBenchmarks/UnescapeCompare.cs b/src/Sep.ComparisonBenchmarks/UnescapeCompare.cs new file mode 100644 index 00000000..c47eee70 --- /dev/null +++ b/src/Sep.ComparisonBenchmarks/UnescapeCompare.cs @@ -0,0 +1,149 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Text; +using CsvHelper; +using CsvHelper.Configuration; +using Sylvan.Data.Csv; + +namespace nietras.SeparatedValues.ComparisonBenchmarks; + +public static class UnescapeCompare +{ + record UnescapeTest(string ColText, bool IsValid = false); + + public static void CompareUnescape() + { + var tests = new UnescapeTest[] + { + new("a", IsValid: true), + new("\"\"", IsValid: true), + new("\"\"\"\"", IsValid: true), + new("\"\"\"\"\"\"", IsValid: true), + new("\"a\"", IsValid: true), + new("\"a\"\"a\"", IsValid: true), + new("\"a\"\"a\"\"a\"", IsValid: true), + + // No start quote + new("a\"\"a"), + new("a\"a\"a"), + new(" \"\" "), + new(" \"a\" "), + new(" \"\""), + new(" \"a\""), + new("a\"\"\"a"), + + new("\"a\"a\"a\""), + new("\"\" "), + new("\"a\" "), + new("\"a\"\"\"a"), + + new("\"a\"\"\"a\""), + new("\"\"a\""), + new("\"a\"a\""), + new("\"\"a\"a\"\""), + + new("\"\"\""), + new("\"\"\"\"\""), + }; + var runners = new Dictionary>() + { + { nameof(CsvHelper), t => UnescapeCsvHelper(ConfigurationFunctions.BadDataFound, t.ColText) }, + { nameof(CsvHelper) + "¹", t => UnescapeCsvHelper(null, t.ColText) }, + { nameof(Sylvan), t => UnescapeSylvan(t.ColText) }, + { nameof(Sep) + "²", t => UnescapeSep(t.ColText) }, + }; + var sb = new StringBuilder(); + var outputCsharp = false; + sb.Append($"| Input |"); + if (outputCsharp) { sb.Append($" Input (C#) |"); } + sb.Append($" Valid |"); + foreach (var (name, _) in runners) + { + sb.Append($" {name} |"); + } + sb.AppendLine(); + sb.Append($"|-|"); + if (outputCsharp) { sb.Append($"-|"); } + sb.Append($"-|"); + foreach (var 
(_, _) in runners) + { + sb.Append($"-|"); + } + sb.AppendLine(); + foreach (var test in tests) + { + sb.Append($"| `{test.ColText.Replace(" ", "·")}` |"); + var csharpColText = test.ColText.Replace(" ", "·").Replace("\"", "\\\""); + if (outputCsharp) { sb.Append($" `{csharpColText}` |"); } + sb.Append($" {test.IsValid} |"); + + var csharpColTextResult = UnescapeSep(test.ColText).Replace("\"", "\\\""); + Trace.WriteLine($"new object[] {{ \"{test.ColText.Replace("\"", "\\\"")}\", \"{csharpColTextResult}\" }},"); + + foreach (var (_, action) in runners) + { + try + { + var outputColText = action(test); + if (outputColText.Length > 0) + { + sb.Append($" `{outputColText.Replace(" ", "·")}`"); + } + sb.Append($" |"); + } + catch (Exception e) + { + var message = e.Message.ReplaceLineEndings(" "); + Trace.WriteLine(message); + sb.Append($" EXCEPTION |"); + } + } + sb.AppendLine(); + } + sb.AppendLine(); + sb.AppendLine($"`·` (middle dot) is whitespace to make this visible"); + sb.AppendLine(); + sb.AppendLine($"¹ CsvHelper with `BadDataFound = null`"); + sb.AppendLine(); + sb.AppendLine($"² Sep with `{nameof(SepReaderOptions.Unescape)} = true` in `{nameof(SepReaderOptions)}`"); + + var text = sb.ToString(); + Trace.WriteLine(text); + File.WriteAllText("UnescapeCompare.md", text, Encoding.UTF8); + } + + static string UnescapeCsvHelper(BadDataFound? 
badDataFound, string colText) + { + var config = new CsvConfiguration(CultureInfo.InvariantCulture) + { + HasHeaderRecord = false, + BadDataFound = badDataFound, + }; + using var reader = new StringReader(colText); + using var csvParser = new CsvParser(reader, config); + SepAssert.Assert(csvParser.Read()); + return csvParser[0]; + } + + static string UnescapeSylvan(string colText) + { + var options = new CsvDataReaderOptions + { + HasHeaders = false, + }; + using var reader = new StringReader(colText); + using var csvReader = Sylvan.Data.Csv.CsvDataReader.Create(reader, options); + SepAssert.Assert(csvReader.Read()); + return csvReader.GetString(0); + } + + static string UnescapeSep(string colText) + { + using var reader = Sep.Reader(o => o with { HasHeader = false, Unescape = true }).FromText(colText); + SepAssert.Assert(reader.MoveNext()); + return reader.Current[0].ToString(); + } +} diff --git a/src/Sep.Test/PackageAssetsTest.cs b/src/Sep.Test/PackageAssetsTest.cs index a5a1d6b9..a6d24945 100644 --- a/src/Sep.Test/PackageAssetsTest.cs +++ b/src/Sep.Test/PackageAssetsTest.cs @@ -10,53 +10,61 @@ namespace nietras.SeparatedValues.Test; public class PackageAssetsTest { [TestMethod] - public void PackageAssetsTest_Read_NoQuotes() - { - VerifyRead(NoQuotes); - } + public void PackageAssetsTest_Read_NoQuotes() => VerifyRead(NoQuotes); [TestMethod] - public void PackageAssetsTest_Read_WithQuotes() - { - VerifyRead(WithQuotes); - } + public void PackageAssetsTest_Read_NoQuotes_Unescape() => VerifyRead(NoQuotes, unescape: true); [TestMethod] - public void PackageAssetsTest_Enumerate_NoQuotes() - { + public void PackageAssetsTest_Read_WithQuotes() => VerifyRead(WithQuotes); + + [TestMethod] + public void PackageAssetsTest_Read_WithQuotes_Unescape() => VerifyRead(WithQuotes, unescape: true); + + [TestMethod] + public void PackageAssetsTest_Enumerate_NoQuotes() => VerifyEnumerate(NoQuotes, (reader, select) => reader.Enumerate(select)); - } [TestMethod] - public void 
PackageAssetsTest_Enumerate_WithQuotes() - { + public void PackageAssetsTest_Enumerate_NoQuotes_Unescape() => + VerifyEnumerate(NoQuotes, (reader, select) => reader.Enumerate(select), unescape: true); + + [TestMethod] + public void PackageAssetsTest_Enumerate_WithQuotes() => VerifyEnumerate(WithQuotes, (reader, select) => reader.Enumerate(select)); - } - static void VerifyRead(string text) + [TestMethod] + public void PackageAssetsTest_Enumerate_WithQuotes_Unescape() => + VerifyEnumerate(WithQuotes, (reader, select) => reader.Enumerate(select), unescape: true); + + static void VerifyRead(string text, bool unescape = false) { var expected = ReadLineSplitAsList(text); - var reader = Sep.Reader(o => o with { HasHeader = false }).FromText(text); + var reader = Sep.Reader(o => o with { HasHeader = false, Unescape = unescape }).FromText(text); var rowIndex = 0; foreach (var row in reader) { var expectedCols = expected[rowIndex]; + expectedCols = unescape ? UnescapeColsByTrim(expectedCols) : expectedCols; Assert.AreEqual(expectedCols.Length, row.ColCount); - CollectionAssert.AreEqual(expectedCols, row[0..row.ColCount].ToStringsArray()); + CollectionAssert.AreEqual(expectedCols, row[..].ToStringsArray()); ++rowIndex; } Assert.AreEqual(expected.Count, rowIndex); } - static void VerifyEnumerate(string text, Func, IEnumerable> enumerate) + static void VerifyEnumerate(string text, + Func, IEnumerable> enumerate, + bool unescape = false) { var expected = ReadLineSplitAsList(text); - var reader = Sep.Reader(o => o with { HasHeader = false }).FromText(text); + var reader = Sep.Reader(o => o with { HasHeader = false, Unescape = unescape }).FromText(text); var rows = enumerate(reader, r => r[0..r.ColCount].ToStringsArray()); var rowIndex = 0; foreach (var cols in rows) { var expectedCols = expected[rowIndex]; + expectedCols = unescape ? 
UnescapeColsByTrim(expectedCols) : expectedCols; Assert.AreEqual(expectedCols.Length, cols.Length); CollectionAssert.AreEqual(expectedCols, cols); ++rowIndex; @@ -77,6 +85,9 @@ static IEnumerable ReadLineSplit(string text, char separator) } } + static string[] UnescapeColsByTrim(string[] expectedCols) => + expectedCols.Select(c => c.Trim('"')).ToArray(); + const string NoQuotes = @"75fcf875-017d-4579-bfd9-791d3e6767f0,2020-11-28T01:50:41.2449947+00:00,Akinzekeel.BlazorGrid,0.9.1-preview,2020-11-27T22:42:54.3100000+00:00,AvailableAssets,RuntimeAssemblies,,,net5.0,,,,,,lib/net5.0/BlazorGrid.dll,BlazorGrid.dll,.dll,lib,net5.0,.NETCoreApp,5.0.0.0,,,0.0.0.0 75fcf875-017d-4579-bfd9-791d3e6767f0,2020-11-28T01:50:41.2449947+00:00,Akinzekeel.BlazorGrid,0.9.1-preview,2020-11-27T22:42:54.3100000+00:00,AvailableAssets,CompileLibAssemblies,,,net5.0,,,,,,lib/net5.0/BlazorGrid.dll,BlazorGrid.dll,.dll,lib,net5.0,.NETCoreApp,5.0.0.0,,,0.0.0.0 75fcf875-017d-4579-bfd9-791d3e6767f0,2020-11-28T01:50:41.2449947+00:00,Akinzekeel.BlazorGrid,0.9.1-preview,2020-11-27T22:42:54.3100000+00:00,AvailableAssets,ResourceAssemblies,,,net5.0,,,,,,lib/net5.0/de/BlazorGrid.resources.dll,BlazorGrid.resources.dll,.dll,lib,net5.0,.NETCoreApp,5.0.0.0,,,0.0.0.0 diff --git a/src/Sep.Test/ReadMeTest.cs b/src/Sep.Test/ReadMeTest.cs index be223c41..f9ace645 100644 --- a/src/Sep.Test/ReadMeTest.cs +++ b/src/Sep.Test/ReadMeTest.cs @@ -8,6 +8,9 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using PublicApiGenerator; +// Only parallelize on class level to avoid multiple writes to README file +[assembly: Parallelize(Workers = 0, Scope = ExecutionScope.ClassLevel)] + namespace nietras.SeparatedValues.Test; [TestClass] diff --git a/src/Sep.Test/SepColInfoTest.cs b/src/Sep.Test/SepColInfoTest.cs new file mode 100644 index 00000000..6291b07e --- /dev/null +++ b/src/Sep.Test/SepColInfoTest.cs @@ -0,0 +1,16 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace nietras.SeparatedValues.Test; +
+[TestClass] +public class SepColInfoTest +{ + [TestMethod] + public void SepColInfoTest_() + { + var colInfo = new SepColInfo(12, 34); + Assert.AreEqual(12, colInfo.ColEnd); + Assert.AreEqual(34, colInfo.QuoteCount); + Assert.AreEqual("(12, 34)", colInfo.ToString()); + } +} diff --git a/src/Sep.Test/SepParseMaskTest_ParseSeparatorsMask.cs b/src/Sep.Test/SepParseMaskTest_ParseSeparatorsMask.cs index da50d815..dcd81fe7 100644 --- a/src/Sep.Test/SepParseMaskTest_ParseSeparatorsMask.cs +++ b/src/Sep.Test/SepParseMaskTest_ParseSeparatorsMask.cs @@ -29,7 +29,6 @@ public delegate ref int ParseSeparatorsMaskMethod( static IEnumerable Methods => new object[][] { new object[] { new ParseSeparatorsMaskMethod(SepParseMask.ParseSeparatorsMask) }, - new object[] { new ParseSeparatorsMaskMethod(SepParseMask.ParseSeparatorsMaskLong) }, }; [TestMethod] diff --git a/src/Sep.Test/SepParserTest.cs b/src/Sep.Test/SepParserTest.cs index c7ab5992..cf22a619 100644 --- a/src/Sep.Test/SepParserTest.cs +++ b/src/Sep.Test/SepParserTest.cs @@ -11,15 +11,20 @@ public class SepParserTest { // TODO: Add randomized long tests using baseline naive parser implementation readonly SepReaderState _state = new(); + readonly SepReaderState _stateUnescape = new(colUnquoteUnescape: true); readonly char[] _chars; readonly int[] _colEnds; public SepParserTest() { _chars = new char[1024]; - _state._chars = _chars; _colEnds = new int[1024]; - _state._colEnds = _colEnds; + + _state._chars = _chars; + _state._colEndsOrColInfos = _colEnds; + + _stateUnescape._chars = _chars; + _stateUnescape._colEndsOrColInfos = _colEnds; } static IEnumerable Parsers => SepParserFactory.CreateFactories() @@ -52,11 +57,12 @@ public void SepParserTest_Properties(object parserObject) Contract.Assume(parserObject is not null); var parser = (ISepParser)parserObject; Assert.IsTrue(parser.PaddingLength >= 0); + Assert.IsTrue(parser.QuoteCount == 0); } [TestMethod] [DynamicData(nameof(Parsers))] - public void 
SepParserTest_Parse_Sequence(object parserObject) + public void SepParserTest_ParseColEnds_Sequence(object parserObject) { Contract.Assume(parserObject is not null); var parser = (ISepParser)parserObject; @@ -64,14 +70,29 @@ public void SepParserTest_Parse_Sequence(object parserObject) _state._charsDataEnd = FillChars(new(Enumerable.Range(0, 256).Select(i => (char)i).ToArray())); _state._lineNumber = 3; - var rowLineEndingOffset = parser.Parse(_state); + var rowLineEndingOffset = parser.ParseColEnds(_state); + + // No assert, test is mainly for debugging SIMD code easily + } + + [TestMethod] + [DynamicData(nameof(Parsers))] + public void SepParserTest_ParseColInfos_Sequence(object parserObject) + { + Contract.Assume(parserObject is not null); + var parser = (ISepParser)parserObject; + + _stateUnescape._charsDataEnd = FillChars(new(Enumerable.Range(0, 256).Select(i => (char)i).ToArray())); + _stateUnescape._lineNumber = 3; + + var rowLineEndingOffset = parser.ParseColInfos(_stateUnescape); // No assert, test is mainly for debugging SIMD code easily } [TestMethod] [DynamicData(nameof(Parsers))] - public void SepParserTest_Parse_Short(object parserObject) + public void SepParserTest_ParseColEnds_Short(object parserObject) { Contract.Assume(parserObject is not null); var parser = (ISepParser)parserObject; @@ -80,7 +101,7 @@ public void SepParserTest_Parse_Short(object parserObject) _state._charsDataEnd = charsEnd; _state._lineNumber = 3; - var rowLineEndingOffset = parser.Parse(_state); + var rowLineEndingOffset = parser.ParseColEnds(_state); var expected = new int[] { 2, 6, 9 }; AreEqual(expected, _colEnds, 0, _state._colCount); @@ -91,7 +112,7 @@ public void SepParserTest_Parse_Short(object parserObject) [TestMethod] [DynamicData(nameof(Parsers))] - public void SepParserTest_Parse_Long(object parserObject) + public void SepParserTest_ParseColEnds_Long(object parserObject) { Contract.Assume(parserObject is not null); var parser = (ISepParser)parserObject; @@ -113,7 
+134,7 @@ public void SepParserTest_Parse_Long(object parserObject) [TestMethod] [DynamicData(nameof(Parsers))] - public void SepParserTest_Parse_Long_SeparatorsOnly(object parserObject) + public void SepParserTest_ParseColEnds_Long_SeparatorsOnly(object parserObject) { Contract.Assume(parserObject is not null); var parser = (ISepParser)parserObject; @@ -127,7 +148,7 @@ public void SepParserTest_Parse_Long_SeparatorsOnly(object parserObject) [TestMethod] [DynamicData(nameof(Parsers))] - public void SepParserTest_Parse_Long_At_ParseStart(object parserObject) + public void SepParserTest_ParseColEnds_Long_At_ParseStart(object parserObject) { Contract.Assume(parserObject is not null); var parser = (ISepParser)parserObject; @@ -145,7 +166,7 @@ public void SepParserTest_Parse_Long_At_ParseStart(object parserObject) [TestMethod] [DynamicData(nameof(Parsers))] - public void SepParserTest_Parse_Long_ColEndsAlmostFilled(object parserObject) + public void SepParserTest_ParseColEnds_Long_ColEndsAlmostFilled(object parserObject) { Contract.Assume(parserObject is not null); var parser = (ISepParser)parserObject; @@ -178,7 +199,7 @@ void AssertParserOutput(ISepParser parser, int charsStart, int charsEnd, Expecte _state._lineNumber = 3; foreach (var (expected, expectedNextStart, expectedRowLineEndingOffset, expectedLineNumber) in expectedSet) { - var rowLineEndingOffset = parser.Parse(_state); + var rowLineEndingOffset = parser.ParseColEnds(_state); AreEqual(expected, _colEnds, colEndsFrom, _state._colCount); Assert.AreEqual(expectedNextStart, _state._charsParseStart, nameof(_state._charsParseStart)); diff --git a/src/Sep.Test/SepReaderColTest.cs b/src/Sep.Test/SepReaderColTest.cs index f782ee5f..5f45a4cf 100644 --- a/src/Sep.Test/SepReaderColTest.cs +++ b/src/Sep.Test/SepReaderColTest.cs @@ -94,6 +94,49 @@ public void SepReaderColTest_TryParse_Out_String() } #endif + internal static IEnumerable UnescapeData => SepUnescapeTest.UnescapeData.Concat(new object[][] + { + new object[] 
{ "a\"\"a", "a\"\"a" }, + new object[] { "a\"a\"a", "a\"a\"a" }, + new object[] { "·\"\"·", "·\"\"·" }, + new object[] { "·\"a\"·", "·\"a\"·" }, + new object[] { "·\"\"", "·\"\"" }, + new object[] { "·\"a\"", "·\"a\"" }, + new object[] { "a\"\"\"a", "a\"\"\"a" }, + }); + + [DataTestMethod] + [DynamicData(nameof(UnescapeData))] + public void SepReaderColTest_Unescape_Header_Test(string chars, string expected) + { + var src = new string(chars); + + using var reader = Sep.Reader(o => o with { HasHeader = true, Unescape = true }).FromText(src); + var actual = reader.Header.ColNames[0]; + + Assert.AreEqual(expected, actual, src); + } + + [DataTestMethod] + [DynamicData(nameof(UnescapeData))] + public void SepReaderColTest_Unescape_Col_Test(string chars, string expectedCol) + { + var src = new string(chars); + using var reader = Sep.Reader(o => o with { HasHeader = false, Unescape = true }).FromText(src); + // Ensure repeated access works + for (var i = 0; i < 4; i++) + { + var row = reader.Current; + + var actualCol = row[0].ToString(); + Assert.AreEqual(expectedCol, actualCol, src); + + // Ensure row can be gotten and that expectedCol is contained + var rowText = row.Span.ToString(); + Assert.IsTrue(rowText.Contains(expectedCol)); + } + } + static void Run(SepReader.ColAction action, string colValue = ColText, Func? 
configure = null) { Func defaultConfigure = static c => c; diff --git a/src/Sep.Test/SepReaderFuzzTest.cs b/src/Sep.Test/SepReaderFuzzTest.cs new file mode 100644 index 00000000..aa28c3bc --- /dev/null +++ b/src/Sep.Test/SepReaderFuzzTest.cs @@ -0,0 +1,197 @@ +using System; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace nietras.SeparatedValues.Test; + +[TestClass] +public class SepReaderFuzzTest +{ + static readonly char Separator = SepDefaults.Separator; + readonly Random _random = new(23768213); + + readonly record struct TestCol(string Raw, string Expected, string ExpectedInRow); + readonly record struct TestRow(string Raw, string Expected, TestCol[] Cols); + + [DataRow(false, 500, 20, false, 16)] + [DataRow(true, 500, 20, false, 16)] +#if !DEBUG + [DataRow(false, 500, 40, true, 16)] + [DataRow(true, 500, 40, true, 16)] + [DataRow(false, 5000, 40, false, 64)] + [DataRow(true, 5000, 40, false, 64)] +#endif + [DataTestMethod] + public void SepReaderFuzzTest_Fuzz(bool unescape, int rowCount, int maxColCount, bool colCountSame, int maxColLength) + { + var colCount = Math.Max(1, _random.Next(0, maxColCount)); // Always have 1 col, also when colCountSame + var sbRowRaw = new StringBuilder(); + var sbRowExpected = new StringBuilder(); + var expectedRows = Enumerable.Range(0, rowCount).Select(_ => + GenerateRandomTestRow(_random, sbRowRaw, sbRowExpected, + colCount, colCountSame, maxColLength, unescape)) + .ToArray(); + var text = GetTestText(_random, expectedRows); + + using var reader = Sep + .Reader(o => o with { HasHeader = false, Unescape = unescape, DisableColCountCheck = !colCountSame }) + .FromText(text); + // Verify reader yields exactly the expected rows (surplus rows must fail, not just end the loop) + var rowIndex = 0; + while (reader.MoveNext()) + { + Assert.IsTrue(rowIndex < expectedRows.Length, "Reader returned more rows than expected"); + var expectedRow = expectedRows[rowIndex]; + var expectedCols = expectedRow.Cols; + var readRow = reader.Current; + Assert.AreEqual(expectedCols.Length, 
readRow.ColCount); + + var actualRowBeforeUnescape = readRow.Span.ToString(); + Assert.AreEqual(expectedRow.Raw, actualRowBeforeUnescape); + + for (var colIndex = 0; colIndex < expectedCols.Length; colIndex++) + { + var col = expectedCols[colIndex]; + var readerCol = readRow[colIndex]; + Assert.AreEqual(col.Expected, readerCol.ToString()); + } + + var actualRowAfterUnescape = readRow.Span.ToString(); + Assert.AreEqual(expectedRow.Expected, actualRowAfterUnescape); + + ++rowIndex; + } + Assert.AreEqual(expectedRows.Length, rowIndex, "Reader row count should match expected row count"); + } + + static string GetTestText(Random random, TestRow[] rows) + { + var sb = new StringBuilder(1024 * 1024); + var previousNewLine = ""; + foreach (var row in rows) + { + // Use indexing + var cols = row.Cols; + for (var colIndex = 0; colIndex < cols.Length; colIndex++) + { + sb.Append(cols[colIndex].Raw); + if (colIndex < cols.Length - 1) + { + sb.Append(Separator); + } + } + var newLine = RandomNewLine(random); + // Avoid a new line that does not end up actually being a new line + newLine = newLine == "\n" && previousNewLine == "\r" ? "\r\n" : newLine; + sb.Append(newLine); + previousNewLine = newLine; + } + var text = sb.ToString(); + return text; + } + + static TestRow GenerateRandomTestRow(Random random, StringBuilder sbRaw, StringBuilder sbExpected, + int colCount, bool colCountSame, int maxColLength, bool unescape) + { + sbRaw.Clear(); + sbExpected.Clear(); + // Always have 1 col even if empty + colCount = colCountSame ? 
colCount : random.Next(1, Math.Max(1, colCount) * 2); + var cols = new TestCol[colCount]; + for (var colIndex = 0; colIndex < colCount; colIndex++) + { + var col = GenerateRandomTestCol(random, maxColLength, unescape); + cols[colIndex] = col; + sbRaw.Append(col.Raw); + sbExpected.Append(col.ExpectedInRow); + Assert.AreEqual(col.Raw.Length, col.ExpectedInRow.Length); + if (colIndex != (colCount - 1)) + { + sbRaw.Append(Separator); + sbExpected.Append(Separator); + } + } + return new(sbRaw.ToString(), sbExpected.ToString(), cols); + } + + static TestCol GenerateRandomTestCol(Random random, int maxColLength, bool unescape) + { + var colLength = random.Next(0, maxColLength); + Span source = stackalloc char[colLength]; + Span unescaped = stackalloc char[colLength]; + Span unescapedInRow = stackalloc char[colLength]; + var unescapedLength = 0; + var quoteCount = 0; + var firstCharQuote = false; + for (var i = 0; i < colLength; i++) + { + var c = GenerateRandomChar(random, quoteCount); + var isQuote = c == SepDefaults.Quote; + // if last index and + if (i == (colLength - 1)) + { + // Quote count uneven, always use quote + if ((quoteCount & 1) == 1) + { + c = SepDefaults.Quote; + isQuote = true; + } + // Quote count even, make sure not quote at end + else if (isQuote) + { + c = 'a'; + isQuote = false; + } + } + firstCharQuote |= i == 0 && isQuote; + quoteCount += isQuote ? 
1 : 0; + source[i] = c; + // Unescape that is skip char in unescaped if first char is a quote and + // if either first char or current char is a quote and quote count is even + var unescapeChar = unescape && firstCharQuote && (i == 0 || (isQuote && ((quoteCount & 1) == 0))); + if (!unescapeChar) + { + unescaped[unescapedLength] = c; + ++unescapedLength; + } + } + + source.CopyTo(unescapedInRow); + if (firstCharQuote && unescape && !(quoteCount == 2 && source[^1] == SepDefaults.Quote)) + { + // Use Unescape directly for how unescaped looks in row, actual + // unescaping is checked via manually unescaped + SepUnescape.UnescapeInPlace(ref MemoryMarshal.GetReference(unescapedInRow), unescapedInRow.Length); + } + var sourceString = new string(source); + Assert.IsTrue((source.ToArray().Count(c => c == '"') & 1) == 0); + return new(sourceString, new string(unescaped.Slice(0, unescapedLength)), new(unescapedInRow)); + } + + static char GenerateRandomChar(Random random, int quoteCount) + { + // Generate random specific chars based on hard-coded probabilities + var quoting = (quoteCount & 1) == 1; + var p = random.NextDouble(); + return (quoting, p) switch + { + (_, < 0.2) => SepDefaults.Quote, + (true, < 0.4) => Separator, + (true, < 0.5) => '\r', + (true, < 0.6) => '\n', +#if DEBUG + _ => 'a', +#else + // Be sure values larger than byte are correctly handled too (e.g. 
due to narrowing) + _ => (char)random.Next(Math.Max(Separator, SepDefaults.Quote) + 1, 256 * 2), +#endif + }; + } + + static string RandomNewLine(Random random) + { + return random.Next(0, 3) switch { 0 => "\r\n", 1 => "\n", 2 => "\r", _ => Environment.NewLine }; + } +} diff --git a/src/Sep.Test/SepReaderNoHeaderTest.cs b/src/Sep.Test/SepReaderNoHeaderTest.cs index 3581f8b5..9adc2a73 100644 --- a/src/Sep.Test/SepReaderNoHeaderTest.cs +++ b/src/Sep.Test/SepReaderNoHeaderTest.cs @@ -183,7 +183,7 @@ public void SepReaderNoHeaderTest_ExceedingColsInitialLength_WorksByDoublingCapa var initialColCountCapacity = SepReader.ColEndsInitialLength; var text = new string(';', initialColCountCapacity - 1); using var reader = Sep.Reader(o => o with { HasHeader = false }).FromText(text); - Assert.AreEqual(initialColCountCapacity * 2, reader._colEnds.Length); + Assert.AreEqual(initialColCountCapacity * 2, reader._colEndsOrColInfos.Length); Assert.IsTrue(reader.MoveNext()); var row = reader.Current; Assert.AreEqual(initialColCountCapacity, row.ColCount); diff --git a/src/Sep.Test/SepReaderRowTest.cs b/src/Sep.Test/SepReaderRowTest.cs index 17a4b3c7..73538ade 100644 --- a/src/Sep.Test/SepReaderRowTest.cs +++ b/src/Sep.Test/SepReaderRowTest.cs @@ -21,10 +21,6 @@ public class SepReaderRowTest static readonly string[] _colValues = new string[_cols] { _colValue0, _colValue1, _colValue2, _colValue3 }; static readonly string _headerText = string.Join(';', _colNames); static readonly string _rowText = string.Join(';', _colValues); - //static readonly string _text = $""" - // {_headerText} - // {_rowText} - // """; static readonly string _text = $"{_headerText}\r{_rowText}\r"; readonly SepReader _reader = Sep.Reader().FromText(_text); @@ -37,10 +33,12 @@ public SepReaderRowTest() _enumerator = enumerator; } - [TestMethod] - public void SepReaderRowTest_EmptyString_Properties() + [DataRow(false)] + [DataRow(true)] + [DataTestMethod] + public void SepReaderRowTest_EmptyString_Properties(bool 
unescape) { - using var reader = Sep.Reader().FromText(""); + using var reader = Sep.Reader(o => o with { Unescape = unescape }).FromText(""); using var enumerator = reader.GetEnumerator(); Assert.IsFalse(enumerator.MoveNext()); // enumerator.Current should not be called if MoveNext false, @@ -53,10 +51,12 @@ public void SepReaderRowTest_EmptyString_Properties() Assert.AreEqual(0, row.Span.Length); } - [TestMethod] - public void SepReaderRowTest_EmptyRow_Properties() + [DataRow(false)] + [DataRow(true)] + [DataTestMethod] + public void SepReaderRowTest_EmptyRow_Properties(bool unescape) { - using var reader = Sep.Reader().FromText("\n\n"); + using var reader = Sep.Reader(o => o with { Unescape = unescape }).FromText("\n\n"); using var enumerator = reader.GetEnumerator(); Assert.IsTrue(enumerator.MoveNext()); var row = enumerator.Current; @@ -67,10 +67,15 @@ public void SepReaderRowTest_EmptyRow_Properties() Assert.AreEqual(0, row.Span.Length); } - [TestMethod] - public void SepReaderRowTest_Row_Properties() + [DataRow(false)] + [DataRow(true)] + [DataTestMethod] + public void SepReaderRowTest_Row_Properties(bool unescape) { - var row = _enumerator.Current; + using var reader = Sep.Reader(o => o with { Unescape = unescape }).FromText(_text); + using var enumerator = reader.GetEnumerator(); + Assert.IsTrue(enumerator.MoveNext()); + var row = enumerator.Current; Assert.AreEqual(1, row.RowIndex); Assert.AreEqual(2, row.LineNumberFrom); Assert.AreEqual(3, row.LineNumberToExcl); diff --git a/src/Sep.Test/SepReaderTest.cs b/src/Sep.Test/SepReaderTest.cs index 6a5db301..dbb51e3a 100644 --- a/src/Sep.Test/SepReaderTest.cs +++ b/src/Sep.Test/SepReaderTest.cs @@ -458,11 +458,11 @@ public void SepReaderTest_ExceedingColsInitialLength_WorksByDoublingCapacity() var initialColCountCapacity = SepReader.ColEndsInitialLength; var text = "A" + Environment.NewLine + new string(';', initialColCountCapacity - 1); using var reader = Sep.Reader(o => o with { DisableColCountCheck = true 
}).FromText(text); - Assert.AreEqual(initialColCountCapacity, reader._colEnds.Length); + Assert.AreEqual(initialColCountCapacity, reader._colEndsOrColInfos.Length); Assert.IsTrue(reader.MoveNext()); var row = reader.Current; Assert.AreEqual(initialColCountCapacity, row.ColCount); - Assert.AreEqual(initialColCountCapacity * 2, reader._colEnds.Length); + Assert.AreEqual(initialColCountCapacity * 2, reader._colEndsOrColInfos.Length); } #if !DEBUG // Causes OOMs in Debug due to tracing diff --git a/src/Sep.Test/SepUnescapeTest.cs b/src/Sep.Test/SepUnescapeTest.cs new file mode 100644 index 00000000..a605aef1 --- /dev/null +++ b/src/Sep.Test/SepUnescapeTest.cs @@ -0,0 +1,47 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.Contracts; +using System.Runtime.InteropServices; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace nietras.SeparatedValues.Test; + +[TestClass] +public class SepUnescapeTest +{ + // Should always be an even count since all quotes have to be paired + // First char must always be quote since that is checked outside this scope + internal static IEnumerable UnescapeData => new object[][] + { + new object[] { "\"\"", "" }, + new object[] { "\"\"\"\"", "\"" }, + new object[] { "\"\"\"\"\"\"", "\"\"" }, + new object[] { "\"a\"", "a" }, + new object[] { "\"a\"\"a\"", "a\"a" }, + new object[] { "\"a\"\"a\"\"a\"", "a\"a\"a" }, + new object[] { "\"a\"a\"a\"", "aa\"a" }, + new object[] { "\"\" ", " " }, + new object[] { "\"a\" ", "a " }, + new object[] { "\"a\"\"\"a", "a\"a" }, + new object[] { "\"a\"\"\"a\"", "a\"a\"" }, + new object[] { "\"\"a\"", "a\"" }, + new object[] { "\"a\"a\"", "aa\"" }, + new object[] { "\"\"a\"a\"\"", "a\"a\"" }, + new object[] { "\"\"\"", "\"" }, + }; + + [DataTestMethod] + [DynamicData(nameof(UnescapeData))] + public void SepUnescapeTest_UnescapeInPlace(string chars, string expected) + { + Contract.Assume(chars != null); + // Unescape a private copy, never the string itself (strings are immutable and test data is shared) + var buffer = chars.ToCharArray(); + var unescapedLength = 
SepUnescape.UnescapeInPlace( + ref MemoryMarshal.GetArrayDataReference(buffer), + buffer.Length); + + var actual = new string(buffer.AsSpan(0, unescapedLength)); + Assert.AreEqual(expected, actual, chars); + } +} diff --git a/src/Sep/Internals/ISepParser.cs b/src/Sep/Internals/ISepParser.cs index e97f1d67..ad63e339 100644 --- a/src/Sep/Internals/ISepParser.cs +++ b/src/Sep/Internals/ISepParser.cs @@ -3,5 +3,7 @@ interface ISepParser { int PaddingLength { get; } - int Parse(SepReaderState s); + int QuoteCount { get; } + int ParseColEnds(SepReaderState s); + int ParseColInfos(SepReaderState s); } diff --git a/src/Sep/Internals/SepArrayExtensions.cs b/src/Sep/Internals/SepArrayExtensions.cs index 0b743459..980040a9 100644 --- a/src/Sep/Internals/SepArrayExtensions.cs +++ b/src/Sep/Internals/SepArrayExtensions.cs @@ -91,9 +91,15 @@ internal static void CheckPaddingAndIsZero(this T[] array, int end, int minim [ExcludeFromCodeCoverage] internal static void CheckPadding(this T[] array, int end, int minimumPaddingLength) { - var paddingLength = (array.Length - end); - A.Assert(paddingLength >= minimumPaddingLength, - $"Padding length {paddingLength} less than minimum {minimumPaddingLength}"); + CheckPadding(array.Length, end, minimumPaddingLength); } + [Conditional("DEBUG")] + [ExcludeFromCodeCoverage] + internal static void CheckPadding(int length, int end, int minimumPaddingLength) + { + var paddingLength = (length - end); + A.Assert(paddingLength >= minimumPaddingLength, + $"Padding length {paddingLength} less than minimum {minimumPaddingLength}"); + } } diff --git a/src/Sep/Internals/SepColInfo.cs b/src/Sep/Internals/SepColInfo.cs new file mode 100644 index 00000000..69c68d95 --- /dev/null +++ b/src/Sep/Internals/SepColInfo.cs @@ -0,0 +1,24 @@ +namespace nietras.SeparatedValues; + +record struct SepColInfo(int ColEnd, int QuoteCount) +{ + public override string ToString() => $"({ColEnd}, {QuoteCount})"; +} + +interface ISepColInfoMethods +{ + static abstract TColInfo Create(int 
colEnd, int quoteCount); + static abstract int GetColEnd(TColInfo colInfo); +} + +abstract class SepColInfoMethods : ISepColInfoMethods +{ + public static SepColInfo Create(int colEnd, int quoteCount) => new(colEnd, quoteCount); + public static int GetColEnd(SepColInfo colInfo) => colInfo.ColEnd; +} + +abstract class SepColEndMethods : ISepColInfoMethods +{ + public static int Create(int colEnd, int quoteCount) => colEnd; + public static int GetColEnd(int colEnd) => colEnd; +} diff --git a/src/Sep/Internals/SepParseMask.cs b/src/Sep/Internals/SepParseMask.cs index 7d0875a9..090fccfe 100644 --- a/src/Sep/Internals/SepParseMask.cs +++ b/src/Sep/Internals/SepParseMask.cs @@ -5,77 +5,80 @@ namespace nietras.SeparatedValues; -static class SepParseMask +static partial class SepParseMask { [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static ref int ParseSeparatorsMask(nuint mask, int charsIndex, ref int colEndsRef) - { - do - { - var relativeIndex = BitOperations.TrailingZeroCount(mask); - mask &= (mask - 1); - // Pre-increment colEndsRef since [0] reserved for row start - colEndsRef = ref Add(ref colEndsRef, 1); - colEndsRef = charsIndex + relativeIndex; - } - while (mask != 0); - return ref colEndsRef; - } + => ref ParseSeparatorsMask( + mask, charsIndex, ref colEndsRef); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static ref int ParseAnyCharsMask(nuint mask, char separator, + scoped ref char charsRef, int charsIndex, + scoped ref int rowLineEndingOffset, scoped ref nuint quoteCount, + ref int colEndsRef, scoped ref int lineNumber) + => ref ParseAnyCharsMask( + mask, separator, ref charsRef, charsIndex, + ref rowLineEndingOffset, ref quoteCount, + ref colEndsRef, ref lineNumber); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static ref int ParseSeparatorsLineEndingsMasks(nuint separatorsMask, nuint separatorsLineEndingsMask, + scoped ref char charsRef, scoped ref int charsIndex, char separator, + ref int 
colEndsRefCurrent, scoped ref int rowLineEndingOffset, scoped ref int lineNumber) + => ref ParseSeparatorsLineEndingsMasks( + separatorsMask, separatorsLineEndingsMask, ref charsRef, ref charsIndex, separator, + ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + - // Not faster [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static ref int ParseSeparatorsMaskLong(nuint mask, int charsIndex, ref int positionsRefCurrent) + internal static ref TColInfo ParseSeparatorsMask( + nuint mask, int charsIndex, ref TColInfo colInfosRef) + where TColInfoMethods : ISepColInfoMethods { - var count = BitOperations.PopCount(mask); - ref var positionsRefCurrentEnd = ref Add(ref positionsRefCurrent, count); - var charsIndexLong = (long)((ulong)charsIndex + ((ulong)charsIndex << 32)); do { - // Pre-increment colEndsRef since [0] reserved for row start - positionsRefCurrent = ref Add(ref positionsRefCurrent, 1); - - long p0 = BitOperations.TrailingZeroCount(mask); - mask &= (mask - 1); - long p1 = BitOperations.TrailingZeroCount(mask); + var relativeIndex = BitOperations.TrailingZeroCount(mask); mask &= (mask - 1); - // Assume endianness - var packed = (p0 | (p1 << 32)) + charsIndexLong; - As(ref positionsRefCurrent) = packed; - - positionsRefCurrent = ref Add(ref positionsRefCurrent, 1); + // Pre-increment colInfosRef since [0] reserved for row start + colInfosRef = ref Add(ref colInfosRef, 1); + colInfosRef = TColInfoMethods.Create(charsIndex + relativeIndex, 0); } while (mask != 0); - return ref positionsRefCurrentEnd; + return ref colInfosRef; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static ref int ParseAnyCharsMask(nuint mask, char separator, + internal static ref TColInfo ParseAnyCharsMask( + nuint mask, char separator, scoped ref char charsRef, int charsIndex, - scoped ref int rowLineEndingOffset, scoped ref nuint quoting, - ref int colEndsRef, scoped ref int lineNumber) + scoped ref int rowLineEndingOffset, scoped ref nuint 
quoteCount, + ref TColInfo colInfosRef, scoped ref int lineNumber) + where TColInfoMethods : ISepColInfoMethods { do { var relativeIndex = BitOperations.TrailingZeroCount(mask); mask &= (mask - 1); - colEndsRef = ref ParseAnyChar(ref charsRef, + colInfosRef = ref ParseAnyChar(ref charsRef, charsIndex, relativeIndex, separator, - ref rowLineEndingOffset, ref quoting, - ref colEndsRef, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRef, ref lineNumber); } while (mask != 0 && (rowLineEndingOffset == 0)); - return ref colEndsRef; + return ref colInfosRef; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static ref int ParseAnyChar( + internal static ref TColInfo ParseAnyChar( scoped ref char charsRef, int charsIndex, int relativeIndex, char separator, - scoped ref int rowLineEndingOffset, scoped ref nuint quoting, - ref int colEndsRef, scoped ref int lineNumber) + scoped ref int rowLineEndingOffset, scoped ref nuint quoteCount, + ref TColInfo colInfosRef, scoped ref int lineNumber) + where TColInfoMethods : ISepColInfoMethods { var c = Add(ref charsRef, relativeIndex); - if (quoting != 0) + if ((quoteCount & 1) != 0) { if (c == CarriageReturn) { @@ -107,30 +110,32 @@ internal static ref int ParseAnyChar( if (c == LineFeed) { goto NEWLINE; } if (c == Quote) { - // Flip quoting flag - quoting ^= 1; + ++quoteCount; goto RETURN; } NEWLINE: ++lineNumber; ++rowLineEndingOffset; ADDCOLEND: - // Pre-increment colEndsRef since [0] reserved for row start - colEndsRef = ref Add(ref colEndsRef, 1); - colEndsRef = charsIndex + relativeIndex; + // Pre-increment colInfosRef since [0] reserved for row start + colInfosRef = ref Add(ref colInfosRef, 1); + colInfosRef = TColInfoMethods.Create(charsIndex + relativeIndex, (int)quoteCount); + quoteCount = 0; RETURN: - return ref colEndsRef; + return ref colInfosRef; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static ref int ParseSeparatorsLineEndingsMasks(nuint separatorsMask, 
nuint separatorsLineEndingsMask, + internal static ref TColInfo ParseSeparatorsLineEndingsMasks( + nuint separatorsMask, nuint separatorsLineEndingsMask, scoped ref char charsRef, scoped ref int charsIndex, char separator, - ref int colEndsRefCurrent, scoped ref int rowLineEndingOffset, scoped ref int lineNumber) + ref TColInfo colInfosRefCurrent, scoped ref int rowLineEndingOffset, scoped ref int lineNumber) + where TColInfoMethods : ISepColInfoMethods { if (separatorsMask == 0) { - colEndsRefCurrent = ref ParseLineEndingMask(separatorsLineEndingsMask, - ref charsRef, ref charsIndex, ref colEndsRefCurrent, + colInfosRefCurrent = ref ParseLineEndingMask( + separatorsLineEndingsMask, ref charsRef, ref charsIndex, ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); charsIndex += rowLineEndingOffset; } @@ -140,15 +145,15 @@ internal static ref int ParseSeparatorsLineEndingsMasks(nuint separatorsMask, nu var lineEndingIndex = BitOperations.TrailingZeroCount(endingsMask); if (((SizeOf() * 8 - 1) - BitOperations.LeadingZeroCount(separatorsMask)) < lineEndingIndex) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask( + separatorsMask, charsIndex, ref colInfosRefCurrent); var c = Add(ref charsRef, lineEndingIndex); ++rowLineEndingOffset; - // Pre-increment colEndsRef since [0] reserved for row start - colEndsRefCurrent = ref Add(ref colEndsRefCurrent, 1); + // Pre-increment colInfosRef since [0] reserved for row start + colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); charsIndex += lineEndingIndex; - colEndsRefCurrent = charsIndex; + colInfosRefCurrent = TColInfoMethods.Create(charsIndex, 0); if (c == CarriageReturn) { // If \r=CR, we should always be able to look 1 ahead, and if char not valid should not be \n=LF @@ -161,29 +166,31 @@ internal static ref int ParseSeparatorsLineEndingsMasks(nuint separatorsMask, nu else { // Used both to indicate row ended 
and if need to step +2 due to '\r\n' - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMask(separatorsLineEndingsMask, - separator, ref charsRef, charsIndex, ref rowLineEndingOffset, - ref colEndsRefCurrent, ref lineNumber); + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMask( + separatorsLineEndingsMask, separator, ref charsRef, charsIndex, ref rowLineEndingOffset, + ref colInfosRefCurrent, ref lineNumber); // We know line has ended and RowEnded set so no need to check // Must be a col end and last is then dataIndex, +1 to start at next - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; } } - return ref colEndsRefCurrent; + return ref colInfosRefCurrent; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static ref int ParseLineEndingMask(nuint lineEndingsMask, + internal static ref TColInfo ParseLineEndingMask( + nuint lineEndingsMask, scoped ref char charsRef, scoped ref int charsIndex, - ref int colEndsRefCurrent, scoped ref int rowLineEndingOffset, scoped ref int lineNumber) + ref TColInfo colInfosRefCurrent, scoped ref int rowLineEndingOffset, scoped ref int lineNumber) + where TColInfoMethods : ISepColInfoMethods { var lineEndingIndex = BitOperations.TrailingZeroCount(lineEndingsMask); var c = Add(ref charsRef, lineEndingIndex); ++rowLineEndingOffset; - // Pre-increment colEndsRef since [0] reserved for row start - colEndsRefCurrent = ref Add(ref colEndsRefCurrent, 1); + // Pre-increment colInfosRef since [0] reserved for row start + colInfosRefCurrent = ref Add(ref colInfosRefCurrent, 1); charsIndex += lineEndingIndex; - colEndsRefCurrent = charsIndex; + colInfosRefCurrent = TColInfoMethods.Create(charsIndex, 0); if (c == CarriageReturn) { // If \r=CR, we should always be able to look 1 ahead, and if char not valid should not be \n=LF @@ -191,33 +198,36 @@ internal static ref int ParseLineEndingMask(nuint lineEndingsMask, if (oneCharAhead == 
LineFeed) { ++rowLineEndingOffset; } } ++lineNumber; - return ref colEndsRefCurrent; + return ref colInfosRefCurrent; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int ParseSeparatorsLineEndingsMask(nuint mask, char separator, + static ref TColInfo ParseSeparatorsLineEndingsMask( + nuint mask, char separator, scoped ref char charsRef, int charsIndex, scoped ref int rowLineEndingOffset, - ref int colEndsRef, scoped ref int lineNumber) + ref TColInfo colInfosRef, scoped ref int lineNumber) + where TColInfoMethods : ISepColInfoMethods { do { var relativeIndex = BitOperations.TrailingZeroCount(mask); mask &= (mask - 1); - colEndsRef = ref ParseSeparatorLineEndingChar(ref charsRef, - charsIndex, relativeIndex, separator, - ref rowLineEndingOffset, ref colEndsRef, ref lineNumber); + colInfosRef = ref ParseSeparatorLineEndingChar( + ref charsRef, charsIndex, relativeIndex, separator, + ref rowLineEndingOffset, ref colInfosRef, ref lineNumber); } while (mask != 0 && (rowLineEndingOffset == 0)); - return ref colEndsRef; + return ref colInfosRef; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static ref int ParseSeparatorLineEndingChar( + static ref TColInfo ParseSeparatorLineEndingChar( scoped ref char charsRef, int charsIndex, int relativeIndex, char separator, scoped ref int rowLineEndingOffset, - ref int colEndsRef, scoped ref int lineNumber) + ref TColInfo colInfosRef, scoped ref int lineNumber) + where TColInfoMethods : ISepColInfoMethods { var c = Add(ref charsRef, relativeIndex); if (c == separator) goto ADDCOLEND; @@ -236,9 +246,9 @@ static ref int ParseSeparatorLineEndingChar( ++lineNumber; ++rowLineEndingOffset; ADDCOLEND: - // Pre-increment colEndsRef since [0] reserved for row start - colEndsRef = ref Add(ref colEndsRef, 1); - colEndsRef = charsIndex + relativeIndex; - return ref colEndsRef; + // Pre-increment colInfosRef since [0] reserved for row start + colInfosRef = ref Add(ref colInfosRef, 1); + colInfosRef = 
TColInfoMethods.Create(charsIndex + relativeIndex, 0); + return ref colInfosRef; } } diff --git a/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs b/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs index e11eed2f..ef45c28f 100644 --- a/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs +++ b/src/Sep/Internals/SepParserAvx2PackCmpOrMoveMaskTzcnt.cs @@ -19,7 +19,7 @@ sealed class SepParserAvx2PackCmpOrMoveMaskTzcnt : ISepParser readonly VecUI8 _crs = Vec.Create(CarriageReturnByte); readonly VecUI8 _qts = Vec.Create(QuoteByte); readonly VecUI8 _sps; - internal nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserAvx2PackCmpOrMoveMaskTzcnt(Sep sep) { @@ -29,36 +29,54 @@ public unsafe SepParserAvx2PackCmpOrMoveMaskTzcnt(Sep sep) // Parses 2 x char vectors e.g. 1 byte vector public int PaddingLength => VecUI8.Count; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) + { + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods { // Method should **not** call other non-inlined methods, since this // impacts code-generation severely. 
var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref Add(ref MemoryMarshal.GetArrayDataReference(chars), charsIndex); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - VecUI8.Count); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - VecUI8.Count); // Use instance fields to force values into registers var nls = _nls; //Vec.Create(LineFeedByte); @@ -92,41 +110,41 @@ public int Parse(SepReaderState s) var separatorsMask = (uint)ISA.MoveMask(spsEq); // Optimize for case of only separators i.e. no endings or quotes. // Add quoting flags to mask as hack to skip if quoting. 
- var testMask = specialCharMask + quoting; + var testMask = specialCharMask + quoteCount; if (separatorsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, + ref colInfosRefCurrent); } else { var separatorLineEndingsMask = (uint)ISA.MoveMask(lineEndingsSeparators); if (separatorLineEndingsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMasks( + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks( separatorsMask, separatorLineEndingsMask, ref charsRef, ref charsIndex, separator, - ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); break; } else { - colEndsRefCurrent = ref ParseAnyCharsMask(specialCharMask, + colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask, separator, ref charsRef, charsIndex, - ref rowLineEndingOffset, ref quoting, - ref colEndsRefCurrent, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRefCurrent, ref lineNumber); // Used both to indicate row ended and if need to step +2 due to '\r\n' if (rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } } } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next VecUI8.Count. 
- if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // Move data index so next find starts correctly charsIndex += VecUI8.Count; @@ -136,12 +154,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs b/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs index d6e1a798..dfb5178a 100644 --- a/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs +++ b/src/Sep/Internals/SepParserAvx512PackCmpOrMoveMaskTzcnt.cs @@ -1,5 +1,6 @@ #if NET8_0_OR_GREATER using System; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; @@ -13,6 +14,7 @@ namespace nietras.SeparatedValues; +[ExcludeFromCodeCoverage] sealed class SepParserAvx512PackCmpOrMoveMaskTzcnt : ISepParser { readonly byte _separator; @@ -20,7 +22,7 @@ sealed class SepParserAvx512PackCmpOrMoveMaskTzcnt : ISepParser readonly VecUI8 _crs = Vec.Create(CarriageReturnByte); readonly VecUI8 _qts = Vec.Create(QuoteByte); readonly VecUI8 _sps; - internal nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserAvx512PackCmpOrMoveMaskTzcnt(Sep sep) { @@ -31,36 +33,54 @@ public unsafe SepParserAvx512PackCmpOrMoveMaskTzcnt(Sep sep) // Parses 2 x char vectors e.g. 
1 byte vector public int PaddingLength => VecUI8.Count; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) + { + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods { // Method should **not** call other non-inlined methods, since this // impacts code-generation severely. var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref Add(ref MemoryMarshal.GetArrayDataReference(chars), charsIndex); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - VecUI8.Count); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - VecUI8.Count); // Use instance fields to force values into 
registers var nls = _nls; //Vec.Create(LineFeedByte); @@ -95,41 +115,41 @@ public int Parse(SepReaderState s) var separatorsMask = (nuint)Vec.ExtractMostSignificantBits(spsEq); // Optimize for case of only separators i.e. no endings or quotes. // Add quoting flags to mask as hack to skip if quoting. - var testMask = specialCharMask + quoting; + var testMask = specialCharMask + quoteCount; if (separatorsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, + ref colInfosRefCurrent); } else { var separatorLineEndingsMask = (nuint)Vec.ExtractMostSignificantBits(lineEndingsSeparators); if (separatorLineEndingsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMasks( + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks( separatorsMask, separatorLineEndingsMask, ref charsRef, ref charsIndex, separator, - ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); break; } else { - colEndsRefCurrent = ref ParseAnyCharsMask(specialCharMask, + colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask, separator, ref charsRef, charsIndex, - ref rowLineEndingOffset, ref quoting, - ref colEndsRefCurrent, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRefCurrent, ref lineNumber); // Used both to indicate row ended and if need to step +2 due to '\r\n' if (rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } } } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next VecUI8.Count. 
- if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // Move data index so next find starts correctly charsIndex += VecUI8.Count; @@ -139,12 +159,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepParserIndexOfAny.cs b/src/Sep/Internals/SepParserIndexOfAny.cs index a598c117..8ea41d5a 100644 --- a/src/Sep/Internals/SepParserIndexOfAny.cs +++ b/src/Sep/Internals/SepParserIndexOfAny.cs @@ -10,7 +10,7 @@ sealed class SepParserIndexOfAny : ISepParser { readonly char _separator; readonly char[] _specialChars; - nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserIndexOfAny(Sep sep) { @@ -19,33 +19,54 @@ public unsafe SepParserIndexOfAny(Sep sep) } public int PaddingLength => 0; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) { - var separator = _separator; + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods + { + // Method should **not** call other non-inlined methods, since this + // impacts code-generation severely. 
+ + var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref MemoryMarshal.GetArrayDataReference(chars); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - 2); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - 2); var span = chars.AsSpan(0, charsEnd); var specialCharsSpan = _specialChars.AsSpan(); @@ -58,21 +79,21 @@ public int Parse(SepReaderState s) A.Assert(charsIndex < charsEnd, $"{nameof(charsIndex)} >= {nameof(charsEnd)}"); ref var charsCurrentRef = ref Add(ref charsRef, charsIndex); - colEndsRefCurrent = ref SepParseMask.ParseAnyChar(ref charsCurrentRef, charsIndex, relativeIndex, - separator, ref rowLineEndingOffset, ref quoting, ref colEndsRefCurrent, ref lineNumber); + colInfosRefCurrent = ref SepParseMask.ParseAnyChar(ref charsCurrentRef, charsIndex, relativeIndex, + separator, ref rowLineEndingOffset, ref quoteCount, ref colInfosRefCurrent, ref lineNumber); charsIndex += relativeIndex + 1; // Used both to indicate row ended and if need to step +2 due to '\r\n' if 
(rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next. - if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { break; } @@ -84,12 +105,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs b/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs index 3625b758..a824b883 100644 --- a/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs +++ b/src/Sep/Internals/SepParserSse2PackCmpOrMoveMaskTzcnt.cs @@ -18,7 +18,7 @@ sealed class SepParserSse2PackCmpOrMoveMaskTzcnt : ISepParser readonly VecUI8 _crs = Vec.Create(CarriageReturnByte); readonly VecUI8 _qts = Vec.Create(QuoteByte); readonly VecUI8 _sps; - internal nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserSse2PackCmpOrMoveMaskTzcnt(Sep sep) { @@ -28,36 +28,54 @@ public unsafe SepParserSse2PackCmpOrMoveMaskTzcnt(Sep sep) // Parses 2 x char vectors e.g. 
1 byte vector public int PaddingLength => VecUI8.Count; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) + { + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods { // Method should **not** call other non-inlined methods, since this // impacts code-generation severely. var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref Add(ref MemoryMarshal.GetArrayDataReference(chars), charsIndex); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - VecUI8.Count); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - VecUI8.Count); // Use instance fields to force values into 
registers var nls = _nls; //Vec.Create(LineFeedByte); @@ -89,41 +107,41 @@ public int Parse(SepReaderState s) var separatorsMask = (uint)ISA.MoveMask(spsEq); // Optimize for case of only separators i.e. no endings or quotes. // Add quoting flags to mask as hack to skip if quoting. - var testMask = specialCharMask + quoting; + var testMask = specialCharMask + quoteCount; if (separatorsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, + ref colInfosRefCurrent); } else { var separatorLineEndingsMask = (uint)ISA.MoveMask(lineEndingsSeparators); if (separatorLineEndingsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMasks( + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks( separatorsMask, separatorLineEndingsMask, ref charsRef, ref charsIndex, separator, - ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); break; } else { - colEndsRefCurrent = ref ParseAnyCharsMask(specialCharMask, + colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask, separator, ref charsRef, charsIndex, - ref rowLineEndingOffset, ref quoting, - ref colEndsRefCurrent, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRefCurrent, ref lineNumber); // Used both to indicate row ended and if need to step +2 due to '\r\n' if (rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } } } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next VecUI8.Count. 
- if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // Move data index so next find starts correctly charsIndex += VecUI8.Count; @@ -133,12 +151,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs index e402b13f..ebd532b0 100644 --- a/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector128NrwCmpExtMsbTzcnt.cs @@ -19,7 +19,7 @@ sealed class SepParserVector128NrwCmpExtMsbTzcnt : ISepParser readonly VecUI8 _crs = Vec.Create(CarriageReturnByte); readonly VecUI8 _qts = Vec.Create(QuoteByte); readonly VecUI8 _sps; - internal nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserVector128NrwCmpExtMsbTzcnt(Sep sep) { @@ -29,36 +29,54 @@ public unsafe SepParserVector128NrwCmpExtMsbTzcnt(Sep sep) // Parses 2 x char vectors e.g. 
1 byte vector public int PaddingLength => VecUI8.Count; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) + { + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods { // Method should **not** call other non-inlined methods, since this // impacts code-generation severely. var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref Add(ref MemoryMarshal.GetArrayDataReference(chars), charsIndex); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - VecUI8.Count); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - VecUI8.Count); // Use instance fields to force values into 
registers var max = _max; @@ -93,41 +111,41 @@ public int Parse(SepReaderState s) var separatorsMask = spsEq.ExtractMostSignificantBits(); // Optimize for case of only separators i.e. no endings or quotes. // Add quoting flags to mask as hack to skip if quoting. - var testMask = specialCharMask + quoting; + var testMask = specialCharMask + quoteCount; if (separatorsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, + ref colInfosRefCurrent); } else { var separatorLineEndingsMask = lineEndingsSeparators.ExtractMostSignificantBits(); if (separatorLineEndingsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMasks( + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks( separatorsMask, separatorLineEndingsMask, ref charsRef, ref charsIndex, separator, - ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); break; } else { - colEndsRefCurrent = ref ParseAnyCharsMask(specialCharMask, + colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask, separator, ref charsRef, charsIndex, - ref rowLineEndingOffset, ref quoting, - ref colEndsRefCurrent, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRefCurrent, ref lineNumber); // Used both to indicate row ended and if need to step +2 due to '\r\n' if (rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } } } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next VecUI8.Count. 
- if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // Move data index so next find starts correctly charsIndex += VecUI8.Count; @@ -137,12 +155,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs index 8684c530..0b691963 100644 --- a/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector256NrwCmpExtMsbTzcnt.cs @@ -19,7 +19,7 @@ sealed class SepParserVector256NrwCmpExtMsbTzcnt : ISepParser readonly VecUI8 _crs = Vec.Create(CarriageReturnByte); readonly VecUI8 _qts = Vec.Create(QuoteByte); readonly VecUI8 _sps; - internal nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserVector256NrwCmpExtMsbTzcnt(Sep sep) { @@ -29,36 +29,54 @@ public unsafe SepParserVector256NrwCmpExtMsbTzcnt(Sep sep) // Parses 2 x char vectors e.g. 
1 byte vector public int PaddingLength => VecUI8.Count; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) + { + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods { // Method should **not** call other non-inlined methods, since this // impacts code-generation severely. var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref Add(ref MemoryMarshal.GetArrayDataReference(chars), charsIndex); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - VecUI8.Count); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - VecUI8.Count); // Use instance fields to force values into 
registers var max = _max; @@ -93,41 +111,41 @@ public int Parse(SepReaderState s) var separatorsMask = spsEq.ExtractMostSignificantBits(); // Optimize for case of only separators i.e. no endings or quotes. // Add quoting flags to mask as hack to skip if quoting. - var testMask = specialCharMask + quoting; + var testMask = specialCharMask + quoteCount; if (separatorsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, + ref colInfosRefCurrent); } else { var separatorLineEndingsMask = lineEndingsSeparators.ExtractMostSignificantBits(); if (separatorLineEndingsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMasks( + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks( separatorsMask, separatorLineEndingsMask, ref charsRef, ref charsIndex, separator, - ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); break; } else { - colEndsRefCurrent = ref ParseAnyCharsMask(specialCharMask, + colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask, separator, ref charsRef, charsIndex, - ref rowLineEndingOffset, ref quoting, - ref colEndsRefCurrent, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRefCurrent, ref lineNumber); // Used both to indicate row ended and if need to step +2 due to '\r\n' if (rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } } } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next VecUI8.Count. 
- if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // Move data index so next find starts correctly charsIndex += VecUI8.Count; @@ -137,12 +155,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs index 019eb891..6106ee13 100644 --- a/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector512NrwCmpExtMsbTzcnt.cs @@ -20,7 +20,7 @@ sealed class SepParserVector512NrwCmpExtMsbTzcnt : ISepParser readonly VecUI8 _crs = Vec.Create(CarriageReturnByte); readonly VecUI8 _qts = Vec.Create(QuoteByte); readonly VecUI8 _sps; - internal nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserVector512NrwCmpExtMsbTzcnt(Sep sep) { @@ -31,36 +31,54 @@ public unsafe SepParserVector512NrwCmpExtMsbTzcnt(Sep sep) // Parses 2 x char vectors e.g. 
1 byte vector public int PaddingLength => VecUI8.Count; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) + { + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods { // Method should **not** call other non-inlined methods, since this // impacts code-generation severely. var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref Add(ref MemoryMarshal.GetArrayDataReference(chars), charsIndex); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - VecUI8.Count); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - VecUI8.Count); // Use instance fields to force values into 
registers var max = _max; @@ -95,41 +113,41 @@ public int Parse(SepReaderState s) var separatorsMask = (nuint)spsEq.ExtractMostSignificantBits(); // Optimize for case of only separators i.e. no endings or quotes. // Add quoting flags to mask as hack to skip if quoting. - var testMask = specialCharMask + quoting; + var testMask = specialCharMask + quoteCount; if (separatorsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, + ref colInfosRefCurrent); } else { var separatorLineEndingsMask = (nuint)lineEndingsSeparators.ExtractMostSignificantBits(); if (separatorLineEndingsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMasks( + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks( separatorsMask, separatorLineEndingsMask, ref charsRef, ref charsIndex, separator, - ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); break; } else { - colEndsRefCurrent = ref ParseAnyCharsMask(specialCharMask, + colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask, separator, ref charsRef, charsIndex, - ref rowLineEndingOffset, ref quoting, - ref colEndsRefCurrent, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRefCurrent, ref lineNumber); // Used both to indicate row ended and if need to step +2 due to '\r\n' if (rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } } } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next VecUI8.Count. 
- if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // Move data index so next find starts correctly charsIndex += VecUI8.Count; @@ -139,12 +157,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs b/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs index f999174d..f5adfdd8 100644 --- a/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs +++ b/src/Sep/Internals/SepParserVector64NrwCmpExtMsbTzcnt.cs @@ -19,7 +19,7 @@ sealed class SepParserVector64NrwCmpExtMsbTzcnt : ISepParser readonly VecUI8 _crs = Vec.Create(CarriageReturnByte); readonly VecUI8 _qts = Vec.Create(QuoteByte); readonly VecUI8 _sps; - internal nuint _quoting = 0; + nuint _quoteCount = 0; public unsafe SepParserVector64NrwCmpExtMsbTzcnt(Sep sep) { @@ -29,36 +29,54 @@ public unsafe SepParserVector64NrwCmpExtMsbTzcnt(Sep sep) // Parses 2 x char vectors e.g. 
1 byte vector public int PaddingLength => VecUI8.Count; + public int QuoteCount => (int)_quoteCount; [SkipLocalsInit] [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public int Parse(SepReaderState s) + public int ParseColEnds(SepReaderState s) + { + return Parse(s); + } + + [SkipLocalsInit] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public int ParseColInfos(SepReaderState s) + { + return Parse(s); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + int Parse(SepReaderState s) + where TColInfo : unmanaged + where TColInfoMethods : ISepColInfoMethods { // Method should **not** call other non-inlined methods, since this // impacts code-generation severely. var separator = (char)_separator; + var quoteCount = _quoteCount; - var quoting = _quoting; var chars = s._chars; var charsIndex = s._charsParseStart; var charsEnd = s._charsDataEnd; - var colEnds = s._colEnds; - var colEndsEnd = s._colCount; + var colInfos = s._colEndsOrColInfos; + var colCount = s._colCount; var lineNumber = s._lineNumber; var rowLineEndingOffset = 0; + var colInfosLength = colInfos.Length / (SizeOf() / SizeOf()); + chars.CheckPaddingAndIsZero(charsEnd, PaddingLength); - colEnds.CheckPadding(colEndsEnd, PaddingLength); + SepArrayExtensions.CheckPadding(colInfosLength, colCount, PaddingLength); A.Assert(charsIndex <= charsEnd); A.Assert(charsEnd <= (chars.Length - PaddingLength)); ref var charsRef = ref Add(ref MemoryMarshal.GetArrayDataReference(chars), charsIndex); - ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(colEnds); - ref var colEndsRefCurrent = ref Add(ref colEndsRef, colEndsEnd); - ref var colEndsRefStop = ref Add(ref colEndsRef, colEnds.Length - VecUI8.Count); + ref var colInfosRef = ref As(ref MemoryMarshal.GetArrayDataReference(colInfos)); + ref var colInfosRefCurrent = ref Add(ref colInfosRef, colCount); + ref var colInfosRefStop = ref Add(ref colInfosRef, colInfosLength - VecUI8.Count); // Use instance fields to force values into 
registers var max = _max; @@ -93,41 +111,41 @@ public int Parse(SepReaderState s) var separatorsMask = spsEq.ExtractMostSignificantBits(); // Optimize for case of only separators i.e. no endings or quotes. // Add quoting flags to mask as hack to skip if quoting. - var testMask = specialCharMask + quoting; + var testMask = specialCharMask + quoteCount; if (separatorsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, - ref colEndsRefCurrent); + colInfosRefCurrent = ref ParseSeparatorsMask(separatorsMask, charsIndex, + ref colInfosRefCurrent); } else { var separatorLineEndingsMask = lineEndingsSeparators.ExtractMostSignificantBits(); if (separatorLineEndingsMask == testMask) { - colEndsRefCurrent = ref ParseSeparatorsLineEndingsMasks( + colInfosRefCurrent = ref ParseSeparatorsLineEndingsMasks( separatorsMask, separatorLineEndingsMask, ref charsRef, ref charsIndex, separator, - ref colEndsRefCurrent, ref rowLineEndingOffset, ref lineNumber); + ref colInfosRefCurrent, ref rowLineEndingOffset, ref lineNumber); break; } else { - colEndsRefCurrent = ref ParseAnyCharsMask(specialCharMask, + colInfosRefCurrent = ref ParseAnyCharsMask(specialCharMask, separator, ref charsRef, charsIndex, - ref rowLineEndingOffset, ref quoting, - ref colEndsRefCurrent, ref lineNumber); + ref rowLineEndingOffset, ref quoteCount, + ref colInfosRefCurrent, ref lineNumber); // Used both to indicate row ended and if need to step +2 due to '\r\n' if (rowLineEndingOffset != 0) { // Must be a col end and last is then dataIndex - charsIndex = colEndsRefCurrent + rowLineEndingOffset; + charsIndex = TColInfoMethods.GetColEnd(colInfosRefCurrent) + rowLineEndingOffset; break; } } } // If current is greater than or equal than "stop", then break. // There is no longer guaranteed space enough for next VecUI8.Count. 
- if (IsAddressLessThan(ref colEndsRefStop, ref colEndsRefCurrent)) + if (IsAddressLessThan(ref colInfosRefStop, ref colInfosRefCurrent)) { // Move data index so next find starts correctly charsIndex += VecUI8.Count; @@ -137,12 +155,12 @@ public int Parse(SepReaderState s) } // ">> 2" instead of "/ sizeof(int))" // CQ: Weird with div sizeof - colEndsEnd = (int)(ByteOffset(ref colEndsRef, ref colEndsRefCurrent) >> 2); + colCount = (int)(ByteOffset(ref colInfosRef, ref colInfosRefCurrent) / SizeOf()); // Step is VecUI8.Count so may go past end, ensure limited charsIndex = Math.Min(charsEnd, charsIndex); - _quoting = quoting; - s._colCount = colEndsEnd; + _quoteCount = quoteCount; + s._colCount = colCount; s._lineNumber = lineNumber; s._charsParseStart = charsIndex; diff --git a/src/Sep/Internals/SepUnescape.cs b/src/Sep/Internals/SepUnescape.cs new file mode 100644 index 00000000..9a2a3fe3 --- /dev/null +++ b/src/Sep/Internals/SepUnescape.cs @@ -0,0 +1,95 @@ +using System.Diagnostics.CodeAnalysis; +using static System.Runtime.CompilerServices.Unsafe; + +namespace nietras.SeparatedValues; + +static class SepUnescape +{ + internal static int UnescapeInPlace(ref char charRef, int length) + { + nint unescapedLength = 0; + nint quoteCount = 1; // We start just past first quote + for (var i = 1; i < length; i++) + { + var c = Add(ref charRef, i); + Add(ref charRef, unescapedLength) = c; + nint quote = c == SepDefaults.Quote ? 
1 : 0; + nint notQuote = quote ^ 1; + quoteCount += quote; + nint increment = quoteCount & 1 | notQuote; + unescapedLength += increment; + } + for (var i = unescapedLength; i < length; i++) + { + Add(ref charRef, i) = SepDefaults.Quote; + } + return (int)unescapedLength; + } + + [ExcludeFromCodeCoverage] // Trial: ref-walking variant of UnescapeInPlace; compacts in place, pads the tail with quotes, returns the unescaped length + internal static int UnescapeInPlaceRefs(ref char charRef, int length) + { + nint quoteCount = 1; // We start just past first quote + ref var charRefStart = ref charRef; + ref var charRefEnd = ref Add(ref charRef, length); + ref var unescapedCharRef = ref charRef; + charRef = ref Add(ref charRef, 1); + for (; IsAddressLessThan(ref charRef, ref charRefEnd); charRef = ref Add(ref charRef, 1)) // "less than" rather than "!AreSame" so length == 0 cannot run past the end + { + var c = charRef; + unescapedCharRef = c; + nint quote = c == SepDefaults.Quote ? 1 : 0; + nint notQuote = quote ^ 1; + quoteCount += quote; + nint increment = quoteCount & 1 | notQuote; + unescapedCharRef = ref Add(ref unescapedCharRef, increment); + } + var unescapedLength = ByteOffset(ref charRefStart, ref unescapedCharRef) / sizeof(char); + for (var i = unescapedLength; i < length; i++) + { + Add(ref charRefStart, i) = SepDefaults.Quote; // charRef has been advanced to (or past) charRefEnd, so the tail must be indexed from the start + } + return (int)unescapedLength; + } + + [ExcludeFromCodeCoverage] // Trial + internal static int UnescapeInPlaceQuoteCountBoolUNVALIDATED(ref char charRef, int length) + { + // After first quote + var quoteCount = 0; + var unescapedLength = 0; + for (var i = 1; i < length; i++) + { + var c = Add(ref charRef, i); + var quoteNumber = c == SepDefaults.Quote ? 1 : 0; + quoteCount += quoteNumber; + unescapedLength += (quoteCount & 1) == 0 ? 
1 : 0; + Add(ref charRef, unescapedLength) = c; + } + for (var i = unescapedLength; i < length; i++) + { + Add(ref charRef, i) = SepDefaults.Quote; + } + return unescapedLength; + } + + [ExcludeFromCodeCoverage] // Trial + internal static int UnescapeInPlaceEvenBoolUNVALIDATED(ref char charRef, int length) + { + // After first quote, so quote 0, hence even + var evenQuote = false; + var unescapedLength = 0; + for (var i = 1; i < length; i++) + { + var c = Add(ref charRef, i); + evenQuote ^= c == SepDefaults.Quote; + unescapedLength += evenQuote ? 1 : 0; + Add(ref charRef, unescapedLength) = c; + } + for (var i = unescapedLength; i < length; i++) + { + Add(ref charRef, i) = SepDefaults.Quote; + } + return unescapedLength; + } +} diff --git a/src/Sep/SepReader.cs b/src/Sep/SepReader.cs index 06b88cb1..193cc1e7 100644 --- a/src/Sep/SepReader.cs +++ b/src/Sep/SepReader.cs @@ -5,6 +5,7 @@ using System.Diagnostics.CodeAnalysis; using System.IO; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; using System.Text; using static nietras.SeparatedValues.SepDefaults; @@ -40,6 +41,7 @@ public sealed partial class SepReader : SepReaderState bool _rowAlreadyFound = false; internal SepReader(Info info, SepReaderOptions options, TextReader reader) + : base(colUnquoteUnescape: options.Unescape) { _info = info; _reader = reader; @@ -82,7 +84,7 @@ internal SepReader(Info info, SepReaderOptions options, TextReader reader) var paddingLength = _parser?.PaddingLength ?? 
64; - _colEnds = ArrayPool.Shared.Rent(Math.Max(ColEndsInitialLength, paddingLength * 2)); + _colEndsOrColInfos = ArrayPool.Shared.Rent(Math.Max(ColEndsInitialLength, paddingLength * 2)); } public bool IsEmpty { get; private set; } @@ -169,34 +171,38 @@ public bool MoveNext() // Reset #if DEBUG - Array.Fill(_colEnds, -42); + Array.Fill(_colEndsOrColInfos, -42); #endif _cacheIndex = 0; _arrayPool.Reset(); _charsDataStart = _charsRowStart; - _colEnds[0] = _charsRowStart - 1; + _colEndsOrColInfos[0] = _charsRowStart - 1; + // QuoteCount initialize hack + _colEndsOrColInfos[1] = 0; _colCount = 0; var endOfFile = false; LOOP: - CheckPoint($"{nameof(_parser.Parse)} BEFORE"); + CheckPoint($"{nameof(_parser)} BEFORE"); var rowLineEndingOffset = 0; if (_parser is not null) { - rowLineEndingOffset = _parser.Parse(this); + rowLineEndingOffset = _colUnquoteUnescape == 0 + ? _parser.ParseColEnds(this) + : _parser.ParseColInfos(this); } MAYBEROW: if (rowLineEndingOffset != 0) { - CheckPoint($"{nameof(_parser.Parse)} AFTER - RETURN TRUE"); + CheckPoint($"{nameof(_parser)} AFTER - RETURN TRUE"); if (_colCountExpected >= 0 && _colCount != _colCountExpected) { // Capture row start and move next to be able to continue even // after exception. 
var rowStart = _charsRowStart; _charsRowStart = _charsParseStart; - ThrowInvalidDataExceptionColCountMismatch(_colCountExpected, _colEnds[_colCount], rowStart); + ThrowInvalidDataExceptionColCountMismatch(_colCountExpected, _colEndsOrColInfos[_colCount], rowStart); } _charsRowStart = _charsParseStart; foundRow = true; @@ -204,18 +210,26 @@ public bool MoveNext() } else if (endOfFile) { - CheckPoint($"{nameof(_parser.Parse)} AFTER - ENDOFFILE"); + CheckPoint($"{nameof(_parser)} AFTER - ENDOFFILE"); foundRow = false; goto RETURN; } - CheckPoint($"{nameof(_parser.Parse)} AFTER"); + CheckPoint($"{nameof(_parser)} AFTER"); endOfFile = EnsureInitializeAndReadData(endOfFile); if (endOfFile && _charsRowStart < _charsDataEnd && _charsParseStart == _charsDataEnd) { ++_colCount; - _colEnds[_colCount] = _charsDataEnd; + if (_colUnquoteUnescape == 0) + { + _colEndsOrColInfos[_colCount] = _charsDataEnd; + } + else + { + Unsafe.Add(ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(_colEndsOrColInfos)), _colCount) = + new(_charsDataEnd, _parser?.QuoteCount ?? 0); + } rowLineEndingOffset = 1; ++_lineNumber; goto MAYBEROW; @@ -252,9 +266,9 @@ bool EnsureInitializeAndReadData(bool endOfFile) if (_parser != null && _charsParseStart < _charsDataEnd) { // + 1 - must be room for one more col always - if ((_colCount + 1) >= (_colEnds.Length - _parser.PaddingLength)) + if ((_colCount + 1) >= (GetColInfosLength() - _parser.PaddingLength)) { - DoubleColsCapacityCopyState(); + DoubleColInfosCapacityCopyState(); } } else @@ -262,9 +276,9 @@ bool EnsureInitializeAndReadData(bool endOfFile) if (nothingLeftToRead) { // + 1 - must be room for one more col always - if ((_colCount + 1) >= _colEnds.Length) + if ((_colCount + 1) >= GetColInfosLength()) { - DoubleColsCapacityCopyState(); + DoubleColInfosCapacityCopyState(); } // If nothing has been read, then at end of file. 
endOfFile = true; @@ -273,13 +287,16 @@ bool EnsureInitializeAndReadData(bool endOfFile) return endOfFile; } - void DoubleColsCapacityCopyState() + void DoubleColInfosCapacityCopyState() { - var previousColEnds = _colEnds; - _colEnds = ArrayPool.Shared.Rent(_colEnds.Length * 2); - var length = _colCount + 1; - var previousColEndsSpan = previousColEnds.AsSpan().Slice(0, length); - var newColEndsSpan = _colEnds.AsSpan().Slice(0, length); + var previousColEnds = _colEndsOrColInfos; + _colEndsOrColInfos = ArrayPool.Shared.Rent(_colEndsOrColInfos.Length * 2); + + var factor = GetIntegersPerColInfo(); + var lengthInIntegers = (_colCount + 1) * factor; + + var previousColEndsSpan = previousColEnds.AsSpan().Slice(0, lengthInIntegers); + var newColEndsSpan = _colEndsOrColInfos.AsSpan().Slice(0, lengthInIntegers); previousColEndsSpan.CopyTo(newColEndsSpan); ArrayPool.Shared.Return(previousColEnds); } @@ -357,11 +374,23 @@ void HandleDataMoved(int offset) A.Assert(_charsRowStart >= offset); _charsRowStart -= offset; // Adjust found cols, note includes _colCount since +1 - var colEnds = _colEnds; - for (var i = 0; i <= _colCount; i++) + if (_colUnquoteUnescape == 0) { - ref var colEnd = ref colEnds[i]; - colEnd -= offset; + ref var colEndsRef = ref GetColsRefAs(); + for (var i = 0; i <= _colCount; i++) + { + ref var colEnd = ref Unsafe.Add(ref colEndsRef, i); + colEnd -= offset; + } + } + else + { + ref var colInfosRef = ref GetColsRefAs(); + for (var i = 0; i <= _colCount; i++) + { + ref var colInfo = ref Unsafe.Add(ref colInfosRef, i); + colInfo.ColEnd -= offset; + } } } @@ -413,7 +442,16 @@ void TraceState(string name, [CallerFilePath] string filePath = "", [CallerLineN { T.WriteLine($"{filePath}({lineNumber}): {name}"); T.WriteLine($"{nameof(_chars),-10}:{_chars.Length,5} [{_charsDataStart,4},{_charsDataEnd,4}] ({_charsParseStart,2}) '{FormatValidChars()}'"); - T.WriteLine($"{nameof(_colEnds),-10}:{_colEnds.Length,5} [{0,4},{_colCount,4}] {string.Join(',', 
_colEnds[0..Math.Min(_colCount, _colEnds.Length)])}"); + if (_colUnquoteUnescape == 0) + { + var colEnds = GetColsEntireSpanAs(); + T.WriteLine($"{nameof(colEnds),-10}:{colEnds.Length,5} [{0,4},{_colCount,4}] {string.Join(',', colEnds[0..Math.Min(_colCount, colEnds.Length)].ToArray())}"); + } + else + { + var colInfos = GetColsEntireSpanAs(); + T.WriteLine($"{nameof(colInfos),-10}:{colInfos.Length,5} [{0,4},{_colCount,4}] {string.Join(',', colInfos[0..Math.Min(_colCount, colInfos.Length)].ToArray())}"); + } [ExcludeFromCodeCoverage] Span FormatValidChars() @@ -436,14 +474,37 @@ void AssertState(string name, [CallerFilePath] string filePath = "", [CallerLine A.Assert(_charsDataStart <= _charsDataEnd, $"{name}", filePath, lineNumber); A.Assert(_charsDataStart <= _charsRowStart && _charsRowStart <= _charsDataEnd, $"{name}", filePath, lineNumber); - A.Assert(_colEnds.Length > 0, $"{name}", filePath, lineNumber); - A.Assert(0 <= _colCount && _colCount <= _colEnds.Length, $"{name}", filePath, lineNumber); - for (var i = 0; i < _colCount; i++) + if (_colUnquoteUnescape == 0) + { + var colEnds = GetColsEntireSpanAs(); + A.Assert(colEnds.Length > 0, $"{name}", filePath, lineNumber); + A.Assert(0 <= _colCount && _colCount <= colEnds.Length, $"{name}", filePath, lineNumber); + for (var i = 0; i < _colCount; i++) + { + var colEnd = colEnds[i]; + // colEnds are one before, so first may be before data starts + colEnd += i == 0 ? 1 : 0; + A.Assert(_charsRowStart <= colEnd && colEnd < _charsDataEnd, $"{name}", filePath, lineNumber); + } + } + else { - var colEnd = _colEnds[i]; - // colEnds are one before, so first may be before data starts - colEnd += i == 0 ? 
1 : 0; - A.Assert(_charsRowStart <= colEnd && colEnd < _charsDataEnd, $"{name}", filePath, lineNumber); + var colInfos = GetColsEntireSpanAs(); + A.Assert(colInfos.Length > 0, $"{name}", filePath, lineNumber); + A.Assert(0 <= _colCount && _colCount <= colInfos.Length, $"{name}", filePath, lineNumber); + for (var i = 0; i < _colCount; i++) + { + var (colEnd, _) = colInfos[i]; + // colEnds are one before, so first may be before data starts + colEnd += i == 0 ? 1 : 0; + A.Assert(_charsRowStart <= colEnd && colEnd < _charsDataEnd, $"{name}", filePath, lineNumber); + if (i > 0) + { + var colStart = colInfos[i - 1].ColEnd + 1; + var colLength = colEnd - colStart; + A.Assert(0 <= colLength && colLength < 1024 * 1024, $"ColIndex {i} Start {colStart} End {colEnd} Length {colLength}"); + } + } } if (_colNameCache != null) { diff --git a/src/Sep/SepReaderOptions.cs b/src/Sep/SepReaderOptions.cs index 79e8e33d..8d60e773 100644 --- a/src/Sep/SepReaderOptions.cs +++ b/src/Sep/SepReaderOptions.cs @@ -20,28 +20,41 @@ public SepReaderOptions(Sep? sep) /// Specifies the separator used, if `null` then automatic detection /// is used based on first row in source. /// - public Sep? Sep { get; init; } + public Sep? Sep { get; init; } = null; /// /// Specifies the culture used for parsing. /// May be `null` for default culture. /// - public CultureInfo? CultureInfo { get; init; } + public CultureInfo? CultureInfo { get; init; } = SepDefaults.CultureInfo; /// /// Indicates whether the first row is a header row. /// - public bool HasHeader { get; init; } + public bool HasHeader { get; init; } = true; /// /// Specifies the method factory used to convert a column span /// of `char`s to a `string`. /// - public SepCreateToString CreateToString { get; init; } + public SepCreateToString CreateToString { get; init; } = SepToString.Direct; /// /// Disables using [csFastFloat](https://github.com/CarlVerret/csFastFloat) /// for parsing `float` and `double`. 
/// - public bool DisableFastFloat { get; init; } + public bool DisableFastFloat { get; init; } = false; /// /// Disables checking if column count is the same for all rows. /// - public bool DisableColCountCheck { get; init; } + public bool DisableColCountCheck { get; init; } = false; + /// <summary> + /// Unescape quotes on column access. + /// </summary> + /// <remarks> + /// When true, if a column starts with a quote then the two outermost quotes + /// are removed and every second inner quote is removed. Note that + /// unquote/unescape happens in-place, which means the row span will be modified and contain "garbage" + /// state after unescaped cols before next col. This is for efficiency to + /// avoid allocating secondary memory for unescaped columns. Header + /// columns/names will also be unescaped. + /// </remarks> + public bool Unescape { get; init; } = false; } diff --git a/src/Sep/SepReaderState.cs b/src/Sep/SepReaderState.cs index 8ca2fdbc..43a75528 100644 --- a/src/Sep/SepReaderState.cs +++ b/src/Sep/SepReaderState.cs @@ -1,5 +1,6 @@ using System; using System.Buffers; +using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -28,11 +29,12 @@ public class SepReaderState : IDisposable internal const int ColEndsInitialLength = 128; // [0] = Previous row/col end e.g. one before row/first col start - // [1...] = Col ends e.g. [1] = first col end + // [1...] = Col ends/infos e.g. 
[1] = first col end/info // Length = colCount + 1 - internal int[] _colEnds = Array.Empty(); + internal int[] _colEndsOrColInfos = Array.Empty(); internal int _colCountExpected = -1; internal int _colCount = 0; + readonly internal uint _colUnquoteUnescape = 0; internal int _rowIndex = -1; internal int _rowLineNumberFrom = 0; @@ -47,17 +49,27 @@ public class SepReaderState : IDisposable internal SepToString _toString = null!; #pragma warning restore CA2213 // Disposable fields should be disposed - internal SepReaderState() { } + internal SepReaderState(bool colUnquoteUnescape = false) { _colUnquoteUnescape = colUnquoteUnescape ? 1u : 0u; } #region Row internal ReadOnlySpan RowSpan() { if (_colCount > 0) { - var colEnds = _colEnds; - var start = colEnds[0] + 1; // +1 since previous end - var end = colEnds[_colCount]; - return new(_chars, start, end - start); + if (_colUnquoteUnescape == 0) + { + var colEnds = _colEndsOrColInfos; + var start = colEnds[0] + 1; // +1 since previous end + var end = colEnds[_colCount]; + return new(_chars, start, end - start); + } + else + { + ref var colInfos = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(_colEndsOrColInfos)); + var start = colInfos.ColEnd + 1; // +1 since previous end + var end = Unsafe.Add(ref colInfos, _colCount).ColEnd; + return new(_chars, start, end - start); + } } else { @@ -92,21 +104,58 @@ internal int GetCachedColIndex(string colName) internal ReadOnlySpan GetColSpan(int index) { if ((uint)index >= (uint)_colCount) { SepThrow.IndexOutOfRangeException(); } - - // Using array indexing is slightly faster despite more code 🤔 - var colEnds = _colEnds; - var colStart = colEnds[index] + 1; // +1 since previous end - var colEnd = colEnds[index + 1]; - // Above bounds checked is faster than below 🤔 - //ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(_colEnds); - //var colStart = Unsafe.Add(ref colEndsRef, index) + 1; // +1 since previous end - //var colEnd = Unsafe.Add(ref colEndsRef, index + 1); - 
- var colLength = colEnd - colStart; - // Much better code generation given col span always inside buffer - ref var colRef = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_chars), colStart); - var col = MemoryMarshal.CreateReadOnlySpan(ref colRef, colLength); - return col; + if (_colUnquoteUnescape == 0) + { + // Using array indexing is slightly faster despite more code 🤔 + var colEnds = _colEndsOrColInfos; + var colStart = colEnds[index] + 1; // +1 since previous end + var colEnd = colEnds[index + 1]; + // Above bounds checked is faster than below 🤔 + //ref var colEndsRef = ref MemoryMarshal.GetArrayDataReference(_colEnds); + //var colStart = Unsafe.Add(ref colEndsRef, index) + 1; // +1 since previous end + //var colEnd = Unsafe.Add(ref colEndsRef, index + 1); + + var colLength = colEnd - colStart; + // Much better code generation given col span always inside buffer + ref var colRef = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_chars), colStart); + var col = MemoryMarshal.CreateReadOnlySpan(ref colRef, colLength); + return col; + } + else // Unquote/Unescape + { + ref var colInfos = ref Unsafe.As(ref MemoryMarshal.GetArrayDataReference(_colEndsOrColInfos)); + var colStart = Unsafe.Add(ref colInfos, index).ColEnd + 1; // +1 since previous end + ref var colInfo = ref Unsafe.Add(ref colInfos, index + 1); + var (colEnd, quoteCountOrNegativeUnescapedLength) = colInfo; + var colLength = colEnd - colStart; + ref var colRef = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_chars), colStart); + // Unescape if quotes found, negative and col has already been + // unescaped and the count is instead the new col length. + if (quoteCountOrNegativeUnescapedLength == 0 || + (quoteCountOrNegativeUnescapedLength > 0 && colRef != SepDefaults.Quote)) + { + return MemoryMarshal.CreateReadOnlySpan(ref colRef, colLength); + } + // From now on it is known the first char in col is a quote if not + // already escaped. 
Optimize for common case of outermost quotes. + else if (quoteCountOrNegativeUnescapedLength == 2 && + Unsafe.Add(ref colRef, colLength - 1) == SepDefaults.Quote) + { + return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.Add(ref colRef, 1), colLength - 2); + } + else if (quoteCountOrNegativeUnescapedLength < 0) + { + var unescapedLength = -quoteCountOrNegativeUnescapedLength; + return MemoryMarshal.CreateReadOnlySpan(ref colRef, unescapedLength); + } + else + { + // Unescape fully and in-place + var unescapedLength = SepUnescape.UnescapeInPlace(ref colRef, colLength); + colInfo.QuoteCount = -unescapedLength; + return MemoryMarshal.CreateReadOnlySpan(ref colRef, unescapedLength); + } + } } internal string ToStringDefault(int index) @@ -406,10 +455,30 @@ internal unsafe Span Select(int colStart, int colCount, delegate* } #endregion + [ExcludeFromCodeCoverage] + internal Span GetColsEntireSpanAs() where T : unmanaged => + MemoryMarshal.CreateSpan(ref GetColsRefAs(), GetColInfosLength()); + [ExcludeFromCodeCoverage] + internal int GetColInfosLength() where T : unmanaged => + _colEndsOrColInfos.Length / (Unsafe.SizeOf() / sizeof(int)); + + internal int GetColInfosLength() => + _colEndsOrColInfos.Length / GetIntegersPerColInfo(); + + internal int GetIntegersPerColInfo() => + _colUnquoteUnescape == 0 ? 1 : Unsafe.SizeOf() / sizeof(int); + + internal ref T GetColsRefAs() where T : unmanaged + { + A.Assert(Unsafe.SizeOf() % sizeof(int) == 0); + ref var colEndsOrColInfosRef = ref MemoryMarshal.GetArrayDataReference(_colEndsOrColInfos); + return ref Unsafe.As(ref colEndsOrColInfosRef); + } + internal virtual void DisposeManaged() { ArrayPool.Shared.Return(_chars); - ArrayPool.Shared.Return(_colEnds); + ArrayPool.Shared.Return(_colEndsOrColInfos); _arrayPool.Dispose(); _toString?.Dispose(); }