From b0a2c59df8d8ba05c6cd901fb1a40410cb9f81ae Mon Sep 17 00:00:00 2001 From: nietras Date: Sat, 11 Jan 2025 18:05:13 +0100 Subject: [PATCH] README: Add Escaping section based on CompareEscape test (#217) --- README.md | 28 +++- src/Sep.XyzTest/ReadMeTest.CompareEscape.cs | 146 ++++++++++++++++++++ src/Sep.XyzTest/Sep.XyzTest.csproj | 1 + 3 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 src/Sep.XyzTest/ReadMeTest.CompareEscape.cs diff --git a/README.md b/README.md index 5e74194..71d14cd 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ changes to input or output. What you read/write is what you get. E.g. by default there is no "automatic" escaping/unescaping of quotes or trimming of spaces. To enable this see [SepReaderOptions](#sepreaderoptions) and [Unescaping](#unescaping) and [Trimming](#trimming). See -[SepWriterOptions](#sepwriteroptions) for escaping. +[SepWriterOptions](#sepwriteroptions) for [Escaping](#escaping). * **🚀 Fast** - blazing fast with both architecture specific and cross-platform SIMD vectorized parsing incl. 64/128/256/512-bit paths e.g. AVX2, AVX-512 (.NET 8.0+), NEON. Uses [csFastFloat](https://github.com/CarlVerret/csFastFloat) for @@ -869,6 +869,32 @@ public SepColNotSetOption ColNotSetOption { get; init; } = SepColNotSetOption.Th public bool Escape { get; init; } = false; ``` +#### Escaping +Escaping is not enabled by default in Sep, but when it is, it gives the same +results as other popular CSV libraries as shown below. Although, CsvHelper +appears to be escaping spaces as well, which is not necessary. 
+ +| Input | CsvHelper | Sylvan | Sep¹ | +|-|-|-|-| +| `` | | | | +| `·` | `"·"` | `·` | `·` | +| `a` | `a` | `a` | `a` | +| `;` | `";"` | `";"` | `";"` | +| `,` | `,` | `,` | `,` | +| `"` | `""""` | `""""` | `""""` | +| `\r` | `"\r"` | `"\r"` | `"\r"` | +| `\n` | `"\n"` | `"\n"` | `"\n"` | +| `a"aa"aaa` | `"a""aa""aaa"` | `"a""aa""aaa"` | `"a""aa""aaa"` | +| `a;aa;aaa` | `"a;aa;aaa"` | `"a;aa;aaa"` | `"a;aa;aaa"` | + +Separator/delimiter is set to semi-colon `;` (default for Sep) + +`·` (middle dot) is whitespace to make this visible + +`\r`, `\n` are carriage return and line feed special characters to make these visible + +¹ Sep with `Escape = true` in `SepWriterOptions` + ## Limitations and Constraints Sep is designed to be minimal and fast. As such, it has some limitations and constraints, since these are not needed for the initial intended usage: diff --git a/src/Sep.XyzTest/ReadMeTest.CompareEscape.cs b/src/Sep.XyzTest/ReadMeTest.CompareEscape.cs new file mode 100644 index 0000000..e72eaa3 --- /dev/null +++ b/src/Sep.XyzTest/ReadMeTest.CompareEscape.cs @@ -0,0 +1,146 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Text; +using CsvHelper; +using CsvHelper.Configuration; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Sylvan.Data; +using Sylvan.Data.Csv; + +namespace nietras.SeparatedValues.XyzTest; + +public partial class ReadMeTest +{ + [TestMethod] + public void ReadMeTest_CompareEscape() + { + var tests = new string[] + { + new(""), + new(" "), + new("a"), + new(";"), + new(","), + new("\""), + new("\r"), + new("\n"), + new("a\"aa\"aaa"), + new("a;aa;aaa"), + }; + var runners = new Dictionary<string, Func<string, string>>() + { + { nameof(CsvHelper), EscapeCsvHelper }, + { nameof(Sylvan), EscapeSylvan }, + { nameof(Sep) + "¹", EscapeSep }, + }; + var sb = new StringBuilder(); + sb.Append($"| Input |"); + foreach (var (name, _) in runners) + { + sb.Append($" {name} |"); + } + 
sb.AppendLine(); + sb.Append($"|-|"); + foreach (var (_, _) in runners) + { + sb.Append($"-|"); + } + sb.AppendLine(); + foreach (var test in tests) + { + var display = ForDisplay(test); + + sb.Append($"| `{display}` |"); + + var csharpColText = display.Replace("\"", "\\\""); + var csharpColTextResult = ForDisplay(EscapeSep(test)).Replace("\"", "\\\""); + Trace.WriteLine($"new object[] {{ \"{test.Replace("\"", "\\\"")}\", \"{csharpColTextResult}\" }},"); + + foreach (var (_, action) in runners) + { + try + { + var outputColText = action(test); + if (outputColText.Length > 0) + { + sb.Append($" `{ForDisplay(outputColText)}`"); + } + sb.Append($" |"); + } + catch (Exception e) + { + var message = e.Message.ReplaceLineEndings(" "); + Trace.WriteLine(message); + sb.Append($" EXCEPTION |"); + } + } + sb.AppendLine(); + } + sb.AppendLine(); + sb.AppendLine("Separator/delimiter is set to semi-colon `;` (default for Sep)"); + sb.AppendLine(); + sb.AppendLine($"`·` (middle dot) is whitespace to make this visible"); + sb.AppendLine(); + sb.AppendLine($"`\\r`, `\\n` are carriage return and line feed special characters to make these visible"); + sb.AppendLine(); + sb.AppendLine($"¹ Sep with `{nameof(SepWriterOptions.Escape)} = true` in `{nameof(SepWriterOptions)}`"); + + var text = sb.ToString(); + Trace.WriteLine(text); +#if NET9_0 + // Only write to file on latest version to avoid multiple accesses + File.WriteAllText("../../../CompareEscape.md", text, Encoding.UTF8); +#endif + } + + static string EscapeCsvHelper(string colText) + { + var config = new CsvConfiguration(CultureInfo.InvariantCulture) + { + HasHeaderRecord = false, + Delimiter = ";", + }; + using var stringWriter = new StringWriter(); + using var writer = new CsvWriter(stringWriter, config); + writer.WriteField(colText); + writer.NextRecord(); + return GetCol(stringWriter.ToString()); + } + + static string EscapeSylvan(string colText) + { + // Sylvan has to have some kind of type it seems + var records = new[] { 
new { Name = colText } }; + + // create a DbDataReader over the anonymous records. + var recordReader = records.AsDataReader(); + var options = new CsvDataWriterOptions { WriteHeaders = false, Delimiter = ';' }; + using var stringWriter = new StringWriter(); + using var csvWriter = Sylvan.Data.Csv.CsvDataWriter.Create(stringWriter, options); + csvWriter.Write(recordReader); + return GetCol(stringWriter.ToString()); + } + + static string EscapeSep(string colText) + { + using var writer = Sep.Writer(o => o with { WriteHeader = false, Escape = true }).ToText(); + { + using var row = writer.NewRow(); + row[0].Set(colText); + } + return GetCol(writer.ToString()); + } + + static string GetCol(string col) + { + using var reader = Sep.Default.Reader(o => o with { HasHeader = false }).FromText(col); + reader.MoveNext(); + return reader.Current[0].ToString(); + } + + static string ForDisplay(string test) => + test.Replace(" ", "·").Replace("\r", "\\r").Replace("\n", "\\n"); +} diff --git a/src/Sep.XyzTest/Sep.XyzTest.csproj b/src/Sep.XyzTest/Sep.XyzTest.csproj index 29c30ce..d23f4b8 100644 --- a/src/Sep.XyzTest/Sep.XyzTest.csproj +++ b/src/Sep.XyzTest/Sep.XyzTest.csproj @@ -27,6 +27,7 @@ +