Skip to content

Commit

Permalink
README: Add Escaping section based on CompareEscape test (#217)
Browse files Browse the repository at this point in the history
  • Loading branch information
nietras authored Jan 11, 2025
1 parent c4107e5 commit b0a2c59
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 1 deletion.
28 changes: 27 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ changes to input or output. What you read/write is what you get. E.g. by default
there is no "automatic" escaping/unescaping of quotes or trimming of spaces. To
enable this see [SepReaderOptions](#sepreaderoptions) and
[Unescaping](#unescaping) and [Trimming](#trimming). See
[SepWriterOptions](#sepwriteroptions) for escaping.
[SepWriterOptions](#sepwriteroptions) for [Escaping](#escaping).
* **🚀 Fast** - blazing fast with both architecture specific and cross-platform
SIMD vectorized parsing incl. 64/128/256/512-bit paths e.g. AVX2, AVX-512 (.NET
8.0+), NEON. Uses [csFastFloat](https://github.com/CarlVerret/csFastFloat) for
Expand Down Expand Up @@ -869,6 +869,32 @@ public SepColNotSetOption ColNotSetOption { get; init; } = SepColNotSetOption.Th
public bool Escape { get; init; } = false;
```

#### Escaping
Escaping is not enabled by default in Sep, but when it is, it gives the same
results as other popular CSV libraries, as shown below. Note, however, that
CsvHelper also appears to escape spaces, which is not necessary.

| Input | CsvHelper | Sylvan | Sep¹ |
|-|-|-|-|
| `` | | | |
| `·` | `"·"` | `·` | `·` |
| `a` | `a` | `a` | `a` |
| `;` | `";"` | `";"` | `";"` |
| `,` | `,` | `,` | `,` |
| `"` | `""""` | `""""` | `""""` |
| `\r` | `"\r"` | `"\r"` | `"\r"` |
| `\n` | `"\n"` | `"\n"` | `"\n"` |
| `a"aa"aaa` | `"a""aa""aaa"` | `"a""aa""aaa"` | `"a""aa""aaa"` |
| `a;aa;aaa` | `"a;aa;aaa"` | `"a;aa;aaa"` | `"a;aa;aaa"` |

Separator/delimiter is set to semi-colon `;` (default for Sep)

`·` (middle dot) is whitespace to make this visible

`\r`, `\n` are carriage return and line feed special characters to make these visible

¹ Sep with `Escape = true` in `SepWriterOptions`

## Limitations and Constraints
Sep is designed to be minimal and fast. As such, it has some limitations and
constraints, since these are not needed for the initial intended usage:
Expand Down
146 changes: 146 additions & 0 deletions src/Sep.XyzTest/ReadMeTest.CompareEscape.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;
using CsvHelper;
using CsvHelper.Configuration;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Sylvan.Data;
using Sylvan.Data.Csv;

namespace nietras.SeparatedValues.XyzTest;

public partial class ReadMeTest
{
/// <summary>
/// Builds a markdown table comparing how CsvHelper, Sylvan and Sep escape a
/// fixed set of column texts, followed by explanatory footnotes. The table
/// is written to <see cref="Trace"/> and, when compiled for NET9_0, to
/// <c>../../../CompareEscape.md</c>.
/// </summary>
[TestMethod]
public void ReadMeTest_CompareEscape()
{
    // Column texts to run through each library's escaping.
    var tests = new[]
    {
        "",
        " ",
        "a",
        ";",
        ",",
        "\"",
        "\r",
        "\n",
        "a\"aa\"aaa",
        "a;aa;aaa",
    };
    // Table column name -> function returning the escaped column text.
    var runners = new Dictionary<string, Func<string, string>>()
    {
        { nameof(CsvHelper), EscapeCsvHelper },
        { nameof(Sylvan), EscapeSylvan },
        { nameof(Sep) + "¹", EscapeSep },
    };

    var sb = new StringBuilder();

    // Header row: one column for the input plus one per runner.
    sb.Append("| Input |");
    foreach (var (name, _) in runners)
    {
        sb.Append($" {name} |");
    }
    sb.AppendLine();

    // Markdown header/body separator row with matching column count.
    sb.Append("|-|");
    for (var i = 0; i < runners.Count; i++)
    {
        sb.Append("-|");
    }
    sb.AppendLine();

    foreach (var test in tests)
    {
        sb.Append($"| `{ForDisplay(test)}` |");

        // Trace a C# `new object[] { input, expected }` line for the Sep result,
        // with double quotes backslash-escaped so it can be pasted as source.
        var csharpInput = test.Replace("\"", "\\\"");
        var csharpColTextResult = ForDisplay(EscapeSep(test)).Replace("\"", "\\\"");
        Trace.WriteLine($"new object[] {{ \"{csharpInput}\", \"{csharpColTextResult}\" }},");

        foreach (var (_, action) in runners)
        {
            try
            {
                var outputColText = action(test);
                // Empty output is shown as an empty cell rather than empty backticks.
                if (outputColText.Length > 0)
                {
                    sb.Append($" `{ForDisplay(outputColText)}`");
                }
                sb.Append(" |");
            }
            catch (Exception e)
            {
                // A failing runner is reported in its cell; the message goes to trace.
                var message = e.Message.ReplaceLineEndings(" ");
                Trace.WriteLine(message);
                sb.Append(" EXCEPTION |");
            }
        }
        sb.AppendLine();
    }

    // Footnotes explaining the notation used in the table.
    sb.AppendLine();
    sb.AppendLine("Separator/delimiter is set to semi-colon `;` (default for Sep)");
    sb.AppendLine();
    sb.AppendLine("`·` (middle dot) is whitespace to make this visible");
    sb.AppendLine();
    sb.AppendLine("`\\r`, `\\n` are carriage return and line feed special characters to make these visible");
    sb.AppendLine();
    sb.AppendLine($"¹ Sep with `{nameof(SepWriterOptions.Escape)} = true` in `{nameof(SepWriterOptions)}`");

    var text = sb.ToString();
    Trace.WriteLine(text);
#if NET9_0
    // Only write to file on latest version to avoid multiple accesses
    File.WriteAllText("../../../CompareEscape.md", text, Encoding.UTF8);
#endif
}

/// <summary>
/// Writes <paramref name="colText"/> as a single field of a single record
/// with CsvHelper (delimiter <c>;</c>, no header record) and returns the
/// first column of the written text as read back by <c>GetCol</c>.
/// </summary>
static string EscapeCsvHelper(string colText)
{
    var configuration = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        HasHeaderRecord = false,
        Delimiter = ";",
    };
    using var output = new StringWriter();
    using var csv = new CsvWriter(output, configuration);

    csv.WriteField(colText);
    csv.NextRecord();

    var written = output.ToString();
    return GetCol(written);
}

/// <summary>
/// Writes <paramref name="colText"/> as the only value of a single record
/// with Sylvan's CSV data writer (delimiter <c>;</c>, no headers) and returns
/// the first column of the written text as read back by <c>GetCol</c>.
/// </summary>
static string EscapeSylvan(string colText)
{
    // Sylvan has to have some kind of type it seems
    var records = new[] { new { Name = colText } };

    // Create a DbDataReader over the anonymous records; dispose it when done
    // so the reader is not left open after the write.
    using var recordReader = records.AsDataReader();
    var options = new CsvDataWriterOptions { WriteHeaders = false, Delimiter = ';' };
    using var stringWriter = new StringWriter();
    using var csvWriter = Sylvan.Data.Csv.CsvDataWriter.Create(stringWriter, options);
    csvWriter.Write(recordReader);
    return GetCol(stringWriter.ToString());
}

/// <summary>
/// Writes <paramref name="colText"/> as the only column of a single row with
/// a Sep text writer configured with <c>WriteHeader = false</c> and
/// <c>Escape = true</c>, and returns the first column of the written text as
/// read back by <c>GetCol</c>.
/// </summary>
static string EscapeSep(string colText)
{
    using var sepWriter = Sep
        .Writer(options => options with { WriteHeader = false, Escape = true })
        .ToText();

    // The row is scoped so it is disposed before the writer's text is read.
    using (var newRow = sepWriter.NewRow())
    {
        newRow[0].Set(colText);
    }

    var written = sepWriter.ToString();
    return GetCol(written);
}

/// <summary>
/// Reads <paramref name="col"/> with a Sep reader (default separator, no
/// header), advances to the first row and returns the text of its first
/// column.
/// </summary>
static string GetCol(string col)
{
    using var sepReader = Sep.Default
        .Reader(options => options with { HasHeader = false })
        .FromText(col);

    // Advance to the first row; the result of MoveNext is not checked.
    sepReader.MoveNext();

    var firstCol = sepReader.Current[0];
    return firstCol.ToString();
}

/// <summary>
/// Makes invisible characters in <paramref name="test"/> visible: each space
/// becomes a middle dot (<c>·</c>), each carriage return becomes the two
/// characters <c>\r</c> and each line feed becomes the two characters
/// <c>\n</c>. All other characters are copied unchanged.
/// </summary>
static string ForDisplay(string test)
{
    var visible = new StringBuilder(test.Length);
    foreach (var c in test)
    {
        switch (c)
        {
            case ' ':
                visible.Append('·');
                break;
            case '\r':
                visible.Append("\\r");
                break;
            case '\n':
                visible.Append("\\n");
                break;
            default:
                visible.Append(c);
                break;
        }
    }
    return visible.ToString();
}
}
1 change: 1 addition & 0 deletions src/Sep.XyzTest/Sep.XyzTest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
<PackageReference Include="MSTest" Version="3.7.0" />
<PackageReference Include="CsvHelper" Version="33.0.1" />
<PackageReference Include="Sylvan.Common" Version="0.4.3" />
<PackageReference Include="Sylvan.Data" Version="0.2.16" />
<PackageReference Include="Sylvan.Data.Csv" Version="1.3.9" />
</ItemGroup>

Expand Down

0 comments on commit b0a2c59

Please sign in to comment.