diff --git a/doc/index.md b/doc/index.md index f33ce83..3e2805d 100644 --- a/doc/index.md +++ b/doc/index.md @@ -33,7 +33,7 @@ How to process tabular data with a known structure:

-# [High-level API](#tab/high-level-api) +# [High-level API (C#)](#tab/api-hl-cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); @@ -83,7 +83,7 @@ internal struct Book } ``` -# [Low-level API](#tab/low-level-api) +# [Low-level API (C#)](#tab/api-ll-cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); @@ -105,17 +105,91 @@ using (var reader = new TabularReader(File.OpenRead("books.csv"), dialect)) while (reader.TryPickRecord()) { reader.TryReadField(); - reader.TryGetString(out var author); + reader.TryGetString(out var field0); reader.TryReadField(); - reader.TryGetString(out var title); + reader.TryGetString(out var field1); reader.TryReadField(); - reader.TryGetDateOnly(out var published); + reader.TryGetDateOnly(out var field2); - Console.WriteLine($"{author} '{title}' ({published})"); + Console.WriteLine($"{field0} '{field1}' ({field2})"); } } ``` +# [High-level API (F#)](#tab/api-hl-fs) + +```fs +type internal Book = + struct + val mutable Author: string + val mutable Title: string + val mutable Published: Nullable + end + +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularWriter(File.Create "books.csv", dialect)) (fun writer -> + let book1 = new Book ( + Author = "Lewis Carroll", + Title = "Alice's Adventures in Wonderland", + Published = new DateOnly(1865, 11, 09) + ) + + writer.WriteRecord &book1 + + let book2 = new Book ( + Author = "H. G. Wells", + Title = "The Time Machine", + Published = new DateOnly(1894, 03, 17) + ) + + writer.WriteRecord &book2 +) + +using (new TabularReader(File.OpenRead "books.csv", dialect)) (fun reader -> + while reader.TryReadRecord () do + let book = reader.CurrentRecord + + printfn $"{book.Author} '{book.Title}' ({book.Published})" +) +``` + +> [!NOTE] +> Using the high-level API with F# in this example requires a [custom](xref:urn:topics:extensibility:record-handlers) record handler. 
+ +# [Low-level API (F#)](#tab/api-ll-fs) + +```fs +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularWriter(File.Create "books.csv", dialect)) (fun writer -> + writer.WriteString "Lewis Carroll" + writer.WriteString "Alice's Adventures in Wonderland" + writer.WriteDateOnly (new DateOnly(1865, 11, 09)) + writer.FinishRecord () + writer.WriteString "H. G. Wells" + writer.WriteString "The Time Machine" + writer.WriteDateOnly (new DateOnly(1894, 03, 17)) + writer.FinishRecord () +) + +using (new TabularReader(File.OpenRead "books.csv", dialect)) (fun reader -> + while reader.TryPickRecord () do + let mutable field0 = Unchecked.defaultof + let mutable field1 = Unchecked.defaultof + let mutable field2 = Unchecked.defaultof + + reader.TryReadField () |> ignore + reader.TryGetString &field0 |> ignore + reader.TryReadField () |> ignore + reader.TryGetString &field1 |> ignore + reader.TryReadField () |> ignore + reader.TryGetDateOnly &field2 |> ignore + + printfn $"{field0} '{field1}' ({field2})" +) +``` + ---

@@ -124,7 +198,7 @@ How to process tabular data with a known structure that has a header:

-# [High-level API](#tab/high-level-api) +# [High-level API (C#)](#tab/api-hl-cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); @@ -177,7 +251,7 @@ internal struct Book } ``` -# [Low-level API](#tab/low-level-api) +# [Low-level API (C#)](#tab/api-ll-cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); @@ -200,36 +274,114 @@ using (var writer = new TabularWriter(File.Create("books.csv"), dialect)) using (var reader = new TabularReader(File.OpenRead("books.csv"), dialect)) { - if (reader.TryPickRecord()) + while (reader.TryReadField()) { - while (reader.TryReadField()) - { - } } while (reader.TryPickRecord()) { reader.TryReadField(); - reader.TryGetString(out var author); + reader.TryGetString(out var field0); reader.TryReadField(); - reader.TryGetString(out var title); + reader.TryGetString(out var field1); reader.TryReadField(); - reader.TryGetDateOnly(out var published); + reader.TryGetDateOnly(out var field2); - Console.WriteLine($"{author} '{title}' ({published})"); + Console.WriteLine($"{field0} '{field1}' ({field2})"); } } ``` +# [High-level API (F#)](#tab/api-hl-fs) + +```fs +type internal Book = + struct + val mutable Author: string + val mutable Title: string + val mutable Published: Nullable + end + +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularWriter(File.Create "books.csv", dialect)) (fun writer -> + let book1 = new Book ( + Author = "Lewis Carroll", + Title = "Alice's Adventures in Wonderland", + Published = new DateOnly(1865, 11, 09) + ) + + writer.WriteRecord &book1 + + let book2 = new Book ( + Author = "H. G. 
Wells", + Title = "The Time Machine", + Published = new DateOnly(1894, 03, 17) + ) + + writer.WriteRecord &book2 +) + +using (new TabularReader(File.OpenRead "books.csv", dialect)) (fun reader -> + while reader.TryReadRecord () do + let book = reader.CurrentRecord + + printfn $"{book.Author} '{book.Title}' ({book.Published})" +) +``` + +> [!NOTE] +> Using the high-level API with F# in this example requires a [custom](xref:urn:topics:extensibility:record-handlers) record handler. + +# [Low-level API (F#)](#tab/api-ll-fs) + +```fs +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularWriter(File.Create "books.csv", dialect)) (fun writer -> + writer.WriteString "author" + writer.WriteString "title" + writer.WriteString "published" + writer.FinishRecord () + writer.WriteString "Lewis Carroll" + writer.WriteString "Alice's Adventures in Wonderland" + writer.WriteDateOnly (new DateOnly(1865, 11, 09)) + writer.FinishRecord () + writer.WriteString "H. G. Wells" + writer.WriteString "The Time Machine" + writer.WriteDateOnly (new DateOnly(1894, 03, 17)) + writer.FinishRecord () +) + +using (new TabularReader(File.OpenRead "books.csv", dialect)) (fun reader -> + while reader.TryReadField () do + () + + while reader.TryPickRecord () do + let mutable field0 = Unchecked.defaultof + let mutable field1 = Unchecked.defaultof + let mutable field2 = Unchecked.defaultof + + reader.TryReadField () |> ignore + reader.TryGetString &field0 |> ignore + reader.TryReadField () |> ignore + reader.TryGetString &field1 |> ignore + reader.TryReadField () |> ignore + reader.TryGetDateOnly &field2 |> ignore + + printfn $"{field0} '{field1}' ({field2})" +) +``` + ---

-How to use minimal API for a limited amount of records: +How to process a limited number of records with the minimal API:

-# [High-level API](#tab/high-level-api) +# [High-level API (C#)](#tab/api-hl-cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); @@ -273,9 +425,51 @@ internal struct Book } ``` -# [Low-level API](#tab/low-level-api) +# [Low-level API (C#)](#tab/api-ll-cs) + +> [!NOTE] +> The minimal API is an extension for the high-level API. + +# [High-level API (F#)](#tab/api-hl-fs) + +```fs +type internal Book = + struct + val mutable Author: string + val mutable Title: string + val mutable Published: Nullable + end + +let dialect = new TabularDialect("\r\n", ',', '\"') + +let mutable private books = [| + new Book ( + Author = "Lewis Carroll", + Title = "Alice's Adventures in Wonderland", + Published = new DateOnly(1865, 11, 09) + ) + new Book ( + Author = "H. G. Wells", + Title = "The Time Machine", + Published = new DateOnly(1894, 03, 17) + ) +|] + +TabularData.WriteRecords(File.Create "books.csv", dialect, books) -N/A +books <- TabularData.ReadRecords(File.OpenRead "books.csv", dialect) + +for book in books do + printfn $"{book.Author} '{book.Title}' ({book.Published})" +``` + +> [!NOTE] +> Using the high-level API with F# in this example requires a [custom](xref:urn:topics:extensibility:record-handlers) record handler. + +# [Low-level API (F#)](#tab/api-ll-fs) + +> [!NOTE] +> The minimal API is an extension for the high-level API. --- @@ -285,7 +479,7 @@ How to display the first ten records from a file with an unknown structure:

-# [High-level API](#tab/high-level-api) +# [High-level API (C#)](#tab/api-hl-cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); @@ -299,19 +493,18 @@ using (var reader = new TabularReader(File.OpenRead("data.csv"), dial } ``` -# [Low-level API](#tab/low-level-api) +# [Low-level API (C#)](#tab/api-ll-cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); -using (var reader = new TabularReader(File.OpenRead("data.csv"), dialect)) +using (var reader = new TabularReader(File.OpenRead("books.csv"), dialect)) { while (reader.TryPickRecord() && (reader.RecordsRead <= 10)) { while (reader.TryReadField()) { - Console.Write(reader.GetString()); - Console.Write('|'); + Console.Write($"{reader.GetString()}|"); } Console.WriteLine(); @@ -319,4 +512,28 @@ using (var reader = new TabularReader(File.OpenRead("data.csv"), dialect)) } ``` +# [High-level API (F#)](#tab/api-hl-fs) + +```fs +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularReader>(File.OpenRead "books.csv", dialect)) (fun reader -> + while reader.TryReadRecord () && (reader.RecordsRead <= 10) do + printfn "%s" (String.concat "|" reader.CurrentRecord) +) +``` + +# [Low-level API (F#)](#tab/api-ll-fs) + +```fs +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularReader(File.OpenRead "books.csv", dialect)) (fun reader -> + while reader.TryPickRecord () && (reader.RecordsRead <= 10) do + while reader.TryReadField () do + printf "%s|" (reader.GetString ()) + printfn "" +) +``` + --- diff --git a/doc/template/public/main.js b/doc/template/public/main.js index 50e1be3..8bbf556 100644 --- a/doc/template/public/main.js +++ b/doc/template/public/main.js @@ -1,5 +1,10 @@ export default { iconLinks: [ + { + icon: 'box', + href: 'https://nuget.org/packages/Addax.Formats.Tabular', + title: 'NuGet' + }, { icon: 'github', href: 'https://github.com/alexanderkozlenko/addax', diff --git a/doc/topics/benchmarks.md b/doc/topics/benchmarks.md index 7cb7e5e..11c6e20 100644 --- 
a/doc/topics/benchmarks.md +++ b/doc/topics/benchmarks.md @@ -1,27 +1,35 @@ +--- +uid: urn:topics:benchmarks +--- + ## Addax - Benchmarks

+The following benchmarks reflect the approximate time and memory required to process `1,048,576` fields: + +

+ ```txt .NET 8.0.0 (8.0.23.53103) | Method | Mean | Median | Min | Max | Op/s | Allocated | |--------------------------------- |------------:|------------:|------------:|------------:|-------:|----------:| -| 'read string - empty' | 7,176.5 us | 7,186.8 us | 7,082.7 us | 7,246.8 us | 139.34 | 389 B | -| 'read string - regular' | 18,054.2 us | 18,053.4 us | 18,019.4 us | 18,084.6 us | 55.39 | 406 B | -| 'read string - escaped' | 33,885.7 us | 33,849.9 us | 33,788.8 us | 34,088.2 us | 29.51 | 430 B | -| 'read string - empty (saync)' | 7,374.2 us | 7,372.1 us | 7,366.2 us | 7,387.0 us | 135.61 | 389 B | -| 'read string - regular (async)' | 18,727.4 us | 18,726.7 us | 18,690.5 us | 18,769.1 us | 53.40 | 406 B | -| 'read string - escaped (async)' | 36,717.3 us | 36,741.7 us | 36,439.5 us | 36,937.6 us | 27.24 | 433 B | +| 'read string - empty' | 6,738.6 μs | 6,756.1 μs | 6,609.2 μs | 6,906.1 μs | 148.40 | 389 B | +| 'read string - regular' | 17,973.0 μs | 17,939.9 μs | 17,783.3 μs | 18,286.9 μs | 55.64 | 406 B | +| 'read string - escaped' | 33,030.4 μs | 32,845.8 μs | 32,497.5 μs | 34,234.7 μs | 30.28 | 430 B | +| 'read string - empty (saync)' | 7,253.0 μs | 7,235.0 μs | 7,203.0 μs | 7,328.2 μs | 137.87 | 389 B | +| 'read string - regular (async)' | 18,483.3 μs | 18,481.3 μs | 18,430.0 μs | 18,540.9 μs | 54.10 | 406 B | +| 'read string - escaped (async)' | 35,669.9 μs | 35,640.6 μs | 35,502.1 μs | 35,841.1 μs | 28.03 | 430 B | | Method | Mean | Median | Min | Max | Op/s | Allocated | |--------------------------------- |------------:|------------:|------------:|------------:|-------:|----------:| -| 'write string - empty' | 5,417.2 us | 5,404.4 us | 5,340.0 us | 5,503.8 us | 184.60 | 277 B | -| 'write string - regular' | 9,931.1 us | 9,935.4 us | 9,832.0 us | 9,954.2 us | 100.69 | 395 B | -| 'write string - escaped' | 14,797.1 us | 14,799.1 us | 14,746.7 us | 14,829.9 us | 67.58 | 395 B | -| 'write string - empty (async)' | 10,216.9 us | 10,219.3 us | 10,192.3 us | 
10,240.4 us | 97.88 | 283 B | -| 'write string - regular (async)' | 14,833.5 us | 14,832.2 us | 14,818.1 us | 14,853.4 us | 67.41 | 395 B | -| 'write string - escaped (async)' | 20,071.4 us | 20,086.1 us | 19,998.3 us | 20,115.6 us | 49.82 | 406 B | +| 'write string - empty' | 5,341.3 μs | 5,352.1 μs | 5,256.9 μs | 5,423.7 μs | 187.22 | 277 B | +| 'write string - regular' | 9,236.5 μs | 9,238.9 μs | 9,157.1 μs | 9,351.6 μs | 108.27 | 395 B | +| 'write string - escaped' | 14,607.3 μs | 14,603.4 μs | 14,588.9 μs | 14,633.8 μs | 68.46 | 395 B | +| 'write string - empty (async)' | 10,284.7 μs | 10,270.8 μs | 10,175.8 μs | 10,418.4 μs | 97.23 | 283 B | +| 'write string - regular (async)' | 14,646.5 μs | 14,634.4 μs | 14,588.9 μs | 14,711.1 μs | 68.28 | 385 B | +| 'write string - escaped (async)' | 20,475.2 μs | 20,501.8 μs | 20,289.4 μs | 20,557.1 μs | 48.84 | 406 B | Mean : Arithmetic mean of all measurements Error : Half of 99.9% confidence interval diff --git a/doc/topics/extensibility/record-handlers.md b/doc/topics/extensibility/record-handlers.md index 8340268..8841afb 100644 --- a/doc/topics/extensibility/record-handlers.md +++ b/doc/topics/extensibility/record-handlers.md @@ -1,3 +1,7 @@ +--- +uid: urn:topics:extensibility:record-handlers +--- + ## Addax - Record Handlers

@@ -6,12 +10,14 @@

-A simple example of a custom record handler that interprets a record as the `(double, double)` type: +A simple example of a custom record handler that interprets a record as a tuple with point coordinates:

+# [C#](#tab/cs) + ```cs -internal sealed class PointHandler : TabularHandler<(double, double)> +internal class PointHandler : TabularHandler<(double, double)> { public override TabularRecord<(double, double)> Read(TabularReader reader) { @@ -31,11 +37,37 @@ internal sealed class PointHandler : TabularHandler<(double, double)> } ``` +# [F#](#tab/fs) + +```fs +type internal PointHandler() = + inherit TabularHandler<(double * double)>() + + override this.Read(reader) = + let mutable item1 = Unchecked.defaultof + let mutable item2 = Unchecked.defaultof + + reader.TryReadField () |> ignore + reader.TryGetDouble &item1 |> ignore + reader.TryReadField () |> ignore + reader.TryGetDouble &item2 |> ignore + + new TabularRecord<(double * double)>((item1, item2)) + + override this.Write(writer, record) = + let (item1, item2) = record + + writer.WriteDouble item1 + writer.WriteDouble item2 +``` + +--- +

-# [High-level API](#tab/high-level-api) +# [High-level API (C#)](#tab/api-hl/cs) -The primary approach is to specify the required record handler for reader or writer explicitly: +The primary approach is to specify the record handler for reader or writer explicitly:

@@ -88,8 +120,57 @@ using (var reader = new TabularReader<(double, double)>(File.OpenRead("points.cs } ``` -# [Low-level API](#tab/low-level-api) +# [High-level API (F#)](#tab/api-hl/fs) + +The primary approach is to specify the record handler for reader or writer explicitly: + +

+ +```fs +let private handler = new PointHandler() +let dialect = new TabularDialect("\r\n", ',', '\"') -N/A +using (new TabularWriter<(double * double)>(File.Create "points.csv", dialect, handler = handler)) (fun writer -> + let point1 = (double 50.4501, double 30.5234) + let point2 = (double 45.4215, double 75.6972) + + writer.WriteRecord &point1 + writer.WriteRecord &point2 +) + +using (new TabularReader<(double * double)>(File.OpenRead "points.csv", dialect, handler = handler)) (fun reader -> + while reader.TryReadRecord () do + let (lat, lon) = reader.CurrentRecord + + printfn $"{lat} N, {lon} W" +) +``` + +

+ +Additionally, it can be added to the `TabularRegistry.Handlers` shared collection with generated record handlers: + +

+ +```fs +TabularRegistry.Handlers.Add(typeof<(double * double)>, new PointHandler()) + +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularWriter<(double * double)>(File.Create "points.csv", dialect)) (fun writer -> + let point1 = (double 50.4501, double 30.5234) + let point2 = (double 45.4215, double 75.6972) + + writer.WriteRecord &point1 + writer.WriteRecord &point2 +) + +using (new TabularReader<(double * double)>(File.OpenRead "points.csv", dialect)) (fun reader -> + while reader.TryReadRecord () do + let (lat, lon) = reader.CurrentRecord + + printfn $"{lat} N, {lon} W" +) +``` --- diff --git a/doc/topics/extensibility/value-converters.md b/doc/topics/extensibility/value-converters.md index 2ebf121..713d8a5 100644 --- a/doc/topics/extensibility/value-converters.md +++ b/doc/topics/extensibility/value-converters.md @@ -1,3 +1,7 @@ +--- +uid: urn:topics:extensibility:value-converters +--- + ## Addax - Value Converters

@@ -6,12 +10,14 @@

-A complete example of a custom value converter that handles values of the `System.DateTime` type represented as Unix timestamps: +A complete example of a custom value converter that handles `System.DateTime` values represented as Unix timestamps:

+# [C#](#tab/cs) + ```cs -internal sealed class UnixDateTimeConverter : TabularConverter +internal class UnixDateTimeConverter : TabularConverter { public override bool TryFormat(DateTime value, Span destination, IFormatProvider? provider, out int charsWritten) { @@ -38,9 +44,33 @@ internal sealed class UnixDateTimeConverter : TabularConverter } ``` +# [F#](#tab/fs) + +```fs +type internal UnixDateTimeConverter() = + inherit TabularConverter() + + override this.TryFormat(value, destination, provider, charsWritten) = + let seconds = int64 (value.ToUniversalTime () - DateTime.UnixEpoch).TotalSeconds + + seconds.TryFormat(destination, &charsWritten, "g", provider) + + override this.TryParse(source, provider, value) = + let mutable seconds = Unchecked.defaultof + + if Int64.TryParse(source, NumberStyles.Integer, provider, &seconds) then + value <- DateTime.UnixEpoch.AddSeconds (float seconds) + true + else + value <- Unchecked.defaultof + false +``` + +--- +

-# [High-level API](#tab/high-level-api) +# [High-level API (C#)](#tab/api-hl/cs) ```cs var dialect = new TabularDialect("\r\n", ',', '\"'); @@ -91,7 +121,7 @@ internal struct Book } ``` -# [Low-level API](#tab/low-level-api) +# [Low-level API (C#)](#tab/api-ll/cs) ```cs using Addax.Formats.Tabular; @@ -116,22 +146,94 @@ using (var reader = new TabularReader(File.OpenRead("books.csv"), dialect)) while (reader.TryPickRecord()) { reader.TryReadField(); - reader.TryGetString(out var author); + reader.TryGetString(out var field0); reader.TryReadField(); - reader.TryGetString(out var title); + reader.TryGetString(out var field1); reader.TryReadField(); - reader.TryGet(converter, out var published); + reader.TryGet(converter, out var field2); + + Console.WriteLine($"{field0} '{field1}' ({field2})"); + } +} +``` + +

+ +Consider adding extension methods for using a custom value converter with the low-level API: + +

+ +```cs +internal static class TabularUnixDateTimeExtensions +{ + private static readonly UnixDateTimeConverter s_converter = new(); + + public static bool TryGetUnixDateTime(this TabularReader reader, out DateTime value) + { + return reader.TryGet(s_converter, out value); + } + + public static DateTime GetUnixDateTime(this TabularReader reader) + { + return reader.Get(s_converter); + } + + public static void WriteUnixDateTime(this TabularWriter writer, DateTime value) + { + writer.Write(value, s_converter); + } - Console.WriteLine($"{author} '{title}' ({published})"); + public static ValueTask WriteUnixDateTimeAsync(this TabularWriter writer, DateTime value, CancellationToken cancellationToken) + { + return writer.WriteAsync(value, s_converter, cancellationToken); } } ``` +# [High-level API (F#)](#tab/api-hl/fs) + +> [!NOTE] +> Using a custom value converter in the high-level API with F# requires a custom record handler. + +# [Low-level API (F#)](#tab/api-ll/fs) + +```fs +let private converter = new UnixDateTimeConverter() +let dialect = new TabularDialect("\r\n", ',', '\"') + +using (new TabularWriter(File.Create "books.csv", dialect)) (fun writer -> + writer.WriteString "Lewis Carroll" + writer.WriteString "Alice's Adventures in Wonderland" + writer.Write (new DateTime(1865, 11, 09, 0, 0, 0, DateTimeKind.Utc), converter) + writer.FinishRecord () + writer.WriteString "H. G. 
Wells" + writer.WriteString "The Time Machine" + writer.Write (new DateTime(1894, 03, 17, 0, 0, 0, DateTimeKind.Utc), converter) + writer.FinishRecord () +) + +using (new TabularReader(File.OpenRead "books.csv", dialect)) (fun reader -> + while reader.TryPickRecord () do + let mutable field0 = Unchecked.defaultof + let mutable field1 = Unchecked.defaultof + let mutable field2 = Unchecked.defaultof + + reader.TryReadField () |> ignore + reader.TryGetString &field0 |> ignore + reader.TryReadField () |> ignore + reader.TryGetString &field1 |> ignore + reader.TryReadField () |> ignore + reader.TryGet (converter, &field2) |> ignore + + printfn $"{field0} '{field1}' ({field2})" +) +``` + ---

-### Standard date and time converters +### Standard converters

diff --git a/doc/topics/features.md b/doc/topics/features.md index 73a8057..21ba22c 100644 --- a/doc/topics/features.md +++ b/doc/topics/features.md @@ -1,3 +1,7 @@ +--- +uid: urn:topics:features +--- + ## Addax - Features

@@ -28,7 +32,7 @@ The framework has built-in support for working with tabular fields as values of |`System.Int128`|Format specifier: `g`|| |`System.SByte`|Format specifier: `g`|| |`System.Single`|Format specifier: `g`|| -|`System.String`|Up to 2,147,483,591 UTF-16 code units|| +|`System.String`|Up to `2,147,483,591` UTF-16 code units|| |`System.TimeOnly`|`HH':'mm':'ss.FFFFFFF`|RFC 3339 / ISO 8601-1:2019| |`System.TimeSpan`|`[-]'P'd'DT'h'H'm'M's'.'FFFFFFF'S'`|RFC 3339 / ISO 8601-1:2019| |`System.UInt16`|Format specifier: `g`|| @@ -63,20 +67,36 @@ The field and record readers can advance through tabular data without reading it

+# [C#](#tab/cs) + ```cs -public sealed class TabularReader +public class TabularReader { public bool TrySkipField(); public ValueTask<bool> TrySkipFieldAsync(CancellationToken cancellationToken); } -public sealed class TabularReader<T> +public class TabularReader<T> { public bool TrySkipRecord(); public ValueTask<bool> TrySkipRecordAsync(CancellationToken cancellationToken); } ``` +# [F#](#tab/fs) + +```fs +type TabularReader = + member TrySkipField: unit -> bool + member TrySkipFieldAsync: CancellationToken -> ValueTask<bool> + +type TabularReader<'T> = + member TrySkipRecord: unit -> bool + member TrySkipRecordAsync: CancellationToken -> ValueTask<bool> +``` + +--- +

### Memory Usage @@ -87,19 +107,57 @@ The field reader provides access to the last read field in a way that allows rea

+# [C#](#tab/cs) + ```cs -public sealed class TabularReader +public class TabularReader { - public ReadOnlyMemory<char> CurrentField { get; } + public ReadOnlyMemory<char> CurrentField + { + get; + } } ``` +# [F#](#tab/fs) + +```fs +type TabularReader = + member CurrentField: ReadOnlyMemory<char> + with get() +``` + +--- +

The default string factory supports a mode with a thread-safe pool based on hash codes, reducing allocations when reading fields as strings:

+# [C#](#tab/cs) + ```cs -var options = new TabularOptions { StringFactory = new(maxLength: 128) }; +var options = new TabularOptions +{ + StringFactory = new(maxLength: 128) +}; +``` + +# [F#](#tab/fs) + +```fs +let options = new TabularOptions ( + StringFactory = new TabularStringFactory(maxLength = 128) +) ``` + +--- + +

+ +### References + +

+ +- [W3C - Model for Tabular Data and Metadata on the Web](https://w3.org/TR/2015/REC-tabular-data-model-20151217) diff --git a/doc/topics/grammar.md b/doc/topics/grammar.md index 6cfefb2..9d513bf 100644 --- a/doc/topics/grammar.md +++ b/doc/topics/grammar.md @@ -1,8 +1,16 @@ +--- +uid: urn:topics:grammar +--- + ## Addax - Grammar

-The engine is based on the following generalized grammar for tabular data: +### Grammar + +

+ +The engine is based on the following generalized EBNF grammar for tabular data:

@@ -29,5 +37,12 @@ char = ? %x00-10FFFF ?;

-> [!NOTE] -> Fields with line terminator characters are written as escaped to ensure compatibility with more specific dialects, such as RFC 4180. +Fields with line terminator characters are written as escaped to ensure compatibility with more specific dialects, such as RFC 4180. + +

+ +### References + +

+ +- [W3C - Model for Tabular Data and Metadata on the Web](https://w3.org/TR/2015/REC-tabular-data-model-20151217) diff --git a/doc/topics/guidelines.md b/doc/topics/guidelines.md new file mode 100644 index 0000000..197e6d3 --- /dev/null +++ b/doc/topics/guidelines.md @@ -0,0 +1,13 @@ +--- +uid: urn:topics:guidelines +--- + +## Addax - Guidelines + +

+ +### Guidelines + +

+ +- **DO** reuse instances of the `Addax.Formats.Tabular.TabularDialect` type as they contain pre-calculated data for parsing and formatting. diff --git a/doc/topics/limitations.md b/doc/topics/limitations.md index c71e024..3c8ea55 100644 --- a/doc/topics/limitations.md +++ b/doc/topics/limitations.md @@ -1,6 +1,14 @@ +--- +uid: urn:topics:limitations +--- + ## Addax - Limitations

+### Limitations + +

+ - The maximum supported field length varies from $2^{30} - 31$ to $2^{31} - 59$ characters, dependent on escaping. - No built-in support for source generation of record handlers in languages other than C# (version 11 or higher). diff --git a/doc/topics/toc.yml b/doc/topics/toc.yml index ff66f28..687d6ec 100644 --- a/doc/topics/toc.yml +++ b/doc/topics/toc.yml @@ -4,6 +4,8 @@ href: features.md - name: Extensibility href: extensibility/toc.yml +- name: Guidelines + href: guidelines.md - name: Limitations href: limitations.md - name: Grammar diff --git a/src/Addax.Formats.Tabular.Analyzers.CSharp/TabularHandlerParser.cs b/src/Addax.Formats.Tabular.Analyzers.CSharp/TabularHandlerParser.cs index 2f234b7..9531d15 100644 --- a/src/Addax.Formats.Tabular.Analyzers.CSharp/TabularHandlerParser.cs +++ b/src/Addax.Formats.Tabular.Analyzers.CSharp/TabularHandlerParser.cs @@ -112,34 +112,34 @@ public ImmutableArray GetRecordMappings(CSharpCompilation var recordMembers = GetMappedRecordMembers(recordType, fieldOrderAttributeType, cancellationToken); var fieldMappingsBuilder = ImmutableDictionary.CreateBuilder(); - foreach (var recordMemberInfo in recordMembers) + foreach (var (recordMember, fieldOrderAttribute) in recordMembers) { cancellationToken.ThrowIfCancellationRequested(); - if (recordMemberInfo.Member.IsStatic) + if (recordMember.IsStatic) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0002, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0002, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; } - var valueTypeInfo = GetValueTypeInfo(recordMemberInfo.Member); + var valueTypeInfo = GetValueTypeInfo(recordMember); if (valueTypeInfo.Type.IsStatic || (valueTypeInfo.Type.IsValueType && valueTypeInfo.Type.IsRefLikeType)) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0002, recordMemberInfo.Member.Locations.FirstOrDefault())); + 
context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0002, recordMember.Locations.FirstOrDefault())); continue; } var converterTypeName = default(string); - if (!TryGetAttribute(recordMemberInfo.Member, converterAttributeType, out var converterAttribute, cancellationToken)) + if (!TryGetAttribute(recordMember, converterAttributeType, out var converterAttribute, cancellationToken)) { if (!valueTypeInfo.IsSupported) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0003, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0003, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; @@ -149,7 +149,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation { if (!TryGetConverterType(converterAttribute, out var converterType)) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; @@ -157,7 +157,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation if (!TryGetConverterValueType(converterType, converterBaseType, out var converterValueType)) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; @@ -165,7 +165,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation if (!SymbolEqualityComparer.Default.Equals(valueTypeInfo.Type, converterValueType)) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; @@ -173,7 +173,7 @@ public ImmutableArray 
GetRecordMappings(CSharpCompilation if (!TryGetDefaultConstructor(converterType, out var converterConstructor)) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; @@ -181,7 +181,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation if (!compilation.IsSymbolAccessibleWithin(converterConstructor, compilation.Assembly)) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0004, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; @@ -190,7 +190,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation converterTypeName = converterType.ToDisplayString(s_displayFormat); } - if (!TryGetFieldOrder(recordMemberInfo.FieldOrderAttribute, out var fieldOrder)) + if (!TryGetFieldOrder(fieldOrderAttribute, out var fieldOrder)) { recordTypeHasErrors = true; @@ -199,7 +199,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation if (fieldOrder < 0) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0005, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0005, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; @@ -207,13 +207,13 @@ public ImmutableArray GetRecordMappings(CSharpCompilation if (fieldMappingsBuilder.ContainsKey(fieldOrder)) { - context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0005, recordMemberInfo.Member.Locations.FirstOrDefault())); + context.ReportDiagnostic(Diagnostic.Create(s_diagnostic0005, recordMember.Locations.FirstOrDefault())); recordTypeHasErrors = true; continue; } - var mappedMemberAccess = GetTypeMemberAccess(recordMemberInfo.Member); + var mappedMemberAccess = 
GetTypeMemberAccess(recordMember); if (!recordTypeHasDefaultConstructor || !compilation.IsSymbolAccessibleWithin(recordTypeConstructor, compilation.Assembly)) { @@ -227,7 +227,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation var fieldNameLiteral = default(SyntaxToken?); - if (TryGetAttribute(recordMemberInfo.Member, fieldNameAttributeType, out var fieldNameAttribute, cancellationToken)) + if (TryGetAttribute(recordMember, fieldNameAttributeType, out var fieldNameAttribute, cancellationToken)) { if (TryGetFieldName(fieldNameAttribute, out var fieldName)) { @@ -236,7 +236,7 @@ public ImmutableArray GetRecordMappings(CSharpCompilation } var fieldMapping = new TabularFieldMapping( - recordMemberInfo.Member.Name, + recordMember.Name, mappedMemberAccess, valueTypeInfo.AsNullableT, valueTypeInfo.Name, diff --git a/src/Addax.Formats.Tabular.Benchmarks/Addax.Formats.Tabular.Benchmarks.csproj b/src/Addax.Formats.Tabular.Benchmarks/Addax.Formats.Tabular.Benchmarks.csproj index 8b13c42..9bef387 100644 --- a/src/Addax.Formats.Tabular.Benchmarks/Addax.Formats.Tabular.Benchmarks.csproj +++ b/src/Addax.Formats.Tabular.Benchmarks/Addax.Formats.Tabular.Benchmarks.csproj @@ -5,6 +5,7 @@ enable enable en + true diff --git a/src/Addax.Formats.Tabular.Benchmarks/Program.cs b/src/Addax.Formats.Tabular.Benchmarks/Program.cs index f903c64..1cd3a21 100644 --- a/src/Addax.Formats.Tabular.Benchmarks/Program.cs +++ b/src/Addax.Formats.Tabular.Benchmarks/Program.cs @@ -2,6 +2,7 @@ using BenchmarkDotNet.Columns; using BenchmarkDotNet.Configs; using BenchmarkDotNet.Diagnosers; +using BenchmarkDotNet.Engines; using BenchmarkDotNet.Exporters; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Loggers; @@ -12,12 +13,16 @@ var config = ManualConfig .CreateEmpty() - .AddJob(Job.Default.WithToolchain(InProcessEmitToolchain.Instance)) + .AddJob(Job.Default + .WithToolchain(InProcessEmitToolchain.Instance) + .WithStrategy(RunStrategy.Throughput)) .AddDiagnoser(MemoryDiagnoser.Default) 
.AddColumnProvider(DefaultColumnProviders.Instance) .AddColumn(StatisticColumn.OperationsPerSecond, StatisticColumn.Median, StatisticColumn.Min, StatisticColumn.Max) - .WithSummaryStyle(SummaryStyle.Default.WithSizeUnit(SizeUnit.B).WithTimeUnit(TimeUnit.Microsecond)) + .WithSummaryStyle(SummaryStyle.Default + .WithSizeUnit(SizeUnit.B) + .WithTimeUnit(TimeUnit.Microsecond)) .AddLogger(ConsoleLogger.Default) - .AddExporter(MarkdownExporter.Console); + .AddExporter(MarkdownExporter.GitHub); BenchmarkSwitcher.FromAssembly(Assembly.GetEntryAssembly()!).Run(args, config); diff --git a/src/Addax.Formats.Tabular/Properties/Package/build/Addax.Formats.Tabular.targets b/src/Addax.Formats.Tabular/Properties/Package/buildTransitive/Addax.Formats.Tabular.targets similarity index 82% rename from src/Addax.Formats.Tabular/Properties/Package/build/Addax.Formats.Tabular.targets rename to src/Addax.Formats.Tabular/Properties/Package/buildTransitive/Addax.Formats.Tabular.targets index 6edf7cf..2d0d64f 100644 --- a/src/Addax.Formats.Tabular/Properties/Package/build/Addax.Formats.Tabular.targets +++ b/src/Addax.Formats.Tabular/Properties/Package/buildTransitive/Addax.Formats.Tabular.targets @@ -3,7 +3,7 @@ AfterTargets="ResolvePackageDependenciesForBuild;ResolveNuGetPackageAssets" Condition="'$(DisableAddaxFormatsTabularSourceGenerator)' == 'true'"> - + \ No newline at end of file diff --git a/src/Addax.Formats.Tabular/TabularData.cs b/src/Addax.Formats.Tabular/TabularData.cs index c02a057..17e9456 100644 --- a/src/Addax.Formats.Tabular/TabularData.cs +++ b/src/Addax.Formats.Tabular/TabularData.cs @@ -13,7 +13,7 @@ public static class TabularData /// The dialect to use for reading. /// The options to control the behavior during reading. /// The handler to read a instance from a record. - /// An array containing all records in the stream. + /// An array of records. /// does not support reading. /// or is . /// The record handler is not specified and cannot be found in the registry. 
@@ -39,7 +39,7 @@ public static T[] ReadRecords(Stream stream, TabularDialect dialect, TabularO /// The stream to read from. /// The dialect to use for reading. /// The options to control the behavior during reading. - /// An array containing all records in the stream. + /// An array of records. /// does not support reading. /// or is . /// The record handler is not specified and cannot be found in the registry. @@ -54,7 +54,7 @@ public static T[] ReadRecords(Stream stream, TabularDialect dialect, TabularO /// The type of an object that represents a record. /// The stream to read from. /// The dialect to use for reading. - /// An array containing all records in the stream. + /// An array of records. /// does not support reading. /// or is . /// The record handler is not specified and cannot be found in the registry. @@ -72,7 +72,7 @@ public static T[] ReadRecords(Stream stream, TabularDialect dialect) /// The options to control the behavior during reading. /// The handler to read a instance from a record. /// The token to monitor for cancellation requests. - /// A task object that, when awaited, produces an array containing all records in the stream. + /// A task object that, when awaited, produces an array of records. /// does not support reading. /// or is . /// The record handler is not specified and cannot be found in the registry. @@ -102,7 +102,7 @@ public static async ValueTask ReadRecordsAsync(Stream stream, TabularDia /// The dialect to use for reading. /// The options to control the behavior during reading. /// The token to monitor for cancellation requests. - /// A task object that, when awaited, produces an array containing all records in the stream. + /// A task object that, when awaited, produces an array of records. /// does not support reading. /// or is . /// The record handler is not specified and cannot be found in the registry. 
@@ -119,7 +119,7 @@ public static ValueTask ReadRecordsAsync(Stream stream, TabularDialect d /// The stream to read from. /// The dialect to use for reading. /// The token to monitor for cancellation requests. - /// A task object that, when awaited, produces an array containing all records in the stream. + /// A task object that, when awaited, produces an array of records. /// does not support reading. /// or is . /// The record handler is not specified and cannot be found in the registry. diff --git a/src/Addax.Formats.Tabular/TabularHandler`1.cs b/src/Addax.Formats.Tabular/TabularHandler`1.cs index 018eabd..a5017ca 100644 --- a/src/Addax.Formats.Tabular/TabularHandler`1.cs +++ b/src/Addax.Formats.Tabular/TabularHandler`1.cs @@ -55,6 +55,9 @@ public virtual ValueTask WriteAsync(TabularWriter writer, T record, Cancellation /// An collection of strings or . public virtual IEnumerable? Header { - get; + get + { + return null; + } } } diff --git a/src/Directory.Metadata.props b/src/Directory.Metadata.props index ca232cd..0b783e6 100644 --- a/src/Directory.Metadata.props +++ b/src/Directory.Metadata.props @@ -1,8 +1,9 @@  1.0.0 - rc + MIT + https://alexanderkozlenko.github.io/addax