From 6939a037b7af43e6f81f721a453de4a27ef29373 Mon Sep 17 00:00:00 2001 From: Erik Knudsen <31932266+erik1066@users.noreply.github.com> Date: Wed, 28 Aug 2024 18:43:40 -0400 Subject: [PATCH 1/2] Example folder cleanup Changed example app to reference the NuGet package instead of a relative path to the csproj file. Removed the .sln file for the example app, too, as this was not needed. --- .../demo-console/RapidCsv.ConsoleDemo.csproj | 2 +- .../demo-console/demo-console.generated.sln | 25 ------------------- 2 files changed, 1 insertion(+), 26 deletions(-) delete mode 100644 examples/demo-console/demo-console.generated.sln diff --git a/examples/demo-console/RapidCsv.ConsoleDemo.csproj b/examples/demo-console/RapidCsv.ConsoleDemo.csproj index aac176a..51ea584 100644 --- a/examples/demo-console/RapidCsv.ConsoleDemo.csproj +++ b/examples/demo-console/RapidCsv.ConsoleDemo.csproj @@ -1,7 +1,7 @@  - + diff --git a/examples/demo-console/demo-console.generated.sln b/examples/demo-console/demo-console.generated.sln deleted file mode 100644 index 5bffde0..0000000 --- a/examples/demo-console/demo-console.generated.sln +++ /dev/null @@ -1,25 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.5.002.0 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "RapidCsv.ConsoleDemo", "RapidCsv.ConsoleDemo.csproj", "{05815FA9-F697-46B4-849C-19F9B324B3CE}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {05815FA9-F697-46B4-849C-19F9B324B3CE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {05815FA9-F697-46B4-849C-19F9B324B3CE}.Debug|Any CPU.Build.0 = Debug|Any CPU - {05815FA9-F697-46B4-849C-19F9B324B3CE}.Release|Any CPU.ActiveCfg = Release|Any CPU - {05815FA9-F697-46B4-849C-19F9B324B3CE}.Release|Any 
CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {EC89F3B0-4381-4C45-8992-9C4C0AEF0D44} - EndGlobalSection -EndGlobal From 10e8bd5a5798a02f346517e3e07892248f038092 Mon Sep 17 00:00:00 2001 From: Erik Knudsen <31932266+erik1066@users.noreply.github.com> Date: Wed, 28 Aug 2024 19:14:18 -0400 Subject: [PATCH 2/2] Added another example app Added another example app to the examples folder, demonstrating how to use the profile-driven content validation feature of the library. The README.md file was updated to cover how to use the profile-driven content validation features. --- README.md | 118 +++++++++++++++- .../Program.cs | 128 ++++++++++++++++++ .../demo-console-content-validation.csproj | 15 ++ 3 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 examples/demo-console-content-validation/Program.cs create mode 100644 examples/demo-console-content-validation/demo-console-content-validation.csproj diff --git a/README.md b/README.md index 5e375be..0e82713 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ static Stream GenerateStreamFromString(string s) The [examples](/examples/) folder contains example code that demonstrates how to use RapidCsv. -### Simplest Example: .NET Console App +### Example #1: RFC 4180 validation in a .NET Console App Let's look at the `RapidCsv.ConsoleDemo` project. @@ -105,6 +105,122 @@ That's all there is to it. > This console app includes a hard-coded CSV file in `program.cs` to make it as simple as possible to run the example. A CSV input file is therefore not required. +### Example #2: Profile-driven content validation + +See [demo-console-content-validation](./examples/demo-console-content-validation/) for working and runnable code that uses the example below. + +Let's say you want more than RFC 4180 validation. 
Perhaps you have a CSV file like the one below: + + +``` +NAME,AGE,DOB,PHONE,STATUS +John,23,1/1/2012,555-555-5555,actv +Mary,34,1/1/1990,555-555-5555,inac +Jane,25,1/1/2010,555-555-5555,actv +Hana,55,1/1/1970,555-555-555X,unkn +``` + +Let's suppose we want to validate this CSV file against the following rules: + +1. `NAME` must be 0-25 characters +1. `AGE` must be an integer +1. `DOB` must use `m/d/yyyy` format +1. `PHONE` must be a valid 10-digit US phone number +1. `STATUS` must be one of two values, `actv` or `inac`; all other values are invalid + +We can create an optional validation profile in JSON: + +```json +{ + "$schema": "rapid-csv/validator-config-schema.json", + "name": "Acme Bookstore Customer Records", + "description": "Validation profile for the CSV records of our Acme bookstore customers", + "filename": "abc123.csv", + "separator": ",", + "has_header": true, + "columns": [ + { + "name": "NAME", + "description": "The customer's name", + "ordinal": 1, + "type": "string", + "max": 25, + "min": 0, + "required": false, + "null_or_empty": true, + "format": null, + "regex": null + }, + { + "name": "AGE", + "description": "The customer's age", + "ordinal": 2, + "type": "integer", + "max": 125, + "min": 7, + "required": false, + "null_or_empty": true, + "format": null, + "regex": null + }, + { + "name": "DOB", + "description": "The customer's date of birth", + "ordinal": 3, + "type": "string", + "required": false, + "null_or_empty": true, + "format": "m/d/yyyy", + "regex": null + }, + { + "name": "PHONE", + "description": "The customer's phone number", + "ordinal": 4, + "type": "string", + "required": false, + "null_or_empty": true, + "format": null, + "regex": "^(\\+\\d{1,2}\\s)?\\(?\\d{3}\\)?[\\s.-]\\d{3}[\\s.-]\\d{4}$" + }, + { + "name": "STATUS", + "description": "Customer status", + "ordinal": 5, + "type": "enum", + "values": [ "actv", "inac" ], + "required": false, + "null_or_empty": true, + "format": null, + "regex": null + } + ] +} +``` +Note the use of 
the `format` property in the `DOB` column definition, the `regex` for the `PHONE` column, the use of `min` and `max` for `NAME`, the `integer` type for `AGE`, and the `enum` with `values` in the `STATUS` column. Together, these define the five rules outlined earlier. + +Using the profile is straightforward: + +```csharp +CsvValidator validator = new CsvValidator(); +var options = new ValidationOptions() +{ + Separator = ',', + HasHeaderRow = true, + ValidationProfile = validationProfile +}; + +Stream content = GenerateStreamFromString(csvContent); +ValidationResult result = validator.Validate(content: content, options: options); +``` + +In other words, we read the raw JSON into memory and assign it to the `ValidationProfile` property of the `ValidationOptions` object. The validator will then use the profile to execute these content checks. + +`ValidationProfile` is optional: when it is left empty, the validator conducts only the basic RFC 4180 checks and applies no content validation rules. + +The added overhead of these profile-driven content checks can be significant in terms of performance when running the validator at scale. Use caution in applying these rules and only apply them when real-time content validation is required for the use case. + + ## Architecture and Design Decisions RapidCsv is meant to be used in situations where one needs speed and memory efficiency _at scale_. For instance, if you're required to process CSV files in near real-time at high volume, where validation results are viewable by clients almost instantly after file submission, then this is a library worth considering. 
using RapidCsv;

// Example console app: validates a hard-coded CSV document with RapidCsv using a
// JSON validation profile, then prints the validation summary and every message
// the validator reports. No input file is needed; both the CSV and the profile
// are embedded below.

// Sample CSV input. In the last data row, PHONE ("555-555-555X") does not match
// the profile's regex and STATUS ("unkn") is not among the profile's enum values.
string csvContent = """
    NAME,AGE,DOB,PHONE,STATUS
    John,23,1/1/2012,555-555-5555,actv
    Mary,34,1/1/1990,555-555-5555,inac
    Jane,25,1/1/2010,555-555-5555,actv
    Hana,55,1/1/1970,555-555-555X,unkn
    """;

// Validation profile, passed to the validator as raw JSON text. A raw string
// literal keeps the JSON readable (no doubled quotes); the backslashes in the
// regex are JSON escapes and are passed through unchanged.
string validationProfile = """
    {
        "$schema": "rapid-csv/validator-config-schema.json",
        "name": "Acme Bookstore Customer Records",
        "description": "Validation profile for the CSV records of our Acme bookstore customers",
        "filename": "abc123.csv",
        "separator": ",",
        "has_header": true,
        "columns": [
            {
                "name": "NAME",
                "description": "The customer's name",
                "ordinal": 1,
                "type": "string",
                "max": 25,
                "min": 0,
                "required": false,
                "null_or_empty": true,
                "format": null,
                "regex": null
            },
            {
                "name": "AGE",
                "description": "The customer's age",
                "ordinal": 2,
                "type": "integer",
                "max": 125,
                "min": 7,
                "required": false,
                "null_or_empty": true,
                "format": null,
                "regex": null
            },
            {
                "name": "DOB",
                "description": "The customer's date of birth",
                "ordinal": 3,
                "type": "string",
                "required": false,
                "null_or_empty": true,
                "format": "m/d/yyyy",
                "regex": null
            },
            {
                "name": "PHONE",
                "description": "The customer's phone number",
                "ordinal": 4,
                "type": "string",
                "required": false,
                "null_or_empty": true,
                "format": null,
                "regex": "^(\\+\\d{1,2}\\s)?\\(?\\d{3}\\)?[\\s.-]\\d{3}[\\s.-]\\d{4}$"
            },
            {
                "name": "STATUS",
                "description": "Customer status",
                "ordinal": 5,
                "type": "enum",
                "values": [ "actv", "inac" ],
                "required": false,
                "null_or_empty": true,
                "format": null,
                "regex": null
            }
        ]
    }
    """;

CsvValidator validator = new CsvValidator();
var options = new ValidationOptions()
{
    Separator = ',',
    HasHeaderRow = true,
    Quote = '"',
    ValidationProfile = validationProfile
};

// The stream is owned by this script, so dispose it when the program ends.
using Stream content = GenerateStreamFromString(csvContent);
ValidationResult result = validator.Validate(content: content, options: options);

// Summary of the validation run.
Console.WriteLine($"Valid File = {result.IsValid}");
Console.WriteLine($" Data Rows = {result.DataRowCount}");
// The label already states the unit, so the value is printed without a second "ms".
Console.WriteLine($" Elapsed time (ms) = {result.ElapsedMilliseconds:N0}");
Console.WriteLine($" Columns = {result.FieldCount}");
Console.WriteLine($" Error count = {result.ErrorCount}");
Console.WriteLine($" Warning count = {result.WarningCount}");

if (result.Headers.Count != 0)
{
    Console.WriteLine(" Headers = ");
    for (int i = 0; i < result.Headers.Count; i++)
    {
        // Columns are reported 1-based.
        Console.WriteLine($" Column {i + 1} = {result.Headers[i]}");
    }
}

if (result.Messages.Count != 0)
{
    Console.WriteLine(" Validation Messages = ");
    for (int i = 0; i < result.Messages.Count; i++)
    {
        var msg = result.Messages[i];
        Console.WriteLine($" Message {i + 1} = ");
        Console.WriteLine($" Severity : {msg.Severity}");
        Console.WriteLine($" Code : {msg.Code}");
        Console.WriteLine($" Row : {msg.Row}");

        // Only print the character position when it is non-negative.
        // NOTE(review): a negative value presumably means "not applicable" - confirm against RapidCsv.
        if (msg.Character >= 0)
        {
            Console.WriteLine($" Character : {msg.Character}");
        }

        Console.WriteLine($" Field Num : {msg.FieldNumber}");
        Console.WriteLine($" Field Name : {msg.FieldName}");
        Console.WriteLine($" Err Content: {msg.Content}");
    }
}

// Returns a readable in-memory stream, positioned at the start, containing the
// text of s. The caller owns the returned stream and should dispose it.
static Stream GenerateStreamFromString(string s)
{
    var stream = new MemoryStream();

    // leaveOpen keeps the MemoryStream usable after the writer is disposed;
    // disposing the writer flushes its buffered text into the stream.
    using (var writer = new StreamWriter(stream, leaveOpen: true))
    {
        writer.Write(s);
    }

    stream.Position = 0;
    return stream;
}
b/examples/demo-console-content-validation/demo-console-content-validation.csproj @@ -0,0 +1,15 @@ + + + + + + + + Exe + net8.0 + demo_console_content_validation + enable + enable + + +