Skip to content

Commit

Permalink
Implement schema validation and error reporting enhancements (#57)
Browse files Browse the repository at this point in the history
This update introduces multiple improvements to the schema validation
feature, including a new option to skip schema checks. The error
reporting has been enhanced with clearer and more informative messages.
Multiple validations and error checks have been added to ensure more
robust CSV file and schema verification.
  • Loading branch information
SmetDenis authored Mar 19, 2024
1 parent 69a3da5 commit fc04014
Show file tree
Hide file tree
Showing 25 changed files with 730 additions and 208 deletions.
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
build
tests
vendor
schema-examples
.DS_Store
.editorconfig
.gitattributes
Expand Down
26 changes: 13 additions & 13 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ jobs:
! ./build/csv-blueprint.phar \
validate:csv \
--csv=./tests/fixtures/batch/*.csv \
--schema=./tests/schemas/demo_invalid.yml \
--schema=./tests/schemas/invalid_schema.yml \
--ansi
- name: Upload Artifacts
Expand All @@ -234,20 +234,20 @@ jobs:

- name: 👍 Valid CSV file
run: |
docker run --rm \
-v `pwd`:/parent-host \
jbzoo/csv-blueprint \
validate:csv \
--csv=/parent-host/tests/fixtures/demo.csv \
--schema=/parent-host/tests/schemas/demo_valid.yml \
docker run --rm \
-v `pwd`:/parent-host \
jbzoo/csv-blueprint \
validate:csv \
--csv=/parent-host/tests/fixtures/demo.csv \
--schema=/parent-host/tests/schemas/demo_valid.yml \
--ansi
- name: 👎 Invalid CSV file
run: |
! docker run --rm \
-v `pwd`:/parent-host \
jbzoo/csv-blueprint \
validate:csv \
--csv=/parent-host/tests/fixtures/demo.csv \
--schema=/parent-host/tests/schemas/demo_invalid.yml \
! docker run --rm \
-v `pwd`:/parent-host \
jbzoo/csv-blueprint \
validate:csv \
--csv=/parent-host/tests/fixtures/demo.csv \
--schema=/parent-host/tests/schemas/invalid_schema.yml \
--ansi
23 changes: 12 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@

.PHONY: build

REPORT ?= table
COLUMNS_TEST ?= 150
REPORT ?= table
COLUMNS ?= 150

ifneq (, $(wildcard ./vendor/jbzoo/codestyle/src/init.Makefile))
include ./vendor/jbzoo/codestyle/src/init.Makefile
Expand Down Expand Up @@ -53,7 +53,8 @@ demo-valid: ##@Project Run demo valid CSV
$(call title,"Demo - Valid CSV")
@${PHP_BIN} ./csv-blueprint validate:csv \
--csv=./tests/fixtures/demo.csv \
--schema=./tests/schemas/demo_valid.yml
--schema=./tests/schemas/demo_valid.yml \
--skip-schema -v

demo-docker: ##@Project Run demo via Docker
$(call title,"Demo - Valid CSV \(via Docker\)")
Expand All @@ -76,17 +77,17 @@ demo-docker: ##@Project Run demo via Docker

demo-invalid: ##@Project Run demo invalid CSV
$(call title,"Demo - Invalid CSV")
@${PHP_BIN} ./csv-blueprint validate:csv \
--csv=./tests/fixtures/demo.csv \
--schema=./tests/schemas/demo_invalid.yml \
--report=$(REPORT)
@${PHP_BIN} ./csv-blueprint validate:csv \
--csv=./tests/fixtures/demo.csv \
--schema=./tests/schemas/invalid_schema.yml \
--report=$(REPORT) -v


demo-github: ##@Project Run demo invalid CSV
@${PHP_BIN} ./csv-blueprint validate:csv \
--csv=./tests/fixtures/batch/*.csv \
--schema=./tests/schemas/demo_invalid.yml \
--report=$(REPORT) \
@${PHP_BIN} ./csv-blueprint validate:csv \
--csv=./tests/fixtures/batch/*.csv \
--schema=./tests/schemas/demo_invalid.yml \
--report=$(REPORT) \
--ansi


Expand Down
117 changes: 64 additions & 53 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ csv: # Here are default values. You can skip this section if you don't need to o

columns:
- name: "Column Name (header)" # Any custom name of the column in the CSV file (first row). Required if "csv_structure.header" is true.
description: "Lorem ipsum" # Optional. Description of the column. Not used in the validation process.
description: "Lorem ipsum" # Description of the column. Not used in the validation process.
example: "Some example" # Example of the column value. Not used in the validation process.

# Important notes about the validation rules.
# 1. All rules except "not_empty" ignored for empty strings (length 0).
Expand Down Expand Up @@ -316,10 +317,12 @@ columns:
coef_of_var_max: 10.123

- name: "another_column"
rules:
not_empty: true

- name: "third_column"

- description: "Column with description only. Undefined header name."
rules:
not_empty: true

```
<!-- /full-yml -->
Expand Down Expand Up @@ -353,6 +356,11 @@ You can find launch examples in the [workflow demo](https://github.com/JBZoo/Csv
# You can skip it
quick: no

# Skip schema validation. If you are sure that the schema is correct, you can skip this check.
# Default value: no
# You can skip it
skip-schema: no

```
<!-- /github-actions-yml -->

Expand Down Expand Up @@ -435,38 +443,41 @@ Usage:
validate:csv [options]
Options:
-c, --csv=CSV Path(s) to validate.
You can specify path in which CSV files will be searched (max depth is 10).
Feel free to use glob patterns. Usage examples:
/full/path/file.csv, p/file.csv, p/*.csv, p/**/*.csv, p/**/name-*.csv, **/*.csv, etc. (multiple values allowed)
-s, --schema=SCHEMA Schema filepath.
It can be a YAML, JSON or PHP. See examples on GitHub.
-r, --report=REPORT Report output format. Available options:
text, table, github, gitlab, teamcity, junit [default: "table"]
-Q, --quick[=QUICK] Immediately terminate the check at the first error found.
Of course it will speed up the check, but you will get only 1 message out of many.
If any error is detected, the utility will return a non-zero exit code.
Empty value or "yes" will be treated as "true". [default: "no"]
--no-progress Disable progress bar animation for logs. It will be used only for text output format.
--mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible).
It has higher priority than --non-zero-on-error. Use it at your own risk!
--stdout-only For any error messages, the application will use StdOut instead of StdErr. Use it at your own risk!
--non-zero-on-error Non-zero exit code on any StdErr message.
--timestamp Show timestamp at the beginning of each message. It will be used only for text output format.
--profile Display timing and memory usage information.
--output-mode=OUTPUT-MODE Output format. Available options:
text - Default text output format, user-friendly and easy to read.
cron - Shortcut for crontab. It's basically focused on human-readable logs output.
It's combination of --timestamp --profile --stdout-only --no-progress -vv.
logstash - Logstash output format, for integration with ELK stack.
[default: "text"]
--cron Alias for --output-mode=cron. Deprecated!
-h, --help Display help for the given command. When no command is given display help for the list command
-q, --quiet Do not output any message
-V, --version Display this application version
--ansi|--no-ansi Force (or disable --no-ansi) ANSI output
-n, --no-interaction Do not ask any interactive question
-v|vv|vvv, --verbose Increase the verbosity of messages: 1 for normal output, 2 for more verbose output and 3 for debug
-c, --csv=CSV Path(s) to validate.
You can specify path in which CSV files will be searched (max depth is 10).
Feel free to use glob patterns. Usage examples:
/full/path/file.csv, p/file.csv, p/*.csv, p/**/*.csv, p/**/name-*.csv, **/*.csv, etc. (multiple values allowed)
-s, --schema=SCHEMA Schema filepath.
It can be a YAML, JSON or PHP. See examples on GitHub.
-r, --report=REPORT Report output format. Available options:
text, table, github, gitlab, teamcity, junit [default: "table"]
-Q, --quick[=QUICK] Immediately terminate the check at the first error found.
Of course it will speed up the check, but you will get only 1 message out of many.
If any error is detected, the utility will return a non-zero exit code.
Empty value or "yes" will be treated as "true". [default: "no"]
-S, --skip-schema[=SKIP-SCHEMA] Skip schema validation.
If you are sure that the schema is correct, you can skip this check.
Empty value or "yes" will be treated as "true". [default: "no"]
--no-progress Disable progress bar animation for logs. It will be used only for text output format.
--mute-errors Mute any sort of errors. So exit code will be always "0" (if it's possible).
It has higher priority than --non-zero-on-error. Use it at your own risk!
--stdout-only For any error messages, the application will use StdOut instead of StdErr. Use it at your own risk!
--non-zero-on-error Non-zero exit code on any StdErr message.
--timestamp Show timestamp at the beginning of each message. It will be used only for text output format.
--profile Display timing and memory usage information.
--output-mode=OUTPUT-MODE Output format. Available options:
text - Default text output format, user-friendly and easy to read.
cron - Shortcut for crontab. It's basically focused on human-readable logs output.
It's combination of --timestamp --profile --stdout-only --no-progress -vv.
logstash - Logstash output format, for integration with ELK stack.
[default: "text"]
--cron Alias for --output-mode=cron. Deprecated!
-h, --help Display help for the given command. When no command is given display help for the list command
-q, --quiet Do not output any message
-V, --version Display this application version
--ansi|--no-ansi Force (or disable --no-ansi) ANSI output
-n, --no-interaction Do not ask any interactive question
-v|vv|vvv, --verbose Increase the verbosity of messages: 1 for normal output, 2 for more verbose output and 3 for debug
```
<!-- /validate-csv-help -->
Expand Down Expand Up @@ -495,25 +506,26 @@ Found CSV files: 3
+------+------------------+--------------+--------- demo-1.csv --------------------------------------------------+
(2/3) Invalid file: ./tests/fixtures/batch/demo-2.csv
+------+------------+------------+-------------------------- demo-2.csv ------------------------------------------------------------+
| Line | id:Column | Rule | Message |
+------+------------+------------+--------------------------------------------------------------------------------------------------+
| 2 | 0:Name | length_min | The length of the value "Carl" is 4, which is less than the expected "5" |
| 7 | 0:Name | length_min | The length of the value "Lois" is 4, which is less than the expected "5" |
| 2 | 3:Birthday | date_min | The date of the value "1955-05-14" is parsed as "1955-05-14 00:00:00 +00:00", which is less than |
| | | | the expected "1955-05-15 00:00:00 +00:00 (1955-05-15)" |
| 4 | 3:Birthday | date_min | The date of the value "1955-05-14" is parsed as "1955-05-14 00:00:00 +00:00", which is less than |
| | | | the expected "1955-05-15 00:00:00 +00:00 (1955-05-15)" |
| 5 | 3:Birthday | date_max | The date of the value "2010-07-20" is parsed as "2010-07-20 00:00:00 +00:00", which is greater |
| | | | than the expected "2009-01-01 00:00:00 +00:00 (2009-01-01)" |
+------+------------+------------+-------------------------- demo-2.csv ------------------------------------------------------------+
+------+------------+------------+----------------- demo-2.csv --------------------------------------------------+
| Line | id:Column | Rule | Message |
+------+------------+------------+-------------------------------------------------------------------------------+
| 2 | 0:Name | length_min | The length of the value "Carl" is 4, which is less than the expected "5" |
| 7 | 0:Name | length_min | The length of the value "Lois" is 4, which is less than the expected "5" |
| 2 | 3:Birthday | date_min | The date of the value "1955-05-14" is parsed as "1955-05-14 00:00:00 +00:00", |
| | | | which is less than the expected "1955-05-15 00:00:00 +00:00 (1955-05-15)" |
| 4 | 3:Birthday | date_min | The date of the value "1955-05-14" is parsed as "1955-05-14 00:00:00 +00:00", |
| | | | which is less than the expected "1955-05-15 00:00:00 +00:00 (1955-05-15)" |
| 5 | 3:Birthday | date_max | The date of the value "2010-07-20" is parsed as "2010-07-20 00:00:00 +00:00", |
| | | | which is greater than the expected "2009-01-01 00:00:00 +00:00 (2009-01-01)" |
+------+------------+------------+----------------- demo-2.csv --------------------------------------------------+
(3/3) Invalid file: ./tests/fixtures/batch/sub/demo-3.csv
+------+-----------+------------------+---------------------- demo-3.csv ------------------------------------------------------------+
| Line | id:Column | Rule | Message |
+------+-----------+------------------+----------------------------------------------------------------------------------------------+
| 1 | | filename_pattern | Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: "/demo-[12].csv$/i" |
+------+-----------+------------------+---------------------- demo-3.csv ------------------------------------------------------------+
+------+-----------+------------------+------------ demo-3.csv --------------------------------------------------+
| Line | id:Column | Rule | Message |
+------+-----------+------------------+--------------------------------------------------------------------------+
| 1 | | filename_pattern | Filename "./tests/fixtures/batch/sub/demo-3.csv" does not match pattern: |
| | | | "/demo-[12].csv$/i" |
+------+-----------+------------------+------------ demo-3.csv --------------------------------------------------+
Found 8 issues in 3 out of 3 CSV files.
Expand Down Expand Up @@ -548,7 +560,6 @@ It's random ideas and plans. No orderings and deadlines. <u>But batch processing
**Validation**
* [More aggregate rules](https://github.com/markrogoyski/math-php#statistics---descriptive).
* [More cell rules](https://github.com/Respect/Validation).
* `--strict` option to show warnings if any. It's useful when you want to know about any sort of issues.
* `required` flag for the column.
* Custom cell rule as a callback. It's useful when you have a complex rule that can't be described in the schema file.
* Custom aggregate rule as a callback. It's useful when you have a complex rule that can't be described in the schema file.
Expand Down
6 changes: 6 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ inputs:
description: 'Quick mode. It will not validate all rows. It will stop after the first error.'
default: no
required: true
skip-schema:
description: 'Skip schema validation. If you are sure that the schema is correct, you can skip this check.'
default: no
required: true

runs:
using: 'docker'
Expand All @@ -52,5 +56,7 @@ runs:
- ${{ inputs.report }}
- '--quick'
- ${{ inputs.quick }}
- '--skip-schema'
- ${{ inputs.skip-schema }}
- '--ansi'
- '-vvv'
5 changes: 4 additions & 1 deletion box.json.dist
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

"output" : "build/csv-blueprint.phar",

"directories" : ["src"],
"directories" : [
"schema-examples",
"src"
],
"files" : ["csv-blueprint.php"],
"git-version" : "git-version",

Expand Down
4 changes: 0 additions & 4 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,4 @@
<logging>
<junit outputFile="build/coverage_junit/main.xml"/>
</logging>

<php>
<env name="COLUMNS_TEST" value="150"/>
</php>
</phpunit>
13 changes: 9 additions & 4 deletions schema-examples/full.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
{
"name" : "Column Name (header)",
"description" : "Lorem ipsum",
"example" : "Some example",
"rules" : {
"not_empty" : true,
"exact_value" : "Some string",
Expand Down Expand Up @@ -141,10 +142,14 @@
}
},

{"name" : "another_column"},

{"name" : "third_column"},
{
"name" : "another_column",
"rules" : {"not_empty" : true}
},

{"description" : "Column with description only. Undefined header name."}
{
"name" : "third_column",
"rules" : {"not_empty" : true}
}
]
}
13 changes: 9 additions & 4 deletions schema-examples/full.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
[
'name' => 'Column Name (header)',
'description' => 'Lorem ipsum',
'example' => 'Some example',
'rules' => [
'not_empty' => true,
'exact_value' => 'Some string',
Expand Down Expand Up @@ -157,10 +158,14 @@
],
],

['name' => 'another_column'],

['name' => 'third_column'],
[
'name' => 'another_column',
'rules' => ['not_empty' => true],
],

['description' => 'Column with description only. Undefined header name.'],
[
'name' => 'third_column',
'rules' => ['not_empty' => true],
],
],
];
Loading

0 comments on commit fc04014

Please sign in to comment.