Skip to content

Commit

Permalink
Add check for incompatible 'parquet' and 'returnUnmatched' options
Browse files Browse the repository at this point in the history
  • Loading branch information
pipliggins committed Oct 7, 2024
1 parent 10e2d72 commit adcecc5
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 2 deletions.
8 changes: 8 additions & 0 deletions adtl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ def get_value_unhashed(row: StrDict, rule: Rule, ctx: Context = None) -> Any:
value = rule["values"].get(value, value)
else:
value = rule["values"].get(value)

# recheck if value is empty after mapping (use to map values to None)
if value == "":
return None
# Either source_unit / unit OR source_date / date triggers conversion
# do not parse units if value is empty
if "source_unit" in rule and "unit" in rule:
Expand Down Expand Up @@ -1055,6 +1059,10 @@ def main(argv=None):
include_defs = args.include_def or []
spec = Parser(args.spec, include_defs=include_defs, quiet=args.quiet)

# check for incompatible options
if spec.header.get("returnUnmatched") and args.parquet:
raise ValueError("returnUnmatched and parquet options are incompatible")

# run adtl
adtl_output = spec.parse(args.file, encoding=args.encoding)
adtl_output.save(args.output or spec.name, args.parquet)
Expand Down
4 changes: 2 additions & 2 deletions docs/specification.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ if not present in a datafile, following the same syntax as `fieldPattern` key.
terminal describing the error in the transformation. Transformations requiring multiple
parameters will only return the current field value that was not transformed.
> :warning: This is likely to return columns with non-matching datatypes. External json
validation may fail, as will attempting to use the `--parquet` option to save outputs as
parquet files (which required a consistent type down each column).
validation may fail. This option is incompatible with the `--parquet` option to save
outputs as parquet files (which require a consistent type down each column).

## Validation

Expand Down
17 changes: 17 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,23 @@ def test_main_parquet():
Path("output-table.parquet").unlink()


def test_main_parquet_error():
    """Expect main() to raise ValueError when --parquet is used with the
    return-unmapped parser specification."""
    spec_path = TEST_PARSERS_PATH / "return-unmapped.toml"
    source_path = TEST_SOURCES_PATH / "return-unmapped.csv"

    # Full command line, including the flag expected to trigger the error.
    cli_args = [
        str(spec_path),
        str(source_path),
        "-o",
        "output",
        "--encoding",
        "utf-8",
        "--parquet",
    ]
    expected_message = "returnUnmatched and parquet options are incompatible"

    with pytest.raises(ValueError, match=expected_message):
        parser.main(cli_args)


@responses.activate
def test_main_web_schema(snapshot):
# test with schema on the web
Expand Down

0 comments on commit adcecc5

Please sign in to comment.