Skip to content

Commit

Permalink
chore: add validation for missing headers in csv resource file (#191)
Browse files Browse the repository at this point in the history
ECALC-201

* chore: add validation for missing headers in csv resource file

* chore: add type hints

* chore: split validation into separate checks for missing headers and invalid characters

* chore: rename fixture
  • Loading branch information
markusrf authored Sep 19, 2023
1 parent 74fcfd8 commit 60e8403
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -390,10 +390,12 @@ def _validate_headers(headers: List[str]):
for header in headers:
if not re.match(r"^[A-Za-z][A-Za-z0-9_.,\-\s#+:\/]*$", header):
raise ValueError(
"Csv input file must include header, each header value must start with a letter in the english "
"Each header value must start with a letter in the english "
"alphabet (a-zA-Z). And may only contain letters, spaces, numbers or any of the following characters "
"[ _ - # + : . , /] "
)
elif re.match(r"^Unnamed: \d+$", header):
raise ValueError("CSV input file must include header")


def _validate_not_nan(columns: List[List]):
Expand Down
51 changes: 51 additions & 0 deletions src/ecalc/libraries/libecalc/common/tests/input/test_file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,17 @@ def facility_resource_missing_value_file(tmp_path):
return facility_file


def create_csv_from_line(tmp_path: Path, csv_line: str) -> Path:
    """Write ``csv_line`` to ``csv_file.csv`` inside ``tmp_path``.

    Args:
        tmp_path: Directory in which the file is created.
        csv_line: Text written verbatim as the entire file content.

    Returns:
        Path of the written file.
    """
    csv_file = tmp_path / "csv_file.csv"
    csv_file.write_text(csv_line)
    return csv_file


@pytest.fixture
def tmp_path_fixture(tmp_path: Path) -> Path:
    """Return the ``tmp_path`` directory it receives, under the name ``tmp_path_fixture``."""
    return tmp_path


class TestReadFacilityResource:
def test_no_nans(self, facility_resource_missing_value_file):
with pytest.raises(ValueError) as exc:
Expand All @@ -109,6 +120,46 @@ def test_no_nans(self, facility_resource_missing_value_file):
== "csv file contains invalid data at row 1, all headers must be associated with a valid column value"
)

@pytest.mark.parametrize(
    "csv_line, is_valid_characters",
    [
        ("aa :, bb", True),
        ("aa ., bb", True),
        ("aa +, bb", True),
        ("aa 0, bb", True),
        ("aa _, bb", True),
        ("aa -, bb", True),
        ("aa /, bb", True),
        ("aa #, bb", True),
        ("aa @, bb", False),
        ("aa %, bb", False),
        ("aa &, bb", False),
        ("aa ?, bb", False),
        ("aa ', bb", False),
        ('aa ", bb', False),
        ("aa )(, bb", False),
        ("aa ][, bb", False),
        ("aa }{, bb", False),
        ("aa ><, bb", False),
    ],
)  # This is not meant to be exhaustive, just to test that we have some validation
def test_valid_characters(self, tmp_path_fixture, csv_line: str, is_valid_characters: bool):
    """Check that headers with allowed characters load and others raise ValueError.

    Each case writes ``csv_line`` to a one-line CSV file and reads it with
    ``file_io.read_facility_resource``.
    """
    if is_valid_characters:
        # Valid headers: reading must simply not raise.
        file_io.read_facility_resource(create_csv_from_line(tmp_path_fixture, csv_line))
    else:
        with pytest.raises(ValueError) as e:
            file_io.read_facility_resource(create_csv_from_line(tmp_path_fixture, csv_line))
        # Checked after the ``with`` block: statements following the raising
        # call inside the block would never execute.
        assert (
            str(e.value) == "Each header value must start with a letter in the "
            "english alphabet (a-zA-Z). And may only contain letters, spaces, numbers or any of the following "
            "characters [ _ - # + : . , /] "
        )

def test_missing_headers(self, tmp_path_fixture):
    """Check that a CSV line with an empty header cell is rejected as a missing header."""
    with pytest.raises(ValueError) as e:
        # The middle header in "HEADER1 ,,HEADER3" is empty.
        file_io.read_facility_resource(create_csv_from_line(tmp_path_fixture, "HEADER1 ,,HEADER3"))
    # Checked after the ``with`` block so the assertion actually runs.
    assert str(e.value) == "CSV input file must include header"


@pytest.fixture
def yaml_resource():
Expand Down

0 comments on commit 60e8403

Please sign in to comment.