Skip to content

Commit

Permalink
Resolves #812.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Aug 20, 2024
1 parent 5290ce0 commit 525c1c9
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 12 deletions.
7 changes: 7 additions & 0 deletions tests/assets/csv_row_filters_test/csv_row_filters_test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
file,id,title,field_model,field_edtf_date
,issue_812_001,Issue 812 item 1,Image,2020-01-01
foo.jpg,issue_812_002,Issue 812 item 2,Image,
noo.jpg,issue_812_003,Issue 812 item 3,Binary,1999-01-01|2000
,issue_812_004,Issue 812 item 4,Digital document,2000|2001
bar.jpg,issue_812_005,Issue 812 item 5,Digital document,2012-12-12|2001
,issue_812_006,Issue 812 item 6,Compound object,
12 changes: 12 additions & 0 deletions tests/assets/csv_row_filters_test/csv_row_filters_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
task: create
host: https://islandora.traefik.me
username: admin
password: password
input_csv: csv_row_filters_test.csv
nodes_only: true
input_dir: tests/assets/csv_row_filters_test
secure_ssl_only: false
csv_row_filters:
- field_model:isnot:Digital document
- field_edtf_date:is:2020-01-01
- field_edtf_date:is:2000
39 changes: 39 additions & 0 deletions tests/islandora_tests_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -1565,5 +1565,44 @@ def tearDown(self):
os.remove(self.true_log_file_path)


class TestCsvRowFilters(unittest.TestCase):
    """Verify the rows left in the preprocessed CSV when ``csv_row_filters`` is configured.

    The configuration file used here lives in
    ``assets/csv_row_filters_test/csv_row_filters_test.yml`` relative to this
    test module.
    """

    def setUp(self):
        """Run ``./workbench --check`` with the CSV row filters test configuration."""
        self.current_dir = os.path.dirname(os.path.abspath(__file__))
        config_file_path = os.path.join(
            self.current_dir,
            "assets",
            "csv_row_filters_test",
            "csv_row_filters_test.yml",
        )
        self.temp_dir = tempfile.gettempdir()
        # The test expects the preprocessed copy of the input CSV to be in the
        # system temp directory under this name.
        self.preprocessed_csv_file_path = os.path.join(
            self.temp_dir, "csv_row_filters_test.csv.preprocessed"
        )

        # check_output() raises CalledProcessError if workbench exits non-zero,
        # which fails the test during setup.
        cmd = ["./workbench", "--config", config_file_path, "--check"]
        output = subprocess.check_output(cmd)
        self.output = output.decode().strip()

    def test_update_check(self):
        """Assert that only the expected rows remain in the preprocessed CSV."""
        # Use a context manager so the file handle is closed even if reading fails.
        with open(self.preprocessed_csv_file_path) as preprocessed_csv:
            csv_rows = preprocessed_csv.readlines()

        # Three lines in total: presumably the header row plus the two data
        # rows asserted below.
        self.assertEqual(len(csv_rows), 3)
        self.assertEqual(
            csv_rows[1].strip(),
            ",issue_812_001,Issue 812 item 1,Image,2020-01-01",
        )
        self.assertEqual(
            csv_rows[2].strip(),
            "noo.jpg,issue_812_003,Issue 812 item 3,Binary,1999-01-01|2000",
        )

    def tearDown(self):
        """Delete the preprocessed CSV file if it exists."""
        if os.path.exists(self.preprocessed_csv_file_path):
            os.remove(self.preprocessed_csv_file_path)


if __name__ == "__main__":
    # Run the tests in this module when the file is executed directly.
    unittest.main()
26 changes: 14 additions & 12 deletions workbench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5803,6 +5803,9 @@ def get_csv_data(config, csv_file_target="node_fields", file_path=None):
# Then populate the lists of filter values.
for filter_config in config["csv_row_filters"]:
filter_group = filter_config.split(":", 2)
# Prepare the '' filter value.
if filter_group[2] == "''" or filter_group[2] == '""':
filter_group[2] = ""
if filter_group[1] == "is":
filter_group_field = filter_group[0]
filter_group_value = filter_group[2]
Expand Down Expand Up @@ -5837,41 +5840,40 @@ def get_csv_data(config, csv_file_target="node_fields", file_path=None):
# WIP on #812.
# Apply the "is" and "isnot" csv_row_filters defined above.
# If the field/value combo is in the 'isnot' list, skip this row.
filter_out_this_csv_row = False
if "csv_row_filters" in config and len(config["csv_row_filters"]) > 0:
filter_out_this_csv_row = False
# filter_out_this_csv_row = False
if len(row_filters_isnot) > 0:
for filter_field, filter_values in row_filters_isnot.items():
if (
len(filter_values) > 0
and filter_field in row
and len(row[filter_field]) > 0
):
if len(filter_values) > 0 and filter_field in row:
# Split out multiple field values to test each one.
values_in_row_field = row[filter_field].split(
config["subdelimiter"]
)
for value_in_row_field in values_in_row_field:
filter_out_this_csv_row = False
if value_in_row_field.strip() in filter_values:
filter_out_this_csv_row = True
else:
break
if filter_out_this_csv_row is True:
continue

# If the field/value combo is not in the 'is' list, skip this row.
filter_out_this_csv_row = False
if len(row_filters_is) > 0:
# filter_out_this_csv_row = False
for filter_field, filter_values in row_filters_is.items():
if (
len(filter_values) > 0
and filter_field in row
and len(row[filter_field]) > 0
):
if len(filter_values) > 0 and filter_field in row:
# Split out multiple field values to test each one.
values_in_row_field = row[filter_field].split(
config["subdelimiter"]
)
for value_in_row_field in values_in_row_field:
filter_out_this_csv_row = False
if value_in_row_field.strip() not in filter_values:
filter_out_this_csv_row = True
else:
break
if filter_out_this_csv_row is True:
continue

Expand Down

0 comments on commit 525c1c9

Please sign in to comment.