From 6c9d03b60c1899038f4e1d40ebea542e3b458532 Mon Sep 17 00:00:00 2001
From: Ben Welsh
Date: Tue, 31 Jan 2023 11:48:03 -0500
Subject: [PATCH] Add exports for other file types (#366)

* Add exports for other file types

* Update consolidate.py
---
 newshomepages/extract/consolidate.py | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/newshomepages/extract/consolidate.py b/newshomepages/extract/consolidate.py
index f318e4b2284..d213fb5f1fa 100644
--- a/newshomepages/extract/consolidate.py
+++ b/newshomepages/extract/consolidate.py
@@ -117,20 +117,28 @@ def consolidate(
     hyperlinks_list = []
     lighthouse_list = []
     wayback_list = []
+    html_list = []
     print("🪆 Extracting files")
     for f in track(file_list):
-        if f["file_name"].endswith(".jpg"):
+        name = f['file_name']
+        if name.endswith(".jpg"):
+            if 'fullpage' in name:
+                f['type'] = 'fullpage'
+            else:
+                f['type'] = 'cropped'
             screenshot_list.append(f)
-        elif "accessibility" in f["file_name"]:
+        elif "accessibility" in name:
             a11y_list.append(f)
-        elif "hyperlinks" in f["file_name"]:
+        elif "hyperlinks" in name:
             hyperlinks_list.append(f)
-        elif "lighthouse" in f["file_name"]:
+        elif "lighthouse" in name:
             lighthouse_list.append(f)
-        elif "wayback" in f["file_name"]:
+        elif "wayback" in name:
             wayback_list.append(f)
+        elif name.endswith(".html"):
+            html_list.append(f)
         else:
-            raise ValueError(f"File name {f['file_name']} doesn't have an output file")
+            raise ValueError(f"File name {name} doesn't have an output file")
 
     # Write those out too
     utils.write_csv(screenshot_list, output_path / "screenshot-files.csv")
@@ -138,6 +146,7 @@ def consolidate(
     utils.write_csv(hyperlinks_list, output_path / "hyperlink-files.csv")
     utils.write_csv(lighthouse_list, output_path / "lighthouse-files.csv")
     utils.write_csv(wayback_list, output_path / "wayback-files.csv")
+    utils.write_csv(html_list, output_path / "html-files.csv")
 
     # Delete the zip file
     zip_path = output_path / "latest.zip"