Skip to content

Commit

Permalink
Add exports for other file types (#366)
Browse files — browse the repository at this point in the history
* Add exports for other file types

* Update consolidate.py
  • Loading branch information
palewire authored Jan 31, 2023
1 parent 9d66018 commit 6c9d03b
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions newshomepages/extract/consolidate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -117,27 +117,36 @@ def consolidate(
hyperlinks_list = []
lighthouse_list = []
wayback_list = []
html_list = []
print("🪆 Extracting files")
for f in track(file_list):
if f["file_name"].endswith(".jpg"):
name = f['file_name']
if name.endswith(".jpg"):
if 'fullpage' in name:
f['type'] = 'fullpage'
else:
f['type'] = 'cropped'
screenshot_list.append(f)
elif "accessibility" in f["file_name"]:
elif "accessibility" in name:
a11y_list.append(f)
elif "hyperlinks" in f["file_name"]:
elif "hyperlinks" in name:
hyperlinks_list.append(f)
elif "lighthouse" in f["file_name"]:
elif "lighthouse" in name:
lighthouse_list.append(f)
elif "wayback" in f["file_name"]:
elif "wayback" in name:
wayback_list.append(f)
elif name.endswith(".html"):
html_list.append(f)
else:
raise ValueError(f"File name {f['file_name']} doesn't have an output file")
raise ValueError(f"File name {name} doesn't have an output file")

# Write those out too
utils.write_csv(screenshot_list, output_path / "screenshot-files.csv")
utils.write_csv(a11y_list, output_path / "accessibility-files.csv")
utils.write_csv(hyperlinks_list, output_path / "hyperlink-files.csv")
utils.write_csv(lighthouse_list, output_path / "lighthouse-files.csv")
utils.write_csv(wayback_list, output_path / "wayback-files.csv")
utils.write_csv(html_list, output_path / "html-files.csv")

# Delete the zip file
zip_path = output_path / "latest.zip"
Expand Down

0 comments on commit 6c9d03b

Please sign in to comment.