
Commit

Let's try to consolidate the robots files
palewire committed Aug 27, 2023
1 parent 309a7a1 commit b1b2895
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion in newshomepages/extract/consolidate.py
@@ -93,7 +93,10 @@ def consolidate(
         for p in item_data["files"]
         if (
             handle.lower() in p["name"].lower()
-            and p["format"] in ["JSON", "JPEG", "HTML"]
+            and (
+                p["format"] in ["JSON", "JPEG", "HTML"]
+                or p["name"].lower().endswith("robots.txt")
+            )
         )
     ]
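Read as a standalone predicate, the expanded filter above keeps a file when its name contains the handle and either its format is JSON, JPEG, or HTML or its name ends in robots.txt. The sketch below is a minimal, illustrative reproduction and is not part of the commit; the record shape (a dict with "name" and "format" keys) and the "Text" format label are assumptions based on the surrounding diff.

from typing import Any


def keep_file(p: dict[str, Any], handle: str) -> bool:
    """Mirror the expanded filter: keep JSON/JPEG/HTML captures for the handle,
    plus any file whose name ends in robots.txt."""
    return handle.lower() in p["name"].lower() and (
        p["format"] in ["JSON", "JPEG", "HTML"]
        or p["name"].lower().endswith("robots.txt")
    )


# A robots.txt capture now passes even though its format label (assumed here
# to be something like "Text") is not in the JSON/JPEG/HTML list.
assert keep_file({"name": "example-site.robots.txt", "format": "Text"}, "example-site")
assert not keep_file({"name": "example-site.unknown.bin", "format": "Binary"}, "example-site")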

@@ -121,6 +124,7 @@ def consolidate(
     lighthouse_list = []
     wayback_list = []
     html_list = []
+    robotstxt_list = []
     print("🪆 Extracting files")
     for f in track(file_list):
         name = f["file_name"]
@@ -140,6 +144,8 @@ def consolidate(
             wayback_list.append(f)
         elif name.endswith(".html"):
             html_list.append(f)
+        elif name.endswith("robots.txt"):
+            robotstxt_list.append(f)
         else:
             raise ValueError(f"File name {name} doesn't have an output file")
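The two hunks above add a robotstxt_list bucket and a matching elif branch, so robots.txt captures are routed to their own list instead of falling through to the ValueError. Below is a minimal sketch of that suffix routing, covering only the branches visible in the diff; it is not part of the commit.

def route(name: str) -> str:
    """Suffix-based routing as shown in the hunk above (visible branches only)."""
    if name.endswith(".html"):
        return "html"
    elif name.endswith("robots.txt"):
        return "robotstxt"
    else:
        raise ValueError(f"File name {name} doesn't have an output file")


assert route("example-site.html") == "html"
# Before this commit, a robots.txt name would have hit the else branch and raised.
assert route("example-site.robots.txt") == "robotstxt"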

@@ -150,6 +156,7 @@ def consolidate(
     utils.write_csv(lighthouse_list, output_path / "lighthouse-files.csv")
     utils.write_csv(wayback_list, output_path / "wayback-files.csv")
     utils.write_csv(html_list, output_path / "html-files.csv")
+    utils.write_csv(robotstxt_list, output_path / "robotstxt-files.csv")

     # Delete the zip file
     zip_path = output_path / "latest.zip"
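Hypothetical usage, not part of the commit: once consolidate() has run, the new robotstxt-files.csv can be read back like any of the other per-type manifests. The output location and the column names depend on utils.write_csv and the file records, which this diff does not show.

import csv
from pathlib import Path

output_path = Path("extracts")  # assumed output directory; adjust to your setup
with open(output_path / "robotstxt-files.csv", newline="", encoding="utf-8") as fh:
    robotstxt_files = list(csv.DictReader(fh))

print(f"{len(robotstxt_files)} robots.txt captures in the consolidated manifest")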
