Skip to content

Commit

Permalink
Try to clean up drudge analysis and cut blanks
Browse files — browse the repository at this point in the history
  • Branch information
palewire committed Aug 26, 2023
1 parent 125b345 commit 0230275
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
5 changes: 4 additions & 1 deletion newshomepages/analyze/drudge.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_lemma(headline: str):
doc = nlp(headline)

# Parse out all the words
token_list = [token for token in doc]
token_list = [token.strip() for token in doc if token.strip()]

# Remove stop words
token_list = [t for t in token_list if not t.is_stop]
Expand Down Expand Up @@ -121,6 +121,8 @@ def get_lemma(headline: str):
"RISE",
"DEAD",
"SET",
"HOUSE", # This usually refers to the White House
"\n",
]
qualified_df = word_df[
(~word_df.part_of_speech.isin(["SYM", "VERB"]))
Expand Down Expand Up @@ -156,6 +158,7 @@ def get_top_verb(lemma: str) -> str:
"HAVE",
"MELONI",
"ZERO",
"'", # This is a weird one
]
if lemma == "COVID":
stop_verbs += ["TESTS", "TEST"]
Expand Down
1 change: 1 addition & 0 deletions newshomepages/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def drudge():
out_dir = CHARTS_DIR / "drudge" / "top-words"
out_dir.mkdir(parents=True, exist_ok=True)
for d in track(dict_list):
print(d)
_write_template(
"drudge-top-words.svg", dict(obj=d), out_dir / f"{d['lemma'].lower()}.svg"
)
Expand Down

0 comments on commit 0230275

Please sign in to comment.