-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
110 lines (86 loc) · 3.45 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""Trending on Twitter NetworkX App
This app visualizes connections between entities in Twitter's trending topics
for Germany, making use of the NetworkX and pyvis libraries. It also accepts
user input for additional link analysis.
"""
import os
from collections import Counter
import tweepy
import pandas as pd
import networkx as nx
import streamlit as st
from dotenv import load_dotenv
from pyvis.network import Network
import app_layout # local module containing layout and text elements
@st.experimental_singleton()
def get_tweepy_api() -> tweepy.API:
    """Authenticate with Twitter and return a cached tweepy API instance.

    Reads BEARER_TOKEN from the environment (populated from a .env file
    by python-dotenv).

    Returns:
        tweepy.API: An authenticated API client.

    Raises:
        RuntimeError: If BEARER_TOKEN is not set — failing fast here is
            clearer than the confusing auth error tweepy would raise later.
    """
    load_dotenv()  # Take environment variables from .env file
    bearer_token = os.environ.get("BEARER_TOKEN")
    if not bearer_token:
        raise RuntimeError("BEARER_TOKEN environment variable is not set.")
    auth = tweepy.OAuth2BearerHandler(bearer_token)
    return tweepy.API(auth)
@st.experimental_memo(ttl=60 * 5)  # Cache for 5 minutes
def get_trends_list(woeid: int = 23424829) -> list[str]:
    """Return the top 10 trending Twitter topics for a location.

    Args:
        woeid: Yahoo! Where On Earth ID of the location to query.
            Defaults to 23424829 (Germany), preserving the original behavior.

    Relies on the module-level ``api`` created by get_tweepy_api().
    """
    trends = api.get_place_trends(woeid)
    # NOTE(review): the slice starts at 1, skipping the first returned trend —
    # presumably intentional; confirm against the API payload.
    return [trend["name"] for trend in trends[0]["trends"][1:11]]
@st.experimental_memo(ttl=60 * 5)  # Cache for 5 minutes
def add_trend_to_df(trend: str) -> pd.DataFrame:
    """Return pandas.DataFrame containing entities for any given `trend`."""
    tweets = api.search_tweets(q=trend, count=100)
    trend_name = trend.replace("#", "")
    trend_lower = trend_name.lower()
    # Tally occurrences of each hashtag across the sourced tweets,
    # excluding the trend's own hashtag.
    hashtag_counter = Counter(
        tag["text"]
        for tweet in tweets
        for tag in tweet.entities["hashtags"]
        if tag["text"].lower() != trend_lower
    )
    # Tally occurrences of each mentioned user.
    mention_counter = Counter(
        mention["name"]
        for tweet in tweets
        for mention in tweet.entities["user_mentions"]
    )
    # Counter preserves first-seen order, so targets/values line up
    # exactly as the original de-duplicated lists did.
    targets = list(hashtag_counter) + list(mention_counter)
    values = list(hashtag_counter.values()) + list(mention_counter.values())
    kinds = ["hashtag"] * len(hashtag_counter) + ["mention"] * len(mention_counter)
    return pd.DataFrame(
        {
            "source": [trend_name] * len(targets),
            "target": targets,
            "type": kinds,
            "value": values,
        }
    )
def draw_network_graph(df: pd.DataFrame) -> None:
    """Render an interactive pyvis graph of the edge list in `df`.

    Args:
        df: Edge list with "source", "target" and "value" columns,
            as produced by add_trend_to_df().
    """
    graph = nx.from_pandas_edgelist(df, "source", "target", "value")
    trends_net = Network(
        height="410px", width="100%", bgcolor="#0e1117", font_color="white"
    )
    trends_net.from_nx(graph)
    # pyvis renders to an HTML file; write it out, then embed it in the page.
    trends_net.save_graph("/tmp/graph.html")
    # Context manager closes the handle — the original leaked the open file.
    with open("/tmp/graph.html", "r", encoding="utf-8") as html_file:
        st.components.v1.html(html_file.read(), height=435)
app_layout.setup_page()
api = get_tweepy_api()
trends = get_trends_list()

selected_trends = st.multiselect(
    "Select the Germany trend(s) you are interested in:", trends
)
selected_input = st.text_input(
    "Or try adding trends of your own (as a comma-separated list):"
)
if selected_input:
    # Strip surrounding whitespace and drop empty segments so input like
    # "foo, bar," does not send " bar" or "" straight to the search API
    # (the original split without cleaning the pieces).
    selected_trends += [t.strip() for t in selected_input.split(",") if t.strip()]

# Collect one frame per trend and concatenate once — repeated pd.concat
# inside a loop copies the accumulated frame on every pass.
frames = [add_trend_to_df(trend) for trend in selected_trends]
df = (
    pd.concat(frames, ignore_index=True)
    if frames
    else pd.DataFrame(columns=["source", "target", "type", "value"])
)

if selected_trends:
    draw_network_graph(df[df.value > 1])  # threshold for min occurrences