-
Notifications
You must be signed in to change notification settings - Fork 0
/
plots.py
321 lines (266 loc) · 10.8 KB
/
plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from matplotlib import pyplot as plt
from wordcloud import WordCloud
from template.html import POPUP
from utils import insert_sentiment_scores
import folium
from folium.plugins import MarkerCluster
from streamlit_folium import folium_static
# Shared hex colour palette; the plotting functions in this module index into
# it to colour individual traces and pie slices.
COLORS = [
    "#0081a7",
    "#00afb9",
    "#f07167",
    "#e9c46a",
    "#264653",
    "#f4a261",
    "#e76f51",
    "#ef233c",
    "#fed9b7",
    "#f6bd60",
    "#84a59d",
    "#f95738",
    "#fdfcdc",
]
def update_layout(fig: go.Figure, x_label: str, y_label: str, title: str) -> go.Figure:
    """
    Apply the module's common axis titles, figure title and hover styling to a figure.
    :param fig: plotly fig whose layout is updated
    :param x_label: text used as the x-axis title
    :param y_label: text used as the y-axis title
    :param title: text used as the figure title
    :return: The value returned by ``fig.update_layout`` (the updated figure)
    """
    # Hover box appearance shared by every chart that goes through this helper.
    hover_box = {
        "bgcolor": "white",
        "font_color": "black",
        "font_size": 16,
        "font_family": "Rockwell",
    }
    layout_settings = {
        "xaxis_title": x_label,
        "yaxis_title": y_label,
        "title": title,
        "hovermode": "x unified",
        "hoverlabel": hover_box,
    }
    return fig.update_layout(**layout_settings)
def reviews_wordcloud(df: pd.DataFrame) -> plt.Figure:
    """
    Generate a word cloud to visualize frequent words in a DataFrame of reviews.
    :param df: The input DataFrame containing review data; its 'text' column is read.
    :return: A matplotlib figure representing the word cloud
    """
    wordcloud = WordCloud(background_color='white', min_font_size=5)
    # Skip missing reviews and coerce the rest to str: str.join raises
    # TypeError on NaN or any other non-string cell.
    text = ' '.join(df['text'].dropna().astype(str))
    # Several empty reviews joined by spaces give a non-empty but blank string,
    # so test for actual content rather than length before generating.
    if not text.strip():
        text = 'Null'
    wordcloud.generate(text)
    # Convert the word cloud to an image
    fig = plt.figure(facecolor=None)
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.tight_layout(pad=10)
    plt.title("Frequent Words in Reviews")
    return fig
def average_rating_overtime(df):
    """
    Function to plot Bar Chart to visualize average rating
    w.r.t Quarters for each year
    :param df: The input DataFrame containing review data. Its 'datetime' column
        is read through the ``.dt`` accessor and its 'rating' column is averaged.
        The frame itself is not modified.
    :return: A Plotly Figure with one grouped bar trace per quarter (1-4),
        years on the x-axis and mean rating on the y-axis.
    """
    # Work on a copy produced by assign() so the helper columns are not
    # written into the caller's DataFrame.
    dated = df.assign(
        year=df['datetime'].dt.year,
        quarter=df['datetime'].dt.quarter,
    )
    # Calculate average rating for each year and quarter
    avg_rating = dated.groupby(['year', 'quarter'])['rating'].mean().reset_index()
    # Create a Plotly Go figure
    fig = go.Figure()
    # Add a bar trace for each quarter; the quarter number doubles as the
    # palette index, so quarters use COLORS[1]..COLORS[4].
    for quarter in range(1, 5):
        quarter_data = avg_rating[avg_rating['quarter'] == quarter]
        fig.add_trace(go.Bar(
            x=quarter_data['year'],
            y=quarter_data['rating'],
            name=f'Quarter {quarter}',
            marker=dict(color=COLORS[quarter])
        ))
    fig.update_layout(barmode='group', legend=dict(title='Quarter'))
    fig = update_layout(fig, "Time", "Average Rating", "Average Rating overtime")
    return fig
def average_rating_wrt_month_year(df):
    """
    Function to plot Bar Chart to visualize average rating
    w.r.t Months for each year
    :param df: The input DataFrame containing review data. Its 'datetime' column
        is read through the ``.dt`` accessor and its 'rating' column is averaged.
        The frame itself is not modified.
    :return: A Plotly Figure with one grouped bar trace per year, "Mon YYYY"
        labels on the x-axis and mean rating on the y-axis.
    """
    # Build the helper columns on a copy so the caller's DataFrame is untouched.
    dated = df.assign(
        year=df['datetime'].dt.year,
        month_num=df['datetime'].dt.month,
        month_year=df['datetime'].dt.strftime("%b %Y"),
    )
    # Calculate average rating for each year and month
    avg_rating = dated.groupby(['year', 'month_num', 'month_year'])['rating'].mean().reset_index()
    fig = go.Figure()
    years = sorted(avg_rating['year'].unique())
    # Add a bar trace for each year
    for ind, year in enumerate(years):
        # Non-inplace sort: the filtered frame is a derived selection, and
        # sorting it in place can trigger pandas' SettingWithCopyWarning.
        year_data = avg_rating[avg_rating['year'] == year].sort_values(by='month_num')
        fig.add_trace(go.Bar(
            x=year_data['month_year'],
            y=year_data['rating'],
            name=f'{year}',
            # Wrap around the palette so data spanning more years than there
            # are colours does not raise IndexError.
            marker=dict(color=COLORS[ind % len(COLORS)])
        ))
    fig.update_layout(barmode='group', legend=dict(title='Year'))
    fig = update_layout(fig, "Time", "Average Rating", "Average Rating overtime w.r.t Month-Year")
    return fig
def rating_breakdown_pie(df: pd.DataFrame) -> go.Figure:
    """
    Build a donut-style pie chart showing how many reviews fall under each rating.
    :param df: The input DataFrame containing review data; grouped by its
        'rating' column, counting entries of its 'text' column.
    :return: A Plotly Figure representing the breakdown of reviews by rating.
    """
    star_labels = {
        5: "⭐ 5 😊", 4: "⭐ 4 🙂", 3: "⭐ 3 😕", 2: "⭐ 2 😒", 1: "⭐ 1 😑"
    }
    # Number of review texts per rating value
    counts = df.groupby("rating")["text"].count().reset_index()
    counts["rating"] = counts["rating"].astype(int)
    counts["Rating-Formatted"] = counts["rating"].map(star_labels)
    counts = counts.sort_values(by="rating", ascending=True)
    # sort=False keeps the slices in the ascending-rating order prepared above
    donut = go.Pie(
        labels=counts["Rating-Formatted"],
        values=counts["text"],
        hole=0.3,
        sort=False
    )
    fig = go.Figure(donut)
    fig.update_traces(
        hoverinfo='label+value',
        textinfo='percent',
        textfont_size=15,
        marker=dict(colors=COLORS),
    )
    return update_layout(fig, "Rating", "Review Count", "Rating Distribution")
def sentiment_score_overtime(df):
    """
    Plot each review's sentiment score against its timestamp as a marker scatter,
    with a shaded band over the positive range and another over the negative range.
    :param df: The input DataFrame containing review data; it is passed through
        ``insert_sentiment_scores`` and the result's 'datetime' and
        'sentiment_score' columns are plotted.
    :return: A Plotly Figure representing sentiment score overtime.
    """
    scored = insert_sentiment_scores(df)
    points = go.Scatter(
        x=scored["datetime"],
        y=scored["sentiment_score"],
        name="Sentiment Score",
        mode="markers",
        marker={"color": "#84a59d", "size": 20},
    )
    fig = go.Figure()
    fig.add_trace(points)
    # (y0, y1, fill colour): green band above +0.05, red band below -0.05
    bands = (
        (0.05, 1.05, "#57cc99"),
        (-0.05, -1.05, "#ef233c"),
    )
    for band_start, band_end, shade in bands:
        fig.add_hrect(y0=band_start, y1=band_end, line_width=0, fillcolor=shade, opacity=0.2)
    return update_layout(fig, "Time", "Sentiment Score", "Sentiment Score Analytics")
def top_performing_places(df):
    """
    Function to plot a bar chart of top-performing places based on reviews, ratings, and reliability.

    Places are aggregated by 'name' (mean 'averageRating', summed 'totalReviews'),
    places below the 40th percentile of total reviews are discarded, and the
    30 places with the highest relative satisfaction score are plotted.

    Scores computed per place:
      * Satisfaction Score = averageRating * totalReviews / sum(totalReviews)
      * Relative Satisfaction Score = Satisfaction Score / max(Satisfaction Score) * 100
      * Reliability Score = averageRating * log1p(totalReviews)

    :param df: The input DataFrame containing review data with 'name',
        'averageRating' and 'totalReviews' columns. It is not modified.
    :return: A Plotly Figure representing top places with their satisfaction and reliability scores.
    """
    # Drop rows with missing average ratings
    df = df.dropna(subset=["averageRating"])
    # Aggregate data by place name
    df = df.groupby("name").agg({
        "averageRating": "mean",
        "totalReviews": "sum"
    }).reset_index()
    # Filter places with total reviews above certain range; copy so the score
    # columns below are added to an independent frame rather than to a
    # filtered selection (avoids pandas' SettingWithCopyWarning).
    thresh = df["totalReviews"].quantile(0.40)
    df = df[df["totalReviews"] >= thresh].copy()
    # Calculate Satisfaction Score
    df['Satisfaction Score'] = (df['averageRating'] * df['totalReviews']) / df['totalReviews'].sum()
    max_score = df['Satisfaction Score'].max()
    df['Relative Satisfaction Score'] = (df['Satisfaction Score'] / max_score) * 100
    # Calculate Reliability Score
    df['Reliability Score'] = df['averageRating'] * np.log1p(df['totalReviews'])
    # Sort places by Relative Satisfaction Score and select top 30 places
    top_places = df.sort_values(by="Relative Satisfaction Score", ascending=False).head(30)
    # Hover text fragments shared by both traces
    hover_common = (
        "Rating: " + top_places["averageRating"].astype(str) + " stars<br>" +
        "Total Reviews: " + top_places["totalReviews"].astype(str) + "<br>"
    )
    # Create bar chart
    fig = go.Figure()
    # Add Relative Satisfaction Score bars
    fig.add_trace(
        go.Bar(
            x=top_places["name"],
            y=top_places["Relative Satisfaction Score"],
            marker=dict(color="#2a9d8f"),
            name="Satisfaction Score",
            text=top_places["Relative Satisfaction Score"].round(2).astype(str) + " %",
            hovertext=(
                hover_common +
                "Satisfaction Score: " + top_places["Satisfaction Score"].round(4).astype(str)
            ),
            hoverinfo="text"
        )
    )
    # Add Reliability Score line
    fig.add_trace(
        go.Scatter(
            x=top_places["name"],
            y=top_places["Reliability Score"],
            marker=dict(color="#e9c46a"),
            name="Reliability Score",
            text=top_places["Reliability Score"].round(2).astype(str),
            hovertext=(
                hover_common +
                "Reliability Score: " + top_places["Reliability Score"].round(4).astype(str)
            ),
            hoverinfo="text",
        )
    )
    fig.update_layout(height=500)
    # Place names are on the x-axis and scores on the y-axis, so the axis
    # titles follow that order. The shared helper supplies the same hover
    # styling used by the other charts in this module.
    fig = update_layout(fig, "Place", "Score", "Top Rated Places")
    return fig
def spatial_dist_of_business_points(df):
    """
    Render a clustered marker map of business locations via ``folium_static``.

    One marker is added per row that has both coordinates; its popup shows the
    place name, average rating and total review count.
    :param df: The input DataFrame with 'latitude', 'longitude', 'name',
        'averageRating' and 'totalReviews' columns.
    :return: None; the map is handed to ``folium_static`` for display.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Create a base map centered around the average latitude and longitude
    map_center = [df['latitude'].mean(), df['longitude'].mean()]
    m = folium.Map(location=map_center, zoom_start=12)
    # Create a MarkerCluster object
    marker_cluster = MarkerCluster().add_to(m)
    # Rows without a coordinate cannot be placed on the map, so skip them
    # instead of passing NaN locations to folium.
    located = df.dropna(subset=['latitude', 'longitude'])
    # Add markers to the cluster
    for _, row in located.iterrows():
        # The popup string is used as HTML (the <strong>/<br> tags rely on it),
        # so data values are escaped to keep markup characters in a name from
        # being interpreted as HTML.
        popup_text = (
            f"<strong>{escape(str(row['name']))}</strong><br>"
            f"Rating: {escape(str(row['averageRating']))}<br>"
            f"Reviews: {escape(str(row['totalReviews']))}"
        )
        folium.Marker(
            location=[row['latitude'], row['longitude']],
            popup=popup_text
        ).add_to(marker_cluster)
    # Zoom the view to the extent of whatever was added to the map
    m.fit_bounds(m.get_bounds())
    folium_static(m, width=600, height=500)
def folium_marker_map(df):
    """
    Render a map with one circle marker per row via ``folium_static``.

    Each circle's radius is the row's 'totalReviews' divided by 50 and its fill
    colour encodes 'averageRating' (blue for >= 4, yellow for >= 3, red
    otherwise). Clicking a circle opens a popup built from the ``POPUP`` template.
    :param df: The input DataFrame with 'latitude', 'longitude', 'name',
        'address', 'averageRating', 'totalReviews' and 'contact' columns.
    :return: None; the map is handed to ``folium_static`` for display.
    """
    # Centre the map on the mean coordinate of all rows
    centre = [df['latitude'].mean(), df['longitude'].mean()]
    m = folium.Map(location=centre, zoom_start=10, control_scale=True, prefer_canvas=True)
    for _, place in df.iterrows():
        # Positional fields for the POPUP template, in the order it expects.
        # NOTE(review): values are inserted as-is; whether the template needs
        # them HTML-escaped depends on POPUP, which is defined elsewhere.
        popup_fields = (
            "",
            str(place["name"]),
            str(place["address"]),
            str(place["averageRating"]),
            str(place["totalReviews"]),
            place["contact"],
        )
        frame = folium.IFrame(POPUP.format(*popup_fields), width=320, height=180)
        bubble = folium.Popup(frame, min_width=150, max_width=300)
        # Colour by rating tier
        rating = place['averageRating']
        if rating >= 4:
            shade = 'blue'
        elif rating >= 3:
            shade = 'yellow'
        else:
            shade = 'red'
        circle = folium.CircleMarker(
            location=[place['latitude'], place['longitude']],
            radius=place['totalReviews'] / 50,
            color=None,
            fill=True,
            fill_color=shade,
            fill_opacity=0.6,
            popup=bubble
        )
        circle.add_to(m)
    # Zoom the view to the extent of the markers
    m.fit_bounds(m.get_bounds())
    folium_static(m, width=600, height=500)