forked from Khululiwe/MM4_2401FTDS-Anime-Recommender-App
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyapp.py
448 lines (377 loc) · 20.1 KB
/
myapp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
import streamlit as st
import pandas as pd
import gdown
import zipfile
from PIL import Image
import os
import gzip
import pickle
import dask.array as da
from fuzzywuzzy import fuzz
from io import BytesIO
from joblib import load, dump
# Define a function to load images
def load_image(image_path):
return Image.open(image_path)
# Load images
images_path = os.path.join(os.getcwd(), 'Images')
anime2 = load_image(os.path.join(images_path, 'anime2.png'))
img2 = load_image(os.path.join(images_path, 'img2.jpeg'))
img3 = load_image(os.path.join(images_path, 'img3.jpeg'))
img4 = load_image(os.path.join(images_path, 'img4.jpeg'))
img5 = load_image(os.path.join(images_path, 'img5.jpeg'))
# Function to download and unzip files from Google Drive
def download_and_unzip_file_from_drive(file_id, output_name, extract_to):
url = f"https://drive.google.com/uc?id={file_id}"
zip_path = gdown.download(url, output_name, quiet=False)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(extract_to)
return extract_to
# Define the path to your data folder
data_path = os.path.join(os.getcwd(), 'Anime', 'Data') # Adjust this if your data is stored differently
@st.cache_data
def load_anime_data_from_drive(file_id, dtype=None, sample_size=None, random_state=42):
extract_to = download_and_unzip_file_from_drive(file_id, 'anime.zip', data_path)
csv_path = os.path.join(extract_to, 'anime.csv') # Ensure this is the correct file name
if os.path.exists(csv_path):
df = pd.read_csv(csv_path, dtype=dtype) # No need to use gzip.open since it's not gzipped
if sample_size:
return df.sample(n=sample_size, random_state=random_state)
return df
else:
raise FileNotFoundError(f"File not found: {csv_path}")
# Define dtypes for columns
dtype_anime = {
'episodes': 'object',
}
@st.cache_data
def load_train_data_from_drive(file_id, dtype=None, sample_size=None, random_state=42):
extract_to = download_and_unzip_file_from_drive(file_id, 'train.zip', data_path)
csv_path = os.path.join(extract_to, 'train.csv') # Ensure this is the correct file name
if os.path.exists(csv_path):
df = pd.read_csv(csv_path, dtype=dtype) # No need to use gzip.open since it's not gzipped
if sample_size:
return df.sample(n=sample_size, random_state=random_state)
return df
else:
raise FileNotFoundError(f"File not found: {csv_path}")
# Load sampled anime and train datasets
anime = load_anime_data_from_drive('1mnaNLptvbR42b860oGTDz1BWLfbBf3Pt', dtype=dtype_anime, sample_size=1000)
train = load_train_data_from_drive('1JxBbumLNL4r264uQUa7nX9WjssE2Zenf', sample_size=50000)
model_folder = os.path.join(os.getcwd(), 'Anime', 'Models') # Adjust if models are stored differently
# Lazy loading models
@st.cache_resource
def load_tfv_vectorizer():
extract_to = download_and_unzip_file_from_drive('14F3CyYylEwkD5mfUohjhxaE-cPY2_Oul', 'tfv_vectorizer.zip', model_folder)
model_path = os.path.join(extract_to, 'tfv_vectorizer.pkl.gz')
with gzip.open(model_path, 'rb') as f:
return pickle.load(f)
@st.cache_resource
def load_sigmoid_kernel_matrix():
extract_to = download_and_unzip_file_from_drive('1w7xBJId_2lqF2QzP_jZNjtXNXpHQl719', 'sigmoid_kernel_matrix.zip', model_folder)
model_path = os.path.join(extract_to, 'sigmoid_kernel_matrix.pkl.gz')
# Use joblib to load the array as a memory-mapped file
if not os.path.exists('sigmoid_kernel_matrix_memmap'):
with gzip.open(model_path, 'rb') as f:
matrix = pickle.load(f)
dump(matrix, 'sigmoid_kernel_matrix_memmap')
else:
matrix = load('sigmoid_kernel_matrix_memmap', mmap_mode='r')
return da.from_array(matrix, chunks=(250, 250))
@st.cache_resource
def load_baseline_model():
extract_to = download_and_unzip_file_from_drive('12Kvk2Khz279QhHLOYJ3dnbri5Ek1FxrY', 'baseline_model.zip', model_folder)
model_path = os.path.join(extract_to, 'baseline_model.pkl.gz')
with gzip.open(model_path, 'rb') as f:
return pickle.load(f)
# Load models only when needed
tfv = load_tfv_vectorizer()
sig = load_sigmoid_kernel_matrix()
best_baseline_model = load_baseline_model()
# Drop duplicates and reset index
rec_data = anime.drop_duplicates(subset="name", keep="first").reset_index(drop=True)
rec_indices = pd.Series(rec_data.index, index=rec_data["name"]).drop_duplicates()
def get_anime_index(name, rec_indices):
rec_indices = rec_indices.fillna('').astype(str)
name_lower = name.lower()
rec_indices_lower = rec_indices.str.lower()
# Use fuzzy matching with a high score threshold (e.g., 80) for partial matches
matches = rec_indices_lower[rec_indices_lower.apply(lambda x: fuzz.ratio(x, name_lower) >= 80)]
if not matches.empty:
return matches.index[0]
else:
return None
def top_10_anime_by_average_rating(anime_data):
return anime_data.sort_values(by="rating", ascending=False).head(10)[['anime_id', 'name', 'rating']]
def top_10_anime_by_user_ratings(train_data, anime_data):
top_rated_ids = train_data.groupby('anime_id')['rating'].mean().sort_values(ascending=False).head(10).index
valid_anime_data = anime_data.set_index('anime_id')
valid_top_rated_ids = [anime_id for anime_id in top_rated_ids if anime_id in valid_anime_data.index]
top_anime = valid_anime_data.loc[valid_top_rated_ids][['name', 'rating']].reset_index()
return top_anime
def content_based_recommendations(user_input_list, anime_data, sig, top_n=10):
all_recommendations = []
for anime in user_input_list:
idx = get_anime_index(anime, rec_indices)
if idx is not None:
if 0 <= idx < sig.shape[0]:
sim_scores = list(enumerate(sig[idx].compute()))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
sim_indices = [i[0] for i in sim_scores[1:top_n + 1]]
recommended_animes = anime_data['name'].iloc[sim_indices].tolist()
all_recommendations.extend(recommended_animes)
else:
st.warning(f"Index {idx} for anime '{anime}' is out of bounds. Skipping.")
else:
st.warning(f"Anime '{anime}' not found. Skipping.")
if all_recommendations:
recommendation_counts = pd.Series(all_recommendations).value_counts()
top_recommendations = recommendation_counts.head(top_n).index.tolist()
return top_recommendations, None
else:
top_popular_animes = anime_data.sort_values(by='rating', ascending=False).head(top_n)['name'].tolist()
st.warning(f"No anime found for your input. Here are the top {top_n} popular animes:")
return [], top_popular_animes
def collaborative_recommendations(user_ratings, model, anime_data, train_data, top_n=5):
assert isinstance(anime_data, pd.DataFrame), "Expected anime_data to be a DataFrame"
user_id = train_data['user_id'].max() + 1
anime_ids = []
unrecognized_animes = []
for anime_name in user_ratings.keys():
mask = anime_data['name'].str.lower().apply(lambda x: anime_name.lower() in x)
anime_id_series = anime_data[mask]['anime_id']
if not anime_id_series.empty:
anime_ids.append(anime_id_series.iloc[0])
else:
unrecognized_animes.append(anime_name)
if not anime_ids:
st.warning("Your anime has not been rated yet or it is invalid, in the meantime enjoy some of the most popular animes of all time.")
return top_10_anime_by_user_ratings(train_data, anime_data)
if unrecognized_animes:
st.warning(f"The following anime(s) were not recognized or not yet rated: {', '.join(unrecognized_animes)}. Please enter valid anime names.")
return top_10_anime_by_user_ratings(train_data, anime_data)
predictions = [(anime_id, model.predict(user_id, anime_id).est) for anime_id in anime_ids]
predictions_df = pd.DataFrame(predictions, columns=['anime_id', 'predicted_rating'])
recommendations = pd.merge(predictions_df, anime_data, on='anime_id').sort_values(by='predicted_rating', ascending=False).head(top_n)
return recommendations[['name', 'predicted_rating']]
def display_team_member(name, role, description, image_path, linkedin_url):
st.markdown(
"""
<style>
.team-member {
display: flex;
align-items: center;
margin-bottom: 20px;
}
.team-member-info {
max-width: 500px;
margin-left: 20px;
}
</style>
""",
unsafe_allow_html=True
)
col1, col2 = st.columns([1, 4])
with col1:
st.image(image_path, width=150, caption=f"{name} Photo")
with col2:
st.markdown(
f"""
<div class="team-member-info">
<h3>{name}</h3>
<p><strong>{role}</strong></p>
<p>{description}</p>
<a href="{linkedin_url}" target="_blank">
<button>LinkedIn Profile</button>
</a>
</div>
""",
unsafe_allow_html=True
)
st.write("---")
def main():
"""Anime Recommender App with Streamlit """
st.markdown(
"""
<h1 style="color:#FF5733;">Welcome to Funiverse Network</h1>
<p>Discover Your Next Favorite Anime!</p>
<hr>
""", unsafe_allow_html=True
)
st.image(anime2, use_column_width=True)
st.markdown(
"""
<div style="border-left: 7px solid #0288d1; padding-left: 15px; background-color: #e1f5fe;">
<p style="color: #000000; font-weight: bold;">Your personalized gateway to discovering the best anime.
Whether you're a seasoned otaku or new to the world of anime, our app helps you find shows that match your interests.</p>
</div>
""", unsafe_allow_html=True
)
options = ["Recommend", "Information", "Our Team", "EDA", "Contact Us", "App Feedback"]
selection = st.sidebar.selectbox("Choose Option", options)
if selection == "Information":
st.markdown('<h1>About the Anime Recommender App:</h1>', unsafe_allow_html=True)
st.markdown('<h2>Project Overview</h2>', unsafe_allow_html=True)
st.markdown(
"""Introduction: The world of anime is vast and diverse, with thousands of shows spanning multiple genres, themes, and styles. For anime enthusiasts, discovering new series that align with their tastes can be a challenge. The Anime Recommender App is designed to address this challenge by providing personalized recommendations, making it easier for users to find their next favorite anime.""",
unsafe_allow_html=True
)
st.markdown(
"""Objective: The primary objective of this project is to create a user-friendly web application that delivers tailored anime recommendations based on users' preferences and viewing history. The app employs both content-based and collaborative filtering techniques to generate these recommendations, ensuring a broad and accurate selection of anime titles for each user.""",
unsafe_allow_html=True
)
st.markdown(
"""Features:
- **Personalized Recommendations:** Users can receive anime recommendations based on the shows they have watched and rated.
- **Exploratory Data Analysis (EDA):** Users can explore data visualizations related to anime ratings, genres, and more.
- **Top-Rated Anime:** The app highlights the top-rated anime based on average user ratings, helping users discover critically acclaimed shows.
- **User-Friendly Interface:** With a clean and intuitive design, users can easily interact with the app and find the information they need.
""",
unsafe_allow_html=True
)
elif selection == "Our Team":
st.markdown('<h1>Meet the Team:</h1>', unsafe_allow_html=True)
st.markdown('<h2>Our Team Members</h2>', unsafe_allow_html=True)
# Integrate CSS styles
st.markdown(
"""
<style>
.team-member {
display: flex;
align-items: center;
margin-bottom: 20px;
}
.team-member img {
width: 600px; /* Adjust the width as needed */
height: 200px;
margin-right: 20px; /* Add space between the image and text */
border-radius: 10px; /* Optional: add rounded corners */
}
.team-member-info {
max-width: 500px; /* Limit the width of the text to avoid wrapping issues */
}
</style>
""",
unsafe_allow_html=True
)
# Display team members
display_team_member("Khululiwe Hlongwane", "Project Manager",
"Khululiwe oversees the project's development of the Anime Recommender App.",
img2, "https://www.linkedin.com/in/Khululiwe-Hlongwane/")
display_team_member("Judith Kabongo", "Data Scientist",
"Judith handles slide decks and presentation communications.",
img4, "https://www.linkedin.com/in/Judith-Kabongo-568b581b7/")
display_team_member("Ntembeko Mhlungu", "Data Scientist",
"Ntembeko is responsible for developing the app's user interface.",
img5, "https://www.linkedin.com/in/Ntembeko-Mhlungu-899ab5201/")
display_team_member("Tselani Moeti", "Github Manager",
"Tselani is tasked with managing GitHub for version control.",
img3, "https://www.linkedin.com/in/Tselani-Moeti-449a451b1/")
elif selection == "EDA":
st.markdown('<h1>Anime Recommender App Exploratory Data Analysis:</h1>', unsafe_allow_html=True)
st.markdown('<h2>Exploratory Data Analysis (EDA)</h2>', unsafe_allow_html=True)
st.markdown(
"""Exploratory Data Analysis (EDA) is an approach to analyzing data sets to summarize their main characteristics, often with visual methods. In the context of the Anime Recommender App, EDA is used to explore various aspects of the anime dataset, such as ratings distribution, genre popularity, and the relationship between different features. By gaining insights into the data, we can better understand user preferences and improve the recommendation system.""",
unsafe_allow_html=True
)
st.markdown("<p>Key insights and visualizations from our dataset.</p>", unsafe_allow_html=True)
# Anime Dataset EDA
st.markdown('<h3>Anime Dataset</h3>', unsafe_allow_html=True)
# Genre distribution
st.markdown('<h4>Genre Distribution</h4>', unsafe_allow_html=True)
genre_distribution = anime['genre'].value_counts().head(10)
st.bar_chart(genre_distribution)
# Ratings distribution
st.markdown('<h4>Ratings Distribution</h4>', unsafe_allow_html=True)
ratings_distribution = anime['rating'].value_counts().sort_index()
st.line_chart(ratings_distribution)
# Types of anime
st.markdown('<h4>Types of Anime</h4>', unsafe_allow_html=True)
anime_types = anime['type'].value_counts()
st.bar_chart(anime_types)
# Episodes of anime
st.markdown('<h4>Episodes Distribution</h4>', unsafe_allow_html=True)
anime_episodes = anime['episodes'].value_counts()
st.bar_chart(anime_episodes)
# Members distribution (if available)
st.markdown('<h4>Members Distribution</h4>', unsafe_allow_html=True)
anime_members = anime['members'].value_counts().head(10)
st.bar_chart(anime_members)
# Train Dataset EDA
st.markdown('<h3>Train Dataset</h3>', unsafe_allow_html=True)
# User ratings distribution
st.markdown('<h4>User Ratings Distribution</h4>', unsafe_allow_html=True)
user_ratings_distribution = train['rating'].value_counts().sort_index()
st.line_chart(user_ratings_distribution)
# Number of ratings per anime
st.markdown('<h4>Number of Ratings per Anime</h4>', unsafe_allow_html=True)
ratings_per_anime = train['anime_id'].value_counts().head(10)
st.bar_chart(ratings_per_anime)
# Number of ratings per user
st.markdown('<h4>Number of Ratings per User</h4>', unsafe_allow_html=True)
ratings_per_user = train['user_id'].value_counts().head(10)
st.bar_chart(ratings_per_user)
elif selection == "Contact Us":
st.markdown('<h1>Contact Us:</h1>', unsafe_allow_html=True)
st.markdown(
"""
<div style="border-left: 7px solid #0288d1; padding-left: 15px; background-color: #e1f5fe;">
<p style="color: #000000; font-weight: bold;">Thank you for using the Anime Recommender App! We value your feedback and are here to assist you. If you have any questions, suggestions, or inquiries, please feel free to contact us using the information below:</p>
</div>
""", unsafe_allow_html=True
)
contact_info = """
**Email:** support@funiversenetwork.com
**Phone:** +27 322 774 789
**Address:** 381 Church Street, Pietermaritzburg, South Africa
"""
st.markdown(contact_info)
elif selection == "App Feedback":
st.markdown('<h1>App Feedback:</h1>', unsafe_allow_html=True)
st.markdown(
"""
<div style="border-left: 7px solid #0288d1; padding-left: 15px; background-color: #e1f5fe;">
<p style="color: #000000; font-weight: bold;">We hope you had a fantastic experience using the Anime Recommender App. Your feedback is invaluable to us, and we would love to hear your thoughts on how we can improve the app to better serve you. Please take a moment to share your feedback with us:</p>
</div>
""", unsafe_allow_html=True
)
st.text_area("Please leave your feedback here...")
elif selection == "Recommend":
st.title("Our Anime Collection")
# Video content with autoplay enabled
video_html = """
<div class="video-container">
<iframe width="914" height="514" src="https://www.youtube.com/embed/aN4tETHoAcc?autoplay=1" title=""It's Just Anime" 4K" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
</div>
"""
st.markdown(video_html, unsafe_allow_html=True)
st.markdown('<h1>Anime Recommendations</h1>', unsafe_allow_html=True)
st.markdown('<h2>Collaborative Filtering Recommendations</h2>', unsafe_allow_html=True)
# Example user input section for user_ratings
user_ratings = {}
st.markdown("### Rate some anime:")
anime_list = anime['name'].sample(10).tolist() # Random sample of anime names
for anime_name in anime_list:
rating = st.slider(f'Rate {anime_name}', 0, 10, 5)
user_ratings[anime_name] = rating
# Call the collaborative_recommendations function
if user_ratings:
recommendations = collaborative_recommendations(user_ratings, best_baseline_model, anime, train)
st.write("### Recommendations for you:")
st.table(recommendations)
else:
st.warning("Please rate at least one anime to get recommendations.")
# Content-based recommendations
st.markdown('<h2>Content-Based Recommendations</h2>', unsafe_allow_html=True)
user_input = st.text_area("Enter an anime you like (or multiple, separated by commas):")
user_input_list = [anime.strip() for anime in user_input.split(',') if anime.strip()]
if user_input_list:
recommendations, popular_anime = content_based_recommendations(user_input_list, anime, sig)
if recommendations:
st.write(f"### Recommended Animes based on your input: {', '.join(user_input_list)}")
st.table(recommendations)
else:
st.write(f"### Top 10 popular animes:")
st.table(popular_anime)
if __name__ == '__main__':
main()