Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 257 additions & 0 deletions jupyter_files/sara_work_area/BookRecommenderSystem.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Book Recommender System"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"from scipy.sparse import csr_matrix"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\71524\\AppData\\Local\\Temp\\ipykernel_6532\\4250405617.py:2: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" books = pd.read_csv('Books.csv')\n"
]
}
],
"source": [
"# Load data\n",
"# low_memory=False makes pandas read Books.csv in a single pass instead of in\n",
"# chunks, which avoids the DtypeWarning about mixed types in column 3\n",
"books = pd.read_csv('Books.csv', low_memory=False)\n",
"ratings = pd.read_csv('Ratings.csv')\n",
"users = pd.read_csv('Users.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Preprocess data\n",
"# A rating of 0 means the book was left unrated, so keep only positive ratings\n",
"ratings = ratings.loc[ratings['Book-Rating'].gt(0)]\n",
"# Attach the book details to every remaining rating, joining on ISBN\n",
"book_ratings = ratings.merge(books, on='ISBN')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of unique book titles: 135567\n"
]
}
],
"source": [
"# Report the number of distinct book titles among the merged ratings\n",
"unique_titles_count = book_ratings.loc[:, 'Book-Title'].nunique(dropna=True)\n",
"print(f\"Number of unique book titles: {unique_titles_count}\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Filtered data has 10844 unique book titles.\n"
]
}
],
"source": [
"# Filter for books rated more than 5 times\n",
"# Count the ratings per title once and select rows with a boolean mask; this\n",
"# avoids calling a Python lambda for every single title group.\n",
"# Rows without a title map to NaN, fail the comparison and are dropped.\n",
"title_counts = book_ratings['Book-Title'].value_counts()\n",
"book_ratings_filtered = book_ratings[book_ratings['Book-Title'].map(title_counts) > 5]\n",
"print(f\"Filtered data has {book_ratings_filtered['Book-Title'].nunique()} unique book titles.\")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(383842, 10)\n",
" User-ID ISBN Book-Rating \\\n",
"0 276726 0155061224 5 \n",
"1 276729 052165615X 3 \n",
"2 276729 0521795028 6 \n",
"3 276744 038550120X 7 \n",
"4 276747 0060517794 9 \n",
"\n",
" Book-Title Book-Author \\\n",
"0 Rites of Passage Judith Rae \n",
"1 Help!: Level 1 Philip Prowse \n",
"2 The Amsterdam Connection : Level 4 (Cambridge ... Sue Leather \n",
"3 A Painted House JOHN GRISHAM \n",
"4 Little Altars Everywhere Rebecca Wells \n",
"\n",
" Year-Of-Publication Publisher \\\n",
"0 2001 Heinle \n",
"1 1999 Cambridge University Press \n",
"2 2001 Cambridge University Press \n",
"3 2001 Doubleday \n",
"4 2003 HarperTorch \n",
"\n",
" Image-URL-S \\\n",
"0 http://images.amazon.com/images/P/0155061224.0... \n",
"1 http://images.amazon.com/images/P/052165615X.0... \n",
"2 http://images.amazon.com/images/P/0521795028.0... \n",
"3 http://images.amazon.com/images/P/038550120X.0... \n",
"4 http://images.amazon.com/images/P/0060517794.0... \n",
"\n",
" Image-URL-M \\\n",
"0 http://images.amazon.com/images/P/0155061224.0... \n",
"1 http://images.amazon.com/images/P/052165615X.0... \n",
"2 http://images.amazon.com/images/P/0521795028.0... \n",
"3 http://images.amazon.com/images/P/038550120X.0... \n",
"4 http://images.amazon.com/images/P/0060517794.0... \n",
"\n",
" Image-URL-L \n",
"0 http://images.amazon.com/images/P/0155061224.0... \n",
"1 http://images.amazon.com/images/P/052165615X.0... \n",
"2 http://images.amazon.com/images/P/0521795028.0... \n",
"3 http://images.amazon.com/images/P/038550120X.0... \n",
"4 http://images.amazon.com/images/P/0060517794.0... \n"
]
}
],
"source": [
"# Build the user x title rating table; (user, title) pairs without a rating become 0\n",
"rating_pivot = pd.pivot_table(\n",
"    book_ratings_filtered,\n",
"    index='User-ID',\n",
"    columns='Book-Title',\n",
"    values='Book-Rating',\n",
").fillna(0)\n",
"\n",
"# Sanity check: count the merged rows whose rating is above zero and preview them\n",
"non_zero_ratings = book_ratings.loc[book_ratings['Book-Rating'] > 0]\n",
"print(non_zero_ratings.shape)  # (rows, columns) of the non-zero subset\n",
"print(non_zero_ratings.head())  # first few rows of the non-zero subset\n"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# Store the pivot table's values as a SciPy CSR sparse matrix for efficiency\n",
"rating_matrix = csr_matrix(rating_pivot.to_numpy())"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Pairwise cosine similarity between the rows of the rating matrix (one row per user)\n",
"user_similarity = cosine_similarity(rating_matrix, rating_matrix)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# Recommend books for a given user\n",
"def recommend_books(user_id, n=5):\n",
"    \"\"\"Recommend up to ``n`` book titles for ``user_id`` from its most similar users.\n",
"\n",
"    Reads the row of ``user_similarity`` that belongs to ``user_id``, picks the\n",
"    ``n`` most similar other users, and ranks the titles those users rated in\n",
"    ``book_ratings`` by mean rating. Titles ``user_id`` has already rated are\n",
"    left out.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    user_id : label in ``rating_pivot.index``\n",
"        User to recommend for.\n",
"    n : int, default 5\n",
"        Used both as the number of similar users consulted and as the maximum\n",
"        number of titles returned.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list or str\n",
"        Recommended titles, highest mean rating first; the string\n",
"        \"User not found.\" when ``user_id`` is not in ``rating_pivot.index``.\n",
"    \"\"\"\n",
"    if user_id not in rating_pivot.index:\n",
"        return \"User not found.\"\n",
"    user_idx = rating_pivot.index.get_loc(user_id)\n",
"    # Label every similarity score with its user id and drop the target user\n",
"    # explicitly: assuming it sorts first breaks when another user ties at 1.0.\n",
"    scores = pd.Series(user_similarity[user_idx], index=rating_pivot.index).drop(user_id)\n",
"    # A similarity of 0 means no rated title in common, so such users are skipped.\n",
"    similar_users = scores[scores > 0].nlargest(n).index\n",
"    neighbour_ratings = book_ratings[book_ratings['User-ID'].isin(similar_users)]\n",
"    # Never recommend a title the target user has already rated.\n",
"    already_rated = book_ratings.loc[book_ratings['User-ID'] == user_id, 'Book-Title']\n",
"    candidates = neighbour_ratings[~neighbour_ratings['Book-Title'].isin(already_rated)]\n",
"    recommended_books = (\n",
"        candidates.groupby('Book-Title')['Book-Rating']\n",
"        .mean()\n",
"        .sort_values(ascending=False)\n",
"        .head(n)\n",
"        .index\n",
"    )\n",
"    return recommended_books.tolist()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Recommended Books for user 274061:\n",
"1. 1984 (Everyman's Library)\n",
"2. Teddy Bear for Sale (Hello Reader!, Level 1)\n",
"3. Scotland Bc: An Introduction to the Prehistoric Houses, Tombs, Ceremonial Monuments, and Fortifications in the Care of the Secretary of State for Scotland (Historic Buildings and Monuments)\n",
"4. SIXTH COLUMN\n",
"5. Roller Skates! (Hello Reader! Level 2 (Paperback))\n"
]
}
],
"source": [
"# Example usage\n",
"user_id = 274061\n",
"recommendations = recommend_books(user_id, n=5)\n",
"if isinstance(recommendations, str):\n",
"    # recommend_books reports an unknown user with a message string; print it\n",
"    # as-is instead of enumerating it character by character.\n",
"    print(recommendations)\n",
"else:\n",
"    # Print recommendations under each other with numbering\n",
"    print(f\"Recommended Books for user {user_id}:\")\n",
"    for i, book in enumerate(recommendations, 1):\n",
"        print(f\"{i}. {book}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}