From cac0f65482161534d324286d96ea85de3d9fbb81 Mon Sep 17 00:00:00 2001
From: Sara Yasser Amur Al Shukaili <71524@omantel.om>
Date: Mon, 13 Jan 2025 11:22:36 +0400
Subject: [PATCH] Added Logic for Book Recommendation System

---
 .../BookRecommenderSystem.ipynb               | 257 ++++++++++++++++++
 1 file changed, 257 insertions(+)
 create mode 100644 jupyter_files/sara_work_area/BookRecommenderSystem.ipynb

diff --git a/jupyter_files/sara_work_area/BookRecommenderSystem.ipynb b/jupyter_files/sara_work_area/BookRecommenderSystem.ipynb
new file mode 100644
index 00000000..0fb50d3c
--- /dev/null
+++ b/jupyter_files/sara_work_area/BookRecommenderSystem.ipynb
@@ -0,0 +1,257 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Book Recommender System"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.metrics.pairwise import cosine_similarity\n",
+    "from scipy.sparse import csr_matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\71524\\AppData\\Local\\Temp\\ipykernel_6532\\4250405617.py:2: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "  books = pd.read_csv('Books.csv')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load data (low_memory=False parses each file in one pass, avoiding the mixed-type DtypeWarning on Books.csv)\n",
+    "books = pd.read_csv('Books.csv', low_memory=False)\n",
+    "ratings = pd.read_csv('Ratings.csv')\n",
+    "users = pd.read_csv('Users.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preprocess data: a rating of 0 means the book is unrated, so keep only rows with a positive rating\n",
+    "ratings = ratings.loc[ratings['Book-Rating'].gt(0)]\n",
+    "book_ratings = ratings.merge(books, on='ISBN')  # Attach book info to each remaining rating"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of unique book titles: 135567\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Count the distinct (non-null) titles in the merged ratings\n",
+    "unique_titles_count = len(book_ratings['Book-Title'].dropna().unique())\n",
+    "print(f\"Number of unique book titles: {unique_titles_count}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Filtered data has 10844 unique book titles.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Keep only titles with more than 5 ratings: look up each row's per-title rating count and compare\n",
+    "book_ratings_filtered = book_ratings[book_ratings['Book-Title'].map(book_ratings['Book-Title'].value_counts()) > 5]\n",
+    "print(f\"Filtered data has {book_ratings_filtered['Book-Title'].nunique()} unique book titles.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(383842, 10)\n",
+      "   User-ID        ISBN  Book-Rating  \\\n",
+      "0   276726  0155061224            5   \n",
+      "1   276729  052165615X            3   \n",
+      "2   276729  0521795028            6   \n",
+      "3   276744  038550120X            7   \n",
+      "4   276747  0060517794            9   \n",
+      "\n",
+      "                                          Book-Title    Book-Author  \\\n",
+      "0                                   Rites of Passage     Judith Rae   \n",
+      "1                                     Help!: Level 1  Philip Prowse   \n",
+      "2  The Amsterdam Connection : Level 4 (Cambridge ...    Sue Leather   \n",
+      "3                                    A Painted House   JOHN GRISHAM   \n",
+      "4                           Little Altars Everywhere  Rebecca Wells   \n",
+      "\n",
+      "  Year-Of-Publication                   Publisher  \\\n",
+      "0                2001                      Heinle   \n",
+      "1                1999  Cambridge University Press   \n",
+      "2                2001  Cambridge University Press   \n",
+      "3                2001                   Doubleday   \n",
+      "4                2003                 HarperTorch   \n",
+      "\n",
+      "                                         Image-URL-S  \\\n",
+      "0  http://images.amazon.com/images/P/0155061224.0...   \n",
+      "1  http://images.amazon.com/images/P/052165615X.0...   \n",
+      "2  http://images.amazon.com/images/P/0521795028.0...   \n",
+      "3  http://images.amazon.com/images/P/038550120X.0...   \n",
+      "4  http://images.amazon.com/images/P/0060517794.0...   \n",
+      "\n",
+      "                                         Image-URL-M  \\\n",
+      "0  http://images.amazon.com/images/P/0155061224.0...   \n",
+      "1  http://images.amazon.com/images/P/052165615X.0...   \n",
+      "2  http://images.amazon.com/images/P/0521795028.0...   \n",
+      "3  http://images.amazon.com/images/P/038550120X.0...   \n",
+      "4  http://images.amazon.com/images/P/0060517794.0...   \n",
+      "\n",
+      "                                         Image-URL-L  \n",
+      "0  http://images.amazon.com/images/P/0155061224.0...  \n",
+      "1  http://images.amazon.com/images/P/052165615X.0...  \n",
+      "2  http://images.amazon.com/images/P/0521795028.0...  \n",
+      "3  http://images.amazon.com/images/P/038550120X.0...  \n",
+      "4  http://images.amazon.com/images/P/0060517794.0...  \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create pivot table: one row per user, one column per title; user/title pairs without a rating become 0\n",
+    "rating_pivot = book_ratings_filtered.pivot_table(values='Book-Rating', index='User-ID', columns='Book-Title', aggfunc='mean').fillna(0)\n",
+    "\n",
+    "# Sanity check: select the rows of book_ratings whose rating is above zero\n",
+    "non_zero_ratings = book_ratings.loc[book_ratings['Book-Rating'].gt(0)]\n",
+    "print(non_zero_ratings.shape)  # (rows, columns) of the non-zero-rating subset\n",
+    "print(non_zero_ratings.head())  # First five rows of that subset\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Store the pivot table's values in CSR form so the zero cells are not kept explicitly\n",
+    "rating_matrix = csr_matrix(rating_pivot.to_numpy())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Compute cosine similarity\n",
+    "user_similarity = cosine_similarity(rating_matrix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Recommend books for a given user\n",
+    "def recommend_books(user_id, n=5):\n",
+    "    \"\"\"Return up to n titles with the highest mean rating among the n users most similar to user_id.\n",
+    "\n",
+    "    Returns the string \"User not found.\" when user_id has no row in rating_pivot.\n",
+    "    \"\"\"\n",
+    "    if user_id not in rating_pivot.index:\n",
+    "        return \"User not found.\"\n",
+    "    user_idx = rating_pivot.index.get_loc(user_id)\n",
+    "    scores = pd.Series(user_similarity[user_idx], index=rating_pivot.index)\n",
+    "    # Remove the user by label: on tied scores their own entry is not guaranteed to sort first.\n",
+    "    similar_users = scores.drop(user_id).nlargest(n).index\n",
+    "    # Skip titles the user has already rated so only unseen books are suggested.\n",
+    "    seen = book_ratings.loc[book_ratings['User-ID'] == user_id, 'Book-Title']\n",
+    "    candidates = book_ratings[book_ratings['User-ID'].isin(similar_users) & ~book_ratings['Book-Title'].isin(seen)]\n",
+    "    mean_ratings = candidates.groupby('Book-Title')['Book-Rating'].mean()\n",
+    "    return mean_ratings.sort_values(ascending=False).head(n).index.tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Recommended Books for user 274061:\n",
+      "1. 1984 (Everyman's Library)\n",
+      "2. Teddy Bear for Sale (Hello Reader!, Level 1)\n",
+      "3. Scotland Bc: An Introduction to the Prehistoric Houses, Tombs, Ceremonial Monuments, and Fortifications in the Care of the Secretary of State for Scotland (Historic Buildings and Monuments)\n",
+      "4. SIXTH COLUMN\n",
+      "5. Roller Skates! (Hello Reader! Level 2 (Paperback))\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Example usage: print the recommendations as a numbered list\n",
+    "user_id = 274061\n",
+    "recommendations = recommend_books(user_id, n=5)\n",
+    "# An unknown user yields a message string; wrap it so it prints whole instead of one character per line\n",
+    "print(f\"Recommended Books for user {user_id}:\")\n",
+    "for i, book in enumerate([recommendations] if isinstance(recommendations, str) else recommendations, 1):\n",
+    "    print(f\"{i}. {book}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}