From cac0f65482161534d324286d96ea85de3d9fbb81 Mon Sep 17 00:00:00 2001 From: Sara Yasser Amur Al Shukaili <71524@omantel.om> Date: Mon, 13 Jan 2025 11:22:36 +0400 Subject: [PATCH] Added Logic for Book Recommendation System --- .../BookRecommenderSystem.ipynb | 257 ++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 jupyter_files/sara_work_area/BookRecommenderSystem.ipynb diff --git a/jupyter_files/sara_work_area/BookRecommenderSystem.ipynb b/jupyter_files/sara_work_area/BookRecommenderSystem.ipynb new file mode 100644 index 00000000..0fb50d3c --- /dev/null +++ b/jupyter_files/sara_work_area/BookRecommenderSystem.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Book Recommender System" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.metrics.pairwise import cosine_similarity\n", + "from scipy.sparse import csr_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\71524\\AppData\\Local\\Temp\\ipykernel_6532\\4250405617.py:2: DtypeWarning: Columns (3) have mixed types. 
# ---------------------------------------------------------------------------
# Load data
# ---------------------------------------------------------------------------
# Books.csv has a column with mixed value types (column index 3), which makes
# the default chunked parser emit a DtypeWarning.  Reading the file in a single
# pass (low_memory=False) lets pandas infer one dtype for the whole column and
# silences the warning.
books = pd.read_csv('Books.csv', low_memory=False)
ratings = pd.read_csv('Ratings.csv')
# NOTE(review): `users` is not referenced by any of the cells visible in this
# notebook; it is kept because later work may depend on it.
users = pd.read_csv('Users.csv')

# ---------------------------------------------------------------------------
# Preprocess data
# ---------------------------------------------------------------------------
# A rating of 0 means the book was not rated, so it carries no preference
# signal and is dropped before anything else is computed.
ratings = ratings[ratings['Book-Rating'] > 0]
# Attach book metadata (title, author, ...) to every remaining rating.
book_ratings = pd.merge(ratings, books, on='ISBN')

# Check how many unique book titles are present
# (the committed run of this notebook reported 135567).
unique_titles_count = book_ratings['Book-Title'].nunique()
print(f"Number of unique book titles: {unique_titles_count}")

# ---------------------------------------------------------------------------
# Keep only titles with enough ratings
# ---------------------------------------------------------------------------
# A title must have MORE than this many ratings to be kept.
MIN_RATINGS_PER_TITLE = 5

# transform('size') broadcasts each title's rating count back onto its rows,
# so the filter is a single vectorised comparison instead of one Python
# lambda call per title.  Rows whose title is missing get NaN and are dropped,
# exactly as groupby(...).filter(...) would drop them.
ratings_per_title = book_ratings.groupby('Book-Title')['Book-Title'].transform('size')
book_ratings_filtered = book_ratings[ratings_per_title > MIN_RATINGS_PER_TITLE]
# (the committed run of this notebook reported 10844 titles after filtering).
print(f"Filtered data has {book_ratings_filtered['Book-Title'].nunique()} unique book titles.")
(Cambridge ... Sue Leather \n", + "3 A Painted House JOHN GRISHAM \n", + "4 Little Altars Everywhere Rebecca Wells \n", + "\n", + " Year-Of-Publication Publisher \\\n", + "0 2001 Heinle \n", + "1 1999 Cambridge University Press \n", + "2 2001 Cambridge University Press \n", + "3 2001 Doubleday \n", + "4 2003 HarperTorch \n", + "\n", + " Image-URL-S \\\n", + "0 http://images.amazon.com/images/P/0155061224.0... \n", + "1 http://images.amazon.com/images/P/052165615X.0... \n", + "2 http://images.amazon.com/images/P/0521795028.0... \n", + "3 http://images.amazon.com/images/P/038550120X.0... \n", + "4 http://images.amazon.com/images/P/0060517794.0... \n", + "\n", + " Image-URL-M \\\n", + "0 http://images.amazon.com/images/P/0155061224.0... \n", + "1 http://images.amazon.com/images/P/052165615X.0... \n", + "2 http://images.amazon.com/images/P/0521795028.0... \n", + "3 http://images.amazon.com/images/P/038550120X.0... \n", + "4 http://images.amazon.com/images/P/0060517794.0... \n", + "\n", + " Image-URL-L \n", + "0 http://images.amazon.com/images/P/0155061224.0... \n", + "1 http://images.amazon.com/images/P/052165615X.0... \n", + "2 http://images.amazon.com/images/P/0521795028.0... \n", + "3 http://images.amazon.com/images/P/038550120X.0... \n", + "4 http://images.amazon.com/images/P/0060517794.0... 
# ---------------------------------------------------------------------------
# User x title rating matrix
# ---------------------------------------------------------------------------
# One row per user, one column per title.  pivot_table averages duplicate
# (user, title) pairs (its default aggregation); pairs with no rating become 0.
# NOTE(review): this materialises a dense users x titles frame before the
# sparse conversion below, so peak memory is dominated by this step.
rating_pivot = book_ratings_filtered.pivot_table(
    index='User-ID', columns='Book-Title', values='Book-Rating'
).fillna(0)

# Sanity check: show how many rated rows exist and what they look like.
# (the committed run of this notebook printed a shape of (383842, 10)).
non_zero_ratings = book_ratings[book_ratings['Book-Rating'] > 0]
print(non_zero_ratings.shape)  # number of rows with non-zero ratings
print(non_zero_ratings.head())  # first few rows with non-zero ratings

# Convert to sparse matrix for efficiency
rating_matrix = csr_matrix(rating_pivot.values)

# Pairwise cosine similarity between users; row/column i corresponds to
# rating_pivot.index[i].
user_similarity = cosine_similarity(rating_matrix)


def recommend_books(user_id, n=5, *, exclude_rated=True):
    """Recommend book titles for ``user_id`` from its most similar users.

    The ``n`` users with the highest positive cosine similarity to
    ``user_id`` are selected, every book they rated is averaged per title,
    and the ``n`` best-rated titles are returned.

    Args:
        user_id: A value from the ``User-ID`` column.
        n: Number of neighbours to consult and maximum number of titles to
            return.
        exclude_rated: When true (default), titles that ``user_id`` has
            already rated are never recommended back.  Pass ``False`` to get
            the raw neighbour ranking.

    Returns:
        A list of at most ``n`` book titles, best first.  For a user that is
        not present in ``rating_pivot`` the string ``"User not found."`` is
        returned instead (kept for backward compatibility), so callers should
        check for a ``str`` result before iterating.
    """
    if user_id not in rating_pivot.index:
        return "User not found."
    if n <= 0:
        return []

    user_row = rating_pivot.index.get_loc(user_id)
    scores = pd.Series(user_similarity[user_row], index=rating_pivot.index)

    # Remove the user explicitly rather than assuming they sort first: any
    # other user with similarity 1.0 and a lower row index would otherwise
    # sort ahead of them and a genuine neighbour would be skipped instead.
    scores = scores.drop(user_id)
    # A similarity of 0 means no rated title in common; such users carry no
    # signal and must not be used to pad the neighbour list.
    neighbours = scores[scores > 0].nlargest(n).index

    candidates = book_ratings[book_ratings['User-ID'].isin(neighbours)]
    if exclude_rated:
        already_rated = book_ratings.loc[
            book_ratings['User-ID'] == user_id, 'Book-Title'
        ]
        candidates = candidates[~candidates['Book-Title'].isin(already_rated)]

    ranked_titles = (
        candidates.groupby('Book-Title')['Book-Rating']
        .mean()
        .sort_values(ascending=False)
    )
    return ranked_titles.head(n).index.tolist()
# Example usage
user_id = 274061
recommendations = recommend_books(user_id, n=5)

if isinstance(recommendations, str):
    # recommend_books reports an unknown user with a message string; print it
    # as-is instead of enumerating it character by character.
    print(recommendations)
else:
    # Print recommendations under each other with numbering
    print(f"Recommended Books for user {user_id}:")
    for rank, title in enumerate(recommendations, 1):
        print(f"{rank}. {title}")