Skip to content

Commit

Permalink
Add new version
Browse files Browse the repository at this point in the history
  • Loading branch information
barkamoljon authored Feb 28, 2023
1 parent 335e722 commit cd9ae52
Showing 1 changed file with 285 additions and 0 deletions.
285 changes: 285 additions & 0 deletions Amazon Web Scraper Project.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "e7e64222",
"metadata": {},
"outputs": [],
"source": [
"# Import libraries\n",
"from bs4 import BeautifulSoup\n",
"import requests\n",
"import time\n",
"import datetime\n",
"import smtplib"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9dd0533d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Mathematics for Machine Learning \n",
"4.6 out of 5 stars\n",
"634 ratings\n",
"$48.99\n",
" Delivery March 2 - 9 \n"
]
}
],
"source": [
"# Connect to Website\n",
"header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36','Referer':'https://www.amazon.com/'}\n",
"URL = 'https://www.amazon.com/Mathematics-Machine-Learning-Peter-Deisenroth/dp/110845514X/ref=sr_1_1_sspa?crid=3GZ8RPR9N6MPE&keywords=machine+learning&qid=1670788336&sprefix=machine%2Caps%2C253&sr=8-1-spons&psc=1&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFJUkQ0UlFBM1M0V1cmZW5jcnlwdGVkSWQ9QTEwNDcyMjhQSk9ZWFpBUVlGTCZlbmNyeXB0ZWRBZElkPUEwMzMwODIwMkZNR0cxMDJOMzZLQyZ3aWRnZXROYW1lPXNwX2F0ZiZhY3Rpb249Y2xpY2tSZWRpcmVjdCZkb05vdExvZ0NsaWNrPXRydWU='\n",
"page = requests.get(URL,headers=header)\n",
"soup = BeautifulSoup(page.content,features=\"lxml\")\n",
"\n",
"title = soup.find(id =\"productTitle\").get_text()\n",
"rating = soup.find('i',{'class':\"a-icon a-icon-star a-star-4-5\"}).get_text()\n",
"num_ratings = soup.find('span', {'id':\"acrCustomerReviewText\"}).get_text()\n",
"price = soup.find('span', {'class':'a-offscreen'}).get_text()\n",
"when_delivery = soup.find(id=\"mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE\").get_text()\n",
"\n",
"print(title)\n",
"print(rating)\n",
"print(num_ratings)\n",
"print(price)\n",
"print(when_delivery)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e61627e9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mathematics for Machine Learning\n",
"4.6\n",
"634\n",
"48.99\n",
"March 2 - 9\n"
]
}
],
"source": [
"title = title.strip()\n",
"rating = rating.strip()[:-15]\n",
"num_ratings = num_ratings.strip()[:-8]\n",
"price = price.strip()[1:]\n",
"when_delivery = when_delivery.strip()[9:]\n",
"\n",
"print(title)\n",
"print(rating)\n",
"print(num_ratings)\n",
"print(price)\n",
"print(when_delivery)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "dde2b2bc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2023-02-28\n"
]
}
],
"source": [
"import datetime\n",
"\n",
"today = datetime.date.today()\n",
"print(today)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8abf5a22",
"metadata": {},
"outputs": [],
"source": [
"import csv \n",
"\n",
"header = ['Title', 'Rating(out of 5 star)', 'Number of Ratings', 'Price($)', 'Delivery date', 'Date']\n",
"data = [title, rating, num_ratings, price, when_delivery, today]\n",
"\n",
"\n",
"with open('AmazonWebScraperDataset.csv', 'w', newline='', encoding='UTF8') as f:\n",
" writer = csv.writer(f)\n",
" writer.writerow(header)\n",
" writer.writerow(data)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "6920af61",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Title Rating(out of 5 star) Number of Ratings \\\n",
"0 Mathematics for Machine Learning 4.6 634 \n",
"\n",
" Price($) Delivery date Date \n",
"0 48.99 March 2 - 9 2023-02-28 \n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(r\"C:\\Users\\barka\\AmazonWebScraperDataset.csv\")\n",
"\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ca8e745c",
"metadata": {},
"outputs": [],
"source": [
"#Now we are appending data to csv\n",
"with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:\n",
" writer = csv.writer(f)\n",
" writer.writerow(data)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "b84aea60",
"metadata": {},
"outputs": [],
"source": [
"def check_price():\n",
" header = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36','Referer':'https://www.amazon.com/'}\n",
" URL = 'https://www.amazon.com/Mathematics-Machine-Learning-Peter-Deisenroth/dp/110845514X/ref=sr_1_1_sspa?crid=3GZ8RPR9N6MPE&keywords=machine+learning&qid=1670788336&sprefix=machine%2Caps%2C253&sr=8-1-spons&psc=1&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFJUkQ0UlFBM1M0V1cmZW5jcnlwdGVkSWQ9QTEwNDcyMjhQSk9ZWFpBUVlGTCZlbmNyeXB0ZWRBZElkPUEwMzMwODIwMkZNR0cxMDJOMzZLQyZ3aWRnZXROYW1lPXNwX2F0ZiZhY3Rpb249Y2xpY2tSZWRpcmVjdCZkb05vdExvZ0NsaWNrPXRydWU='\n",
" page = requests.get(URL,headers=header)\n",
" soup = BeautifulSoup(page.content,features=\"lxml\")\n",
"\n",
" title = soup.find(id =\"productTitle\").get_text()\n",
" rating = soup.find('i',{'class':\"a-icon a-icon-star a-star-4-5\"}).get_text()\n",
" num_ratings = soup.find('span', {'id':\"acrCustomerReviewText\"}).get_text()\n",
" price = soup.find('span', {'class':'a-offscreen'}).get_text()\n",
" when_delivery = soup.find(id=\"mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE\").get_text()\n",
" \n",
" title = title.strip()\n",
" rating = rating.strip()[:-15]\n",
" num_ratings = num_ratings.strip()[:-8]\n",
" price = price.strip()[1:]\n",
" when_delivery = when_delivery.strip()[17:]\n",
" \n",
" import datetime\n",
"\n",
" today = datetime.date.today()\n",
" \n",
" import csv \n",
"\n",
" header = ['Title', 'Rating(out of 5 star)', 'Number of Ratings', 'Price($)', 'Delivery date', 'Date']\n",
" data = [title, rating, num_ratings, price, when_delivery, today]\n",
" \n",
" with open('AmazonWebScraperDataset.csv', 'a+', newline='', encoding='UTF8') as f:\n",
" writer = csv.writer(f)\n",
" writer.writerow(data)\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "952c9de6",
"metadata": {},
"outputs": [],
"source": [
"# Runs check_price after a set time and inputs data into your CSV\n",
"\n",
"while(True):\n",
" check_price()\n",
" time.sleep(86400)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0833a448",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(r\"C:\\Users\\barka\\AmazonWebScraperDataset.csv\")\n",
"\n",
"print(df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae976f98",
"metadata": {},
"outputs": [],
"source": [
"# If uou want to try sending yourself an email (just for fun) when a price hits below a certain level you can try it\n",
"# out with this script\n",
"\n",
"def send_mail():\n",
" server = smtplib.SMTP_SSL('smtp.gmail.com',465)\n",
" server.ehlo()\n",
" server.login('barkamolurinboev@gmail.com', 'mail_email_password') #write here your email password\n",
" \n",
" recipient = 'recipient_email_address@gmail.com'\n",
" subject = 'The Book you want is below $48! Now is your chance to buy!'\n",
" body = \"Barkamol, This is the moment we have been waiting for. Now is your chance to pick up the book of your dreams. Don't mess it up! Link here: https://www.amazon.com/Mathematics-Machine-Learning-Peter-Deisenroth/dp/110845514X/ref=sr_1_1_sspa?crid=3GZ8RPR9N6MPE&keywords=machine+learning&qid=1670788336&sprefix=machine%2Caps%2C253&sr=8-1-spons&psc=1&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFJUkQ0UlFBM1M0V1cmZW5jcnlwdGVkSWQ9QTEwNDcyMjhQSk9ZWFpBUVlGTCZlbmNyeXB0ZWRBZElkPUEwMzMwODIwMkZNR0cxMDJOMzZLQyZ3aWRnZXROYW1lPXNwX2F0ZiZhY3Rpb249Y2xpY2tSZWRpcmVjdCZkb05vdExvZ0NsaWNrPXRydWU=\"\n",
" \n",
" message = f\"Subject: {subject}\\n\\n{body}\"\n",
" \n",
" server.sendmail('barkamolurinboev@gmail.com', recipient, message)\n",
" \n",
" server.quit()\n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit cd9ae52

Please sign in to comment.