-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraping.py
68 lines (51 loc) · 1.93 KB
/
scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from bs4 import BeautifulSoup
from time import sleep
from product import *
from requests import get
from csv import writer
def get_products(departament, products):
    """Scrape every product on *departament*'s listing page into *products*.

    Args:
        departament: object exposing a ``link`` attribute pointing at a page
            that contains an ``ol.items_container`` list of products.
        products: list extended in place with ``Product`` instances.

    Returns:
        The same *products* list, for call-chaining convenience.
    """
    url = departament.link
    sleep(2)  # be polite: pause between department requests
    page = get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    items = soup.find_all('ol', class_="items_container")[0].find_all('li')
    for item in items:
        name = item.find('p').text
        # Integer part of the price, e.g. "1.234" (pt-BR thousands separator).
        price = item.find('span', class_='andes-money-amount__fraction').text
        try:
            cents = item.find(
                'span',
                class_='andes-money-amount__cents andes-money-amount__cents--superscript-24',
            ).text
            price += ',' + cents
        except AttributeError:
            # No cents element on the page -> find() returned None.
            price += ',0'
        # "1.234,56" (pt-BR) -> 1234.56: drop thousands dots, comma -> decimal point.
        price = float(price.replace('.', '').replace(',', '.'))
        products.append(Product(departament, price, name))
    return products
def get_csv(products, filename='products.csv'):
    """Write *products* to a CSV file with Name, Price and Departament columns.

    Args:
        products: iterable of objects exposing ``name``, ``price`` and
            ``departament.name`` attributes.
        filename: output path; defaults to ``'products.csv'`` so existing
            callers keep their behavior.
    """
    # Explicit UTF-8: product names from a Brazilian site contain accented
    # characters that the platform default encoding may not handle.
    with open(filename, 'w', newline='', encoding='utf-8') as file:
        csvwriter = writer(file)
        csvwriter.writerow(['Name', 'Price', 'Departament name'])
        for product in products:
            csvwriter.writerow([product.name, str(product.price), product.departament.name])
def get_data():
    """Scrape the Mercado Livre offers page and collect products per department.

    Discovers the department links from the page's sidebar, scrapes each
    department's products, prints each department via ``show()``, and writes
    everything to ``products.csv``.

    Returns:
        tuple: ``(products, departaments)`` — a flat list of ``Product``
        instances and the list of ``Department`` objects scraped.
    """
    url = "https://www.mercadolivre.com.br/ofertas#nav-header"
    page = get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    # The department links live in the second <ol> of the first <aside>.
    aside = soup.find_all('aside')
    ols = aside[0].find_all('ol')
    a_tags = ols[1].find_all('a', href=True)
    departaments = []
    for link in a_tags:
        href = link.get('href')
        # Link text looks like "Eletronicos (123)" -> strip the count/parens.
        text = link.text.strip('0123456789() ')
        departaments.append(Department(text, href))
    products = []
    print('Scraping departaments...\n')
    for departament in departaments:
        get_products(departament, products)
        departament.show()
    get_csv(products)
    return products, departaments