-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider-man.py
25 lines (21 loc) · 1.32 KB
/
spider-man.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import scrapy
class MankanSpider(scrapy.Spider):
    """Crawl mankan.me product pages by numeric id and emit one item per page.

    Ids ``0 .. max_products - 1`` are requested from ``read_one.php``. For each
    response the spider yields a dict holding the requested id plus the text
    found by a fixed set of CSS selectors (name, nutrient amounts and four
    activity numbers).

    The number of ids can be changed without touching the code::

        scrapy crawl mankan -a max_products=50
    """

    name = "mankan"

    # Default number of product ids to request. Kept as a class attribute so a
    # ``-a max_products=N`` spider argument (set on the instance by Scrapy)
    # overrides it.
    max_products = 10

    # Output field -> CSS selector of the text node that holds its value.
    # Insertion order is the key order of the yielded item.
    _FIELD_SELECTORS = {
        'name': "div.read-one-header h1 ::text",
        'calory': "div.calory-box span#calory-amount ::text",
        'carbo': "div.carbo-box span#carbo-amount ::text",
        'protein': "div.protein-box span#protein-amount ::text",
        'fat': "div.fat-box span#fat-amount ::text",
        'fiber': "div.fiber-box span#fiber-amount ::text",
        'activity1': "div.icon-activity p#number-activity-1 ::text",
        'activity2': "div.icon-activity p#number-activity-2 ::text",
        'activity3': "div.icon-activity p#number-activity-3 ::text",
        'activity4': "div.icon-activity p#number-activity-4 ::text",
    }

    def start_requests(self):
        """Yield one request per product id in ``range(max_products)``.

        The id is attached to the request as ``meta['index']`` so that
        ``detailer`` can report which id a response belongs to.

        Raises:
            ValueError: if ``max_products`` cannot be converted to ``int``.
        """
        # Spider arguments given with ``-a`` arrive as strings; normalise once.
        product_count = int(self.max_products)
        for index in range(product_count):
            yield scrapy.Request(
                url=f"https://mankan.me/mag/lib/read_one.php?id={index}",
                callback=self.detailer,
                meta={'index': index},
            )

    def detailer(self, response):
        """Build the item for a single product page.

        Args:
            response: the page response; ``response.meta['index']`` must hold
                the id that was requested.

        Yields:
            A dict with ``siteId`` followed by one key per entry in
            ``_FIELD_SELECTORS``. Each value is the first text node matched by
            the selector, or ``None`` when the selector matches nothing.
        """
        item = {'siteId': response.meta['index']}
        for field, selector in self._FIELD_SELECTORS.items():
            item[field] = response.css(selector).get()
        yield item