-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsan_antonio_scraper.py
23 lines (19 loc) · 914 Bytes
/
san_antonio_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# -*- coding: utf-8 -*-
import scrapy
from scrapy.selector import Selector
class BlogSpider(scrapy.Spider):
    """Spider that collects quote text from a Babelio author "citations" listing.

    The crawl starts from the URL in ``start_urls`` and walks the listing by
    following the pagination link that comes right after the one marked
    ``class="active"``.
    """

    name = 'blogspider'
    start_urls = ['http://www.babelio.com/auteur/Frederic-Dard/7187/citations']

    def parse(self, response):
        """Yield one item per matched quote node, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ``{'quote': <first text node or None>}`` for every node matched by
            the quote selector, followed by at most one ``scrapy.Request`` for
            the next listing page, handled by this same callback.
        """
        for quote_node in response.css('div.post_con div.text.row div'):
            yield {'quote': quote_node.css('div ::text').extract_first()}

        next_page_url = self._next_page_url(response)
        if next_page_url:
            yield scrapy.Request(response.urljoin(next_page_url), callback=self.parse)

    @staticmethod
    def _next_page_url(response):
        """Return the href of the pagination link after the active one, or None.

        None is returned when the page has no pagination links, when no link
        is marked active, when the active link is the last one (final page),
        or when the following link carries no ``href`` attribute.
        """
        links = response.css('div.pagination.row > a')
        for index, link in enumerate(links):
            # The current page is identified by class="active" appearing in
            # the serialized markup of the pagination anchor.
            if 'class="active"' not in link.extract():
                continue
            # Check the bound *before* indexing: on the final page the active
            # link is the last anchor and there is nothing to follow.
            if index + 1 >= len(links):
                return None
            return links[index + 1].xpath('@href').extract_first()
        return None