Skip to content

Commit

Permalink
Merge pull request #2 from scrapinghub/hn-refactoring
Browse files: browse the repository at this point in the history
Renamed frontier_settings.py, ran yapf on spider and settings
  • Loading branch information
rdowinton committed Apr 16, 2015
2 parents 6cdb29b + 35584e8 commit 05ec2f9
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
15 changes: 8 additions & 7 deletions blog/hn_scraper/hn_scraper/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
#--------------------------------------------------------------------------
SPIDER_MIDDLEWARES = {}
DOWNLOADER_MIDDLEWARES = {}
- SPIDER_MIDDLEWARES.update(
- {'frontera.contrib.scrapy.middlewares.schedulers.SchedulerSpiderMiddleware': 999},
- )
- DOWNLOADER_MIDDLEWARES.update(
- {'frontera.contrib.scrapy.middlewares.schedulers.SchedulerDownloaderMiddleware': 999}
- )
+ SPIDER_MIDDLEWARES.update({
+ 'frontera.contrib.scrapy.middlewares.schedulers.SchedulerSpiderMiddleware': 999
+ }, )
+ DOWNLOADER_MIDDLEWARES.update({
+ 'frontera.contrib.scrapy.middlewares.schedulers.SchedulerDownloaderMiddleware':
+ 999
+ })
  SCHEDULER = 'frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler'
- FRONTERA_SETTINGS = 'hn_scraper.frontier_settings'
+ FRONTERA_SETTINGS = 'hn_scraper.frontera_settings'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'hn_scraper (+http://www.yourdomain.com)'
6 changes: 3 additions & 3 deletions blog/hn_scraper/hn_scraper/spiders/HackerNews.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ class HackernewsSpider(Spider):
allowed_domains = ["news.ycombinator.com"]
start_urls = ('https://news.ycombinator.com/', )

- link_extractor = SgmlLinkExtractor(allow=('news', ),
- restrict_xpaths=('//a[text()="More"]', ))
+ link_extractor = SgmlLinkExtractor(
+ allow=('news', ),
+ restrict_xpaths=('//a[text()="More"]', ))

def extract_one(self, selector, xpath, default=None):
extracted = selector.xpath(xpath).extract()
Expand All @@ -31,7 +32,6 @@ def parse(self, response):
for item in self.parse_item(response):
yield item


def parse_item(self, response):
selector = Selector(response)

Expand Down

0 comments on commit 05ec2f9

Please sign in to comment.