Merge pull request #2 from scrapinghub/hn-refactoring

Renamed frontier_settings.py, ran yapf on spider and settings
scrapinghub · Apr 16, 2015 · 05ec2f9
2 parents 6cdb29b + 35584e8
commit 05ec2f9
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 10 deletions.
diff --git a/blog/hn_scraper/hn_scraper/frontier_settings.py b/blog/hn_scraper/hn_scraper/frontera_settings.py
diff --git a/blog/hn_scraper/hn_scraper/settings.py b/blog/hn_scraper/hn_scraper/settings.py
@@ -18,14 +18,15 @@
 #--------------------------------------------------------------------------
 SPIDER_MIDDLEWARES = {}
 DOWNLOADER_MIDDLEWARES = {}
-SPIDER_MIDDLEWARES.update(
-    {'frontera.contrib.scrapy.middlewares.schedulers.SchedulerSpiderMiddleware': 999},
-)
-DOWNLOADER_MIDDLEWARES.update(
-    {'frontera.contrib.scrapy.middlewares.schedulers.SchedulerDownloaderMiddleware': 999}
-)
+SPIDER_MIDDLEWARES.update({
+    'frontera.contrib.scrapy.middlewares.schedulers.SchedulerSpiderMiddleware': 999
+}, )
+DOWNLOADER_MIDDLEWARES.update({
+    'frontera.contrib.scrapy.middlewares.schedulers.SchedulerDownloaderMiddleware':
+    999
+})
 SCHEDULER = 'frontera.contrib.scrapy.schedulers.frontier.FronteraScheduler'
-FRONTERA_SETTINGS = 'hn_scraper.frontier_settings'
+FRONTERA_SETTINGS = 'hn_scraper.frontera_settings'
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
 #USER_AGENT = 'hn_scraper (+http://www.yourdomain.com)'
diff --git a/blog/hn_scraper/hn_scraper/spiders/HackerNews.py b/blog/hn_scraper/hn_scraper/spiders/HackerNews.py
@@ -13,8 +13,9 @@ class HackernewsSpider(Spider):
     allowed_domains = ["news.ycombinator.com"]
     start_urls = ('https://news.ycombinator.com/', )
 
-    link_extractor = SgmlLinkExtractor(allow=('news', ),
-                                       restrict_xpaths=('//a[text()="More"]', ))
+    link_extractor = SgmlLinkExtractor(
+        allow=('news', ),
+        restrict_xpaths=('//a[text()="More"]', ))
 
     def extract_one(self, selector, xpath, default=None):
         extracted = selector.xpath(xpath).extract()
@@ -31,7 +32,6 @@ def parse(self, response):
         for item in self.parse_item(response):
             yield item
 
-
     def parse_item(self, response):
         selector = Selector(response)