Skip to content

Commit

Permalink
Added some changes to spiders amach, cristim, rscrds
Browse files Browse the repository at this point in the history
  • Loading branch information
andreireporter13 committed Sep 28, 2024
1 parent 3f672de commit db6b154
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 14 deletions.
8 changes: 2 additions & 6 deletions JobsCrawlerProject/JobsCrawlerProject/spiders/amach_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,11 @@ class AmachSpiderSpider(scrapy.Spider):
allowed_domains = ["amach.software", "boards.eu.greenhouse.io"]
start_urls = ["https://boards.eu.greenhouse.io/embed/job_board/?for=amach"]

def start_requests(self):
    """Yield the initial request(s) that seed the crawl.

    The previous implementation requested only ``self.start_urls[0]``,
    silently dropping any additional URLs added to ``start_urls`` later.
    Iterating over the whole list is backward-compatible (there is
    currently a single URL) and matches Scrapy's default behaviour;
    responses are routed to ``parse`` by default.
    """
    for url in self.start_urls:
        yield scrapy.Request(url)

def parse(self, response):
# data here
for job in response.xpath('//div[@class="opening"]'):
city_tag = job.xpath('.//span[@class="location"]/text()').get()

# check city
if 'romania' in city_tag.lower():
if (location := city_tag.split(',')[0].strip()) and location.lower() == 'bucharest':
Expand All @@ -45,4 +41,4 @@ def parse(self, response):
item['remote'] = 'on-site'
item['logo_company'] = 'https://s101-recruiting.cdn.greenhouse.io/external_greenhouse_job_boards/logos/400/114/210/resized/AMACH-5_Color_Logo_1.png?1675863445'
#
yield item
yield item
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class CristimSpiderSpider(CrawlSpider):

rules = (
Rule(LinkExtractor(allow=('/cariere/',), deny=('/apply',)),
callback='parse_job'),
callback='parse_job'),
)

def parse_job(self, response):
Expand All @@ -30,7 +30,7 @@ def parse_job(self, response):
location = location[0]
else:
location = 'all'

# get location
location_finish = get_county(location=location)

Expand Down
11 changes: 5 additions & 6 deletions JobsCrawlerProject/JobsCrawlerProject/spiders/rscrds_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#
from urllib.parse import urlencode
import re

from time import sleep


Expand All @@ -24,7 +23,7 @@ def make_headers(town: str, industry: str) -> tuple[dict, dict]:
params: Town: str, for search jobs in each town.
Industry: Exact title job.
return: headers - dict
payload - dict
'''
Expand Down Expand Up @@ -56,9 +55,9 @@ class RscrdsSpider(scrapy.Spider):
def start_requests(self):
    """Entry point of the crawl: fetch the careers landing page.

    The response is handed to ``parse_towns``, which extracts the
    county options used for the per-town job searches.
    """
    start_page = 'https://www.digi.ro/cariere'
    yield scrapy.Request(url=start_page, callback=self.parse_towns)

def parse_towns(self, response):

# get all counties
for city_ro in response.xpath('//select[@id="form-assistance-support-careers-county"]//option'):
if (location := city_ro.xpath('.//text()').extract_first()) and location.lower() == 'judet':
Expand Down Expand Up @@ -94,8 +93,8 @@ def parse_job(self, response):
#
location = response.meta.get('location')
location_finish = get_county(location=location)
# send data to Pipelines

# send data to Pipelines
item = JobItem()
item['job_link'] = f"https://www.digi.ro/{links[idx]}"
item['job_title'] = titles[idx]
Expand Down

0 comments on commit db6b154

Please sign in to comment.