From a5b4ba8d576507b86f5cd845a0aff833a9f330cd Mon Sep 17 00:00:00 2001 From: lalalaurentiu Date: Wed, 28 Aug 2024 17:49:24 +0300 Subject: [PATCH 1/2] refactor: Update ADP scraper to extract job listings from new URL and improve data parsing --- sites/adp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sites/adp.py b/sites/adp.py index bd5b25b..c3df662 100644 --- a/sites/adp.py +++ b/sites/adp.py @@ -22,9 +22,11 @@ data = re.search(pattern, scraper.text).group(1) jobs = json.loads("{" + data + "}").get("searchResults") +# show_jobs(jobs) + for job in jobs: job_title = job.get("job").get("title") - job_link = job.get("job").get("url") + job_link = "https://jobs.adp.com/en/jobs/" + job.get("job").get("ref") locations = job.get("job").get("google_locations") cities = [ @@ -55,3 +57,4 @@ publish_logo(company.get("company"), logoUrl) show_jobs(finalJobs) + From 802b806f445c12bf5ad72e8116bde5ee90f0e42c Mon Sep 17 00:00:00 2001 From: lalalaurentiu Date: Wed, 28 Aug 2024 18:27:48 +0300 Subject: [PATCH 2/2] refactor: Update WTB scraper to extract job listings from new URL and improve data parsing --- sites/wtb.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sites/wtb.py b/sites/wtb.py index d40b979..0ea377d 100644 --- a/sites/wtb.py +++ b/sites/wtb.py @@ -10,7 +10,7 @@ scraper.get_from_url(url) page = 1 -jobs = scraper.find("div", {"class": "isotope-wrapper half-gutter"}).find_all( +jobs = scraper.find_all( "h3", {"class": "t-entry-title h5"} ) @@ -32,13 +32,7 @@ page += 1 scraper.get_from_url(url + f"category/careers/category/careers/?%&upage={page}") - try: - jobs = scraper.find("div", {"class": "isotope-wrapper half-gutter"}).find_all( - "h3", {"class": "t-entry-title h5"} - ) - except: - jobs = None - + jobs = scraper.find_all( "h3", {"class": "t-entry-title h5"}) publish_or_update(finalJobs) publish_logo(company, "https://www.wtb.ro/wp-content/uploads/2018/04/logoblack.svg")