From 29e68a521209f1209084a36cd291596c98cf1b59 Mon Sep 17 00:00:00 2001 From: lalalaurentiu Date: Sat, 25 Nov 2023 14:07:55 +0200 Subject: [PATCH] Fixed Samsung Scraper --- sites/samsung.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/sites/samsung.py b/sites/samsung.py index e1e1873..c346dbf 100644 --- a/sites/samsung.py +++ b/sites/samsung.py @@ -1,50 +1,57 @@ from scraper_peviitor import Scraper, loadingData -import uuid import json +from utils import translate_city +from getCounty import get_county apiUrl = "https://sec.wd3.myworkdayjobs.com/wday/cxs/sec/Samsung_Careers/jobs" scraper = Scraper() -#Cream un header pentru a putea face request-uri POST +# Cream un header pentru a putea face request-uri POST headers = { "Accept": "application/json", "Content-Type": "application/json", } -#Cream un dictionar cu datele pe care dorim sa le trimitem catre server -data = {"appliedFacets":{},"limit":20,"offset":0,"searchText":"Romania"} +# Cream un dictionar cu datele pe care dorim sa le trimitem catre server +data = {"appliedFacets": {}, "limit": 20, "offset": 0, "searchText": "Romania"} -#Actualizam header-ul cu datele de mai sus +# Actualizam header-ul cu datele de mai sus scraper.session.headers.update(headers) -#Facem request-ul POST si salvam numarul total de joburi +# Facem request-ul POST si salvam numarul total de joburi numberOfJobs = scraper.post(apiUrl, json=data).json().get("total") -#Cream o lista cu numerele de la 0 la numarul total de joburi, cu pasul de 20 +# Cream o lista cu numerele de la 0 la numarul total de joburi, cu pasul de 20 iteration = [i for i in range(0, numberOfJobs, 20)] company = {"company": "Samsung"} finaljobs = list() -#Pentru fiecare numar din lista, extragem joburile +# Pentru fiecare numar din lista, extragem joburile for num in iteration: data["offset"] = num jobs = scraper.post(apiUrl, json=data).json().get("jobPostings") for job in jobs: - id = uuid.uuid4() job_title = job.get("title") - job_link = "https://sec.wd3.myworkdayjobs.com/en-US/Samsung_Careers" + job.get("externalPath") + job_link = "https://sec.wd3.myworkdayjobs.com/en-US/Samsung_Careers" + \ + job.get("externalPath") + city = translate_city( + job.get("locationsText").split(",")[1].strip().split(" ")[0] + ) + county = get_county(city) + remote = job.get("remoteType") finaljobs.append({ - "id": str(id), "job_title": job_title, "job_link": job_link, "company": company.get("company"), "country": "Romania", - "city": "Romania" + "city": city, + "county": county, + "remote": remote }) #afisam numarul total de joburi gasite print(json.dumps(finaljobs, indent=4)) -#se incarca datele in baza de date -loadingData(finaljobs, company.get("company")) \ No newline at end of file +# #se incarca datele in baza de date +# loadingData(finaljobs, company.get("company"))