diff --git a/sites/arabesque.py b/sites/arabesque.py index ea659e3..6545bfa 100644 --- a/sites/arabesque.py +++ b/sites/arabesque.py @@ -1,6 +1,7 @@ from scraper_peviitor import Scraper, Rules, loadingData -import uuid import json +from getCounty import get_county +from utils import translate_city, acurate_city_and_county url = "https://cariere.arabesque.ro" @@ -13,6 +14,10 @@ company = {"company": "Arabesque"} finalJobs = list() +acurate_city = acurate_city_and_county( + Iasi={"city": "Iasi", "county": "Iasi"}, +) + #Pentru fiecare categorie de joburi for category in categories: #Luam joburile din categoria respectiva @@ -37,19 +42,37 @@ jobs = jobsContainer.find_all("article") for job in jobs: - id = uuid.uuid4() job_title = job.find("h4").text.strip() job_link = job.get("id").replace("post-", jobUrl + "&job_id=") + city = translate_city(job_title.split(" ")[-1].strip().title()) + + county = get_county(city) + + if acurate_city.get(city): + city = acurate_city.get(city).get("city") + county = acurate_city.get(city).get("county") + + elif not county: + + first_name = job_title.split(" ")[-2].strip().title() + city = translate_city(first_name + "-" + city) + county = get_county(city) + + if not county: + + city = "Bucuresti" + county = "Bucuresti" finalJobs.append({ - "id": str(id), "job_title": job_title, "job_link": job_link, "company": company.get("company"), "country": "Romania", - "city": "Romania", + "city": city, + "county": county }) print(json.dumps(finalJobs, indent=4)) -loadingData(finalJobs, company.get("company")) \ No newline at end of file +loadingData(finalJobs, company.get("company")) + diff --git a/utils.py b/utils.py index 9b8c310..2b7c166 100644 --- a/utils.py +++ b/utils.py @@ -38,8 +38,13 @@ def show_jobs(data): def translate_city(city): cities = { + # This is general for all scrapers "bucharest": "Bucuresti", "cluj": "Cluj-Napoca", + # This is for Arabesque Scraper + "targul-mures": "Targu Mures", + "militari": "Bucuresti", + ############################ } if cities.get(city.lower()):