Skip to content

Commit

Permalink
Merge pull request #251 from lalalaurentiu/main
Browse files Browse the repository at this point in the history
Fixed Arabesque Scraper
  • Loading branch information
lalalaurentiu committed Nov 11, 2023
2 parents d41a0ab + d9f9524 commit 1472127
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
33 changes: 28 additions & 5 deletions sites/arabesque.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from scraper_peviitor import Scraper, Rules, loadingData
import uuid
import json
from getCounty import get_county
from utils import translate_city, acurate_city_and_county

# Root address of the Arabesque careers site ("cariere" is Romanian for careers).
# NOTE(review): how this value is consumed is outside the visible hunks.
url = "https://cariere.arabesque.ro"

Expand All @@ -13,6 +14,10 @@
# Name of the company whose jobs this scraper collects.
company = {"company": "Arabesque"}

# Accumulates one dict per scraped job posting.
finalJobs = []

# Manual city/county corrections, keyed by the city name parsed from a job title.
# NOTE(review): the exact return type of acurate_city_and_county is not visible
# here; the scraper only calls .get() on it, so it is presumably dict-like.
acurate_city = acurate_city_and_county(
    Iasi={"city": "Iasi", "county": "Iasi"},
)

# For each job category
for category in categories:
# Fetch the jobs from that category
Expand All @@ -37,19 +42,37 @@
# Every <article> element inside the listing container is treated as one job.
jobs = jobsContainer.find_all("article")

for job in jobs:
    # Random identifier for this record; named job_id so the builtin id()
    # is not shadowed.
    job_id = uuid.uuid4()
    job_title = job.find("h4").text.strip()
    # The article's HTML id is expected to start with "post-"; swapping that
    # prefix for the job URL plus "&job_id=" yields the link to the posting.
    job_link = job.get("id").replace("post-", jobUrl + "&job_id=")

    # The location is read from the end of the title: first try the last
    # word on its own.
    title_words = job_title.split(" ")
    city = translate_city(title_words[-1].strip().title())
    county = get_county(city)

    # Look the override up once, under the key parsed from the title.
    # Re-querying after `city` has been reassigned would miss (and crash on
    # None.get) whenever the corrected name differs from the lookup key.
    override = acurate_city.get(city)
    if override:
        city = override.get("city")
        county = override.get("county")
    elif not county:
        # No county for the last word alone: retry with the last two words
        # joined by a hyphen. A one-word title has no second-to-last word,
        # so skip straight to the fallback.
        if len(title_words) >= 2:
            first_name = title_words[-2].strip().title()
            city = translate_city(first_name + "-" + city)
            county = get_county(city)

        # Still unresolved: default the location to Bucuresti.
        if not county:
            city = "Bucuresti"
            county = "Bucuresti"

    # The former placeholder entry "city": "Romania" is dropped: a repeated
    # key in a dict display is overwritten by the later value, so it was dead.
    finalJobs.append({
        "id": str(job_id),
        "job_title": job_title,
        "job_link": job_link,
        "company": company.get("company"),
        "country": "Romania",
        "city": city,
        "county": county,
    })

# Dump the collected jobs to stdout as indented JSON for inspection.
print(json.dumps(finalJobs, indent=4))

# Hand the collected jobs to loadingData exactly once; a second identical call
# would submit the same data twice.
# NOTE(review): loadingData comes from scraper_peviitor; what it does with the
# jobs is not visible here.
loadingData(finalJobs, company.get("company"))

5 changes: 5 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,13 @@ def show_jobs(data):

def translate_city(city):
cities = {
# This is general for all scrapers
"bucharest": "Bucuresti",
"cluj": "Cluj-Napoca",
# This is for Arabesque Scraper
"targul-mures": "Targu Mures",
"militari": "Bucuresti",
############################
}

if cities.get(city.lower()):
Expand Down

0 comments on commit 1472127

Please sign in to comment.