Skip to content

Commit

Permalink
Merge pull request #508 from lalalaurentiu/main
Browse files: browse the repository at this point in the history
Refactor code
  • Loading branch information
lalalaurentiu authored Feb 15, 2024
2 parents 34c40c5 + 303dd18 commit 781a8db
Show file tree
Hide file tree
Showing 8 changed files with 172 additions and 181 deletions.
64 changes: 37 additions & 27 deletions sites/svn.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,11 @@
from scraper.Scraper import Scraper
from utils import (publish, publish_logo, create_job, show_jobs, translate_city, acurate_city_and_county)
from utils import (
publish,
publish_logo,
create_job,
show_jobs,
translate_city
)
from getCounty import get_county, remove_diacritics


Expand All @@ -10,44 +16,48 @@ def remove_umlaut(string):
:return: the string with the umlaut characters replaced
"""

t = '\u0083'
a = '\u00c4\u0083'
aa = '\u00c3\u00a2'
t = "\u0083"
a = "\u00c4\u0083"
aa = "\u00c3\u00a2"

string = string.replace(t, "t")
string = string.replace(a, "a")
string = string.replace(aa, "a")

string = string.replace(t, 't')
string = string.replace(a, 'a')
string = string.replace(aa, 'a')

return string

company = 'SVN'
url = 'https://jobs.svn.ro/posturi-vacante.html'

company = "SVN"
url = "https://jobs.svn.ro/posturi-vacante.html"

scraper = Scraper()
scraper.get_from_url(url)

jobs = []

jobs_elements = scraper.find('div', class_='jobs').find_all('div', class_='job')
jobs_elements = scraper.find("div", class_="jobs").find_all("div", class_="job")

for job in jobs_elements:
job_title = remove_diacritics(job.find('h3').text.encode(
'raw_unicode_escape').decode('utf-8'))
job_link='https://jobs.svn.ro' + job.find('a')['href']
city=translate_city(job.find('ul').find_all('li')[-1].text).replace("ș", "s")
job_title = remove_diacritics(
job.find("h3").text.encode("raw_unicode_escape").decode("utf-8")
)
job_link = "https://jobs.svn.ro" + job.find("a")["href"]
city = translate_city(job.find("ul").find_all("li")[-1].text).replace("ș", "s")
county = get_county(city)

jobs.append(create_job(
job_title=job_title,
job_link=job_link,
company=company,
country="Romania",
city=city,
county=county,
))
jobs.append(
create_job(
job_title=job_title,
job_link=job_link,
company=company,
country="Romania",
city=city,
county=county,
)
)


for version in [1,4]:
publish(version, company, jobs, 'APIKEY')
publish(4, company, jobs, "APIKEY")

publish_logo(company, 'https://www.svn.ro/assets/images/logo/3.png')
show_jobs(jobs)
publish_logo(company, "https://www.svn.ro/assets/images/logo/3.png")
show_jobs(jobs)
44 changes: 24 additions & 20 deletions sites/synevo.py
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
from scraper_peviitor import Scraper, Rules, loadingData
import json
from scraper_peviitor import Scraper, Rules
from getCounty import get_county, remove_diacritics
from utils import translate_city
from utils import translate_city, publish, publish_logo, show_jobs

# Create an instance of the Scraper class
scraper = Scraper("https://www.synevo.ro/cariere/")
Expand Down Expand Up @@ -30,36 +29,41 @@
job_title = rules.getTag("h1", {"class": "entry-title"}).text
# If there are several cities, split them into a list
# try :
locations = rules.getTag(
"div", {"class": "jobs-info-city"}).find("b").text.split(",")
locations = (
rules.getTag("div", {"class": "jobs-info-city"}).find("b").text.split(",")
)

cities = list()
counties = set()

for city in locations:
if "Chiajna" in city:
city = "Chiajna"
elif "Laborator Monza" in city:
city = "Bucuresti"
else:
city = translate_city(
remove_diacritics(city.strip())
)
city = translate_city(remove_diacritics(city.strip()))

cities.append(city)
counties.add(get_county(city))

# Build a dictionary for the job and append it to the final list

finaljobs.append({
"job_title": job_title,
"job_link": scraper.url,
"company": company.get("company"),
"country": "Romania",
"city": cities,
"county": list(counties),
})
finaljobs.append(
{
"job_title": job_title,
"job_link": scraper.url,
"company": company.get("company"),
"country": "Romania",
"city": cities,
"county": list(counties),
}
)

# Display the collected jobs
print(json.dumps(finaljobs, indent=4))

# Load the data into the database
loadingData(finaljobs, company.get("company"))
publish(4, company.get("company"), finaljobs, "APIKEY")

logourl = "https://www.synevo.ro/wp-content/themes/synevo-sage/dist/images/synevo-logo_6edc429f.svg"
publish_logo(company.get("company"), logourl)

show_jobs(finaljobs)
13 changes: 8 additions & 5 deletions sites/tbibank.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
from scraper.Scraper import Scraper
import json
from utils import show_jobs, translate_city, publish
from utils import show_jobs, translate_city, publish, publish_logo
from getCounty import get_county

url = "https://tbibank.ro/cariere/"
Expand All @@ -27,7 +27,7 @@
"job_link": job_link,
"company": company,
"country": country,
"remote": remote
"remote": remote,
}

if country == "Romania":
Expand All @@ -41,7 +41,10 @@

finalJobs.append(job_obj)

show_jobs(finalJobs)

for version in [1, 4]:
publish(version, company, finalJobs, 'APIKEY')
publish(4, company, finalJobs, "APIKEY")

logoUrl = "https://tbibank.ro/wp-content/themes/Avada-child-bg/assets/images/tbi-layout/logo/new-logo.svg"
publish_logo(company, logoUrl)

show_jobs(finalJobs)
44 changes: 15 additions & 29 deletions sites/teleperformance.py
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,29 @@
from scraper_peviitor import Scraper, loadingData
import uuid
import json
from scraper_peviitor import Scraper
from utils import publish, publish_logo, show_jobs, translate_city
from getCounty import get_county

apiUrl = "https://www.teleperformance.com/Umbraco/Api/Careers/GetCareersBase?node=13761&country=Romania&pageSize=100"

company = {"company": "Teleperformance"}
finalJobs = list()

scraper = Scraper(apiUrl)

jobs = scraper.getJson().get("resultado")

for job in jobs:
id = uuid.uuid4()
job_title = job.get("title")
job_link = job.get("url")
city = job.get("location")

finalJobs.append({
"id": str(id),
"job_title": job_title,
"job_link": job_link,
finalJobs = [
{
"job_title": job.get("title"),
"job_link": job.get("url"),
"company": company.get("company"),
"country": "Romania",
"city": city
})

print(json.dumps(finalJobs, indent=4))
"city": translate_city(job.get("location")),
"county": get_county(translate_city(job.get("location"))),
}
for job in jobs
]

loadingData(finalJobs, company.get("company"))
publish(4, company.get("company"), finalJobs, "APIKEY")

logoUrl = "https://www.teleperformance.com/media/yn5lcxbl/tp-main-logo-svg.svg"
publish_logo(company.get("company"), logoUrl)

scraper.session.headers.update({
"Content-Type": "application/json",
})
scraper.post( "https://api.peviitor.ro/v1/logo/add/" ,json.dumps([
{
"id":company.get("company"),
"logo":logoUrl
}
]))
show_jobs(finalJobs)
30 changes: 15 additions & 15 deletions sites/telusinternational.py
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,31 @@
from scraper.Scraper import Scraper
from utils import (publish, publish_logo, show_jobs)

from utils import publish, publish_logo, show_jobs

url = "https://jobs.telusinternational.com/en_US/careers/Romania?source=TI+website&amp%3Btags=telus_main_website&listFilterMode=1&2947=5170&2947_format=4626"

company = "TelusInternational"
finalJobs = list()

scraper = Scraper()
scraper.get_from_url(url, verify=False)

jobs = scraper.find_all("li", {"class": "listSingleColumnItem"})

for job in jobs:
job_title = job.find("h3").text.strip()
job_link = job.find("h3").find("a").get("href")

finalJobs.append({
"job_title": job_title,
"job_link": job_link,
finalJobs = [
{
"job_title": job.find("h3").text.strip(),
"job_link": job.find("h3").find("a").get("href"),
"company": company,
"country": "Romania",
"city": "Bucuresti",
"county": "Bucuresti"
})
"county": "Bucuresti",
}
for job in jobs
]


for version in [1,4]:
publish(version, company, finalJobs, 'APIKEY')
publish_logo(company, "https://jobs.telusinternational.com/portal/11/images/logo_telus-international_header-v2.svg")
publish(4, company, finalJobs, "APIKEY")
publish_logo(
company,
"https://jobs.telusinternational.com/portal/11/images/logo_telus-international_header-v2.svg",
)
show_jobs(finalJobs)
59 changes: 29 additions & 30 deletions sites/thalesgroup.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
from scraper_peviitor import Scraper, loadingData
import uuid
import json
from scraper_peviitor import Scraper
from utils import publish, publish_logo, show_jobs, translate_city
from getCounty import get_county

apiUrl = "https://thales.wd3.myworkdayjobs.com/wday/cxs/thales/Careers/jobs"

Expand All @@ -14,7 +14,12 @@
"Content-Type": "application/json",
}

data = {"appliedFacets":{"locationCountry":["f2e609fe92974a55a05fc1cdc2852122"]},"limit":20,"offset":0,"searchText":""}
data = {
"appliedFacets": {"locationCountry": ["f2e609fe92974a55a05fc1cdc2852122"]},
"limit": 20,
"offset": 0,
"searchText": "",
}

scraper.session.headers.update(headers)

Expand All @@ -26,33 +31,27 @@
data["offset"] = num
jobs = scraper.post(apiUrl, json=data).json().get("jobPostings")
for job in jobs:
id = uuid.uuid4()
job_title = job.get("title")
job_link = "https://thales.wd3.myworkdayjobs.com/en-US/Careers" + job.get("externalPath")
city = job.get("locationsText").split(",")[0]

finalJobs.append({
"id": str(id),
"job_title": job_title,
"job_link": job_link,
"company": company.get("company"),
"country": "Romania",
"city": city
})

print(json.dumps(finalJobs, indent=4))

loadingData(finalJobs, company.get("company"))
job_link = "https://thales.wd3.myworkdayjobs.com/en-US/Careers" + job.get(
"externalPath"
)
city = translate_city(job.get("locationsText").split(",")[0])
county = get_county(city)

finalJobs.append(
{
"job_title": job_title,
"job_link": job_link,
"company": company.get("company"),
"country": "Romania",
"city": city,
"county": county,
}
)

publish(4, company.get("company"), finalJobs, "APIKEY")

logoUrl = "https://upload.wikimedia.org/wikipedia/commons/thumb/2/21/Thales_Logo.svg/484px-Thales_Logo.svg.png?20210518101610"
publish_logo(company.get("company"), logoUrl)

scraper.session.headers.update({
"Content-Type": "application/json",
})
scraper.post( "https://api.peviitor.ro/v1/logo/add/" ,json.dumps([
{
"id":company.get("company"),
"logo":logoUrl
}
]))

show_jobs(finalJobs)
Loading

0 comments on commit 781a8db

Please sign in to comment.