Skip to content

Commit

Permalink
Merge pull request #562 from lalalaurentiu/main
Browse files: browse the repository at this point in the history
refactor: Update Weatherford scraper to extract job listings from new…
  • Loading branch information
lalalaurentiu authored Aug 27, 2024
2 parents 54b9e9d + 1bf3e82 commit 97317cd
Showing 1 changed file with 18 additions and 43 deletions.
61 changes: 18 additions & 43 deletions sites/weatherford.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,29 @@

# Module-level setup for the Weatherford scraper.
# County lookup helper, created once; it is queried through
# ``_counties.get_county(city)`` further down.
_counties = GetCounty()

# NOTE(review): ``url`` is assigned twice back to back, so only the second
# value (the Oracle Cloud ``recruitingCEJobRequisitions`` endpoint) is in effect
# for every later use. The page header reports 18 additions and 43 deletions,
# so this text appears to interleave the pre- and post-commit lines of a diff
# whose +/- markers were lost -- presumably the iCIMS URL is the removed line.
url = "https://external-weatherford.icims.com/jobs/search?ss=1&searchRelation=keyword_all&searchLocation=13526--&mobile=false&width=1424&height=500&bga=true&needsRedirect=false&jan1offset=120&jun1offset=180&in_iframe=1"
url = "https://fa-exmi-saasfaprod1.fa.ocs.oraclecloud.com/hcmRestApi/resources/latest/recruitingCEJobRequisitions?onlyData=true&expand=requisitionList.secondaryLocations,flexFieldsFacet.values,requisitionList.requisitionFlexFields&finder=findReqs;siteNumber=CX_1,facetsList=LOCATIONS%3BWORK_LOCATIONS%3BWORKPLACE_TYPES%3BTITLES%3BCATEGORIES%3BORGANIZATIONS%3BPOSTING_DATES%3BFLEX_FIELDS,limit=25,locationId=300000000465601,sortBy=POSTING_DATES_DESC"

# Company name attached to every job record built below.
company = "Weatherford"
# Accumulator for the job records that are eventually published.
jobs = []

scraper = Scraper()
# Fetches ``url`` with the scraper's default mode. The result is bound to
# ``rendered`` but is not referenced anywhere in the visible lines.
rendered = scraper.get_from_url(url)

# HTML-based extraction: take every ``div.row`` found inside the
# ``div.iCIMS_JobsTable`` container and turn each posting into a job record.
#
# NOTE(review): all leading indentation is missing from this capture, so the
# nesting of the ``for``/``if`` bodies below cannot be read off the text and the
# block is not runnable as shown. Restore it from the repository before use.
# NOTE(review): going by the page's addition/deletion counts, this whole span
# looks like the removed side of the diff -- TODO confirm against the commit.
jobs_elements = scraper.find("div", class_="iCIMS_JobsTable").find_all(
"div", class_="row"
)

for job in jobs_elements:
# Rows without a ``div.title`` are presumably layout rows rather than postings.
if job.find("div", {"class": "title"}):
# Title text comes from the <h3>, the link from the <a>, both inside ``div.title``.
job_title = job.find("div", class_="title").find("h3").text.strip()
job_link = job.find("div", class_="title").find("a")["href"]
country = "Romania"
# Text of the last <span> in ``div.header``, split on "|" into a list of
# location strings (one posting may list several).
city = (
job.find("div", {"class": "header"})
.find_all("span")[-1]
.text.strip()
.split("|")
)

cities = []
counties = []

# ``city`` is the list being iterated here and is rebound to a single
# normalised name on each pass; the iteration itself is unaffected because the
# loop already holds a reference to the original list.
for county in city:
# Drop the "RO-" marker, trim, capitalise, then map through translate_city.
city = translate_city(county.replace("RO-", "").strip().capitalize())
# Spelling fix-up applied after translation.
if city == "Cimpina":
city = "Campina"
judet = _counties.get_county(city)
# NOTE(review): membership is tested on ``judet`` as a whole, but ``extend``
# adds its elements one by one, so the check and the stored values may not
# line up (possible duplicates) -- verify what get_county returns.
if judet and judet not in counties:
counties.extend(judet)
# A city is kept only when a county was resolved for it, and only once.
if judet and city not in cities:
cities.append(city)
# Postings for which no city/county could be resolved are not recorded.
if cities and counties:
jobs.append(
create_job(
job_title=job_title,
job_link=job_link,
city=cities,
county=counties,
country=country,
company=company,
)
)

# JSON-based extraction: request the endpoint in ``url`` as JSON and build one
# record per requisition.
headers = {"Content-Type": "application/json"}
scraper.set_headers(headers)
scraper.get_from_url(url, "JSON")

# ``scraper.markup`` is presumably the parsed response of the call above; the
# postings are read from "requisitionList" on the first entry under "items".
first_item = scraper.markup.get("items")[0]
elements = first_item.get("requisitionList")

jobs = []
for requisition in elements:
    title = requisition.get("Title")
    # Job page address = fixed career-site prefix followed by the requisition "Id".
    link = "https://careers.weatherford.com/#en/sites/CX_1/job/" + requisition.get("Id")
    jobs.append(
        {
            "job_title": title,
            "job_link": link,
            "country": "Romania",
            "company": company,
        }
    )

publish_or_update(jobs)
publish_logo(
Expand Down

0 comments on commit 97317cd

Please sign in to comment.