#! /usr/bin/env python3
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from datetime import datetime, timedelta
import time
import re
import json
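

# ANSI escape codes for colored terminal output.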
class color:
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'
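

# Launch Chrome in headless mode so no browser window opens while scraping.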
options = webdriver.ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)
# Shared list of job posting URLs, filled by getOpenings() and consumed by main().
urlList = []


def getCompany():
    """Prompt until the user enters a valid Greenhouse.io job board URL."""
    while True:
        print('Input Greenhouse.io job board page:')
        careerPage = input()
        if re.match(r'https://boards.greenhouse.io/.*', careerPage):
            return careerPage
        else:
            print("Invalid - Program takes only https://boards.greenhouse.io/XYZ formats.")


def getOpenings(jobBoard):
    """Collect every anchor on the job board whose href points to a job posting."""
    driver.get(jobBoard)
    jobLinks = driver.find_elements(By.TAG_NAME, 'a')
    for link in jobLinks:
        href = link.get_attribute('href')
        # get_attribute() can return None when an anchor has no href, so guard before the substring check.
        if href and 'job' in href:
            urlList.append(href)
    return urlList


def main(urlList):
    if not urlList:
        print("No Jobs here")
        print('------------------------')
        driver.quit()
        return
    else:
        try:
            # Iterate through urlList and report each posting's title, location,
            # and datePosted (read from the page's JSON-LD metadata).
            for job_link in urlList:
                driver.get(job_link)
                job_name = driver.find_element(By.XPATH, "//*[@id='header']/h1")
                job_name_text = job_name.text
                job_location = driver.find_element(By.XPATH, "//*[@id='header']/div")
                job_location_text = job_location.text
                try:
                    script_tag = driver.find_element(By.XPATH, "//script[@type='application/ld+json']")
                    script_content = script_tag.get_attribute('innerHTML')
                    json_data = json.loads(script_content)
                    date_posted = json_data.get('datePosted')
                    current_date = datetime.now().date()
                    difference = current_date - datetime.strptime(date_posted, "%Y-%m-%d").date()
                    one_week = timedelta(weeks=1)
                    two_weeks = timedelta(weeks=2)
                    # Color the date by age: green within one week, yellow within two, red otherwise.
                    if difference <= one_week:
                        color_code = color.GREEN
                    elif difference <= two_weeks:
                        color_code = color.YELLOW
                    else:
                        color_code = color.RED
                    print(f'\n>>> {job_name_text}: --- posted: {color_code}{date_posted}{color.END} \n>>> Location: {job_location_text} \n>>> {job_link}')
                except NoSuchElementException:
                    # Postings without JSON-LD metadata are reported without a post date.
                    print(f'\n>>> {job_name_text}: --- posted: {color.RED}No Post Date Available{color.END} \n>>> Location: {job_location_text} \n>>> {job_link}')
        finally:
            print('------------------------')
            driver.quit()
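

# Prompt for a job board, then time the scraping and reporting steps.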
careerPage = getCompany()
start_selenium = time.time()
getOpenings(careerPage)
main(urlList)
end_selenium = time.time()
selenium_time = end_selenium - start_selenium
print(f'Selenium Script took {selenium_time} seconds to execute.')