-
Notifications
You must be signed in to change notification settings - Fork 0
/
ets_jobs_rating.py
92 lines (80 loc) · 3.13 KB
/
ets_jobs_rating.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import requests
import json
import re
import time
# Settings
file_input = 'SEE_jobs.json'    # JSON file holding the job postings to rate
file_output = 'SEE_rating.txt'  # plain-text report written by this script
api_token = ''                  # sent to the API as the "t.p" parameter
api_token_key = ''              # sent to the API as the "t.k" parameter

# Config
api_url_base = 'http://api.glassdoor.com/api/api.htm'
headers = {"Content-Type": "application/x-www-form-urlencoded", "user-agent": "Mozilla/5.0"}
# Query parameters shared by every request; the per-job "q" and "l" entries
# are filled in later, just before each call.
parameters = {
    "t.p": api_token,
    "t.k": api_token_key,
    "userip": "0.0.0.0",
    "format": "json",
    "v": 1,
    "action": "employers"
}

# Load the job postings. The context manager closes the file as soon as it
# has been parsed, and the encoding is explicit so the result does not depend
# on the platform's default locale.
with open(file_input, encoding='utf-8') as jobs_file:
    data = json.load(jobs_file)

# Mutable state filled in while fetching:
errors = {}               # normalized company name -> text describing the failure
companies_not_found = []  # normalized company names with no employer match
jobs = []                 # one dict per job for which employers were returned
# Maps lower-case accented letters to their ASCII base letter.
# https://docs.python.org/3/library/stdtypes.html#str.maketrans
_ACCENT_TABLE = str.maketrans("éàèùâêîôûç", "eaeuaeiouc")

# Noise removed from a company name: a parenthesised group (with one optional
# trailing whitespace character), "&", "+", the standalone word "inc", and dots.
_NOISE_RE = re.compile(r'(\([^)]*\)\s?)|&|\+|\binc\b|\.')


def normalize(name: str) -> str:
    """Return a simplified, lower-case form of a company name.

    Steps, in order:

    1. Lower-case the name, then replace the accented letters listed in
       ``_ACCENT_TABLE`` with their ASCII equivalents. Lower-casing first
       means upper-case accented letters (e.g. "É") are handled too.
    2. Keep only the text before the first "-", then before the first "–",
       then before the first ",".
    3. Remove parenthesised groups, "&", "+", "." and the standalone word
       "inc" (a word boundary is required so names that merely contain
       those letters, such as "princeton", are left intact).
    4. Strip leading and trailing whitespace left over by the steps above,
       so that equivalent names compare equal.

    Args:
        name: The raw company name.

    Returns:
        The normalized name; an empty string if nothing remains.
    """
    simplified = name.lower().translate(_ACCENT_TABLE)
    for separator in ("-", "–", ","):
        simplified = simplified.split(separator)[0]
    return _NOISE_RE.sub('', simplified).strip()
# Fetch data
# For every job in `data`, query the API for the employer and collect the
# outcome in `jobs`, `companies_not_found` or `errors`; then write all three
# sections to the report file. The encoding is explicit because company names
# may contain accented characters.
with open(file_output, 'w', encoding='utf-8') as file:
    for d in data:
        company_name = normalize(d['Nmemp'])
        parameters["q"] = company_name
        parameters["l"] = d['Lieupost']  # presumably the job location - confirm against the input schema

        # Don't try to fetch companies if they already failed once: skip the
        # name when it is recorded in either failure collection.
        if company_name in companies_not_found or company_name in errors:
            continue

        print(company_name)
        try:
            response = requests.get(
                api_url_base, params=parameters, headers=headers, timeout=30
            ).json()
        except (requests.RequestException, ValueError) as exc:
            # Network failure, timeout or a body that is not valid JSON:
            # record it for the report instead of aborting the whole run.
            errors[company_name] = str(exc)
            continue
        finally:
            # Pause after every request, successful or not.
            time.sleep(0.2)

        try:
            if response and response['response']['employers']:
                # Best-rated employer first.
                employers = sorted(
                    response['response']['employers'],
                    key=lambda k: k['overallRating'],
                    reverse=True,
                )
                jobs.append({
                    "name": d['Nmemp'],
                    "job_title": d['Titpost'],
                    "glassdoor": employers,
                    "top_rating": employers[0]['overallRating']
                })
            else:
                companies_not_found.append(company_name)
        except (LookupError, TypeError):
            # The payload did not have the expected shape; keep the raw
            # response so it can be inspected in the report.
            errors[company_name] = str(response)

    # Successful results
    row_format = "%-40s %-100s %-10s %-10s %-10s %-50s %-50s\n"
    file.write("\n%-140s %-100s\n" % ('ETS', 'TOP GLASSDOOR RESULT'))
    file.write(
        row_format % ('Company', 'Job title', 'rating', '# ratings', '# results', 'Company', 'Website')
        + "\n"
    )
    # Jobs ordered by the rating of their best match, highest first.
    for res in sorted(jobs, key=lambda k: k['top_rating'], reverse=True):
        top_match = res['glassdoor'][0]
        file.write(row_format % (
            res['name'],
            res['job_title'],
            top_match['overallRating'],
            top_match['numberOfRatings'],
            len(res['glassdoor']),
            top_match['name'],
            top_match['website'],
        ))

    # Companies not found
    file.write("\n\n -- Not found -- \n")
    for job_company_name in companies_not_found:
        file.write("\n %s" % job_company_name)

    # Errors
    file.write("\n\n\n -- Errors -- \n")
    for company_name, error in errors.items():
        file.write("\n %s -> %s" % (company_name, error))

print('Done!')