-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape.py
42 lines (32 loc) · 1.15 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import urllib, urllib2, sys , json
from BeautifulSoup import BeautifulSoup
def get_job_list(language, country_code):
    """Fetch page 1 of a LinkedIn job search and extract its facet labels.

    Both arguments are URL-quoted and substituted into the search URL as the
    ``keywords`` and ``countryCode`` query parameters.

    Args:
        language: Search keywords (the caller passes a programming language).
        country_code: Country code used to restrict the search.

    Returns:
        A 2-tuple ``(labels, total_number)``:

        * ``labels`` -- the ``.contents`` list of every ``<label>`` found
          under the parent of the second ``<h4>`` on the page, with the first
          and the last label dropped.
          NOTE(review): presumably this is the per-company facet list; verify
          against the live markup.
        * ``total_number`` -- the first ``<span class="keywords">`` element as
          returned by ``soup.find`` (``None`` when the page has no such span).

    Raises:
        IndexError: if the page contains fewer than two ``<h4>`` elements.
        urllib2.URLError: propagated from ``urllib2.urlopen`` on network
            failure.
    """
    query = (urllib.quote(language), urllib.quote(country_code))
    url = "http://www.linkedin.com/jsearch?keywords=%s&searchLocationType=I&countryCode=%s&page_num=1&pplSearchOrigin=MDYS&sortCriteria=R#" % query

    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        # Always release the underlying socket, even if the read fails;
        # the Python 2 response object is not a context manager.
        response.close()

    soup = BeautifulSoup(page)
    total_number = soup.find("span", {"class": "keywords"})
    # The labels of interest are siblings under the second <h4>'s parent.
    company_list = soup.findAll('h4')[1].parent.findAll('label')
    final_list = [label.contents for label in company_list]
    # The first and last labels are not data rows, so they are trimmed off.
    return (final_list[1:-1], total_number)
def go():
    """Script entry point: print the scraped job counts as one JSON object.

    The search language and country code default to ``"Python"`` and
    ``"us"``; they are taken from the command line only when exactly two
    arguments are supplied. Each label returned by ``get_job_list`` becomes
    one key/value pair, and a ``"Total"`` key is added from the total span.
    """
    if len(sys.argv) == 3:
        language, country_code = sys.argv[1], sys.argv[2]
    else:
        language, country_code = "Python", "us"

    labels, total_tag = get_job_list(language, country_code)

    counts = {}
    for label in labels:
        # The first child of the label is split on whitespace.
        # NOTE(review): presumably the text looks like "Some Name (123)".
        words = label[0].split()
        # Every token but the last forms the key. The tokens are joined with
        # no separator, so multi-word names lose their spaces.
        name = "".join(words[:-1])
        # The last token has its first and last character removed
        # (presumably the surrounding parentheses -- confirm).
        count = words[-1][1:-1]
        counts[name] = count

    # First child of the span's third child.
    # NOTE(review): this fails if get_job_list found no total span (None).
    counts["Total"] = total_tag.contents[2].contents[0]

    print(json.dumps(counts))
# Run the scraper as soon as this file is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so merely
# importing this module also triggers the network request and the print.
go()