-
Notifications
You must be signed in to change notification settings - Fork 0
/
task3_reducer1.py
executable file
·58 lines (41 loc) · 1.51 KB
/
task3_reducer1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/python3
import sys
import collections
def read_map_output(file):
    """Return an iterator of key/value records parsed from mapper output.

    Each line is stripped of surrounding whitespace and split on its first
    tab only, so any further tabs remain inside the value.

    Input format: key<TAB>value
    Output format: [key, value]

    Args:
        file: An iterable of text lines (for example ``sys.stdin``).

    Yields:
        The list produced by splitting the stripped line on its first tab:
        ``[key, value]``, or a one-element ``[key]`` when the line contains
        no tab. Blank and whitespace-only lines are skipped.
    """
    for line in file:
        record = line.strip()
        if not record:
            # A blank line would otherwise yield [""], which a caller
            # unpacking into (key, value) cannot handle.
            continue
        yield record.split("\t", 1)
def reduce4():
    """Aggregate per-city tag counts from stdin and print each city's top tags.

    Records are read with ``read_map_output(sys.stdin)``. Each record is
    expected to look like ``city<TAB>number<TAB>year<TAB>tags``, where
    ``tags`` is a whitespace-separated list. A tag is not counted when it
    equals the record's year or any lower-cased, comma-separated component
    of the city key.

    After all input has been consumed, one line per city is printed:
    the city key, a tab, the ``number`` field taken from the first record
    seen for that city, a tab, and up to ten `` (tag:count)`` entries in
    descending order of count.

    Robustness notes:
        * Records without a tab-separated value, or with fewer than two
          detail fields, are skipped instead of raising.
        * A missing tags field is treated as "no tags" (the trailing tab of
          an empty tags field is lost when the line is stripped upstream).
        * Counts for a city keep accumulating even if its records are not
          contiguous in the input.
    """
    # city -> [number, Counter of tag frequencies]
    tag_dict = {}
    current_city = None
    locality = set()

    for record in read_map_output(sys.stdin):
        if len(record) != 2:
            # No value part: nothing to aggregate for this line.
            continue
        city, detail = record

        parts = [part.strip() for part in detail.strip().split("\t")]
        if len(parts) < 2:
            # Both number and year are required to process a record.
            continue
        number, year = parts[0], parts[1]
        tags = parts[2] if len(parts) > 2 else ""

        if city != current_city:
            current_city = city
            # Place names that make up the city key are filtered out of
            # the tags; they are compared in lower case.
            locality = {item.strip().lower() for item in city.split(",")}
            # setdefault keeps earlier counts if this city was seen before.
            tag_dict.setdefault(city, [number, collections.Counter()])

        counts = tag_dict[city][1]
        for tag in tags.split():
            # filter: skip the record's year and the locality names
            if tag != year and tag not in locality:
                counts[tag] += 1

    for city, (number, counts) in tag_dict.items():
        tag_output = "".join(
            " (" + tag + ":" + str(value) + ")"
            for tag, value in counts.most_common(10)
        )
        print(city + "\t" + number + "\t" + tag_output)
# Run the reducer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    reduce4()