-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathatepassar_visual.py
137 lines (97 loc) · 2.91 KB
/
atepassar_visual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#-*- coding:utf-8 -*-
from unicodedata import normalize
from pygeocoder import Geocoder
import pickle
import time
def remove_acentos(x):
    """Return the unicode string *x* with its accents stripped, as ASCII bytes.

    NFKD normalization splits each accented character into its base
    character plus combining marks; encoding to ASCII with ``'ignore'``
    then drops the marks (and any other non-ASCII character).

    Args:
        x: unicode text to clean up.

    Returns:
        The ASCII-encoded byte string.
    """
    return normalize('NFKD', x).encode('ASCII', 'ignore')
# NOTE(review): in the visible code 'users' is only read by a disabled
# snippet further down; the handle is kept so that snippet can be re-enabled.
users = open('users.csv')

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only run this against pickle files produced by a trusted source.
# Each input file holds two consecutive pickled objects. The first is a dict
# keyed by user id (presumably user id -> coordinates -- confirm against the
# script that wrote these files); the second is kept under tmp / tmp2.
with open('geo_cities.pk3', 'rb') as pk3:
    geo = pickle.load(pk3)
    tmp = pickle.load(pk3)
with open('geo_cities.pk4', 'rb') as pk4:
    geo2 = pickle.load(pk4)
    tmp2 = pickle.load(pk4)

# The output is opened only after both inputs have loaded, so a broken input
# no longer leaves a freshly truncated output file behind.
pk_final = open('atepassar_geo.pk', 'wb+')

# Combined number of entries before merging (keys present in both count twice).
print(len(geo) + len(geo2))
# Entries from the second file win when a key appears in both.
geo.update(geo2)
def _load_relationships(lines):
    """Parse semicolon-separated relationship rows into a dict.

    Each row has the form ``user_id;other_id;other_id;...``. Empty fields
    (for example the one produced by a trailing ``;``) are ignored, and
    blank rows are skipped.

    Args:
        lines: iterable of text rows, such as an open file.

    Returns:
        dict mapping each int user id to the list of its related int ids,
        with the user's own id appended as the last element. When a user id
        appears on several rows, the last row wins.

    Raises:
        ValueError: if a non-empty field is not an integer.
    """
    relationships = {}
    for line in lines:
        fields = line.strip().split(';')
        if fields == ['']:
            continue  # blank row: nothing to record
        user_id = int(fields[0])
        related = [int(field) for field in fields[1:] if field != '']
        # The user is listed among their own relationships on purpose:
        # the original script appended the id to the list as well.
        relationships[user_id] = related + [user_id]
    return relationships


user_cities = {}

# user id -> ids read from the friends file (plus the user itself).
with open('user_friends.csv') as user_friends:
    user_fr = _load_relationships(user_friends)

# user id -> ids read from the followers file (plus the user itself).
with open('users_followers.csv') as user_followers:
    user_fo = _load_relationships(user_followers)
# All (user, relationship list) pairs: friends entries first, then followers.
# Despite the name, this is the concatenation of both maps, not an
# intersection of them.
mutual_friends = list(user_fr.items()) + list(user_fo.items())

# final_sn: user id -> de-duplicated ids taken from the friends and the
# followers lists of that user.
final_sn = {}
for user, relationships in mutual_friends:
    final_sn[user] = list(set(final_sn.get(user, []) + relationships))
#print len(final_sn)
#print final_sn[1]
# Disabled code, kept as a bare string literal so it never executes.
# It reads each row of 'users', splits it on ';' into user_id, city, state
# and abbr, normalizes the city name (lowercased, accents stripped with
# remove_acentos, then title-cased) and collects the user ids under the
# (city, state, abbr) key of user_cities; it also prints len(user_cities).
'''
user_cities = {}
for user in users:
user = user.strip()
user_id, city, state, abbr = user.split(';')
city = remove_acentos(city.decode('utf-8').lower()).title()
user_cities.setdefault((city, state, abbr), [])
user_cities[(city, state, abbr)].append(user_id)
print len(user_cities)
'''
# Count connections per pair of geo entries: for every user present in geo,
# each related user that is also present in geo adds one to the counter keyed
# by (geo[user], geo[related user]). Because every relationship list contains
# the user itself, each processed user also contributes one
# (geo[user], geo[user]) pair. The counter is pickled to pk_final.
atepassar_users = {}
for user in geo:
    if user == 39:
        continue  # user 39 is the mascot account: deliberately left out
    print(user)
    # NOTE(review): raises KeyError when a user in geo appears in neither
    # relationship file -- confirm the input data guarantees this.
    friends = final_sn[user]
    print('%s %s' % (user, friends))
    origin = geo[user]
    for friend in friends:
        if friend not in geo:
            # No geo entry for this related user: report the id and skip it.
            print(friend)
            continue
        edge = (origin, geo[friend])
        atepassar_users[edge] = atepassar_users.get(edge, 0) + 1

try:
    # Debug aid: show one entry, but only if there is one. Indexing an
    # empty result used to raise IndexError before anything was written.
    if atepassar_users:
        print(list(atepassar_users.items())[-1])
    pickle.dump(atepassar_users, pk_final)
finally:
    # Always release the output handle, even if dumping fails.
    pk_final.close()
# Disabled code, kept as a bare string literal so it never executes.
# For the (city, state, abbr) groups of user_cities it appears to skip any
# group that already has a (city, state, abbr, user) entry in tmp, geocode
# '<city>, <state>, Brasil' with Geocoder.geocode, store the first result's
# coordinates in cities_user for every user of the group, and record the
# processed (city, state, abbr, user) tuples in temp_cities. Both objects are
# then pickled to pk4.
# NOTE(review): pk4 is opened for reading ('rb') earlier in this file, so it
# would have to be reopened for writing before this snippet could run.
'''
cities_user = {}
temp_cities = []
for city, state, abbr in user_cities:
achou = False
for user in user_cities[(city, state, abbr)]:
if (city, state, abbr, user) in tmp:
print 'achei'
achou = True
if achou:
continue
endereco = '%s, %s, Brasil' % (city, state)
results = Geocoder.geocode(endereco)
coords, end = results[0].coordinates, results[0]
print coords, end
for user in user_cities[(city, state, abbr)]:
cities_user[int(user)] = coords
temp_cities.append((city, state, abbr, user))
pickle.dump(cities_user, pk4)
pickle.dump(temp_cities, pk4)
pk4.close()
'''
# Disabled code, kept as a bare string literal so it never executes.
# It counts the (city, state, abbr) groups of user_cities that hold at most
# one user and prints that count as a fraction of the number of groups.
# NOTE(review): 'count' is not initialised anywhere in this snippet.
'''
for city, state, abbr in user_cities:
if len(user_cities[(city, state, abbr)]) <= 1:
count+=1
print count / float(len(user_cities))
'''