-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_in_osaka_users_trajectories_2011.py
71 lines (58 loc) · 2.36 KB
/
get_in_osaka_users_trajectories_2011.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python
# encoding: utf-8
import csv
import os
import time
def filter_users_in_region(filename, lat_min, lat_max, lon_min, lon_max):
    """Count, per user, the records of a CSV file that lie inside a box.

    Every non-blank row must have exactly seven fields, read as
    ``uid, time, lat, lon`` followed by three fields that are ignored.
    The numeric user id is the uid field with its first three characters
    dropped.  Raw coordinates are divided by 3600000.0 before being
    compared with the bounds (presumably milliarcseconds -> degrees;
    confirm against the data specification).

    Args:
        filename: Path of the CSV file to scan.
        lat_min: Lower latitude bound (exclusive).
        lat_max: Upper latitude bound (exclusive).
        lon_min: Lower longitude bound (exclusive).
        lon_max: Upper longitude bound (exclusive).

    Returns:
        A dict mapping integer user id to the number of that user's
        records strictly inside the box.  Users without any such record
        are absent from the dict.

    Raises:
        ValueError: If a non-blank row does not have seven fields or a
            needed field is not numeric.
    """
    counts = {}
    with open(filename, 'r') as f:
        for row in csv.reader(f):
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing newline); unpacking that would raise, so skip it.
            if not row:
                continue
            uid_str, _, lat_str, lon_str, _, _, _ = row
            lat = float(lat_str) / 3600000.0
            lon = float(lon_str) / 3600000.0
            if lat_min < lat < lat_max and lon_min < lon < lon_max:
                # Only rows that count need their uid parsed.
                uid = int(uid_str[3:])
                counts[uid] = counts.get(uid, 0) + 1
    return counts
def output_traj(full_path, out_folder, out_filename, user_set,
                count_threshold=5):
    """Write the records of sufficiently active users to a new CSV file.

    Reads ``full_path`` (seven fields per non-blank row:
    ``uid, time, lat, lon`` plus three ignored fields) and writes one
    ``uid,time,lat,lon`` line to ``out_folder/out_filename`` for every
    record whose user has a count greater than ``count_threshold`` in
    ``user_set``.  In the output the uid field is copied unchanged, the
    timestamp is reformatted from ``YYYYmmddHHMMSS`` to
    ``YYYY-mm-dd HH:MM:SS`` and the raw coordinates are divided by
    3600000.0 (presumably milliarcseconds -> degrees; confirm against
    the data specification).

    Args:
        full_path: Path of the input CSV file.
        out_folder: Existing directory that receives the output file.
        out_filename: Name of the output file inside ``out_folder``;
            an existing file of that name is overwritten.
        user_set: Dict mapping integer user id (uid field without its
            first three characters) to a record count.  It is only
            read, never modified.
        count_threshold: A user's records are written only when the
            user's count is strictly greater than this value.

    Raises:
        ValueError: If a non-blank row does not have seven fields, or a
            field of a written record cannot be parsed.
    """
    # Decide once which users qualify instead of re-checking (and
    # pruning) the caller's dict for every input row.
    keep = set(uid for uid, n in user_set.items() if n > count_threshold)
    with open(full_path, 'r') as fin:
        with open(os.path.join(out_folder, out_filename), 'w') as fout:
            for row in csv.reader(fin):
                # Blank lines come back as [] from csv.reader.
                if not row:
                    continue
                uid_str, time_str, lat_str, lon_str, _, _, _ = row
                if int(uid_str[3:]) not in keep:
                    continue
                # Parsing is deferred until here so that rows which are
                # dropped do not pay for strptime/float conversion.
                stamp = time.strftime(
                    '%Y-%m-%d %H:%M:%S',
                    time.strptime(time_str, '%Y%m%d%H%M%S'))
                lat = float(lat_str) / 3600000.0
                lon = float(lon_str) / 3600000.0
                fout.write('{},{},{},{}\n'.format(uid_str, stamp, lat, lon))
def filename_generator(folder_path, year=2010, months=(10,)):
    """Yield the daily CSV files of the given months that actually exist.

    Candidate names have the form ``YYYYMMDD.csv`` for every day number
    1..31 of each requested month.  A ``Reading <path>`` line is printed
    for every candidate, including those that turn out not to exist;
    only existing regular files are yielded.

    Args:
        folder_path: Directory that holds the daily CSV files.
        year: Four-digit year used in the file names.
        months: Iterable of month numbers to enumerate.

    Yields:
        ``(filename, full_path)`` tuples in chronological order.
    """
    filename_fmt = '{:04d}{:02d}{:02d}.csv'
    for m in months:
        # Day numbers run to 31 for every month; days that do not exist
        # are simply filtered out by the isfile() check below.
        for d in range(1, 32):
            filename = filename_fmt.format(year, m, d)
            full_path = os.path.join(folder_path, filename)
            # Single-argument print(...) emits the same text under both
            # Python 2 and Python 3.
            print('Reading {}'.format(full_path))
            if os.path.isfile(full_path):
                yield filename, full_path
def main():
    """Extract the trajectories of users seen inside a fixed bounding box.

    For every daily input file produced by ``filename_generator``, count
    each user's records inside the box with ``filter_users_in_region``
    and then write the qualifying users' records with ``output_traj``
    to a file of the same name in the output folder (whose name suggests
    the box covers Osaka).
    """
    lat_min = 34.4416666667
    lat_max = 34.8416666667
    # NOTE(review): the 1e-10 offset on both longitude bounds is kept
    # exactly as found; its purpose is not evident from this file.
    lon_min = 135.3 + 1e-10
    lon_max = 135.7 + 1e-10
    folder_path = '/media/fan/65D42DD030E60A2D/ZDC/2010/ZDC/'
    # The destination never changes, so it is set once, outside the loop.
    out_folder = '/home/fan/work/data/UsersInOsaka_2010/'
    for filename, full_path in filename_generator(folder_path):
        # Create the destination on demand so the first write does not
        # fail on a fresh machine; nothing is created when there is no
        # input file to process.
        if not os.path.isdir(out_folder):
            os.makedirs(out_folder)
        user_set = filter_users_in_region(
            full_path, lat_min, lat_max, lon_min, lon_max)
        output_traj(full_path, out_folder, filename, user_set)
# Run the extraction only when this file is executed as a script, so that
# importing it for its helper functions has no side effects.
if __name__ == '__main__':
    main()