-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhdf_iterate_test.py
38 lines (27 loc) · 982 Bytes
/
hdf_iterate_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import sys
# Echo the interpreter version to stdout before the third-party import below.
print(sys.version)
import pandas as pd
import datetime
# NOTE(review): functools is not referenced anywhere in the visible code.
import functools
def row_to_xml(row, file):
    """Print every value in *row*, then write an opening ``<item>`` tag to *file*.

    The values of *row* are only echoed to stdout; they are not included in
    what is written.  The text written is the newline-join of a one-element
    list, i.e. exactly ``<item>`` with no trailing newline.

    Args:
        row: Iterable whose elements are printed one per line.
        file: Object with a ``write(str)`` method that receives the tag.
    """
    for value in row:
        print(value)

    fragments = ['<item>']
    payload = '\n'.join(fragments)
    file.write(payload)
def process_hdf(
    filename,
    output_csv="/Users/nem41/Documents/apollo/output/test2.csv",
    chunksize=10000,
):
    """Sum an HDF5 table per (simulator_time, infection_state) and save it as CSV.

    The first key in the store is read in chunks, restricted to rows whose
    ``infection_state`` is one of infectious / latent / susceptible /
    recovered.  Each chunk is grouped on the ``simulator_time`` and
    ``infection_state`` index levels and summed, and the per-chunk sums are
    accumulated chunk by chunk into one result.  The current wall-clock time
    is printed before reading and again after the CSV has been written.

    Args:
        filename: Path of the HDF5 store to read (opened read-only).
        output_csv: Destination of the comma-separated result.  Defaults to
            the path that was previously hard-coded in this function.
        chunksize: Number of rows requested from the store per chunk.

    Raises:
        IndexError: If the store contains no keys.
        ValueError: If the filtered selection yields no rows, so there is
            nothing to write.
    """
    group_levels = ['simulator_time', 'infection_state']
    state_filter = 'infection_state == ["infectious", "latent", "susceptible", "recovered"]'

    print(datetime.datetime.now())

    totals = None
    # The context manager closes the store on every exit path, including
    # when a chunk fails to process.
    with pd.HDFStore(filename, "r") as hdf:
        chunks = hdf.select(
            hdf.keys()[0],
            where=state_filter,
            chunksize=chunksize,
            iterator=True,
        )
        for chunk in chunks:
            partial = chunk.groupby(level=group_levels).sum()
            if totals is None:
                totals = partial
            else:
                # fill_value=0 keeps groups that are present in only one of
                # the two operands instead of turning them into NaN.
                totals = totals.add(partial, fill_value=0)

    if totals is None:
        raise ValueError(
            "no rows in %r matched the infection_state filter; nothing to write"
            % (filename,)
        )

    totals.to_csv(output_csv, sep=',')
    print(datetime.datetime.now())
# Run the aggregation on the hard-coded HDF5 file whenever this module is
# executed or imported.
process_hdf(
    filename='/Users/nem41/Documents/apollo/output/R0.1.4.apollo.h5.04.01.16',
)