-
Notifications
You must be signed in to change notification settings - Fork 0
/
bg_download_data.py
executable file
·630 lines (498 loc) · 24.1 KB
/
bg_download_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
"""
Pulls smart trap data from the Biogents API.
Pulls smart trap data in JSON format from the Biogents API given the API
key and the desired timeframe. By default, this script shows a
graphical display of its progress. This display shows a progress bar
for each trap detected in the initial request. The left side of the
graph marks the beginning of the target timeframe and the right side the
end. Each trap's bar represents the most recent data that has been
collected for that trap.
For usage information, run with -h.
This script requires at least Python 3.5.
"""
import argparse
import collections
import curses
import json
import math
import os
import re
import time
import datetime as dt
import requests
import bg_common as com
def parse_args():
    """Build the command line interface, parse the arguments and validate them.

    Exits through parser.error if the end time is not after the start
    time. If the end time lies within the last 24 hours (or in the
    future), a warning is printed and execution pauses for 5 seconds.

    The returned namespace has 'nested_directories' removed; an 'output'
    of None is what signals the nested directory layout downstream.
    """
    parser = argparse.ArgumentParser(description='Pulls smart trap data.')
    add = parser.add_argument

    # Positional arguments.
    add('api_key', help='The 32-character API key taken from the Biogents user '
                        'dashboard. Keep dashes.')
    add('start_time', type=com.parse_date,
        help='Beginning of the target timeframe. Acceptable time formats ("T" is '
             'literal): "YYYY-MM-DD", "YYYY-MM-DDTHH-MM", "YYYY-MM-DDTHH-MM-SS"')
    add('end_time', type=com.parse_date,
        help='End of the target timeframe. Same acceptable formats as above.')

    # General options.
    add('-p', '--pretty-print', action='store_const', const=4, default=None,
        help='Pretty print to file.')
    add('-t', '--trap', metavar='TRAP_ID', dest='target_traps', nargs='*',
        type=valid_trap_id, help='Only get data for particular traps based on '
                                 'their trap IDs.')
    add('--skip-empty', action='store_true',
        help="Don't write traps with no data to file.")
    add('--no-display', dest='display', action='store_false',
        help="Don't show the graphical display.")
    add('--split-traps', action='store_true',
        help='Write each trap into a separate file.')

    # Output options: exactly one of these has to be given.
    wrapper = parser.add_argument_group('output arguments',
                                        'Must specify exactly one of the following.')
    exclusive = wrapper.add_mutually_exclusive_group(required=True)
    exclusive.add_argument('-d', '--dry', action='store_true', help="Don't write to file.")
    exclusive.add_argument('-o', '--output',
                           help='The name of the output file. If --split-traps is set, each '
                                'separate file will end in "_X", where X is a unique integer.')
    exclusive.add_argument('-n', '--nested-directories', action='store_true',
                           help='Instead of writing to a file in the current directory, write '
                                'to ./smart-trap-json/[API Key]/. The filename will be '
                                '[Start Time]_[End Time].json. If --split-traps is set, write '
                                'to ./smart-trap-json/[API Key]/[Trap ID]/. Each filename '
                                'will be [Trap ID]_[Start Time]_[End Time].json')

    parsed = parser.parse_args()

    starts_before_end = parsed.start_time < parsed.end_time
    if not starts_before_end:
        parser.error('End time must be after start time')

    # Recent (or future) end times may not have all their data uploaded yet.
    time_since_end = dt.datetime.now() - parsed.end_time
    if time_since_end < dt.timedelta(days=1):
        print('Warning: Attempting to get data from within the past 24 hours (or in the future)\n'
              ' may yield incomplete datasets. Continuing in 5 seconds.')
        time.sleep(5)

    # The flag itself is never consumed by the download code.
    delattr(parsed, 'nested_directories')

    return parsed
def download_data(stdscr, api_key, start_time, end_time, output, target_traps=None,
                  split_traps=False, skip_empty=False, pretty_print=None, dry=False):
    """Download smart trap data over a specific timeframe.

    A first request covers the whole timeframe. Any trap that comes back
    with exactly 'limit' captures is assumed to have been cut off, so
    further requests are made from the earliest cut-off point until
    every trap has delivered fewer than 'limit' captures.

    Required arguments:
    stdscr -- The curses screen object on which to show the graphical
        display. 'None' can be passed to indicate no graphical display.
    api_key -- The 32-character API key taken from the Biogents user
        dashboard. Keep dashes.
    start_time -- A datetime object representing the beginning of the
        timeframe to get data over.
    end_time -- A datetime object representing the end of the timeframe
        to get data over.
    output -- A string containing the desired name of the output file.
        If 'None' is passed, the file will be placed within a directory
        structure described in the documentation for the
        --nested-directories option above.

    Optional arguments:
    target_traps -- A container holding one or more string trap IDs
        to exclusively get data for. All other traps will be ignored.
        If not all target traps are found, a ValueError will be thrown.
    split_traps -- A boolean controlling whether to keep all traps'
        data in one file or to split each trap's data into a separate
        file.
    skip_empty -- A boolean controlling whether to list traps in the
        output file that did not provide any capture data.
    pretty_print -- The indentation passed to json.dump for the final
        JSON output, or 'None' for no pretty printing.
    dry -- Pass True to prevent the script from creating any files.

    Raises ValueError if the window is too narrow for the display, if a
    target trap is missing from the response, if a trap delivers more
    than 'limit' captures, if the newest capture of a trap has an empty
    ending timestamp, or if a follow-up request yields no progress for
    any incomplete trap.
    """
    # The limit of data points (captures) per trap the API can deliver.
    limit = 1000
    timestamp_fmt = '%Y-%m-%d %H:%M:%S'

    if stdscr:
        # Get screen size.
        max_y, max_x = stdscr.getmaxyx()

        # Check whether the window is big enough.
        if max_x < 40:
            raise ValueError('Window is too narrow. Please widen to at least 40 columns '
                             'or use --no-display.')

        # Set up stuff related to curses.
        curses.curs_set(False)
        stdscr.nodelay(True)
        graph_width = max_x - 35
        duration = end_time - start_time
        gradation = duration / graph_width
        curses.init_pair(1, curses.COLOR_BLACK, curses.COLOR_WHITE)
        curses.init_pair(2, curses.COLOR_BLACK, curses.COLOR_GREEN)

        # Will contain the y-coordinate of each trap's line
        # in the display.
        trap_ys = {}

        # Create the pad we will draw on.
        pad = Pad(stdscr, max_y, max_x)
    else:
        pad = None

    # Perform a request on the full duration first. The status is drawn
    # on the pad (like every later status) rather than on stdscr.
    js = request_data(api_key, start_time, end_time, pad)

    # Keys will be traps with incomplete data and values will be
    # their most recent timestamps.
    incomplete_traps = {}

    # Will contain the JSON objects for each individual trap.
    trap_data = {}

    if stdscr:
        # Determine whether the pad needs to be resized.
        num_traps = len(js['traps'])
        min_height = 8 + 2*num_traps
        if min_height > max_y:
            pad.resize(min_height, max_x)

        # Move cursor to prepare for horizontal line drawing.
        pad.move(3, 2)

    # Loop over each trap.
    for trap_wrapper in js['traps']:
        trap_id = trap_wrapper['Trap']['id']

        # If no trap was specified or this trap was specified,
        # grab its data.
        if target_traps and trap_id not in target_traps:
            continue

        captures = trap_wrapper['Capture']
        num_captures = len(captures)
        trap_data[trap_id] = trap_wrapper

        if stdscr:
            # Draw horizontal lines.
            y, _ = pad.getyx()
            y += 2
            pad.move(y, 2)
            trap_ys[trap_id] = y
            pad.addstr(trap_id)
            pad.hline(y, 21, '-', graph_width)

        # If we get limit capture for this trap, most likely we hit
        # the max and the trap has more data that wasn't delivered.
        if num_captures == limit:
            # Store the most recent timestamp in incomplete_traps.
            ending_timestamp = captures[-1]['timestamp_end']
            ending_datetime = dt.datetime.strptime(ending_timestamp, timestamp_fmt)
            incomplete_traps[trap_id] = ending_datetime

            if stdscr:
                # Draw a white line to show the data
                # we currently have and print the number of
                # captures (limit) to the right of the line.
                position = date_to_position(ending_datetime, start_time, gradation)
                pad.hline(trap_ys[trap_id], 21, ' ', position, curses.color_pair(1))
                pad.addstr(y, 23 + graph_width, str(num_captures))
            else:
                percentage = date_to_percentage(ending_datetime, start_time, end_time)
                print('Trap {}: {}% complete.'.format(trap_id, percentage))

        # If the API is ever improved so that it can deliver more
        # than limit captures, we'll have to modify this script.
        elif num_captures > limit:
            raise ValueError('More than {} (limit) captures for a trap: {}.'
                             .format(limit, num_captures))

        # Else, assume that we got all captures.
        else:
            if stdscr:
                pad.addstr(y, 23 + graph_width, 'Done')
                pad.hline(y, 21, ' ', graph_width, curses.color_pair(2))
            else:
                print('Trap {}: 100% complete.'.format(trap_id))

    # If traps were specified, check to see if they're all there.
    if target_traps:
        diff = set(target_traps) - set(trap_data)
        if diff:
            raise ValueError('Trap ID(s) not found in response: ' + ', '.join(sorted(diff)))

    if stdscr:
        # Draw vertical lines.
        pad.vline(3, 20, '|', 2*len(trap_ys) + 3)
        pad.vline(3, 21 + graph_width, '|', 2*len(trap_ys) + 3)
        pad.addch(2, 20, '+')
        pad.addch(2*len(trap_ys) + 6, 20, '+')
        pad.refresh()

    # Loop while there are still traps with more data.
    while incomplete_traps:
        # Remember the state before this round so a round without any
        # progress can be detected instead of repeating forever.
        previous_state = dict(incomplete_traps)

        # Find which ending timestamp is the earliest.
        earliest_date = min(incomplete_traps.values())

        if stdscr:
            # Move the tracking line to the earliest time.
            position = date_to_position(earliest_date, start_time, gradation)
            erase_tracking_line(graph_width, trap_ys, pad)
            draw_tracking_line(position, trap_ys, pad)

        # Perform a request from the earliest time to the original
        # end time with a second delay so we don't overload the server.
        time.sleep(1)
        new_js = request_data(api_key, earliest_date, end_time, pad)

        # Turn all previous data green.
        if stdscr:
            for trap_id, ending_date in incomplete_traps.items():
                position = date_to_position(ending_date, start_time, gradation)
                pad.hline(trap_ys[trap_id], 21, ' ', position, curses.color_pair(2))
            pad.refresh()

        for trap_wrapper in new_js['traps']:
            trap_id = trap_wrapper['Trap']['id']
            if trap_id not in incomplete_traps:
                continue

            captures = trap_wrapper['Capture']
            num_captures = len(captures)
            num_new_captures = 0

            if stdscr:
                y = trap_ys[trap_id]

                # An empty response for this trap has no last capture
                # to draw, so only draw when there is one.
                if captures:
                    # Get the last timestamp and draw a white bar
                    # for all new data.
                    last_ending_position = date_to_position(incomplete_traps[trap_id],
                                                            start_time, gradation)
                    ending_timestamp = captures[-1]['timestamp_end']
                    ending_datetime = dt.datetime.strptime(ending_timestamp, timestamp_fmt)
                    position = date_to_position(ending_datetime, start_time, gradation)
                    pad.hline(y, last_ending_position + 21, ' ',
                              position - last_ending_position, curses.color_pair(1))
                    pad.refresh()

            # Check if there are more than limit captures
            # for the same reason as before.
            if num_captures > limit:
                raise ValueError('More than {} (limit) captures for a trap: {}.'
                                 .format(limit, num_captures))

            # Loop through the captures.
            for i, capture in enumerate(captures):
                timestamp_start = com.make_datetime(capture['timestamp_start'])
                timestamp_end = com.make_datetime(capture['timestamp_end'])

                # If this capture has valid timestamps and has
                # a timestamp after the most recent timestamp
                # for this trap, this and all following captures
                # are new data.
                if (timestamp_start and timestamp_end
                        and timestamp_start >= incomplete_traps[trap_id]):
                    # Add the new data to the JSON object.
                    new_captures = captures[i:]
                    num_new_captures = len(new_captures)
                    trap_data[trap_id]['Capture'].extend(new_captures)

                    # Update the most recent timestamp
                    # in incomplete_traps.
                    ending_timestamp = new_captures[-1]['timestamp_end']
                    ending_datetime = com.make_datetime(ending_timestamp)

                    # If the last timestamp_end for a trap in
                    # a request is empty, this might mean that all
                    # its timestamp_ends are empty, which is
                    # a problem.
                    if not ending_datetime:
                        raise ValueError('Last ending timestamp is empty at capture ID: {}'
                                         .format(capture['id']))

                    incomplete_traps[trap_id] = ending_datetime

                    # No need to loop through the rest
                    # of the captures.
                    break

            # If less than limit captures were delivered, assume
            # we now have all the data.
            if num_captures < limit:
                del incomplete_traps[trap_id]
                if stdscr:
                    pad.hline(y, 23 + graph_width, ' ', 6)
                    pad.addstr(y, 23 + graph_width, 'Done')
                    pad.hline(y, 21, ' ', graph_width, curses.color_pair(2))
                else:
                    print('Trap {}: 100% complete.'.format(trap_id))
            else:
                if stdscr:
                    # Print the number of new captures
                    # to the right of the trap's line.
                    pad.hline(y, 23 + graph_width, ' ', 6)
                    pad.addstr(y, 23 + graph_width, '+' + str(num_new_captures))
                else:
                    percentage = date_to_percentage(incomplete_traps[trap_id],
                                                    start_time, end_time)
                    print('Trap {}: {}% complete. ({} new captures)'
                          .format(trap_id, percentage, num_new_captures))

        # If nothing was completed and no timestamp advanced, the same
        # request would keep being repeated; stop instead.
        if incomplete_traps == previous_state:
            raise ValueError('No new data received for incomplete trap(s): '
                             + ', '.join(str(trap_id) for trap_id in incomplete_traps))

    if stdscr:
        # Now that we're done, move the tracking line to the end.
        erase_tracking_line(graph_width, trap_ys, pad)
        draw_tracking_line(graph_width, trap_ys, pad)

    # Unless we're doing a dry run, write the JSON objects to file.
    if not dry:
        write_to_file(trap_data, api_key, start_time, end_time, output,
                      split_traps, skip_empty, pretty_print, pad)

    if stdscr:
        print_status('Finished!', pad)
        time.sleep(1.5)
    else:
        print('Finished.')
def write_to_file(trap_data, api_key, start_time, end_time, output,
                  split_traps=False, skip_empty=False, pretty_print=None, screen=None):
    """Write smart trap JSON data to file.

    Required arguments:
    trap_data -- The dict containing the smart trap data, keyed by
        trap ID.
    api_key -- The 32-character API key taken from the Biogents user
        dashboard. Used only if the output option isn't provided.
    start_time -- A datetime object representing the beginning of the
        timeframe that contains this data.
    end_time -- A datetime object representing the end of the timeframe
        that contains this data.
    output -- A string containing the desired name of the output file
        (relative or absolute). If 'None' is passed, the file will be
        placed within a directory structure described in the
        documentation for the --nested-directories option above.

    Optional arguments:
    split_traps -- A boolean controlling whether to keep all traps'
        data in one file or to split each trap's data into a separate
        file. Split files named via 'output' get the suffix "_X", where
        X counts the files actually written.
    skip_empty -- A boolean controlling whether to list traps in the
        output file that did not provide any capture data.
    pretty_print -- The indentation passed to json.dump, or 'None' for
        no pretty printing.
    screen -- The curses screen object on which to print the status.
        'None' indicates that a curses screen is not being used.
    """
    # Write trap data to file.
    if screen:
        print_status('Writing to file...', screen)
    else:
        print('Writing to file...')

    date_fmt = '%Y-%m-%d'
    midnight = dt.time()  # dt.time() defaults to 00:00:00.

    # If either time contains some non-zero value,
    # we'll print both fully.
    if start_time.time() != midnight or end_time.time() != midnight:
        date_fmt += 'T%H-%M-%S'
    start_str = start_time.strftime(date_fmt)
    end_str = end_time.strftime(date_fmt)

    # Don't write a trap if we're skipping empty
    # and it has no captures.
    kept = [(trap_id, trap_wrapper) for trap_id, trap_wrapper in trap_data.items()
            if not (skip_empty and not trap_wrapper['Capture'])]

    if split_traps:
        for i, (trap_id, trap_wrapper) in enumerate(kept):
            if output:
                # Use the name as given so absolute paths keep working.
                path = '{}_{}'.format(output, i)
            else:
                dir_path = './smart-trap-json/{}/{}'.format(api_key, trap_id)
                # exist_ok avoids the race between checking and creating.
                os.makedirs(dir_path, exist_ok=True)
                filename = '{}_{}_{}.json'.format(trap_id, start_str, end_str)
                path = '{}/{}'.format(dir_path, filename)

            with open(path, 'w') as f:
                json.dump({'traps': [trap_wrapper]}, f, indent=pretty_print)
    else:
        trap_obj = {'traps': [trap_wrapper for _, trap_wrapper in kept]}

        if output:
            # Use the name as given so absolute paths keep working.
            path = output
        else:
            dir_path = './smart-trap-json/' + api_key
            os.makedirs(dir_path, exist_ok=True)
            filename = '{}_{}.json'.format(start_str, end_str)
            path = '{}/{}'.format(dir_path, filename)

        with open(path, 'w') as f:
            json.dump(trap_obj, f, indent=pretty_print)
def valid_trap_id(string):
    """Check a string to see if it's a valid trap ID.

    A valid trap ID consists of exactly 15 ASCII digits. Returns the
    string unchanged if it is valid; raises argparse.ArgumentTypeError
    otherwise, so this can be used as an argparse 'type'.
    """
    # fullmatch rejects a trailing newline, which '$' would let through.
    if not re.fullmatch(r'[0-9]{15}', string):
        raise argparse.ArgumentTypeError('Invalid trap ID: ' + string)

    return string
def request_data(api_key, start_time, end_time, screen, timeout=300):
    """Send a request for smart trap data and return data as a dict.

    Required arguments:
    api_key -- The API key, sent as the 'data[user_id]' form field.
    start_time -- A datetime object; sent as 'data[startTime]'.
    end_time -- A datetime object; sent as 'data[endTime]'.
    screen -- The curses screen object on which to print the status.
        'None' indicates that a curses screen is not being used.

    Optional arguments:
    timeout -- Seconds to wait for the server before giving up, passed
        to requests. Without it a stalled connection would block
        forever.

    The response is parsed into OrderedDicts so the key order of the
    JSON is preserved. Raises whatever requests raises for connection
    problems, timeouts and HTTP error statuses, and json.JSONDecodeError
    if the response isn't valid JSON (the response text is printed
    first).
    """
    if screen:
        print_status('Performing request...', screen)
    else:
        print('Performing request...')

    url = 'http://live.bg-counter.com/traps/exportTrapCapturesForTimeFrame.json'
    data = {
        'data[user_id]': api_key,
        'data[startTime]': start_time.isoformat(' '),
        'data[endTime]': end_time.isoformat(' ')
    }
    response = requests.post(url, data=data, timeout=timeout)
    response.raise_for_status()

    try:
        js = json.loads(response.text, object_pairs_hook=collections.OrderedDict)
    except json.JSONDecodeError:
        # Leave curses mode first so the printed text is readable.
        if screen:
            curses.endwin()
        print("Response text:\n'" + response.text + "'\n")
        raise

    if screen:
        print_status('Done.', screen)
    else:
        print('Done.')

    return js
def date_to_position(datetime, start_time, gradation):
    """Map a datetime to its (floored) position on the graph.

    The position is the number of whole 'gradation' timedeltas that fit
    between start_time and the given datetime.
    """
    elapsed = datetime - start_time
    steps = elapsed / gradation
    return math.floor(steps)
def date_to_percentage(datetime, start_time, end_time):
    """Return how far into the timeframe a datetime lies, in whole percent.

    The result is floored, so it only reaches 100 at end_time itself.
    """
    elapsed = datetime - start_time
    total = end_time - start_time
    fraction = elapsed / total
    return math.floor(fraction * 100)
def draw_tracking_line(position, trap_ys, screen):
    """Draw the vertical tracking line at a graph position and refresh.

    position -- The graph position; it is shifted right by 21 columns
        to get the screen column.
    trap_ys -- A dict mapping trap IDs to the row of each trap's line.
        The tracking line is drawn in the row below each of them.
    screen -- The object to draw on (needs addch, vline and refresh).
    """
    column = position + 21
    rows_below_traps = 2*len(trap_ys)

    # Top cap and the two rows above the first trap.
    screen.addch(2, column, '+')
    screen.vline(3, column, '|', 2)

    # One segment in the gap beneath every trap's line.
    for trap_row in trap_ys.values():
        screen.addch(trap_row + 1, column, '|')

    # Final segment and bottom cap.
    screen.addch(rows_below_traps + 5, column, '|')
    screen.addch(rows_below_traps + 6, column, '+')

    screen.refresh()
def erase_tracking_line(graph_width, trap_ys, screen):
    """Blank out every row the tracking line can occupy, then refresh.

    graph_width -- The width of the graph in columns.
    trap_ys -- A dict mapping trap IDs to the row of each trap's line.
    screen -- The object to draw on (needs hline and refresh).
    """
    top_row = 2
    bottom_row = 2*len(trap_ys) + 6

    # The top border row is cleared two columns wider than the
    # rows inside the graph, starting one column further left.
    screen.hline(top_row, 20, ' ', graph_width + 2)

    # Rows inside the graph: the two above the first trap, the row
    # beneath each trap, and the last one before the bottom border.
    inner_rows = [3, 4]
    inner_rows.extend(trap_row + 1 for trap_row in trap_ys.values())
    inner_rows.append(bottom_row - 1)
    for row in inner_rows:
        screen.hline(row, 21, ' ', graph_width)

    screen.hline(bottom_row, 20, ' ', graph_width + 2)

    screen.refresh()
def print_status(string, screen):
    """Show a status message near the top left of the screen.

    The first 30 columns of the status area are blanked before the new
    message is written, then the screen is refreshed.
    """
    status_row, status_col = 1, 2
    screen.hline(status_row, status_col, ' ', 30)
    screen.addstr(status_row, status_col, string)
    screen.refresh()
class Pad:
    """Create and manage a pad potentially larger than the screen size.

    This class encapsulates curses' window class to create a pad that
    behaves well even if it's larger than the screen size, including
    providing basic scrolling functionality. Any attribute not defined
    here is looked up on the internal curses pad.

    Public methods:
    resize
    check_input
    scroll
    refresh
    """

    def __init__(self, stdscr, nlines, ncols):
        """Initialize the instance and create the main pad object.

        stdscr -- The curses screen; used for its size and for reading
            keyboard input.
        nlines -- The height of the pad.
        ncols -- The width of the pad.
        """
        self.stdscr = stdscr
        self.pad = curses.newpad(nlines, ncols)
        self.scr_height, self.scr_width = stdscr.getmaxyx()
        self.top_shown = 0
        # The furthest the pad can be scrolled; zero if it fits on screen.
        self.max_top_shown = max(0, nlines - self.scr_height)

    def __getattr__(self, item):
        """Implement encapsulation.

        This method implements the encapsulation of the curses window
        object by routing all undefined attributes to the internal pad.

        item -- The attribute that was called.
        """
        # If 'pad' itself is missing, looking it up below would call
        # this method again without end.
        if item == 'pad':
            raise AttributeError(item)
        return getattr(self.pad, item)

    def resize(self, nlines, ncols):
        """Resize the pad and update how far it can be scrolled."""
        self.pad.resize(nlines, ncols)
        # Never negative, even if the pad is shorter than the screen.
        self.max_top_shown = max(0, nlines - self.scr_height)
        self.top_shown = min(self.top_shown, self.max_top_shown)

    def check_input(self):
        """Read keyboard input and perform the appropriate action.

        All pending key presses are consumed; up and down arrows are
        summed into a single scroll.
        """
        pages = 0
        code = self.stdscr.getch()

        # Find the cumulative result of the key presses.
        while code >= 0:
            if code == curses.KEY_UP:
                pages -= 1
            elif code == curses.KEY_DOWN:
                pages += 1
            code = self.stdscr.getch()

        if pages != 0:
            self.scroll(pages)

    def scroll(self, num_pages):
        """Scroll the pad some number of pages.

        Note that a positive num_pages scrolls down while a negative
        scrolls up. The result is kept between 0 and max_top_shown.
        """
        target = self.top_shown + num_pages*self.scr_height
        self.top_shown = max(0, min(target, self.max_top_shown))

    def refresh(self):
        """Refresh the pad after checking for input."""
        self.check_input()
        # The last two arguments are the inclusive bottom right corner
        # of the screen area, hence the "- 1" on both.
        self.pad.refresh(self.top_shown, 0, 0, 0, self.scr_height - 1, self.scr_width - 1)
if __name__ == '__main__':
    # The parsed options double as keyword arguments for download_data,
    # except for 'display', which only decides how it gets called.
    args = vars(parse_args())
    display = args.pop('display')

    if not display:
        # No curses screen: progress is printed as plain text.
        download_data(None, **args)
    else:
        # curses.wrapper supplies the screen as the first argument.
        curses.wrapper(download_data, **args)