run.py
import argparse
import logging
from consensus_decentralization.aggregate import aggregate
from consensus_decentralization.map import apply_mapping
from consensus_decentralization.analyze import analyze
from consensus_decentralization.parse import parse
from consensus_decentralization.plot import plot
import consensus_decentralization.helper as hlp

logging.basicConfig(format='[%(asctime)s] %(message)s', datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO)


def main(projects, timeframe, aggregate_by, force_map, make_plots, make_animated_plots, output_dir=hlp.OUTPUT_DIR):
"""
Executes the entire pipeline (parsing, mapping, analyzing) for some projects and timeframes.
:param projects: list of strings that correspond to the ledgers whose data should be analyzed
:param timeframe: tuple of (start_date, end_date) where each date is a datetime.date object.
:param aggregate_by: string that corresponds to the granularity that will be used for the analysis. It can be one
of: day, week, month, year, all.
:param force_map: bool. If True, then the parsing and mapping will be performed, regardless of whether
mapped data for some or all of the projects already exist
:param make_plots: bool. If True, then plots are generated and saved for the results
:param make_animated_plots: bool. If True (and make_plots also True) then animated plots are also generated.
Warning: generating animated plots might take a long time
:param output_dir: pathlib.PosixPath object of the directory where the output data will be saved
"""
logging.info(f"The ledgers that will be analyzed are: {','.join(projects)}")
for project in projects:
project_dir = output_dir / project
project_dir.mkdir(parents=True, exist_ok=True) # create project output directory if it doesn't already exist
mapped_data_file = project_dir / 'mapped_data.json'
if force_map or not mapped_data_file.is_file():
parsed_data = parse(project=project, input_dir=hlp.RAW_DATA_DIR)
mapped_data = apply_mapping(project=project, parsed_data=parsed_data, output_dir=output_dir)
else:
mapped_data = None
aggregate(
project=project,
output_dir=output_dir,
timeframe=timeframe,
aggregate_by=aggregate_by,
force_aggregate=force_map,
mapped_data=mapped_data
)
used_metrics = analyze(
projects=projects,
aggregated_data_filename=hlp.get_blocks_per_entity_filename(aggregate_by=aggregate_by, timeframe=timeframe),
output_dir=output_dir
)
if make_plots:
plot(
ledgers=projects,
metrics=used_metrics,
aggregated_data_filename=hlp.get_blocks_per_entity_filename(aggregate_by=aggregate_by, timeframe=timeframe),
animated=make_animated_plots
)
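
# A minimal sketch of calling main() programmatically, assuming datetime.date objects for the timeframe;
# the ledger name below is a placeholder and raw data for it would need to exist under hlp.RAW_DATA_DIR:
#
#   from datetime import date
#   main(
#       projects=['bitcoin'],
#       timeframe=(date(2022, 1, 1), date(2022, 12, 31)),
#       aggregate_by='month',
#       force_map=False,
#       make_plots=False,
#       make_animated_plots=False,
#   )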


if __name__ == '__main__':
    default_ledgers = hlp.get_default_ledgers()
    start_date, end_date = hlp.get_default_start_end_dates()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--ledgers',
        nargs="*",
        type=str.lower,
        default=default_ledgers,
        choices=default_ledgers,
        help='The ledgers that will be analyzed.'
    )
    parser.add_argument(
        '--timeframe',
        nargs="*",
        type=hlp.valid_date,
        default=[start_date, end_date],
        help='The timeframe that will be analyzed. You can provide two values to mark the beginning and end of the '
             'timeframe, or a single value that encapsulates both.'
    )
    parser.add_argument(
        '--aggregate-by',
        nargs="?",
        type=str.lower,
        default='month',
        choices=['day', 'week', 'month', 'year', 'all'],
        help='The granularity that will be used for the analysis. It can be one of: "day", "week", "month", "year", '
             '"all"; the default is "month". Note that in the case of weekly aggregation, a week is considered to be '
             '7 consecutive days, starting from the first day of the time period under consideration (so not '
             'necessarily Monday to Sunday). If "all" is chosen, then no aggregation is performed, meaning that '
             'the given timeframe is treated as a single unit of time in the analysis.'
    )
    parser.add_argument(
        '--force-map',
        action='store_true',
        help='Flag to specify whether to map the parsed data, regardless of whether the mapped data files already exist.'
    )
    parser.add_argument(
        '--plot',
        action='store_true',
        help='Flag to specify whether to produce and save plots of the results.'
    )
    parser.add_argument(
        '--animated',
        action='store_true',
        help='Flag to specify whether to also generate animated plots.'
    )
    args = parser.parse_args()

    aggregate_by = args.aggregate_by
    timeframe = args.timeframe
    if len(timeframe) > 2:
        parser.error('Too many values given for the --timeframe argument. Please provide one date to get a snapshot '
                     'or two dates to get a time series.')
    timeframe_start = hlp.get_timeframe_beginning(timeframe[0])
    timeframe_end = hlp.get_timeframe_end(timeframe[-1])
    if timeframe_end < timeframe_start:
        parser.error('Invalid --timeframe values. Please note that if a second date is provided, it must not precede '
                     'the first date.')

    main(
        projects=args.ledgers,
        timeframe=(timeframe_start, timeframe_end),
        aggregate_by=aggregate_by,
        force_map=args.force_map,
        make_plots=args.plot,
        make_animated_plots=args.animated
    )
    logging.info('Done. Please check the output directory for results.')
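
# Example invocations (illustrative only; the accepted ledger names and default dates come from
# hlp.get_default_ledgers() and hlp.get_default_start_end_dates(), so 'bitcoin' and 'ethereum' below
# are assumptions rather than guaranteed choices):
#
#   python run.py --ledgers bitcoin ethereum --timeframe 2018-01-01 2022-12-31 --aggregate-by month --plot
#   python run.py --force-map --aggregate-by all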