main.py
import re
from collections import defaultdict

import logzero

import settings
from siemactk import notification, scraping, storage, wrangling


def init_logger():
    # Colored log format for console output (color placeholders are
    # expanded by logzero).
    console_logformat = (
        '%(asctime)s '
        '%(color)s'
        '[%(levelname)-8s] '
        '%(end_color)s '
        '%(message)s '
        '%(color)s'
        '(%(filename)s:%(lineno)d)'
        '%(end_color)s'
    )
    # Strip the color placeholders so the logfile stays plain text.
    file_logformat = re.sub(r'%\((end_)?color\)s', '', console_logformat)
    console_formatter = logzero.LogFormatter(fmt=console_logformat)
    file_formatter = logzero.LogFormatter(fmt=file_logformat)
    logzero.setup_default_logger(formatter=console_formatter)
    # Rotating logfile configured from project settings.
    logzero.logfile(
        settings.LOGFILE,
        maxBytes=settings.LOGFILE_SIZE,
        backupCount=settings.LOGFILE_BACKUP_COUNT,
        formatter=file_formatter,
    )
    return logzero.logger


def run():
    logger = init_logger()

    logger.info('Downloading dataref file...')
    codelists, datasets_urls = storage.download_dataref()
    logger.info('Converting codelists to dict mapping...')
    codelists = wrangling.codelists_to_dict(codelists)

    # Download, stage and upload every dataset, tracking the uploaded
    # files grouped by dataset name.
    uploaded_files = defaultdict(list)
    for dataset_url in datasets_urls:
        logger.info(f'Downloading {dataset_url}')
        if dataset := scraping.download_dataset(dataset_url):
            logger.info(f'Staging {dataset}...')
            if output_files := wrangling.stage_dataset(dataset, codelists):
                logger.info('Uploading output files...')
                for file in output_files:
                    download_url = storage.upload(file)
                    filename = file.name
                    uploaded_files[dataset.stem].append((filename, download_url))
                    logger.debug(f'{filename} -> {download_url}')

    logger.info('Notifying results...')
    notification.notify(uploaded_files)


if __name__ == '__main__':
    run()