forked from kokes/od
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
68 lines (56 loc) · 2.65 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import csv
import codecs
import os
import ssl
import zipfile
from datetime import datetime
from urllib.request import urlretrieve
from tempfile import TemporaryDirectory
# Maps column names of the source CSV to the column names written to the
# output file. The order of the entries is the column order of the output,
# and source columns not listed here are dropped by main().
header = {
    # identification of the subsidy
    "Evidenční číslo dotace": "evidencni_cislo_dotace",
    "Identifikator dotace": "identifikator_dotace",
    "Název dotace": "nazev_dotace",
    # recipient
    "Účastník": "ucastnik",
    "IČ účastníka": "ic_ucastnika",
    # NOTE(review): the surrounding spaces are part of this key; presumably
    # the source header is padded the same way -- confirm before trimming.
    " Účel dotace ": "ucel_dotace",
    # provider
    "Poskytovatel dotace": "poskytovatel_dotace",
    "IČ poskytovatele": "ic_poskytovatele",
    # amounts and date
    "Částka požadovaná": "castka_pozadovana",
    "Částka schválená": "castka_schvalne",
    "Datum poskytnutí dotace": "datum_poskytnuti",
}
def main(outdir: str, partial: bool = False):
    """Download the DotInfo subsidy export and write a cleaned ``dotace.csv``.

    The ZIP archive is downloaded into a temporary directory and the CSV
    inside it (cp1250-encoded, semicolon-delimited) is streamed row by row.
    Columns are renamed according to the module-level ``header`` mapping and
    any other column is dropped (the dropped names are printed once). Literal
    ``NULL`` values, unparseable dates and non-numeric company IDs (IČO) are
    written as empty fields; the latter two are also reported on stdout.

    Args:
        outdir: Directory in which ``dotace.csv`` is created (an existing
            file of that name is overwritten). It must already exist.
        partial: When true, stop early: only rows with index 0..1000 are
            processed.
    """
    # NOTE(review): this disables TLS certificate verification for every
    # later urllib HTTPS request in the process (not only the download below)
    # and relies on private ``ssl`` attributes. Left unchanged because the
    # download may depend on it -- confirm whether the server certificate
    # validates nowadays and remove this if it does.
    ssl._create_default_https_context = ssl._create_unverified_context

    source_url = 'https://data.mfcr.cz/sites/default/files/DotInfo_report_29_01_2020.zip'
    with TemporaryDirectory() as tmpdir:
        rawpath = os.path.join(tmpdir, "raw.zip")
        urlretrieve(source_url, rawpath)
        outpath = os.path.join(outdir, 'dotace.csv')
        # newline='' is what the csv module requires of files it writes to;
        # without it the writer's \r\n terminator becomes \r\r\n on Windows.
        with zipfile.ZipFile(rawpath) as zf, \
                zf.open('DotInfo_report_29_01_2020.csv') as f, \
                open(outpath, 'w', encoding='utf8', newline='') as fw:
            # The archive member is a binary stream; decode it lazily.
            ut = codecs.iterdecode(f, encoding='cp1250')
            cr = csv.DictReader(ut, delimiter=';')
            cw = csv.DictWriter(fw, fieldnames=list(header.values()))
            cw.writeheader()
            exphd = set(header)
            for j, row in enumerate(cr):
                if partial and j > 1e3:
                    break
                if j == 0:
                    # Report, once, the source columns that are not exported.
                    rem = set(row.keys()) - exphd
                    if rem:
                        print('vynechavame sloupce: ', rem)

                # Keep only the known columns, under their output names, and
                # turn the textual NULL marker into a real missing value.
                remapped = {
                    header[k]: None if v == 'NULL' else v
                    for k, v in row.items()
                    if k in header
                }

                datum = remapped['datum_poskytnuti']
                if datum:
                    try:
                        datetime.fromisoformat(datum)
                    except ValueError:
                        print('nevalidni datum', datum)
                        remapped['datum_poskytnuti'] = None

                # Both company-ID columns must be purely numeric when present.
                for col in ('ic_ucastnika', 'ic_poskytovatele'):
                    if remapped[col] and not remapped[col].isdigit():
                        print('nevalidni ICO', remapped[col])
                        remapped[col] = None

                cw.writerow(remapped)
# Script entry point: write the export into the current working directory.
if __name__ == "__main__":
    main(outdir=".")