Skip to content

Commit a79a092

Browse files
committed
reorganize datascripts #34
1 parent a5e1f25 commit a79a092

21 files changed

+1134
-1146
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,7 @@ yarn-error.log*
3232

3333

3434
# unwanted tests for now
35-
/src/components/LinearGraphBlock/
35+
/src/components/LinearGraphBlock/
36+
37+
38+
__pycache__

datascripts/decline.py

Lines changed: 0 additions & 108 deletions
This file was deleted.

datascripts/lib.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
# Shared helpers for the datascripts: fetching remote CSVs and writing
# the generated datasets under ../public/data/.

import csv
import logging
import os

import coloredlogs
import requests

logger = logging.getLogger(__name__)
coloredlogs.install(level='DEBUG')
9+
10+
def get_online_csv(url):
    """
    Fetch an online CSV file and return its rows as a list of dicts
    (one dict per row, keyed by the header line).

    For Google spreadsheets: file > publish to the web > csv format >
    copy the link.
    """
    with requests.Session() as s:
        download = s.get(url)
        # Fail loudly on HTTP errors instead of silently parsing an
        # error page as CSV data.
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')
        reader = csv.DictReader(decoded_content.splitlines(), delimiter=',')
        return list(reader)
23+
24+
25+
def ensure_dir(path):
    """Create directory *path* (and any missing parents) if needed."""
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)
28+
29+
def write_csv(filename, data):
    """
    Write *data* (a non-empty list of dicts sharing the same keys) as a
    CSV file under ../public/data/, creating missing folders first.

    *filename* is a path relative to ../public/data/ and may contain
    subfolders (e.g. "folder/file.csv").
    """
    logger.debug('write csv | ' + filename)
    final_path = "../public/data/" + filename
    # Create every intermediate folder, not just the first path component.
    folder_path = os.path.dirname(final_path)
    if folder_path:
        ensure_dir(folder_path)
    # newline='' prevents blank lines on Windows (csv module requirement).
    with open(final_path, "w", newline='') as of:
        # Header columns come from the first row's keys.
        output_csv = csv.DictWriter(of, data[0].keys())
        output_csv.writeheader()
        output_csv.writerows(data)

datascripts/part_1.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
from collections import defaultdict
import csv
import os
# NOTE(review): Counter / DefaultDict are typing aliases instantiated below;
# this works at runtime, but collections.Counter / collections.defaultdict
# are the idiomatic source for these names.
from typing import Counter, DefaultDict

from lib import ensure_dir, logger


logger.info('start | part 1 main viz datasets')
9+
10+
def output_row(region, year, region_trade, region_products, total_trade):
    """
    Build one summary row for the longitudinal decline dataset.

    Parameters:
        region: label for the 'region' column (e.g. "La Rochelle").
        year: year key (string) used to index region_trade and region_products.
        region_trade: mapping year -> Counter({'Imports': x, 'Exports': y}).
        region_products: mapping year -> product -> Counter of trade values
            (Counter semantics matter: a missing direction reads as 0).
        total_trade: Counter of national totals, denominator of the shares.

    Returns:
        dict with absolute trade, national shares (None when the region has
        no trade in that direction, or the national total is zero) and
        Herfindahl concentration indices across products (None when there is
        no trade in the relevant direction(s)).
    """
    trade = region_trade[year]
    products = region_products[year].values()
    # Per-direction totals across all products (missing/zero entries skipped).
    sum_imports = sum(v.get('Imports') for v in products if v.get('Imports'))
    sum_exports = sum(v.get('Exports') for v in products if v.get('Exports'))

    def _share(direction):
        # None when the region has no recorded trade in that direction;
        # also guards a zero national total (previously divided by zero).
        if not trade.get(direction):
            return None
        total = total_trade[direction]
        return trade[direction] / total if total else None

    def _herfindahl(get_value, total):
        # Sum of squared product shares; None when total trade is zero.
        # Assumes trade values are non-negative — TODO confirm with data.
        if total == 0:
            return None
        return sum(pow(get_value(v) / total, 2) for v in products)

    return {
        'region': region,
        'year': year,
        'Exports': trade.get('Exports'),
        'Imports': trade.get('Imports'),
        'Imports_share': _share('Imports'),
        'Exports_share': _share('Exports'),
        'product_revolutionempire_imports_herfindahl':
            _herfindahl(lambda v: v['Imports'], sum_imports),
        'product_revolutionempire_exports_herfindahl':
            _herfindahl(lambda v: v['Exports'], sum_exports),
        'product_revolutionempire_total_herfindahl':
            _herfindahl(lambda v: v['Imports'] + v['Exports'],
                        sum_imports + sum_exports),
    }
26+
27+
28+
# ---------------------------------------------------------------------------
# Aggregate toflit18 trade flows per region/year, then write the three
# datasets used by the part 1 main visualisation.
# ---------------------------------------------------------------------------

# year -> Counter({'Imports': ..., 'Exports': ...})
france_trade = defaultdict(Counter)
LaRochelle_trade = defaultdict(Counter)
Bordeaux_trade = defaultdict(Counter)

# year -> product (or partner) -> Counter({'Imports': ..., 'Exports': ...})
Bordeaux_products = defaultdict(lambda: defaultdict(Counter))
LaRochelle_products = defaultdict(lambda: defaultdict(Counter))
LaRochelle_partners = defaultdict(lambda: defaultdict(Counter))

with open('../data/toflit18_all_flows.csv', 'r') as f:
    toflit18_flows = csv.DictReader(f)
    # Kept for parity with the source data — not used below.
    flows_fieldnames = toflit18_flows.fieldnames

    for flow in toflit18_flows:
        # ATTENTION we filter out Ports Francs
        if flow['partner_grouping'] == 'France':
            continue
        # Normalize years like "1750.5" down to "1750".
        year = flow['year'].split('.')[0]
        has_value = flow['value'] != ""
        # longitudinal absolute and share trade
        if flow['best_guess_national_partner'] == "1" and has_value:
            france_trade[year][flow['export_import']] += float(flow['value'])
        if flow['best_guess_region_prodxpart'] == "1" and has_value:
            try:
                if flow['customs_region'] == "La Rochelle":
                    LaRochelle_trade[year][flow['export_import']] += \
                        float(flow['value'])
                if flow['customs_region'] == "Bordeaux":
                    Bordeaux_trade[year][flow['export_import']] += \
                        float(flow['value'])
            except ValueError:
                # Unparsable numeric value: log it and keep going.
                logger.warning(flow['value'])
            except KeyError:
                pass
            # product and partner tops for La Rochelle (products only for
            # Bordeaux)
            if flow['customs_region'] == "La Rochelle":
                LaRochelle_products[year][flow['product_revolutionempire']][
                    flow['export_import']] += float(flow['value'])
                LaRochelle_partners[year][flow['partner_simplification']][
                    flow['export_import']] += float(flow['value'])
            if flow['customs_region'] == "Bordeaux":
                Bordeaux_products[year][flow['product_revolutionempire']][
                    flow['export_import']] += float(flow['value'])

# 1) longitudinal dataset: one row per region per year.
ensure_dir("../public/data/decline_longitudinal_data")
with open("../public/data/decline_longitudinal_data/decline_longitudinal_data.csv",
          "w", newline='') as of:
    output_csv = csv.DictWriter(
        of,
        ['region', 'year', 'Exports', 'Imports', 'Exports_share',
         'Imports_share', 'product_revolutionempire_imports_herfindahl',
         'product_revolutionempire_exports_herfindahl',
         'product_revolutionempire_total_herfindahl'])
    output_csv.writeheader()
    for year, value in sorted(france_trade.items()):
        output_csv.writerow(output_row(
            "La Rochelle", year, LaRochelle_trade, LaRochelle_products, value))
        output_csv.writerow(output_row(
            "Bordeaux", year, Bordeaux_trade, Bordeaux_products, value))
        # France totals: shares are trivially 100, concentration left empty.
        output_csv.writerow({
            'region': 'France',
            'year': year,
            'Exports': value['Exports'],
            'Imports': value['Imports'],
            'Imports_share': 100,
            'Exports_share': 100
        })

# 2) La Rochelle products for the two reference years.
ensure_dir("../public/data/decline_LR_products")
with open("../public/data/decline_LR_products/decline_LR_products.csv",
          "w", newline='') as of:
    output_csv = csv.DictWriter(of, ['product', 'year', 'Exports', 'Imports'])
    output_csv.writeheader()
    output_csv.writerows(
        {'product': product, 'year': year,
         'Exports': value.get("Exports"), 'Imports': value.get("Imports")}
        for year, products in LaRochelle_products.items()
        if year in ['1750', '1789']
        for product, value in products.items())

# 3) La Rochelle partners for the two reference years.
ensure_dir("../public/data/decline_LR_partners")
with open("../public/data/decline_LR_partners/decline_LR_partners.csv",
          "w", newline='') as of:
    output_csv = csv.DictWriter(of, ['partner', 'year', 'Exports', 'Imports'])
    output_csv.writeheader()
    output_csv.writerows(
        {'partner': partner, 'year': year,
         'Imports': value.get('Imports'), 'Exports': value.get('Exports')}
        for year, partners in LaRochelle_partners.items()
        if year in ['1750', '1789']
        for partner, value in partners.items())

logger.debug('done | part 1 main viz datasets')

datascripts/retrieve_part_2_main_viz_navigo_for_radar.py renamed to datascripts/part_2_navigo.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,9 @@
88
'''
99

1010
import csv
11-
import sys
12-
from collections import defaultdict
1311
import os
12+
from lib import ensure_dir, logger
1413

15-
def ensure_dir(path):
16-
if not os.path.exists(path):
17-
os.makedirs(path)
18-
19-
OUTPUT = "../public/data/part_2_navigo_viz_data/part_2_navigo_viz_data.csv"
2014
ensure_dir("../public/data/part_2_navigo_viz_data/")
2115

2216
def clean_bureau_name(name, departure):
@@ -46,10 +40,6 @@ def clean_bureau_name(name, departure):
4640
if flow['departure_ferme_direction'] == 'La Rochelle' and flow['departure_function'] == 'O':
4741
relevant_flows.append(flow)
4842

49-
50-
#print(sys.getdefaultencoding())
51-
#print(sys.stdout.encoding)
52-
5343
for f in relevant_flows :
5444
destination_radar='Unknown'
5545
if f['destination_partner_balance_supp_1789']=='Sénégal et Guinée':
@@ -71,8 +61,8 @@ def clean_bureau_name(name, departure):
7161

7262

7363
if (destination_radar=='Unknown'):
74-
print(f['destination_partner_balance_supp_1789'])
75-
print(f['destination_partner_balance_supp_1789'].encode("utf8"))
64+
logger.warning('unknown radar destination : ' + f['destination_partner_balance_supp_1789'])
65+
logger.warning('unknown radar destination : ' + f['destination_partner_balance_supp_1789'].encode("utf8"))
7666
#Create and assign a new column named destination_radar
7767
f['destination_radar'] = destination_radar
7868

@@ -103,8 +93,8 @@ def clean_bureau_name(name, departure):
10393

10494
#Check all is assigned
10595
if (homeport_destination_radar=='Unknown'):
106-
print(f['homeport_substate_1789_fr'])
107-
print(f['homeport_substate_1789_fr'].encode("utf8"))
96+
logger.warning('unknown homeport destination radar : ' + f['homeport_substate_1789_fr'])
97+
logger.warning('unknown homeport destination radar : ' + f['homeport_substate_1789_fr'].encode("utf8"))
10898
#Create and assign a new column named homeport_destination_radar
10999
f['homeport_destination_radar'] = homeport_destination_radar
110100

@@ -131,11 +121,15 @@ def format_for_viz(f):
131121
}
132122

133123
initial_flows_viz = [format_for_viz(f) for f in relevant_flows]
124+
134125
# write dataset
135-
with open(OUTPUT, "w", newline='') as csvfile:
126+
destination_filepath = "../public/data/part_2_navigo_viz_data/part_2_navigo_viz_data.csv"
127+
with open(destination_filepath, "w", newline='') as csvfile:
128+
logger.info('start | part 2 main viz navigo data')
136129
fieldnames = initial_flows_viz[0].keys()
137130
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
138131

139132
writer.writeheader()
140133
for f in initial_flows_viz:
141134
writer.writerow(f)
135+
logger.debug('done | part 2 main viz navigo data')

datascripts/retrieve_part_2_main_viz_toflit_for_alluvial.py renamed to datascripts/part_2_toflit18.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,10 @@
11
import csv
22
from collections import defaultdict
33
import os
4+
from lib import ensure_dir, logger
45

5-
def ensure_dir(path):
6-
if not os.path.exists(path):
7-
os.makedirs(path)
8-
9-
OUTPUT = "../public/data/part_2_toflit_viz_data/part_2_toflit_viz_data.csv"
10-
ensure_dir("../public/data/part_2_toflit_viz_data/")
116

7+
logger.info('start | part 2 main viz toflit18 data')
128
relevant_flows = []
139
# retrieve relevant flows
1410
with open('../data/toflit18_all_flows.csv', 'r') as f:
@@ -30,7 +26,6 @@ def ensure_dir(path):
3026
for f in relevant_flows :
3127
product_weight_kg = 0
3228
# @todo a lot of products are flushed out when doing thing
33-
# print(f['quantities_metric'], f['quantity_unit_metric'])
3429
if f['quantity_unit_metric'] and f['quantity_unit_metric'] == 'kg':
3530
product_weight_kg = float(f['quantities_metric'] if f['quantities_metric'] else 0)
3631
f['product_weight_kg'] = product_weight_kg
@@ -108,10 +103,13 @@ def format_for_viz(f):
108103
flows_viz = list(uniques.values())
109104

110105
# write dataset
111-
with open(OUTPUT, "w") as csvfile:
106+
dataset_filepath = "../public/data/part_2_toflit_viz_data/part_2_toflit_viz_data.csv"
107+
ensure_dir("../public/data/part_2_toflit_viz_data/")
108+
with open(dataset_filepath, "w") as csvfile:
112109
fieldnames = flows_viz[0].keys()
113110
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
114111

115112
writer.writeheader()
116113
for f in flows_viz:
117-
writer.writerow(f)
114+
writer.writerow(f)
115+
logger.debug('done | part 2 main viz toflit18 data')

0 commit comments

Comments
 (0)