# konpile.py — fill a Jinja2 template from an Excel/CSV/JSON data file.
#!/usr/bin/env python3
import sys
import os
from pathlib import Path
import argparse
from datetime import datetime
import jinja2
import openpyxl
import csv
import json
DataSet = dict[str, list[dict]]
def std_str(obj: any):
obj = str(obj)
if obj is not None:
return (
obj.lower()
.replace("-", "_")
.replace(" ", "_")
.replace(",", "_")
.replace(".", "_")
)
def process_file(datafile: Path) -> DataSet:
    """
    Dispatch *datafile* to the parser that matches its file extension
    and return the resulting DataSet.

    Raises ValueError for extensions with no registered parser.
    """
    # TODO: Implement file parsing injection for decoupling.
    parsers = {
        ".xlsx": process_excel,
        ".csv": process_csv,
        ".json": process_json,
    }
    suffix = datafile.suffix.lower()
    if suffix not in parsers:
        raise ValueError(f"Unsupported file type: {datafile.suffix}")
    return parsers[suffix](datafile)
def process_excel(datafile: Path) -> DataSet:
    """
    Unpack an Excel workbook into a DataSet.
    The schema is SHEETNAME: [ {COLUMNNAME: VALUE, ...}, ... ]
    """
    workbook = openpyxl.load_workbook(datafile)
    dataset: DataSet = {}
    for sheet_name in workbook.sheetnames:
        sheet = workbook[sheet_name]
        # Row 1 holds the column headers; normalize them once up front.
        column_names = [std_str(cell.value) for cell in sheet[1]]
        records = []
        # Data rows start at row 2; each becomes one header->value dict.
        for row in sheet.iter_rows(min_row=2):
            records.append(
                {column_names[i]: cell.value for i, cell in enumerate(row)}
            )
        dataset[std_str(sheet.title)] = records
    return dataset
def process_csv(datafile: Path) -> DataSet:
    """
    Unpack a CSV file into a dictionary of lists of dictionaries (DataSet).
    The schema is FILENAME: [ {COLUMNNAME: VALUE, ...}, ... ]
    """
    # newline="" is required by the csv module so quoted fields that
    # contain embedded newlines are parsed correctly (and so newline
    # translation doesn't corrupt records on Windows).
    with open(datafile, "r", newline="") as f:
        reader = csv.DictReader(f)
        rows = list(reader)
    return {std_str(datafile.stem): rows}
def process_json(datafile: Path) -> DataSet:
    """
    Unpack a JSON file into a dictionary of lists of dictionaries (DataSet).
    The schema is FILENAME: [ {COLUMNNAME: VALUE, ...}, ... ]
    """
    # Read and decode the whole document in one step.
    payload = json.loads(datafile.read_text())
    return {std_str(datafile.stem): payload}
def render_file(template_file: Path, data: DataSet):
    """
    Render the Jinja2 template stored at *template_file* with *data*
    and return the rendered text.
    """
    # Anchor the loader at the template's directory so relative
    # includes/extends resolve next to the template file.
    loader = jinja2.FileSystemLoader(searchpath=str(template_file.parent))
    environment = jinja2.Environment(loader=loader)
    return environment.get_template(template_file.name).render(data)
def render_string(template_string: str, data: DataSet):
    """
    Render an in-memory Jinja2 template string with *data* and return
    the rendered text.
    """
    environment = jinja2.Environment(loader=jinja2.BaseLoader())
    return environment.from_string(template_string).render(data)
def translate_fields(dataset: object, translators: list[Path]):
    """
    Translate field values in *dataset* using a list of CSV files.

    Each translator CSV must have two columns, ``original`` and
    ``translation``. A translator named ``foo.csv`` applies to every
    field whose normalized name matches ``foo``: values found in the
    table are replaced with their translation, and values with no table
    entry are passed through unchanged. (The original implementation
    indexed the table directly and raised ``KeyError`` on the first
    untranslated value, crashing the run.)

    Translators are applied sequentially; a new structure is returned
    and the input is not mutated in place.
    """
    for translator in translators:
        target_field = std_str(translator.stem)
        # newline="" per the csv module's file-opening requirement.
        with open(translator, "r", newline="") as f:
            table = {
                row["original"]: row["translation"]
                for row in csv.DictReader(f)
            }

        def translate(field_name, obj, _table=table, _target=target_field):
            # Recursively walk dicts/lists; translate leaves reached
            # through a key matching this translator's target field.
            if isinstance(obj, dict):
                return {key: translate(key, value) for key, value in obj.items()}
            if isinstance(obj, list):
                return [translate(field_name, item) for item in obj]
            if field_name == _target:
                # Unknown values fall back to themselves.
                return _table.get(str(obj), obj)
            return obj

        dataset = translate(None, dataset)
    return dataset
if __name__ == "__main__":
    # CLI entry point: parse arguments, load data, optionally translate
    # fields, then render the template to a timestamped output file.
    parser = argparse.ArgumentParser(
        description="Use an Excel file to fill a Jinja2 template."
    )
    parser.add_argument("template_file", type=Path, help="Jinja2 template file")
    parser.add_argument("data_file", type=Path, help="File with data")
    parser.add_argument(
        "--translation", "-t", type=Path, nargs="+", help="Translation CSV files"
    )
    args = parser.parse_args(sys.argv[1:])

    print("Unpacking data...")
    data = process_file(args.data_file)

    if args.translation:
        print("Translating fields...")
        data = translate_fields(data, args.translation)

    print("Rendering template...")
    rendering = render_file(args.template_file, data)

    # Write next to the CWD with a timestamp so reruns never clobber.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    Path(f"{args.template_file.stem}_{timestamp}.txt").write_text(rendering)