Skip to content

Commit b4e363e

Browse files
committed
Add reading of unit details from the UCUM essence file into a dataclass
1 parent 121f5cd commit b4e363e

File tree

1 file changed

+64
-7
lines changed

1 file changed

+64
-7
lines changed

src/ucumvert/xml_util.py

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,97 @@
1+
from dataclasses import dataclass
12
from pathlib import Path
23
from xml.etree import ElementTree
34

45
# The UCUM essence XML is looked up in the "vendor" directory next to this module.
UCUM_ESSENCE_FILE = Path(__file__).parent.absolute().joinpath(
    "vendor", "ucum-essence.xml"
)

# Name of the XML attribute that holds the unit code:
# "Code" selects the case-sensitive codes, "CODE" the case-insensitive ones.
CODE_ATTRIB = "Code"

# The file is parsed once at import time; the functions below query ``root``.
tree = ElementTree.parse(UCUM_ESSENCE_FILE)  # noqa: S314
root = tree.getroot()
812

9-
# set to "Code" for case-sensitive and to "CODE" for case-insensitive units
10-
CODE_ATTRIB = "Code"
1113

14+
@dataclass
class UcumUnitDefinition:
    """Details of a single ``unit`` element read from the UCUM essence XML."""

    # Unit codes.
    code_cs: str  # "Code" attribute (case-sensitive)
    code_ci: str  # "CODE" attribute (case-insensitive)

    # Flags; True when the matching XML attribute equals "yes".
    is_metric: bool
    is_special: bool
    is_arbitrary: bool

    # "class" attribute; the trailing underscore avoids the Python keyword.
    class_: str

    # Text of the child elements of the same (camelCase) name.
    name: str
    print_symbol: str
    # The trailing underscore avoids shadowing the ``property`` builtin.
    property_: str

    # Taken from the child element that carries a "Unit" attribute.
    defining_unit: str  # its "Unit" attribute
    conversion_factor: float  # its "value" attribute; NaN when absent
1227

13-
def get_prefixes():
28+
29+
def get_prefixes() -> list:
    """Return the code of every ``prefix`` element that has a ``CODE_ATTRIB`` attribute."""
    codes = []
    for prefix_el in root.iterfind(".//{*}prefix[@" + CODE_ATTRIB + "]"):
        codes.append(prefix_el.attrib[CODE_ATTRIB])
    return codes
1632

1733

18-
def get_units():
34+
def get_units() -> list:
    """Return the code of every ``unit`` element that has a ``CODE_ATTRIB`` attribute."""
    xpath = ".//{*}unit[@" + CODE_ATTRIB + "]"
    return [unit_el.attrib[CODE_ATTRIB] for unit_el in root.findall(xpath)]
2440

2541

26-
def get_metric_units():
42+
def get_metric_units() -> list:
    """Return the codes of all ``unit`` elements whose ``isMetric`` attribute is "yes"."""
    metric_elements = root.findall(
        ".//{*}unit[@" + CODE_ATTRIB + "][@isMetric='yes']"
    )
    return [unit_el.attrib[CODE_ATTRIB] for unit_el in metric_elements]
2945

3046

31-
def get_non_metric_units():
47+
def get_non_metric_units() -> list:
    """Return the codes of all ``unit`` elements whose ``isMetric`` attribute is "no"."""
    non_metric_elements = root.findall(
        ".//{*}unit[@" + CODE_ATTRIB + "][@isMetric='no']"
    )
    return [unit_el.attrib[CODE_ATTRIB] for unit_el in non_metric_elements]
3450

3551

36-
def get_base_units():
52+
def get_base_units() -> list:
    """Return the code of every ``base-unit`` element that has a ``CODE_ATTRIB`` attribute."""
    codes = []
    for base_el in root.iterfind(".//{*}base-unit[@" + CODE_ATTRIB + "]"):
        codes.append(base_el.attrib[CODE_ATTRIB])
    return codes
3955

4056

57+
def _parse_conversion_factor(raw) -> float:
    """Return *raw* as a float, or NaN when it is missing or not numeric."""
    if raw is None:
        return float("nan")
    try:
        return float(raw)
    except ValueError:
        # Keep the "NaN means unknown" convention instead of failing the
        # whole read on a single unparsable attribute.
        return float("nan")


def get_units_with_full_definition() -> list:
    """Read every unit of the parsed XML into a ``UcumUnitDefinition``.

    Only ``unit`` elements that carry the ``CODE_ATTRIB`` attribute are read.

    Returns:
        A list with one ``UcumUnitDefinition`` per matching element.
        ``conversion_factor`` is always a float. It is NaN when no numeric
        ``value`` attribute is found, neither on the child element that
        carries the ``Unit`` attribute nor on a ``function`` element nested
        directly inside it. ``defining_unit`` is an empty string when no
        child carries a ``Unit`` attribute.

    Raises:
        TypeError: If an element yields keys that are not fields of
            ``UcumUnitDefinition`` or lacks a required one (raised by the
            dataclass constructor).
    """
    data = []
    for el in root.findall(".//{*}unit[@" + CODE_ATTRIB + "]"):
        el_data = dict(el.items())
        # Rename the XML attribute names to the dataclass field names.
        el_data["code_ci"] = el_data.pop("CODE", "")
        el_data["code_cs"] = el_data.pop("Code", "")
        el_data["is_metric"] = el_data.pop("isMetric", "") == "yes"
        el_data["is_special"] = el_data.pop("isSpecial", "") == "yes"
        el_data["is_arbitrary"] = el_data.pop("isArbitrary", "") == "yes"
        el_data["class_"] = el_data.pop("class", "")

        # Explicit defaults so the dataclass can always be built, rather than
        # borrowing the text of an unrelated child element.
        el_data["defining_unit"] = ""
        el_data["conversion_factor"] = float("nan")

        for child in el:
            name = child.tag.rsplit("}", 1)[-1]  # strip the XML namespace
            if "Unit" not in child.attrib:
                # Plain text child: stored under its own tag name.
                el_data[name] = child.text
                continue

            conversion = dict(child.attrib)
            conversion["defining_unit"] = conversion.pop("Unit")
            conversion.pop("UNIT", "")  # case-insensitive duplicate, unused

            raw_factor = conversion.pop("value", None)
            if raw_factor is None:
                # The attribute "value" is sometimes in an element
                # "function" one level deeper.
                function = child.find("{*}function")
                if function is not None:
                    raw_factor = function.get("value")
            # XML attributes are strings; the dataclass field is a float.
            conversion["conversion_factor"] = _parse_conversion_factor(raw_factor)
            el_data.update(conversion)

        el_data["print_symbol"] = el_data.pop("printSymbol", "")
        el_data["property_"] = el_data.pop("property", "")
        data.append(UcumUnitDefinition(**el_data))
    return data
93+
94+
4195
if __name__ == "__main__":
4296
print(get_units())
4397
prefixes = get_prefixes()
@@ -58,3 +112,6 @@ def get_base_units():
58112

59113
units = get_units()
60114
print(f"Total number of units: {len(units)}")
115+
116+
units_data = get_units_with_full_definition()
117+
print(f"Units in dataclasses {len(units_data)}")

0 commit comments

Comments
 (0)