1
+ from dataclasses import dataclass
1
2
from pathlib import Path
2
3
from xml .etree import ElementTree
3
4
4
5
# Location of the vendored UCUM essence XML file, resolved relative to this module.
UCUM_ESSENCE_FILE = Path(__file__).parent.absolute().joinpath("vendor", "ucum-essence.xml")

# Name of the XML attribute that holds a code:
# "Code" selects the case-sensitive codes, "CODE" the case-insensitive ones.
CODE_ATTRIB = "Code"

# The document is parsed once at import time; the lookup functions in this
# module all search starting from ``root``.
tree = ElementTree.parse(UCUM_ESSENCE_FILE)  # noqa: S314
root = tree.getroot()
8
12
9
- # set to "Code" for case-sensitive and to "CODE" for case-insensitive units
10
- CODE_ATTRIB = "Code"
11
13
14
@dataclass
class UcumUnitDefinition:
    """Flat record describing a single unit definition.

    The field order is part of the interface: instances can be created
    positionally or by keyword. Names that would clash with Python keywords
    or builtins carry a trailing underscore.
    """

    # --- codes ---
    code_cs: str  # case-sensitive code
    code_ci: str  # case-insensitive code

    # --- classification flags ---
    is_metric: bool
    is_special: bool
    is_arbitrary: bool
    class_: str

    # --- descriptive texts ---
    name: str
    print_symbol: str
    property_: str

    # --- definition in terms of another unit ---
    defining_unit: str
    conversion_factor: float
12
27
13
- def get_prefixes ():
28
+
29
def get_prefixes() -> list:
    """Return the ``CODE_ATTRIB`` value of every ``prefix`` element that has that attribute."""
    # "{*}" matches the element in any namespace.
    query = f".//{{*}}prefix[@{CODE_ATTRIB}]"
    matches = root.findall(query)
    return [element.attrib[CODE_ATTRIB] for element in matches]
16
32
17
33
18
- def get_units ():
34
def get_units() -> list:
    """Return the ``CODE_ATTRIB`` value of every ``unit`` element that has that attribute."""
    # "{*}" matches the element in any namespace.
    query = f".//{{*}}unit[@{CODE_ATTRIB}]"
    return [element.attrib[CODE_ATTRIB] for element in root.findall(query)]
24
40
25
41
26
- def get_metric_units ():
42
def get_metric_units() -> list:
    """Return the codes of all ``unit`` elements whose ``isMetric`` attribute is ``'yes'``."""
    # Both predicates must hold: the code attribute exists and the unit is metric.
    query = f".//{{*}}unit[@{CODE_ATTRIB}][@isMetric='yes']"
    matches = root.findall(query)
    return [element.attrib[CODE_ATTRIB] for element in matches]
29
45
30
46
31
- def get_non_metric_units ():
47
def get_non_metric_units() -> list:
    """Return the codes of all ``unit`` elements whose ``isMetric`` attribute is ``'no'``."""
    # Both predicates must hold: the code attribute exists and the unit is not metric.
    query = f".//{{*}}unit[@{CODE_ATTRIB}][@isMetric='no']"
    matches = root.findall(query)
    return [element.attrib[CODE_ATTRIB] for element in matches]
34
50
35
51
36
- def get_base_units ():
52
def get_base_units() -> list:
    """Return the ``CODE_ATTRIB`` value of every ``base-unit`` element that has that attribute."""
    # "{*}" matches the element in any namespace.
    query = f".//{{*}}base-unit[@{CODE_ATTRIB}]"
    matches = root.findall(query)
    return [element.attrib[CODE_ATTRIB] for element in matches]
39
55
40
56
57
def get_units_with_full_definition() -> list:
    """Build a ``UcumUnitDefinition`` for every ``unit`` element of the parsed document.

    For each unit, the element's attributes and its child elements are
    flattened into one mapping, the keys are renamed to the dataclass field
    names, and the mapping is passed to ``UcumUnitDefinition``.

    Returns:
        One ``UcumUnitDefinition`` per ``unit`` element that carries the
        ``CODE_ATTRIB`` attribute, in the order returned by ``findall``.

    Raises:
        TypeError: If the collected keys of a unit do not match the fields of
            ``UcumUnitDefinition`` (a missing or an unexpected key).
    """
    data = []
    for el in root.findall(".//{*}unit[@" + CODE_ATTRIB + "]"):
        el_data = dict(el.items())
        # rename some keys
        el_data["code_ci"] = el_data.pop("CODE", "")
        el_data["code_cs"] = el_data.pop("Code", "")
        el_data["is_metric"] = el_data.pop("isMetric", "") == "yes"
        el_data["is_special"] = el_data.pop("isSpecial", "") == "yes"
        el_data["is_arbitrary"] = el_data.pop("isArbitrary", "") == "yes"
        el_data["class_"] = el_data.pop("class", "")

        # iterate over all children
        for child in el:
            # strip a "{namespace}" prefix from the tag, if there is one
            name = child.tag.rsplit("}", 1)[-1]
            if "Unit" in child.attrib:
                conversion = dict(child.attrib)
                conversion["defining_unit"] = conversion.pop("Unit")
                conversion.pop("UNIT", "")

                # The attribute "value" is sometimes in an element "function"
                # one level deeper, so look at the nested elements when the
                # child itself does not carry it.
                if "value" not in conversion:
                    for nested in child:
                        if "value" in nested.attrib:
                            conversion["value"] = nested.attrib["value"]
                            break

                # XML attribute values are strings; the dataclass field is a
                # float. A missing or non-numeric value becomes NaN.
                raw_factor = conversion.pop("value", None)
                try:
                    conversion["conversion_factor"] = float(raw_factor)
                except (TypeError, ValueError):
                    conversion["conversion_factor"] = float("nan")
                el_data.update(**conversion)
            else:
                # NOTE(review): child.text is None when the element starts
                # with nested markup instead of plain text.
                el_data[name] = child.text
                # Placeholders; replaced if a later child carries "Unit".
                el_data["defining_unit"] = child.text
                el_data["conversion_factor"] = float("nan")
        # init dataclass and add to list
        el_data["print_symbol"] = el_data.pop("printSymbol", "")
        el_data["property_"] = el_data.pop("property", "")
        data.append(UcumUnitDefinition(**el_data))
    return data
93
+
94
+
41
95
if __name__ == "__main__" :
42
96
print (get_units ())
43
97
prefixes = get_prefixes ()
@@ -58,3 +112,6 @@ def get_base_units():
58
112
59
113
units = get_units ()
60
114
print (f"Total number of units: { len (units )} " )
115
+
116
+ units_data = get_units_with_full_definition ()
117
+ print (f"Units in dataclasses { len (units_data )} " )
0 commit comments