Skip to content

Commit 646bef2

Browse files
committed
Handle missing hierarchy in GeoNames processor
It turns out that parts of the hierarchy are sometimes skipped. Four cities remain that have no admin 1 code and are annotated directly onto countries (e.g., Hong Kong), but this at least fixes an issue where some admin 2 regions don't have a corresponding admin 1 region.
1 parent c1eccb2 commit 646bef2

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

src/pyobo/sources/geonames.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@ def get_terms(*, force: bool = False) -> Collection[Term]:
3838
"""Get terms."""
3939
code_to_country = get_code_to_country(force=force)
4040
code_to_admin1 = get_code_to_admin1(code_to_country, force=force)
41-
code_to_admin2 = get_code_to_admin2(code_to_admin1, force=force)
41+
code_to_admin2 = get_code_to_admin2(
42+
code_to_country=code_to_country, code_to_admin1=code_to_admin1, force=force
43+
)
4244
id_to_term = get_cities(
4345
code_to_country=code_to_country,
4446
code_to_admin1=code_to_admin1,
@@ -113,7 +115,7 @@ def get_code_to_admin1(
113115

114116

115117
def get_code_to_admin2(
116-
code_to_admin1: Mapping[str, Term], *, force: bool = False
118+
*, code_to_country: Mapping[str, Term], code_to_admin1: Mapping[str, Term], force: bool = False
117119
) -> Mapping[str, Term]:
118120
"""Get a mapping from admin2 code to term."""
119121
admin2_df = ensure_df(
@@ -134,8 +136,13 @@ def get_code_to_admin2(
134136
term.append_property("code", code)
135137
code_to_admin2[code] = term
136138
admin1_code = code.rsplit(".", 1)[0]
137-
admin1_term = code_to_admin1[admin1_code]
138-
term.append_relationship(part_of, admin1_term)
139+
admin1_term = code_to_admin1.get(admin1_code)
140+
if admin1_term:
141+
term.append_relationship(part_of, admin1_term)
142+
else:
143+
country_code = admin1_code.split(".", 1)[0]
144+
country_term = code_to_country[country_code]
145+
term.append_relationship(part_of, country_term)
139146
return code_to_admin2
140147

141148

@@ -197,7 +204,10 @@ def get_cities(
197204
term.append_synonym(synonym)
198205

199206
if pd.isna(admin1):
200-
tqdm.write(f"[geonames:{identifier}] missing admin 1 code for {name} ({country})")
207+
# TODO try to annotate these directly onto countries
208+
tqdm.write(
209+
f"[geonames:{identifier}] {name}, a city in {country}, is missing admin 1 code"
210+
)
201211
continue
202212

203213
admin1_full = f"{country}.{admin1}"

0 commit comments

Comments
 (0)