-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexibank_hubercolumbian.py
71 lines (58 loc) · 2.25 KB
/
lexibank_hubercolumbian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from pathlib import Path
import attr
import lingpy
from clldutils.misc import slug
import pylexibank
from pylexibank.providers import qlc
# Raw values that Dataset.cmd_makecldf skips instead of adding as forms.
exclude = ["etc"]
@attr.s
class CustomConcept(pylexibank.Concept):
    """Concept record extended with two optional, dataset-specific columns.

    Both extra fields default to ``None``.
    """

    # Presumably the Spanish gloss of the concept -- confirm against the
    # concept list shipped with the dataset.
    Spanish = attr.ib(default=None)

    # Presumably the gloss as spelled in the digitised source -- confirm.
    Gloss_in_digital_source = attr.ib(default=None)
@attr.s
class CustomLanguage(pylexibank.Language):
    """Language record carrying coordinates and the name used in the source.

    All three fields declared here default to ``None``.
    """

    # Declaration order is kept as-is: attrs derives the generated
    # ``__init__`` signature from it.
    Longitude = attr.ib(default=None)
    Latitude = attr.ib(default=None)

    # Key of the language lookup built in ``Dataset.cmd_makecldf`` via
    # ``add_languages(lookup_factory="Name_in_Source")``.
    Name_in_Source = attr.ib(default=None)
class Dataset(pylexibank.providers.qlc.QLC):
    """Lexibank dataset ``hubercolumbian`` built from ``huber1992.csv``."""

    dir = Path(__file__).parent
    id = "hubercolumbian"
    DSETS = ["huber1992.csv"]
    concept_class = CustomConcept
    language_class = CustomLanguage
    writer_options = {"keep_languages": False, "keep_parameters": False}

    def cmd_makecldf(self, args):
        """Write sources, languages, concepts and forms to ``args.writer``.

        The first file listed in ``DSETS`` is read from ``raw_dir`` as a
        LingPy wordlist.  Concepts come from the first concept list of the
        dataset; every entry of a concept's ``lexibank_gloss`` attribute is
        mapped to the concept ID so wordlist rows can be matched to it.
        Rows whose doculect is "English" or "Español", or whose value is
        listed in the module-level ``exclude``, are not written as forms.

        Raises:
            KeyError: if a retained row names a doculect or a concept gloss
                that is missing from the respective lookup.
        """
        # The "counterpart_doculect" column gives the proper doculect names,
        # so it serves as the language column of the wordlist.
        raw_file = self.raw_dir / self.DSETS[0]
        wordlist = lingpy.Wordlist(raw_file.as_posix(), col="counterpart_doculect")

        args.writer.add_sources()
        language_ids = args.writer.add_languages(lookup_factory="Name_in_Source")

        concept_ids = {}
        for entry in self.conceptlists[0].concepts.values():
            concept_id = entry.number + "_" + slug(entry.english)
            args.writer.add_concept(
                ID=concept_id,
                Name=entry.english,
                Concepticon_ID=entry.concepticon_id,
                Concepticon_Gloss=entry.concepticon_gloss,
            )
            # Several source glosses may point at the same concept.
            concept_ids.update(
                dict.fromkeys(entry.attributes["lexibank_gloss"], concept_id)
            )

        gloss_doculects = ("English", "Español")
        for _, doculect, gloss, value, qlcid in wordlist.iter_rows(
            "counterpart_doculect", "concept", "counterpart", "qlcid"
        ):
            if doculect in gloss_doculects or value in exclude:
                continue
            args.writer.add_form(
                Language_ID=language_ids[doculect],
                Parameter_ID=concept_ids[gloss],
                Value=value,
                Form=value,
                Source=["Huber1992"],
                Local_ID=qlcid,
            )