-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexibank_saenkoromance.py
83 lines (73 loc) · 3.01 KB
/
lexibank_saenkoromance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from pathlib import Path
import lingpy as lp
from clldutils.misc import slug
from pylexibank import Dataset as BaseDataset
from pylexibank import Concept
import attr
@attr.s
class CustomConcept(Concept):
    """Concept row that carries an additional ``Number`` column.

    ``Number`` defaults to ``None``; ``Dataset.cmd_makecldf`` fills it with
    the ``number`` attribute of each concept-list entry.
    """

    Number = attr.ib(default=None)
class Dataset(BaseDataset):
    """Lexibank dataset definition for ``saenkoromance``.

    The dataset directory is the directory containing this module, and
    concepts are written using ``CustomConcept`` so that the ``Number``
    column is available.
    """

    dir = Path(__file__).parent
    id = "saenkoromance"
    concept_class = CustomConcept

    def cmd_makecldf(self, args):
        """Convert the raw wordlist into CLDF via ``args.writer``.

        Steps, in order:

        1. Load ``D_test_Romance-110-43.tsv`` from ``self.raw_dir`` as a
           LingPy ``Wordlist``.
        2. Add every concept of ``self.conceptlists[0]`` and remember its
           generated ID under the concept's English gloss.
        3. Register the alternative concept labels listed in ``aliases``.
        4. Add languages and sources.
        5. For each wordlist row with a known concept label, add a form and
           a cognate judgement; a negative ``cogid`` is recorded as
           ``Loan=True``.

        Rows whose concept label is unknown are skipped; each such label is
        printed once.

        :param args: command arguments; only ``args.writer`` is used here.
        :raises KeyError: if an alias target is missing from the concept
            list, or a row's language is missing from the language lookup.
        """
        concepts = {}
        wl = lp.Wordlist(self.raw_dir.joinpath("D_test_Romance-110-43.tsv").as_posix())

        for concept in self.conceptlists[0].concepts.values():
            idx = "{0}_{1}".format(concept.number, slug(concept.english))
            args.writer.add_concept(
                ID=idx,
                Number=concept.number,
                Name=concept.english,
                Concepticon_ID=concept.concepticon_id,
                Concepticon_Gloss=concept.concepticon_gloss,
            )
            concepts[concept.english] = idx

        # Alternative label -> English gloss in the concept list. Each alias
        # is pointed at the ID already registered for the gloss.
        aliases = {
            "bite (V)": "bite",
            "burn": "burn tr.",
            "claw": "claw (nail)",
            "come (V)": "come",
            "die (V)": "die",
            "drink (V)": "drink",
            "eat (V)": "eat",
            "fat": "fat n.",
            "fly": "fly v.",
            "give (V)": "give",
            "hear (V)": "hear",
            "kill (V)": "kill",
            "know (V)": "know",
            "lie (V)": "lie",
            "rain (V)": "rain",
            "say (V)": "say",
            "see (V)": "see",
            "sit (V)": "sit",
            "sleep (V)": "sleep",
            "stand (V)": "stand",
            "swim (V)": "swim",
            "walk (V)": "walk(go)",
        }
        for label, gloss in aliases.items():
            concepts[label] = concepts[gloss]

        languages = args.writer.add_languages(
            lookup_factory="Name", id_factory=lambda x: slug(x["Name"])
        )
        args.writer.add_sources()

        # Concept labels without a mapping, so each is reported only once.
        unmapped = set()
        for idx, concept in wl.iter_rows("concept"):
            if concept not in concepts:
                if concept not in unmapped:
                    print(concept)
                    unmapped.add(concept)
                continue

            cogid = wl[idx, "cogid"]
            lexeme = args.writer.add_form(
                Language_ID=languages[wl[idx, "language"]],
                Parameter_ID=concepts[concept],
                Value=wl[idx, "source_form"],
                Form=".".join(wl[idx, "tokens"]),
                Source="Saenko2015",
                Loan=cogid < 0,
            )
            args.writer.add_cognate(
                lexeme=lexeme,
                Cognateset_ID=cogid,
                Cognate_Detection_Method="expert",
                Source=["Saenko2015"],
            )