-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexibank_powerma.py
69 lines (58 loc) · 2.32 KB
/
lexibank_powerma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import attr
import lingpy
from pycldf.sources import Source
from pathlib import Path
from clldutils.misc import slug
from pylexibank import Concept, Language
from pylexibank.dataset import Dataset as BaseDataset
from pylexibank import progressbar
@attr.attrs
class CustomLanguage(Language):
    """Language record extended with one extra, optional ``SubGroup`` field."""

    # Left as None when no value is supplied for a language.
    SubGroup = attr.attrib(default=None)
class Dataset(BaseDataset):
    """Dataset ``powerma``, converted from the ``signalphabets`` wordlist.

    ``cmd_download`` fetches the raw TSV file into the raw directory and
    ``cmd_makecldf`` turns it into concepts, languages, sources, forms and
    cognate judgements through ``args.writer``.
    """

    dir = Path(__file__).parent
    id = 'powerma'
    language_class = CustomLanguage

    def cmd_download(self, args):
        """Download the raw wordlist and store it as ``signalphabets.tsv``.

        :param args: command arguments; not used by this method.
        """
        self.raw_dir.download(
            "http://edictor.digling.org/triples/get_data.py?file=signalphabets&remote_dbase=signalphabets",
            'signalphabets.tsv'
        )

    def cmd_makecldf(self, args):
        """Write concepts, languages, sources, forms and cognates.

        :param args: command arguments; ``args.writer`` receives all records.
        :raises KeyError: if a wordlist row names a doculect or concept that
            is missing from the lookup tables built here.
        """
        wl = lingpy.Wordlist(str(self.raw_dir / 'signalphabets.tsv'))
        concepts, sources = {}, {}

        # Concepts get consecutive 1-based numeric IDs in the order of wl.rows.
        for number, concept in enumerate(wl.rows, start=1):
            concept_id = str(number)
            args.writer.add_concept(
                ID=concept_id,
                Name=concept,
            )
            concepts[concept] = concept_id

        # `sources` maps a language identifier to its bibliography key.
        for language in self.languages:
            language_id = language['Name_in_Database']
            args.writer.add_language(
                ID=language_id,
                Name=language['Name'],
                Latitude=language['Latitude'],
                Longitude=language['Longitude'],
                Glottocode=language['Glottolog'],
                SubGroup=language['SubGroup'],
            )
            sources[language_id] = language['Source']

        # NOTE(review): presumably the raw wordlist spells this doculect
        # 'Ukranian_SL' while the language table uses 'Ukrainian_SL'; the
        # two entries below bridge that difference -- confirm against the data.
        sources['Ukranian_SL'] = 'Lydell2018'
        languages = {language: language for language in sources}
        languages['Ukranian_SL'] = 'Ukrainian_SL'

        # Select bibliography entries by the keys the forms cite, i.e. the
        # *values* of `sources`. Testing against the dict itself would compare
        # entry ids with language identifiers and select nothing.
        # NOTE(review): assumes each Source cell holds one single key -- confirm.
        cited_keys = set(sources.values())
        args.writer.add_sources(
            *[entry for entry in self.raw_dir.read_bib()
              if entry.id in cited_keys])

        # The leading row index yielded by iter_rows is not needed.
        for _idx, concept, doculect, hand1, hand2, tokens, cogid in progressbar(
                wl.iter_rows(
                    'concept', 'doculect', 'handshape_1',
                    'handshape_2', 'tokens', 'cogid'),
                desc='makecldf'):
            row = args.writer.add_form(
                Value=hand1 + ' ' + hand2,
                Language_ID=languages[doculect],
                Parameter_ID=concepts[concept],
                Form=' '.join(tokens),
                Source=sources[doculect]
            )
            args.writer.add_cognate(
                lexeme=row,
                Cognateset_ID=cogid,
            )