Release prep
chrzyki committed Nov 11, 2019
1 parent 4409bec commit 610d940
Showing 3 changed files with 26 additions and 28 deletions.
cldf/cldf-metadata.json (2 changes: 1 addition & 1 deletion)
@@ -17,7 +17,7 @@
"rdf:type": "prov:Entity",
"dc:title": "Repository",
"rdf:about": "https://github.com/lexibank/halenepal",
"dc:created": "v1.0.1-9-gd9eb5bd"
"dc:created": "v1.0.1-9-g4409bec"
},
{
"rdf:type": "prov:Entity",
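Note (not part of the diff): the updated dc:created value has the shape of git describe output — the most recent tag, the number of commits since that tag, and the abbreviated hash of the commit the data were built from (here g4409bec, matching this commit's parent). A minimal sketch of how such a string can be reproduced from a local checkout, assuming git is available on PATH:

import subprocess

def repo_version(repo_path="."):
    # e.g. "v1.0.1-9-g4409bec": latest tag, commits since that tag,
    # and the abbreviated hash of the current HEAD
    return subprocess.check_output(
        ["git", "describe", "--tags", "--always"], cwd=repo_path, text=True
    ).strip()

print(repo_version())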
cldf/requirements.txt (12 changes: 6 additions & 6 deletions)
@@ -15,7 +15,7 @@ lingpy==2.6.5
Markdown==3.1.1
networkx==2.1
newick==1.0.0
- numpy==1.17.3
+ numpy==1.17.4
openpyxl==3.0.0
packaging==19.2
pluggy==0.13.0
@@ -24,20 +24,20 @@ py==1.8.0
pybtex==0.22.2
pycldf==1.8.2
pyclts==2.0.0
- pyconcepticon==2.4.1
+ pyconcepticon==2.5.1
pycountry==19.8.18
pyglottolog==2.2.1
pylexibank==2.1.0
pytest==5.2.2
- regex==2019.8.19
+ regex==2019.11.1
requests==2.22.0
rfc3986==1.3.2
- segments==2.1.1
- six==1.12.0
+ segments==2.1.2
+ six==1.13.0
SQLAlchemy==1.3.10
tabulate==0.8.5
termcolor==1.1.0
- tqdm==4.36.1
+ tqdm==4.38.0
uritemplate==3.0.0
urllib3==1.25.6
wcwidth==0.1.7
lexibank_halenepal.py (40 changes: 19 additions & 21 deletions)
@@ -1,4 +1,4 @@
- from collections import defaultdict
+ from collections import defaultdict
from pathlib import Path

import attr
@@ -27,7 +27,7 @@ class Dataset(NonSplittingDataset):
dir = Path(__file__).parent
id = "halenepal"
language_class = CustomLanguage
- concept_class=CustomConcept
+ concept_class = CustomConcept
form_spec = pylexibank.FormSpec(
brackets={"(": ")"},
separators=";/,",
@@ -38,25 +38,25 @@ class Dataset(NonSplittingDataset):
def cmd_makecldf(self, args):
# due to bad concept ids in STEDT, we need to load them from file
converter = defaultdict(set)
- for row in self.raw_dir.read_csv("srcids.tsv", delimiter="\t",
-         dicts=True):
-     converter[row['CORRECTED']].add(row['IDINSTEDT'])
+ for row in self.raw_dir.read_csv("srcids.tsv", delimiter="\t", dicts=True):
+     converter[row["CORRECTED"]].add(row["IDINSTEDT"])

concept_lookup = {}
for concept in self.conceptlists[0].concepts.values():
- idx = concept.id.split('-')[-1] + "_" + slug(concept.english)
+ idx = concept.id.split("-")[-1] + "_" + slug(concept.english)
args.writer.add_concept(
-     ID=idx,
-     Name=concept.english,
-     Concepticon_ID=concept.concepticon_id,
-     Concepticon_Gloss=concept.concepticon_gloss,
-     Number=concept.number)
+     ID=idx,
+     Name=concept.english,
+     Concepticon_ID=concept.concepticon_id,
+     Concepticon_Gloss=concept.concepticon_gloss,
+     Number=concept.number,
+ )
concept_lookup[concept.number] = idx
for id_in_source in converter[concept.number]:
concept_lookup[id_in_source] = idx
language_lookup = args.writer.add_languages(lookup_factory="Name")
args.writer.add_sources()

# account for segmented data
args.writer.tokenize = lambda x, z: " ".join(
self.tokenizer(x, "^" + z + "$", column="IPA")
@@ -65,13 +65,11 @@ def cmd_makecldf(self, args):
args.writer["FormTable", "Segments"].datatype = Datatype.fromvalue(
{"base": "string", "format": "([\\S]+)( [\\S]+)*"}
)
- for row in progressbar(self.raw_dir.read_csv("AH-CSDPN.tsv",
-         delimiter="\t")[1:]):
+ for row in progressbar(self.raw_dir.read_csv("AH-CSDPN.tsv", delimiter="\t")[1:]):
args.writer.add_forms_from_value(
-     Local_ID=row[0],
-     Language_ID=language_lookup[row[6]],
-     Parameter_ID=concept_lookup[row[7]],
-     Value=row[1],
-     Source=["Hale1973"],
- )
+     Local_ID=row[0],
+     Language_ID=language_lookup[row[6]],
+     Parameter_ID=concept_lookup[row[7]],
+     Value=row[1],
+     Source=["Hale1973"],
+ )
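Aside (not part of the diff): the concept IDs built in cmd_makecldf combine the last dash-separated element of the conceptlist entry's ID with a slug of its English gloss. A minimal sketch with hypothetical values, assuming the slug helper from clldutils.misc (the dataset may import it from elsewhere):

from clldutils.misc import slug

# Hypothetical conceptlist entry ID and gloss, in the Concepticon style.
concept_id = "Hale-1973-100-17"
english_gloss = "belly"

idx = concept_id.split("-")[-1] + "_" + slug(english_gloss)
print(idx)  # -> "17_belly"

The same idx is registered both under concept.number and under each corrected STEDT source ID, so add_forms_from_value can resolve Parameter_ID from either key.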
