Skip to content

Commit

Permalink
update new files
Browse files Browse the repository at this point in the history
  • Loading branch information
lingulist committed Jun 9, 2021
2 parents 7bda8c5 + 240763d commit bff075b
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 58 deletions.
6 changes: 0 additions & 6 deletions .travis.yml

This file was deleted.

5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ This dataset comprises 69 language varieties spoken in Colombia. The orthography
## Statistics


[![Build Status](https://travis-ci.org/lexibank/hubercolumbian.svg?branch=master)](https://travis-ci.org/lexibank/hubercolumbian)
![Glottolog: 100%](https://img.shields.io/badge/Glottolog-100%25-brightgreen.svg "Glottolog: 100%")
![Concepticon: 96%](https://img.shields.io/badge/Concepticon-96%25-green.svg "Concepticon: 96%")
![Source: 100%](https://img.shields.io/badge/Source-100%25-brightgreen.svg "Source: 100%")
Expand All @@ -35,11 +34,11 @@ This dataset comprises 69 language varieties spoken in Colombia. The orthography

- **Varieties:** 69
- **Concepts:** 366
- **Lexemes:** 26,726
- **Lexemes:** 26,723
- **Sources:** 1
- **Synonymy:** 1.18
- **Invalid lexemes:** 0
- **Tokens:** 158,498
- **Tokens:** 158,489
- **Segments:** 109 (0 BIPA errors, 0 CLTS sound class errors, 109 CLTS modified)
- **Inventory size (avg):** 32.99

Expand Down
6 changes: 3 additions & 3 deletions TRANSCRIPTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,15 @@
|:----------|-------------:|:-------|:------------------|
| a | 22623 |||
| i | 13016 |||
| e | 9202 |||
| e | 9199 |||
| o | 8635 |||
| k | 7698 |||
| u | 7178 |||
| n | 5592 |||
| p | 5331 |||
| b | 5223 |||
| h | 5133 |||
| t | 5110 |||
| t | 5107 |||
| ɨ | 5039 |||
| r | 5029 |||
| m | 3862 |||
Expand Down Expand Up @@ -64,7 +64,7 @@
| θ | 99 |||
| tʃʰ | 98 |||
|| 88 |||
| c | 87 |||
| c | 84 |||
| ɓ | 81 |||
| ɛ | 78 |||
|| 63 |||
Expand Down
18 changes: 9 additions & 9 deletions cldf/.transcription-report.json
Original file line number Diff line number Diff line change
Expand Up @@ -9213,9 +9213,9 @@
"a": 670,
"a\u02d0": 68,
"b": 183,
"c": 7,
"c": 5,
"d": 10,
"e": 266,
"e": 264,
"e\u02d0": 40,
"f": 1,
"h": 163,
Expand All @@ -9232,7 +9232,7 @@
"p": 235,
"r": 109,
"s": 43,
"t": 252,
"t": 250,
"ts": 79,
"t\u0283": 1,
"t\u02b0": 11,
Expand Down Expand Up @@ -12894,9 +12894,9 @@
"+": 45,
"a": 309,
"a\u02d0": 9,
"c": 2,
"c": 1,
"d\u0292": 92,
"e": 117,
"e": 116,
"e\u02d0": 7,
"h": 131,
"i": 137,
Expand All @@ -12911,7 +12911,7 @@
"p\u02b0": 1,
"r": 71,
"s": 80,
"t": 7,
"t": 6,
"t\u02b0": 66,
"t\u02b7\u02b0": 1,
"u": 76,
Expand Down Expand Up @@ -25807,11 +25807,11 @@
"a\u02d0": 625,
"b": 5223,
"b\u02b0": 1,
"c": 87,
"c": 84,
"d": 3610,
"d\u0292": 426,
"d\u02b2": 62,
"e": 9202,
"e": 9199,
"ei": 26,
"e\u02d0": 218,
"f": 40,
Expand Down Expand Up @@ -25847,7 +25847,7 @@
"r\u02b2": 7,
"s": 3080,
"s\u02b0": 1,
"t": 5110,
"t": 5107,
"ts": 628,
"ts\u02b0": 185,
"t\u0255": 126,
Expand Down
4 changes: 2 additions & 2 deletions cldf/cldf-metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"rdf:about": "https://github.com/lexibank/hubercolumbian",
"rdf:type": "prov:Entity",
"dc:created": "v3.0-5-g796f5e7",
"dc:created": "v3.0-6-g7bda8c5",
"dc:title": "Repository"
},
{
Expand Down Expand Up @@ -57,7 +57,7 @@
"tables": [
{
"dc:conformsTo": "http://cldf.clld.org/v1.0/terms.rdf#FormTable",
"dc:extent": 26726,
"dc:extent": 26723,
"tableSchema": {
"columns": [
{
Expand Down
7 changes: 2 additions & 5 deletions cldf/forms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3876,7 +3876,6 @@ Guahibo-143_louse-1,huber1992/guahibo/PIOJO_LOUSE,Guahibo,143_louse,tarí̵to,ta
Guahibo-144_mosquito-1,huber1992/guahibo/ZANCUDO_MOSQUITO,Guahibo,144_mosquito,wáesoto,wáesoto,w a e s o t o,,Huber1992,,,^ w á e s o t o $,default
Guahibo-144_mosquito-2,huber1992/guahibo/ZANCUDO_MOSQUITO,Guahibo,144_mosquito,wáeseto,wáeseto,w a e s e t o,,Huber1992,,,^ w á e s e t o $,default
Guahibo-145_termite-1,"huber1992/guahibo/COMEJÉN, HORMIGA BLANCA_TERMITE",Guahibo,145_termite,oɸóto,oɸóto,o ɸ o t o,,Huber1992,,,^ o ɸ ó t o $,default
Guahibo-145_termite-2,"huber1992/guahibo/COMEJÉN, HORMIGA BLANCA_TERMITE",Guahibo,145_termite,etc,etc,e t c,,Huber1992,,,^ e t c $,default
Guahibo-146_ant-1,huber1992/guahibo/HORMIGA_ANT,Guahibo,146_ant,amáito,amáito,a m a i t o,,Huber1992,,,^ a m á i t o $,default
Guahibo-146_ant-2,huber1992/guahibo/HORMIGA_ANT,Guahibo,146_ant,pí̵bi̵to,pí̵bi̵to,p ɨ b ɨ t o,,Huber1992,,,^ p í̵ b i̵ t o $,default
Guahibo-147_spider-1,huber1992/guahibo/ARAÑA_SPIDER,Guahibo,147_spider,hómobi̵to,hómobi̵to,h o m o b ɨ t o,,Huber1992,,,^ h ó m o b i̵ t o $,default
Expand Down Expand Up @@ -4131,8 +4130,7 @@ Guahibo-328_dark-3,huber1992/guahibo/OSCURO_DARK,Guahibo,328_dark,kírihai,kíri
Guahibo-329_cut-1,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,ukúta,ukúta,u k u t a,,Huber1992,,,^ u k ú t a $,default
Guahibo-329_cut-2,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,nikáta,nikáta,n i k a t a,,Huber1992,,,^ n i k á t a $,default
Guahibo-329_cut-3,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,kaxiúta,kaxiúta,k a x i u t a,,Huber1992,,,^ k a x i ú t a $,default
Guahibo-329_cut-4,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,etc,etc,e t c,,Huber1992,,,^ e t c $,default
Guahibo-329_cut-5,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,thá:ba,thá:ba,t h aː b a,,Huber1992,,,^ t h á: b a $,default
Guahibo-329_cut-4,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,thá:ba,thá:ba,t h aː b a,,Huber1992,,,^ t h á: b a $,default
Guahibo-330_wide-1,huber1992/guahibo/ANCHO_WIDE,Guahibo,330_wide,ajáiitabara,ajáiitabara,a j a iː t a b a r a,,Huber1992,,,^ a j á ii t a b a r a $,default
Guahibo-331_narrow-1,huber1992/guahibo/ESTRECHO_NARROW,Guahibo,331_narrow,tsikiriháwajo:,tsikiriháwajo:,ts i k i r i h a w a j oː,,Huber1992,,,^ ts i k i r i h á w a j o: $,default
Guahibo-332_far-1,huber1992/guahibo/LEJOS_FAR,Guahibo,332_far,tá:hi̵,tá:hi̵,t aː h ɨ,,Huber1992,,,^ t á: h i̵ $,default
Expand Down Expand Up @@ -13403,8 +13401,7 @@ Koreguaje-144_mosquito-1,huber1992/koreguaje/ZANCUDO_MOSQUITO,Koreguaje,144_mosq
Koreguaje-145_termite-1,"huber1992/koreguaje/COMEJÉN, HORMIGA BLANCA_TERMITE",Koreguaje,145_termite,ʤuʤu,ʤuʤu,dʒ u dʒ u,,Huber1992,,,^ ʤ u ʤ u $,default
Koreguaje-146_ant-1,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,pẽa,pẽa,p ẽ a,,Huber1992,,,^ p ẽ a $,default
Koreguaje-146_ant-2,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,emu,emu,e m u,,Huber1992,,,^ e m u $,default
Koreguaje-146_ant-3,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,etc,etc,e t c,,Huber1992,,,^ e t c $,default
Koreguaje-146_ant-4,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,mekʰa,mekʰa,m e kʰ a,,Huber1992,,,^ m e kʰ a $,default
Koreguaje-146_ant-3,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,mekʰa,mekʰa,m e kʰ a,,Huber1992,,,^ m e kʰ a $,default
Koreguaje-147_spider-1,huber1992/koreguaje/ARAÑA_SPIDER,Koreguaje,147_spider,hĩ̵hi̵,hĩ̵hi̵,h ɨ̃ h ɨ,,Huber1992,,,^ h ĩ̵ h i̵ $,default
Koreguaje-148_chiggoejiggerflea-1,"huber1992/koreguaje/NIGUA, PIGUE_CHIGGOE, JIGGER FLEA",Koreguaje,148_chiggoejiggerflea,suʔu,suʔu,s u ʔ u,,Huber1992,,,^ s u ʔ u $,default
Koreguaje-149_bush-1,huber1992/koreguaje/MONTE_BUSH,Koreguaje,149_bush,airo,airo,a i r o,,Huber1992,,,^ a i r o $,default
Expand Down
48 changes: 22 additions & 26 deletions lexibank_hubercolumbian.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from clldutils.misc import slug
from pylexibank import Concept, Language
from pylexibank.providers import qlc
from pylexibank.util import progressbar


exclude = ["etc"]


@attr.s
Expand All @@ -21,13 +23,6 @@ class CustomLanguage(Language):
Name_in_Source = attr.ib(default=None)


@attr.s
class CustomLanguage(Language):
Longitude = attr.ib(default=None)
Latitude = attr.ib(default=None)
Name_in_Source = attr.ib(default=None)


class Dataset(qlc.QLC):
dir = Path(__file__).parent
id = "hubercolumbian"
Expand All @@ -37,22 +32,20 @@ class Dataset(qlc.QLC):

def cmd_makecldf(self, args):
# column "counterpart_doculect" gives us the proper names of the doculects
wl = lingpy.Wordlist(
(self.raw_dir / self.DSETS[0]).as_posix(), col="counterpart_doculect"
)
wl = lingpy.Wordlist((self.raw_dir / self.DSETS[0]).as_posix(), col="counterpart_doculect")
args.writer.add_sources()

language_lookup = args.writer.add_languages(lookup_factory="Name_in_Source")

concept_lookup = {}
for concept in self.conceptlists[0].concepts.values():
idx = concept.number+"_"+slug(concept.english)
idx = concept.number + "_" + slug(concept.english)
args.writer.add_concept(
ID=idx,
Name=concept.english,
Concepticon_ID=concept.concepticon_id,
Concepticon_Gloss=concept.concepticon_gloss
)
ID=idx,
Name=concept.english,
Concepticon_ID=concept.concepticon_id,
Concepticon_Gloss=concept.concepticon_gloss,
)
for lg in concept.attributes["lexibank_gloss"]:
concept_lookup[lg] = idx

Expand All @@ -65,11 +58,14 @@ def cmd_makecldf(self, args):
]

for doculect, concept, value, qlcid in rows:
args.writer.add_form(
Language_ID=language_lookup[doculect],
Parameter_ID=concept_lookup[concept],
Value=value,
Form=value,
Source=["Huber1992"],
Local_ID=qlcid,
)
if value in exclude:
continue
else:
args.writer.add_form(
Language_ID=language_lookup[doculect],
Parameter_ID=concept_lookup[concept],
Value=value,
Form=value,
Source=["Huber1992"],
Local_ID=qlcid,
)
4 changes: 1 addition & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
py_modules=["lexibank_hubercolumbian"],
include_package_data=True,
zip_safe=False,
entry_points={
"lexibank.dataset": ["hubercolumbian=lexibank_hubercolumbian:Dataset"]
},
entry_points={"lexibank.dataset": ["hubercolumbian=lexibank_hubercolumbian:Dataset"]},
install_requires=["pylexibank>=3.0"],
extras_require={"test": ["pytest-cldf"]},
)
2 changes: 1 addition & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ def test_valid(cldf_dataset, cldf_logger):


def test_forms(cldf_dataset):
assert len(list(cldf_dataset["FormTable"])) == 26726
assert len(list(cldf_dataset["FormTable"])) == 26723
assert any(f["Form"] == "léemani jú" for f in cldf_dataset["FormTable"])


Expand Down

0 comments on commit bff075b

Please sign in to comment.