From b5e597d7d7030266b24f023cba6127d2d7694f02 Mon Sep 17 00:00:00 2001 From: chrzyki Date: Thu, 22 Apr 2021 09:39:50 +0200 Subject: [PATCH 1/4] Clean setup, remove unused import --- lexibank_hubercolumbian.py | 1 - setup.py | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/lexibank_hubercolumbian.py b/lexibank_hubercolumbian.py index 6dc913b..bdf6a2d 100644 --- a/lexibank_hubercolumbian.py +++ b/lexibank_hubercolumbian.py @@ -5,7 +5,6 @@ from clldutils.misc import slug from pylexibank import Concept, Language from pylexibank.providers import qlc -from pylexibank.util import progressbar @attr.s diff --git a/setup.py b/setup.py index 6f5d4ac..25982b1 100644 --- a/setup.py +++ b/setup.py @@ -12,9 +12,7 @@ py_modules=["lexibank_hubercolumbian"], include_package_data=True, zip_safe=False, - entry_points={ - "lexibank.dataset": ["hubercolumbian=lexibank_hubercolumbian:Dataset"] - }, + entry_points={"lexibank.dataset": ["hubercolumbian=lexibank_hubercolumbian:Dataset"]}, install_requires=["pylexibank>=3.0"], extras_require={"test": ["pytest-cldf"]}, ) From 0eef3b4c762b5234e05a7a355972894a4f3e4334 Mon Sep 17 00:00:00 2001 From: chrzyki Date: Thu, 22 Apr 2021 09:41:03 +0200 Subject: [PATCH 2/4] Remove double CustomLanguage class, formatting --- lexibank_hubercolumbian.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/lexibank_hubercolumbian.py b/lexibank_hubercolumbian.py index bdf6a2d..94aacad 100644 --- a/lexibank_hubercolumbian.py +++ b/lexibank_hubercolumbian.py @@ -20,13 +20,6 @@ class CustomLanguage(Language): Name_in_Source = attr.ib(default=None) -@attr.s -class CustomLanguage(Language): - Longitude = attr.ib(default=None) - Latitude = attr.ib(default=None) - Name_in_Source = attr.ib(default=None) - - class Dataset(qlc.QLC): dir = Path(__file__).parent id = "hubercolumbian" @@ -36,22 +29,20 @@ class Dataset(qlc.QLC): def cmd_makecldf(self, args): # column "counterpart_doculect" gives us the proper 
names of the doculects - wl = lingpy.Wordlist( - (self.raw_dir / self.DSETS[0]).as_posix(), col="counterpart_doculect" - ) + wl = lingpy.Wordlist((self.raw_dir / self.DSETS[0]).as_posix(), col="counterpart_doculect") args.writer.add_sources() language_lookup = args.writer.add_languages(lookup_factory="Name_in_Source") - + concept_lookup = {} for concept in self.conceptlists[0].concepts.values(): - idx = concept.number+"_"+slug(concept.english) + idx = concept.number + "_" + slug(concept.english) args.writer.add_concept( - ID=idx, - Name=concept.english, - Concepticon_ID=concept.concepticon_id, - Concepticon_Gloss=concept.concepticon_gloss - ) + ID=idx, + Name=concept.english, + Concepticon_ID=concept.concepticon_id, + Concepticon_Gloss=concept.concepticon_gloss, + ) for lg in concept.attributes["lexibank_gloss"]: concept_lookup[lg] = idx From a4579a9ee99a9ba1bec92a5e82eaa35daeb221c5 Mon Sep 17 00:00:00 2001 From: chrzyki Date: Thu, 22 Apr 2021 09:57:13 +0200 Subject: [PATCH 3/4] Switch to GitHub CI, exclude 'etc' forms, re-generate CLDF, update tests, closes #9, closes #8 --- .travis.yml | 6 ---- README.md | 6 ++-- TRANSCRIPTION.md | 6 ++-- cldf/.transcription-report.json | 18 +++++------ cldf/cldf-metadata.json | 16 +++++----- cldf/forms.csv | 7 ++--- cldf/requirements.txt | 54 +++++++++++++++++---------------- lexibank_hubercolumbian.py | 24 +++++++++------ test.py | 2 +- 9 files changed, 69 insertions(+), 70 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 1bcf843..0000000 --- a/.travis.yml +++ /dev/null @@ -1,6 +0,0 @@ -language: python -python: "3.6" -cache: pip -before_cache: rm -f $HOME/.cache/pip/log/debug.log -install: pip install pytest-cldf -script: pytest --cldf-metadata=cldf/cldf-metadata.json test.py diff --git a/README.md b/README.md index 41bdf33..1523fff 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ This dataset comprises 69 language varieties spoken in Columbia. 
The orthography ## Statistics -[![Build Status](https://travis-ci.org/lexibank/hubercolumbian.svg?branch=master)](https://travis-ci.org/lexibank/hubercolumbian) +[![CLDF validation](https://github.com/lexibank/hubercolumbian/workflows/CLDF-validation/badge.svg)](https://github.com/lexibank/hubercolumbian/actions?query=workflow%3ACLDF-validation) ![Glottolog: 100%](https://img.shields.io/badge/Glottolog-100%25-brightgreen.svg "Glottolog: 100%") ![Concepticon: 96%](https://img.shields.io/badge/Concepticon-96%25-green.svg "Concepticon: 96%") ![Source: 100%](https://img.shields.io/badge/Source-100%25-brightgreen.svg "Source: 100%") @@ -29,11 +29,11 @@ This dataset comprises 69 language varieties spoken in Columbia. The orthography - **Varieties:** 69 - **Concepts:** 366 -- **Lexemes:** 26,726 +- **Lexemes:** 26,723 - **Sources:** 1 - **Synonymy:** 1.18 - **Invalid lexemes:** 0 -- **Tokens:** 158,498 +- **Tokens:** 158,489 - **Segments:** 109 (0 BIPA errors, 0 CTLS sound class errors, 109 CLTS modified) - **Inventory size (avg):** 32.99 diff --git a/TRANSCRIPTION.md b/TRANSCRIPTION.md index 2a96bda..44df74a 100644 --- a/TRANSCRIPTION.md +++ b/TRANSCRIPTION.md @@ -7,7 +7,7 @@ |:----------|-------------:|:-------|:------------------| | a | 22623 | ✓ | ✓ | | i | 13016 | ✓ | ✓ | -| e | 9202 | ✓ | ✓ | +| e | 9199 | ✓ | ✓ | | o | 8635 | ✓ | ✓ | | k | 7698 | ✓ | ✓ | | u | 7178 | ✓ | ✓ | @@ -15,7 +15,7 @@ | p | 5331 | ✓ | ✓ | | b | 5223 | ✓ | ✓ | | h | 5133 | ✓ | ✓ | -| t | 5110 | ✓ | ✓ | +| t | 5107 | ✓ | ✓ | | ɨ | 5039 | ✓ | ✓ | | r | 5029 | ✓ | ✓ | | m | 3862 | ✓ | ✓ | @@ -64,7 +64,7 @@ | θ | 99 | ✓ | ✓ | | tʃʰ | 98 | ✓ | ✓ | | lʲ | 88 | ✓ | ✓ | -| c | 87 | ✓ | ✓ | +| c | 84 | ✓ | ✓ | | ɓ | 81 | ✓ | ✓ | | ɛ | 78 | ✓ | ✓ | | nʲ | 63 | ✓ | ✓ | diff --git a/cldf/.transcription-report.json b/cldf/.transcription-report.json index 7840a4f..d38758d 100644 --- a/cldf/.transcription-report.json +++ b/cldf/.transcription-report.json @@ -9213,9 +9213,9 @@ "a": 670, "a\u02d0": 68, "b": 
183, - "c": 7, + "c": 5, "d": 10, - "e": 266, + "e": 264, "e\u02d0": 40, "f": 1, "h": 163, @@ -9232,7 +9232,7 @@ "p": 235, "r": 109, "s": 43, - "t": 252, + "t": 250, "ts": 79, "t\u0283": 1, "t\u02b0": 11, @@ -12894,9 +12894,9 @@ "+": 45, "a": 309, "a\u02d0": 9, - "c": 2, + "c": 1, "d\u0292": 92, - "e": 117, + "e": 116, "e\u02d0": 7, "h": 131, "i": 137, @@ -12911,7 +12911,7 @@ "p\u02b0": 1, "r": 71, "s": 80, - "t": 7, + "t": 6, "t\u02b0": 66, "t\u02b7\u02b0": 1, "u": 76, @@ -25807,11 +25807,11 @@ "a\u02d0": 625, "b": 5223, "b\u02b0": 1, - "c": 87, + "c": 84, "d": 3610, "d\u0292": 426, "d\u02b2": 62, - "e": 9202, + "e": 9199, "ei": 26, "e\u02d0": 218, "f": 40, @@ -25847,7 +25847,7 @@ "r\u02b2": 7, "s": 3080, "s\u02b0": 1, - "t": 5110, + "t": 5107, "ts": 628, "ts\u02b0": 185, "t\u0255": 126, diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json index 53b18e6..c99c417 100644 --- a/cldf/cldf-metadata.json +++ b/cldf/cldf-metadata.json @@ -18,31 +18,31 @@ "rdf:type": "prov:Entity", "dc:title": "Repository", "rdf:about": "https://github.com/lexibank/hubercolumbian", - "dc:created": "v3.0-3-gec80b7d" + "dc:created": "v3.0-7-g0eef3b4" }, { "rdf:type": "prov:Entity", "dc:title": "Glottolog", - "rdf:about": "https://github.com/glottolog/glottolog/", - "dc:created": "v4.3" + "rdf:about": "https://github.com/glottolog/glottolog", + "dc:created": "v4.3-treedb-fixes" }, { "rdf:type": "prov:Entity", "dc:title": "Concepticon", "rdf:about": "https://github.com/concepticon/concepticon-data", - "dc:created": "v2.4.0-rc.3" + "dc:created": "v2.4.0-113-gcbfa52b" }, { "rdf:type": "prov:Entity", "dc:title": "CLTS", - "rdf:about": "https://github.com/cldf-clts/clts/", - "dc:created": "v2.0.0" + "rdf:about": "https://github.com/cldf-clts/clts", + "dc:created": "v2.1.0" } ], "prov:wasGeneratedBy": [ { "dc:title": "python", - "dc:description": "3.9.2" + "dc:description": "3.8.5" }, { "dc:title": "python-packages", @@ -57,7 +57,7 @@ "tables": [ { "dc:conformsTo": 
"http://cldf.clld.org/v1.0/terms.rdf#FormTable", - "dc:extent": 26726, + "dc:extent": 26723, "tableSchema": { "columns": [ { diff --git a/cldf/forms.csv b/cldf/forms.csv index 9ab615c..a61eb4e 100644 --- a/cldf/forms.csv +++ b/cldf/forms.csv @@ -3876,7 +3876,6 @@ Guahibo-143_louse-1,huber1992/guahibo/PIOJO_LOUSE,Guahibo,143_louse,tarí̵to,ta Guahibo-144_mosquito-1,huber1992/guahibo/ZANCUDO_MOSQUITO,Guahibo,144_mosquito,wáesoto,wáesoto,w a e s o t o,,Huber1992,,,^ w á e s o t o $,default Guahibo-144_mosquito-2,huber1992/guahibo/ZANCUDO_MOSQUITO,Guahibo,144_mosquito,wáeseto,wáeseto,w a e s e t o,,Huber1992,,,^ w á e s e t o $,default Guahibo-145_termite-1,"huber1992/guahibo/COMEJÉN, HORMIGA BLANCA_TERMITE",Guahibo,145_termite,oɸóto,oɸóto,o ɸ o t o,,Huber1992,,,^ o ɸ ó t o $,default -Guahibo-145_termite-2,"huber1992/guahibo/COMEJÉN, HORMIGA BLANCA_TERMITE",Guahibo,145_termite,etc,etc,e t c,,Huber1992,,,^ e t c $,default Guahibo-146_ant-1,huber1992/guahibo/HORMIGA_ANT,Guahibo,146_ant,amáito,amáito,a m a i t o,,Huber1992,,,^ a m á i t o $,default Guahibo-146_ant-2,huber1992/guahibo/HORMIGA_ANT,Guahibo,146_ant,pí̵bi̵to,pí̵bi̵to,p ɨ b ɨ t o,,Huber1992,,,^ p í̵ b i̵ t o $,default Guahibo-147_spider-1,huber1992/guahibo/ARAÑA_SPIDER,Guahibo,147_spider,hómobi̵to,hómobi̵to,h o m o b ɨ t o,,Huber1992,,,^ h ó m o b i̵ t o $,default @@ -4131,8 +4130,7 @@ Guahibo-328_dark-3,huber1992/guahibo/OSCURO_DARK,Guahibo,328_dark,kírihai,kíri Guahibo-329_cut-1,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,ukúta,ukúta,u k u t a,,Huber1992,,,^ u k ú t a $,default Guahibo-329_cut-2,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,nikáta,nikáta,n i k a t a,,Huber1992,,,^ n i k á t a $,default Guahibo-329_cut-3,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,kaxiúta,kaxiúta,k a x i u t a,,Huber1992,,,^ k a x i ú t a $,default -Guahibo-329_cut-4,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,etc,etc,e t c,,Huber1992,,,^ e t c $,default 
-Guahibo-329_cut-5,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,thá:ba,thá:ba,t h aː b a,,Huber1992,,,^ t h á: b a $,default +Guahibo-329_cut-4,huber1992/guahibo/CORTAR_CUT,Guahibo,329_cut,thá:ba,thá:ba,t h aː b a,,Huber1992,,,^ t h á: b a $,default Guahibo-330_wide-1,huber1992/guahibo/ANCHO_WIDE,Guahibo,330_wide,ajáiitabara,ajáiitabara,a j a iː t a b a r a,,Huber1992,,,^ a j á ii t a b a r a $,default Guahibo-331_narrow-1,huber1992/guahibo/ESTRECHO_NARROW,Guahibo,331_narrow,tsikiriháwajo:,tsikiriháwajo:,ts i k i r i h a w a j oː,,Huber1992,,,^ ts i k i r i h á w a j o: $,default Guahibo-332_far-1,huber1992/guahibo/LEJOS_FAR,Guahibo,332_far,tá:hi̵,tá:hi̵,t aː h ɨ,,Huber1992,,,^ t á: h i̵ $,default @@ -13403,8 +13401,7 @@ Koreguaje-144_mosquito-1,huber1992/koreguaje/ZANCUDO_MOSQUITO,Koreguaje,144_mosq Koreguaje-145_termite-1,"huber1992/koreguaje/COMEJÉN, HORMIGA BLANCA_TERMITE",Koreguaje,145_termite,ʤuʤu,ʤuʤu,dʒ u dʒ u,,Huber1992,,,^ ʤ u ʤ u $,default Koreguaje-146_ant-1,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,pẽa,pẽa,p ẽ a,,Huber1992,,,^ p ẽ a $,default Koreguaje-146_ant-2,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,emu,emu,e m u,,Huber1992,,,^ e m u $,default -Koreguaje-146_ant-3,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,etc,etc,e t c,,Huber1992,,,^ e t c $,default -Koreguaje-146_ant-4,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,mekʰa,mekʰa,m e kʰ a,,Huber1992,,,^ m e kʰ a $,default +Koreguaje-146_ant-3,huber1992/koreguaje/HORMIGA_ANT,Koreguaje,146_ant,mekʰa,mekʰa,m e kʰ a,,Huber1992,,,^ m e kʰ a $,default Koreguaje-147_spider-1,huber1992/koreguaje/ARAÑA_SPIDER,Koreguaje,147_spider,hĩ̵hi̵,hĩ̵hi̵,h ɨ̃ h ɨ,,Huber1992,,,^ h ĩ̵ h i̵ $,default Koreguaje-148_chiggoejiggerflea-1,"huber1992/koreguaje/NIGUA, PIGUE_CHIGGOE, JIGGER FLEA",Koreguaje,148_chiggoejiggerflea,suʔu,suʔu,s u ʔ u,,Huber1992,,,^ s u ʔ u $,default Koreguaje-149_bush-1,huber1992/koreguaje/MONTE_BUSH,Koreguaje,149_bush,airo,airo,a i r o,,Huber1992,,,^ a i r o $,default diff 
--git a/cldf/requirements.txt b/cldf/requirements.txt index 04effe2..8ed77c1 100644 --- a/cldf/requirements.txt +++ b/cldf/requirements.txt @@ -1,49 +1,51 @@ -appdirs==1.4.4 +appdirs==1.4.3 bs4==0.0.1 -certifi==2020.12.5 -chardet==4.0.0 -cldfbench==1.5.0 +certifi==2019.11.28 +chardet==3.0.4 +cldfbench==1.6.0 cldfcatalog==1.3.0 -clldutils==3.7.0 -colorlog==4.8.0 +clldutils==3.8.0 +colorama==0.4.3 +colorlog==5.0.1 csvw==1.10.1 decorator==4.4.2 -gitdb==4.0.5 +gitdb==4.0.7 greenlet==1.0.0 -idna==2.10 +html5lib==1.0.1 +idna==2.8 iniconfig==1.1.1 isodate==0.6.0 lingpy==2.6.7 -lxml==4.6.3 Markdown==3.3.4 -networkx==2.5 -newick==1.0.0 -numpy==1.20.1 +networkx==2.5.1 +newick==1.1.0 +numpy==1.20.2 openpyxl==3.0.7 +packaging==20.3 pluggy==0.13.1 purl==1.5 py==1.10.0 pybtex==0.24.0 -pycldf==1.18.1 -pyclts==3.0.2 +pycldf==1.19.0 +pyclts==3.1.1 pyconcepticon==2.7.0 pycountry==20.7.3 -pyglottolog==3.3.0 -pylexibank==3.0.0 -pytest==6.2.2 -regex==2021.3.17 -requests==2.25.1 +pyglottolog==3.4.1 +pylexibank==3.1.0 +pytest==6.2.3 +regex==2021.4.4 +requests==2.22.0 rfc3986==1.4.0 segments==2.2.0 -six==1.15.0 -smmap==3.0.5 +six==1.14.0 +smmap==4.0.0 soupsieve==2.2.1 -SQLAlchemy==1.4.2 +SQLAlchemy==1.4.11 tabulate==0.8.9 termcolor==1.1.0 -tqdm==4.59.0 +tqdm==4.60.0 uritemplate==3.0.1 -urllib3==1.26.4 -wcwidth==0.2.5 +urllib3==1.25.8 +webencodings==0.5.1 xlrd==2.0.1 -zenodoclient==0.3.0 \ No newline at end of file +zenodoclient==0.4.1 \ No newline at end of file diff --git a/lexibank_hubercolumbian.py b/lexibank_hubercolumbian.py index 94aacad..547e88d 100644 --- a/lexibank_hubercolumbian.py +++ b/lexibank_hubercolumbian.py @@ -3,10 +3,13 @@ import attr import lingpy from clldutils.misc import slug -from pylexibank import Concept, Language +from pylexibank import Concept, Language, FormSpec from pylexibank.providers import qlc +exclude = ["etc"] + + @attr.s class CustomConcept(Concept): Spanish = attr.ib(default=None) @@ -55,11 +58,14 @@ def cmd_makecldf(self, args): ] for doculect, 
concept, value, qlcid in rows: - args.writer.add_form( - Language_ID=language_lookup[doculect], - Parameter_ID=concept_lookup[concept], - Value=value, - Form=value, - Source=["Huber1992"], - Local_ID=qlcid, - ) + if value in exclude: + continue + else: + args.writer.add_form( + Language_ID=language_lookup[doculect], + Parameter_ID=concept_lookup[concept], + Value=value, + Form=value, + Source=["Huber1992"], + Local_ID=qlcid, + ) diff --git a/test.py b/test.py index f314885..db3bc31 100644 --- a/test.py +++ b/test.py @@ -3,7 +3,7 @@ def test_valid(cldf_dataset, cldf_logger): def test_forms(cldf_dataset): - assert len(list(cldf_dataset["FormTable"])) == 26726 + assert len(list(cldf_dataset["FormTable"])) == 26723 assert any(f["Form"] == "léemani jú" for f in cldf_dataset["FormTable"]) From 240763dd91b8742e381579028986e51acf593d69 Mon Sep 17 00:00:00 2001 From: chrzyki Date: Thu, 22 Apr 2021 10:00:16 +0200 Subject: [PATCH 4/4] Remove unused FormSpec --- cldf/cldf-metadata.json | 2 +- lexibank_hubercolumbian.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cldf/cldf-metadata.json b/cldf/cldf-metadata.json index c99c417..fdf5de3 100644 --- a/cldf/cldf-metadata.json +++ b/cldf/cldf-metadata.json @@ -18,7 +18,7 @@ "rdf:type": "prov:Entity", "dc:title": "Repository", "rdf:about": "https://github.com/lexibank/hubercolumbian", - "dc:created": "v3.0-7-g0eef3b4" + "dc:created": "v3.0-8-ga4579a9" }, { "rdf:type": "prov:Entity", diff --git a/lexibank_hubercolumbian.py b/lexibank_hubercolumbian.py index 547e88d..b225176 100644 --- a/lexibank_hubercolumbian.py +++ b/lexibank_hubercolumbian.py @@ -3,7 +3,7 @@ import attr import lingpy from clldutils.misc import slug -from pylexibank import Concept, Language, FormSpec +from pylexibank import Concept, Language from pylexibank.providers import qlc