From b2f1f50541befe314ec42e8d43bc0b156fc31868 Mon Sep 17 00:00:00 2001 From: Franck Michel Date: Fri, 29 Jul 2022 18:52:24 +0200 Subject: [PATCH] Gen version 15.2 --- CHANGELOG.md | 11 +- README.md | 11 +- .../Taxrefld_example_interactions.ttl | 4 +- src/add_dwc_ranks.py | 123 ++++++++++++++++++ src/add_dwc_ranks.sparql | 84 ------------ src/env.sh | 2 +- src/virtuoso/import-taxrefld.sh | 9 +- src/xr2rml/xr2rml_externalIds_dbxref_tpl.ttl | 14 +- 8 files changed, 149 insertions(+), 109 deletions(-) create mode 100644 src/add_dwc_ranks.py delete mode 100644 src/add_dwc_ranks.sparql diff --git a/CHANGELOG.md b/CHANGELOG.md index ae48e47..785298e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,16 @@ # TAXREF-LD changelog -## [15.1] 2022-01-19 - TAXREF v15 +## [15.2] 2022-07-29 + +### Changed +- Fixed issues in the ontology (owl:AnnotationProperties instead of owl:ObjectProperties) +- Update of dataset metadata (fixed issues, added dcat:Distribution, better named graphs description...) +- Regenerate taxonomy (taxa and names) to fix an issue with missing taxa (detected with Agroportal) +- Reorganize data dump as a zip file with subfolders rather than a tar of zip files + + +## [15.1] 2022-01-19 ### Added - Development stages and sex as part of the species interactions, e.g.: species A in larva stage feeds on B. diff --git a/README.md b/README.md index 3fa4577..81ac29f 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,7 @@ Previous versions are still aviable on this Github repo: | Version | Download link | | ---- | ---- | +| 15.2 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.6940891.svg)](https://doi.org/10.5281/zenodo.6940891) | | 15.1 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5876775.svg)](https://doi.org/10.5281/zenodo.5876775) | | 13.0 | https://github.com/frmichel/taxref-ld/tree/13.0/dataset | | 12.0 | https://github.com/frmichel/taxref-ld/tree/12.0/dataset | @@ -69,20 +70,20 @@ The following **named graphs** can be queried from our SPARQL endpoint: | Named graph | Description | No. RDF triples | | ------------- | ---- | ----: | -| `http://taxref.mnhn.fr/lod/graph/metadata` | DCAT, VOID and SPARQL SD dataset descriptions + definition of various classes, concepts, properties (content of files `dataset/Taxrefld_static*.ttl`) |1,469| +| `http://taxref.mnhn.fr/lod/graph/metadata` | DCAT, VOID and SPARQL SD dataset descriptions + definition of various classes, concepts, properties (content of files `dataset/Taxrefld_static*.ttl`) |1,740| | `http://taxref.mnhn.fr/lod/graph/biblio` | bibliographic resources |408,737| -| `http://taxref.mnhn.fr/lod/graph/locations` | regions, departements, territories etc. |320,599| +| `http://taxref.mnhn.fr/lod/graph/locations` | regions, departements, territories etc. |393,496| | `http://taxref.mnhn.fr/lod/graph/media` | media (photos) linked to taxa |690,508| | `http://taxref.mnhn.fr/lod/graph/statuscodes` | description of the status values of types international convention, european directive, protection and regulation. These are represented as instances of the class bibo:DocumentPart (e.g. http://taxref.mnhn.fr/lod/status/BONN/IBOAC) and related to the bibliographic source describing the document with property dct:isPartOf (content of files `statusCodes.ttl` and `statusBiblio.ttl`) |1,804| -| `http://taxref.mnhn.fr/lod/graph/classes/{TAXREF version}` | description of taxa as OWL classes |4,300,619| -| `http://taxref.mnhn.fr/lod/graph/concepts/{TAXREF version}` | description of scientific names as SKOS concepts |7,739,313| +| `http://taxref.mnhn.fr/lod/graph/classes/{TAXREF version}` | description of taxa as OWL classes |4,374,167| +| `http://taxref.mnhn.fr/lod/graph/concepts/{TAXREF version}` | description of scientific names as SKOS concepts |7,799,394| | `http://taxref.mnhn.fr/lod/graph/interactions/{TAXREF version}` | species interactions |303,025| | `http://taxref.mnhn.fr/lod/graph/statuses/{TAXREF version}` | all taxa statuses (legal, biogeographical, red list) |7,846,358| | `http://taxref.mnhn.fr/lod/graph/vernacular/{TAXREF version}` | taxa vernacular names (direct and as SKOS-XL labels) |518,708| | `http://taxref.mnhn.fr/lod/graph/dbxref/{TAXREF version}` | cross-references to identifiers of third-party data sources such as GBIF, WoRMS, the Plant List etc. |10,330,904| | `http://taxref.mnhn.fr/lod/graph/webpages/{TAXREF version}` | `foaf:page` links to webpages |2,567,841| | `http://taxref.mnhn.fr/lod/graph/links-*/{TAXREF version}` | interllinking to equivalent URIs from NCBI, Agrovoc, WoRMS |250,249| -| Total | | 35,280,107 | +| Total | | 35,486,931 | ## License diff --git a/dataset/examples/Taxrefld_example_interactions.ttl b/dataset/examples/Taxrefld_example_interactions.ttl index d51f9e6..f9d04cb 100644 --- a/dataset/examples/Taxrefld_example_interactions.ttl +++ b/dataset/examples/Taxrefld_example_interactions.ttl @@ -31,11 +31,11 @@ # ================================== Interaction between two species ===================================== -# Direct link to the vernacular name (here: pollinates) +# Direct link (here: pollinates) ro:RO_0002455 . -# Reified link to the vernacular name (adds location and bibliographic reference) +# Reified link (adds location and bibliographic reference) a , rdf:Statement ; rdfs:label "pollinates (statement)" ; diff --git a/src/add_dwc_ranks.py b/src/add_dwc_ranks.py new file mode 100644 index 0000000..76d6e9f --- /dev/null +++ b/src/add_dwc_ranks.py @@ -0,0 +1,123 @@ +#!/bin/python3 + +import requests +import urllib.parse + + +endpoint = "https://taxref.mnhn.fr/sparql?query=" +headers = { 'accept' : 'text/turtle' } + +def run_query(query, outputfile): + output = requests.get(endpoint + urllib.parse.quote(query), headers = headers) + with open(outputfile, "w") as f: + f.write(output.text) + return + +prefixes = ''' + prefix rdfs: + prefix owl: + prefix dwc: + prefix taxref: + prefix taxrefprop: + prefix taxrefrk: +''' + + +# Add dwc:subgenus +query = prefixes + ''' + construct { ?s dwc:subgenus ?tLabel. } + where { + graph { + ?t a owl:Class; + taxrefprop:hasRank taxrefrk:SubGenus; + rdfs:label ?tLabel. + ?s rdfs:subClassOf+ ?t. + } + } +''' +run_query(query, "dwc_subgenus.ttl") + + +# Add dwc:genus +query = prefixes + ''' + construct { ?s dwc:genus ?tLabel. } + where { + graph { + ?t a owl:Class; + taxrefprop:hasRank taxrefrk:Genus; + rdfs:label ?tLabel. + ?s rdfs:subClassOf+ ?t. + } + }''' +run_query(query, "dwc_genus.ttl") + + +# Add dwc:subfamily +query = prefixes + ''' + construct { ?s dwc:subfamily ?tLabel. } + where { + graph { + ?t a owl:Class; + taxrefprop:hasRank taxrefrk:SubFamily; + rdfs:label ?tLabel. + ?s rdfs:subClassOf+ ?t. + } + }''' +run_query(query, "dwc_subfamily.ttl") + + +# Add dwc:family +query = prefixes + ''' + construct { ?s dwc:family ?tLabel. } + where { + graph { + ?t a owl:Class; + taxrefprop:hasRank taxrefrk:Family; + rdfs:label ?tLabel. + ?s rdfs:subClassOf+ ?t. + } + }''' +run_query(query, "dwc_family.ttl") + + +# Add dwc:order +query = prefixes + ''' + construct { ?s dwc:order ?tLabel. } + where { + graph { + ?t a owl:Class; + taxrefprop:hasRank taxrefrk:Order; + rdfs:label ?tLabel. + ?s rdfs:subClassOf+ ?t. + } + }''' +run_query(query, "dwc_order.ttl") + + +# Add dwc:phylum +query = prefixes + ''' + construct { ?s dwc:phylum ?tLabel. } + where { + graph { + ?t a owl:Class; + taxrefprop:hasRank taxrefrk:Phylum; + rdfs:label ?tLabel. + ?s rdfs:subClassOf+ ?t. + } + }''' +run_query(query, "dwc_phylum.ttl") + + +# Add dwc:kingdom +query = prefixes + ''' + construct { ?s dwc:kingdom ?tLabel. } + where { + graph { + ?t a owl:Class; + taxrefprop:hasRank taxrefrk:Kingdom; + rdfs:label ?tLabel. + ?s rdfs:subClassOf+ ?t. + } + }''' +run_query(query, "dwc_kingdom.ttl") + diff --git a/src/add_dwc_ranks.sparql b/src/add_dwc_ranks.sparql deleted file mode 100644 index ad7e0ea..0000000 --- a/src/add_dwc_ranks.sparql +++ /dev/null @@ -1,84 +0,0 @@ -prefix rdfs: -prefix owl: -prefix dwc: -prefix taxref: -prefix taxrefprop: -prefix taxrefrk: - -# Add dwc:subgenus -construct { ?s dwc:subgenus ?tLabel. } -where { - graph { - ?t a owl:Class; - taxrefprop:hasRank taxrefrk:SubGenus; - rdfs:label ?tLabel. - ?s rdfs:subClassOf+ ?t. - } -} - -# Add dwc:genus -construct { ?s dwc:genus ?tLabel. } -where { - graph { - ?t a owl:Class; - taxrefprop:hasRank taxrefrk:Genus; - rdfs:label ?tLabel. - ?s rdfs:subClassOf+ ?t. - } -} - -# Add dwc:subfamily -construct { ?s dwc:subfamily ?tLabel. } -where { - graph { - ?t a owl:Class; - taxrefprop:hasRank taxrefrk:SubFamily; - rdfs:label ?tLabel. - ?s rdfs:subClassOf+ ?t. - } -} - -# Add dwc:family -construct { ?s dwc:family ?tLabel. } -where { - graph { - ?t a owl:Class; - taxrefprop:hasRank taxrefrk:Family; - rdfs:label ?tLabel. - ?s rdfs:subClassOf+ ?t. - } -} - -# Add dwc:order -construct { ?s dwc:order ?tLabel. } -where { - graph { - ?t a owl:Class; - taxrefprop:hasRank taxrefrk:Order; - rdfs:label ?tLabel. - ?s rdfs:subClassOf+ ?t. - } -} - -# Add dwc:phylum -construct { ?s dwc:phylum ?tLabel. } -where { - graph { - ?t a owl:Class; - taxrefprop:hasRank taxrefrk:Phylum; - rdfs:label ?tLabel. - ?s rdfs:subClassOf+ ?t. - } -} - - -# Add dwc:kingdom -construct { ?s dwc:kingdom ?tLabel. } -where { - graph { - ?t a owl:Class; - taxrefprop:hasRank taxrefrk:Kingdom; - rdfs:label ?tLabel. - ?s rdfs:subClassOf+ ?t. - } -} diff --git a/src/env.sh b/src/env.sh index 7ba45ca..1982f3e 100644 --- a/src/env.sh +++ b/src/env.sh @@ -3,7 +3,7 @@ # Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) export DATASET_VERSION=15.0 -export DATASET_DATE=2022-01-13 +export DATASET_DATE=2022-07-29 # MongoDB database export DB=taxrefv15 diff --git a/src/virtuoso/import-taxrefld.sh b/src/virtuoso/import-taxrefld.sh index 1eb971a..a41b1b0 100644 --- a/src/virtuoso/import-taxrefld.sh +++ b/src/virtuoso/import-taxrefld.sh @@ -33,10 +33,6 @@ graph="http://taxref.mnhn.fr/lod/graph/concepts" graph="http://taxref.mnhn.fr/lod/graph/classes/$DATASET_VERSION" ./virtuoso-import.sh --cleargraph --path $DATA_DIR --graph $graph taxonomy_classes.ttl -# After the previous one has been imported, use add_dwc_ranks.sparql to generate files dwc_*.ttl -graph="http://taxref.mnhn.fr/lod/graph/classes/$DATASET_VERSION" -./virtuoso-import.sh --path $DATA_DIR --graph $graph dwc_%.ttl - graph="http://taxref.mnhn.fr/lod/graph/classes/$DATASET_VERSION" ./virtuoso-import.sh --path $DATA_DIR --graph $graph dwc_%.ttl @@ -62,6 +58,11 @@ graph="http://taxref.mnhn.fr/lod/graph/links-worms" ./virtuoso-import.sh --cleargraph --path $DATA_DIR --graph $graph externalIds_worms.ttl +# After the taxonomy is loaded, use add_dwc_ranks.py to generate files dwc_*.ttl +graph="http://taxref.mnhn.fr/lod/graph/classes/$DATASET_VERSION" +./virtuoso-import.sh --path .. --graph $graph dwc_%.ttl + + # Calculated links graph="http://taxref.mnhn.fr/lod/graph/links-agrovoc" ./virtuoso-import.sh --cleargraph --path $DATA_DIR --graph $graph links-agrovoc.nt diff --git a/src/xr2rml/xr2rml_externalIds_dbxref_tpl.ttl b/src/xr2rml/xr2rml_externalIds_dbxref_tpl.ttl index f1e18eb..5976fef 100644 --- a/src/xr2rml/xr2rml_externalIds_dbxref_tpl.ttl +++ b/src/xr2rml/xr2rml_externalIds_dbxref_tpl.ttl @@ -55,12 +55,7 @@ rr:predicateObjectMap [ rr:predicate {{WDTPROP}}; rr:objectMap [ xrr:reference "$.externalId"; xsd:datatype xsd:string ]; - ]. - -<#TM_Taxon_XRef> - a rr:TriplesMap; - xrr:logicalSource [ xrr:query """db.externalIds.find({externalDbName: "{{EXTDBNAME}}"}, $where: 'this.taxrefId == this.taxonReferenceId'})""" ]; - rr:subjectMap <#SM_Taxon>; + ]; rr:predicateObjectMap [ rr:predicate schema:identifier; rr:objectMap [ rr:template "{$.taxrefId}{$.externalDbName}{$.externalId}"; rr:termType rr:BlankNode ]; @@ -76,12 +71,7 @@ rr:predicateObjectMap [ rr:predicate {{WDTPROP}}; rr:objectMap [ xrr:reference "$.externalId"; xsd:datatype xsd:string ]; - ]. - -<#TM_Name_XRef> - a rr:TriplesMap; - xrr:logicalSource [ xrr:query """db.externalIds.find({externalDbName: "{{EXTDBNAME}}"})""" ]; - rr:subjectMap <#SM_Name>; + ]; rr:predicateObjectMap [ rr:predicate schema:identifier; rr:objectMap [ rr:template "{$.taxrefId}{$.externalDbName}{$.externalId}"; rr:termType rr:BlankNode ];