From 51a2845878775ea8ad51c6d392aec9c1ca4f13df Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 26 Feb 2024 19:30:14 -0500 Subject: [PATCH] add codespell workflow, config and fix some typos (#127) * Add github action to codespell main on push and PRs * Add rudimentary codespell config * run codespell throughout but ignore fail -- committing manually since example outputs are ignored for git * [DATALAD RUNCMD] Do interactive fixing of typos === Do not change lines below === { "chain": [], "cmd": "codespell -w -i 3 -C 2", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- .github/workflows/codespell.yml | 23 +++++++++++++++++++ docs/datamodel/types/Objectidentifier.md | 2 +- docs/intro/export.md | 2 +- docs/intro/mixed-sheets.md | 2 +- .../output/docs/types/Objectidentifier.md | 2 +- examples/output/jsonld/combined.jsonld | 2 +- pyproject.toml | 8 ++++++- schemasheets/schemamaker.py | 4 ++-- tests/input/mixs6_core_test.tsv | 22 +++++++++--------- tests/input/rda-crosswalk.tsv | 2 +- 10 files changed, 49 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/codespell.yml diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 0000000..dd0eb8e --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,23 @@ +# Codespell configuration is within pyproject.toml +--- +name: Codespell + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Codespell + uses: codespell-project/actions-codespell@v2 diff --git a/docs/datamodel/types/Objectidentifier.md b/docs/datamodel/types/Objectidentifier.md index 8d80546..454ba6b 100644 --- a/docs/datamodel/types/Objectidentifier.md +++ b/docs/datamodel/types/Objectidentifier.md @@ -15,5 +15,5 @@ URI: [linkml:Objectidentifier](https://w3id.org/linkml/Objectidentifier) | | | | | --- | --- | --- | -| **Comments:** | | Used for inheritence and type checking | +| **Comments:** | | Used for inheritance and type checking | diff --git a/docs/intro/export.md b/docs/intro/export.md index 7e53ca7..6b1f32d 100644 --- a/docs/intro/export.md +++ b/docs/intro/export.md @@ -84,7 +84,7 @@ this guards against accidental overwrites. schemasheets allows *custom* sheet formats that map to the LinkML standard. -you can use the combination of sheets2linkml and linkml2sheets to convert betweeen two sheet specifications. +you can use the combination of sheets2linkml and linkml2sheets to convert between two sheet specifications. For example, let's say for schema1.tsv, you use a spreadsheet with the following headers: diff --git a/docs/intro/mixed-sheets.md b/docs/intro/mixed-sheets.md index c18c744..cdcef01 100644 --- a/docs/intro/mixed-sheets.md +++ b/docs/intro/mixed-sheets.md @@ -23,4 +23,4 @@ For example: |C|ForProfit|||||Organization||||||| |C|NonProfit|||||Organization|||Q163740|||foo| - * [personinfo with tyoes](https://docs.google.com/spreadsheets/d/1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ/edit#gid=509198484) + * [personinfo with types](https://docs.google.com/spreadsheets/d/1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ/edit#gid=509198484) diff --git a/examples/output/docs/types/Objectidentifier.md b/examples/output/docs/types/Objectidentifier.md index 8d80546..454ba6b 100644 --- a/examples/output/docs/types/Objectidentifier.md +++ b/examples/output/docs/types/Objectidentifier.md @@ -15,5 +15,5 @@ URI: [linkml:Objectidentifier](https://w3id.org/linkml/Objectidentifier) | | | | | --- | --- | --- | -| **Comments:** | | Used for inheritence and type checking | +| **Comments:** | | Used for inheritance and type checking | diff --git a/examples/output/jsonld/combined.jsonld b/examples/output/jsonld/combined.jsonld index 9e01f53..00f0b03 100644 --- a/examples/output/jsonld/combined.jsonld +++ b/examples/output/jsonld/combined.jsonld @@ -207,7 +207,7 @@ "definition_uri": "https://w3id.org/linkml/Objectidentifier", "description": "A URI or CURIE that represents an object in the model.", "comments": [ - "Used for inheritence and type checking" + "Used for inheritance and type checking" ], "from_schema": "https://w3id.org/linkml/types", "imported_from": "linkml:types", diff --git a/pyproject.toml b/pyproject.toml index b97e9bd..30fac63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,4 +33,10 @@ sheets2linkml = "schemasheets.schemamaker:convert" linkml2sheets = "schemasheets.schema_exporter:export_schema" sheets2project = "schemasheets.sheets_to_project:multigen" -linkml2schemasheets-template = 'schemasheets.generate_populate:cli' \ No newline at end of file +linkml2schemasheets-template = 'schemasheets.generate_populate:cli' +[tool.codespell] +# Ref: https://github.com/codespell-project/codespell#using-a-config-file +skip = '.git,*.lock' +check-hidden = true +ignore-regex = '\bOTU\b' +# ignore-words-list = '' diff --git a/schemasheets/schemamaker.py b/schemasheets/schemamaker.py index 112d4f6..8952096 100644 --- a/schemasheets/schemamaker.py +++ b/schemasheets/schemamaker.py @@ -217,7 +217,7 @@ def get_current_element(self, elt: Element) -> Union[Element, PermissibleValue]: """ sc = self.schema if isinstance(elt, SchemaDefinition): - # TODO: consider multiple shemas per sheet + # TODO: consider multiple schemas per sheet return sc elif isinstance(elt, PermissibleValue): return elt @@ -321,7 +321,7 @@ def check_excess(descriptors): for c in vmap[T_CLASS]: if self.use_attributes: # slots always belong to a class; - # no seperate top level slots + # no separate top level slots a = SlotDefinition(main_elt.name) c.attributes[main_elt.name] = a yield a diff --git a/tests/input/mixs6_core_test.tsv b/tests/input/mixs6_core_test.tsv index beed010..be11c8b 100644 --- a/tests/input/mixs6_core_test.tsv +++ b/tests/input/mixs6_core_test.tsv @@ -1,13 +1,13 @@ -Structured comment name Item (rdfs:label) Action carried out and logged by CIH in change log 21May Discussion review complete (X) status Editor/ reviewer Link to GH issue Definition Expected value Value syntax Example Section migs_eu migs_ba migs_pl migs_vi migs_org mims mimarks_s mimarks_c misag mimag miuvig Preferred unit Occurence Position MIXS ID MIGS ID (mapping to GOLD) +Structured comment name Item (rdfs:label) Action carried out and logged by CIH in change log 21May Discussion review complete (X) status Editor/ reviewer Link to GH issue Definition Expected value Value syntax Example Section migs_eu migs_ba migs_pl migs_vi migs_org mims mimarks_s mimarks_c misag mimag miuvig Preferred unit Occurrence Position MIXS ID MIGS ID (mapping to GOLD) > slot title comments comments comments ignore notes ignore see_also description ignore pattern ignore in_subset cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality annotations multivalued annotations slot_uri exact_mapping > "prefix: ""review completed """ "prefix: ""reviewer: """ "applies_to_class: ""MIGS eukaryote""" "applies_to_class: ""MIGS bacteria""" "applies_to_class: ""MIGS plant""" "applies_to_class: ""MIGS virus""" "applies_to_class: ""MIGS org""" "applies_to_class: ""MIMS""" "applies_to_class: ""MIMARKS specimen""" "applies_to_class: ""MIMARKS survey""" "applies_to_class: ""MISAG""" "applies_to_class: ""MIMAG""" "applies_to_class: ""MIUVIG""" tag: preferred_unit vmap: {1: False, 2: True} curie_prefix: MIGS submitted_to_insdc submitted to insdc deprecated term remove X fine as is; this term is not used for submission, LS: suggest removing from MIxS core, NCBI - OK to remove CIH MIXS core https://github.com/GenomicsStandardsConsortium/mixs/issues/60 Depending on the study (large-scale e.g. done with next generation sequencing technology, or small-scale) sequences have to be submitted to SRA (Sequence Read Archive), DRA (DDBJ Read Archive) or via the classical Webin/Sequin systems to Genbank, ENA and DDBJ. Although this field is mandatory, it is meant as a self-test field, therefore it is not necessary to include this field in contextual data submitted to databases boolean {boolean} yes investigation M M M M M M M M M M M 1 1 MIXS:0000004 MIGS-1 investigation_type investigation type deprecated term remove X query definition; this term is not used for submission, LS: suggest removing from MIxS core; NCBI - OK to remove CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/60 Nucleic Acid Sequence Report is the root element of all MIGS/MIMS compliant reports as standardized by Genomic Standards Consortium. This field is either eukaryote,bacteria,virus,plasmid,organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genome eukaryote, bacteria_archaea, plasmid, virus, organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genomes [eukaryote|bacteria_archaea|plasmid|virus|organelle|metagenome|metatranscriptome|mimarks-survey|mimarks-specimen|misag|mimag|miuvig] metagenome investigation M M M M M M M M M M M 1 2 MIXS:0000007 MIGS-2 samp_name sample name updated description add to the core NEW core term - Approved in Sept CIG call LS MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/78 A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name. text {text} ISDsoil1 investigation M M M M M M M M M M M 1 MIXS:0001107 -samp_taxon_id Taxonomy ID of DNA sample suggested as part of +/-ive controls work, but would actully be an appropriate addition to all checklists so adding it to the core X #205 NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome’ for mock community/positive controls, or 'blank sample' for negative controls. Taxonomy ID {text} [NCBI:txid] Gut Metagenome [NCBI:txid749906] investigation M M M M M M M M M M M 1 MIXS:0001320 +samp_taxon_id Taxonomy ID of DNA sample suggested as part of +/-ive controls work, but would actually be an appropriate addition to all checklists so adding it to the core X #205 NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome’ for mock community/positive controls, or 'blank sample' for negative controls. Taxonomy ID {text} [NCBI:txid] Gut Metagenome [NCBI:txid749906] investigation M M M M M M M M M M M 1 MIXS:0001320 project_name project name no change made X fine as is CIH MIXS core (manditory) Name of the project within which the sequencing was organized {text} Forest soil metagenome investigation M M M M M M M M M M M 1 3 MIXS:0000092 MIGS-3 experimental_factor experimental factor no change made X fine as is CIH MIXS core (optional) Experimental factors are essentially the variable aspects of an experiment design which can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI). For a browser of EFO (v 2.95) terms, please see http://purl.bioontology.org/ontology/EFO; for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI text or EFO and/or OBI {termLabel} {[termID]}|{text} time series design [EFO:EFO_0001779] investigation X X X X X C C X C C C 1 4 MIXS:0000008 -lat_lon geographic location (latitude and longitude) no change made Decision: keep the name of the field as is. - Issue: 1 field, do we accept range and area; Issue 2: split into two fields - discussion: concensus is to keep it as one field; for NCBI keep as one combined field . Note: INSDC discussion, limit to 8 decimal points. X Should the definition be updated to include defined areas using closed linear ring notation? (A linear ring is a closed LineString with four or more positions). (CIH); Suggest renaming to : latitude and longitude [LS] CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/62 The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system decimal degrees, limit to 8 decimal points {float} {float} 50.586825 6.408977 environment M M M M M M M M M M M 1 5 MIXS:0000009 MIGS-4.1|MIGS-4.2 +lat_lon geographic location (latitude and longitude) no change made Decision: keep the name of the field as is. - Issue: 1 field, do we accept range and area; Issue 2: split into two fields - discussion: consensus is to keep it as one field; for NCBI keep as one combined field . Note: INSDC discussion, limit to 8 decimal points. X Should the definition be updated to include defined areas using closed linear ring notation? (A linear ring is a closed LineString with four or more positions). (CIH); Suggest renaming to : latitude and longitude [LS] CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/62 The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system decimal degrees, limit to 8 decimal points {float} {float} 50.586825 6.408977 environment M M M M M M M M M M M 1 5 MIXS:0000009 MIGS-4.1|MIGS-4.2 depth depth Updated Item name and definition CHANGE: Item name to: depth add to core, check for consistency of definitions - DONE X Needs updating CIH MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/63 The vertical distance below local surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples. measurement value {float} {unit} 10 meter environment E E E E E E E E E E E 1 6 MIXS:0000018 MIGS-4.3 alt altitude no change made X fine as is -but does it need to be in packages AS WELL? - Not needed in all packages [LS] CIH MIXS core (optional) Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air measurement value {float} {unit} 100 meter environment E E E E E E E E E E E 1 MIXS:0000094 MIGS-4.4 elev elevation no change made X fine as is -but does it need to be in packages AS WELL? CIH MIXS core (optional) Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit. measurement value {float} {unit} 100 meter environment E E E E E E E E E E E 1 7 MIXS:0000093 @@ -19,16 +19,16 @@ pos_cont_type positive control type added to core new term added as conditional env_broad_scale broad-scale environmental context updated definition add synonym to end of definition X to do; Synonym: 'biome' [LS] LS MIXS core (manditory) #106 Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO’s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS The major environment type(s) where the sample was collected. Recommend subclasses of biome [ENVO:00000428]. Multiple terms can be separated by one or more pipes. {termLabel} {[termID]} oceanic epipelagic zone biome [ENVO:01000033] for annotating a water sample from the photic zone in middle of the Atlantic Ocean environment M M M M M M M M M M M 1 10 MIXS:0000012 env_local_scale local environmental context updated definition add synonym to end of definition X to do; Synonym:'feature [LS], habitat LS MIXS core (manditory) #106 Report the entity or entities which are in the sample or specimen’s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS. Environmental entities having causal influences upon the entity at time of sampling. {termLabel} {[termID]} litter layer [ENVO:01000338]; Annotating a pooled sample taken from various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [01000335]|shrub layer [ENVO:01000336]. environment M M M M M M M M M M M 1 11 MIXS:0000013 MIGS-6 (habitat) env_medium environmental medium updated definition add synonym to end of definition X to do; Synonym: material [LS] LS MIXS core (manditory) #117 & #196 Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top). The material displaced by the entity at time of sampling. Recommend subclasses of environmental material [ENVO:00010483]. {termLabel} {[termID]} soil [ENVO:00001998]; Annotating a fish swimming in the upper 100 m of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating a duck on a pond consider: pond water [ENVO:00002228]|air [ENVO_00002005] environment M M M M M M M M M M M 1 12 MIXS:0000014 -env_package environmental package deprecated term agreed, remove from Core X "Similar to ""Investigation Type"" above, and do we need to make it a hidden field? ie users wont need to manually input the value as its implicit on which package they choose to use.; Agreed, this is not a used field [LS]" CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/64 MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported enumeration [air|built environment|host-associated|human-associated|human-skin|human-oral|human-gut|human-vaginal|hydrocarbon resources-cores|hydrocarbon resources-fluids/swabs|microbial mat/biofilm|misc environment|plant-associated|sediment|soil|wastewater/sludge|water] soil mixs extension C C C C C C C C C C C 1 13 MIXS:0000019 (list MIXS ID for each package: air, ...) +env_package environmental package deprecated term agreed, remove from Core X "Similar to ""Investigation Type"" above, and do we need to make it a hidden field? ie users won't need to manually input the value as its implicit on which package they choose to use.; Agreed, this is not a used field [LS]" CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/64 MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported enumeration [air|built environment|host-associated|human-associated|human-skin|human-oral|human-gut|human-vaginal|hydrocarbon resources-cores|hydrocarbon resources-fluids/swabs|microbial mat/biofilm|misc environment|plant-associated|sediment|soil|wastewater/sludge|water] soil mixs extension C C C C C C C C C C C 1 13 MIXS:0000019 (list MIXS ID for each package: air, ...) subspecf_gen_lin subspecific genetic lineage Updated definition Create ticket to unify this term, with this term and package term: host infra-specific name - see which term is used most widely for data, then add note in definition of the synonym fine as is CIH MIXS core (optional) #73 & #256 Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123. Genetic lineage below lowest rank of NCBI taxonomy, which is subspecies, e.g. serovar, biotype, ecotype. {rank name}:{text} serovar:Newport nucleic acid sequence source C C C C C - - C - - - 1 14 MIXS:0000020 ploidy ploidy no change made. - Remove from Core, keep in MIGS Eu, also in MISAG, MIMAG only relevant to MIGS Eu, should we consider removing from Core to a MIGS Eu specific package? CIH MIXS core (optional) The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO PATO {termLabel} {[termID]} allopolyploidy [PATO:0001379] nucleic acid sequence source X - - - - - - - - - - 1 15 MIXS:0000021 num_replicons number of replicons no change made. MIXS core (optional) Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote for eukaryotes and bacteria: chromosomes (haploid count); for viruses: segments {integer} 2 nucleic acid sequence source X M - C - - - - - - - 1 16 MIXS:0000022 extrachrom_elements extrachromosomal elements no change made. - look at moving to genome checklists MIXS core (optional) Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids) number of extrachromosmal elements {integer} 5 nucleic acid sequence source X C - - C - - X - - - 1 17 MIXS:0000023 estimated_size estimated size no change made in v6, added discussion for v7 - change name to estimated genome size "Do we need to make this clearer in the name? ""genome size estimate""" CIH MIXS core (optional) #142 The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period. number of base pairs {integer} bp 300000 bp nucleic acid sequence source X X X X X - - - - - X 1 18 MIXS:0000024 ref_biomaterial reference for biomaterial no change made. update definition to fix breadth of checklists fine as is CIH MIXS core (optional) Primary publication if isolated before genome publication; otherwise, primary genome report. PMID, DOI or URL {PMID}|{DOI}|{URL} doi:10.1016/j.syapm.2018.01.009 nucleic acid sequence source X M X X X X - - X X X 1 19 MIXS:0000025 -source_mat_id source material identifiers no change made in v6, added discussion for v7 refine definition, to clarify that this is the ID of the sample, originally collected; KEEP for Parent Sample, need another ID space/naming for subsamples, check with INSDC -- keep alignment of definition with TDWG. -- TDWG. material_sample_id -- How about 'subsample ID' ?? -- in the definition, include recommendation to use a globally unique identifier, and the name of the source, e.g. NEON CIH MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/149 A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2). for cultures of microorganisms: identifiers for two culture collections; for other material a unique arbitrary identifer {text} MPI012345 nucleic acid sequence source C C C C C C C C C C C m 20 MIXS:0000026 MIGS-13 +source_mat_id source material identifiers no change made in v6, added discussion for v7 refine definition, to clarify that this is the ID of the sample, originally collected; KEEP for Parent Sample, need another ID space/naming for subsamples, check with INSDC -- keep alignment of definition with TDWG. -- TDWG. material_sample_id -- How about 'subsample ID' ?? -- in the definition, include recommendation to use a globally unique identifier, and the name of the source, e.g. NEON CIH MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/149 A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2). for cultures of microorganisms: identifiers for two culture collections; for other material a unique arbitrary identifier {text} MPI012345 nucleic acid sequence source C C C C C C C C C C C m 20 MIXS:0000026 MIGS-13 pathogenicity known pathogenicity no change made in v6, added discussion for v7 -- move to specific packages, review name, definition in GitHub ticket Needs updating NMD MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/65 To what is the entity pathogenic names of organisms that the entity is pathogenic to {text} human, animal, plant, fungi, bacteria nucleic acid sequence source C C - C - - - - - - X 1 21 MIXS:0000027 MIGS-14 -biotic_relationship observed biotic relationship New ticket opened pointing out the discrepencies between definition and expected values (28jun) fine as is NMD MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/168 Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object enumeration [free living|parasitism|commensalism|symbiotic|mutualism] free living nucleic acid sequence source X C - X - - - C - - X 1 22 MIXS:0000028 MIGS-15 +biotic_relationship observed biotic relationship New ticket opened pointing out the discrepancies between definition and expected values (28jun) fine as is NMD MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/168 Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object enumeration [free living|parasitism|commensalism|symbiotic|mutualism] free living nucleic acid sequence source X C - X - - - C - - X 1 22 MIXS:0000028 MIGS-15 specific_host host scientific name updated item name refine the definition, look at virus hosts, obligate symbionts, ... fine as is _ or is it repetitive with host_spec_range? NMD MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/131 Report the host's taxonomic name and/or NCBI taxonomy ID. host scientific name, taxonomy ID {text}|{NCBI taxid} Homo sapiens and/or 9606 nucleic acid sequence source X C C C - - - - - - X 1 23 MIXS:0000029 MIGS-16 host_spec_range host specificity or range Updated definition -- check usage, consider merging specific_host and host_spec_range ; consider removing this term Needs updating NMD MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/66 The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier. NCBI taxid {integer} 9606 nucleic acid sequence source X X X C - - - - - - X m 24 MIXS:0000030 health_disease_stat health or disease status of specific host at time of collection deprecated term - change core term to match packages I propose to align this term to 'disease status' that is in the packages, any objections ? [LS] MIXS core (optional) #141 Health or disease status of specific host at time of collection enumeration [healthy|diseased|dead|disease-free|undetermined|recovering|resolving|pre-existing condition|pathological|life threatening|congenital] dead nucleic acid sequence source X C - C - - - - - - - 1 25 MIXS:0000031 @@ -38,7 +38,7 @@ propagation propagation UPDATED DEFINITION 07-MAY-2021 cih revise definition; mo encoded_traits encoded traits no change made in v6, added discussion for v7 generalize the definition, to apply across all packages to do MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/143 Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage for plasmid: antibiotic resistance; for phage: converting genes {text} beta-lactamase class A nucleic acid sequence source - X C C - - - - - - - 1 28 MIXS:0000034 rel_to_oxygen relationship to oxygen no change made in v6, added discussion for v7 clarify in the definition, that this about the sample, not the organism, move to packages to do MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/144 Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments enumeration [aerobe|anaerobe|facultative|microaerophilic|microanaerobe|obligate aerobe|obligate anaerobe] aerobe nucleic acid sequence source - C - - - X X C X X - 1 29 MIXS:0000015 MIGS-22 isol_growth_condt isolation and growth condition no change made in v6, added discussion for v7 Change the label, Reference - keep in the core, update the definition, DNA and RNA isolation kits - use Protocols.io; look at usage in DSMZ (https://bacdive.dsmz.de/
) to do MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/145 Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material PMID,DOI or URL {PMID}|{DOI}|{URL} doi: 10.1016/j.syapm.2018.01.009 nucleic acid sequence source M M M M M - - M - - - 1 30 MIXS:0000003 MIGS-23 -samp_collec_device sample collection device revise the definiton, encourage Protocols.io - split into two terms, sample collection device (controlled vocabulary) and sample collection method, ok - Anji to do, Split: Done MIXS core (optional) The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094). device name {termLabel} {[termID]}|{text} swab, biopsy, niskin bottle, push core, drag swab [GENEPIO:0002713] nucleic acid sequence source X X X X X C C X C C C 1 31 MIXS:0000002 +samp_collec_device sample collection device revise the definition, encourage Protocols.io - split into two terms, sample collection device (controlled vocabulary) and sample collection method, ok - Anji to do, Split: Done MIXS core (optional) The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094). device name {termLabel} {[termID]}|{text} swab, biopsy, niskin bottle, push core, drag swab [GENEPIO:0002713] nucleic acid sequence source X X X X X C C X C C C 1 31 MIXS:0000002 samp_collec_method sample collection method MIXS core (optional) #159 The method employed for collecting the sample. PMID,DOI,url , or text {PMID}|{DOI}|{URL}|{text} swabbing nucleic acid sequence source X X X X X C C X C C C 1 MIXS:0001225 samp_mat_process sample material processing updated definition, Expected value the remainder on this list, keep as MIxS core optional or move to their appropriate checklist X to do MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/146 A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed. text {text} filtering of seawater, storing samples in ethanol nucleic acid sequence source X X X X X C C C C C C 1 32 MIXS:0000016 size_frac size fraction selected no change made. move to MISAG/MIMAG to do MISAG/MIMAG Filtering pore size used in sample preparation filter size value range {float}-{float} {unit} 0-0.22 micrometer nucleic acid sequence source - - - - - X X - X X C 1 33 MIXS:0000017 @@ -61,11 +61,11 @@ adapters adapters no change made. stay in the core Adapters provide priming pcr_cond pcr conditions no change made. stay in the core fine as is CIH Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...' initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final elongation:degrees_minutes;total cycles initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final elongation:degrees_minutes;total cycles initial denaturation:94_3;annealing:50_1;elongation:72_1.5;final elongation:72_10;35 sequencing - - - - - - C C - - - 1 49 MIXS:0000049 seq_meth sequencing method Updated definition recommendation - look at OBI for the allowable terms , add to definition for options to look at OBI. For now, we are using the ENA approved checklist at https://ena-docs.readthedocs.io/en/latest/submit/reads/webin-cli.html#metadata-validation. update definition see #96 MIXS core (manditory) #96 Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103). Text or OBI {termLabel} {[termID]}|{text} 454 Genome Sequencer FLX [OBI:0000702] sequencing M M M M M M M M M M M 1 50 MIXS:0000050 MIGS-29 seq_quality_check sequence quality check no change made. stay in core Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA none or manually edited [none|manually edited] none sequencing - - - - - - C C - - - 1 51 MIXS:0000051 -chimera_check chimera check software Updated name and definition MIMARKS term "definition needs work - should include what is expected value for this item. Also Jasper suggested chaning name to include suffix ""software"" like other terms e.g. assembly software." CIH #97 Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences. name and version of software, parameters used {software};{version};{parameters} uchime;v4.1;default parameters sequencing - - - - - - C C - - - 1 52 MIXS:0000052 +chimera_check chimera check software Updated name and definition MIMARKS term "definition needs work - should include what is expected value for this item. Also Jasper suggested changing name to include suffix ""software"" like other terms e.g. assembly software." CIH #97 Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences. name and version of software, parameters used {software};{version};{parameters} uchime;v4.1;default parameters sequencing - - - - - - C C - - - 1 52 MIXS:0000052 tax_ident taxonomic identity marker no change made. move to checklist - MISAG, MIMAG, review the definition, can this point to a controlled vocabulary clarify definition and move to checklist for MISAG and MIMAG The phylogenetic marker(s) used to assign an organism name to the SAG or MAG enumeration [16S rRNA gene|multi-marker approach|other] other: rpoB gene sequencing C C C C C - - - M M X 1 53 MIXS:0000053 Note, MIxS v4, 'assembly', was removed in MIxS v5 -assembly_qual assembly quality no change made in v6, added discussion for v7 update description to include the previous item name. Add previous term as a individual item and mark as obsolete. keep in the core ; add a field for previous names/obsolete terms can we re-write defintion to cover all possible assembly use cases, MISAG, MIMAG, MIUVIG fields; in MIxSv4, this term was 'finishing strategy' https://github.com/GenomicsStandardsConsortium/mixs/issues/147 The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling ≥ 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated enumeration [Finished genome|High-quality draft genome|Medium-quality draft genome|Low-quality draft genome|Genome fragment(s)] High-quality draft genome sequencing M M X X X C - - M M M 1 54 MIXS:0000056 -assembly_name assembly name no change made. assembly name is in ealier MIxS (v4 and before); keep in core more details on this required? Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community name and version of assembly {text} {text} HuRef, JCVI_ISG_i3_1.0 sequencing C C C C C C - - C C C 1 55 MIXS:0000057 MIGS-30 +assembly_qual assembly quality no change made in v6, added discussion for v7 update description to include the previous item name. Add previous term as a individual item and mark as obsolete. keep in the core ; add a field for previous names/obsolete terms can we re-write definition to cover all possible assembly use cases, MISAG, MIMAG, MIUVIG fields; in MIxSv4, this term was 'finishing strategy' https://github.com/GenomicsStandardsConsortium/mixs/issues/147 The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling ≥ 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated enumeration [Finished genome|High-quality draft genome|Medium-quality draft genome|Low-quality draft genome|Genome fragment(s)] High-quality draft genome sequencing M M X X X C - - M M M 1 54 MIXS:0000056 +assembly_name assembly name no change made. assembly name is in earlier MIxS (v4 and before); keep in core more details on this required? Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community name and version of assembly {text} {text} HuRef, JCVI_ISG_i3_1.0 sequencing C C C C C C - - C C C 1 55 MIXS:0000057 MIGS-30 assembly_software assembly software no change made. add to core optional to do - MIMAG/MISAG Tool(s) used for assembly, including version number and parameters name and version of software, parameters used {software};{version};{parameters} metaSPAdes;3.11.0;kmer set 21,33,55,77,99,121, default parameters otherwise sequencing M M M M M C C - M M M 1 56 MIXS:0000058 annot annotation no change made in v6, added discussion for v7 "combine with ""feat_pred"" and rewrite definition (ramona) . add to core optional, add obsolete term - Ramona will write new definition" to do - in MIxSv4 - this was 'annoation source', MIMAG/MISAG ramona https://github.com/GenomicsStandardsConsortium/mixs/issues/148 Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter name of tool or pipeline used, or annotation source description {text} prokka sequencing C C C C C C - - X X X 1 57 MIXS:0000059 number_contig number of contigs no change made add to core optional, add obsolete term to do - MIxS 4, was part of 'Finishing Strategy' term Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG value {integer} 40 sequencing M M X X X C - - X X M 1 58 MIXS:0000060 @@ -82,7 +82,7 @@ compl_software completeness software no change made core optional - link the sco compl_appr completeness approach updated definition update definition, make more generic for other checklists, keep MIMAG, etc. part of definition needs attention - MISAG, MIMAG, MIUViG CIH #81 The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome text [marker gene|reference based|other] other: UViG length compared to the average length of reference genomes from the P22virus genus (NCBI RefSeq v83) sequencing - - - - - - - - X X C 1 69 MIXS:0000071 contam_score contamination score no change made update definition, keep in core optional needs attention - MIMAG/MISAG CIH The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases value {float} percentage 1% sequencing - - - - - - - - M M - 1 70 MIXS:0000072 contam_screen_input contamination screening input no change made update definition, keep in core optional needs attention - MIMAG/MISAG CIH The type of sequence data used as input enumeration [reads| contigs] contigs sequencing - - - - - - - - X X - 1 71 MIXS:0000005 -contam_screen_param contamination screening parameters no change made check definition, keep in core optional needs attention - MIMAG/MISAG CIH Specific parameters used in the decontamination sofware, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer enumeration;value or name [ref db|kmer|coverage|combination];{text|integer} kmer sequencing - - - - - - - - X X - 1 72 MIXS:0000073 +contam_screen_param contamination screening parameters no change made check definition, keep in core optional needs attention - MIMAG/MISAG CIH Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer enumeration;value or name [ref db|kmer|coverage|combination];{text|integer} kmer sequencing - - - - - - - - X X - 1 72 MIXS:0000073 decontam_software decontamination software no change made check definition, keep in core optional needs attention - MIMAG/MISAG CIH Tool(s) used in contamination screening enumeration [checkm/refinem|anvi'o|prodege|bbtools:decontaminate.sh|acdc|combination] anvi'o sequencing - - - - - - - - X X - 1 73 MIXS:0000074 sort_tech sorting technology no change made needs attention - MISAG, MIUViG CIH Method used to sort/isolate cells or particles of interest enumeration [flow cytometric cell sorting|microfluidics|lazer-tweezing|optical manipulation|micromanipulation|other] optical manipulation sequencing - - - - - - - - M - C 1 74 MIXS:0000075 single_cell_lysis_appr single cell or viral particle lysis approach no change made needs attention - MISAG, MIUViG CIH Method used to free DNA from interior of the cell(s) or particle(s) enumeration [chemical|enzymatic|physical|combination] enzymatic sequencing - - - - - - - - M - C 1 75 MIXS:0000076 diff --git a/tests/input/rda-crosswalk.tsv b/tests/input/rda-crosswalk.tsv index 9a2dd64..3f6a963 100644 --- a/tests/input/rda-crosswalk.tsv +++ b/tests/input/rda-crosswalk.tsv @@ -12,7 +12,7 @@ A. From Google dataset search recommendaton Thing description mandatory Text A d CreativeWork keywords Text Keywords or tags used to describe this content. Multiple entries in a keywords list are typically delimited by commas. dct:keyword (R) dcat:keyword dcterms:subject (R)* MD_Identification/descriptiveKeywords//keyword keywords (R) Subject (M); Topic Classification Term; Keywords keywords (O) collection/subject dcterms:subject keywords (M) keywords keywords keyword CreativeWork license CreativeWork or URL A license document that applies to this content, typically indicated by URL. (A license under which the dataset is distributed.) dct:license dct:license dcterms:rights MD_Identification/resourceConstraints//reference/CI_Citation, or text in MD_LegalConstraints/useLimitation [restrictionCode = license] license (R) licenses (R) collection/rights/licence[@rightsURI] AND/OR collection/rights/licence[@type] AND collection/rights/licence dcterms:rights license (R) Rights (O) rights Rights (O) license CreativeWork creator Organization or Person The creator/author of this CreativeWork (dataset). This is the same as the Author property for CreativeWork. (To uniquely identify individuals, use ORCID ID as the value of the sameAs property of the Person type. To uniquely identify institutions and organizations, use ROR ID. ) dct:creator dcterms:creator dcterms:creator (M) MD_Identification/citation//citedResponsibleParty//name [role = one of {author, coAuthor, originator, editor}] creator (M) Author; authorName (M) creator (M) collection/citationInfo/citationMetadata/contributor OR relatedObject|relatedInfo party/name where relation=IsPrincipalInvestigatorOf OR relatedObject|relatedInfo party/name where relation=author OR relatedObject|relatedInfo party/name where relation=coInvestigator OR relatedObject|relatedInfo party/name where relation=hasCollector dcterms:creator creator (R) Creator (R) AuthEnty* Creators (M) author ResourceHeader/Contact[@role=PrincipalInvestigator] ResourceHeader/Contact[@role=DataProducer] - CreativeWork isPartOf CreativeWork Indicates a CreativeWork that this CreativeWork is (in some sense) part of. Reverse property hasPart. If the dataset is a collection of smaller datasets, use the hasPart property to denote such relationship. Conversly, if the dataset is part of a larger dataset, use isPartOf. dct:isPartOf (R) dcterms:isPartOf isPartOf MD_Identifcation/associatedResource/name/CI_Citation [associationType = 'largerWorkCitation'] includedIn(Dataset) (R) relatedObject|relatedInfo collection where relation[@type='isPartOf'] dcterms:isPartOf isPartOf isPartOf ParentID (only for Granule resource type) + CreativeWork isPartOf CreativeWork Indicates a CreativeWork that this CreativeWork is (in some sense) part of. Reverse property hasPart. If the dataset is a collection of smaller datasets, use the hasPart property to denote such relationship. Conversely, if the dataset is part of a larger dataset, use isPartOf. dct:isPartOf (R) dcterms:isPartOf isPartOf MD_Identifcation/associatedResource/name/CI_Citation [associationType = 'largerWorkCitation'] includedIn(Dataset) (R) relatedObject|relatedInfo collection where relation[@type='isPartOf'] dcterms:isPartOf isPartOf isPartOf ParentID (only for Granule resource type) CreativeWork hasPart CreativeWork Indicates a CreativeWork that is (in some sense) a part of this CreativeWork. Reverse property isPartOf dct:hasPart (R) dcterms:hasPart hasPart MD_Identifcation/associatedResource/name/CI_Citation [associationType = 'isComposedOf'] includes(Dataset) (R) hasPart (O) relatedObject|relatedInfo collection where relation[@type='hasPart'] dcterms:hasPart hasPart hasPart CreativeWork version Number or Text The version of the CreativeWork embodied by a specified resource. owl:versionInfo owl:versionInfo Version (O) MD_Identification/citation//edition version (O) Version version (R) registryObject:collection:citationInfo:citationMetadata:version version (R) version version (O) version ProviderVersion CreativeWork temporalCoverage Text The temporalCoverage of a CreativeWork indicates the period that the content applies to (The data in the dataset covers a specific time interval. Only include this property if the dataset has a temporal dimension.) dct:temporal dcterms:temporal Date MD_Identification/extent//temporalElement/extent/TM_Primitive temporalCoverage (O) Time Period Covered collection/coverage/temporal dcterms:temporal (start); dcterms:temporal (end) Temporal Coverage (O) temporal TemporalDescription/TimeSpan