From 51a2845878775ea8ad51c6d392aec9c1ca4f13df Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Mon, 26 Feb 2024 19:30:14 -0500
Subject: [PATCH] add codespell workflow, config and fix some typos (#127)

* Add GitHub Action to run codespell on pushes and PRs to main

* Add rudimentary codespell config

* Run codespell throughout but ignore failures -- committing manually since example outputs are ignored by git

* [DATALAD RUNCMD] Do interactive fixing of typos

=== Do not change lines below ===
{
 "chain": [],
 "cmd": "codespell -w -i 3 -C 2",
 "exit": 0,
 "extra_inputs": [],
 "inputs": [],
 "outputs": [],
 "pwd": "."
}
^^^ Do not change lines above ^^^
---
 .github/workflows/codespell.yml               | 23 +++++++++++++++++++
 docs/datamodel/types/Objectidentifier.md      |  2 +-
 docs/intro/export.md                          |  2 +-
 docs/intro/mixed-sheets.md                    |  2 +-
 .../output/docs/types/Objectidentifier.md     |  2 +-
 examples/output/jsonld/combined.jsonld        |  2 +-
 pyproject.toml                                |  8 ++++++-
 schemasheets/schemamaker.py                   |  4 ++--
 tests/input/mixs6_core_test.tsv               | 22 +++++++++---------
 tests/input/rda-crosswalk.tsv                 |  2 +-
 10 files changed, 49 insertions(+), 20 deletions(-)
 create mode 100644 .github/workflows/codespell.yml

diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 0000000..dd0eb8e
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,23 @@
+# Codespell configuration is within pyproject.toml
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/docs/datamodel/types/Objectidentifier.md b/docs/datamodel/types/Objectidentifier.md
index 8d80546..454ba6b 100644
--- a/docs/datamodel/types/Objectidentifier.md
+++ b/docs/datamodel/types/Objectidentifier.md
@@ -15,5 +15,5 @@ URI: [linkml:Objectidentifier](https://w3id.org/linkml/Objectidentifier)
 
 |  |  |  |
 | --- | --- | --- |
-| **Comments:** | | Used for inheritence and type checking |
+| **Comments:** | | Used for inheritance and type checking |
 
diff --git a/docs/intro/export.md b/docs/intro/export.md
index 7e53ca7..6b1f32d 100644
--- a/docs/intro/export.md
+++ b/docs/intro/export.md
@@ -84,7 +84,7 @@ this guards against accidental overwrites.
 
 schemasheets allows *custom* sheet formats that map to the LinkML standard.
 
-you can use the combination of sheets2linkml and linkml2sheets to convert betweeen two sheet specifications.
+you can use the combination of sheets2linkml and linkml2sheets to convert between two sheet specifications.
 
 For example, let's say for schema1.tsv, you use a spreadsheet with the following headers:
 
diff --git a/docs/intro/mixed-sheets.md b/docs/intro/mixed-sheets.md
index c18c744..cdcef01 100644
--- a/docs/intro/mixed-sheets.md
+++ b/docs/intro/mixed-sheets.md
@@ -23,4 +23,4 @@ For example:
 |C|ForProfit|||||Organization|||||||
 |C|NonProfit|||||Organization|||Q163740|||foo|
 
- * [personinfo with tyoes](https://docs.google.com/spreadsheets/d/1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ/edit#gid=509198484)
+ * [personinfo with types](https://docs.google.com/spreadsheets/d/1wVoaiFg47aT9YWNeRfTZ8tYHN8s8PAuDx5i2HUcDpvQ/edit#gid=509198484)
diff --git a/examples/output/docs/types/Objectidentifier.md b/examples/output/docs/types/Objectidentifier.md
index 8d80546..454ba6b 100644
--- a/examples/output/docs/types/Objectidentifier.md
+++ b/examples/output/docs/types/Objectidentifier.md
@@ -15,5 +15,5 @@ URI: [linkml:Objectidentifier](https://w3id.org/linkml/Objectidentifier)
 
 |  |  |  |
 | --- | --- | --- |
-| **Comments:** | | Used for inheritence and type checking |
+| **Comments:** | | Used for inheritance and type checking |
 
diff --git a/examples/output/jsonld/combined.jsonld b/examples/output/jsonld/combined.jsonld
index 9e01f53..00f0b03 100644
--- a/examples/output/jsonld/combined.jsonld
+++ b/examples/output/jsonld/combined.jsonld
@@ -207,7 +207,7 @@
       "definition_uri": "https://w3id.org/linkml/Objectidentifier",
       "description": "A URI or CURIE that represents an object in the model.",
       "comments": [
-        "Used for inheritence and type checking"
+        "Used for inheritance and type checking"
       ],
       "from_schema": "https://w3id.org/linkml/types",
       "imported_from": "linkml:types",
diff --git a/pyproject.toml b/pyproject.toml
index b97e9bd..30fac63 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,4 +33,10 @@ sheets2linkml = "schemasheets.schemamaker:convert"
 linkml2sheets = "schemasheets.schema_exporter:export_schema"
 sheets2project = "schemasheets.sheets_to_project:multigen"
 
-linkml2schemasheets-template = 'schemasheets.generate_populate:cli'
\ No newline at end of file
+linkml2schemasheets-template = 'schemasheets.generate_populate:cli'
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git,*.lock'
+check-hidden = true
+ignore-regex = '\bOTU\b'
+# ignore-words-list = ''
diff --git a/schemasheets/schemamaker.py b/schemasheets/schemamaker.py
index 112d4f6..8952096 100644
--- a/schemasheets/schemamaker.py
+++ b/schemasheets/schemamaker.py
@@ -217,7 +217,7 @@ def get_current_element(self, elt: Element) -> Union[Element, PermissibleValue]:
         """
         sc = self.schema
         if isinstance(elt, SchemaDefinition):
-            # TODO: consider multiple shemas per sheet
+            # TODO: consider multiple schemas per sheet
             return sc
         elif isinstance(elt, PermissibleValue):
             return elt
@@ -321,7 +321,7 @@ def check_excess(descriptors):
                 for c in vmap[T_CLASS]:
                     if self.use_attributes:
                         # slots always belong to a class;
-                        # no seperate top level slots
+                        # no separate top level slots
                         a = SlotDefinition(main_elt.name)
                         c.attributes[main_elt.name] = a
                         yield a
diff --git a/tests/input/mixs6_core_test.tsv b/tests/input/mixs6_core_test.tsv
index beed010..be11c8b 100644
--- a/tests/input/mixs6_core_test.tsv
+++ b/tests/input/mixs6_core_test.tsv
@@ -1,13 +1,13 @@
-Structured comment name	Item (rdfs:label)	Action carried out and logged by CIH in change log 21May	Discussion	review complete (X)	status	Editor/ reviewer		Link to GH issue	Definition	Expected value	Value syntax	Example	Section	migs_eu	migs_ba	migs_pl	migs_vi	migs_org	mims	mimarks_s	mimarks_c	misag	mimag	miuvig	Preferred unit	Occurence	Position	MIXS ID	MIGS ID (mapping to GOLD)	 
+Structured comment name	Item (rdfs:label)	Action carried out and logged by CIH in change log 21May	Discussion	review complete (X)	status	Editor/ reviewer		Link to GH issue	Definition	Expected value	Value syntax	Example	Section	migs_eu	migs_ba	migs_pl	migs_vi	migs_org	mims	mimarks_s	mimarks_c	misag	mimag	miuvig	Preferred unit	Occurrence	Position	MIXS ID	MIGS ID (mapping to GOLD)	 
 > slot	title	comments	comments	comments	ignore	notes	ignore	see_also	description	ignore	pattern	ignore	in_subset	cardinality	cardinality	cardinality	cardinality	cardinality	cardinality	cardinality	cardinality	cardinality	cardinality	cardinality	annotations	multivalued	annotations	slot_uri	exact_mapping	
 > 				"prefix: ""review completed """		"prefix: ""reviewer: """								"applies_to_class: ""MIGS eukaryote"""	"applies_to_class: ""MIGS bacteria"""	"applies_to_class: ""MIGS plant"""	"applies_to_class: ""MIGS virus"""	"applies_to_class: ""MIGS org"""	"applies_to_class: ""MIMS"""	"applies_to_class: ""MIMARKS specimen"""	"applies_to_class: ""MIMARKS survey"""	"applies_to_class: ""MISAG"""	"applies_to_class: ""MIMAG"""	"applies_to_class: ""MIUVIG"""	tag: preferred_unit	vmap: {1: False, 2: True}			curie_prefix: MIGS	
 submitted_to_insdc	submitted to insdc	deprecated term	remove	X	fine as is; this term is not used for submission, LS: suggest removing from MIxS core, NCBI - OK to remove	CIH	MIXS core	https://github.com/GenomicsStandardsConsortium/mixs/issues/60	Depending on the study (large-scale e.g. done with next generation sequencing technology, or small-scale) sequences have to be submitted to SRA (Sequence Read Archive), DRA (DDBJ Read Archive) or via the classical Webin/Sequin systems to Genbank, ENA and DDBJ. Although this field is mandatory, it is meant as a self-test field, therefore it is not necessary to include this field in contextual data submitted to databases	boolean	{boolean}	yes	investigation	M	M	M	M	M	M	M	M	M	M	M		1	1	MIXS:0000004	MIGS-1	
 investigation_type	investigation type	deprecated term	remove	X	query definition; this term is not used for submission, LS: suggest removing from MIxS core; NCBI - OK to remove	CIH	MIXS core (manditory)	https://github.com/GenomicsStandardsConsortium/mixs/issues/60	Nucleic Acid Sequence Report is the root element of all MIGS/MIMS compliant reports as standardized by Genomic Standards Consortium. This field is either eukaryote,bacteria,virus,plasmid,organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genome	eukaryote, bacteria_archaea, plasmid, virus, organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genomes	[eukaryote|bacteria_archaea|plasmid|virus|organelle|metagenome|metatranscriptome|mimarks-survey|mimarks-specimen|misag|mimag|miuvig]	metagenome	investigation	M	M	M	M	M	M	M	M	M	M	M		1	2	MIXS:0000007	MIGS-2	
 samp_name	sample name 	updated description	add to the core		NEW core term - Approved in Sept CIG call	LS	MIXS core (manditory)	https://github.com/GenomicsStandardsConsortium/mixs/issues/78	A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name.	text	{text}	ISDsoil1	investigation	M	M	M	M	M	M	M	M	M	M	M		1		MIXS:0001107		
-samp_taxon_id	Taxonomy ID of DNA sample		suggested as part of +/-ive controls work, but would actully be an appropriate addition to all checklists so adding it to the core	X				#205	NCBI taxon id of the sample.  Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome’ for mock community/positive controls, or 'blank sample' for negative controls.	Taxonomy ID 	{text} [NCBI:txid]	Gut Metagenome [NCBI:txid749906]	investigation	M	M	M	M	M	M	M	M	M	M	M		1		MIXS:0001320		
+samp_taxon_id	Taxonomy ID of DNA sample		suggested as part of +/-ive controls work, but would actually be an appropriate addition to all checklists so adding it to the core	X				#205	NCBI taxon id of the sample.  Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome’ for mock community/positive controls, or 'blank sample' for negative controls.	Taxonomy ID 	{text} [NCBI:txid]	Gut Metagenome [NCBI:txid749906]	investigation	M	M	M	M	M	M	M	M	M	M	M		1		MIXS:0001320		
 project_name	project name	no change made		X	fine as is	CIH	MIXS core (manditory)		Name of the project within which the sequencing was organized		{text}	Forest soil metagenome	investigation	M	M	M	M	M	M	M	M	M	M	M		1	3	MIXS:0000092	MIGS-3	
 experimental_factor	experimental factor	no change made		X	fine as is	CIH	MIXS core (optional)		Experimental factors are essentially the variable aspects of an experiment design which can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI). For a browser of EFO (v 2.95) terms, please see http://purl.bioontology.org/ontology/EFO; for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI	text or EFO and/or OBI	{termLabel} {[termID]}|{text}	time series design [EFO:EFO_0001779]	investigation	X	X	X	X	X	C	C	X	C	C	C		1	4	MIXS:0000008		
-lat_lon	geographic location (latitude and longitude)	no change made	Decision: keep the name of the field as is.  - Issue: 1 field, do we accept range and area; Issue 2: split into two fields - discussion: concensus is to keep it as one field; for NCBI keep as one combined field . Note: INSDC discussion, limit to 8 decimal points. 	X	Should the definition be updated to include defined areas using closed linear ring notation? (A linear ring is a closed LineString with four or more positions). (CIH); Suggest renaming to : latitude and longitude [LS]	CIH	MIXS core (manditory)	https://github.com/GenomicsStandardsConsortium/mixs/issues/62	The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system	decimal degrees,  limit to 8 decimal points	{float} {float}	50.586825 6.408977	environment	M	M	M	M	M	M	M	M	M	M	M		1	5	MIXS:0000009	MIGS-4.1|MIGS-4.2	
+lat_lon	geographic location (latitude and longitude)	no change made	Decision: keep the name of the field as is.  - Issue: 1 field, do we accept range and area; Issue 2: split into two fields - discussion: consensus is to keep it as one field; for NCBI keep as one combined field . Note: INSDC discussion, limit to 8 decimal points. 	X	Should the definition be updated to include defined areas using closed linear ring notation? (A linear ring is a closed LineString with four or more positions). (CIH); Suggest renaming to : latitude and longitude [LS]	CIH	MIXS core (manditory)	https://github.com/GenomicsStandardsConsortium/mixs/issues/62	The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system	decimal degrees,  limit to 8 decimal points	{float} {float}	50.586825 6.408977	environment	M	M	M	M	M	M	M	M	M	M	M		1	5	MIXS:0000009	MIGS-4.1|MIGS-4.2	
 depth	depth	Updated Item name and definition	CHANGE: Item name to: depth add to core, check for consistency of definitions - DONE	X	Needs updating	CIH	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/63	The vertical distance below local surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples.	measurement value	{float} {unit}	10 meter	environment	E	E	E	E	E	E	E	E	E	E	E		1	6	MIXS:0000018	MIGS-4.3	
 alt	altitude	no change made		X	fine as is -but does it need to be in packages AS WELL? - Not needed in all packages [LS]	CIH	MIXS core (optional)		Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air	measurement value	{float} {unit}	100 meter	environment	E	E	E	E	E	E	E	E	E	E	E		1		MIXS:0000094	MIGS-4.4	
 elev	elevation	no change made		X	fine as is -but does it need to be in packages AS WELL?	CIH	MIXS core (optional)		Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit.	measurement value	{float} {unit}	100 meter	environment	E	E	E	E	E	E	E	E	E	E	E		1	7	MIXS:0000093		
@@ -19,16 +19,16 @@ pos_cont_type	positive control type	added to core	new term added as conditional
 env_broad_scale	broad-scale environmental context	updated definition	add synonym to end of definition	X	to do; Synonym: 'biome' [LS]	LS	MIXS core (manditory)	#106	Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO’s biome class:  http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS	The major environment type(s) where the sample was collected. Recommend subclasses of biome [ENVO:00000428]. Multiple terms can be separated by one or more pipes.	{termLabel} {[termID]}	oceanic epipelagic zone biome [ENVO:01000033] for annotating a water sample from the photic zone in middle of the Atlantic Ocean	environment	M	M	M	M	M	M	M	M	M	M	M		1	10	MIXS:0000012		
 env_local_scale	local environmental context	updated definition	add synonym to end of definition	X	to do; Synonym:'feature [LS], habitat	LS	MIXS core (manditory)	#106	Report the entity or entities which are in the sample or specimen’s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS. 	Environmental entities having causal influences upon the entity at time of sampling.	{termLabel} {[termID]}	litter layer [ENVO:01000338]; Annotating a pooled sample taken from various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [01000335]|shrub layer [ENVO:01000336].	environment	M	M	M	M	M	M	M	M	M	M	M		1	11	MIXS:0000013	MIGS-6 (habitat)	
 env_medium	environmental medium	updated definition	add synonym to end of definition	X	to do; Synonym: material [LS]	LS	MIXS core (manditory)	#117 & #196	Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top).	The material displaced by the entity at time of sampling. Recommend subclasses of environmental material [ENVO:00010483]. 	{termLabel} {[termID]}	soil [ENVO:00001998]; Annotating a fish swimming in the upper 100 m of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating a duck on a pond consider: pond water [ENVO:00002228]|air [ENVO_00002005]	environment	M	M	M	M	M	M	M	M	M	M	M		1	12	MIXS:0000014		
-env_package	environmental package	deprecated term	agreed, remove from Core	X	"Similar to ""Investigation Type"" above, and do we need to make it a hidden field? ie users wont need to manually input the value as its implicit on which package they choose to use.; Agreed, this is not a used field [LS]"	CIH	MIXS core (manditory)	https://github.com/GenomicsStandardsConsortium/mixs/issues/64	MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported	enumeration	[air|built environment|host-associated|human-associated|human-skin|human-oral|human-gut|human-vaginal|hydrocarbon resources-cores|hydrocarbon resources-fluids/swabs|microbial mat/biofilm|misc environment|plant-associated|sediment|soil|wastewater/sludge|water]	soil	mixs extension	C	C	C	C	C	C	C	C	C	C	C		1	13	MIXS:0000019		(list MIXS ID for each package: air, ...)
+env_package	environmental package	deprecated term	agreed, remove from Core	X	"Similar to ""Investigation Type"" above, and do we need to make it a hidden field? ie users won't need to manually input the value as its implicit on which package they choose to use.; Agreed, this is not a used field [LS]"	CIH	MIXS core (manditory)	https://github.com/GenomicsStandardsConsortium/mixs/issues/64	MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported	enumeration	[air|built environment|host-associated|human-associated|human-skin|human-oral|human-gut|human-vaginal|hydrocarbon resources-cores|hydrocarbon resources-fluids/swabs|microbial mat/biofilm|misc environment|plant-associated|sediment|soil|wastewater/sludge|water]	soil	mixs extension	C	C	C	C	C	C	C	C	C	C	C		1	13	MIXS:0000019		(list MIXS ID for each package: air, ...)
 subspecf_gen_lin	subspecific genetic lineage	Updated definition	Create ticket to unify this term, with this term and package term: host infra-specific name - see which term is used most widely for data, then add note in definition of the synonym		fine as is	CIH	MIXS core (optional)	#73 & #256	Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123.	Genetic lineage below lowest rank of NCBI taxonomy, which is subspecies, e.g. serovar, biotype, ecotype.	{rank name}:{text}	serovar:Newport	nucleic acid sequence source	C	C	C	C	C	-	-	C	-	-	-		1	14	MIXS:0000020		
 ploidy	ploidy	no change made.	- Remove from Core, keep in MIGS Eu, also in MISAG, MIMAG		only relevant to MIGS Eu, should we consider removing from Core to a MIGS Eu specific package?	CIH	MIXS core (optional)		The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO	PATO	{termLabel} {[termID]}	allopolyploidy [PATO:0001379]	nucleic acid sequence source	X	-	-	-	-	-	-	-	-	-	-		1	15	MIXS:0000021		
 num_replicons	number of replicons	no change made.					MIXS core (optional)		Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote	for eukaryotes and bacteria: chromosomes (haploid count); for viruses: segments	{integer}	2	nucleic acid sequence source	X	M	-	C	-	-	-	-	-	-	-		1	16	MIXS:0000022		
 extrachrom_elements	extrachromosomal elements	no change made.	- look at moving to genome checklists				MIXS core (optional)		Do plasmids exist of significant phenotypic consequence (e.g. ones that determine virulence or antibiotic resistance). Megaplasmids? Other plasmids (borrelia has 15+ plasmids)	number of extrachromosmal elements	{integer}	5	nucleic acid sequence source	X	C	-	-	C	-	-	X	-	-	-		1	17	MIXS:0000023		
 estimated_size	estimated size	no change made in v6, added discussion for v7	- change name to estimated genome size		"Do we need to make this clearer in the name? ""genome size estimate"""	CIH	MIXS core (optional)	#142	The estimated size of the genome prior to sequencing. Of particular importance in the sequencing of (eukaryotic) genome which could remain in draft form for a long or unspecified period.	number of base pairs	{integer} bp	300000 bp	nucleic acid sequence source	X	X	X	X	X	-	-	-	-	-	X		1	18	MIXS:0000024		
 ref_biomaterial	reference for biomaterial	no change made.	update definition to fix breadth of checklists		fine as is	CIH	MIXS core (optional)		Primary publication if isolated before genome publication; otherwise, primary genome report.	PMID, DOI or URL	{PMID}|{DOI}|{URL}	doi:10.1016/j.syapm.2018.01.009	nucleic acid sequence source	X	M	X	X	X	X	-	-	X	X	X		1	19	MIXS:0000025		
-source_mat_id	source material identifiers	no change made in v6, added discussion for v7	refine definition, to clarify that this is the ID of the sample, originally collected;    KEEP for Parent Sample,                         need another ID space/naming for subsamples, check with INSDC -- keep alignment of definition with TDWG. -- TDWG. material_sample_id -- How about 'subsample ID' ??  -- in the definition, include recommendation to use a globally unique identifier, and the name of the source, e.g. NEON 			CIH	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/149	A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2).	for cultures of microorganisms: identifiers for two culture collections; for other material a unique arbitrary identifer	{text}	MPI012345	nucleic acid sequence source	C	C	C	C	C	C	C	C	C	C	C		m	20	MIXS:0000026	MIGS-13	
+source_mat_id	source material identifiers	no change made in v6, added discussion for v7	refine definition, to clarify that this is the ID of the sample, originally collected;    KEEP for Parent Sample,                         need another ID space/naming for subsamples, check with INSDC -- keep alignment of definition with TDWG. -- TDWG. material_sample_id -- How about 'subsample ID' ??  -- in the definition, include recommendation to use a globally unique identifier, and the name of the source, e.g. NEON 			CIH	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/149	A unique identifier assigned to a material sample (as defined by http://rs.tdwg.org/dwc/terms/materialSampleID, and as opposed to a particular digital record of a material sample) used for extracting nucleic acids, and subsequent sequencing. The identifier can refer either to the original material collected or to any derived sub-samples. The INSDC qualifiers /specimen_voucher, /bio_material, or /culture_collection may or may not share the same value as the source_mat_id field. For instance, the /specimen_voucher qualifier and source_mat_id may both contain 'UAM:Herps:14' , referring to both the specimen voucher and sampled tissue with the same identifier. However, the /culture_collection qualifier may refer to a value from an initial culture (e.g. ATCC:11775) while source_mat_id would refer to an identifier from some derived culture from which the nucleic acids were extracted (e.g. xatc123 or ark:/2154/R2).	for cultures of microorganisms: identifiers for two culture collections; for other material a unique arbitrary identifier	{text}	MPI012345	nucleic acid sequence source	C	C	C	C	C	C	C	C	C	C	C		m	20	MIXS:0000026	MIGS-13	
 pathogenicity	known pathogenicity	no change made in v6, added discussion for v7	-- move to specific packages, review name, definition in GitHub ticket		Needs updating	NMD	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/65	To what is the entity pathogenic	names of organisms that the entity is pathogenic to	{text}	human, animal, plant, fungi, bacteria	nucleic acid sequence source	C	C	-	C	-	-	-	-	-	-	X		1	21	MIXS:0000027	MIGS-14	
-biotic_relationship	observed biotic relationship		New ticket opened pointing out the discrepencies between definition and expected values (28jun)		fine as is	NMD	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/168	Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object	enumeration	[free living|parasitism|commensalism|symbiotic|mutualism]	free living	nucleic acid sequence source	X	C	-	X	-	-	-	C	-	-	X		1	22	MIXS:0000028	MIGS-15	
+biotic_relationship	observed biotic relationship		New ticket opened pointing out the discrepancies between definition and expected values (28jun)		fine as is	NMD	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/168	Description of relationship(s) between the subject organism and other organism(s) it is associated with. E.g., parasite on species X; mutualist with species Y. The target organism is the subject of the relationship, and the other organism(s) is the object	enumeration	[free living|parasitism|commensalism|symbiotic|mutualism]	free living	nucleic acid sequence source	X	C	-	X	-	-	-	C	-	-	X		1	22	MIXS:0000028	MIGS-15	
 specific_host	host scientific name	updated item name	refine the definition, look at virus hosts, obligate symbionts, ...		fine as is _ or is it repetitive with host_spec_range?	NMD	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/131	Report the host's taxonomic name and/or NCBI taxonomy ID.	host scientific name, taxonomy ID	{text}|{NCBI taxid}	Homo sapiens and/or 9606	nucleic acid sequence source	X	C	C	C	-	-	-	-	-	-	X		1	23	MIXS:0000029	MIGS-16	
 host_spec_range	host specificity or range	Updated definition	-- check usage, consider merging specific_host and host_spec_range ; consider removing this term		Needs updating	NMD	MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/66	The range and diversity of host species that an organism is capable of infecting, defined by NCBI taxonomy identifier.	NCBI taxid	{integer}	9606	nucleic acid sequence source	X	X	X	C	-	-	-	-	-	-	X		m	24	MIXS:0000030		
 health_disease_stat	health or disease status of specific host at time of collection	deprecated term	- change core term to match packages 		I propose to align this term to 'disease status' that is in the packages, any objections ?	[LS] 	MIXS core (optional)	#141	Health or disease status of specific host at time of collection	enumeration	[healthy|diseased|dead|disease-free|undetermined|recovering|resolving|pre-existing condition|pathological|life threatening|congenital]	dead	nucleic acid sequence source	X	C	-	C	-	-	-	-	-	-	-		1	25	MIXS:0000031		
@@ -38,7 +38,7 @@ propagation	propagation	UPDATED DEFINITION 07-MAY-2021 cih	revise definition; mo
 encoded_traits	encoded traits	no change made in v6, added discussion for v7	generalize the definition, to apply across all packages		to do		MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/143	Should include key traits like antibiotic resistance or xenobiotic degradation phenotypes for plasmids, converting genes for phage	for plasmid: antibiotic resistance; for phage: converting genes	{text}	beta-lactamase class A	nucleic acid sequence source	-	X	C	C	-	-	-	-	-	-	-		1	28	MIXS:0000034		
 rel_to_oxygen	relationship to oxygen	no change made in v6, added discussion for v7	clarify in the definition, that this about the sample, not the organism, move to packages		to do		MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/144	Is this organism an aerobe, anaerobe? Please note that aerobic and anaerobic are valid descriptors for microbial environments	enumeration	[aerobe|anaerobe|facultative|microaerophilic|microanaerobe|obligate aerobe|obligate anaerobe]	aerobe	nucleic acid sequence source	-	C	-	-	-	X	X	C	X	X	-		1	29	MIXS:0000015	MIGS-22	
 isol_growth_condt	isolation and growth condition	no change made in v6, added discussion for v7	Change the label, Reference - keep in the core, update the definition, DNA and RNA isolation kits - use Protocols.io; look at usage in DSMZ (https://bacdive.dsmz.de/ )		to do		MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/145	Publication reference in the form of pubmed ID (pmid), digital object identifier (doi) or url for isolation and growth condition specifications of the organism/material	PMID,DOI or URL	{PMID}|{DOI}|{URL}	doi: 10.1016/j.syapm.2018.01.009	nucleic acid sequence source	M	M	M	M	M	-	-	M	-	-	-		1	30	MIXS:0000003	MIGS-23	
-samp_collec_device	sample collection device		revise the definiton, encourage Protocols.io - split into two terms, sample collection device (controlled vocabulary) and sample collection method, ok - Anji		to do, Split: Done 		MIXS core (optional)		The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094).	device name	{termLabel} {[termID]}|{text}	swab, biopsy, niskin bottle, push core, drag swab [GENEPIO:0002713]	nucleic acid sequence source	X	X	X	X	X	C	C	X	C	C	C		1	31	MIXS:0000002		
+samp_collec_device	sample collection device		revise the definition, encourage Protocols.io - split into two terms, sample collection device (controlled vocabulary) and sample collection method, ok - Anji		to do, Split: Done 		MIXS core (optional)		The device used to collect an environmental sample. This field accepts terms listed under environmental sampling device (http://purl.obolibrary.org/obo/ENVO). This field also accepts terms listed under specimen collection device (http://purl.obolibrary.org/obo/GENEPIO_0002094).	device name	{termLabel} {[termID]}|{text}	swab, biopsy, niskin bottle, push core, drag swab [GENEPIO:0002713]	nucleic acid sequence source	X	X	X	X	X	C	C	X	C	C	C		1	31	MIXS:0000002		
 samp_collec_method	sample collection method						MIXS core (optional)	#159	The method employed for collecting the sample.	PMID,DOI,url , or text	{PMID}|{DOI}|{URL}|{text}	swabbing	nucleic acid sequence source	X	X	X	X	X	C	C	X	C	C	C		1		MIXS:0001225		
 samp_mat_process	sample material processing	updated definition, Expected value	the remainder on this list, keep as MIxS core optional or move to their appropriate checklist	X	to do		MIXS core (optional)	https://github.com/GenomicsStandardsConsortium/mixs/issues/146	A brief description of any processing applied to the sample during or after retrieving the sample from environment, or a link to the relevant protocol(s) performed.	text	{text}	filtering of seawater, storing samples in ethanol	nucleic acid sequence source	X	X	X	X	X	C	C	C	C	C	C		1	32	MIXS:0000016		
 size_frac	size fraction selected	no change made.	move to MISAG/MIMAG		to do		MISAG/MIMAG		Filtering pore size used in sample preparation	filter size value range	{float}-{float} {unit}	0-0.22 micrometer	nucleic acid sequence source	-	-	-	-	-	X	X	-	X	X	C		1	33	MIXS:0000017		
@@ -61,11 +61,11 @@ adapters	adapters	no change made.	stay in the core						Adapters provide priming
 pcr_cond	pcr conditions	no change made.	stay in the core		fine as is	CIH			Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...'	initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final elongation:degrees_minutes;total cycles	initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final elongation:degrees_minutes;total cycles	initial denaturation:94_3;annealing:50_1;elongation:72_1.5;final elongation:72_10;35	sequencing	-	-	-	-	-	-	C	C	-	-	-		1	49	MIXS:0000049		
 seq_meth	sequencing method	Updated definition	recommendation - look at OBI for the allowable terms , add to definition for options to look at OBI. For now, we are using the ENA approved checklist at https://ena-docs.readthedocs.io/en/latest/submit/reads/webin-cli.html#metadata-validation.		update definition see #96		MIXS core (manditory)	#96	Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103).	Text or OBI	{termLabel} {[termID]}|{text}	454 Genome Sequencer FLX [OBI:0000702]	sequencing	M	M	M	M	M	M	M	M	M	M	M		1	50	MIXS:0000050	MIGS-29	
 seq_quality_check	sequence quality check	no change made.	stay in core						Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA	none or manually edited	[none|manually edited]	none	sequencing	-	-	-	-	-	-	C	C	-	-	-		1	51	MIXS:0000051		
-chimera_check	chimera check software	Updated name and definition	MIMARKS term		"definition needs work - should include what is expected value for this item. Also Jasper suggested chaning name to include suffix ""software"" like other terms e.g. assembly software."	CIH		#97	Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences.	name and version of software, parameters used	{software};{version};{parameters}	uchime;v4.1;default parameters	sequencing	-	-	-	-	-	-	C	C	-	-	-		1	52	MIXS:0000052		
+chimera_check	chimera check software	Updated name and definition	MIMARKS term		"definition needs work - should include what is expected value for this item. Also Jasper suggested changing name to include suffix ""software"" like other terms e.g. assembly software."	CIH		#97	Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences.	name and version of software, parameters used	{software};{version};{parameters}	uchime;v4.1;default parameters	sequencing	-	-	-	-	-	-	C	C	-	-	-		1	52	MIXS:0000052		
 tax_ident	taxonomic identity marker	no change made.	move to checklist - MISAG, MIMAG, review the definition, can this point to a controlled vocabulary 		clarify definition and move to checklist for MISAG and MIMAG				The phylogenetic marker(s) used to assign an organism name to the SAG or MAG	enumeration	[16S rRNA gene|multi-marker approach|other]	other: rpoB gene	sequencing	C	C	C	C	C	-	-	-	M	M	X		1	53	MIXS:0000053		
 			Note, MIxS v4, 'assembly', was removed in MIxS v5																											
-assembly_qual	assembly quality	no change made in v6, added discussion for v7	update description to include the previous item name. Add previous term as a individual item and mark as obsolete. keep in the core ; add a field for previous names/obsolete terms		can we re-write defintion to cover all possible assembly use cases, MISAG, MIMAG, MIUVIG fields; in MIxSv4, this term was 'finishing strategy'			https://github.com/GenomicsStandardsConsortium/mixs/issues/147	The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling ≥ 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated	enumeration	[Finished genome|High-quality draft genome|Medium-quality draft genome|Low-quality draft genome|Genome fragment(s)]	High-quality draft genome	sequencing	M	M	X	X	X	C	-	-	M	M	M		1	54	MIXS:0000056		
-assembly_name	assembly name	no change made.	assembly name is in ealier MIxS (v4 and before); keep in core 		more details on this required?				Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community	name and version of assembly	{text} {text}	HuRef, JCVI_ISG_i3_1.0	sequencing	C	C	C	C	C	C	-	-	C	C	C		1	55	MIXS:0000057	MIGS-30	
+assembly_qual	assembly quality	no change made in v6, added discussion for v7	update description to include the previous item name. Add previous term as a individual item and mark as obsolete. keep in the core ; add a field for previous names/obsolete terms		can we re-write definition to cover all possible assembly use cases, MISAG, MIMAG, MIUVIG fields; in MIxSv4, this term was 'finishing strategy'			https://github.com/GenomicsStandardsConsortium/mixs/issues/147	The assembly quality category is based on sets of criteria outlined for each assembly quality category. For MISAG/MIMAG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities with a consensus error rate equivalent to Q50 or better. High Quality Draft:Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs. Medium Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Low Quality Draft:Many fragments with little to no review of assembly other than reporting of standard assembly statistics. Assembly statistics include, but are not limited to total assembly size, number of contigs, contig N50/L50, and maximum contig length. For MIUVIG; Finished: Single, validated, contiguous sequence per replicon without gaps or ambiguities, with extensive manual review and editing to annotate putative gene functions and transcriptional units. High-quality draft genome: One or multiple fragments, totaling ≥ 90% of the expected genome or replicon sequence or predicted complete. Genome fragment(s): One or multiple fragments, totalling < 90% of the expected genome or replicon sequence, or for which no genome size could be estimated	enumeration	[Finished genome|High-quality draft genome|Medium-quality draft genome|Low-quality draft genome|Genome fragment(s)]	High-quality draft genome	sequencing	M	M	X	X	X	C	-	-	M	M	M		1	54	MIXS:0000056		
+assembly_name	assembly name	no change made.	assembly name is in earlier MIxS (v4 and before); keep in core 		more details on this required?				Name/version of the assembly provided by the submitter that is used in the genome browsers and in the community	name and version of assembly	{text} {text}	HuRef, JCVI_ISG_i3_1.0	sequencing	C	C	C	C	C	C	-	-	C	C	C		1	55	MIXS:0000057	MIGS-30	
 assembly_software	assembly software	no change made.	add to core optional		to do - MIMAG/MISAG				Tool(s) used for assembly, including version number and parameters	name and version of software, parameters used	{software};{version};{parameters}	metaSPAdes;3.11.0;kmer set 21,33,55,77,99,121, default parameters otherwise	sequencing	M	M	M	M	M	C	C	-	M	M	M		1	56	MIXS:0000058		
 annot	annotation	no change made in v6, added discussion for v7	"combine with ""feat_pred"" and rewrite definition (ramona) . add to core optional, add obsolete term  - Ramona will write new definition"		to do - in MIxSv4 - this was 'annoation source', MIMAG/MISAG	ramona		https://github.com/GenomicsStandardsConsortium/mixs/issues/148	Tool used for annotation, or for cases where annotation was provided by a community jamboree or model organism database rather than by a specific submitter	name of tool or pipeline used, or annotation source description	{text}	prokka	sequencing	C	C	C	C	C	C	-	-	X	X	X		1	57	MIXS:0000059		
 number_contig	number of contigs	no change made	add to core optional, add obsolete term 		to do - MIxS 4, was part of 'Finishing Strategy' term				Total number of contigs in the cleaned/submitted assembly that makes up a given genome, SAG, MAG, or UViG	value	{integer}	40	sequencing	M	M	X	X	X	C	-	-	X	X	M		1	58	MIXS:0000060		
@@ -82,7 +82,7 @@ compl_software	completeness software	no change made	core optional - link the sco
 compl_appr	completeness approach	updated definition	update definition, make more generic for other checklists, keep MIMAG, etc. part of definition		needs attention - MISAG, MIMAG, MIUViG	CIH		#81	The approach used to determine the completeness of a given genomic assembly, which would typically make use of a set of conserved marker genes or a closely related reference genome. For UViG completeness, include reference genome or group used, and contig feature suggesting a complete genome	text	[marker gene|reference based|other]	other: UViG length compared to the average length of reference genomes from the P22virus genus (NCBI RefSeq v83)	sequencing	-	-	-	-	-	-	-	-	X	X	C		1	69	MIXS:0000071		
 contam_score	contamination score	no change made	update definition, keep in core optional		needs attention - MIMAG/MISAG	CIH			The contamination score is based on the fraction of single-copy genes that are observed more than once in a query genome. The following scores are acceptable for; High Quality Draft: < 5%, Medium Quality Draft: < 10%, Low Quality Draft: < 10%. Contamination must be below 5% for a SAG or MAG to be deposited into any of the public databases	value	{float} percentage	1%	sequencing	-	-	-	-	-	-	-	-	M	M	-		1	70	MIXS:0000072		
 contam_screen_input	contamination screening input	no change made	update definition, keep in core optional		needs attention - MIMAG/MISAG	CIH			The type of sequence data used as input	enumeration	[reads| contigs]	contigs	sequencing	-	-	-	-	-	-	-	-	X	X	-		1	71	MIXS:0000005		
-contam_screen_param	contamination screening parameters	no change made	check definition, keep in core optional		needs attention - MIMAG/MISAG	CIH			Specific parameters used in the decontamination sofware, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer	enumeration;value or name	[ref db|kmer|coverage|combination];{text|integer}	kmer	sequencing	-	-	-	-	-	-	-	-	X	X	-		1	72	MIXS:0000073		
+contam_screen_param	contamination screening parameters	no change made	check definition, keep in core optional		needs attention - MIMAG/MISAG	CIH			Specific parameters used in the decontamination software, such as reference database, coverage, and kmers. Combinations of these parameters may also be used, i.e. kmer and coverage, or reference database and kmer	enumeration;value or name	[ref db|kmer|coverage|combination];{text|integer}	kmer	sequencing	-	-	-	-	-	-	-	-	X	X	-		1	72	MIXS:0000073		
 decontam_software	decontamination software	no change made	check definition, keep in core optional		needs attention - MIMAG/MISAG	CIH			Tool(s) used in contamination screening	enumeration	[checkm/refinem|anvi'o|prodege|bbtools:decontaminate.sh|acdc|combination]	anvi'o	sequencing	-	-	-	-	-	-	-	-	X	X	-		1	73	MIXS:0000074		
 sort_tech	sorting technology	no change made			needs attention - MISAG, MIUViG	CIH			Method used to sort/isolate cells or particles of interest	enumeration	[flow cytometric cell sorting|microfluidics|lazer-tweezing|optical manipulation|micromanipulation|other]	optical manipulation	sequencing	-	-	-	-	-	-	-	-	M	-	C		1	74	MIXS:0000075		
 single_cell_lysis_appr	single cell or viral particle lysis approach	no change made			needs attention - MISAG, MIUViG	CIH			Method used to free DNA from interior of the cell(s) or particle(s)	enumeration	[chemical|enzymatic|physical|combination]	enzymatic	sequencing	-	-	-	-	-	-	-	-	M	-	C		1	75	MIXS:0000076		
diff --git a/tests/input/rda-crosswalk.tsv b/tests/input/rda-crosswalk.tsv
index 9a2dd64..3f6a963 100644
--- a/tests/input/rda-crosswalk.tsv
+++ b/tests/input/rda-crosswalk.tsv
@@ -12,7 +12,7 @@ A. From Google dataset search recommendaton	Thing	description	mandatory	Text	A d
 	CreativeWork	keywords		Text	Keywords or tags used to describe this content. Multiple entries in a keywords list are typically delimited by commas.	dct:keyword (R)	dcat:keyword	dcterms:subject (R)*	MD_Identification/descriptiveKeywords//keyword	keywords (R)	Subject (M); Topic Classification Term; Keywords	keywords (O)	collection/subject        	dcterms:subject	keywords (M)		keywords		keywords	keyword
 	CreativeWork	license		CreativeWork or URL	A license document that applies to this content, typically indicated by URL.  (A license under which the dataset is distributed.)	dct:license	dct:license	dcterms:rights	MD_Identification/resourceConstraints//reference/CI_Citation, or text in MD_LegalConstraints/useLimitation [restrictionCode = license]	license (R)		licenses (R)	collection/rights/licence[@rightsURI] AND/OR collection/rights/licence[@type] AND collection/rights/licence	dcterms:rights	license (R)	Rights (O)	rights	Rights (O)	license	
 	CreativeWork	creator		Organization or Person	The creator/author of this CreativeWork (dataset). This is the same as the Author property for CreativeWork.  (To uniquely identify individuals, use ORCID ID as the value of the sameAs property of the Person type. To uniquely identify institutions and organizations, use ROR ID. )	dct:creator	dcterms:creator	dcterms:creator (M) 	MD_Identification/citation//citedResponsibleParty//name  [role = one of {author, coAuthor, originator, editor}]	creator (M)	Author; authorName (M)	creator (M)	collection/citationInfo/citationMetadata/contributor OR relatedObject|relatedInfo party/name where relation=IsPrincipalInvestigatorOf OR relatedObject|relatedInfo party/name where relation=author OR relatedObject|relatedInfo party/name where relation=coInvestigator OR relatedObject|relatedInfo party/name where relation=hasCollector	dcterms:creator	creator (R)	Creator (R)	AuthEnty*	Creators (M)	author	ResourceHeader/Contact[@role=PrincipalInvestigator]  ResourceHeader/Contact[@role=DataProducer]
-	CreativeWork	isPartOf		CreativeWork	Indicates a CreativeWork that this CreativeWork is (in some sense) part of. Reverse property hasPart.  If the dataset is a collection of smaller datasets, use the hasPart property to denote such relationship. Conversly, if the dataset is part of a larger dataset, use isPartOf.	dct:isPartOf (R)	dcterms:isPartOf	isPartOf	MD_Identifcation/associatedResource/name/CI_Citation [associationType = 'largerWorkCitation']	 includedIn(Dataset) (R)			relatedObject|relatedInfo collection where relation[@type='isPartOf']        	dcterms:isPartOf			isPartOf		isPartOf	ParentID (only for Granule resource type)
+	CreativeWork	isPartOf		CreativeWork	Indicates a CreativeWork that this CreativeWork is (in some sense) part of. Reverse property hasPart.  If the dataset is a collection of smaller datasets, use the hasPart property to denote such relationship. Conversely, if the dataset is part of a larger dataset, use isPartOf.	dct:isPartOf (R)	dcterms:isPartOf	isPartOf	MD_Identifcation/associatedResource/name/CI_Citation [associationType = 'largerWorkCitation']	 includedIn(Dataset) (R)			relatedObject|relatedInfo collection where relation[@type='isPartOf']        	dcterms:isPartOf			isPartOf		isPartOf	ParentID (only for Granule resource type)
 	CreativeWork	hasPart		CreativeWork	Indicates a CreativeWork that is (in some sense) a part of this CreativeWork. Reverse property isPartOf	dct:hasPart (R)	dcterms:hasPart	hasPart	MD_Identifcation/associatedResource/name/CI_Citation [associationType = 'isComposedOf']	includes(Dataset) (R)		hasPart (O)	relatedObject|relatedInfo collection where relation[@type='hasPart']        	dcterms:hasPart			hasPart		hasPart	
 	CreativeWork	version		Number or Text	The version of the CreativeWork embodied by a specified resource.	owl:versionInfo	owl:versionInfo	Version (O)	MD_Identification/citation//edition	 version (O)	Version	version (R)	registryObject:collection:citationInfo:citationMetadata:version		version (R)		version	version (O)	version	ProviderVersion
 	CreativeWork	temporalCoverage		Text	The temporalCoverage of a CreativeWork indicates the period that the content applies to  (The data in the dataset covers a specific time interval. Only include this property if the dataset has a temporal dimension.)	dct:temporal	dcterms:temporal	Date	MD_Identification/extent//temporalElement/extent/TM_Primitive	temporalCoverage (O)	Time Period Covered		collection/coverage/temporal	dcterms:temporal (start); dcterms:temporal (end)		Temporal Coverage (O)	 temporal			TemporalDescription/TimeSpan