Skip to content

Commit

Permalink
feat(ci): Switch to using camelCase for metadata fields (#2507)
Browse files Browse the repository at this point in the history
* Switch to camelCase by iterating through metadata fields in values.yaml and performing search and replace.

* Keep the type `pango_lineage` in snake_case, as LAPIS recognizes it as a type name.
  • Loading branch information
anna-parker authored Aug 22, 2024
1 parent 7101a57 commit 0a55712
Show file tree
Hide file tree
Showing 37 changed files with 536 additions and 501 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ val defaultExternalMetadata =
accession = "If a test result shows this, processed data was not prepared correctly.",
version = 1,
externalMetadata = mapOf(
"insdc_accession_full" to TextNode("GENBANK1000.1"),
"insdcAccessionFull" to TextNode("GENBANK1000.1"),
),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class SubmitExternalMetadataEndpointTest(
val releasedSequenceEntry = convenienceClient.getReleasedData()
.find { it.metadata["accession"]?.textValue() == accessions.first() }

assertThat(releasedSequenceEntry?.metadata, hasEntry("insdc_accession_full", TextNode("GENBANK1000.1")))
assertThat(releasedSequenceEntry?.metadata, hasEntry("insdcAccessionFull", TextNode("GENBANK1000.1")))
}

@Test
Expand All @@ -86,7 +86,7 @@ class SubmitExternalMetadataEndpointTest(
val releasedSequenceEntry = convenienceClient.getReleasedData()
.find { it.metadata["accession"]?.textValue() == accessions.first() }

assertThat(releasedSequenceEntry?.metadata, hasEntry("insdc_accession_full", TextNode("GENBANK1000.1")))
assertThat(releasedSequenceEntry?.metadata, hasEntry("insdcAccessionFull", TextNode("GENBANK1000.1")))
assertThat(releasedSequenceEntry?.metadata, hasEntry("other_db_accession", TextNode("DB1.1")))
}

Expand All @@ -105,12 +105,12 @@ class SubmitExternalMetadataEndpointTest(
.andExpect(content().contentType(MediaType.APPLICATION_JSON_VALUE))
.andExpect(
jsonPath("\$.detail")
.value(containsString("Unknown fields in metadata: insdc_accession_full")),
.value(containsString("Unknown fields in metadata: insdcAccessionFull")),
)
val releasedSequenceEntry = convenienceClient.getReleasedData()
.find { it.metadata["accession"]?.textValue() == accessions.first() }

assertThat(releasedSequenceEntry?.metadata, not(hasKey("insdc_accession_full")))
assertThat(releasedSequenceEntry?.metadata, not(hasKey("insdcAccessionFull")))
}

@Test
Expand All @@ -137,7 +137,7 @@ class SubmitExternalMetadataEndpointTest(
val releasedSequenceEntry = convenienceClient.getReleasedData()
.find { it.metadata["accession"]?.textValue() == accessions.first().accession }

assertThat(releasedSequenceEntry?.metadata, not(hasKey("insdc_accession_full")))
assertThat(releasedSequenceEntry?.metadata, not(hasKey("insdcAccessionFull")))
}

@Test
Expand All @@ -161,6 +161,6 @@ class SubmitExternalMetadataEndpointTest(
val releasedSequenceEntry = convenienceClient.getReleasedData()
.find { it.metadata["accession"]?.textValue() == accession }

assertThat(releasedSequenceEntry?.metadata, not(hasKey("insdc_accession_full")))
assertThat(releasedSequenceEntry?.metadata, not(hasKey("insdcAccessionFull")))
}
}
4 changes: 2 additions & 2 deletions backend/src/test/resources/backend_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
],
"externalMetadata": [
{
"name": "insdc_accession_full",
"name": "insdcAccessionFull",
"type": "string",
"externalMetadataUpdater": "ena",
"required": true
Expand Down Expand Up @@ -176,4 +176,4 @@
}
}
}
}
}
4 changes: 2 additions & 2 deletions backend/src/test/resources/backend_config_single_segment.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"accessionPrefix" : "LOC_",
"accessionPrefix": "LOC_",
"organisms": {
"dummyOrganism": {
"referenceGenomes": {
Expand Down Expand Up @@ -76,4 +76,4 @@
}
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Additionally the pipeline performs checks on the metadata fields. The checks are
In the default configuration the pipeline performs:
* **type checks**: Checks that the type of each metadata field corresponds to the expected `type` value seen in the config (default is string).
* **required value checks**: Checks that every required field (i.e. a field whose `required` entry in the config is true) is not None.
* **INSDC-accepted country checks**: Using the `process_options` preprocessing function checks that the `geo_loc_country` field is set to an [INSDC-accepted country](https://www.ebi.ac.uk/ena/browser/api/xml/ERC000011) option.
* **INSDC-accepted country checks**: Using the `process_options` preprocessing function checks that the `geoLocCountry` field is set to an [INSDC-accepted country](https://www.ebi.ac.uk/ena/browser/api/xml/ERC000011) option.

The pipeline also formats metadata fields:
* **process date**: Takes a date string and returns a date field in the "%Y-%m-%d" format.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,11 @@ organisms:
- name: host
type: string
autocomplete: true
- name: pango_lineage
- name: pangoLineage
type: pango_lineage
autocomplete: true
required: true
- name: insdc_accession_full
- name: insdcAccessionFull
type: string
displayName: INSDC accession
customDisplay:
Expand All @@ -112,12 +112,12 @@ organisms:
- country
- division
- date
- pango_lineage
- pangoLineage
defaultOrder: descending
defaultOrderBy: date
silo:
dateToSortBy: date
partitionBy: pango_lineage
partitionBy: pangoLineage
preprocessing:
image: ghcr.io/loculus-project/preprocessing-dummy
args:
Expand Down
8 changes: 4 additions & 4 deletions ena-submission/ENA_submission.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,14 @@ The following could be implemented as post-MVP features:
<!--filename: project.xml-->
<PROJECT_SET>
<PROJECT alias={group_accession}:{organism}>
<NAME>{ncbi_virus_name}</NAME>
<TITLE>{ncbi_virus_name} Genome sequencing</TITLE>
<DESCRIPTION>Automated upload of {ncbi_virus_name} sequences submitted by {Institution} from {db}.</DESCRIPTION>
<NAME>{ncbiVirusName}</NAME>
<TITLE>{ncbiVirusName} Genome sequencing</TITLE>
<DESCRIPTION>Automated upload of {ncbiVirusName} sequences submitted by {Institution} from {db}.</DESCRIPTION>
<SUBMISSION_PROJECT>
<SEQUENCING_PROJECT/>
<ORGANISM>
<TAXON_ID>{taxon_id}</TAXON_ID>
<SCIENTIFIC_NAME>{ncbi_virus_name}</SCIENTIFIC_NAME>
<SCIENTIFIC_NAME>{ncbiVirusName}</SCIENTIFIC_NAME>
</ORGANISM>
</SUBMISSION_PROJECT>
<PROJECT_LINKS>
Expand Down
64 changes: 32 additions & 32 deletions ena-submission/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,103 +16,103 @@ organisms:
organismName: "Crimean-Congo Hemorrhagic Fever Virus"
externalMetadata:
- externalMetadataUpdater: ena
name: ncbi_release_date
name: ncbiReleaseDate
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date_L
name: ncbiUpdateDate_L
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date_M
name: ncbiUpdateDate_M
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date_S
name: ncbiUpdateDate_S
type: date
- externalMetadataUpdater: ena
name: ncbi_submitter_country
name: ncbiSubmitterCountry
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_L
name: insdcAccessionBase_L
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_M
name: insdcAccessionBase_M
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base_S
name: insdcAccessionBase_S
type: string
- externalMetadataUpdater: ena
name: insdc_version_L
name: insdcVersion_L
type: int
- externalMetadataUpdater: ena
name: insdc_version_M
name: insdcVersion_M
type: int
- externalMetadataUpdater: ena
name: insdc_version_S
name: insdcVersion_S
type: int
- externalMetadataUpdater: ena
name: insdc_accession_full_L
name: insdcAccessionFull_L
type: string
- externalMetadataUpdater: ena
name: insdc_accession_full_M
name: insdcAccessionFull_M
type: string
- externalMetadataUpdater: ena
name: insdc_accession_full_S
name: insdcAccessionFull_S
type: string
- externalMetadataUpdater: ena
name: bioproject_accession
name: bioprojectAccession
type: string
- externalMetadataUpdater: ena
name: biosample_accession
name: biosampleAccession
type: string
- externalMetadataUpdater: ena
name: ncbi_sourcedb
name: ncbiSourceDb
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_name
name: ncbiVirusName
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_tax_id
name: ncbiVirusTaxId
type: int
- externalMetadataUpdater: ena
name: sra_run_accession
name: sraRunAccession
type: string
ebola-sudan:
ingest:
taxon_id: 3052460
organismName: "Ebola Sudan"
externalMetadata:
- externalMetadataUpdater: ena
name: ncbi_release_date
name: ncbiReleaseDate
type: date
- externalMetadataUpdater: ena
name: ncbi_update_date
name: ncbiUpdateDate
type: date
- externalMetadataUpdater: ena
name: ncbi_submitter_country
name: ncbiSubmitterCountry
type: string
- externalMetadataUpdater: ena
name: insdc_accession_base
name: insdcAccessionBase
type: string
- externalMetadataUpdater: ena
name: insdc_version
name: insdcVersion
type: int
- externalMetadataUpdater: ena
name: insdc_accession_full
name: insdcAccessionFull
type: string
- externalMetadataUpdater: ena
name: bioproject_accession
name: bioprojectAccession
type: string
- externalMetadataUpdater: ena
name: biosample_accession
name: biosampleAccession
type: string
- externalMetadataUpdater: ena
name: ncbi_sourcedb
name: ncbiSourceDb
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_name
name: ncbiVirusName
type: string
- externalMetadataUpdater: ena
name: ncbi_virus_tax_id
name: ncbiVirusTaxId
type: int
- externalMetadataUpdater: ena
name: sra_run_accession
name: sraRunAccession
type: string
Loading

0 comments on commit 0a55712

Please sign in to comment.