From 986099640f2f4805a9ea0a21c78b64fb9f1f4119 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Mon, 26 Aug 2024 15:50:35 +0200 Subject: [PATCH 1/2] fix: always validate that preprocessing submitted data for correct organism not only when there is a validation error --- .../service/submission/SubmissionDatabaseService.kt | 2 +- .../submission/SubmitProcessedDataEndpointTest.kt | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt index b24cf6138..2c240b61b 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt @@ -350,9 +350,9 @@ class SubmissionDatabaseService( } private fun validateProcessedData(submittedProcessedData: SubmittedProcessedData, organism: Organism) = try { + throwIfIsSubmissionForWrongOrganism(submittedProcessedData, organism) processedSequenceEntryValidatorFactory.create(organism).validate(submittedProcessedData.data) } catch (validationException: ProcessingValidationException) { - throwIfIsSubmissionForWrongOrganism(submittedProcessedData, organism) throw validationException } diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt index 7566cc749..042b93046 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt @@ -381,21 +381,21 @@ class SubmitProcessedDataEndpointTest( @Test fun `WHEN I submit an entry with the wrong organism THEN refuses update with unprocessable entity`() { - val accession = prepareUnprocessedSequenceEntry(DEFAULT_ORGANISM) + val accession = prepareUnprocessedSequenceEntry(OTHER_ORGANISM) submissionControllerClient.submitProcessedData( PreparedProcessedData.successfullyProcessed(accession = accession), - organism = OTHER_ORGANISM, + organism = DEFAULT_ORGANISM, ) .andExpect(status().isUnprocessableEntity) .andExpect(content().contentType(MediaType.APPLICATION_JSON_VALUE)) .andExpect( jsonPath("\$.detail") - .value(containsString("$accession.1 is for organism dummyOrganism")), + .value(containsString("$accession.1 is for organism otherOrganism")), ) .andExpect( jsonPath("\$.detail") - .value(containsString("submitted data is for organism otherOrganism")), + .value(containsString("submitted data is for organism dummyOrganism")), ) } From 195107565b24a75a6bb8e5b0c425a906a04b375b Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Mon, 26 Aug 2024 16:06:58 +0200 Subject: [PATCH 2/2] feat: transform lower case sequences to upper case --- .../submission/SubmissionDatabaseService.kt | 29 +++++++++++++++++-- .../submission/PreparedProcessedData.kt | 24 +++++++++++++++ .../SubmitProcessedDataEndpointTest.kt | 22 ++++++++++++++ 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt index 2c240b61b..6295b79a8 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/SubmissionDatabaseService.kt @@ -75,6 +75,7 @@ import org.springframework.transaction.annotation.Transactional import java.io.BufferedReader import java.io.InputStream import java.io.InputStreamReader +import java.util.Locale import javax.sql.DataSource private val log = KotlinLogging.logger { } @@ -318,7 +319,7 @@ class SubmissionDatabaseService( val submittedWarnings = submittedProcessedData.warnings.orEmpty() val (newStatus, processedData) = when { - submittedErrors.isEmpty() -> FINISHED to validateProcessedData( + submittedErrors.isEmpty() -> FINISHED to postprocessAndValidateProcessedData( submittedProcessedData, organism, ) @@ -349,13 +350,35 @@ class SubmissionDatabaseService( return newStatus } - private fun validateProcessedData(submittedProcessedData: SubmittedProcessedData, organism: Organism) = try { + private fun postprocessAndValidateProcessedData( + submittedProcessedData: SubmittedProcessedData, + organism: Organism, + ) = try { throwIfIsSubmissionForWrongOrganism(submittedProcessedData, organism) - processedSequenceEntryValidatorFactory.create(organism).validate(submittedProcessedData.data) + val processedData = makeSequencesUpperCase(submittedProcessedData.data) + processedSequenceEntryValidatorFactory.create(organism).validate(processedData) } catch (validationException: ProcessingValidationException) { throw validationException } + private fun makeSequencesUpperCase(processedData: ProcessedData) = processedData.copy( + unalignedNucleotideSequences = processedData.unalignedNucleotideSequences.mapValues { (_, it) -> + it?.uppercase(Locale.US) + }, + alignedNucleotideSequences = processedData.alignedNucleotideSequences.mapValues { (_, it) -> + it?.uppercase(Locale.US) + }, + alignedAminoAcidSequences = processedData.alignedAminoAcidSequences.mapValues { (_, it) -> + it?.uppercase(Locale.US) + }, + nucleotideInsertions = processedData.nucleotideInsertions.mapValues { (_, it) -> + it.map { insertion -> insertion.copy(sequence = insertion.sequence.uppercase(Locale.US)) } + }, + aminoAcidInsertions = processedData.aminoAcidInsertions.mapValues { (_, it) -> + it.map { insertion -> insertion.copy(sequence = insertion.sequence.uppercase(Locale.US)) } + }, + ) + private fun validateExternalMetadata( externalSubmittedData: ExternalSubmittedData, organism: Organism, diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt index 3f78df779..235a919bf 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt @@ -141,6 +141,30 @@ object PreparedProcessedData { ), ) + fun withLowercaseSequences(accession: Accession, version: Version) = defaultSuccessfulSubmittedData.copy( + accession = accession, + version = version, + data = defaultProcessedData.copy( + unalignedNucleotideSequences = mapOf( + MAIN_SEGMENT to "nactg", + ), + alignedNucleotideSequences = mapOf( + MAIN_SEGMENT to "attaaaggtttataccttcccaggtaacaaaccaaccaactttcgatct", + ), + nucleotideInsertions = mapOf( + MAIN_SEGMENT to listOf(Insertion(123, "actg")), + ), + alignedAminoAcidSequences = mapOf( + SOME_LONG_GENE to "acdefghiklmnpqrstvwybzx-*", + SOME_SHORT_GENE to "mads", + ), + aminoAcidInsertions = mapOf( + SOME_LONG_GENE to listOf(Insertion(123, "def")), + SOME_SHORT_GENE to listOf(Insertion(123, "n")), + ), + ), + ) + fun withMissingMetadataFields( accession: Accession, version: Long = defaultSuccessfulSubmittedData.version, diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt index 042b93046..d0f10b58e 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt @@ -90,6 +90,28 @@ class SubmitProcessedDataEndpointTest( .assertStatusIs(Status.AWAITING_APPROVAL) } + @Test + fun `WHEN I submit data with lowercase sequences THEN the sequences are converted to uppercase`() { + val (accession, version) = prepareExtractedSequencesInDatabase().first() + + submissionControllerClient.submitProcessedData( + PreparedProcessedData.withLowercaseSequences(accession = accession, version = version), + ) + .andExpect(status().isNoContent) + + val processedData = convenienceClient.getSequenceEntryToEdit(accession = accession, version = version) + .processedData + + assertThat(processedData.unalignedNucleotideSequences, hasEntry(MAIN_SEGMENT, "NACTG")) + assertThat( + processedData.alignedNucleotideSequences, + hasEntry(MAIN_SEGMENT, "ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCT"), + ) + assertThat(processedData.alignedAminoAcidSequences, hasEntry(SOME_LONG_GENE, "ACDEFGHIKLMNPQRSTVWYBZX-*")) + assertThat(processedData.nucleotideInsertions, hasEntry(MAIN_SEGMENT, listOf(Insertion(123, "ACTG")))) + assertThat(processedData.aminoAcidInsertions, hasEntry(SOME_LONG_GENE, listOf(Insertion(123, "DEF")))) + } + @Test fun `WHEN I submit with all valid symbols THEN the sequence entry is in status processed`() { val accessions = prepareExtractedSequencesInDatabase().map { it.accession }