From 5ef6d0c0f9556d464a482ffbcf05a435f7ff1621 Mon Sep 17 00:00:00 2001 From: Fabian Engelniederhammer Date: Tue, 23 Jan 2024 16:16:04 +0100 Subject: [PATCH] fix(backend): allow segments / genes to be null #822 --- .../loculus/backend/api/SubmissionTypes.kt | 2 +- .../loculus/backend/config/ReferenceGenome.kt | 25 ++- .../service/submission/CompressionService.kt | 24 +-- .../ProcessedSequenceEntryValidator.kt | 17 +- .../org/loculus/backend/utils/FastaReader.kt | 3 +- .../submission/PreparedProcessedData.kt | 151 ++++++++---------- .../SubmitProcessedDataEndpointTest.kt | 29 +++- preprocessing/specification.md | 10 +- 8 files changed, 127 insertions(+), 134 deletions(-) diff --git a/backend/src/main/kotlin/org/loculus/backend/api/SubmissionTypes.kt b/backend/src/main/kotlin/org/loculus/backend/api/SubmissionTypes.kt index 964d671db..98618f313 100644 --- a/backend/src/main/kotlin/org/loculus/backend/api/SubmissionTypes.kt +++ b/backend/src/main/kotlin/org/loculus/backend/api/SubmissionTypes.kt @@ -166,7 +166,7 @@ data class OriginalData( example = "{\"segment1\": \"ACTG\", \"segment2\": \"GTCA\"}", description = "The key is the segment name, the value is the nucleotide sequence", ) - val unalignedNucleotideSequences: Map, + val unalignedNucleotideSequences: Map, ) enum class Status { diff --git a/backend/src/main/kotlin/org/loculus/backend/config/ReferenceGenome.kt b/backend/src/main/kotlin/org/loculus/backend/config/ReferenceGenome.kt index c6c50728f..2d1c4c42f 100644 --- a/backend/src/main/kotlin/org/loculus/backend/config/ReferenceGenome.kt +++ b/backend/src/main/kotlin/org/loculus/backend/config/ReferenceGenome.kt @@ -19,20 +19,17 @@ data class ReferenceGenome( return "ReferenceGenome(nucleotideSequences=[$nucleotideSequencesString], genes=[$genesString])" } - fun getNucleotideSegmentReference(segmentName: String): NucleotideSequence? = nucleotideSequences.find { - it.name == segmentName - }?.sequence - - fun getAminoAcidGeneReference(gene: String): AminoAcidSequence? = genes.find { - it.name == gene - }?.sequence - - private fun shortenSequence(sequence: String): String { - return if (sequence.length > 10) { - sequence.substring(0, 10) + "..." - } else { - sequence - } + fun getNucleotideSegmentReference(segmentName: String): NucleotideSequence? = nucleotideSequences + .find { it.name == segmentName } + ?.sequence + + fun getAminoAcidGeneReference(gene: String): AminoAcidSequence? = genes + .find { it.name == gene } + ?.sequence + + private fun shortenSequence(sequence: String) = when { + sequence.length > 10 -> sequence.substring(0, 10) + "..." + else -> sequence } private fun referenceListToString(list: List) = list.joinToString(", ") { diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt index d4a2d9c93..75bde9421 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/CompressionService.kt @@ -53,11 +53,10 @@ class CompressionService(private val backendConfig: BackendConfig) { originalData.metadata, originalData .unalignedNucleotideSequences.mapValues { - decompressNucleotideSequence( - it.value, - it.key, - organism, - ) + when (val compressedSequence = it.value) { + null -> null + else -> decompressNucleotideSequence(compressedSequence, it.key, organism) + } }, ) @@ -65,11 +64,10 @@ class CompressionService(private val backendConfig: BackendConfig) { originalData.metadata, originalData .unalignedNucleotideSequences.mapValues { (segmentName, sequenceData) -> - compressNucleotideSequence( - sequenceData, - segmentName, - organism, - ) + when (sequenceData) { + null -> null + else -> compressNucleotideSequence(sequenceData, segmentName, organism) + } }, ) @@ -165,12 +163,14 @@ class CompressionService(private val backendConfig: BackendConfig) { .referenceGenomes .getNucleotideSegmentReference( segmentName, - )?.toByteArray() + ) + ?.toByteArray() private fun getDictionaryForAminoAcidSequence(geneName: String, organism: Organism): ByteArray? = backendConfig .getInstanceConfig(organism) .referenceGenomes .getAminoAcidGeneReference( geneName, - )?.toByteArray() + ) + ?.toByteArray() } diff --git a/backend/src/main/kotlin/org/loculus/backend/service/submission/ProcessedSequenceEntryValidator.kt b/backend/src/main/kotlin/org/loculus/backend/service/submission/ProcessedSequenceEntryValidator.kt index 26e8638bb..2006ccd2f 100644 --- a/backend/src/main/kotlin/org/loculus/backend/service/submission/ProcessedSequenceEntryValidator.kt +++ b/backend/src/main/kotlin/org/loculus/backend/service/submission/ProcessedSequenceEntryValidator.kt @@ -2,8 +2,10 @@ package org.loculus.backend.service.submission import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.node.NullNode +import org.loculus.backend.api.AminoAcidSequence import org.loculus.backend.api.Insertion import org.loculus.backend.api.MetadataMap +import org.loculus.backend.api.NucleotideSequence import org.loculus.backend.api.Organism import org.loculus.backend.api.ProcessedData import org.loculus.backend.config.BackendConfig @@ -245,11 +247,7 @@ class ProcessedSequenceEntryValidator( ) } - private fun validateNoMissingSegment( - segment: ReferenceSequence, - sequenceData: Map, - sequence: String, - ) { + private fun validateNoMissingSegment(segment: ReferenceSequence, sequenceData: Map, sequence: String) { if (!sequenceData.containsKey(segment.name)) { throw ProcessingValidationException("Missing the required segment '${segment.name}' in '$sequence'.") } @@ -269,7 +267,7 @@ class ProcessedSequenceEntryValidator( } } - private fun validateNoUnknownSegment(dataToValidate: Map, sequenceGrouping: String) { + private fun validateNoUnknownSegment(dataToValidate: Map, sequenceGrouping: String) { val unknownSegments = dataToValidate.keys.subtract(referenceGenome.nucleotideSequences.map { it.name }.toSet()) if (unknownSegments.isNotEmpty()) { val unknownSegmentsString = unknownSegments.sorted().joinToString(", ") @@ -279,7 +277,10 @@ class ProcessedSequenceEntryValidator( } } - private fun validateNoUnknownNucleotideSymbol(dataToValidate: Map, sequenceGrouping: String) { + private fun validateNoUnknownNucleotideSymbol( + dataToValidate: Map, + sequenceGrouping: String, + ) { for ((segmentName, sequence) in dataToValidate) { if (sequence == null) { continue @@ -354,7 +355,7 @@ class ProcessedSequenceEntryValidator( } } - private fun validateNoUnknownAminoAcidSymbol(dataToValidate: Map) { + private fun validateNoUnknownAminoAcidSymbol(dataToValidate: Map) { for ((gene, sequence) in dataToValidate) { if (sequence == null) { continue diff --git a/backend/src/main/kotlin/org/loculus/backend/utils/FastaReader.kt b/backend/src/main/kotlin/org/loculus/backend/utils/FastaReader.kt index f0be73ac6..cbd20c755 100644 --- a/backend/src/main/kotlin/org/loculus/backend/utils/FastaReader.kt +++ b/backend/src/main/kotlin/org/loculus/backend/utils/FastaReader.kt @@ -7,12 +7,11 @@ import java.io.InputStreamReader data class FastaEntry(val sampleName: String, val sequence: String) class FastaReader(inputStream: InputStream) : Iterator, Iterable, AutoCloseable { - private val reader: BufferedReader + private val reader: BufferedReader = BufferedReader(InputStreamReader(inputStream)) private var nextEntry: FastaEntry? = null private var nextLine: String? = "" init { - reader = BufferedReader(InputStreamReader(inputStream)) read() } diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt index d67ae210e..aa360a464 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/PreparedProcessedData.kt @@ -1,14 +1,11 @@ package org.loculus.backend.controller.submission -import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.node.DoubleNode import com.fasterxml.jackson.databind.node.IntNode import com.fasterxml.jackson.databind.node.NullNode import com.fasterxml.jackson.databind.node.TextNode -import org.loculus.backend.api.AminoAcidSequence import org.loculus.backend.api.GeneName import org.loculus.backend.api.Insertion -import org.loculus.backend.api.NucleotideSequence import org.loculus.backend.api.PreprocessingAnnotation import org.loculus.backend.api.PreprocessingAnnotationSource import org.loculus.backend.api.PreprocessingAnnotationSourceType @@ -17,6 +14,7 @@ import org.loculus.backend.api.SegmentName import org.loculus.backend.api.SubmittedProcessedData import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles import org.loculus.backend.utils.Accession +import org.loculus.backend.utils.Version const val MAIN_SEGMENT = "main" const val SOME_LONG_GENE = "someLongGene" @@ -118,7 +116,7 @@ object PreparedProcessedData { fun successfullyProcessed( accession: Accession = DefaultFiles.firstAccession, version: Long = defaultSuccessfulSubmittedData.version, - ) = defaultSuccessfulSubmittedData.withValues( + ) = defaultSuccessfulSubmittedData.copy( accession = accession, version = version, ) @@ -126,51 +124,61 @@ object PreparedProcessedData { fun successfullyProcessedOtherOrganismData( accession: Accession = DefaultFiles.firstAccession, version: Long = defaultSuccessfulSubmittedDataMultiSegmented.version, - ) = defaultSuccessfulSubmittedDataMultiSegmented.withValues( + ) = defaultSuccessfulSubmittedDataMultiSegmented.copy( accession = accession, version = version, ) fun withNullForFields(accession: Accession = DefaultFiles.firstAccession, fields: List) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( metadata = defaultProcessedData.metadata + fields.map { it to NullNode.instance }, ), ) + fun withNullForSequences(accession: Accession, version: Version) = defaultSuccessfulSubmittedData.copy( + accession = accession, + version = version, + data = defaultProcessedData.copy( + alignedNucleotideSequences = defaultProcessedData.alignedNucleotideSequences.mapValues { null }, + unalignedNucleotideSequences = defaultProcessedData.unalignedNucleotideSequences.mapValues { null }, + alignedAminoAcidSequences = defaultProcessedData.alignedAminoAcidSequences.mapValues { null }, + ), + ) + fun withMissingMetadataFields( accession: Accession = DefaultFiles.firstAccession, version: Long = defaultSuccessfulSubmittedData.version, absentFields: List, - ) = defaultSuccessfulSubmittedData.withValues( + ) = defaultSuccessfulSubmittedData.copy( accession = accession, version = version, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( metadata = defaultProcessedData.metadata.filterKeys { !absentFields.contains(it) }, ), ) fun withUnknownMetadataField(accession: Accession = DefaultFiles.firstAccession, fields: List) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( metadata = defaultProcessedData.metadata + fields.map { it to TextNode("value for $it") }, ), ) fun withMissingRequiredField(accession: Accession = DefaultFiles.firstAccession, fields: List) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( metadata = defaultProcessedData.metadata.filterKeys { !fields.contains(it) }, ), ) fun withWrongTypeForFields(accession: Accession = DefaultFiles.firstAccession) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( metadata = defaultProcessedData.metadata + mapOf( "region" to IntNode(5), "age" to TextNode("not a number"), @@ -178,20 +186,19 @@ object PreparedProcessedData { ), ) - fun withWrongDateFormat(accession: Accession = DefaultFiles.firstAccession) = - defaultSuccessfulSubmittedData.withValues( - accession = accession, - data = defaultProcessedData.withValues( - metadata = defaultProcessedData.metadata + mapOf( - "date" to TextNode("1.2.2021"), - ), + fun withWrongDateFormat(accession: Accession = DefaultFiles.firstAccession) = defaultSuccessfulSubmittedData.copy( + accession = accession, + data = defaultProcessedData.copy( + metadata = defaultProcessedData.metadata + mapOf( + "date" to TextNode("1.2.2021"), ), - ) + ), + ) fun withWrongPangoLineageFormat(accession: Accession = DefaultFiles.firstAccession) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( metadata = defaultProcessedData.metadata + mapOf( "pangoLineage" to TextNode("A.5.invalid"), ), @@ -201,9 +208,9 @@ object PreparedProcessedData { fun withMissingSegmentInUnalignedNucleotideSequences( accession: Accession = DefaultFiles.firstAccession, segment: SegmentName, - ) = defaultSuccessfulSubmittedData.withValues( + ) = defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( unalignedNucleotideSequences = defaultProcessedData.unalignedNucleotideSequences - segment, ), ) @@ -211,9 +218,9 @@ object PreparedProcessedData { fun withMissingSegmentInAlignedNucleotideSequences( accession: Accession = DefaultFiles.firstAccession, segment: SegmentName, - ) = defaultSuccessfulSubmittedData.withValues( + ) = defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( alignedNucleotideSequences = defaultProcessedData.alignedNucleotideSequences - segment, ), ) @@ -221,9 +228,9 @@ object PreparedProcessedData { fun withUnknownSegmentInAlignedNucleotideSequences( accession: Accession = DefaultFiles.firstAccession, segment: SegmentName, - ) = defaultSuccessfulSubmittedData.withValues( + ) = defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( alignedNucleotideSequences = defaultProcessedData.alignedNucleotideSequences + (segment to "NNNN"), ), ) @@ -231,9 +238,9 @@ object PreparedProcessedData { fun withUnknownSegmentInUnalignedNucleotideSequences( accession: Accession = DefaultFiles.firstAccession, segment: SegmentName, - ) = defaultSuccessfulSubmittedData.withValues( + ) = defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( unalignedNucleotideSequences = defaultProcessedData.unalignedNucleotideSequences + (segment to "NNNN"), ), ) @@ -241,9 +248,9 @@ object PreparedProcessedData { fun withUnknownSegmentInNucleotideInsertions( accession: Accession = DefaultFiles.firstAccession, segment: SegmentName, - ) = defaultSuccessfulSubmittedData.withValues( + ) = defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( nucleotideInsertions = defaultProcessedData.nucleotideInsertions + ( segment to listOf( Insertion( @@ -263,9 +270,9 @@ object PreparedProcessedData { val alignedNucleotideSequences = defaultProcessedData.alignedNucleotideSequences.toMutableMap() alignedNucleotideSequences[segment] = "A".repeat(length) - return defaultSuccessfulSubmittedData.withValues( + return defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues(alignedNucleotideSequences = alignedNucleotideSequences), + data = defaultProcessedData.copy(alignedNucleotideSequences = alignedNucleotideSequences), ) } @@ -276,9 +283,9 @@ object PreparedProcessedData { val alignedNucleotideSequences = defaultProcessedData.alignedNucleotideSequences.toMutableMap() alignedNucleotideSequences[segment] = "ÄÖ" + alignedNucleotideSequences[segment]!!.substring(2) - return defaultSuccessfulSubmittedData.withValues( + return defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues(alignedNucleotideSequences = alignedNucleotideSequences), + data = defaultProcessedData.copy(alignedNucleotideSequences = alignedNucleotideSequences), ) } @@ -289,9 +296,9 @@ object PreparedProcessedData { val unalignedNucleotideSequences = defaultProcessedData.unalignedNucleotideSequences.toMutableMap() unalignedNucleotideSequences[segment] = "ÄÖ" + unalignedNucleotideSequences[segment]!!.substring(2) - return defaultSuccessfulSubmittedData.withValues( + return defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues(unalignedNucleotideSequences = unalignedNucleotideSequences), + data = defaultProcessedData.copy(unalignedNucleotideSequences = unalignedNucleotideSequences), ) } @@ -302,32 +309,32 @@ object PreparedProcessedData { val nucleotideInsertions = defaultProcessedData.nucleotideInsertions.toMutableMap() nucleotideInsertions[segment] = listOf(Insertion(123, "ÄÖ")) - return defaultSuccessfulSubmittedData.withValues( + return defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues(nucleotideInsertions = nucleotideInsertions), + data = defaultProcessedData.copy(nucleotideInsertions = nucleotideInsertions), ) } fun withMissingGeneInAlignedAminoAcidSequences(accession: Accession = DefaultFiles.firstAccession, gene: GeneName) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( alignedAminoAcidSequences = defaultProcessedData.alignedAminoAcidSequences - gene, ), ) fun withUnknownGeneInAlignedAminoAcidSequences(accession: Accession = DefaultFiles.firstAccession, gene: GeneName) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( alignedAminoAcidSequences = defaultProcessedData.alignedAminoAcidSequences + (gene to "RNRNRN"), ), ) fun withUnknownGeneInAminoAcidInsertions(accession: Accession = DefaultFiles.firstAccession, gene: GeneName) = - defaultSuccessfulSubmittedData.withValues( + defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues( + data = defaultProcessedData.copy( aminoAcidInsertions = defaultProcessedData.aminoAcidInsertions + ( gene to listOf( Insertion( @@ -347,9 +354,9 @@ object PreparedProcessedData { val alignedAminoAcidSequences = defaultProcessedData.alignedAminoAcidSequences.toMutableMap() alignedAminoAcidSequences[gene] = "A".repeat(length) - return defaultSuccessfulSubmittedData.withValues( + return defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues(alignedAminoAcidSequences = alignedAminoAcidSequences), + data = defaultProcessedData.copy(alignedAminoAcidSequences = alignedAminoAcidSequences), ) } @@ -360,9 +367,9 @@ object PreparedProcessedData { val aminoAcidSequence = defaultProcessedData.alignedAminoAcidSequences.toMutableMap() aminoAcidSequence[gene] = "ÄÖ" + aminoAcidSequence[gene]!!.substring(2) - return defaultSuccessfulSubmittedData.withValues( + return defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues(alignedAminoAcidSequences = aminoAcidSequence), + data = defaultProcessedData.copy(alignedAminoAcidSequences = aminoAcidSequence), ) } @@ -373,13 +380,13 @@ object PreparedProcessedData { val aminoAcidInsertions = defaultProcessedData.aminoAcidInsertions.toMutableMap() aminoAcidInsertions[gene] = listOf(Insertion(123, "ÄÖ")) - return defaultSuccessfulSubmittedData.withValues( + return defaultSuccessfulSubmittedData.copy( accession = accession, - data = defaultProcessedData.withValues(aminoAcidInsertions = aminoAcidInsertions), + data = defaultProcessedData.copy(aminoAcidInsertions = aminoAcidInsertions), ) } - fun withErrors(accession: Accession = DefaultFiles.firstAccession) = defaultSuccessfulSubmittedData.withValues( + fun withErrors(accession: Accession = DefaultFiles.firstAccession) = defaultSuccessfulSubmittedData.copy( accession = accession, errors = listOf( PreprocessingAnnotation( @@ -403,7 +410,7 @@ object PreparedProcessedData { ), ) - fun withWarnings(accession: Accession = DefaultFiles.firstAccession) = defaultSuccessfulSubmittedData.withValues( + fun withWarnings(accession: Accession = DefaultFiles.firstAccession) = defaultSuccessfulSubmittedData.copy( accession = accession, warnings = listOf( PreprocessingAnnotation( @@ -427,33 +434,3 @@ object PreparedProcessedData { ), ) } - -fun SubmittedProcessedData.withValues( - accession: Accession? = null, - version: Long? = null, - data: ProcessedData? = null, - errors: List? = null, - warnings: List? = null, -) = SubmittedProcessedData( - accession = accession ?: this.accession, - version = version ?: this.version, - data = data ?: this.data, - errors = errors ?: this.errors, - warnings = warnings ?: this.warnings, -) - -fun ProcessedData.withValues( - metadata: Map? = null, - unalignedNucleotideSequences: Map? = null, - alignedNucleotideSequences: Map? = null, - nucleotideInsertions: Map>? = null, - alignedAminoAcidSequences: Map? = null, - aminoAcidInsertions: Map>? = null, -) = ProcessedData( - metadata = metadata ?: this.metadata, - unalignedNucleotideSequences = unalignedNucleotideSequences ?: this.unalignedNucleotideSequences, - alignedNucleotideSequences = alignedNucleotideSequences ?: this.alignedNucleotideSequences, - nucleotideInsertions = nucleotideInsertions ?: this.nucleotideInsertions, - alignedAminoAcidSequences = alignedAminoAcidSequences ?: this.alignedAminoAcidSequences, - aminoAcidInsertions = aminoAcidInsertions ?: this.aminoAcidInsertions, -) diff --git a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt index ebbeb354f..be3e82ba6 100644 --- a/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt +++ b/backend/src/test/kotlin/org/loculus/backend/controller/submission/SubmitProcessedDataEndpointTest.kt @@ -68,6 +68,19 @@ class SubmitProcessedDataEndpointTest( ) } + @Test + fun `WHEN I submit data with null as sequences THEN the sequence entry is in status processed`() { + val (accession, version, _) = prepareExtractedSequencesInDatabase()[0] + + submissionControllerClient.submitProcessedData( + PreparedProcessedData.withNullForSequences(accession = accession, version = version), + ) + .andExpect(status().isNoContent) + + convenienceClient.getSequenceEntryOfUser(accession = accession, version = version) + .assertStatusIs(Status.AWAITING_APPROVAL) + } + @Test fun `WHEN I submit with all valid symbols THEN the sequence entry is in status processed`() { prepareExtractedSequencesInDatabase() @@ -85,8 +98,8 @@ class SubmitProcessedDataEndpointTest( val defaultData = PreparedProcessedData.successfullyProcessed().data submissionControllerClient.submitProcessedData( - PreparedProcessedData.successfullyProcessed(accession = "3").withValues( - data = defaultData.withValues( + PreparedProcessedData.successfullyProcessed(accession = "3").copy( + data = defaultData.copy( unalignedNucleotideSequences = mapOf(MAIN_SEGMENT to nucleotideSequenceOfDesiredLength), alignedNucleotideSequences = mapOf(MAIN_SEGMENT to nucleotideSequenceOfDesiredLength), alignedAminoAcidSequences = @@ -105,13 +118,13 @@ class SubmitProcessedDataEndpointTest( fun `WHEN I submit preprocessed data without insertions THEN the missing keys of the reference will be added`() { prepareExtractedSequencesInDatabase(organism = OTHER_ORGANISM) - val dataWithoutInsertions = PreparedProcessedData.successfullyProcessedOtherOrganismData().data.withValues( + val dataWithoutInsertions = PreparedProcessedData.successfullyProcessedOtherOrganismData().data.copy( nucleotideInsertions = mapOf("notOnlySegment" to listOf(Insertion(1, "A"))), aminoAcidInsertions = emptyMap(), ) submissionControllerClient.submitProcessedData( - PreparedProcessedData.successfullyProcessedOtherOrganismData(accession = "3").withValues( + PreparedProcessedData.successfullyProcessedOtherOrganismData(accession = "3").copy( data = dataWithoutInsertions, ), organism = OTHER_ORGANISM, @@ -152,13 +165,13 @@ class SubmitProcessedDataEndpointTest( fun `WHEN I submit single-segment data without insertions THEN the missing keys of the reference will be added`() { prepareExtractedSequencesInDatabase() - val dataWithoutInsertions = PreparedProcessedData.successfullyProcessed().data.withValues( + val dataWithoutInsertions = PreparedProcessedData.successfullyProcessed().data.copy( nucleotideInsertions = mapOf("main" to listOf(Insertion(1, "A"))), aminoAcidInsertions = emptyMap(), ) submissionControllerClient.submitProcessedData( - PreparedProcessedData.successfullyProcessed(accession = "3").withValues( + PreparedProcessedData.successfullyProcessed(accession = "3").copy( data = dataWithoutInsertions, ), ).andExpect(status().isNoContent) @@ -213,7 +226,7 @@ class SubmitProcessedDataEndpointTest( convenienceClient.submitDefaultFiles() convenienceClient.extractUnprocessedData(1) submissionControllerClient.submitProcessedData( - PreparedProcessedData.withWrongDateFormat().withValues( + PreparedProcessedData.withWrongDateFormat().copy( accession = firstAccession, errors = PreparedProcessedData.withErrors().errors, ), @@ -279,7 +292,7 @@ class SubmitProcessedDataEndpointTest( submissionControllerClient.submitProcessedData( PreparedProcessedData.successfullyProcessed(accession = firstAccession), PreparedProcessedData.successfullyProcessed(accession = firstAccession) - .withValues(version = nonExistentVersion), + .copy(version = nonExistentVersion), ) .andExpect(status().isUnprocessableEntity) .andExpect(content().contentType(MediaType.APPLICATION_JSON_VALUE)) diff --git a/preprocessing/specification.md b/preprocessing/specification.md index e641ee76d..a552cdd70 100644 --- a/preprocessing/specification.md +++ b/preprocessing/specification.md @@ -128,7 +128,12 @@ The `metadata` field should contain a flat object consisting of the fields speci #### Sequences -The `unalignedNucleotideSequences`, `alignedNucleotideSequences`, and `alignedAminoAcidSequences` fields contain objects with the segment/gene name as key and the sequence as value. If there is only a single segment (e.g., as in SARS-CoV-2), the segment name of the nucleotide sequence should be `main`. Examples: +The `unalignedNucleotideSequences`, `alignedNucleotideSequences`, and `alignedAminoAcidSequences` fields contain objects with the segment/gene name as key and the sequence as value. +If there is only a single segment (e.g., as in SARS-CoV-2), the segment name of the nucleotide sequence should be `main`. + +If a segment or a gene is not present in the sequence, the value should be `null`. + +Examples: SARS-CoV-2 nucleotide sequence: @@ -143,7 +148,7 @@ SARS-CoV-2 amino acid sequences: ``` { "S": "...", - "N": "...", + "N": null, ... } ``` @@ -151,3 +156,4 @@ SARS-CoV-2 amino acid sequences: #### Insertions This is not yet specified. +TODO https://github.com/loculus-project/loculus/issues/823