Skip to content

Commit

Permalink
feat: transform lower case sequences to upper case
Browse files Browse the repository at this point in the history
  • Loading branch information
fengelniederhammer committed Aug 26, 2024
1 parent 9860996 commit 1951075
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ import org.springframework.transaction.annotation.Transactional
import java.io.BufferedReader
import java.io.InputStream
import java.io.InputStreamReader
import java.util.Locale
import javax.sql.DataSource

private val log = KotlinLogging.logger { }
Expand Down Expand Up @@ -318,7 +319,7 @@ class SubmissionDatabaseService(
val submittedWarnings = submittedProcessedData.warnings.orEmpty()

val (newStatus, processedData) = when {
submittedErrors.isEmpty() -> FINISHED to validateProcessedData(
submittedErrors.isEmpty() -> FINISHED to postprocessAndValidateProcessedData(
submittedProcessedData,
organism,
)
Expand Down Expand Up @@ -349,13 +350,35 @@ class SubmissionDatabaseService(
return newStatus
}

private fun validateProcessedData(submittedProcessedData: SubmittedProcessedData, organism: Organism) = try {
/**
 * Post-processes and validates processed data submitted for [organism].
 *
 * Steps, in order:
 * 1. `throwIfIsSubmissionForWrongOrganism` is called with the submission and [organism].
 * 2. All sequences in the submitted data are upper-cased via [makeSequencesUpperCase].
 * 3. A validator created for [organism] validates the upper-cased data.
 *
 * @return whatever the validator's `validate` call returns for the upper-cased data.
 * @throws ProcessingValidationException rethrown unchanged if raised by any of the steps above.
 */
private fun postprocessAndValidateProcessedData(
    submittedProcessedData: SubmittedProcessedData,
    organism: Organism,
) = try {
    throwIfIsSubmissionForWrongOrganism(submittedProcessedData, organism)

    // Normalize casing first so that validation sees the data exactly as it is returned.
    val upperCasedData = makeSequencesUpperCase(submittedProcessedData.data)
    val validator = processedSequenceEntryValidatorFactory.create(organism)
    validator.validate(upperCasedData)
} catch (rethrown: ProcessingValidationException) {
    throw rethrown
}

/**
 * Returns a copy of [processedData] in which every sequence string is upper-cased.
 *
 * Affected fields: unaligned and aligned nucleotide sequences, aligned amino acid sequences,
 * and the `sequence` of every nucleotide and amino acid insertion. Sequence entries that are
 * `null` stay `null`; all other fields are carried over unchanged by `copy`.
 *
 * [Locale.US] is passed explicitly so the result does not depend on the JVM's default locale.
 */
private fun makeSequencesUpperCase(processedData: ProcessedData<GeneticSequence>) = processedData.copy(
    unalignedNucleotideSequences = processedData.unalignedNucleotideSequences
        .mapValues { entry -> entry.value?.uppercase(Locale.US) },
    alignedNucleotideSequences = processedData.alignedNucleotideSequences
        .mapValues { entry -> entry.value?.uppercase(Locale.US) },
    alignedAminoAcidSequences = processedData.alignedAminoAcidSequences
        .mapValues { entry -> entry.value?.uppercase(Locale.US) },
    nucleotideInsertions = processedData.nucleotideInsertions
        .mapValues { entry ->
            entry.value.map { insertion -> insertion.copy(sequence = insertion.sequence.uppercase(Locale.US)) }
        },
    aminoAcidInsertions = processedData.aminoAcidInsertions
        .mapValues { entry ->
            entry.value.map { insertion -> insertion.copy(sequence = insertion.sequence.uppercase(Locale.US)) }
        },
)

private fun validateExternalMetadata(
externalSubmittedData: ExternalSubmittedData,
organism: Organism,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,30 @@ object PreparedProcessedData {
),
)

/**
 * Submitted data for [accession] / [version] in which every sequence and insertion is lowercase.
 *
 * Built from [defaultSuccessfulSubmittedData] and [defaultProcessedData]; only the five
 * sequence and insertion maps below are replaced.
 */
fun withLowercaseSequences(accession: Accession, version: Version) = defaultSuccessfulSubmittedData.copy(
    accession = accession,
    version = version,
    data = defaultProcessedData.copy(
        // Nucleotide data
        unalignedNucleotideSequences = mapOf(MAIN_SEGMENT to "nactg"),
        alignedNucleotideSequences = mapOf(
            MAIN_SEGMENT to "attaaaggtttataccttcccaggtaacaaaccaaccaactttcgatct",
        ),
        nucleotideInsertions = mapOf(MAIN_SEGMENT to listOf(Insertion(123, "actg"))),
        // Amino acid data
        alignedAminoAcidSequences = mapOf(
            SOME_LONG_GENE to "acdefghiklmnpqrstvwybzx-*",
            SOME_SHORT_GENE to "mads",
        ),
        aminoAcidInsertions = mapOf(
            SOME_LONG_GENE to listOf(Insertion(123, "def")),
            SOME_SHORT_GENE to listOf(Insertion(123, "n")),
        ),
    ),
)

fun withMissingMetadataFields(
accession: Accession,
version: Long = defaultSuccessfulSubmittedData.version,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,28 @@ class SubmitProcessedDataEndpointTest(
.assertStatusIs(Status.AWAITING_APPROVAL)
}

@Test
fun `WHEN I submit data with lowercase sequences THEN the sequences are converted to uppercase`() {
    // Arrange: take the first prepared entry and build an all-lowercase submission for it.
    val (accession, version) = prepareExtractedSequencesInDatabase().first()
    val submission = PreparedProcessedData.withLowercaseSequences(accession = accession, version = version)

    // Act: the submission itself must be accepted.
    submissionControllerClient.submitProcessedData(submission)
        .andExpect(status().isNoContent)

    // Assert: the processed data read back afterwards is upper-cased.
    val storedData = convenienceClient
        .getSequenceEntryToEdit(accession = accession, version = version)
        .processedData

    assertThat(storedData.unalignedNucleotideSequences, hasEntry(MAIN_SEGMENT, "NACTG"))
    assertThat(
        storedData.alignedNucleotideSequences,
        hasEntry(MAIN_SEGMENT, "ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCT"),
    )
    assertThat(storedData.alignedAminoAcidSequences, hasEntry(SOME_LONG_GENE, "ACDEFGHIKLMNPQRSTVWYBZX-*"))
    assertThat(storedData.nucleotideInsertions, hasEntry(MAIN_SEGMENT, listOf(Insertion(123, "ACTG"))))
    assertThat(storedData.aminoAcidInsertions, hasEntry(SOME_LONG_GENE, listOf(Insertion(123, "DEF"))))
}

@Test
fun `WHEN I submit with all valid symbols THEN the sequence entry is in status processed`() {
val accessions = prepareExtractedSequencesInDatabase().map { it.accession }
Expand Down

0 comments on commit 1951075

Please sign in to comment.