Skip to content

Commit

Permalink
feat(backend): export revocation versions to SILO, make sure all meta…
Browse files Browse the repository at this point in the history
…data fields are set #704 #429
  • Loading branch information
fengelniederhammer committed Jan 24, 2024
1 parent 7857907 commit 53b4f92
Show file tree
Hide file tree
Showing 20 changed files with 465 additions and 220 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,24 @@ typealias SegmentName = String
typealias GeneName = String
typealias NucleotideSequence = String
typealias AminoAcidSequence = String
typealias MetadataMap = Map<String, JsonNode>

data class ProcessedData(
@Schema(
example = """{"date": "2020-01-01", "country": "Germany", "age": 42, "qc": 0.95}""",
description = "Key value pairs of metadata, correctly typed",
)
val metadata: Map<String, JsonNode>,
val metadata: MetadataMap,
@Schema(
example = """{"segment1": "ACTG", "segment2": "GTCA"}""",
description = "The key is the segment name, the value is the nucleotide sequence",
)
val unalignedNucleotideSequences: Map<SegmentName, NucleotideSequence>,
val unalignedNucleotideSequences: Map<SegmentName, NucleotideSequence?>,
@Schema(
example = """{"segment1": "ACTG", "segment2": "GTCA"}""",
description = "The key is the segment name, the value is the aligned nucleotide sequence",
)
val alignedNucleotideSequences: Map<SegmentName, NucleotideSequence>,
val alignedNucleotideSequences: Map<SegmentName, NucleotideSequence?>,
@Schema(
example = """{"segment1": ["123:GTCA", "345:AAAA"], "segment2": ["123:GTCA", "345:AAAA"]}""",
description = "The key is the segment name, the value is a list of nucleotide insertions",
Expand All @@ -90,7 +91,7 @@ data class ProcessedData(
example = """{"gene1": "NRNR", "gene2": "NRNR"}""",
description = "The key is the gene name, the value is the amino acid sequence",
)
val alignedAminoAcidSequences: Map<GeneName, AminoAcidSequence>,
val alignedAminoAcidSequences: Map<GeneName, AminoAcidSequence?>,
@Schema(
example = """{"gene1": ["123:RRN", "345:NNN"], "gene2": ["123:NNR", "345:RN"]}""",
description = "The key is the gene name, the value is a list of amino acid insertions",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,24 @@ package org.loculus.backend.service
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
import org.jetbrains.exposed.sql.QueryParameter
import org.jetbrains.exposed.sql.Table
import org.jetbrains.exposed.sql.json.JsonBColumnType
import org.jetbrains.exposed.sql.json.jsonb
import org.loculus.backend.api.ProcessedData

val jacksonObjectMapper: ObjectMapper = jacksonObjectMapper().findAndRegisterModules()
inline fun <reified T : Any> Table.jacksonSerializableJsonb(columnName: String) = jsonb<T>(
columnName,
{ value -> jacksonObjectMapper.writeValueAsString(value) },
{ string -> jacksonObjectMapper.readValue(string) },
::serialize,
::deserialize,
)

/**
 * Wraps [value] in a [QueryParameter] whose SQL type is a JSONB column type, so that the value
 * can be passed as a bound parameter inside a query expression.
 *
 * The column type is parameterized with the caller's [T] rather than a fixed class, so the
 * serializer/deserializer pair always matches the static type of [value]. Serialization and
 * deserialization are delegated to [serialize] and [deserialize].
 *
 * @param value the object to bind as a JSONB parameter
 * @return a query parameter carrying [value] together with its JSONB column type
 */
inline fun <reified T : Any> jsonbParam(value: T) = QueryParameter(
    value,
    JsonBColumnType<T>(::serialize, ::deserialize),
)

/**
 * Serializes [value] to a JSON string using the file-level Jackson object mapper.
 *
 * @param value the object to serialize
 * @return the JSON text produced by the mapper's `writeValueAsString`
 */
inline fun <reified T : Any> serialize(value: T): String {
    return jacksonObjectMapper.writeValueAsString(value)
}

/**
 * Parses the JSON string [value] into an instance of [T] using the file-level Jackson object mapper.
 *
 * @param value the JSON text to parse
 * @return the deserialized object of type [T]
 */
inline fun <reified T : Any> deserialize(value: String): T {
    return jacksonObjectMapper.readValue<T>(value)
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,27 +76,24 @@ class CompressionService(private val backendConfig: BackendConfig) {
fun decompressSequencesInProcessedData(processedData: ProcessedData, organism: Organism) = ProcessedData(
processedData.metadata,
processedData
.unalignedNucleotideSequences.mapValues {
decompressNucleotideSequence(
it.value,
it.key,
organism,
)
.unalignedNucleotideSequences.mapValues { (segmentName, sequenceData) ->
when (sequenceData) {
null -> null
else -> decompressNucleotideSequence(sequenceData, segmentName, organism)
}
},
processedData.alignedNucleotideSequences.mapValues {
decompressNucleotideSequence(
it.value,
it.key,
organism,
)
processedData.alignedNucleotideSequences.mapValues { (segmentName, sequenceData) ->
when (sequenceData) {
null -> null
else -> decompressNucleotideSequence(sequenceData, segmentName, organism)
}
},
processedData.nucleotideInsertions,
processedData.alignedAminoAcidSequences.mapValues {
decompressAminoAcidSequence(
it.value,
it.key,
organism,
)
processedData.alignedAminoAcidSequences.mapValues { (gene, sequenceData) ->
when (sequenceData) {
null -> null
else -> decompressAminoAcidSequence(sequenceData, gene, organism)
}
},
processedData.aminoAcidInsertions,
)
Expand All @@ -105,29 +102,25 @@ class CompressionService(private val backendConfig: BackendConfig) {
processedData.metadata,
processedData
.unalignedNucleotideSequences.mapValues { (segmentName, sequenceData) ->
compressNucleotideSequence(
sequenceData,
segmentName,
organism,
)
when (sequenceData) {
null -> null
else -> compressNucleotideSequence(sequenceData, segmentName, organism)
}
},
processedData.alignedNucleotideSequences.mapValues {
compressNucleotideSequence(
it.value,
it.key,
organism,
)
processedData.alignedNucleotideSequences.mapValues { (segmentName, sequenceData) ->
when (sequenceData) {
null -> null
else -> compressNucleotideSequence(sequenceData, segmentName, organism)
}
},
processedData.nucleotideInsertions,
processedData.alignedAminoAcidSequences.mapValues {
compressAminoAcidSequence(
it.value,
it.key,
organism,
)
processedData.alignedAminoAcidSequences.mapValues { (gene, sequenceData) ->
when (sequenceData) {
null -> null
else -> compressAminoAcidSequence(sequenceData, gene, organism)
}
},
processedData.aminoAcidInsertions,

)

private fun compress(seq: String, dictionary: ByteArray?): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ import org.loculus.backend.api.Status.RECEIVED
import org.loculus.backend.api.SubmittedProcessedData
import org.loculus.backend.api.UnprocessedData
import org.loculus.backend.config.BackendConfig
import org.loculus.backend.config.ReferenceGenome
import org.loculus.backend.controller.BadRequestException
import org.loculus.backend.controller.ProcessingValidationException
import org.loculus.backend.controller.UnprocessableEntityException
import org.loculus.backend.service.groupmanagement.GroupManagementPreconditionValidator
import org.loculus.backend.service.jsonbParam
import org.loculus.backend.utils.Accession
import org.loculus.backend.utils.Version
import org.springframework.stereotype.Service
Expand All @@ -59,8 +59,8 @@ class DatabaseService(
private val groupManagementPreconditionValidator: GroupManagementPreconditionValidator,
private val objectMapper: ObjectMapper,
pool: DataSource,
private val backendConfig: BackendConfig,
private val sequenceEntriesTableProvider: SequenceEntriesTableProvider,
private val emptyProcessedDataProvider: EmptyProcessedDataProvider,
) {

init {
Expand Down Expand Up @@ -131,45 +131,37 @@ class DatabaseService(
val now = Clock.System.now().toLocalDateTime(TimeZone.UTC)

val submittedErrors = submittedProcessedData.errors.orEmpty()

if (submittedErrors.isEmpty()) {
try {
sequenceValidatorFactory.create(organism).validateSequence(submittedProcessedData)
} catch (validationException: ProcessingValidationException) {
throwIfIsSubmissionForWrongOrganism(submittedProcessedData, organism)
throw validationException
}
}

val submittedWarnings = submittedProcessedData.warnings.orEmpty()
val referenceGenome = backendConfig.getInstanceConfig(organism).referenceGenomes
val submittedProcessedDataWithAllKeysForInsertions = addMissingKeysForInsertions(
submittedProcessedData,
referenceGenome,
)

val newStatus = when {
submittedErrors.isEmpty() -> AWAITING_APPROVAL
else -> HAS_ERRORS
val (newStatus, processedData) = when {
submittedErrors.isEmpty() -> AWAITING_APPROVAL to validateProcessedData(submittedProcessedData, organism)
else -> HAS_ERRORS to submittedProcessedData.data
}

return sequenceEntriesTableProvider.get(organism).let { table ->
table.update(
where = {
table.accessionVersionEquals(submittedProcessedDataWithAllKeysForInsertions) and
table.accessionVersionEquals(submittedProcessedData) and
table.statusIs(IN_PROCESSING) and
table.organismIs(organism)
},
) {
it[statusColumn] = newStatus.name
it[processedDataColumn] = submittedProcessedDataWithAllKeysForInsertions.data
it[processedDataColumn] = processedData
it[errorsColumn] = submittedErrors
it[warningsColumn] = submittedWarnings
it[finishedProcessingAtColumn] = now
}
}
}

/**
 * Runs the organism-specific sequence validator on the data part of [submittedProcessedData]
 * and returns whatever the validator's `validateSequence` returns.
 *
 * If validation fails with a [ProcessingValidationException], [throwIfIsSubmissionForWrongOrganism]
 * is called first (presumably to raise a more specific error when the submission belongs to a
 * different organism - confirm against that function), and otherwise the original exception is rethrown.
 */
private fun validateProcessedData(submittedProcessedData: SubmittedProcessedData, organism: Organism) =
    try {
        val validator = sequenceValidatorFactory.create(organism)
        validator.validateSequence(submittedProcessedData.data)
    } catch (exception: ProcessingValidationException) {
        throwIfIsSubmissionForWrongOrganism(submittedProcessedData, organism)
        throw exception
    }

private fun throwIfIsSubmissionForWrongOrganism(
submittedProcessedData: SubmittedProcessedData,
organism: Organism,
Expand All @@ -188,34 +180,6 @@ class DatabaseService(
}
}

private fun addMissingKeysForInsertions(
submittedProcessedData: SubmittedProcessedData,
referenceGenome: ReferenceGenome,
): SubmittedProcessedData {
val nucleotideInsertions = referenceGenome.nucleotideSequences.associate {
if (it.name in submittedProcessedData.data.nucleotideInsertions.keys) {
it.name to submittedProcessedData.data.nucleotideInsertions[it.name]!!
} else {
(it.name to emptyList())
}
}

val aminoAcidInsertions = referenceGenome.genes.associate {
if (it.name in submittedProcessedData.data.aminoAcidInsertions.keys) {
it.name to submittedProcessedData.data.aminoAcidInsertions[it.name]!!
} else {
(it.name to emptyList())
}
}

return submittedProcessedData.copy(
data = submittedProcessedData.data.copy(
nucleotideInsertions = nucleotideInsertions,
aminoAcidInsertions = aminoAcidInsertions,
),
)
}

private fun throwInsertFailedException(submittedProcessedData: SubmittedProcessedData, organism: Organism): String {
sequenceEntriesTableProvider.get(organism).let { table ->
val selectedSequenceEntries = table
Expand Down Expand Up @@ -312,8 +276,6 @@ class DatabaseService(
.select(
where = { table.statusIs(APPROVED_FOR_RELEASE) and table.organismIs(organism) },
)
// TODO(#429): This needs clarification of how to handle revocations. Until then, revocations are filtered out.
.filter { !it[table.isRevocationColumn] }
.map {
RawProcessedData(
accession = it[table.accessionColumn],
Expand Down Expand Up @@ -447,6 +409,7 @@ class DatabaseService(
stringParam(AWAITING_APPROVAL_FOR_REVOCATION.name),
booleanParam(true),
table.organismColumn,
jsonbParam(emptyProcessedDataProvider.provide(organism)),
).select(
where = {
(table.accessionColumn inList accessions) and
Expand All @@ -463,6 +426,7 @@ class DatabaseService(
table.statusColumn,
table.isRevocationColumn,
table.organismColumn,
table.processedDataColumn,
),
)

Expand Down Expand Up @@ -499,6 +463,8 @@ class DatabaseService(
organism,
)

val now = Clock.System.now().toLocalDateTime(TimeZone.UTC)

sequenceEntriesTableProvider.get(organism).let { table ->
table.update(
where = {
Expand All @@ -508,6 +474,7 @@ class DatabaseService(
},
) {
it[statusColumn] = APPROVED_FOR_RELEASE.name
it[releasedAtColumn] = now
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package org.loculus.backend.service.submission

import com.fasterxml.jackson.databind.node.NullNode
import org.loculus.backend.api.Organism
import org.loculus.backend.api.ProcessedData
import org.loculus.backend.config.BackendConfig
import org.springframework.stereotype.Component

/**
 * Builds a [ProcessedData] instance that contains every key the organism's configuration
 * defines, but no actual content.
 */
@Component
class EmptyProcessedDataProvider(private val backendConfig: BackendConfig) {
    /**
     * Returns a [ProcessedData] for [organism] in which
     * - every metadata field of the configured schema maps to a JSON null node,
     * - every configured nucleotide segment and gene maps to a `null` sequence,
     * - every configured nucleotide segment and gene maps to an empty insertion list.
     */
    fun provide(organism: Organism): ProcessedData {
        val (schema, referenceGenomes) = backendConfig.getInstanceConfig(organism)

        val segmentNames = referenceGenomes.nucleotideSequences.map { it.name }
        val geneNames = referenceGenomes.genes.map { it.name }

        // One shared map serves both the unaligned and the aligned sequences: all values are null.
        val absentSegmentSequences = segmentNames.associateWith { null }

        return ProcessedData(
            metadata = schema.metadata.map { it.name }.associateWith { NullNode.instance },
            unalignedNucleotideSequences = absentSegmentSequences,
            alignedNucleotideSequences = absentSegmentSequences,
            alignedAminoAcidSequences = geneNames.associateWith { null },
            nucleotideInsertions = segmentNames.associateWith { emptyList() },
            aminoAcidInsertions = geneNames.associateWith { emptyList() },
        )
    }
}
Loading

0 comments on commit 53b4f92

Please sign in to comment.