Skip to content

Commit

Permalink
fix(backend): allow segments / genes to be null #822
Browse files Browse the repository at this point in the history
  • Loading branch information
fengelniederhammer committed Jan 24, 2024
1 parent 4e7c662 commit 5ef6d0c
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 134 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ data class OriginalData(
example = "{\"segment1\": \"ACTG\", \"segment2\": \"GTCA\"}",
description = "The key is the segment name, the value is the nucleotide sequence",
)
val unalignedNucleotideSequences: Map<SegmentName, NucleotideSequence>,
val unalignedNucleotideSequences: Map<SegmentName, NucleotideSequence?>,
)

enum class Status {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,17 @@ data class ReferenceGenome(
return "ReferenceGenome(nucleotideSequences=[$nucleotideSequencesString], genes=[$genesString])"
}

fun getNucleotideSegmentReference(segmentName: String): NucleotideSequence? = nucleotideSequences.find {
it.name == segmentName
}?.sequence

fun getAminoAcidGeneReference(gene: String): AminoAcidSequence? = genes.find {
it.name == gene
}?.sequence

private fun shortenSequence(sequence: String): String {
return if (sequence.length > 10) {
sequence.substring(0, 10) + "..."
} else {
sequence
}
/**
 * Looks up the reference sequence of the nucleotide segment called [segmentName].
 *
 * Returns the `sequence` of the first entry in `nucleotideSequences` whose `name`
 * equals [segmentName], or `null` when no entry matches.
 */
fun getNucleotideSegmentReference(segmentName: String): NucleotideSequence? {
    val matchingSegment = nucleotideSequences.firstOrNull { segment -> segment.name == segmentName }
    return matchingSegment?.sequence
}

/**
 * Looks up the reference sequence of the gene called [gene].
 *
 * Returns the `sequence` of the first entry in `genes` whose `name` equals [gene],
 * or `null` when no entry matches.
 */
fun getAminoAcidGeneReference(gene: String): AminoAcidSequence? {
    val matchingGene = genes.firstOrNull { candidate -> candidate.name == gene }
    return matchingGene?.sequence
}

/**
 * Abbreviates [sequence] for display: anything longer than 10 characters is cut to its
 * first 10 characters followed by "..."; shorter input is returned unchanged.
 */
private fun shortenSequence(sequence: String) =
    if (sequence.length <= 10) sequence else sequence.take(10) + "..."

private fun referenceListToString(list: List<ReferenceSequence>) = list.joinToString(", ") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,23 +53,21 @@ class CompressionService(private val backendConfig: BackendConfig) {
originalData.metadata,
originalData
.unalignedNucleotideSequences.mapValues {
decompressNucleotideSequence(
it.value,
it.key,
organism,
)
when (val compressedSequence = it.value) {
null -> null
else -> decompressNucleotideSequence(compressedSequence, it.key, organism)
}
},
)

fun compressSequencesInOriginalData(originalData: OriginalData, organism: Organism) = OriginalData(
originalData.metadata,
originalData
.unalignedNucleotideSequences.mapValues { (segmentName, sequenceData) ->
compressNucleotideSequence(
sequenceData,
segmentName,
organism,
)
when (sequenceData) {
null -> null
else -> compressNucleotideSequence(sequenceData, segmentName, organism)
}
},
)

Expand Down Expand Up @@ -165,12 +163,14 @@ class CompressionService(private val backendConfig: BackendConfig) {
.referenceGenomes
.getNucleotideSegmentReference(
segmentName,
)?.toByteArray()
)
?.toByteArray()

private fun getDictionaryForAminoAcidSequence(geneName: String, organism: Organism): ByteArray? = backendConfig
.getInstanceConfig(organism)
.referenceGenomes
.getAminoAcidGeneReference(
geneName,
)?.toByteArray()
)
?.toByteArray()
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package org.loculus.backend.service.submission

import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.NullNode
import org.loculus.backend.api.AminoAcidSequence
import org.loculus.backend.api.Insertion
import org.loculus.backend.api.MetadataMap
import org.loculus.backend.api.NucleotideSequence
import org.loculus.backend.api.Organism
import org.loculus.backend.api.ProcessedData
import org.loculus.backend.config.BackendConfig
Expand Down Expand Up @@ -245,11 +247,7 @@ class ProcessedSequenceEntryValidator(
)
}

private fun <T> validateNoMissingSegment(
segment: ReferenceSequence,
sequenceData: Map<String, T>,
sequence: String,
) {
private fun validateNoMissingSegment(segment: ReferenceSequence, sequenceData: Map<String, *>, sequence: String) {
if (!sequenceData.containsKey(segment.name)) {
throw ProcessingValidationException("Missing the required segment '${segment.name}' in '$sequence'.")
}
Expand All @@ -269,7 +267,7 @@ class ProcessedSequenceEntryValidator(
}
}

private fun <T> validateNoUnknownSegment(dataToValidate: Map<String, T>, sequenceGrouping: String) {
private fun validateNoUnknownSegment(dataToValidate: Map<String, *>, sequenceGrouping: String) {
val unknownSegments = dataToValidate.keys.subtract(referenceGenome.nucleotideSequences.map { it.name }.toSet())
if (unknownSegments.isNotEmpty()) {
val unknownSegmentsString = unknownSegments.sorted().joinToString(", ")
Expand All @@ -279,7 +277,10 @@ class ProcessedSequenceEntryValidator(
}
}

private fun validateNoUnknownNucleotideSymbol(dataToValidate: Map<String, String?>, sequenceGrouping: String) {
private fun validateNoUnknownNucleotideSymbol(
dataToValidate: Map<String, NucleotideSequence?>,
sequenceGrouping: String,
) {
for ((segmentName, sequence) in dataToValidate) {
if (sequence == null) {
continue
Expand Down Expand Up @@ -354,7 +355,7 @@ class ProcessedSequenceEntryValidator(
}
}

private fun validateNoUnknownAminoAcidSymbol(dataToValidate: Map<String, String?>) {
private fun validateNoUnknownAminoAcidSymbol(dataToValidate: Map<String, AminoAcidSequence?>) {
for ((gene, sequence) in dataToValidate) {
if (sequence == null) {
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@ import java.io.InputStreamReader
/**
 * A single FASTA record: the [sampleName] it is labelled with and its [sequence] text.
 */
data class FastaEntry(
    val sampleName: String,
    val sequence: String,
)

class FastaReader(inputStream: InputStream) : Iterator<FastaEntry>, Iterable<FastaEntry>, AutoCloseable {
private val reader: BufferedReader
private val reader: BufferedReader = BufferedReader(InputStreamReader(inputStream))
private var nextEntry: FastaEntry? = null
private var nextLine: String? = ""

init {
reader = BufferedReader(InputStreamReader(inputStream))
read()
}

Expand Down
Loading

0 comments on commit 5ef6d0c

Please sign in to comment.