AMIXED | 0 | ...
*
*
- * @param line the line from the profile data file to be parsed
- * @param nrColumns the number of columns, defined by the header line
- * @param sampleStartIndex the index of the first column with a sample name in the header field
- * @param hugoSymbolIndex the index of the column Hugo_Symbol
- * @param entrezGeneIdIndex the index of the column Entrez_Gene_Id
- * @param rppaGeneRefIndex the index of the column Composite.Element.Ref
* @param isRppaProfile true if this is an rppa profile (i.e. alteration type is PROTEIN_LEVEL and the first column is Composite.Element.Ref)
* @param isDiscretizedCnaProfile true if this is a discretized CNA profile (i.e. alteration type COPY_NUMBER_ALTERATION and showProfileInAnalysisTab is true)
- * @param daoGene an instance of DaoGeneOptimized ... for use in resolving gene symbols
- * @param orderedSampleList a list of the internal sample ids corresponding to the sample names in the header line
* @param existingCnaEvents a collection of CnaEvents, to be added to or updated during parsing of individual lines
* @return true if any record was stored in genetic_alteration, else false
* @throws DaoException if any DaoException is thrown while using daoGene or daoGeneticAlteration
*/
- private boolean parseLine(String line, int nrColumns, int sampleStartIndex,
- int hugoSymbolIndex, int entrezGeneIdIndex, int rppaGeneRefIndex,
- boolean isRppaProfile, boolean isDiscretizedCnaProfile,
- DaoGeneOptimized daoGene,
- List filteredSampleIndices, List orderedSampleList,
- Set existingCnaEvents
+ private boolean saveLine(String[] values,
+ String entrez,
+ String geneSymbol,
+ boolean isRppaProfile,
+ boolean isDiscretizedCnaProfile,
+ Set existingCnaEvents
) throws DaoException {
- //TODO: refactor this entire function - split functionality into smaller units / subroutines
-
boolean recordStored = false;
- // Ignore lines starting with #
- if (!line.startsWith("#") && line.trim().length() > 0) {
- String[] parts = line.split("\t", -1);
+ if (isRppaProfile && geneSymbol == null) {
+ ProgressMonitor.logWarning("Ignoring line with no Composite.Element.REF value");
+ return false;
+ }
- if (parts.length > nrColumns) {
- if (line.split("\t").length > nrColumns) {
- ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]);
- return false;
- }
- }
- String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length);
- values = filterOutNormalValues(filteredSampleIndices, values);
+ //If all are empty, skip line:
+ boolean noGeneSpecified = geneSymbol == null && entrez == null;
+ if (noGeneSpecified) {
+ ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol and no Entrez_Id");
+ return false;
+ }
- String geneSymbol = null;
- if (hugoSymbolIndex != -1) {
- geneSymbol = parts[hugoSymbolIndex];
- }
- //RPPA: //TODO - we should split up the RPPA scenario from this code...too many if/else because of this
- if (rppaGeneRefIndex != -1) {
- geneSymbol = parts[rppaGeneRefIndex];
- }
- if (geneSymbol != null && geneSymbol.isEmpty()) {
- geneSymbol = null;
+ if (geneSymbol != null) {
+ boolean multipleGenesLine = geneSymbol.contains("///");
+ if (multipleGenesLine) {
+ ProgressMonitor.logWarning("Ignoring gene symbol: " + geneSymbol
+ + " It is separated by ///. This indicates that the line contains information regarding multiple genes, and we cannot currently handle this");
+ return false;
}
- if (isRppaProfile && geneSymbol == null) {
- ProgressMonitor.logWarning("Ignoring line with no Composite.Element.REF value");
+ boolean unknownGene = geneSymbol.contains("---");
+ if (unknownGene) {
+ ProgressMonitor.logWarning("Ignoring gene symbol: " + geneSymbol
+ + " It is specified as ---. This indicates that the line contains information regarding an unknown gene, and we cannot currently handle this");
return false;
}
- //get entrez
- String entrez = null;
- if (entrezGeneIdIndex != -1) {
- entrez = parts[entrezGeneIdIndex];
+ }
+
+ List genes;
+ //If rppa, parse genes from "Composite.Element.REF" column:
+ if (isRppaProfile) {
+ genes = parseRPPAGenes(geneSymbol);
+ } else {
+ genes = parseGenes(entrez, geneSymbol);
+ }
+
+ //if genes still null, skip current record
+ if (genes == null || genes.isEmpty()) {
+ ProgressMonitor.logWarning("Gene with Entrez_Id " + entrez + " and gene symbol" + geneSymbol +" not found. Record will be skipped for this gene.");
+ return false;
+ }
+
+ List genesMatchingAnAlias = Collections.emptyList();
+ if (geneSymbol != null) {
+ genesMatchingAnAlias = daoGene.getGenesForAlias(geneSymbol);
+ }
+
+ Set microRNAGenes = new HashSet<>();
+ Set nonMicroRNAGenes = new HashSet<>();
+ Iterator geneIterator = Stream.concat(genes.stream(), genesMatchingAnAlias.stream()).iterator();
+ while (geneIterator.hasNext()) {
+ CanonicalGene g = geneIterator.next();
+ if ("miRNA".equals(g.getType())) {
+ microRNAGenes.add(g);
+ } else {
+ nonMicroRNAGenes.add(g);
}
- if (entrez != null) {
- if (entrez.isEmpty()) {
- entrez = null;
- }
- else if (!entrez.matches("[0-9]+")) {
- //TODO - would be better to give an exception in some cases, like negative Entrez values
- ProgressMonitor.logWarning("Ignoring line with invalid Entrez_Id " + entrez);
- return false;
+ }
+ if (!microRNAGenes.isEmpty()) {
+ // for micro rna, duplicate the data
+ for (CanonicalGene gene : microRNAGenes) {
+ if (this.saveValues(gene, values, geneSymbol)) {
+ recordStored = true;
}
}
-
- //If all are empty, skip line:
- if (geneSymbol == null && entrez == null) {
- ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol and no Entrez_Id");
+ if (!recordStored) {
+ if (nonMicroRNAGenes.isEmpty()) {
+ // this means that no microRNA records could be stored
+ ProgressMonitor.logWarning("Could not store microRNA data");
+ } else {
+ // this case :
+ // - at least one of the entrez-gene-ids was not a microRNA
+ // - all of the matched microRNA ids (if any) failed to be imported (presumably already imported on a prior line)
+ ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.");
+ }
return false;
+ }
+ } else {
+ // none of the matched genes are type "miRNA"
+ if (genes.size() == 1) {
+ // Store all values per gene:
+ recordStored = this.saveValues(genes.get(0), values, geneSymbol);
+ //only add extra CNA related records if the step above worked, otherwise skip:
+ if (recordStored && isDiscretizedCnaProfile) {
+ if (updateMode) {
+ DaoCnaEvent.removeSampleCnaEvents(geneticProfileId, orderedImportedSampleList);
+ }
+ long entrezGeneId = genes.get(0).getEntrezGeneId();
+ CnaUtil.storeCnaEvents(existingCnaEvents, composeCnaEventsToAdd(values, entrezGeneId));
+ }
} else {
- if (geneSymbol != null && (geneSymbol.contains("///") || geneSymbol.contains("---"))) {
- // Ignore gene IDs separated by ///. This indicates that
- // the line contains information regarding multiple genes, and
- // we cannot currently handle this.
- // Also, ignore gene IDs that are specified as ---. This indicates
- // the line contains information regarding an unknown gene, and
- // we cannot currently handle this.
- ProgressMonitor.logWarning("Ignoring gene ID: " + geneSymbol);
- return false;
+ if (isRppaProfile) { // for protein data, duplicate the data
+ recordStored = saveRppaValues(values, recordStored, genes, geneSymbol);
} else {
- List genes = null;
- //If rppa, parse genes from "Composite.Element.REF" column:
- if (isRppaProfile) {
- genes = parseRPPAGenes(geneSymbol);
- if (genes == null) {
- //will be null when there is a parse error in this case, so we
- //can return here and avoid duplicated messages:
- return false;
- }
- if (genes.isEmpty()) {
- String gene = (geneSymbol != null) ? geneSymbol : entrez;
- ProgressMonitor.logWarning("Gene not found for: [" + gene
- + "]. Ignoring it "
- + "and all tab-delimited data associated with it!");
- return false;
- }
- } else {
- //try entrez:
- if (entrez != null) {
- CanonicalGene gene = daoGene.getGene(Long.parseLong(entrez));
- if (gene != null) {
- genes = Arrays.asList(gene);
- }
- }
- //no entrez or could not resolve by entrez, try hugo:
- if ((genes == null || genes.isEmpty()) && geneSymbol != null) {
- // deal with multiple symbols separate by |, use the first one
- int ix = geneSymbol.indexOf("|");
- if (ix > 0) {
- geneSymbol = geneSymbol.substring(0, ix);
- }
- genes = daoGene.getGene(geneSymbol, true);
- }
- //if genes still null, skip current record
- if (genes == null || genes.isEmpty()) {
- ProgressMonitor.logWarning("Entrez_Id " + entrez + " not found. Record will be skipped for this gene.");
- return false;
- }
- }
-
- // If targetLine is specified and does not match the current line, skip the current line.
- if (targetLine != null && !(parts[0].equals(targetLine))) {
- return false;
- }
-
- List genesMatchingAnAlias = Collections.emptyList();
- if (geneSymbol != null) {
- genesMatchingAnAlias = daoGene.getGenesForAlias(geneSymbol);
- }
-
- Set microRNAGenes = new HashSet<>();
- Set nonMicroRNAGenes = new HashSet<>();
- Iterator geneIterator = Stream.concat(genes.stream(), genesMatchingAnAlias.stream()).iterator();
- while (geneIterator.hasNext()) {
- CanonicalGene g = geneIterator.next();
- if ("miRNA".equals(g.getType())) {
- microRNAGenes.add(g);
- } else {
- nonMicroRNAGenes.add(g);
- }
- }
- if (!microRNAGenes.isEmpty()) {
- // for micro rna, duplicate the data
- for (CanonicalGene gene : microRNAGenes) {
- if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) {
- recordStored = true;
- }
- }
- if (!recordStored) {
- if (nonMicroRNAGenes.isEmpty()) {
- // this means that no microRNA records could not be stored
- ProgressMonitor.logWarning("Could not store microRNA data");
- } else {
- // this case :
- // - at least one of the entrez-gene-ids was not a microRNA
- // - all of the matched microRNA ids (if any) failed to be imported (presumably already imported on a prior line)
- ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene.");
- }
- return false;
- }
- } else {
- // none of the matched genes are type "miRNA"
- if (genes.size() == 1) {
- List cnaEventsToAdd = new ArrayList();
-
- if (isDiscretizedCnaProfile) {
- long entrezGeneId = genes.get(0).getEntrezGeneId();
- for (int i = 0; i < values.length; i++) {
-
- // temporary solution -- change partial deletion back to full deletion.
- if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) {
- values[i] = CNA_VALUE_HOMOZYGOUS_DELETION;
- }
- if (values[i].equals(CNA_VALUE_AMPLIFICATION)
- // || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB
- // || values[i].equals(CNA_VALUE_ZERO)
- // || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION)
- || values[i].equals(CNA_VALUE_HOMOZYGOUS_DELETION)
- ) {
- Integer sampleId = orderedSampleList.get(i);
- CnaEvent cnaEvent = new CnaEvent(sampleId, geneticProfileId, entrezGeneId, Short.parseShort(values[i]));
- //delayed add:
- AbstractMap.SimpleEntry sampleGenePair = new AbstractMap.SimpleEntry<>(sampleId, entrezGeneId);
- Map pdAnnotationDetails = this.pdAnnotations.get(sampleGenePair);
- if (pdAnnotationDetails != null) {
- cnaEvent.setDriverFilter(pdAnnotationDetails.get("DRIVER_FILTER"));
- cnaEvent.setDriverFilterAnnotation(pdAnnotationDetails.get("DRIVER_FILTER_ANNOTATION"));
- cnaEvent.setDriverTiersFilter(pdAnnotationDetails.get("DRIVER_TIERS_FILTER"));
- cnaEvent.setDriverTiersFilterAnnotation(pdAnnotationDetails.get("DRIVER_TIERS_FILTER_ANNOTATION"));
- }
- cnaEventsToAdd.add(cnaEvent);
- }
- }
- }
- // Store all values per gene:
- recordStored = this.geneticAlterationImporter.store(values, genes.get(0), geneSymbol);
- //only add extra CNA related records if the step above worked, otherwise skip:
- if (recordStored) {
- CnaUtil.storeCnaEvents(existingCnaEvents, cnaEventsToAdd);
- }
- } else {
- if (isRppaProfile) { // for protein data, duplicate the data
- for (CanonicalGene gene : genes) {
- if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) {
- recordStored = true;
- nrExtraRecords++;
- }
- }
- if (recordStored) {
- //skip one, to avoid double counting:
- nrExtraRecords--;
- } else {
- // this means that RPPA could not be stored
- ProgressMonitor.logWarning("Could not store RPPA data");
- }
- } else {
- if (!recordStored) {
- // this case :
- // - the hugo gene symbol was ambiguous (matched multiple entrez-gene-ids)
- ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous. Record will be skipped for this gene.");
- }
- }
- }
+ if (!recordStored) {
+ // this case :
+ // - the hugo gene symbol was ambiguous (matched multiple entrez-gene-ids)
+ ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous. Record will be skipped for this gene.");
}
}
}
@@ -719,95 +718,147 @@ else if (!entrez.matches("[0-9]+")) {
return recordStored;
}
- /**
- * Parses line for gene set record and stores record in 'genetic_alteration' table.
- * @param line
- * @param nrColumns
- * @param sampleStartIndex
- * @param genesetIdIndex
- * @param filteredSampleIndices
- * @param daoGeneticAlteration
- * @return
- * @throws DaoException
- */
- private boolean parseGenesetLine(String line, int nrColumns, int sampleStartIndex, int genesetIdIndex,
- List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration) throws DaoException {
- boolean storedRecord = false;
-
- if (!line.startsWith("#") && line.trim().length() > 0) {
- String[] parts = line.split("\t", -1);
+ private boolean saveValues(CanonicalGene canonicalGene, String[] values, String geneSymbol) throws DaoException {
+ if (updateMode) {
+ values = updateValues(canonicalGene.getGeneticEntityId(), values);
+ if (!geneticAlterationImporter.isImportedAlready(canonicalGene)) {
+ daoGeneticAlteration.deleteAllRecordsInGeneticProfile(geneticProfile.getGeneticProfileId(), canonicalGene.getGeneticEntityId());
+ }
+ }
+ return geneticAlterationImporter.store(values, canonicalGene, geneSymbol);
+ }
+ private boolean saveValues(int geneticEntityId, String[] values) throws DaoException {
+ if (updateMode) {
+ daoGeneticAlteration.deleteAllRecordsInGeneticProfile(geneticProfile.getGeneticProfileId(), geneticEntityId);
+ values = updateValues(geneticEntityId, values);
+ }
+ return daoGeneticAlteration.addGeneticAlterationsForGeneticEntity(geneticProfile.getGeneticProfileId(), geneticEntityId, values) > 0;
+ }
- if (parts.length > nrColumns) {
- if (line.split("\t").length > nrColumns) {
- ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]);
- return false;
+ private String[] updateValues(int geneticEntityId, String[] values) {
+ Map sampleIdToValue = ArrayUtil.zip(orderedImportedSampleList.toArray(new Integer[0]), values);
+ String[] updatedSampleValues = new String[orderedSampleList.size()];
+ for (int i = 0; i < orderedSampleList.size(); i++) {
+ updatedSampleValues[i] = "";
+ int sampleId = orderedSampleList.get(i);
+ if (geneticAlterationMap.containsKey(geneticEntityId)) {
+ HashMap savedSampleIdToValue = geneticAlterationMap.get(geneticEntityId);
+ updatedSampleValues[i] = savedSampleIdToValue.containsKey(sampleId) ? savedSampleIdToValue.remove(sampleId): "";
+ if (savedSampleIdToValue.isEmpty()) {
+ geneticAlterationMap.remove(geneticEntityId);
}
}
+ if (sampleIdToValue.containsKey(sampleId)) {
+ updatedSampleValues[i] = sampleIdToValue.get(sampleId);
+ }
+ }
+ return updatedSampleValues;
+ }
- String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length);
+ private boolean saveRppaValues(String[] values, boolean recordStored, List genes, String geneSymbol) throws DaoException {
+ for (CanonicalGene gene : genes) {
+ if (this.saveValues(gene, values, geneSymbol)) {
+ recordStored = true;
+ nrExtraRecords++;
+ }
+ }
+ if (recordStored) {
+ //skip one, to avoid double counting:
+ nrExtraRecords--;
+ } else {
+ // this means that RPPA could not be stored
+ ProgressMonitor.logWarning("Could not store RPPA data");
+ }
+ return recordStored;
+ }
+
+ private List parseGenes(String entrez, String geneSymbol) {
+ //try entrez:
+ if (entrez != null) {
+ CanonicalGene gene = daoGene.getGene(Long.parseLong(entrez));
+ if (gene != null) {
+ return Arrays.asList(gene);
+ }
+ }
+ //no entrez or could not resolve by entrez, try hugo:
+ if (geneSymbol != null) {
+ // deal with multiple symbols separated by |, use the first one
+ int ix = geneSymbol.indexOf("|");
+ if (ix > 0) {
+ geneSymbol = geneSymbol.substring(0, ix);
+ }
+ return daoGene.getGene(geneSymbol, true);
+ }
+ return List.of();
+ }
- // trim whitespace from values
- values = Stream.of(values).map(String::trim).toArray(String[]::new);
- values = filterOutNormalValues(filteredSampleIndices, values);
+ private List composeCnaEventsToAdd(String[] values, long entrezGeneId) {
+ if (updateMode) {
+ values = updateValues((int) entrezGeneId, values);
+ }
+ List cnaEventsToAdd = new ArrayList();
+ for (int i = 0; i < values.length; i++) {
- Geneset geneset = DaoGeneset.getGenesetByExternalId(parts[genesetIdIndex]);
- if (geneset != null) {
- storedRecord = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, geneset.getGeneticEntityId(),
- EntityType.GENESET, geneset.getExternalId());
+ // temporary solution -- change partial deletion back to full deletion.
+ if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) {
+ values[i] = CNA_VALUE_HOMOZYGOUS_DELETION;
}
- else {
- ProgressMonitor.logWarning("Geneset " + parts[genesetIdIndex] + " not found in DB. Record will be skipped.");
+ if (values[i].equals(CNA_VALUE_AMPLIFICATION)
+ // || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB
+ // || values[i].equals(CNA_VALUE_ZERO)
+ // || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION)
+ || values[i].equals(CNA_VALUE_HOMOZYGOUS_DELETION)
+ ) {
+ Integer sampleId = orderedSampleList.get(i);
+ CnaEvent cnaEvent = new CnaEvent(sampleId, geneticProfileId, entrezGeneId, Short.parseShort(values[i]));
+ //delayed add:
+ AbstractMap.SimpleEntry sampleGenePair = new AbstractMap.SimpleEntry<>(sampleId, entrezGeneId);
+ Map pdAnnotationDetails = this.pdAnnotations.get(sampleGenePair);
+ if (pdAnnotationDetails != null) {
+ cnaEvent.setDriverFilter(pdAnnotationDetails.get("DRIVER_FILTER"));
+ cnaEvent.setDriverFilterAnnotation(pdAnnotationDetails.get("DRIVER_FILTER_ANNOTATION"));
+ cnaEvent.setDriverTiersFilter(pdAnnotationDetails.get("DRIVER_TIERS_FILTER"));
+ cnaEvent.setDriverTiersFilterAnnotation(pdAnnotationDetails.get("DRIVER_TIERS_FILTER_ANNOTATION"));
+ }
+ cnaEventsToAdd.add(cnaEvent);
}
}
- return storedRecord;
+ return cnaEventsToAdd;
}
/**
- * Parses line for generic assay profile record and stores record in 'genetic_alteration' table.
- * @param line row from the separated-text that contains one or more values on a single sample
- * @param nrColumns
- * @param sampleStartIndex index of the first sample column
- * @param genericAssayIdIndex index of the column that uniquely identifies a sample
- * @param filteredSampleIndices
- * @param daoGeneticAlteration
+ * Looks up the gene set by its external id and stores its record in the 'genetic_alteration' table.
+ * @param genesetId
* @return
* @throws DaoException
*/
+ private boolean saveGenesetLine(String[] values, String genesetId) throws DaoException {
+ boolean storedRecord = false;
- private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStartIndex, int genericAssayIdIndex,
- List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration, Map genericAssayStableIdToEntityIdMap) throws DaoException {
-
- boolean recordIsStored = false;
-
- if (!line.startsWith("#") && line.trim().length() > 0) {
- String[] parts = line.split("\t", -1);
- if (parts.length > nrColumns) {
- if (line.split("\t").length > nrColumns) {
- ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length
- + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]);
- return false;
- }
- }
-
- String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length);
+ Geneset geneset = DaoGeneset.getGenesetByExternalId(genesetId);
+ if (geneset != null) {
+ storedRecord = storeGeneticEntityGeneticAlterations(values, geneset.getGeneticEntityId(), EntityType.GENESET, geneset.getExternalId());
+ }
+ else {
+ ProgressMonitor.logWarning("Geneset " + genesetId + " not found in DB. Record will be skipped.");
+ }
+ return storedRecord;
+ }
- // trim whitespace from values
- values = Stream.of(values).map(String::trim).toArray(String[]::new);
- values = filterOutNormalValues(filteredSampleIndices, values);
+ /**
+ * Looks up the generic assay entity by its stable id and stores its record in the 'genetic_alteration' table.
+ */
+ private boolean saveGenericAssayLine(String[] values, String genericAssayId, Map genericAssayStableIdToEntityIdMap) {
- String stableId = parts[genericAssayIdIndex];
- Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(stableId, null);
+ boolean recordIsStored = false;
- if (entityId == null) {
- ProgressMonitor.logWarning("Generic Assay entity " + parts[genericAssayIdIndex] + " not found in DB. Record will be skipped.");
- } else {
- recordIsStored = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, entityId,
- EntityType.GENERIC_ASSAY, stableId);
- }
+ Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(genericAssayId, null);
- return recordIsStored;
+ if (entityId == null) {
+ ProgressMonitor.logWarning("Generic Assay entity " + genericAssayId + " not found in DB. Record will be skipped.");
+ } else {
+ recordIsStored = storeGeneticEntityGeneticAlterations(values, entityId, EntityType.GENERIC_ASSAY, genericAssayId);
}
return recordIsStored;
@@ -816,18 +867,15 @@ private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStar
/**
* Stores genetic alteration data for a genetic entity.
* @param values
- * @param daoGeneticAlteration
* @param geneticEntityId - internal id for genetic entity
* @param geneticEntityType - "GENE", "GENESET", "PHOSPHOPROTEIN"
* @param geneticEntityName - hugo symbol for "GENE", external id for "GENESET", phospho gene name for "PHOSPHOPROTEIN"
* @return boolean indicating if record was stored successfully or not
*/
- private boolean storeGeneticEntityGeneticAlterations(String[] values, DaoGeneticAlteration daoGeneticAlteration,
- Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) {
+ private boolean storeGeneticEntityGeneticAlterations(String[] values, Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) {
try {
if (importedGeneticEntitySet.add(geneticEntityId)) {
- daoGeneticAlteration.addGeneticAlterationsForGeneticEntity(geneticProfile.getGeneticProfileId(), geneticEntityId, values);
- return true;
+ return saveValues(geneticEntityId, values);
}
else {
ProgressMonitor.logWarning("Data for genetic entity " + geneticEntityName
diff --git a/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java b/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java
new file mode 100644
index 00000000..3235d33e
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java
@@ -0,0 +1,21 @@
+package org.mskcc.cbio.portal.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class ArrayUtil {
+ public static Map zip(K[] keys, V[] values) {
+ Map map = new HashMap<>();
+
+ // Check if both arrays have the same length
+ if (keys.length == values.length) {
+ for (int i = 0; i < keys.length; i++) {
+ map.put(keys[i], values[i]);
+ }
+ } else {
+ throw new IllegalArgumentException("Arrays must be of the same length");
+ }
+ return map;
+
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java b/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java
index 3cc6fd71..de7fe85a 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java
@@ -45,6 +45,7 @@ public CnaUtil(String[] headerParts, Set namespaces) {
this.namespaceColumnParser = new NamespaceColumnParser(namespaces, headerParts);
}
+ // TODO inc: update
public static void storeCnaEvents(
Set existingCnaEvents,
List cnaEventsToAdd
@@ -53,7 +54,7 @@ public static void storeCnaEvents(
if (!CNA.AMP.equals(cnaEvent.getAlteration()) && !CNA.HOMDEL.equals(cnaEvent.getAlteration())) {
continue;
}
-
+ // TODO Clean cnv event
// Revert PR https://github.com/cBioPortal/cbioportal-core/pull/1 breaks importer
Optional existingCnaEvent = existingCnaEvents
.stream()
diff --git a/src/main/java/org/mskcc/cbio/portal/util/EntrezValidator.java b/src/main/java/org/mskcc/cbio/portal/util/EntrezValidator.java
new file mode 100644
index 00000000..335bfd66
--- /dev/null
+++ b/src/main/java/org/mskcc/cbio/portal/util/EntrezValidator.java
@@ -0,0 +1,7 @@
+package org.mskcc.cbio.portal.util;
+
+public class EntrezValidator {
+ public static boolean isaValidEntrezId(String entrez) {
+ return entrez.matches("[0-9]+");
+ }
+}
diff --git a/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java b/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java
index 744ca565..2e767618 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java
@@ -43,30 +43,6 @@
* @author Ethan Cerami.
*/
public class FileUtil {
- /**
- * BioPAX File Type.
- */
- public static final int BIOPAX = 0;
-
- /**
- * PSI_MI File Type.
- */
- public static final int PSI_MI = 1;
-
- /**
- * External DBs File Type.
- */
- public static final int EXTERNAL_DBS = 2;
-
- /**
- * Identifiers File Type.
- */
- public static final int IDENTIFIERS = 3;
-
- /**
- * Unknown File Type.
- */
- public static final int UNKNOWN = 4;
/**
* Gets Number of Lines in Specified File.
@@ -77,32 +53,26 @@ public class FileUtil {
*/
public static int getNumLines(File file) throws IOException {
int numLines = 0;
- FileReader reader = new FileReader(file);
- BufferedReader buffered = new BufferedReader(reader);
- String line = buffered.readLine();
- while (line != null) {
- if (!line.startsWith("#") && line.trim().length() > 0) {
- numLines++;
+ try (FileReader reader = new FileReader(file); BufferedReader buffered = new BufferedReader(reader)) {
+ String line = buffered.readLine();
+ while (line != null) {
+ if (isInfoLine(line)) {
+ numLines++;
+ }
+ line = buffered.readLine();
}
- line = buffered.readLine();
+ return numLines;
}
- reader.close();
- return numLines;
}
/**
- * Gets Next Line of Input. Filters out Empty Lines and Comments.
- *
- * @param buf BufferedReader Object.
- * @return next line of input.
- * @throws IOException Error reading input stream.
+ * Does the line carry any information?
+ * e.g. blank lines and comments do not
+ * @param line
+ * @return
*/
- public static String getNextLine(BufferedReader buf) throws IOException {
- String line = buf.readLine();
- while (line != null && (line.trim().length() == 0
- || line.trim().startsWith("#"))) {
- line = buf.readLine();
- }
- return line;
+ public static boolean isInfoLine(String line) {
+ return !line.startsWith("#") && line.trim().length() > 0;
}
+
}
\ No newline at end of file
diff --git a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java
index af686a72..ab862756 100644
--- a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java
+++ b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java
@@ -76,22 +76,25 @@ public static GeneticProfile loadGeneticProfile(File file) throws IOException, D
GeneticProfile geneticProfile = loadGeneticProfileFromMeta(file);
GeneticProfile existingGeneticProfile = DaoGeneticProfile.getGeneticProfileByStableId(geneticProfile.getStableId());
if (existingGeneticProfile != null) {
- if (!existingGeneticProfile.getDatatype().equals("MAF")) {
- // the dbms already contains a GeneticProfile with the file's stable_id. This scenario is not supported
- // anymore, so throw error telling user to remove existing profile first:
- throw new RuntimeException("Error: genetic_profile record found with same Stable ID as the one used in your data: "
- + existingGeneticProfile.getStableId() + ". Remove the existing genetic_profile record first.");
- } else {
- // For mutation data only we can have multiple files with the same genetic_profile.
- // There is a constraint in the mutation database table to prevent duplicated data
- // If this constraint is hit (mistakenly importing the same maf twice) MySqlBulkLoader will throw an exception
- //
- // make an object combining the pre-existing profile with the file-specific properties of the current file
- GeneticProfile gp = new GeneticProfile(existingGeneticProfile);
- gp.setTargetLine(gp.getTargetLine());
- gp.setOtherMetadataFields(gp.getAllOtherMetadataFields());
- return gp;
+ ProgressMonitor.setCurrentMessage("genetic_profile record found with same Stable ID (" + geneticProfile.getStableId()
+ + "). Using it instead.");
+ if (geneticProfile.getGeneticAlterationType() != existingGeneticProfile.getGeneticAlterationType()) {
+ throw new IllegalStateException("genetic_profile record found with same Stable ID ("
+ + existingGeneticProfile.getStableId() + ") but different genetic alteration type: "
+ + existingGeneticProfile.getGeneticProfileId());
}
+ if (!existingGeneticProfile.getDatatype().equals(geneticProfile.getDatatype())) {
+ throw new IllegalStateException("genetic_profile record found with same Stable ID ("
+ + existingGeneticProfile.getStableId() + ") but different data type: "
+ + existingGeneticProfile.getDatatype());
+ }
+ if (geneticProfile.getCancerStudyId() != existingGeneticProfile.getCancerStudyId()) {
+ throw new IllegalStateException("genetic_profile record found with same Stable ID ("
+ + existingGeneticProfile.getStableId() + ") but different cancer study (id="
+ + existingGeneticProfile.getCancerStudyId() + ")");
+ }
+ existingGeneticProfile.setOtherMetadataFields(geneticProfile.getAllOtherMetadataFields());
+ return existingGeneticProfile;
}
// For GSVA profiles, we want to create a geneticProfileLink from source_stable_id for:
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java
index 8c1afdcc..83e04144 100644
--- a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java
@@ -72,7 +72,7 @@ public void setUp() throws DaoException
public void testDaoGetAllGeneticProfiles() throws DaoException {
ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId);
- assertEquals(7, list.size());
+ assertEquals(9, list.size());
}
@Test
@@ -134,12 +134,12 @@ public void testDaoDeleteGeneticProfile() throws DaoException {
GeneticProfile geneticProfile = DaoGeneticProfile.getGeneticProfileById(2);
- assertEquals(7, DaoGeneticProfile.getCount());
+ assertEquals(9, DaoGeneticProfile.getCount());
DaoGeneticProfile.deleteGeneticProfile(geneticProfile);
- assertEquals(6, DaoGeneticProfile.getCount());
+ assertEquals(8, DaoGeneticProfile.getCount());
ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId);
- assertEquals(6, list.size());
+ assertEquals(8, list.size());
geneticProfile = list.get(0);
assertEquals(studyId, geneticProfile.getCancerStudyId());
assertEquals("mRNA expression (microarray)", geneticProfile.getProfileName());
@@ -155,7 +155,7 @@ public void testDaoUpdateGeneticProfile() throws DaoException {
geneticProfile.getGeneticProfileId(), "Updated Name",
"Updated Description"));
ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId);
- assertEquals(7, list.size());
+ assertEquals(9, list.size());
geneticProfile = list.get(0);
assertEquals(studyId, geneticProfile.getCancerStudyId());
assertEquals("Updated Name", geneticProfile.getProfileName());
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java
new file mode 100644
index 00000000..fdf36995
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java
@@ -0,0 +1,55 @@
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.jetbrains.annotations.NotNull;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneticEntity;
+
+import java.util.HashMap;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class GeneticAlterationsTestHelper {
+    /** Resolves each genetic-entity stable id to its internal entity id; a DaoException is rethrown as RuntimeException. */
+    @NotNull
+    public static Set<Integer> geneStableIdsToEntityIds(Set<String> beforeStableIds) {
+        return beforeStableIds.stream().map(stableId -> {
+            try {
+                return geneStableIdToEntityId(stableId);
+            } catch (DaoException e) {
+                throw new RuntimeException(e);
+            }
+        }).collect(Collectors.toSet());
+    }
+
+    /** Looks up the genetic entity with the given stable id and returns its internal id. */
+    public static int geneStableIdToEntityId(String stableId) throws DaoException {
+        return DaoGeneticEntity.getGeneticEntityByStableId(stableId).getId();
+    }
+
+    /** Asserts that the map has exactly the expected row keys and that every row has exactly the expected sample ids. */
+    public static <N> void assertPriorDataState(HashMap<N, HashMap<Integer, String>> beforeResult, Set<N> expectedEntityIds, Set<Integer> expectedSampleIds) {
+        assertEquals(expectedEntityIds, beforeResult.keySet());
+        beforeResult.forEach((entityId, sampleIdToValue) ->
+            assertEquals("Samples for gene with entityId = " + entityId + " have to match expected ones",
+                expectedSampleIds, sampleIdToValue.keySet()));
+    }
+
+    /** Asserts that each listed (entity, sample) cell is present after the upload and holds the same value as before. */
+    public static <N> void assertNoChange(HashMap<N, HashMap<Integer, String>> beforeResult,
+                                          HashMap<N, HashMap<Integer, String>> afterResult,
+                                          Set<N> entityIds, Set<Integer> sampleIds) {
+        entityIds.forEach(entityId -> {
+            assertTrue("After result is expected to contain entityId=" + entityId, afterResult.containsKey(entityId));
+            sampleIds.forEach(sampleId -> {
+                assertTrue("Sample_id=" + sampleId + " expected to be found for gene with entityId=" + entityId,
+                    afterResult.get(entityId).containsKey(sampleId));
+                assertEquals("The values for sample_id=" + sampleId +
+                    " and entityId=" + entityId + " before and after upload have to match.",
+                    beforeResult.get(entityId).get(sampleId), afterResult.get(entityId).get(sampleId));
+            });
+        });
+    }
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java
new file mode 100644
index 00000000..ad3ebd55
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java
@@ -0,0 +1,177 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.cbioportal.model.CNA;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mskcc.cbio.portal.dao.DaoCnaEvent;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.CnaEvent;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportProfileData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState;
+
+/**
+ * Tests Incremental Import of COPY_NUMBER_ALTERATION Data.
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"})
+@Rollback
+@Transactional
+public class TestIncrementalCopyNumberAlterationImport {
+
+    /**
+     * Test incremental upload of COPY_NUMBER_ALTERATION DISCRETE (gistic)
+     */
+    @Test
+    public void testDiscreteCNA() throws DaoException, IOException {
+        // Prior checks: pin the state of the profile before the import so the
+        // assertions afterwards can tell new, updated and untouched genes and
+        // samples apart.
+        // Hugo_Symbol: CDK1
+        final long newGeneEntrezId = 983L;
+        // Gene that is part of the platform, but absent during the incremental upload
+        // Hugo_Symbol: ATM
+        final long absentGeneEntrezId = 472L;
+        final Set<Long> noChangeEntrezIds = Set.of(10000L, 207L, 208L, 3265L, 3845L, 4893L, 672L, 673L, 675L);
+        final Set<Long> beforeEntrezIds = new HashSet<>(noChangeEntrezIds);
+        beforeEntrezIds.add(absentGeneEntrezId);
+
+        // stable_id: TCGA-XX-0800
+        final int newSampleId = 15;
+        // stable_id: TCGA-A1-A0SO
+        final int updateSampleId = 12;
+        final Set<Integer> noChangeSampleIds = Set.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14);
+        final Set<Integer> beforeSampleIds = new HashSet<>(noChangeSampleIds);
+        beforeSampleIds.add(updateSampleId);
+
+        final Set<Integer> afterSampleIds = new HashSet<>(beforeSampleIds);
+        afterSampleIds.add(newSampleId);
+
+        GeneticProfile discreteCNAProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_gistic");
+        assertNotNull(discreteCNAProfile);
+        HashMap<Long, HashMap<Integer, String>> beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(discreteCNAProfile.getGeneticProfileId(), null);
+        assertPriorDataState(beforeResult, beforeEntrezIds, beforeSampleIds);
+
+        List<Short> allCnaLevels = Arrays.stream(CNA.values()).map(CNA::getCode).toList();
+        Set<Integer> beforeCnaEventsSampleIds = Set.of(4, 13, 14, updateSampleId);
+        List<CnaEvent> beforeSampleCnaEvents = DaoCnaEvent.getCnaEvents(afterSampleIds.stream().toList(),
+            null,
+            discreteCNAProfile.getGeneticProfileId(),
+            allCnaLevels);
+        Map<Integer, List<CnaEvent>> beforeSampleIdToSampleCnaEvents = beforeSampleCnaEvents.stream().collect(Collectors.groupingBy(CnaEvent::getSampleId));
+        assertEquals(beforeCnaEventsSampleIds, beforeSampleIdToSampleCnaEvents.keySet());
+
+        File dataFolder = new File("src/test/resources/incremental/copy_number_alteration/");
+        File metaFile = new File(dataFolder, "meta_cna_discrete.txt");
+        File dataFile = new File(dataFolder, "data_cna_discrete.txt");
+
+        // Test: run the importer on the incremental meta/data files.
+        // NOTE(review): "--overwrite-existing" is presumably the switch that selects
+        // the incremental mode here - confirm against ImportProfileData.
+        new ImportProfileData(new String[] {
+            "--loadMode", "bulkLoad",
+            "--meta", metaFile.getAbsolutePath(),
+            "--data", dataFile.getAbsolutePath(),
+            "--overwrite-existing",
+        }).run();
+
+        // After test assertions: exactly one gene and one sample are added, untouched
+        // cells keep their values, and the gene absent from the upload holds "" for
+        // both the new and the updated sample.
+        HashMap<Long, HashMap<Integer, String>> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(discreteCNAProfile.getGeneticProfileId(), null);
+        assertEquals("After result should get exactly one new gene", beforeEntrezIds.size() + 1,
+            afterResult.size());
+        afterResult.values()
+            .forEach(sampleToValue ->
+                assertEquals("Each gene row has to get one extra sample", beforeSampleIds.size() + 1, sampleToValue.size()));
+        assertNoChange(beforeResult, afterResult, noChangeEntrezIds, noChangeSampleIds);
+        assertEquals("-2", afterResult.get(newGeneEntrezId).get(newSampleId));
+        assertEquals("2", afterResult.get(newGeneEntrezId).get(updateSampleId));
+        assertEquals("", afterResult.get(absentGeneEntrezId).get(newSampleId));
+        assertEquals("", afterResult.get(absentGeneEntrezId).get(updateSampleId));
+
+        List<CnaEvent> afterSampleCnaEvents = DaoCnaEvent.getCnaEvents(afterSampleIds.stream().toList(),
+            afterResult.keySet(),
+            discreteCNAProfile.getGeneticProfileId(),
+            allCnaLevels);
+        Map<Integer, List<CnaEvent>> afterSampleIdToSampleCnaEvents = afterSampleCnaEvents.stream().collect(Collectors.groupingBy(CnaEvent::getSampleId));
+        assertEquals("There is only one new sample that has to gain cna events", beforeCnaEventsSampleIds.size() + 1, afterSampleIdToSampleCnaEvents.size());
+        beforeCnaEventsSampleIds.forEach(sampleId -> {
+            if (sampleId == updateSampleId) {
+                return;
+            }
+            Set<CnaEvent.Event> beforeCnaEvents = beforeSampleIdToSampleCnaEvents.get(sampleId).stream().map(CnaEvent::getEvent).collect(Collectors.toSet());
+            Set<CnaEvent.Event> afterCnaEvents = afterSampleIdToSampleCnaEvents.get(sampleId).stream().map(CnaEvent::getEvent).collect(Collectors.toSet());
+            assertEquals("CNA events for sample_id=" + sampleId + " must not change.", beforeCnaEvents, afterCnaEvents);
+        });
+        Map<Long, CNA> newSampleEntrezGeneIdToCnaAlteration = afterSampleIdToSampleCnaEvents.get(newSampleId).stream()
+            .map(CnaEvent::getEvent)
+            .collect(Collectors.toMap(
+                event -> event.getGene().getEntrezGeneId(),
+                CnaEvent.Event::getAlteration));
+        assertEquals(Map.of(
+                208L, CNA.HOMDEL,
+                3265L, CNA.AMP,
+                4893L, CNA.HOMDEL,
+                672L, CNA.AMP,
+                673L, CNA.AMP,
+                675L, CNA.HOMDEL,
+                newGeneEntrezId, CNA.HOMDEL
+            ),
+            newSampleEntrezGeneIdToCnaAlteration);
+        Map<Long, CNA> updatedSampleEntrezGeneIdToCnaAlteration = afterSampleIdToSampleCnaEvents.get(updateSampleId).stream()
+            .map(CnaEvent::getEvent)
+            .collect(Collectors.toMap(
+                event -> event.getGene().getEntrezGeneId(),
+                CnaEvent.Event::getAlteration));
+        assertEquals(Map.of(
+                10000L, CNA.HOMDEL,
+                207L, CNA.AMP,
+                3845L, CNA.AMP,
+                673L, CNA.HOMDEL,
+                newGeneEntrezId, CNA.AMP
+            ),
+            updatedSampleEntrezGeneIdToCnaAlteration);
+    }
+
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGenericAssayImporter.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGenericAssayImporter.java
new file mode 100644
index 00000000..e0ef8cf5
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGenericAssayImporter.java
@@ -0,0 +1,162 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticEntity;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportProfileData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdToEntityId;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdsToEntityIds;
+
+/**
+ * Tests Incremental Import of Generic Assay data
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
+@Rollback
+@Transactional
+public class TestIncrementalGenericAssayImporter {
+
+    // stable_id: TCGA-A1-A0SB-01
+    final int newSampleId = 1;
+    // stable_id: TCGA-A1-A0SD-01
+    final int updateSampleId = 2;
+    // stable_id: TCGA-A1-A0SE-01
+    final int noChangeSampleId = 3;
+    final Set<Integer> beforeSampleIds = Set.of(updateSampleId, noChangeSampleId);
+
+    // Stable id that is part of the platform, but absent during the incremental upload
+    final String absentStableId = "L-685458";
+    final Set<String> noChangeStableIds = Set.of("Erlotinib", "Irinotecan", "Lapatinib");
+    final Set<String> beforeStableIds = new HashSet<>(noChangeStableIds);
+    { beforeStableIds.add(absentStableId); }
+
+    private GeneticProfile ic50Profile;
+    private HashMap<Integer, HashMap<Integer, String>> beforeResult;
+
+    /**
+     * Test incremental upload of GENERIC_ASSAY
+     */
+    @Test
+    public void testGenericAssay() throws DaoException {
+
+        File dataFolder = new File("src/test/resources/incremental/generic_assay/");
+        File metaFile = new File(dataFolder, "meta_treatment_ic50.txt");
+        File dataFile = new File(dataFolder, "data_treatment_ic50.txt");
+
+        // Test: run the importer on the incremental meta/data files.
+        // NOTE(review): "--overwrite-existing" is presumably the switch that selects
+        // the incremental mode here - confirm against ImportProfileData.
+        new ImportProfileData(new String[] {
+            "--loadMode", "bulkLoad",
+            "--meta", metaFile.getAbsolutePath(),
+            "--data", dataFile.getAbsolutePath(),
+            "--overwrite-existing",
+        }).run();
+
+        // After test assertions: one entity row and one sample column are added,
+        // the untouched sample keeps its values, and cells without uploaded data
+        // hold "".
+        HashMap<Integer, HashMap<Integer, String>> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMapForEntityIds(ic50Profile.getGeneticProfileId(), null);
+        assertEquals("After result should have +1 amount of entries", beforeResult.size() + 1, afterResult.size());
+        afterResult.values()
+            .forEach(sampleToValue ->
+                assertEquals("Each gene row has to get one extra sample", beforeSampleIds.size() + 1, sampleToValue.size()));
+        assertNoChange(beforeResult, afterResult, geneStableIdsToEntityIds(noChangeStableIds), Set.of(noChangeSampleId));
+        int erlotinibEntityId = geneStableIdToEntityId("Erlotinib");
+        assertEquals(">8", afterResult.get(erlotinibEntityId).get(newSampleId));
+        assertEquals("7.5", afterResult.get(erlotinibEntityId).get(updateSampleId));
+        int irinotecanEntityId = geneStableIdToEntityId("Irinotecan");
+        assertEquals("", afterResult.get(irinotecanEntityId).get(newSampleId));
+        assertEquals("0.081", afterResult.get(irinotecanEntityId).get(updateSampleId));
+        int absentEntityId = geneStableIdToEntityId(absentStableId);
+        assertEquals("", afterResult.get(absentEntityId).get(newSampleId));
+        assertEquals("", afterResult.get(absentEntityId).get(updateSampleId));
+        int lapatinibEntityId = geneStableIdToEntityId("Lapatinib");
+        assertEquals("6.2", afterResult.get(lapatinibEntityId).get(newSampleId));
+        assertEquals("7.848", afterResult.get(lapatinibEntityId).get(updateSampleId));
+        assertNotNull("New generic entity has to be added", DaoGeneticEntity.getGeneticEntityByStableId("LBW242"));
+        int lbw242EntityId = geneStableIdToEntityId("LBW242");
+        assertEquals("0.1", afterResult.get(lbw242EntityId).get(newSampleId));
+        assertEquals(">~8", afterResult.get(lbw242EntityId).get(updateSampleId));
+    }
+
+    /**
+     * Test that incremental upload of GENERIC_ASSAY (patient level) is not supported
+     */
+    @Test
+    public void testGenericAssayPatientLevel() throws DaoException {
+
+        File dataFolder = new File("src/test/resources/incremental/generic_assay/");
+        File metaFile = new File(dataFolder, "meta_treatment_ic50_patient_level.txt");
+        File dataFile = new File(dataFolder, "data_treatment_ic50_patient_level.txt");
+
+        // NOTE: the String passed to assertThrows is JUnit's failure message (reported
+        // when nothing is thrown); the message of the thrown exception itself is not
+        // verified here. Only the exception type (RuntimeException) is checked.
+        assertThrows("Incremental upload for generic assay patient_level data is not supported. Please use sample level instead.",
+            RuntimeException.class, () -> {
+                new ImportProfileData(new String[] {
+                    "--loadMode", "bulkLoad",
+                    "--meta", metaFile.getAbsolutePath(),
+                    "--data", dataFile.getAbsolutePath(),
+                    "--overwrite-existing",
+                }).run();
+            });
+    }
+
+    @Before
+    public void setUp() throws DaoException {
+        DaoCancerStudy.reCacheAll();
+
+        ic50Profile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_treatment_ic50");
+        assertNotNull(ic50Profile);
+
+        beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMapForEntityIds(ic50Profile.getGeneticProfileId(), null);
+        Set<Integer> beforeEntityIds = geneStableIdsToEntityIds(beforeStableIds);
+        assertPriorDataState(beforeResult, beforeEntityIds, beforeSampleIds);
+    }
+
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGsvaImporter.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGsvaImporter.java
new file mode 100644
index 00000000..c629ecb4
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGsvaImporter.java
@@ -0,0 +1,81 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticEntity;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.GeneticAlterationType;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportTabDelimData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdToEntityId;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdsToEntityIds;
+
+/**
+ * Tests Incremental Import is not supported for GSVA data type
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
+@Rollback
+@Transactional
+public class TestIncrementalGsvaImporter {
+    /** Registers a GENESET_SCORE profile and expects the ImportTabDelimData constructor to reject it. */
+    @Test
+    public void testGsvaIsNotSupported() throws DaoException, IOException {
+        GeneticProfile gsvaProfile = new GeneticProfile();
+        gsvaProfile.setCancerStudyId(DaoCancerStudy.getCancerStudyByStableId("study_tcga_pub").getInternalId());
+        gsvaProfile.setStableId("gsva_scores");
+        gsvaProfile.setDatatype("GENESET_SCORE");
+        gsvaProfile.setGeneticAlterationType(GeneticAlterationType.GENESET_SCORE);
+        gsvaProfile.setProfileName("gsva test platform");
+        DaoGeneticProfile.addGeneticProfile(gsvaProfile);
+        // Set-up stays outside the lambda so only the constructor call is under assertThrows.
+        int gsvaProfileId = DaoGeneticProfile.getGeneticProfileByStableId("gsva_scores").getGeneticProfileId();
+        File dataFile = File.createTempFile("gsva", "test");
+        dataFile.deleteOnExit(); // do not leave the empty temp file behind after the test JVM exits
+        assertThrows(UnsupportedOperationException.class, () ->
+            new ImportTabDelimData(dataFile, gsvaProfileId, null, true,
+                DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance()));
+    }
+
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalMrnaExpressionImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalMrnaExpressionImport.java
new file mode 100644
index 00000000..d44ccee5
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalMrnaExpressionImport.java
@@ -0,0 +1,119 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportProfileData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState;
+
+/**
+ * Tests Incremental Import of MRNA_EXPRESSION Data.
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"})
+@Rollback
+@Transactional
+public class TestIncrementalMrnaExpressionImport {
+
+    /**
+     * Test incremental upload of MRNA_EXPRESSION
+     */
+    @Test
+    public void testMrnaExpression() throws DaoException, IOException {
+        // Prior checks: pin the state of the profile before the import so the
+        // assertions afterwards can tell new, updated and untouched genes and
+        // samples apart.
+        // Hugo_Symbol: CDK1
+        final long newGeneEntrezId = 983L;
+        // Gene that is part of the platform, but absent during the incremental upload
+        // Hugo_Symbol: ARAF
+        final long absentGeneEntrezId = 369L;
+        final Set<Long> noChangeEntrezIds = Set.of(10000L, 207L, 208L, 3265L, 3845L, 472L, 4893L, 672L, 673L, 675L);
+        final Set<Long> beforeEntrezIds = new HashSet<>(noChangeEntrezIds);
+        beforeEntrezIds.add(absentGeneEntrezId);
+
+        // stable_id: TCGA-A1-A0SB-01
+        final int newSampleId = 1;
+        // stable_id: TCGA-A1-A0SD-01
+        final int updateSampleId = 2;
+        final Set<Integer> noChangeSampleIds = Set.of(3, 6, 8, 9, 10, 12, 13);
+        final Set<Integer> beforeSampleIds = new HashSet<>(noChangeSampleIds);
+        beforeSampleIds.add(updateSampleId);
+
+        GeneticProfile mrnaProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_mrna");
+        assertNotNull(mrnaProfile);
+
+        HashMap<Long, HashMap<Integer, String>> beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null);
+        assertPriorDataState(beforeResult, beforeEntrezIds, beforeSampleIds);
+
+        File dataFolder = new File("src/test/resources/incremental/mrna_expression/");
+        File metaFile = new File(dataFolder, "meta_expression_Zscores.txt");
+        File dataFile = new File(dataFolder, "data_expression_Zscores.txt");
+
+        // Test: run the importer on the incremental meta/data files.
+        // NOTE(review): "--overwrite-existing" is presumably the switch that selects
+        // the incremental mode here - confirm against ImportProfileData.
+        new ImportProfileData(new String[] {
+            "--loadMode", "bulkLoad",
+            "--meta", metaFile.getAbsolutePath(),
+            "--data", dataFile.getAbsolutePath(),
+            "--overwrite-existing",
+        }).run();
+
+        // After test assertions: exactly one gene and one sample are added, untouched
+        // cells keep their values, and the gene absent from the upload holds "" for
+        // both the new and the updated sample.
+        HashMap<Long, HashMap<Integer, String>> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null);
+        assertEquals("After result should get exactly one new gene", beforeEntrezIds.size() + 1,
+            afterResult.size());
+        afterResult.values()
+            .forEach(sampleToValue ->
+                assertEquals("Each gene row has to get one extra sample", beforeSampleIds.size() + 1, sampleToValue.size()));
+        assertNoChange(beforeResult, afterResult, noChangeEntrezIds, noChangeSampleIds);
+        HashMap<Integer, String> newGeneRow = afterResult.get(newGeneEntrezId);
+        assertEquals("-0.1735", newGeneRow.get(newSampleId));
+        assertEquals("-0.6412", newGeneRow.get(updateSampleId));
+        HashMap<Integer, String> absentGeneRow = afterResult.get(absentGeneEntrezId);
+        assertEquals("", absentGeneRow.get(newSampleId));
+        assertEquals("", absentGeneRow.get(updateSampleId));
+    }
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalProteinLevelImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalProteinLevelImport.java
new file mode 100644
index 00000000..f3933b27
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalProteinLevelImport.java
@@ -0,0 +1,122 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportProfileData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState;
+
+/**
+ * Tests Incremental Import of PROTEIN_LEVEL Data.
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"})
+@Rollback
+@Transactional
+public class TestIncrementalProteinLevelImport {
+
+    /**
+     * Test incremental upload of PROTEIN_LEVEL
+     */
+    @Test
+    public void testRppa() throws DaoException {
+        // Prior checks: pin the state of the profile before the import so the
+        // assertions afterwards can tell new, updated and untouched genes and
+        // samples apart.
+        // Hugo_Symbol: CDK1
+        final long newGeneEntrezId = 983L;
+        // Gene that is part of the platform, but absent during the incremental upload
+        // Hugo_Symbol: ARAF
+        final long absentGeneEntrezId = 369L;
+        final Set<Long> noChangeEntrezIds = Set.of(10000L, 207L, 208L, 3265L, 3845L, 472L, 4893L, 672L, 673L, 675L);
+        final Set<Long> beforeEntrezIds = new HashSet<>(noChangeEntrezIds);
+        beforeEntrezIds.add(absentGeneEntrezId);
+
+        // stable_id: TCGA-A1-A0SB-01
+        final int newSampleId = 1;
+        // stable_id: TCGA-A1-A0SD-01
+        final int updateSampleId = 2;
+        final Set<Integer> noChangeSampleIds = Set.of(3, 6, 8, 9, 10, 12, 13);
+        final Set<Integer> beforeSampleIds = new HashSet<>(noChangeSampleIds);
+        beforeSampleIds.add(updateSampleId);
+
+        GeneticProfile rppaProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_rppa");
+        assertNotNull(rppaProfile);
+
+        HashMap<Long, HashMap<Integer, String>> beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(rppaProfile.getGeneticProfileId(), null);
+        assertPriorDataState(beforeResult, beforeEntrezIds, beforeSampleIds);
+
+        File dataFolder = new File("src/test/resources/incremental/protein_level/");
+        File metaFile = new File(dataFolder, "meta_rppa.txt");
+        File dataFile = new File(dataFolder, "data_rppa.txt");
+
+        // Test: run the importer on the incremental meta/data files.
+        // NOTE(review): "--overwrite-existing" is presumably the switch that selects
+        // the incremental mode here - confirm against ImportProfileData.
+        new ImportProfileData(new String[] {
+            "--loadMode", "bulkLoad",
+            "--meta", metaFile.getAbsolutePath(),
+            "--data", dataFile.getAbsolutePath(),
+            "--overwrite-existing",
+        }).run();
+
+        // After test assertions: exactly one gene and one sample are added, untouched
+        // cells keep their values, and the gene absent from the upload holds "" for
+        // both the new and the updated sample.
+        HashMap<Long, HashMap<Integer, String>> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(rppaProfile.getGeneticProfileId(), null);
+        assertEquals("After result should get exactly one new gene", beforeEntrezIds.size() + 1,
+            afterResult.size());
+        afterResult.values()
+            .forEach(sampleToValue ->
+                assertEquals("Each gene row has to get one extra sample", beforeSampleIds.size() + 1, sampleToValue.size()));
+        assertNoChange(beforeResult, afterResult, noChangeEntrezIds, noChangeSampleIds);
+        assertEquals("-0.141047088398489", afterResult.get(newGeneEntrezId).get(newSampleId));
+        assertEquals("1.61253243564957", afterResult.get(newGeneEntrezId).get(updateSampleId));
+        assertEquals("", afterResult.get(absentGeneEntrezId).get(newSampleId));
+        assertEquals("", afterResult.get(absentGeneEntrezId).get(updateSampleId));
+    }
+
+    @Before
+    public void setUp() throws DaoException {
+        DaoCancerStudy.reCacheAll();
+    }
+
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTabDelimDataTransaction.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTabDelimDataTransaction.java
new file mode 100644
index 00000000..f149d959
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTabDelimDataTransaction.java
@@ -0,0 +1,119 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.cbioportal.model.CNA;
+import org.jetbrains.annotations.NotNull;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.runner.RunWith;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoCnaEvent;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticEntity;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.CnaEvent;
+import org.mskcc.cbio.portal.model.GeneticAlterationType;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportTabDelimData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Propagation;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.mock;
+
+/**
+ * Verifies that the incremental import of tab-delimited profile data is transactional:
+ * when the DAO fails partway through the import, the genetic alterations that were
+ * stored before the import must be left untouched.
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
+public class TestIncrementalTabDelimDataTransaction {
+
+    /**
+     * Runs an import against a DAO mock that fails on its third delete call and checks
+     * that the stored alterations of the profile are identical before and after the import.
+     *
+     * @throws Exception if a DAO call outside the guarded import fails
+     */
+    @Test
+    // MySQL does not support nested transactions. That's why we disable the outer transaction.
+    @Transactional(propagation = Propagation.NOT_SUPPORTED)
+    public void testTransaction() throws Exception {
+        GeneticProfile mrnaProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_mrna");
+        assertNotNull(mrnaProfile);
+
+        // The fixture is stored together with the other incremental mRNA expression test data.
+        File dataFolder = new File("src/test/resources/incremental/mrna_expression/");
+        File dataFile = new File(dataFolder, "data_expression_Zscores.txt");
+        // Guard against passing for the wrong reason (missing file instead of the DAO failure).
+        assertTrue("Test data file has to exist: " + dataFile, dataFile.isFile());
+
+        Map<?, ?> beforeResult = DaoGeneticAlteration.getInstance()
+            .getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null);
+
+        // The first two deletes succeed; the third one simulates a failure partway through.
+        DaoGeneticAlteration mockedDao = mock(DaoGeneticAlteration.class);
+        doNothing().doNothing().doThrow(new DaoException("Simulated dao error"))
+            .when(mockedDao).deleteAllRecordsInGeneticProfile(anyLong(), anyLong());
+
+        // Test: the import has to fail with a RuntimeException.
+        assertThrows("Import has to fail", RuntimeException.class,
+            () -> new ImportTabDelimData(dataFile, mrnaProfile.getGeneticProfileId(), null, true,
+                mockedDao, DaoGeneOptimized.getInstance()).importData());
+
+        // After test assertions: the stored data must be exactly what it was before the import.
+        Map<?, ?> afterResult = DaoGeneticAlteration.getInstance()
+            .getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null);
+        assertEquals(beforeResult, afterResult);
+    }
+
+    /** Calls {@code DaoCancerStudy.reCacheAll()} before each test. */
+    @Before
+    public void setUp() throws DaoException {
+        DaoCancerStudy.reCacheAll();
+    }
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java
index d317aa03..916a16cd 100644
--- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java
@@ -180,7 +180,7 @@ public void testImportCnaDiscreteLongDataAddsCnaEvents() throws Exception {
@Test
public void testImportCnaDiscreteLongDataAddsGeneticAlterations() throws Exception {
List beforeGeneticAlterations = getAllGeneticAlterations();
- assertEquals(beforeGeneticAlterations.size(), 42);
+ assertEquals(57, beforeGeneticAlterations.size());
File file = new File("src/test/resources/data_cna_discrete_import_test.txt");
new ImportCnaDiscreteLongData(
@@ -205,7 +205,7 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterations() throws Excepti
@Test
public void testImportCnaDiscreteLongDataAddsMissingGeneticAlterations() throws Exception {
List beforeGeneticAlterations = getAllGeneticAlterations();
- assertEquals(beforeGeneticAlterations.size(), 42);
+ assertEquals(57, beforeGeneticAlterations.size());
File file = new File("src/test/resources/data_cna_discrete_import_test_with_cna_events_missing.txt");
new ImportCnaDiscreteLongData(
@@ -233,7 +233,7 @@ public void testImportCnaDiscreteLongDataAddsMissingGeneticAlterations() throws
@Test
public void testImportCnaDiscreteLongDataAddsGeneticAlterationsAndProfileSamplesInCorrectOrder() throws Exception {
List beforeGeneticAlterations = getAllGeneticAlterations();
- assertEquals(beforeGeneticAlterations.size(), 42);
+ assertEquals(57, beforeGeneticAlterations.size());
File file = new File("src/test/resources/data_cna_discrete_import_test.txt");
new ImportCnaDiscreteLongData(
@@ -260,7 +260,7 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterationsAndProfileSamples
@Test
public void testImportCnaDiscreteLongDataHandlesEntriesWithoutEntrezButWithHugo() throws Exception {
List beforeGeneticAlterations = getAllGeneticAlterations();
- assertEquals(beforeGeneticAlterations.size(), 42);
+ assertEquals(57, beforeGeneticAlterations.size());
File file = new File("src/test/resources/data_cna_discrete_import_test_without_entrez_with_hugo.txt");
new ImportCnaDiscreteLongData(
@@ -283,7 +283,7 @@ public void testImportCnaDiscreteLongDataHandlesEntriesWithoutEntrezButWithHugo(
@Test
public void testImportCnaDiscreteLongDataHandlesEntriesWithWrongEntrezAndCorrectHugo() throws Exception {
List beforeGeneticAlterations = getAllGeneticAlterations();
- assertEquals(beforeGeneticAlterations.size(), 42);
+ assertEquals(57, beforeGeneticAlterations.size());
File file = new File("src/test/resources/data_cna_discrete_import_test_with_wrong_entrez_and_correct_hugo.txt");
new ImportCnaDiscreteLongData(
@@ -306,7 +306,7 @@ public void testImportCnaDiscreteLongDataHandlesEntriesWithWrongEntrezAndCorrect
@Test
public void testImportCnaDiscreteLongDataAddsGeneticAlterationsFromNonCnaEvents() throws Exception {
List beforeGeneticAlterations = getAllGeneticAlterations();
- assertEquals(beforeGeneticAlterations.size(), 42);
+ assertEquals(57, beforeGeneticAlterations.size());
File file = new File("src/test/resources/data_cna_discrete_import_test.txt");
new ImportCnaDiscreteLongData(
@@ -334,7 +334,7 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterationsFromNonCnaEvents(
@Test
public void testImportCnaDiscreteLongDataIgnoresLineWithDuplicateGene() throws Exception {
List beforeGeneticAlterations = getAllGeneticAlterations();
- assertEquals(beforeGeneticAlterations.size(), 42);
+ assertEquals(57, beforeGeneticAlterations.size());
File file = new File("src/test/resources/data_cna_discrete_import_test.txt");
new ImportCnaDiscreteLongData(
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java
index a0a33c6d..fa7e0449 100644
--- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java
@@ -95,10 +95,11 @@ public void testImportGenericAssayData() throws Exception {
// Open mutational signature test data file
File file = new File("src/test/resources/data_mutational_signature.txt");
-
+ int numRecordsForGenericAssayBefore = getNumRecordsForGenericAssay();
+
// import data and test all mutational signatures were added
ImportGenericAssayEntity.importData(file, GeneticAlterationType.GENERIC_ASSAY, "name,description", false);
- assertEquals(60, getNumRecordsForGenericAssay());
+ assertEquals(numRecordsForGenericAssayBefore + 60, getNumRecordsForGenericAssay());
// test wether a record can be retrieved via stable id
GenericAssayMeta genericAssayMeta1 = DaoGenericAssay.getGenericAssayMetaByStableId("mean_1");
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java
index 123715f8..480e9a61 100644
--- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java
@@ -53,7 +53,6 @@
import org.mskcc.cbio.portal.model.Patient;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.scripts.ImportGenericAssayPatientLevelData;
-import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.springframework.test.annotation.Rollback;
import org.springframework.test.context.ContextConfiguration;
@@ -151,8 +150,7 @@ private void runImportGenericAssayPatientLevelData() throws DaoException, IOExce
File file = new File("src/test/resources/tabDelimitedData/data_patient_generic_assay.txt");
ImportGenericAssayPatientLevelData parser = new ImportGenericAssayPatientLevelData(file, null, geneticProfileId, null, "name,description");
- int numLines = FileUtil.getNumLines(file);
- parser.importData(numLines);
+ parser.importData();
HashMap> geneticAlterationMap = daoGeneticAlteration.getGeneticAlterationMapForEntityIds(geneticProfileId, Arrays.asList(geneticEntity1.getId(), geneticEntity2.getId()));
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java
index 33779cd3..f8bcc335 100644
--- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java
@@ -38,7 +38,6 @@
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
-import org.mskcc.cbio.portal.dao.DaoGeneset;
import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
import org.mskcc.cbio.portal.dao.DaoPatient;
@@ -48,15 +47,12 @@
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.CanonicalGene;
import org.mskcc.cbio.portal.model.CopyNumberStatus;
-import org.mskcc.cbio.portal.model.Geneset;
import org.mskcc.cbio.portal.model.GeneticAlterationType;
import org.mskcc.cbio.portal.model.GeneticProfile;
import org.mskcc.cbio.portal.model.Patient;
import org.mskcc.cbio.portal.model.Sample;
-import org.mskcc.cbio.portal.scripts.ImportGenesetData;
import org.mskcc.cbio.portal.scripts.ImportTabDelimData;
import org.mskcc.cbio.portal.util.ConsoleUtil;
-import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.springframework.test.annotation.Rollback;
import org.springframework.test.context.ContextConfiguration;
@@ -171,9 +167,8 @@ private void runImportCnaData() throws DaoException, IOException{
ProgressMonitor.setConsoleMode(false);
// TBD: change this to use getResourceAsStream()
File file = new File("src/test/resources/cna_test.txt");
- ImportTabDelimData parser = new ImportTabDelimData(file, "Barry", geneticProfileId, null, DaoGeneticAlteration.getInstance());
- int numLines = FileUtil.getNumLines(file);
- parser.importData(numLines);
+ ImportTabDelimData parser = new ImportTabDelimData(file, "Barry", geneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance());
+ parser.importData();
String value = dao.getGeneticAlteration(geneticProfileId, sample1, 999999207);
assertEquals ("0", value);
@@ -236,9 +231,8 @@ private void runImportCnaData2() throws DaoException, IOException{
ProgressMonitor.setConsoleMode(false);
// TBD: change this to use getResourceAsStream()
File file = new File("src/test/resources/cna_test2.txt");
- ImportTabDelimData parser = new ImportTabDelimData(file, geneticProfileId, null, DaoGeneticAlteration.getInstance());
- int numLines = FileUtil.getNumLines(file);
- parser.importData(numLines);
+ ImportTabDelimData parser = new ImportTabDelimData(file, geneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance());
+ parser.importData();
String value = dao.getGeneticAlteration(geneticProfileId, sample1, 207);
assertEquals (value, "0");
@@ -321,9 +315,8 @@ private void runImportRnaData1() throws DaoException, IOException{
// TBD: change this to use getResourceAsStream()
File file = new File("src/test/resources/mrna_test.txt");
addTestPatientAndSampleRecords(file);
- ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, DaoGeneticAlteration.getInstance());
- int numLines = FileUtil.getNumLines(file);
- parser.importData(numLines);
+ ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance());
+ parser.importData();
ConsoleUtil.showMessages();
int sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "DD639").getInternalId();
@@ -375,9 +368,8 @@ public void testImportmRnaData2() throws Exception {
// TBD: change this to use getResourceAsStream()
File file = new File("src/test/resources/tabDelimitedData/data_expression2.txt");
addTestPatientAndSampleRecords(file);
- ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, DaoGeneticAlteration.getInstance());
- int numLines = FileUtil.getNumLines(file);
- parser.importData(numLines);
+ ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance());
+ parser.importData();
// check if expected warnings are given:
ArrayList warnings = ProgressMonitor.getWarnings();
@@ -468,9 +460,8 @@ public void testImportRppaData() throws Exception {
// TBD: change this to use getResourceAsStream()
File file = new File("src/test/resources/tabDelimitedData/data_rppa.txt");
addTestPatientAndSampleRecords(file);
- ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, DaoGeneticAlteration.getInstance());
- int numLines = FileUtil.getNumLines(file);
- parser.importData(numLines);
+ ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance());
+ parser.importData();
ConsoleUtil.showMessages();
int sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId();
diff --git a/src/test/resources/incremental/copy_number_alteration/data_cna_discrete.txt b/src/test/resources/incremental/copy_number_alteration/data_cna_discrete.txt
new file mode 100644
index 00000000..7664e868
--- /dev/null
+++ b/src/test/resources/incremental/copy_number_alteration/data_cna_discrete.txt
@@ -0,0 +1,17 @@
+Hugo_Symbol Entrez_Gene_Id TCGA-XX-0800-01 TCGA-AB-CDEF-10-BLOOD_DERIVED_NORMAL TCGA-A1-A0SO-01
+AKT3 10000 0 -2 -2
+AKT1 207 -1 2 2
+# All after the pipe has to be removed
+AKT2|TEST 208 -2 2 -1
+HRAS 3265 2 2 0
+KRAS 3845 0 -2 2
+# This gene is absent from this file, but it's still part of the profile and has to be updated
+#ATM 472
+# This line is missing the hugo symbol, so the gene has to be detected by entrez id
+ 4893 -2 -2 -1
+# This line is missing the entrez id, so the gene has to be detected by hugo symbol
+BRCA1 2 2 0
+BRAF 673 2 -2 -2
+BRCA2 675 -1.5 2 0
+# This gene is new! the empty values should be set for the already existing samples in the database
+CDK1 983 -2 -2 2
diff --git a/src/test/resources/incremental/copy_number_alteration/data_cna_pd_annotations.txt b/src/test/resources/incremental/copy_number_alteration/data_cna_pd_annotations.txt
new file mode 100644
index 00000000..3fbcfc58
--- /dev/null
+++ b/src/test/resources/incremental/copy_number_alteration/data_cna_pd_annotations.txt
@@ -0,0 +1,7 @@
+SAMPLE_ID Entrez_Gene_Id cbp_driver cbp_driver_annotation cbp_driver_tiers cbp_driver_tiers_annotation
+TCGA-A1-A0SO-01 3845 Putative_Passenger Test passenger Class 2 Class annotation
+TCGA-A1-A0SO-01 208 Putative_Driver Test driver Class 1 Class annotation
+TCGA-A1-A0SO-01 983 Putative_Passenger Test passenger
+TCGA-XX-0800-01 3845 Class 2 Class annotation
+TCGA-XX-0800-01 208 Class 1 Class annotation
+TCGA-XX-0800-01 983 Putative_Driver
diff --git a/src/test/resources/incremental/copy_number_alteration/meta_cna_discrete.txt b/src/test/resources/incremental/copy_number_alteration/meta_cna_discrete.txt
new file mode 100644
index 00000000..827c31dd
--- /dev/null
+++ b/src/test/resources/incremental/copy_number_alteration/meta_cna_discrete.txt
@@ -0,0 +1,10 @@
+cancer_study_identifier: study_tcga_pub
+genetic_alteration_type: COPY_NUMBER_ALTERATION
+datatype: DISCRETE
+stable_id: gistic
+show_profile_in_analysis_tab: true
+profile_description: Putative copy-number from GISTIC 2.0. Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.
+profile_name: Putative copy-number alterations from GISTIC
+data_filename: data_cna_discrete.txt
+pd_annotations_filename: data_cna_pd_annotations.txt
+namespaces: CustomNamespace
diff --git a/src/test/resources/incremental/generic_assay/data_treatment_ic50.txt b/src/test/resources/incremental/generic_assay/data_treatment_ic50.txt
new file mode 100644
index 00000000..79606fbf
--- /dev/null
+++ b/src/test/resources/incremental/generic_assay/data_treatment_ic50.txt
@@ -0,0 +1,8 @@
+ENTITY_STABLE_ID NAME DESCRIPTION URL TCGA-A1-A0SB-01 TCGA-A1-A0SD-01
+Erlotinib Name of Erlotinib Desc of Erlotinib Url of Erlotinib >8 7.5
+Irinotecan Name of Irinotecan Desc of Irinotecan Url of Irinotecan 0.081
+# The database has this entity, but not the file
+#L-685458
+Lapatinib Name of Lapatinib Desc of Lapatinib Url of Lapatinib 6.2 7.848
+#The entity will be added
+LBW242 Name of LBW242 Desc of LBW242 Url of LBW242 0.1 >~8
diff --git a/src/test/resources/incremental/generic_assay/data_treatment_ic50_patient_level.txt b/src/test/resources/incremental/generic_assay/data_treatment_ic50_patient_level.txt
new file mode 100644
index 00000000..34753bba
--- /dev/null
+++ b/src/test/resources/incremental/generic_assay/data_treatment_ic50_patient_level.txt
@@ -0,0 +1,8 @@
+ENTITY_STABLE_ID NAME DESCRIPTION URL TCGA-A1-A0SB TCGA-A1-A0SD
+Erlotinib Name of Erlotinib Desc of Erlotinib Url of Erlotinib >8 7.5
+Irinotecan Name of Irinotecan Desc of Irinotecan Url of Irinotecan 0.081
+# The database has this entity, but not the file
+#L-685458
+Lapatinib Name of Lapatinib Desc of Lapatinib Url of Lapatinib 6.2 7.848
+#The entity will be added
+LBW242 Name of LBW242 Desc of LBW242 Url of LBW242 0.1 >~8
diff --git a/src/test/resources/incremental/generic_assay/meta_treatment_ic50.txt b/src/test/resources/incremental/generic_assay/meta_treatment_ic50.txt
new file mode 100644
index 00000000..6ec6cdc5
--- /dev/null
+++ b/src/test/resources/incremental/generic_assay/meta_treatment_ic50.txt
@@ -0,0 +1,12 @@
+cancer_study_identifier: study_tcga_pub
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: TREATMENT_RESPONSE
+datatype: LIMIT-VALUE
+stable_id: treatment_ic50
+profile_name: IC50 values of compounds on cellular phenotype readout
+profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines.
+data_filename: data_treatment_ic50.txt
+show_profile_in_analysis_tab: true
+pivot_threshold_value: 0.1
+value_sort_order: ASC
+generic_entity_meta_properties: NAME,DESCRIPTION,URL
diff --git a/src/test/resources/incremental/generic_assay/meta_treatment_ic50_patient_level.txt b/src/test/resources/incremental/generic_assay/meta_treatment_ic50_patient_level.txt
new file mode 100644
index 00000000..181899f5
--- /dev/null
+++ b/src/test/resources/incremental/generic_assay/meta_treatment_ic50_patient_level.txt
@@ -0,0 +1,13 @@
+cancer_study_identifier: study_tcga_pub
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: TREATMENT_RESPONSE
+datatype: LIMIT-VALUE
+stable_id: treatment_ic50
+profile_name: IC50 values of compounds on cellular phenotype readout
+profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines.
+data_filename: data_treatment_ic50_patient_level.txt
+show_profile_in_analysis_tab: true
+pivot_threshold_value: 0.1
+value_sort_order: ASC
+generic_entity_meta_properties: NAME,DESCRIPTION,URL
+patient_level: true
diff --git a/src/test/resources/incremental/mrna_expression/data_expression_Zscores.txt b/src/test/resources/incremental/mrna_expression/data_expression_Zscores.txt
new file mode 100644
index 00000000..dc189cec
--- /dev/null
+++ b/src/test/resources/incremental/mrna_expression/data_expression_Zscores.txt
@@ -0,0 +1,34 @@
+Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-AB-CDEF-10-BLOOD_DERIVED_NORMAL TCGA-A1-A0SD-01
+AKT3 10000 0.6393 0.1 0.5377
+AKT1 207 0.785 0.1 0.0426
+# All after the pipe has to be removed
+AKT2|TEST 208 1.0741 0.1 0.718
+HRAS 3265 -0.1735 0.1 -0.6412
+# This gene is absent from this file, but it's still part of the profile and has to be updated
+#ARAF 369
+KRAS 3845 0.785 0.1 0.0426
+ATM 472 1.0741 0.1 0.718
+# This line is missing the hugo symbol, so the gene has to be detected by entrez id
+ 4893 -0.1735 0.1 -0.6412
+# This line is missing the entrez id, so the gene has to be detected by hugo symbol
+BRCA1 0.6393 0.1 0.5377
+BRAF 673 0.785 0.1 0.0426
+# Duplicate lines should be ignored
+BRAF 673 0.7851 0.1 0.0427
+# Although this row has 2 extra columns, we are ok with that as they contain blank values
+BRCA2 675 1.0741 0.1 0.718
+# This gene is new! the empty values should be set for the already existing samples in the database
+CDK1 983 -0.1735 0.1 -0.6412
+# These lines have to be skipped
+# One column too much
+FGFR3 2261 0.045 0.1 0.675 0.0224575
+# No sample columns
+PIEZO1 9780
+# invalid entrez id
+P2RY10 -1 0.741 0.1 0.685
+# Multigene sign
+/// 369 0.6393 0.1 0.5377
+# Unknown gene sign
+--- 3845 0.785 0.1 0.0426
+# Empty gene info
+ 1.0741 0.1 0.718
diff --git a/src/test/resources/incremental/mrna_expression/meta_expression_Zscores.txt b/src/test/resources/incremental/mrna_expression/meta_expression_Zscores.txt
new file mode 100644
index 00000000..e761fed3
--- /dev/null
+++ b/src/test/resources/incremental/mrna_expression/meta_expression_Zscores.txt
@@ -0,0 +1,8 @@
+cancer_study_identifier: study_tcga_pub
+genetic_alteration_type: MRNA_EXPRESSION
+datatype: Z-SCORE
+stable_id: mrna
+profile_description: Expression levels (Agilent microarray).
+show_profile_in_analysis_tab: false
+profile_name: mRNA expression (microarray)
+data_filename: data_expression_Zscores.txt
diff --git a/src/test/resources/incremental/protein_level/data_rppa.txt b/src/test/resources/incremental/protein_level/data_rppa.txt
new file mode 100644
index 00000000..0953ce99
--- /dev/null
+++ b/src/test/resources/incremental/protein_level/data_rppa.txt
@@ -0,0 +1,24 @@
+Composite.Element.REF TCGA-A1-A0SB-01 TCGA-A1-A0SD-01
+AKT3|akt3 1.26122710480548 0.037186254715365
+# Multiple gene symbols joined by space
+AKT1 AKT2 AKT3|akt1 1.61253243664957 -0.141077088398489
+# All after the pipe has to be removed
+AKT2|TEST 5.4424238579025E-05 0.062264661774981
+HRAS|hras 0.37624053370992 0.270399126328659
+# This gene absent in this file, but it's still part of the profile and has to be updated 0.407622077164699 -0.326522823583974
+#ARAF
+KRAS|kras -0.335040546938807 0.00730643372831408
+ATM|atm 0.037186254715365 1.26122710480548
+# This line missing the entrez id and the gene has to be detected by hugo symbol 0.062264661774981 5.4424238579025E-05
+BRCA1|brca1 0.270399126328659 0.37624053370992
+BRAF|braf -0.326522823583974 0.407622077164699
+# Duplicate lines should be ignored 0.218650367364756 0.383702820778609
+BRAF|braf 0.00730643372831408 -0.335040546938807
+BRCA2|brca2 -0.141077088398489 1.61253243664957
+# This gene is new! the empty values should be set for the already existing samples in the database
+CDK1|cdk1 -0.141047088398489 1.61253243564957
+# These lines have to be skipped
+/// -0.335040546938807 0.00730643372831408
+--- 0.037186254715365 1.26122710480548
+ 0.064 0.644
+NA|K-Ras 0.062264661774981 5.4424238579025E-05
diff --git a/src/test/resources/incremental/protein_level/meta_rppa.txt b/src/test/resources/incremental/protein_level/meta_rppa.txt
new file mode 100644
index 00000000..f6481c7d
--- /dev/null
+++ b/src/test/resources/incremental/protein_level/meta_rppa.txt
@@ -0,0 +1,7 @@
+cancer_study_identifier: study_tcga_pub
+genetic_alteration_type: PROTEIN_LEVEL
+datatype: LOG2-VALUE
+stable_id: rppa
+profile_name: Test RPPA
+profile_description: Test protein level data
+data_filename: data_rppa.txt
diff --git a/src/test/resources/seed_mini.sql b/src/test/resources/seed_mini.sql
index 3dfd5ff9..552db83e 100644
--- a/src/test/resources/seed_mini.sql
+++ b/src/test/resources/seed_mini.sql
@@ -199,6 +199,11 @@ INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYP
INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE');
SET @max_entity_id = (Select MAX(ID) from genetic_entity);
INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,2261,'FGFR3','protein-coding');
+-- Generic genetic entities
+INSERT INTO "genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'Erlotinib');
+INSERT INTO "genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'Irinotecan');
+INSERT INTO "genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'L-685458');
+INSERT INTO "genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'Lapatinib');
-- cna_event
INSERT INTO "cna_event" ("CNA_EVENT_ID","ENTREZ_GENE_ID","ALTERATION") VALUES (20093,207,-2);
@@ -328,6 +333,8 @@ INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID
INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (6,'study_tcga_pub_mutations',1,'MUTATION_EXTENDED','MAF','Mutations','Mutation data from whole exome sequencing.','1');
INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (7,'study_tcga_pub_structural_variants',1,'STRUCTURAL_VARIANT','SV','Structural Variants','Structural Variants test data.','1');
INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (8,'study_tcga_pub_cna_long',1,'COPY_NUMBER_ALTERATION','DISCRETE_LONG','CNA values','CNA values dummy data','1');
+INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (9,'study_tcga_pub_rppa',1,'PROTEIN_LEVEL','LOG2-VALUE','RPPA values','RPPA values dummy data','0');
+INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (10,'study_tcga_pub_treatment_ic50',1,'GENERIC_ASSAY','LIMIT-VALUE','test treatment values','treatment values dummy data','0');
-- gene_panel
INSERT INTO gene_panel (INTERNAL_ID,STABLE_ID,DESCRIPTION) VALUES (1,'TESTPANEL_CNA_DISCRETE_LONG_FORMAT','Some test panel');
@@ -375,12 +382,31 @@ INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALU
INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (5,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 672),'0.066638638,');
INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (5,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 673),'0.020369562,');
INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (5,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 675),'0.793930197,');
+-- RPPA
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 10000),'-0.472,1.514,0.145,-0.183,0.913,-0.665,-1.700,0.976,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 207),'-1.102,-0.243,0.018,-0.154,0.330,1.005,0.681,-0.664,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 208),'-1.221,-0.592,-0.176,-0.310,-1.198,-0.670,0.077,-0.302,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 3265),'0.061,-0.055,-0.165,0.517,2.021,0.381,-0.728,0.944,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 369),'-1.129,-0.306,0.180,-0.601,0.166,0.402,0.243,-0.999,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 3845),'0.177,0.404,0.188,0.428,1.676,0.238,0.469,2.161,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 472),'-1.503,-1.925,-1.755,-1.576,-1.029,-1.401,-1.514,-2.074,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 4893),'-1.914,-2.059,-1.228,-1.322,-4.166,-1.187,0.284,-0.130,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 672),'-1.661,-1.392,-1.924,-1.656,-0.361,-1.998,-0.136,-0.709,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 673),'0.233,0.561,-0.106,-0.085,-0.012,0.143,0.141,0.609,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 675),'-0.570,-1.340,-1.544,-0.404,0.632,-1.231,0.771,-0.036,');
+-- Generic assay data
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Erlotinib'),'5.2,>8,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Irinotecan'),'>8,7.1,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'L-685458'),'>4.6,7.2,');
+INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Lapatinib'),'6.9,>~8,');
-- genetic_profile_samples
INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (2,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,');
INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (3,'2,3,6,8,9,10,12,13,');
INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (4,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,');
INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (5,'2,');
+INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (9,'2,3,6,8,9,10,12,13,');
+INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (10,'2,3,');
-- patient
INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (1,'TCGA-A1-A0SB',1);
diff --git a/tests/system_tests_import_data.py b/tests/system_tests_import_data.py
index 097e6c01..64361571 100755
--- a/tests/system_tests_import_data.py
+++ b/tests/system_tests_import_data.py
@@ -107,6 +107,16 @@ def test_incremental_load(self, run_java, locate_jar):
'--meta', f'{data_directory}/meta_clinical_samples.txt', '--loadMode', 'bulkload', '--data', f'{data_directory}/data_clinical_samples.txt', '--noprogress')
mutation_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
'--meta', f'{data_directory}/meta_mutations_extended.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_mutations_extended.maf', '--noprogress')
+ cna_discrete_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
+ '--meta', f'{data_directory}/meta_cna_discrete.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_cna_discrete.txt', '--noprogress')
+ cna_log2_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
+ '--meta', f'{data_directory}/meta_cna_log2.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_cna_log2.txt', '--noprogress')
+ expression_median_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
+ '--meta', f'{data_directory}/meta_expression_median.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_expression_median.txt', '--noprogress')
+ methylation_hm27_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
+ '--meta', f'{data_directory}/meta_methylation_hm27.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_methylation_hm27.txt', '--noprogress')
+ treatment_ic50_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
+ '--meta', f'{data_directory}/meta_treatment_ic50.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_treatment_ic50.txt', '--noprogress')
case_list_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.UpdateCaseListsSampleIds',
'--meta', f'{data_directory}/meta_clinical_samples.txt', '--case-lists', f'{data_directory}/case_lists')
@@ -115,6 +125,11 @@ def test_incremental_load(self, run_java, locate_jar):
clinical_patient_call,
clinical_sample_call,
mutation_call,
+ cna_discrete_call,
+ cna_log2_call,
+ expression_median_call,
+ methylation_hm27_call,
+ treatment_ic50_call,
case_list_call,
])
diff --git a/tests/test_data/study_es_0_inc/data_cna_discrete.txt b/tests/test_data/study_es_0_inc/data_cna_discrete.txt
new file mode 100644
index 00000000..7915f45b
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/data_cna_discrete.txt
@@ -0,0 +1,10 @@
+Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW
+ACAP3 116983 0 0 -1
+ 375790 -1 -1 0
+ATAD3A 55210 0 0 -2
+ATAD3B 83858 -2 -1 0
+ATAD3C 219293 0 0 0
+#AURKAIP1 54998
+ERCC5 2073 0 -1 -2
+ACP3 55 0 0 0
+TP53 -1 0 -2
diff --git a/tests/test_data/study_es_0_inc/data_cna_log2.txt b/tests/test_data/study_es_0_inc/data_cna_log2.txt
new file mode 100644
index 00000000..0eb820a7
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/data_cna_log2.txt
@@ -0,0 +1,10 @@
+Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW
+ACAP3 116983 0.751 0.533 0.114
+ 375790 0.062 0.071 0.948
+ATAD3A 55210 0.487 0.695 0.364
+ATAD3B 83858 0.150 0.492 0.300
+ATAD3C 219293 0.995 0.170 0.654
+#AURKAIP1 54998
+ERCC5 2073 0.816 0.514 0.165
+ACP3 55 0.252 0.713 0.513
+TP53 0.360 0.538 0.891
diff --git a/tests/test_data/study_es_0_inc/data_expression_median.txt b/tests/test_data/study_es_0_inc/data_expression_median.txt
new file mode 100644
index 00000000..d5c4a9a0
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/data_expression_median.txt
@@ -0,0 +1,10 @@
+Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW
+ACAP3 116983 0.096 0.826 0.032
+ 375790 0.309 0.399 0.680
+ATAD3A 55210 0.569 0.189 0.266
+ATAD3B 83858 0.829 0.473 0.611
+ATAD3C 219293 0.307 0.445 0.045
+#AURKAIP1 54998
+ERCC5 2073 0.171 0.766 0.590
+ACP3 55 0.422 0.870 0.745
+TP53 0.179 0.694 0.808
diff --git a/tests/test_data/study_es_0_inc/data_methylation_hm27.txt b/tests/test_data/study_es_0_inc/data_methylation_hm27.txt
new file mode 100644
index 00000000..d2c67abc
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/data_methylation_hm27.txt
@@ -0,0 +1,10 @@
+Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW
+ACAP3 116983 0.022 0.681 0.790
+ 375790 0.435 0.340 0.321
+ATAD3A 55210 0.229 0.946 0.439
+ATAD3B 83858 0.885 0.707 0.664
+ATAD3C 219293 0.660 0.315 0.694
+#AURKAIP1 54998
+ERCC5 2073 0.436 0.749 0.345
+ACP3 55 0.622 0.396 0.029
+TP53 0.563 0.686 0.607
diff --git a/tests/test_data/study_es_0_inc/data_treatment_ic50.txt b/tests/test_data/study_es_0_inc/data_treatment_ic50.txt
new file mode 100644
index 00000000..2a507cef
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/data_treatment_ic50.txt
@@ -0,0 +1,11 @@
+ENTITY_STABLE_ID NAME DESCRIPTION URL TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW
+17-AAG Name of 17-AAG Desc of 17-AAG Url of 17-AAG 0.315 0.329701692 0.053038094
+AEW541 Name of AEW541 Desc of AEW541 Url of AEW541 >8 2.353 2.68212986
+AZD0530 Name of AZD0530 Desc of AZD0530 Url of AZD0530 0.234 >8 4.597949505
+AZD6244 Name of AZD6244 Desc of AZD6244 Url of AZD6244 >8 >8 >8
+Erlotinib Name of Erlotinib Desc of Erlotinib Url of Erlotinib >8 >8 >8
+Irinotecan Name of Irinotecan Desc of Irinotecan Url of Irinotecan NA 0.083 NA
+L-685458 Name of L-685458 Desc of L-685458 Url of L-685458 >8 >8 3.267752409
+#Lapatinib Name of Lapatinib Desc of Lapatinib Url of Lapatinib
+LBW242 Name of LBW242 Desc of LBW242 Url of LBW242 NA >8 >8
+Nilotinib Name of Nilotinib Desc of Nilotinib Url of Nilotinib >8 >8 NA
diff --git a/tests/test_data/study_es_0_inc/meta_cna_discrete.txt b/tests/test_data/study_es_0_inc/meta_cna_discrete.txt
new file mode 100644
index 00000000..f6ea8bea
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/meta_cna_discrete.txt
@@ -0,0 +1,10 @@
+cancer_study_identifier: study_es_0
+genetic_alteration_type: COPY_NUMBER_ALTERATION
+datatype: DISCRETE
+stable_id: gistic
+show_profile_in_analysis_tab: true
+profile_description: Putative copy-number from GISTIC 2.0. Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.
+profile_name: Putative copy-number alterations from GISTIC
+data_filename: data_cna_discrete.txt
+pd_annotations_filename: data_cna_pd_annotations.txt
+namespaces: CustomNamespace
diff --git a/tests/test_data/study_es_0_inc/meta_cna_log2.txt b/tests/test_data/study_es_0_inc/meta_cna_log2.txt
new file mode 100644
index 00000000..74a07b8e
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/meta_cna_log2.txt
@@ -0,0 +1,8 @@
+cancer_study_identifier: study_es_0
+genetic_alteration_type: COPY_NUMBER_ALTERATION
+datatype: LOG2-VALUE
+stable_id: log2CNA
+show_profile_in_analysis_tab: false
+profile_description: Log2 copy-number values for each gene (from Affymetrix SNP6).
+profile_name: Log2 copy-number values
+data_filename: data_cna_log2.txt
diff --git a/tests/test_data/study_es_0_inc/meta_expression_median.txt b/tests/test_data/study_es_0_inc/meta_expression_median.txt
new file mode 100644
index 00000000..1e2fc6a7
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/meta_expression_median.txt
@@ -0,0 +1,8 @@
+cancer_study_identifier: study_es_0
+genetic_alteration_type: MRNA_EXPRESSION
+datatype: CONTINUOUS
+stable_id: mrna
+profile_description: Expression levels (Agilent microarray).
+show_profile_in_analysis_tab: false
+profile_name: mRNA expression (microarray)
+data_filename: data_expression_median.txt
diff --git a/tests/test_data/study_es_0_inc/meta_methylation_hm27.txt b/tests/test_data/study_es_0_inc/meta_methylation_hm27.txt
new file mode 100644
index 00000000..582b12e9
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/meta_methylation_hm27.txt
@@ -0,0 +1,8 @@
+cancer_study_identifier: study_es_0
+genetic_alteration_type: METHYLATION
+datatype: CONTINUOUS
+stable_id: methylation_hm27
+profile_description: Methylation beta-values (HM27 platform). For genes with multiple methylation probes, the probe least correlated with expression is selected.
+show_profile_in_analysis_tab: false
+profile_name: Methylation (HM27)
+data_filename: data_methylation_hm27.txt
diff --git a/tests/test_data/study_es_0_inc/meta_treatment_ic50.txt b/tests/test_data/study_es_0_inc/meta_treatment_ic50.txt
new file mode 100644
index 00000000..0d3281cd
--- /dev/null
+++ b/tests/test_data/study_es_0_inc/meta_treatment_ic50.txt
@@ -0,0 +1,12 @@
+cancer_study_identifier: study_es_0
+genetic_alteration_type: GENERIC_ASSAY
+generic_assay_type: TREATMENT_RESPONSE
+datatype: LIMIT-VALUE
+stable_id: treatment_ic50
+profile_name: IC50 values of compounds on cellular phenotype readout
+profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines.
+data_filename: data_treatment_ic50.txt
+show_profile_in_analysis_tab: true
+pivot_threshold_value: 0.1
+value_sort_order: ASC
+generic_entity_meta_properties: NAME,DESCRIPTION,URL
\ No newline at end of file
|