diff --git a/README.md b/README.md index 252b7ba9..51ccaf64 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,34 @@ This repo contains: ## Inclusion in main codebase The `cbioportal-core` code is currently included in the final Docker image during the Docker build process: https://github.com/cBioPortal/cbioportal/blob/master/docker/web-and-data/Dockerfile#L48 +## Running in docker + +Build the Docker image with: +```bash +docker build -t cbioportal-core . +``` + +Example of how to load an entire study: +```bash +docker run -it -v $(pwd)/data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core python importer/metaImport.py -s /data/study_es_0 -p /data/api_json -o +``` + +### Incremental upload of data + +To add or update specific patient, sample, or molecular data in an already loaded study, you can perform an incremental upload. This process is quicker than reloading the entire study. + +To execute an incremental upload, use the `-d` (or `--data_directory`) option instead of `-s` (or `--study_directory`). Here is an example command: +```bash +docker run -it -v $(pwd)/data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core python importer/metaImport.py -d /data/study_es_0_inc -p /data/api_json -o +``` +**Note:** +While the directory should adhere to the standard cBioPortal file formats and study structure, the following guidelines apply specifically to incremental uploads: + +- Incremental uploads are not supported for all data types. For instance, uploading study metadata, resources, or GSVA data incrementally is currently unsupported. +- The data files should contain only the patients or samples that are new or need to be updated. + +This method ensures efficient updates without the need for complete study reuploads, saving time and computational resources. 
+ ## How to run integration tests This section guides you through the process of running integration tests by setting up a cBioPortal MySQL database environment using Docker. Please follow these steps carefully to ensure your testing environment is configured correctly. @@ -119,15 +147,3 @@ The script will search for `core-*.jar` in the root of the project: python scripts/importer/metaImport.py -s tests/test_data/study_es_0 -p tests/test_data/api_json_unit_tests -o ``` -## Running in docker - -Build docker image with: -```bash -docker build -t cbioportal-core . -``` - -Example of how to start the loading: -```bash -docker run -it -v $(pwd)/data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core python importer/metaImport.py -s /data/study_es_0 -p /data/api_json -o -``` - diff --git a/pom.xml b/pom.xml index c71f78a2..e858e319 100644 --- a/pom.xml +++ b/pom.xml @@ -252,6 +252,9 @@ org.apache.maven.plugins maven-surefire-plugin 2.21.0 + + false + default-test diff --git a/scripts/importer/cbioportalImporter.py b/scripts/importer/cbioportalImporter.py index f52bbc6a..c2f65cc0 100755 --- a/scripts/importer/cbioportalImporter.py +++ b/scripts/importer/cbioportalImporter.py @@ -464,6 +464,8 @@ def import_incremental_data(jvm_args, data_directory, update_generic_assay_entit Load all data types that are available and support incremental upload """ for meta_file_type in INCREMENTAL_UPLOAD_SUPPORTED_META_TYPES: + if meta_file_type not in meta_file_type_to_meta_files: + continue meta_pairs = meta_file_type_to_meta_files[meta_file_type] for meta_pair in meta_pairs: meta_filename, meta_dictionary = meta_pair @@ -651,5 +653,5 @@ def main(args): # ready to roll if __name__ == '__main__': - parsed_args = interface(args) + parsed_args = interface() main(parsed_args) diff --git a/scripts/importer/cbioportal_common.py b/scripts/importer/cbioportal_common.py index eaa38a5e..798174ee 100644 --- a/scripts/importer/cbioportal_common.py +++ 
b/scripts/importer/cbioportal_common.py @@ -370,7 +370,16 @@ class MetaFileTypes(object): MetaFileTypes.PATIENT_ATTRIBUTES, MetaFileTypes.SAMPLE_ATTRIBUTES, MetaFileTypes.MUTATION, - # TODO Add more types here as incremental upload is enabled + MetaFileTypes.MUTATION_UNCALLED, + MetaFileTypes.EXPRESSION, + MetaFileTypes.CNA_DISCRETE, + MetaFileTypes.CNA_CONTINUOUS, + MetaFileTypes.CNA_LOG2, + MetaFileTypes.METHYLATION, + MetaFileTypes.PROTEIN, + MetaFileTypes.GENERIC_ASSAY_CONTINUOUS, + MetaFileTypes.GENERIC_ASSAY_BINARY, + MetaFileTypes.GENERIC_ASSAY_CATEGORICAL, ] IMPORTER_CLASSNAME_BY_META_TYPE = { diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoCnaEvent.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoCnaEvent.java index e7785d4f..f19bf514 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoCnaEvent.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoCnaEvent.java @@ -119,7 +119,32 @@ private static long addCnaEventDirectly(CnaEvent cnaEvent) throws DaoException { JdbcUtil.closeAll(DaoCnaEvent.class, con, pstmt, rs); } } - + + public static void removeSampleCnaEvents(int cnaProfileId, List sampleIds) throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoCnaEvent.class); + pstmt = con.prepareStatement + ("DELETE sample_cna_event, alteration_driver_annotation" + + " FROM sample_cna_event" + + " LEFT JOIN alteration_driver_annotation ON alteration_driver_annotation.`ALTERATION_EVENT_ID` = sample_cna_event.`CNA_EVENT_ID`" + + " WHERE sample_cna_event.`GENETIC_PROFILE_ID` = ? 
AND sample_cna_event.`SAMPLE_ID` IN (" + + String.join(",", Collections.nCopies(sampleIds.size(), "?")) + + ")"); + pstmt.setInt(1, cnaProfileId); + for (int i = 0; i < sampleIds.size(); i++) { + pstmt.setInt(i + 2, sampleIds.get(i)); + } + pstmt.executeUpdate(); + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoCnaEvent.class, con, pstmt, rs); + } + } + public static Map> getSamplesWithAlterations( Collection eventIds) throws DaoException { return getSamplesWithAlterations(StringUtils.join(eventIds, ",")); diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java index 1f58acb7..3adbfb53 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneOptimized.java @@ -46,6 +46,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.mskcc.cbio.portal.model.CanonicalGene; +import org.mskcc.cbio.portal.util.EntrezValidator; import org.mskcc.cbio.portal.util.ProgressMonitor; /** @@ -322,7 +323,7 @@ public List guessGene(String geneId, String chr) { } CanonicalGene gene; - if (geneId.matches("[0-9]+")) { // likely to be a entrez gene id + if (EntrezValidator.isaValidEntrezId(geneId)) { // likely to be a entrez gene id gene = getGene(Integer.parseInt(geneId)); if (gene!=null) { return Collections.singletonList(gene); diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java index 25bef125..0358b132 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticAlteration.java @@ -65,12 +65,10 @@ private DaoGeneticAlteration() { * Gets Instance of Dao Object. (Singleton pattern). * * @return DaoGeneticAlteration Object. - * @throws DaoException Dao Initialization Error. 
*/ - public static DaoGeneticAlteration getInstance() throws DaoException { + public static DaoGeneticAlteration getInstance() { if (daoGeneticAlteration == null) { daoGeneticAlteration = new DaoGeneticAlteration(); - } return daoGeneticAlteration; @@ -96,7 +94,7 @@ public int addGeneticAlterations(int geneticProfileId, long entrezGeneId, String throws DaoException { return addGeneticAlterationsForGeneticEntity(geneticProfileId, DaoGeneOptimized.getGeneticEntityId(entrezGeneId), values); } - + public int addGeneticAlterationsForGeneticEntity(int geneticProfileId, int geneticEntityId, String[] values) throws DaoException { @@ -239,8 +237,8 @@ public HashMap> getGeneticAlterationMapForEntit int geneticEntityId = rs.getInt("GENETIC_ENTITY_ID"); String values = rs.getString("VALUES"); //hm.debug.. - String valueParts[] = values.split(DELIM); - for (int i=0; i getProcessedAlterationData( rs = pstmt.executeQuery(); while (rs.next()) { long entrezGeneId = DaoGeneOptimized.getEntrezGeneId(rs.getInt("GENETIC_ENTITY_ID")); - String[] values = rs.getString("VALUES").split(DELIM); + String valuesString = rs.getString("VALUES"); + if (valuesString.endsWith(DELIM)) { + valuesString = valuesString.substring(0, valuesString.length() - DELIM.length()); + } + String[] values = valuesString.split(DELIM, -1); ObjectNode datum = processor.process( entrezGeneId, values, @@ -427,17 +429,19 @@ public int getCount() throws DaoException { * Deletes all Genetic Alteration Records associated with the specified Genetic Profile ID. * * @param geneticProfileId Genetic Profile ID. + * @param geneticEntityId Genetic Entity ID. * @throws DaoException Database Error. 
*/ - public void deleteAllRecordsInGeneticProfile(long geneticProfileId) throws DaoException { + public void deleteAllRecordsInGeneticProfile(long geneticProfileId, long geneticEntityId) throws DaoException { Connection con = null; PreparedStatement pstmt = null; ResultSet rs = null; try { con = JdbcUtil.getDbConnection(DaoGeneticAlteration.class); pstmt = con.prepareStatement("DELETE from " + - "genetic_alteration WHERE GENETIC_PROFILE_ID=?"); + "genetic_alteration WHERE GENETIC_PROFILE_ID=? and GENETIC_ENTITY_ID=?"); pstmt.setLong(1, geneticProfileId); + pstmt.setLong(2, geneticEntityId); pstmt.executeUpdate(); } catch (SQLException e) { throw new DaoException(e); diff --git a/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java b/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java index 40f9e9ed..48f59d70 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/JdbcUtil.java @@ -35,10 +35,12 @@ import java.sql.*; import java.util.*; import javax.sql.DataSource; -import org.apache.commons.dbcp2.BasicDataSource; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.mskcc.cbio.portal.util.*; +import org.springframework.jdbc.datasource.DataSourceTransactionManager; +import org.springframework.jdbc.datasource.TransactionAwareDataSourceProxy; +import org.springframework.transaction.support.TransactionTemplate; /** * Connection Utility for JDBC. 
@@ -50,6 +52,8 @@ public class JdbcUtil { private static DataSource dataSource; private static Map activeConnectionCount = new HashMap(); // keep track of the number of active connection per class/requester private static final Logger LOG = LoggerFactory.getLogger(JdbcUtil.class); + private static DataSourceTransactionManager transactionManager; + private static TransactionTemplate transactionTemplate; /** * Gets the data source @@ -57,17 +61,28 @@ public class JdbcUtil { */ public static DataSource getDataSource() { if (dataSource == null) { - dataSource = new JdbcDataSource(); + dataSource = new TransactionAwareDataSourceProxy(new JdbcDataSource()); + initSpringTx(); } return dataSource; } + private static void initSpringTx() { + transactionManager = new DataSourceTransactionManager(dataSource); + transactionTemplate = new TransactionTemplate(transactionManager); + } + /** * Sets the data source * @param value the data source */ public static void setDataSource(DataSource value) { dataSource = value; + initSpringTx(); + } + + public static TransactionTemplate getTransactionTemplate() { + return transactionTemplate; } /** diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/GeneticAlterationImporter.java b/src/main/java/org/mskcc/cbio/portal/scripts/GeneticAlterationImporter.java index 32aa43f2..623b3122 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/GeneticAlterationImporter.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/GeneticAlterationImporter.java @@ -55,5 +55,9 @@ public boolean store( } } + public boolean isImportedAlready(CanonicalGene gene) { + return importSetOfGenes.contains(gene.getEntrezGeneId()); + } + } diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java index 0ab8bd88..cc3300c0 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportGeneData.java @@ -188,7 
+188,7 @@ public static void importHGNCData(File geneFile, String genomeBuild) throws IOEx ProgressMonitor.incrementCurValue(); ConsoleUtil.showProgress(); String parts[] = line.split("\t", -1); // include trailing empty strings - if (!parts[0].matches("[0-9]+")) { + if (!EntrezValidator.isaValidEntrezId(parts[0])) { ProgressMonitor.logWarning("Skipping gene with invalid entrez gene id '" + parts[1] + "'"); continue; } diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java index 2da0ebd2..7da2e983 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayEntity.java @@ -40,9 +40,11 @@ import java.io.File; import java.io.FileReader; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import org.cbioportal.model.EntityType; import org.cbioportal.model.GenericEntityProperty; @@ -50,6 +52,7 @@ import org.mskcc.cbio.portal.dao.DaoGenericAssay; import org.mskcc.cbio.portal.dao.DaoGeneticEntity; import org.mskcc.cbio.portal.model.GeneticAlterationType; +import org.mskcc.cbio.portal.util.FileUtil; import org.mskcc.cbio.portal.util.ProgressMonitor; import joptsimple.OptionParser; @@ -160,7 +163,6 @@ public static void startImport(OptionSet options, OptionSpec data, Optio * @throws Exception */ public static void importData(File dataFile, GeneticAlterationType geneticAlterationType, String additionalProperties, boolean updateInfo) throws Exception { - ProgressMonitor.setCurrentMessage("Reading data from: " + dataFile.getCanonicalPath()); // read generic assay data file @@ -186,6 +188,10 @@ public static void importData(File dataFile, GeneticAlterationType geneticAltera currentLine = buf.readLine(); while (currentLine != null) { + if 
(!FileUtil.isInfoLine(currentLine)) { + currentLine = buf.readLine(); + continue; + } String[] parts = currentLine.split("\t"); diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayPatientLevelData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayPatientLevelData.java index a7dda2ca..dddcc156 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayPatientLevelData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportGenericAssayPatientLevelData.java @@ -79,7 +79,8 @@ public ImportGenericAssayPatientLevelData(File dataFile, String targetLine, int * @throws IOException IO Error. * @throws DaoException Database Error. */ - public void importData(int numLines) throws IOException, DaoException { + public void importData() throws IOException, DaoException { + int numLines = FileUtil.getNumLines(dataFile); geneticProfile = DaoGeneticProfile.getGeneticProfileById(geneticProfileId); diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java index a0ffe297..0e1ff058 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportProfileData.java @@ -54,12 +54,8 @@ public class ImportProfileData extends ConsoleRunnable { public void run() { DaoGeneOptimized daoGene; DaoGeneticAlteration daoGeneticAlteration; - try { - daoGene = DaoGeneOptimized.getInstance(); - daoGeneticAlteration = DaoGeneticAlteration.getInstance(); - } catch (DaoException e) { - throw new RuntimeException("Could not create dao instances", e); - } + daoGene = DaoGeneOptimized.getInstance(); + daoGeneticAlteration = DaoGeneticAlteration.getInstance(); try { // Parse arguments @@ -87,13 +83,11 @@ public void run() { } // Print profile report - int numLines = FileUtil.getNumLines(dataFile); ProgressMonitor.setCurrentMessage( " --> profile id: " + geneticProfile.getGeneticProfileId() 
+ "\n --> profile name: " + geneticProfile.getProfileName() + "\n --> genetic alteration type: " + geneticProfile.getGeneticAlterationType().name()); - ProgressMonitor.setMaxValue(numLines); - + // Check genetic alteration type if (geneticProfile.getGeneticAlterationType() == GeneticAlterationType.MUTATION_EXTENDED || geneticProfile.getGeneticAlterationType() == GeneticAlterationType.MUTATION_UNCALLED) { @@ -122,8 +116,11 @@ public void run() { // use a different importer for patient level data String patientLevel = geneticProfile.getOtherMetaDataField("patient_level"); if (patientLevel != null && patientLevel.trim().toLowerCase().equals("true")) { + if (overwriteExisting) { + throw new UnsupportedOperationException("Incremental upload for generic assay patient_level data is not supported. Please use sample level instead."); + } ImportGenericAssayPatientLevelData genericAssayProfileImporter = new ImportGenericAssayPatientLevelData(dataFile, geneticProfile.getTargetLine(), geneticProfile.getGeneticProfileId(), genePanel, geneticProfile.getOtherMetaDataField("generic_entity_meta_properties")); - genericAssayProfileImporter.importData(numLines); + genericAssayProfileImporter.importData(); } else { // use ImportTabDelimData importer for non-patient level data ImportTabDelimData genericAssayProfileImporter = new ImportTabDelimData( @@ -132,9 +129,10 @@ public void run() { geneticProfile.getGeneticProfileId(), genePanel, geneticProfile.getOtherMetaDataField("generic_entity_meta_properties"), - daoGeneticAlteration + overwriteExisting, + daoGeneticAlteration, daoGene ); - genericAssayProfileImporter.importData(numLines); + genericAssayProfileImporter.importData(); } } else if( geneticProfile.getGeneticAlterationType() == GeneticAlterationType.COPY_NUMBER_ALTERATION @@ -156,13 +154,14 @@ public void run() { geneticProfile.getTargetLine(), geneticProfile.getGeneticProfileId(), genePanel, - daoGeneticAlteration + overwriteExisting, + daoGeneticAlteration, daoGene ); String 
pdAnnotationsFilename = geneticProfile.getOtherMetaDataField("pd_annotations_filename"); if (pdAnnotationsFilename != null && !"".equals(pdAnnotationsFilename)) { importer.setPdAnnotationsFile(new File(dataFile.getParent(), pdAnnotationsFilename)); } - importer.importData(numLines); + importer.importData(); } } catch (Exception e) { diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportResourceData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportResourceData.java index d04124ba..147d59d9 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportResourceData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportResourceData.java @@ -7,7 +7,6 @@ import java.io.*; import joptsimple.*; import java.util.*; -import java.util.regex.*; import java.util.stream.Collectors; import org.apache.commons.collections4.map.MultiKeyMap; diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java index b984abf4..cb613b08 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTabDelimData.java @@ -53,11 +53,8 @@ */ public class ImportTabDelimData { public static final String CNA_VALUE_AMPLIFICATION = "2"; - public static final String CNA_VALUE_GAIN = "1"; - public static final String CNA_VALUE_HEMIZYGOUS_DELETION = "-1"; public static final String CNA_VALUE_HOMOZYGOUS_DELETION = "-2"; public static final String CNA_VALUE_PARTIAL_DELETION = "-1.5"; - public static final String CNA_VALUE_ZERO = "0"; private HashSet importedGeneticEntitySet = new HashSet<>(); private File dataFile; private String targetLine; @@ -72,6 +69,16 @@ public class ImportTabDelimData { private Map, Map> pdAnnotations; private final GeneticAlterationImporter geneticAlterationImporter; + private int numLines; + private DaoGeneticAlteration daoGeneticAlteration; + + private DaoGeneOptimized daoGene; + + private 
boolean updateMode; + private HashMap> geneticAlterationMap; + private ArrayList orderedImportedSampleList; + private ArrayList orderedSampleList; + /** * Constructor. * @@ -81,7 +88,8 @@ public class ImportTabDelimData { * @param geneticProfileId GeneticProfile ID. * @param genePanel GenePanel * @param genericEntityProperties Generic Assay Entities. - * + * @param updateMode if true, update/append data to the existing one + * * @deprecated : TODO shall we deprecate this feature (i.e. the targetLine)? */ public ImportTabDelimData( @@ -90,17 +98,12 @@ public ImportTabDelimData( int geneticProfileId, String genePanel, String genericEntityProperties, - DaoGeneticAlteration daoGeneticAlteration + boolean updateMode, + DaoGeneticAlteration daoGeneticAlteration, + DaoGeneOptimized daoGene ) { - this.dataFile = dataFile; - this.targetLine = targetLine; - this.geneticProfileId = geneticProfileId; - this.genePanel = genePanel; - this.genericEntityProperties = genericEntityProperties; - this.geneticAlterationImporter = new GeneticAlterationImporter( - geneticProfileId, - daoGeneticAlteration - ); + this(dataFile, targetLine, geneticProfileId, genePanel, updateMode, daoGeneticAlteration, daoGene); + this.genericEntityProperties = genericEntityProperties; } /** @@ -110,7 +113,8 @@ public ImportTabDelimData( * @param targetLine The line we want to import. * If null, all lines are imported. * @param geneticProfileId GeneticProfile ID. - * + * @param updateMode if true, update/append data to the existing one + * * @deprecated : TODO shall we deprecate this feature (i.e. the targetLine)? 
*/ public ImportTabDelimData( @@ -118,18 +122,18 @@ public ImportTabDelimData( String targetLine, int geneticProfileId, String genePanel, - DaoGeneticAlteration daoGeneticAlteration + boolean updateMode, + DaoGeneticAlteration daoGeneticAlteration, + DaoGeneOptimized daoGene ) { - this.dataFile = dataFile; + this(dataFile, geneticProfileId, genePanel, updateMode, daoGeneticAlteration, daoGene); this.targetLine = targetLine; - this.geneticProfileId = geneticProfileId; - this.genePanel = genePanel; - this.geneticAlterationImporter = new GeneticAlterationImporter(geneticProfileId, daoGeneticAlteration); } /** * Constructor. * + * @param updateMode if true, update/append data to the existing one * @param dataFile Data File containing Copy Number Alteration, MRNA Expression Data, or protein RPPA data * @param geneticProfileId GeneticProfile ID. */ @@ -137,28 +141,54 @@ public ImportTabDelimData( File dataFile, int geneticProfileId, String genePanel, - DaoGeneticAlteration daoGeneticAlteration + boolean updateMode, + DaoGeneticAlteration daoGeneticAlteration, + DaoGeneOptimized daoGene ) { this.dataFile = dataFile; this.geneticProfileId = geneticProfileId; this.genePanel = genePanel; + this.updateMode = updateMode; + this.daoGeneticAlteration = daoGeneticAlteration; this.geneticAlterationImporter = new GeneticAlterationImporter(geneticProfileId, daoGeneticAlteration); + this.daoGene = daoGene; + this.geneticProfile = DaoGeneticProfile.getGeneticProfileById(geneticProfileId); + if (this.updateMode + && geneticProfile != null + && this.geneticProfile.getGeneticAlterationType() == GeneticAlterationType.GENESET_SCORE) { + throw new UnsupportedOperationException("Incremental upload of geneset scores is not supported."); + } } /** * Import the Copy Number Alteration, mRNA Expression, protein RPPA, GSVA or generic_assay data * - * @throws IOException IO Error. - * @throws DaoException Database Error. 
/**
 * Import the Copy Number Alteration, mRNA Expression, protein RPPA, GSVA or generic_assay data
 *
 * The actual work is done by {@code doImportData()}, which is executed inside the
 * transaction template obtained from {@code JdbcUtil}.
 *
 * @throws RuntimeException wrapping whatever {@code doImportData()} threw; the
 *                          transaction is marked rollback-only before rethrowing
 */
public void importData() {
    JdbcUtil.getTransactionTemplate().execute(status -> {
        try {
            doImportData();
        } catch (Throwable e) {
            // Checked exceptions cannot leave the callback, so mark the transaction
            // for rollback explicitly and rethrow unchecked.
            status.setRollbackOnly();
            throw new RuntimeException(e);
        }
        // The callback has no result; null satisfies the callback's return type.
        return null;
    });
}
try { - int hugoSymbolIndex = getHugoSymbolIndex(parts); - int entrezGeneIdIndex = getEntrezGeneIdIndex(parts); - int rppaGeneRefIndex = getRppaGeneRefIndex(parts); - int genesetIdIndex = getGenesetIdIndex(parts); - int sampleStartIndex = getStartIndex(parts, hugoSymbolIndex, entrezGeneIdIndex, rppaGeneRefIndex, genesetIdIndex); - int genericAssayIdIndex = getGenericAssayIdIndex(parts); + int hugoSymbolIndex = getHugoSymbolIndex(headerParts); + int entrezGeneIdIndex = getEntrezGeneIdIndex(headerParts); + int rppaGeneRefIndex = getRppaGeneRefIndex(headerParts); + int genesetIdIndex = getGenesetIdIndex(headerParts); + int sampleStartIndex = getStartIndex(headerParts, hugoSymbolIndex, entrezGeneIdIndex, rppaGeneRefIndex, genesetIdIndex); + int genericAssayIdIndex = getGenericAssayIdIndex(headerParts); if (isRppaProfile) { if (rppaGeneRefIndex == -1) { throw new RuntimeException("Error: the following column should be present for RPPA data: Composite.Element.Ref"); @@ -200,10 +230,9 @@ public void importData(int numLines) throws IOException, DaoException { } String sampleIds[]; - sampleIds = new String[parts.length - sampleStartIndex]; - System.arraycopy(parts, sampleStartIndex, sampleIds, 0, parts.length - sampleStartIndex); + sampleIds = new String[headerParts.length - sampleStartIndex]; + System.arraycopy(headerParts, sampleStartIndex, sampleIds, 0, headerParts.length - sampleStartIndex); - int nrUnknownSamplesAdded = 0; ProgressMonitor.setCurrentMessage(" --> total number of samples: " + sampleIds.length); Map, Map> pdAnnotationsForStableSampleIds = null; @@ -211,9 +240,9 @@ public void importData(int numLines) throws IOException, DaoException { pdAnnotationsForStableSampleIds = readPdAnnotations(this.pdAnnotationsFile); } // link Samples to the genetic profile - ArrayList orderedSampleList = new ArrayList(); ArrayList filteredSampleIndices = new ArrayList(); this.pdAnnotations = new HashMap<>(); + this.orderedSampleList = new ArrayList<>(); for (int i = 0; i < 
sampleIds.length; i++) { Sample sample = DaoSample.getSampleByCancerStudyAndSampleId(geneticProfile.getCancerStudyId(), StableIdUtil.getSampleId(sampleIds[i])); @@ -228,10 +257,7 @@ public void importData(int numLines) throws IOException, DaoException { throw new RuntimeException("Unknown sample id '" + StableIdUtil.getSampleId(sampleIds[i]) + "' found in tab-delimited file: " + this.dataFile.getCanonicalPath()); } } - if (!DaoSampleProfile.sampleExistsInGeneticProfile(sample.getInternalId(), geneticProfileId)) { - Integer genePanelID = (genePanel == null) ? null : GeneticProfileUtil.getGenePanelId(genePanel); - DaoSampleProfile.addSampleProfile(sample.getInternalId(), geneticProfileId, genePanelID); - } + ensureSampleGeneticProfile(sample); orderedSampleList.add(sample.getInternalId()); if (pdAnnotationsForStableSampleIds != null) { Set> keys = new HashSet<>(pdAnnotationsForStableSampleIds.keySet()); @@ -248,21 +274,12 @@ public void importData(int numLines) throws IOException, DaoException { if (pdAnnotationsForStableSampleIds != null && !pdAnnotationsForStableSampleIds.keySet().isEmpty()) { ProgressMonitor.logWarning("WARNING: Following pd annotation sample-entrezId pairs newer used in the data file: " + pdAnnotationsForStableSampleIds.keySet()); } - if (nrUnknownSamplesAdded > 0) { - ProgressMonitor.logWarning("WARNING: Number of samples added on the fly because they were missing in clinical data: " + nrUnknownSamplesAdded); - } if (samplesSkipped > 0) { ProgressMonitor.setCurrentMessage(" --> total number of samples skipped (normal samples): " + samplesSkipped); } ProgressMonitor.setCurrentMessage(" --> total number of data lines: " + (numLines - 1)); - DaoGeneticProfileSamples.addGeneticProfileSamples(geneticProfileId, orderedSampleList); - - //Gene cache: - DaoGeneOptimized daoGene = DaoGeneOptimized.getInstance(); - - //Object to insert records in the generic 'genetic_alteration' table: - DaoGeneticAlteration daoGeneticAlteration = 
DaoGeneticAlteration.getInstance(); + saveOrderedSampleList(); //cache for data found in cna_event' table: Set existingCnaEvents = new HashSet<>(); @@ -277,28 +294,71 @@ public void importData(int numLines) throws IOException, DaoException { genericAssayStableIdToEntityIdMap = GenericAssayMetaUtils.buildGenericAssayStableIdToEntityIdMap(); } - int lenParts = parts.length; + int headerColumns = headerParts.length; String line = buf.readLine(); while (line != null) { + ProgressMonitor.incrementCurValue(); ConsoleUtil.showProgress(); boolean recordAdded = false; - // either parse line as geneset or gene for importing into 'genetic_alteration' table - if (isGsvaProfile) { - recordAdded = parseGenesetLine(line, lenParts, sampleStartIndex, genesetIdIndex, - filteredSampleIndices, daoGeneticAlteration); - } else if (isGenericAssayProfile) { - recordAdded = parseGenericAssayLine(line, lenParts, sampleStartIndex, genericAssayIdIndex, - filteredSampleIndices, daoGeneticAlteration, genericAssayStableIdToEntityIdMap); - } else { - recordAdded = parseLine(line, lenParts, sampleStartIndex, - hugoSymbolIndex, entrezGeneIdIndex, rppaGeneRefIndex, - isRppaProfile, isDiscretizedCnaProfile, - daoGene, - filteredSampleIndices, orderedSampleList, - existingCnaEvents); + if (FileUtil.isInfoLine(line)) { + String[] rowParts = line.split("\t", -1); + + if (rowParts.length > headerColumns && line.split("\t").length > headerColumns) { + ProgressMonitor.logWarning("Ignoring line with more fields (" + rowParts.length + + ") than specified in the headers(" + headerColumns + "): \n" + rowParts[0]); + } else if (rowParts.length < headerColumns) { + ProgressMonitor.logWarning("Ignoring line with less fields (" + rowParts.length + + ") than specified in the headers(" + headerColumns + "): \n" + rowParts[0]); + } else { + String sampleValues[] = ArrayUtils.subarray(rowParts, sampleStartIndex, rowParts.length > headerColumns ? 
headerColumns : rowParts.length); + + // trim whitespace from values + sampleValues = Stream.of(sampleValues).map(String::trim).toArray(String[]::new); + sampleValues = filterOutNormalValues(filteredSampleIndices, sampleValues); + + // either parse line as geneset or gene for importing into 'genetic_alteration' table + if (isGsvaProfile) { + String genesetId = rowParts[genesetIdIndex]; + recordAdded = saveGenesetLine(sampleValues, genesetId); + } else if (isGenericAssayProfile) { + String genericAssayId = rowParts[genericAssayIdIndex]; + recordAdded = saveGenericAssayLine(sampleValues, genericAssayId, genericAssayStableIdToEntityIdMap); + } else { + String geneSymbol = null; + if (hugoSymbolIndex != -1) { + geneSymbol = rowParts[hugoSymbolIndex]; + } + if (rppaGeneRefIndex != -1) { + geneSymbol = rowParts[rppaGeneRefIndex]; + } + if (geneSymbol != null && geneSymbol.isEmpty()) { + geneSymbol = null; + } + //get entrez + String entrez = null; + if (entrezGeneIdIndex != -1) { + entrez = rowParts[entrezGeneIdIndex]; + } + if (entrez != null && entrez.isEmpty()) { + entrez = null; + } + if (entrez != null && !EntrezValidator.isaValidEntrezId(entrez)) { + ProgressMonitor.logWarning("Ignoring line with invalid Entrez_Id " + entrez); + } else { + String firstCellValue = rowParts[0]; + if (targetLine == null || firstCellValue.equals(targetLine)) { + recordAdded = saveLine(sampleValues, + entrez, geneSymbol, + isRppaProfile, isDiscretizedCnaProfile, + existingCnaEvents); + } + } + } + } + } // increment number of records added or entries skipped @@ -311,6 +371,7 @@ public void importData(int numLines) throws IOException, DaoException { line = buf.readLine(); } + expandRemainingGeneticEntityTabDelimitedRowsWithBlankValue(); if (MySQLbulkLoader.isBulkLoad()) { MySQLbulkLoader.flushAll(); } @@ -332,6 +393,60 @@ public void importData(int numLines) throws IOException, DaoException { } } + private void expandRemainingGeneticEntityTabDelimitedRowsWithBlankValue() { + if 
(updateMode) { + // Expand remaining genetic entity id rows that were not mentioned in the file + new HashSet<>(geneticAlterationMap.keySet()).forEach(geneticEntityId -> { + try { + String[] values = new String[orderedImportedSampleList.size()]; + Arrays.fill(values, ""); + saveValues(geneticEntityId, values); + } catch (DaoException e) { + throw new RuntimeException(e); + } + }); + } + } + + private void ensureSampleGeneticProfile(Sample sample) throws DaoException { + if (!DaoSampleProfile.sampleExistsInGeneticProfile(sample.getInternalId(), geneticProfileId)) { + Integer genePanelID = (genePanel == null) ? null : GeneticProfileUtil.getGenePanelId(genePanel); + if (updateMode) { + DaoSampleProfile.deleteRecords(List.of(sample.getInternalId()), List.of(geneticProfileId)); + } + DaoSampleProfile.addSampleProfile(sample.getInternalId(), geneticProfileId, genePanelID); + } + } + + private void saveOrderedSampleList() throws DaoException { + if (updateMode) { + ArrayList savedOrderedSampleList = DaoGeneticProfileSamples.getOrderedSampleList(geneticProfileId); + int initialOrderSampleListSize = savedOrderedSampleList.size(); + checkSamplesInDataEqualTo(initialOrderSampleListSize); + // add all new sample ids at the end + ArrayList extendedSampleList = new ArrayList<>(savedOrderedSampleList); + List newSampleIds = orderedSampleList.stream().filter(sampleId -> !savedOrderedSampleList.contains(sampleId)).toList(); + extendedSampleList.addAll(newSampleIds); + orderedImportedSampleList = orderedSampleList; + orderedSampleList = extendedSampleList; + + + DaoGeneticProfileSamples.deleteAllSamplesInGeneticProfile(geneticProfileId); + } + DaoGeneticProfileSamples.addGeneticProfileSamples(geneticProfileId, orderedSampleList); + } + + private void checkSamplesInDataEqualTo(int initialOrderSampleListSize) { + geneticAlterationMap.forEach((geneticEntityId, sampleToValue) -> { + if (sampleToValue.size() != initialOrderSampleListSize) { + throw new IllegalStateException("Number of 
samples (" + + sampleToValue.size() + ") for genetic entity with id " + + geneticEntityId + " does not match with the number in the inital sample list (" + + initialOrderSampleListSize + ")."); + } + }); + } + private Map, Map> readPdAnnotations(File pdAnnotationsFile) { Map, Map> pdAnnotations = new HashMap<>(); BufferedReader reader; @@ -483,235 +598,119 @@ private Map, Map> readPdAnnotations(File * AMIXED0... * * - * @param line the line from the profile data file to be parsed - * @param nrColumns the number of columns, defined by the header line - * @param sampleStartIndex the index of the first column with a sample name in the header field - * @param hugoSymbolIndex the index of the column Hugo_Symbol - * @param entrezGeneIdIndex the index of the column Entrez_Gene_Id - * @param rppaGeneRefIndex the index of the column Composite.Element.Ref * @param isRppaProfile true if this is an rppa profile (i.e. alteration type is PROTEIN_LEVEL and the first column is Composite.Element.Ref) * @param isDiscretizedCnaProfile true if this is a discretized CNA profile (i.e. alteration type COPY_NUMBER_ALTERATION and showProfileInAnalysisTab is true) - * @param daoGene an instance of DaoGeneOptimized ... 
for use in resolving gene symbols - * @param orderedSampleList a list of the internal sample ids corresponding to the sample names in the header line * @param existingCnaEvents a collection of CnaEvents, to be added to or updated during parsing of individual lines * @return true if any record was stored in genetic_alteration, else false * @throws DaoException if any DaoException is thrown while using daoGene or daoGeneticAlteration */ - private boolean parseLine(String line, int nrColumns, int sampleStartIndex, - int hugoSymbolIndex, int entrezGeneIdIndex, int rppaGeneRefIndex, - boolean isRppaProfile, boolean isDiscretizedCnaProfile, - DaoGeneOptimized daoGene, - List filteredSampleIndices, List orderedSampleList, - Set existingCnaEvents + private boolean saveLine(String[] values, + String entrez, + String geneSymbol, + boolean isRppaProfile, + boolean isDiscretizedCnaProfile, + Set existingCnaEvents ) throws DaoException { - //TODO: refactor this entire function - split functionality into smaller units / subroutines - boolean recordStored = false; - // Ignore lines starting with # - if (!line.startsWith("#") && line.trim().length() > 0) { - String[] parts = line.split("\t", -1); + if (isRppaProfile && geneSymbol == null) { + ProgressMonitor.logWarning("Ignoring line with no Composite.Element.REF value"); + return false; + } - if (parts.length > nrColumns) { - if (line.split("\t").length > nrColumns) { - ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length - + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]); - return false; - } - } - String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? 
nrColumns : parts.length); - values = filterOutNormalValues(filteredSampleIndices, values); + //If all are empty, skip line: + boolean noGeneSpecified = geneSymbol == null && entrez == null; + if (noGeneSpecified) { + ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol and no Entrez_Id"); + return false; + } - String geneSymbol = null; - if (hugoSymbolIndex != -1) { - geneSymbol = parts[hugoSymbolIndex]; - } - //RPPA: //TODO - we should split up the RPPA scenario from this code...too many if/else because of this - if (rppaGeneRefIndex != -1) { - geneSymbol = parts[rppaGeneRefIndex]; - } - if (geneSymbol != null && geneSymbol.isEmpty()) { - geneSymbol = null; + if (geneSymbol != null) { + boolean multipleGenesLine = geneSymbol.contains("///"); + if (multipleGenesLine) { + ProgressMonitor.logWarning("Ignoring gene symbol: " + geneSymbol + + " It is separated by ///. This indicates that the line contains information regarding multiple genes, and we cannot currently handle this"); + return false; } - if (isRppaProfile && geneSymbol == null) { - ProgressMonitor.logWarning("Ignoring line with no Composite.Element.REF value"); + boolean unknownGene = geneSymbol.contains("---"); + if (unknownGene) { + ProgressMonitor.logWarning("Ignoring gene symbol: " + geneSymbol + + " It is specified as ---. This indicates that the line contains information regarding an unknown gene, and we cannot currently handle this"); return false; } - //get entrez - String entrez = null; - if (entrezGeneIdIndex != -1) { - entrez = parts[entrezGeneIdIndex]; + } + + List genes; + //If rppa, parse genes from "Composite.Element.REF" column: + if (isRppaProfile) { + genes = parseRPPAGenes(geneSymbol); + } else { + genes = parseGenes(entrez, geneSymbol); + } + + //if genes still null, skip current record + if (genes == null || genes.isEmpty()) { + ProgressMonitor.logWarning("Gene with Entrez_Id " + entrez + " and gene symbol" + geneSymbol +" not found. 
Record will be skipped for this gene."); + return false; + } + + List genesMatchingAnAlias = Collections.emptyList(); + if (geneSymbol != null) { + genesMatchingAnAlias = daoGene.getGenesForAlias(geneSymbol); + } + + Set microRNAGenes = new HashSet<>(); + Set nonMicroRNAGenes = new HashSet<>(); + Iterator geneIterator = Stream.concat(genes.stream(), genesMatchingAnAlias.stream()).iterator(); + while (geneIterator.hasNext()) { + CanonicalGene g = geneIterator.next(); + if ("miRNA".equals(g.getType())) { + microRNAGenes.add(g); + } else { + nonMicroRNAGenes.add(g); } - if (entrez != null) { - if (entrez.isEmpty()) { - entrez = null; - } - else if (!entrez.matches("[0-9]+")) { - //TODO - would be better to give an exception in some cases, like negative Entrez values - ProgressMonitor.logWarning("Ignoring line with invalid Entrez_Id " + entrez); - return false; + } + if (!microRNAGenes.isEmpty()) { + // for micro rna, duplicate the data + for (CanonicalGene gene : microRNAGenes) { + if (this.saveValues(gene, values, geneSymbol)) { + recordStored = true; } } - - //If all are empty, skip line: - if (geneSymbol == null && entrez == null) { - ProgressMonitor.logWarning("Ignoring line with no Hugo_Symbol and no Entrez_Id"); + if (!recordStored) { + if (nonMicroRNAGenes.isEmpty()) { + // this means that no microRNA records could not be stored + ProgressMonitor.logWarning("Could not store microRNA data"); + } else { + // this case : + // - at least one of the entrez-gene-ids was not a microRNA + // - all of the matched microRNA ids (if any) failed to be imported (presumably already imported on a prior line) + ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous (a mixture of microRNA and other types). 
Record will be skipped for this gene."); + } return false; + } + } else { + // none of the matched genes are type "miRNA" + if (genes.size() == 1) { + // Store all values per gene: + recordStored = this.saveValues(genes.get(0), values, geneSymbol); + //only add extra CNA related records if the step above worked, otherwise skip: + if (recordStored && isDiscretizedCnaProfile) { + if (updateMode) { + DaoCnaEvent.removeSampleCnaEvents(geneticProfileId, orderedImportedSampleList); + } + long entrezGeneId = genes.get(0).getEntrezGeneId(); + CnaUtil.storeCnaEvents(existingCnaEvents, composeCnaEventsToAdd(values, entrezGeneId)); + } } else { - if (geneSymbol != null && (geneSymbol.contains("///") || geneSymbol.contains("---"))) { - // Ignore gene IDs separated by ///. This indicates that - // the line contains information regarding multiple genes, and - // we cannot currently handle this. - // Also, ignore gene IDs that are specified as ---. This indicates - // the line contains information regarding an unknown gene, and - // we cannot currently handle this. - ProgressMonitor.logWarning("Ignoring gene ID: " + geneSymbol); - return false; + if (isRppaProfile) { // for protein data, duplicate the data + recordStored = saveRppaValues(values, recordStored, genes, geneSymbol); } else { - List genes = null; - //If rppa, parse genes from "Composite.Element.REF" column: - if (isRppaProfile) { - genes = parseRPPAGenes(geneSymbol); - if (genes == null) { - //will be null when there is a parse error in this case, so we - //can return here and avoid duplicated messages: - return false; - } - if (genes.isEmpty()) { - String gene = (geneSymbol != null) ? geneSymbol : entrez; - ProgressMonitor.logWarning("Gene not found for: [" + gene - + "]. 
Ignoring it " - + "and all tab-delimited data associated with it!"); - return false; - } - } else { - //try entrez: - if (entrez != null) { - CanonicalGene gene = daoGene.getGene(Long.parseLong(entrez)); - if (gene != null) { - genes = Arrays.asList(gene); - } - } - //no entrez or could not resolve by entrez, try hugo: - if ((genes == null || genes.isEmpty()) && geneSymbol != null) { - // deal with multiple symbols separate by |, use the first one - int ix = geneSymbol.indexOf("|"); - if (ix > 0) { - geneSymbol = geneSymbol.substring(0, ix); - } - genes = daoGene.getGene(geneSymbol, true); - } - //if genes still null, skip current record - if (genes == null || genes.isEmpty()) { - ProgressMonitor.logWarning("Entrez_Id " + entrez + " not found. Record will be skipped for this gene."); - return false; - } - } - - // If targetLine is specified and does not match the current line, skip the current line. - if (targetLine != null && !(parts[0].equals(targetLine))) { - return false; - } - - List genesMatchingAnAlias = Collections.emptyList(); - if (geneSymbol != null) { - genesMatchingAnAlias = daoGene.getGenesForAlias(geneSymbol); - } - - Set microRNAGenes = new HashSet<>(); - Set nonMicroRNAGenes = new HashSet<>(); - Iterator geneIterator = Stream.concat(genes.stream(), genesMatchingAnAlias.stream()).iterator(); - while (geneIterator.hasNext()) { - CanonicalGene g = geneIterator.next(); - if ("miRNA".equals(g.getType())) { - microRNAGenes.add(g); - } else { - nonMicroRNAGenes.add(g); - } - } - if (!microRNAGenes.isEmpty()) { - // for micro rna, duplicate the data - for (CanonicalGene gene : microRNAGenes) { - if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) { - recordStored = true; - } - } - if (!recordStored) { - if (nonMicroRNAGenes.isEmpty()) { - // this means that no microRNA records could not be stored - ProgressMonitor.logWarning("Could not store microRNA data"); - } else { - // this case : - // - at least one of the entrez-gene-ids was not a 
microRNA - // - all of the matched microRNA ids (if any) failed to be imported (presumably already imported on a prior line) - ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous (a mixture of microRNA and other types). Record will be skipped for this gene."); - } - return false; - } - } else { - // none of the matched genes are type "miRNA" - if (genes.size() == 1) { - List cnaEventsToAdd = new ArrayList(); - - if (isDiscretizedCnaProfile) { - long entrezGeneId = genes.get(0).getEntrezGeneId(); - for (int i = 0; i < values.length; i++) { - - // temporary solution -- change partial deletion back to full deletion. - if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) { - values[i] = CNA_VALUE_HOMOZYGOUS_DELETION; - } - if (values[i].equals(CNA_VALUE_AMPLIFICATION) - // || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB - // || values[i].equals(CNA_VALUE_ZERO) - // || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION) - || values[i].equals(CNA_VALUE_HOMOZYGOUS_DELETION) - ) { - Integer sampleId = orderedSampleList.get(i); - CnaEvent cnaEvent = new CnaEvent(sampleId, geneticProfileId, entrezGeneId, Short.parseShort(values[i])); - //delayed add: - AbstractMap.SimpleEntry sampleGenePair = new AbstractMap.SimpleEntry<>(sampleId, entrezGeneId); - Map pdAnnotationDetails = this.pdAnnotations.get(sampleGenePair); - if (pdAnnotationDetails != null) { - cnaEvent.setDriverFilter(pdAnnotationDetails.get("DRIVER_FILTER")); - cnaEvent.setDriverFilterAnnotation(pdAnnotationDetails.get("DRIVER_FILTER_ANNOTATION")); - cnaEvent.setDriverTiersFilter(pdAnnotationDetails.get("DRIVER_TIERS_FILTER")); - cnaEvent.setDriverTiersFilterAnnotation(pdAnnotationDetails.get("DRIVER_TIERS_FILTER_ANNOTATION")); - } - cnaEventsToAdd.add(cnaEvent); - } - } - } - // Store all values per gene: - recordStored = this.geneticAlterationImporter.store(values, genes.get(0), geneSymbol); - //only add extra CNA related 
records if the step above worked, otherwise skip: - if (recordStored) { - CnaUtil.storeCnaEvents(existingCnaEvents, cnaEventsToAdd); - } - } else { - if (isRppaProfile) { // for protein data, duplicate the data - for (CanonicalGene gene : genes) { - if (this.geneticAlterationImporter.store(values, gene, geneSymbol)) { - recordStored = true; - nrExtraRecords++; - } - } - if (recordStored) { - //skip one, to avoid double counting: - nrExtraRecords--; - } else { - // this means that RPPA could not be stored - ProgressMonitor.logWarning("Could not store RPPA data"); - } - } else { - if (!recordStored) { - // this case : - // - the hugo gene symbol was ambiguous (matched multiple entrez-gene-ids) - ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous. Record will be skipped for this gene."); - } - } - } + if (!recordStored) { + // this case : + // - the hugo gene symbol was ambiguous (matched multiple entrez-gene-ids) + ProgressMonitor.logWarning("Gene symbol " + geneSymbol + " found to be ambiguous. Record will be skipped for this gene."); } } } @@ -719,95 +718,147 @@ else if (!entrez.matches("[0-9]+")) { return recordStored; } - /** - * Parses line for gene set record and stores record in 'genetic_alteration' table. 
- * @param line - * @param nrColumns - * @param sampleStartIndex - * @param genesetIdIndex - * @param filteredSampleIndices - * @param daoGeneticAlteration - * @return - * @throws DaoException - */ - private boolean parseGenesetLine(String line, int nrColumns, int sampleStartIndex, int genesetIdIndex, - List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration) throws DaoException { - boolean storedRecord = false; - - if (!line.startsWith("#") && line.trim().length() > 0) { - String[] parts = line.split("\t", -1); + private boolean saveValues(CanonicalGene canonicalGene, String[] values, String geneSymbol) throws DaoException { + if (updateMode) { + values = updateValues(canonicalGene.getGeneticEntityId(), values); + if (!geneticAlterationImporter.isImportedAlready(canonicalGene)) { + daoGeneticAlteration.deleteAllRecordsInGeneticProfile(geneticProfile.getGeneticProfileId(), canonicalGene.getGeneticEntityId()); + } + } + return geneticAlterationImporter.store(values, canonicalGene, geneSymbol); + } + private boolean saveValues(int geneticEntityId, String[] values) throws DaoException { + if (updateMode) { + daoGeneticAlteration.deleteAllRecordsInGeneticProfile(geneticProfile.getGeneticProfileId(), geneticEntityId); + values = updateValues(geneticEntityId, values); + } + return daoGeneticAlteration.addGeneticAlterationsForGeneticEntity(geneticProfile.getGeneticProfileId(), geneticEntityId, values) > 0; + } - if (parts.length > nrColumns) { - if (line.split("\t").length > nrColumns) { - ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length - + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]); - return false; + private String[] updateValues(int geneticEntityId, String[] values) { + Map sampleIdToValue = ArrayUtil.zip(orderedImportedSampleList.toArray(new Integer[0]), values); + String[] updatedSampleValues = new String[orderedSampleList.size()]; + for (int i = 0; i < orderedSampleList.size(); i++) { + 
updatedSampleValues[i] = ""; + int sampleId = orderedSampleList.get(i); + if (geneticAlterationMap.containsKey(geneticEntityId)) { + HashMap savedSampleIdToValue = geneticAlterationMap.get(geneticEntityId); + updatedSampleValues[i] = savedSampleIdToValue.containsKey(sampleId) ? savedSampleIdToValue.remove(sampleId): ""; + if (savedSampleIdToValue.isEmpty()) { + geneticAlterationMap.remove(geneticEntityId); } } + if (sampleIdToValue.containsKey(sampleId)) { + updatedSampleValues[i] = sampleIdToValue.get(sampleId); + } + } + return updatedSampleValues; + } - String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length); + private boolean saveRppaValues(String[] values, boolean recordStored, List genes, String geneSymbol) throws DaoException { + for (CanonicalGene gene : genes) { + if (this.saveValues(gene, values, geneSymbol)) { + recordStored = true; + nrExtraRecords++; + } + } + if (recordStored) { + //skip one, to avoid double counting: + nrExtraRecords--; + } else { + // this means that RPPA could not be stored + ProgressMonitor.logWarning("Could not store RPPA data"); + } + return recordStored; + } + + private List parseGenes(String entrez, String geneSymbol) { + //try entrez: + if (entrez != null) { + CanonicalGene gene = daoGene.getGene(Long.parseLong(entrez)); + if (gene != null) { + return Arrays.asList(gene); + } + } + //no entrez or could not resolve by entrez, try hugo: + if (geneSymbol != null) { + // deal with multiple symbols separate by |, use the first one + int ix = geneSymbol.indexOf("|"); + if (ix > 0) { + geneSymbol = geneSymbol.substring(0, ix); + } + return daoGene.getGene(geneSymbol, true); + } + return List.of(); + } - // trim whitespace from values - values = Stream.of(values).map(String::trim).toArray(String[]::new); - values = filterOutNormalValues(filteredSampleIndices, values); + private List composeCnaEventsToAdd(String[] values, long entrezGeneId) { + if (updateMode) { + 
values = updateValues((int) entrezGeneId, values); + } + List cnaEventsToAdd = new ArrayList(); + for (int i = 0; i < values.length; i++) { - Geneset geneset = DaoGeneset.getGenesetByExternalId(parts[genesetIdIndex]); - if (geneset != null) { - storedRecord = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, geneset.getGeneticEntityId(), - EntityType.GENESET, geneset.getExternalId()); + // temporary solution -- change partial deletion back to full deletion. + if (values[i].equals(CNA_VALUE_PARTIAL_DELETION)) { + values[i] = CNA_VALUE_HOMOZYGOUS_DELETION; } - else { - ProgressMonitor.logWarning("Geneset " + parts[genesetIdIndex] + " not found in DB. Record will be skipped."); + if (values[i].equals(CNA_VALUE_AMPLIFICATION) + // || values[i].equals(CNA_VALUE_GAIN) >> skipping GAIN, ZERO, HEMIZYGOUS_DELETION to minimize size of dataset in DB + // || values[i].equals(CNA_VALUE_ZERO) + // || values[i].equals(CNA_VALUE_HEMIZYGOUS_DELETION) + || values[i].equals(CNA_VALUE_HOMOZYGOUS_DELETION) + ) { + Integer sampleId = orderedSampleList.get(i); + CnaEvent cnaEvent = new CnaEvent(sampleId, geneticProfileId, entrezGeneId, Short.parseShort(values[i])); + //delayed add: + AbstractMap.SimpleEntry sampleGenePair = new AbstractMap.SimpleEntry<>(sampleId, entrezGeneId); + Map pdAnnotationDetails = this.pdAnnotations.get(sampleGenePair); + if (pdAnnotationDetails != null) { + cnaEvent.setDriverFilter(pdAnnotationDetails.get("DRIVER_FILTER")); + cnaEvent.setDriverFilterAnnotation(pdAnnotationDetails.get("DRIVER_FILTER_ANNOTATION")); + cnaEvent.setDriverTiersFilter(pdAnnotationDetails.get("DRIVER_TIERS_FILTER")); + cnaEvent.setDriverTiersFilterAnnotation(pdAnnotationDetails.get("DRIVER_TIERS_FILTER_ANNOTATION")); + } + cnaEventsToAdd.add(cnaEvent); } } - return storedRecord; + return cnaEventsToAdd; } /** - * Parses line for generic assay profile record and stores record in 'genetic_alteration' table. 
- * @param line row from the separated-text that contains one or more values on a single sample - * @param nrColumns - * @param sampleStartIndex index of the first sample column - * @param genericAssayIdIndex index of the column that uniquely identifies a sample - * @param filteredSampleIndices - * @param daoGeneticAlteration + * Parses line for gene set record and stores record in 'genetic_alteration' table. + * @param genesetId * @return * @throws DaoException */ + private boolean saveGenesetLine(String[] values, String genesetId) throws DaoException { + boolean storedRecord = false; - private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStartIndex, int genericAssayIdIndex, - List filteredSampleIndices, DaoGeneticAlteration daoGeneticAlteration, Map genericAssayStableIdToEntityIdMap) throws DaoException { - - boolean recordIsStored = false; - - if (!line.startsWith("#") && line.trim().length() > 0) { - String[] parts = line.split("\t", -1); - if (parts.length > nrColumns) { - if (line.split("\t").length > nrColumns) { - ProgressMonitor.logWarning("Ignoring line with more fields (" + parts.length - + ") than specified in the headers(" + nrColumns + "): \n" + parts[0]); - return false; - } - } - - String values[] = (String[]) ArrayUtils.subarray(parts, sampleStartIndex, parts.length > nrColumns ? nrColumns : parts.length); + Geneset geneset = DaoGeneset.getGenesetByExternalId(genesetId); + if (geneset != null) { + storedRecord = storeGeneticEntityGeneticAlterations(values, geneset.getGeneticEntityId(), EntityType.GENESET, geneset.getExternalId()); + } + else { + ProgressMonitor.logWarning("Geneset " + genesetId + " not found in DB. 
Record will be skipped."); + } + return storedRecord; + } - // trim whitespace from values - values = Stream.of(values).map(String::trim).toArray(String[]::new); - values = filterOutNormalValues(filteredSampleIndices, values); + /** + * Parses line for generic assay profile record and stores record in 'genetic_alteration' table. + */ + private boolean saveGenericAssayLine(String[] values, String genericAssayId, Map genericAssayStableIdToEntityIdMap) { - String stableId = parts[genericAssayIdIndex]; - Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(stableId, null); + boolean recordIsStored = false; - if (entityId == null) { - ProgressMonitor.logWarning("Generic Assay entity " + parts[genericAssayIdIndex] + " not found in DB. Record will be skipped."); - } else { - recordIsStored = storeGeneticEntityGeneticAlterations(values, daoGeneticAlteration, entityId, - EntityType.GENERIC_ASSAY, stableId); - } + Integer entityId = genericAssayStableIdToEntityIdMap.getOrDefault(genericAssayId, null); - return recordIsStored; + if (entityId == null) { + ProgressMonitor.logWarning("Generic Assay entity " + genericAssayId + " not found in DB. Record will be skipped."); + } else { + recordIsStored = storeGeneticEntityGeneticAlterations(values, entityId, EntityType.GENERIC_ASSAY, genericAssayId); } return recordIsStored; @@ -816,18 +867,15 @@ private boolean parseGenericAssayLine(String line, int nrColumns, int sampleStar /** * Stores genetic alteration data for a genetic entity. 
* @param values - * @param daoGeneticAlteration * @param geneticEntityId - internal id for genetic entity * @param geneticEntityType - "GENE", "GENESET", "PHOSPHOPROTEIN" * @param geneticEntityName - hugo symbol for "GENE", external id for "GENESET", phospho gene name for "PHOSPHOPROTEIN" * @return boolean indicating if record was stored successfully or not */ - private boolean storeGeneticEntityGeneticAlterations(String[] values, DaoGeneticAlteration daoGeneticAlteration, - Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) { + private boolean storeGeneticEntityGeneticAlterations(String[] values, Integer geneticEntityId, EntityType geneticEntityType, String geneticEntityName) { try { if (importedGeneticEntitySet.add(geneticEntityId)) { - daoGeneticAlteration.addGeneticAlterationsForGeneticEntity(geneticProfile.getGeneticProfileId(), geneticEntityId, values); - return true; + return saveValues(geneticEntityId, values); } else { ProgressMonitor.logWarning("Data for genetic entity " + geneticEntityName diff --git a/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java b/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java new file mode 100644 index 00000000..3235d33e --- /dev/null +++ b/src/main/java/org/mskcc/cbio/portal/util/ArrayUtil.java @@ -0,0 +1,21 @@ +package org.mskcc.cbio.portal.util; + +import java.util.HashMap; +import java.util.Map; + +public class ArrayUtil { + public static Map zip(K[] keys, V[] values) { + Map map = new HashMap<>(); + + // Check if both arrays have the same length + if (keys.length == values.length) { + for (int i = 0; i < keys.length; i++) { + map.put(keys[i], values[i]); + } + } else { + throw new IllegalArgumentException("Arrays must be of the same length"); + } + return map; + + } +} \ No newline at end of file diff --git a/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java b/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java index 3cc6fd71..de7fe85a 100644 --- 
a/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java +++ b/src/main/java/org/mskcc/cbio/portal/util/CnaUtil.java @@ -45,6 +45,7 @@ public CnaUtil(String[] headerParts, Set namespaces) { this.namespaceColumnParser = new NamespaceColumnParser(namespaces, headerParts); } + // TODO inc: update public static void storeCnaEvents( Set existingCnaEvents, List cnaEventsToAdd @@ -53,7 +54,7 @@ public static void storeCnaEvents( if (!CNA.AMP.equals(cnaEvent.getAlteration()) && !CNA.HOMDEL.equals(cnaEvent.getAlteration())) { continue; } - + // TODO Clean cnv event // Revert PR https://github.com/cBioPortal/cbioportal-core/pull/1 breaks importer Optional existingCnaEvent = existingCnaEvents .stream() diff --git a/src/main/java/org/mskcc/cbio/portal/util/EntrezValidator.java b/src/main/java/org/mskcc/cbio/portal/util/EntrezValidator.java new file mode 100644 index 00000000..335bfd66 --- /dev/null +++ b/src/main/java/org/mskcc/cbio/portal/util/EntrezValidator.java @@ -0,0 +1,7 @@ +package org.mskcc.cbio.portal.util; + +public class EntrezValidator { + public static boolean isaValidEntrezId(String entrez) { + return entrez.matches("[0-9]+"); + } +} diff --git a/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java b/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java index 744ca565..2e767618 100644 --- a/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java +++ b/src/main/java/org/mskcc/cbio/portal/util/FileUtil.java @@ -43,30 +43,6 @@ * @author Ethan Cerami. */ public class FileUtil { - /** - * BioPAX File Type. - */ - public static final int BIOPAX = 0; - - /** - * PSI_MI File Type. - */ - public static final int PSI_MI = 1; - - /** - * External DBs File Type. - */ - public static final int EXTERNAL_DBS = 2; - - /** - * Identifiers File Type. - */ - public static final int IDENTIFIERS = 3; - - /** - * Unknown File Type. - */ - public static final int UNKNOWN = 4; /** * Gets Number of Lines in Specified File. 
@@ -77,32 +53,26 @@ public class FileUtil { */ public static int getNumLines(File file) throws IOException { int numLines = 0; - FileReader reader = new FileReader(file); - BufferedReader buffered = new BufferedReader(reader); - String line = buffered.readLine(); - while (line != null) { - if (!line.startsWith("#") && line.trim().length() > 0) { - numLines++; + try (FileReader reader = new FileReader(file); BufferedReader buffered = new BufferedReader(reader)) { + String line = buffered.readLine(); + while (line != null) { + if (isInfoLine(line)) { + numLines++; + } + line = buffered.readLine(); } - line = buffered.readLine(); + return numLines; } - reader.close(); - return numLines; } /** - * Gets Next Line of Input. Filters out Empty Lines and Comments. - * - * @param buf BufferedReader Object. - * @return next line of input. - * @throws IOException Error reading input stream. + * Does the line carry any information? + * e.g. blank lines and comment lines (starting with #) do not + * @param line the line to check + * @return true unless the line starts with # or is blank */ - public static String getNextLine(BufferedReader buf) throws IOException { - String line = buf.readLine(); - while (line != null && (line.trim().length() == 0 - || line.trim().startsWith("#"))) { - line = buf.readLine(); - } - return line; + public static boolean isInfoLine(String line) { + return !line.startsWith("#") && line.trim().length() > 0; } + } \ No newline at end of file diff --git a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java index af686a72..ab862756 100644 --- a/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java +++ b/src/main/java/org/mskcc/cbio/portal/util/GeneticProfileReader.java @@ -76,22 +76,25 @@ public static GeneticProfile loadGeneticProfile(File file) throws IOException, D GeneticProfile geneticProfile = loadGeneticProfileFromMeta(file); GeneticProfile existingGeneticProfile = DaoGeneticProfile.getGeneticProfileByStableId(geneticProfile.getStableId()); if
(existingGeneticProfile != null) { - if (!existingGeneticProfile.getDatatype().equals("MAF")) { - // the dbms already contains a GeneticProfile with the file's stable_id. This scenario is not supported - // anymore, so throw error telling user to remove existing profile first: - throw new RuntimeException("Error: genetic_profile record found with same Stable ID as the one used in your data: " - + existingGeneticProfile.getStableId() + ". Remove the existing genetic_profile record first."); - } else { - // For mutation data only we can have multiple files with the same genetic_profile. - // There is a constraint in the mutation database table to prevent duplicated data - // If this constraint is hit (mistakenly importing the same maf twice) MySqlBulkLoader will throw an exception - // - // make an object combining the pre-existing profile with the file-specific properties of the current file - GeneticProfile gp = new GeneticProfile(existingGeneticProfile); - gp.setTargetLine(gp.getTargetLine()); - gp.setOtherMetadataFields(gp.getAllOtherMetadataFields()); - return gp; + ProgressMonitor.setCurrentMessage("genetic_profile record found with same Stable ID (" + geneticProfile.getStableId() + + "). 
Using it instead."); + if (geneticProfile.getGeneticAlterationType() != existingGeneticProfile.getGeneticAlterationType()) { + throw new IllegalStateException("genetic_profile record found with same Stable ID (" + + existingGeneticProfile.getStableId() + ") but different genetic alteration type: " + + existingGeneticProfile.getGeneticAlterationType()); } + if (!existingGeneticProfile.getDatatype().equals(geneticProfile.getDatatype())) { + throw new IllegalStateException("genetic_profile record found with same Stable ID (" + + existingGeneticProfile.getStableId() + ") but different data type: " + + existingGeneticProfile.getDatatype()); + } + if (geneticProfile.getCancerStudyId() != existingGeneticProfile.getCancerStudyId()) { + throw new IllegalStateException("genetic_profile record found with same Stable ID (" + + existingGeneticProfile.getStableId() + ") but different cancer study (id=" + + existingGeneticProfile.getCancerStudyId() + ")"); + } + existingGeneticProfile.setOtherMetadataFields(geneticProfile.getAllOtherMetadataFields()); + return existingGeneticProfile; } // For GSVA profiles, we want to create a geneticProfileLink from source_stable_id for: diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java index 8c1afdcc..83e04144 100644 --- a/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/dao/TestDaoGeneticProfile.java @@ -72,7 +72,7 @@ public void setUp() throws DaoException public void testDaoGetAllGeneticProfiles() throws DaoException { ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId); - assertEquals(7, list.size()); + assertEquals(9, list.size()); } @Test @@ -134,12 +134,12 @@ public void testDaoDeleteGeneticProfile() throws DaoException { GeneticProfile geneticProfile = DaoGeneticProfile.getGeneticProfileById(2); -
assertEquals(7, DaoGeneticProfile.getCount()); + assertEquals(9, DaoGeneticProfile.getCount()); DaoGeneticProfile.deleteGeneticProfile(geneticProfile); - assertEquals(6, DaoGeneticProfile.getCount()); + assertEquals(8, DaoGeneticProfile.getCount()); ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId); - assertEquals(6, list.size()); + assertEquals(8, list.size()); geneticProfile = list.get(0); assertEquals(studyId, geneticProfile.getCancerStudyId()); assertEquals("mRNA expression (microarray)", geneticProfile.getProfileName()); @@ -155,7 +155,7 @@ public void testDaoUpdateGeneticProfile() throws DaoException { geneticProfile.getGeneticProfileId(), "Updated Name", "Updated Description")); ArrayList list = DaoGeneticProfile.getAllGeneticProfiles(studyId); - assertEquals(7, list.size()); + assertEquals(9, list.size()); geneticProfile = list.get(0); assertEquals(studyId, geneticProfile.getCancerStudyId()); assertEquals("Updated Name", geneticProfile.getProfileName()); diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java new file mode 100644 index 00000000..fdf36995 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/GeneticAlterationsTestHelper.java @@ -0,0 +1,55 @@ +package org.mskcc.cbio.portal.integrationTest.incremental; + +import org.jetbrains.annotations.NotNull; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoGeneticEntity; + +import java.util.HashMap; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class GeneticAlterationsTestHelper { + @NotNull + public static Set geneStableIdsToEntityIds(Set beforeStableIds) { + return beforeStableIds.stream().map(stableId -> { + try { + return geneStableIdToEntityId(stableId); 
+ } catch (DaoException e) { + throw new RuntimeException(e); + } + }).collect(Collectors.toSet()); + } + + public static int geneStableIdToEntityId(String stableId) throws DaoException { + return DaoGeneticEntity.getGeneticEntityByStableId(stableId).getId(); + } + + public static void assertPriorDataState(HashMap> beforeResult, Set expectedEntityIds, Set expectedSampleIds) { + assertEquals(expectedEntityIds, beforeResult.keySet()); + beforeResult.forEach((entityId, sampleIdToValue) -> { + assertEquals("Samples for gene with entityId = " + entityId + " have to match expected ones", + expectedSampleIds, beforeResult.get(entityId).keySet()); + }); + } + + public static void assertNoChange(HashMap> beforeResult, + HashMap> afterResult, + Set entityIds, + Set sampleIds) { + entityIds.forEach(entityId -> { + assertTrue("After result is expected to contain entityId=" + entityId, + afterResult.containsKey(entityId)); + sampleIds.forEach(sampleId -> { + assertTrue("Sample_id=" + sampleId + " expected to be found for gene with entityId=" + entityId, + afterResult.get(entityId).containsKey(sampleId)); + assertEquals("The values for sample_id=" + sampleId + + " and entityId=" + entityId + " before and after upload have to match.", + beforeResult.get(entityId).get(sampleId), afterResult.get(entityId).get(sampleId)); + }); + }); + } + +} diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java new file mode 100644 index 00000000..ad3ebd55 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalCopyNumberAlterationImport.java @@ -0,0 +1,177 @@ +/* + * This file is part of cBioPortal. 
+ * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package org.mskcc.cbio.portal.integrationTest.incremental; + +import org.cbioportal.model.CNA; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCnaEvent; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; +import org.mskcc.cbio.portal.dao.DaoGeneticProfile; +import org.mskcc.cbio.portal.model.CnaEvent; +import org.mskcc.cbio.portal.model.GeneticProfile; +import org.mskcc.cbio.portal.scripts.ImportProfileData; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState; + +/** + * Tests Incremental Import of COPY_NUMBER_ALTERATION
Data. + * + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"}) +@Rollback +@Transactional +public class TestIncrementalCopyNumberAlterationImport { + + /** + * Test incremental upload of COPY_NUMBER_ALTERATION DISCRETE (gistic) + */ + @Test + public void testDiscreteCNA() throws DaoException, IOException { + /** + * Prior checks + */ + // Hugo_Symbol: CDK1 + final long newGeneEntrezId = 983l; + // Gene that is part of the platform, but absent during the incremental upload + // Hugo_Symbol: ATM + final long absentGeneEntrezId = 472l; + final Set noChangeEntrezIds = Set.of(10000l, 207l, 208l, 3265l, 3845l, 4893l, 672l, 673l, 675l); + final Set beforeEntrezIds = new HashSet<>(noChangeEntrezIds); + beforeEntrezIds.add(absentGeneEntrezId); + + // stable_id: TCGA-XX-0800 + final int newSampleId = 15; + // stable_id: TCGA-A1-A0SO + final int updateSampleId = 12; + final Set noChangeSampleIds = Set.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14); + final Set beforeSampleIds = new HashSet<>(noChangeSampleIds); + beforeSampleIds.add(updateSampleId); + + final Set afterSampleIds = new HashSet<>(beforeSampleIds); + afterSampleIds.add(newSampleId); + + GeneticProfile discreteCNAProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_gistic"); + assertNotNull(discreteCNAProfile); + HashMap> beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(discreteCNAProfile.getGeneticProfileId(), null); + assertPriorDataState(beforeResult, beforeEntrezIds, beforeSampleIds); + + List allCnaLevels = Arrays.stream(CNA.values()).map(CNA::getCode).toList(); + Set beforeCnaEventsSampleIds = Set.of(4, 13, 14, updateSampleId); + List beforeSampleCnaEvents = DaoCnaEvent.getCnaEvents(afterSampleIds.stream().toList(), + null, + discreteCNAProfile.getGeneticProfileId(), + allCnaLevels); + Map> beforeSampleIdToSampleCnaEvents = 
beforeSampleCnaEvents.stream().collect(Collectors.groupingBy(CnaEvent::getSampleId)); + assertEquals(beforeCnaEventsSampleIds, beforeSampleIdToSampleCnaEvents.keySet()); + + File dataFolder = new File("src/test/resources/incremental/copy_number_alteration/"); + File metaFile = new File(dataFolder, "meta_cna_discrete.txt"); + File dataFile = new File(dataFolder, "data_cna_discrete.txt"); + + /** + * Test + */ + new ImportProfileData(new String[] { + "--loadMode", "bulkLoad", + "--meta", metaFile.getAbsolutePath(), + "--data", dataFile.getAbsolutePath(), + "--overwrite-existing", + }).run(); + + /** + * After test assertions + */ + HashMap> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(discreteCNAProfile.getGeneticProfileId(), null); + assertEquals("After result should get exactly one new gene", beforeEntrezIds.size() + 1, + afterResult.size()); + afterResult.values() + .forEach(sampleToValue -> + assertEquals("Each gene row has to get one extra sample",beforeSampleIds.size() + 1, sampleToValue.size())); + assertNoChange(beforeResult, afterResult, noChangeEntrezIds, noChangeSampleIds); + assertEquals("-2", afterResult.get(newGeneEntrezId).get(newSampleId)); + assertEquals("2", afterResult.get(newGeneEntrezId).get(updateSampleId)); + assertEquals("", afterResult.get(absentGeneEntrezId).get(newSampleId)); + assertEquals("", afterResult.get(absentGeneEntrezId).get(updateSampleId)); + + List afterSampleCnaEvents = DaoCnaEvent.getCnaEvents(afterSampleIds.stream().toList(), + afterResult.keySet(), + discreteCNAProfile.getGeneticProfileId(), + allCnaLevels); + Map> afterSampleIdToSampleCnaEvents = afterSampleCnaEvents.stream().collect(Collectors.groupingBy(CnaEvent::getSampleId)); + assertEquals("There is only one new sample that has to gain cna events", beforeCnaEventsSampleIds.size() + 1, afterSampleIdToSampleCnaEvents.size()); + beforeCnaEventsSampleIds.forEach(sampleId -> { + if (sampleId == updateSampleId) { + return; + } + Set 
beforeCnaEvents = beforeSampleIdToSampleCnaEvents.get(sampleId).stream().map(CnaEvent::getEvent).collect(Collectors.toSet()); + Set afterCnaEvents = afterSampleIdToSampleCnaEvents.get(sampleId).stream().map(CnaEvent::getEvent).collect(Collectors.toSet()); + assertEquals("CNA events for sample_id=" + sampleId + " must not change.", beforeCnaEvents, afterCnaEvents); + }); + Map newSampleEntrezGeneIdToCnaAlteration = afterSampleIdToSampleCnaEvents.get(newSampleId).stream() + .map(CnaEvent::getEvent) + .collect(Collectors.toMap( + event -> event.getGene().getEntrezGeneId(), + CnaEvent.Event::getAlteration)); + assertEquals(Map.of( + 208l, CNA.HOMDEL, + 3265l, CNA.AMP, + 4893l, CNA.HOMDEL, + 672l, CNA.AMP, + 673l, CNA.AMP, + 675l, CNA.HOMDEL, + newGeneEntrezId, CNA.HOMDEL + ), + newSampleEntrezGeneIdToCnaAlteration); + Map updatedSampleEntrezGeneIdToCnaAlteration = afterSampleIdToSampleCnaEvents.get(updateSampleId).stream() + .map(CnaEvent::getEvent) + .collect(Collectors.toMap( + event -> event.getGene().getEntrezGeneId(), + CnaEvent.Event::getAlteration)); + assertEquals(Map.of( + 10000l, CNA.HOMDEL, + 207l, CNA.AMP, + 3845l, CNA.AMP, + 673l, CNA.HOMDEL, + newGeneEntrezId, CNA.AMP + ), + updatedSampleEntrezGeneIdToCnaAlteration); + } + +} diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGenericAssayImporter.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGenericAssayImporter.java new file mode 100644 index 00000000..e0ef8cf5 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGenericAssayImporter.java @@ -0,0 +1,162 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . +*/ + +package org.mskcc.cbio.portal.integrationTest.incremental; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; +import org.mskcc.cbio.portal.dao.DaoGeneticEntity; +import org.mskcc.cbio.portal.dao.DaoGeneticProfile; +import org.mskcc.cbio.portal.model.GeneticProfile; +import org.mskcc.cbio.portal.scripts.ImportProfileData; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThrows; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdToEntityId; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdsToEntityIds; + +/** + * Tests Incremental Import of Generic Assay data + 
* + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" }) +@Rollback +@Transactional +public class TestIncrementalGenericAssayImporter { + + // stable_id: TCGA-A1-A0SB-01 + final int newSampleId = 1; + // stable_id: TCGA-A1-A0SD-01 + final int updateSampleId = 2; + // stable_id: TCGA-A1-A0SE-01 + final int noChangeSampleId = 3; + final Set beforeSampleIds = Set.of(updateSampleId, noChangeSampleId); + + // Stable id that is part of the platform, but absent during the incremental upload + final String absentStableId = "L-685458"; + final Set noChangeStableIds = Set.of("Erlotinib", "Irinotecan", "Lapatinib"); + final Set beforeStableIds = new HashSet<>(noChangeStableIds); + { beforeStableIds.add(absentStableId); } + + private GeneticProfile ic50Profile; + private HashMap> beforeResult; + + /** + * Test incremental upload of GENERIC_ASSAY + */ + @Test + public void testGenericAssay() throws DaoException { + + File dataFolder = new File("src/test/resources/incremental/generic_assay/"); + File metaFile = new File(dataFolder, "meta_treatment_ic50.txt"); + File dataFile = new File(dataFolder, "data_treatment_ic50.txt"); + + /** + * Test + */ + new ImportProfileData(new String[] { + "--loadMode", "bulkLoad", + "--meta", metaFile.getAbsolutePath(), + "--data", dataFile.getAbsolutePath(), + "--overwrite-existing", + }).run(); + + /** + * After test assertions + */ + HashMap> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMapForEntityIds(ic50Profile.getGeneticProfileId(), null); + assertEquals("After result should have +1 amount of entries", beforeResult.size() + 1, afterResult.size()); + afterResult.values() + .forEach(sampleToValue -> + assertEquals("Each gene row has to get one extra sample",beforeSampleIds.size() + 1, sampleToValue.size())); + assertNoChange(beforeResult, afterResult, 
geneStableIdsToEntityIds(noChangeStableIds), Set.of(noChangeSampleId)); + int erlotinibEntityId = geneStableIdToEntityId("Erlotinib"); + assertEquals(">8", afterResult.get(erlotinibEntityId).get(newSampleId)); + assertEquals("7.5", afterResult.get(erlotinibEntityId).get(updateSampleId)); + int irinotecanEntityId = geneStableIdToEntityId("Irinotecan"); + assertEquals("", afterResult.get(irinotecanEntityId).get(newSampleId)); + assertEquals("0.081", afterResult.get(irinotecanEntityId).get(updateSampleId)); + int absentEntityId = geneStableIdToEntityId(absentStableId); + assertEquals("", afterResult.get(absentEntityId).get(newSampleId)); + assertEquals("", afterResult.get(absentEntityId).get(updateSampleId)); + int lapatinibEntityId = geneStableIdToEntityId("Lapatinib"); + assertEquals("6.2", afterResult.get(lapatinibEntityId).get(newSampleId)); + assertEquals("7.848", afterResult.get(lapatinibEntityId).get(updateSampleId)); + int lbw242EntityId = geneStableIdToEntityId("LBW242"); + assertEquals("0.1", afterResult.get(lbw242EntityId).get(newSampleId)); + assertEquals(">~8", afterResult.get(lbw242EntityId).get(updateSampleId)); + assertNotNull("New generic entity has to be added", DaoGeneticEntity.getGeneticEntityByStableId("LBW242")); + } + + /** + * Test that incremental upload of GENERIC_ASSAY (patient level) is not supported + */ + @Test + public void testGenericAssayPatientLevel() throws DaoException { + + File dataFolder = new File("src/test/resources/incremental/generic_assay/"); + File metaFile = new File(dataFolder, "meta_treatment_ic50_patient_level.txt"); + File dataFile = new File(dataFolder, "data_treatment_ic50_patient_level.txt"); + + /** + * Test + */ + assertThrows("Incremental upload for generic assay patient_level data is not supported. 
Please use sample level instead.", + RuntimeException.class, () -> { + new ImportProfileData(new String[] { + "--loadMode", "bulkLoad", + "--meta", metaFile.getAbsolutePath(), + "--data", dataFile.getAbsolutePath(), + "--overwrite-existing", + }).run(); + }); + } + + @Before + public void setUp() throws DaoException { + DaoCancerStudy.reCacheAll(); + + ic50Profile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_treatment_ic50"); + assertNotNull(ic50Profile); + + beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMapForEntityIds(ic50Profile.getGeneticProfileId(), null); + Set beforeEntityIds = geneStableIdsToEntityIds(beforeStableIds); + assertPriorDataState(beforeResult, beforeEntityIds, beforeSampleIds); + } + +} diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGsvaImporter.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGsvaImporter.java new file mode 100644 index 00000000..c629ecb4 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalGsvaImporter.java @@ -0,0 +1,81 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+*/ + +package org.mskcc.cbio.portal.integrationTest.incremental; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoGeneOptimized; +import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; +import org.mskcc.cbio.portal.dao.DaoGeneticEntity; +import org.mskcc.cbio.portal.dao.DaoGeneticProfile; +import org.mskcc.cbio.portal.model.GeneticAlterationType; +import org.mskcc.cbio.portal.model.GeneticProfile; +import org.mskcc.cbio.portal.scripts.ImportTabDelimData; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertThrows; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdToEntityId; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.geneStableIdsToEntityIds; + +/** + * Tests Incremental Import is not supported for GSVA data type + * + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" }) +@Rollback +@Transactional +public class TestIncrementalGsvaImporter { + @Test + public void 
testGsvaIsNotSupported() throws DaoException, IOException { + GeneticProfile gsvaProfile = new GeneticProfile(); + gsvaProfile.setCancerStudyId(DaoCancerStudy.getCancerStudyByStableId("study_tcga_pub").getInternalId()); + gsvaProfile.setStableId("gsva_scores"); + gsvaProfile.setDatatype("GENESET_SCORE"); + gsvaProfile.setGeneticAlterationType(GeneticAlterationType.GENESET_SCORE); + gsvaProfile.setProfileName("gsva test platform"); + DaoGeneticProfile.addGeneticProfile(gsvaProfile); + + assertThrows(UnsupportedOperationException.class, () -> + new ImportTabDelimData(File.createTempFile("gsva", "test"), + DaoGeneticProfile.getGeneticProfileByStableId("gsva_scores").getGeneticProfileId(), + null, + true, + DaoGeneticAlteration.getInstance(), + DaoGeneOptimized.getInstance())); + } + +} diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalMrnaExpressionImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalMrnaExpressionImport.java new file mode 100644 index 00000000..d44ccee5 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalMrnaExpressionImport.java @@ -0,0 +1,119 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package org.mskcc.cbio.portal.integrationTest.incremental; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; +import org.mskcc.cbio.portal.dao.DaoGeneticProfile; +import org.mskcc.cbio.portal.model.GeneticProfile; +import org.mskcc.cbio.portal.scripts.ImportProfileData; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange; +import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState; + +/** + * Tests Incremental Import of MRNA_EXPRESSION Data. 
+ * + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"}) +@Rollback +@Transactional +public class TestIncrementalMrnaExpressionImport { + + /** + * Test incremental upload of MRNA_EXPRESSION + */ + @Test + public void testMrnaExpression() throws DaoException, IOException { + /** + * Prior checks + */ + // Hugo_Symbol: CDK1 + final long newGeneEntrezId = 983l; + // Gene that is part of the platform, but absent during the incremental upload + // Hugo_Symbol: ARAF + final long absentGeneEntrezId = 369l; + final Set noChangeEntrezIds = Set.of(10000l, 207l, 208l, 3265l, 3845l, 472l, 4893l, 672l, 673l, 675l); + final Set beforeEntrezIds = new HashSet<>(noChangeEntrezIds); + beforeEntrezIds.add(absentGeneEntrezId); + + // stable_id: TCGA-A1-A0SB-01 + final int newSampleId = 1; + // stable_id: TCGA-A1-A0SD-01 + final int updateSampleId = 2; + final Set noChangeSampleIds = Set.of(3, 6, 8, 9, 10, 12, 13); + final Set beforeSampleIds = new HashSet<>(noChangeSampleIds); + beforeSampleIds.add(updateSampleId); + + GeneticProfile mrnaProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_mrna"); + assertNotNull(mrnaProfile); + + HashMap> beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null); + assertPriorDataState(beforeResult, beforeEntrezIds, beforeSampleIds); + + File dataFolder = new File("src/test/resources/incremental/mrna_expression/"); + File metaFile = new File(dataFolder, "meta_expression_Zscores.txt"); + File dataFile = new File(dataFolder, "data_expression_Zscores.txt"); + + /** + * Test + */ + new ImportProfileData(new String[] { + "--loadMode", "bulkLoad", + "--meta", metaFile.getAbsolutePath(), + "--data", dataFile.getAbsolutePath(), + "--overwrite-existing", + }).run(); + + /** + * After test assertions + */ + HashMap> afterResult = 
DaoGeneticAlteration.getInstance().getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null); + assertEquals("After result should get exactly one new gene", beforeEntrezIds.size() + 1, + afterResult.size()); + afterResult.values() + .forEach(sampleToValue -> + assertEquals("Each gene row has to get one extra sample", beforeSampleIds.size() + 1, sampleToValue.size())); + assertNoChange(beforeResult, afterResult, noChangeEntrezIds, noChangeSampleIds); + HashMap newGeneRow = afterResult.get(newGeneEntrezId); + assertEquals("-0.1735", newGeneRow.get(newSampleId)); + assertEquals("-0.6412", newGeneRow.get(updateSampleId)); + HashMap absentGeneRow = afterResult.get(absentGeneEntrezId); + assertEquals("", absentGeneRow.get(newSampleId)); + assertEquals("", absentGeneRow.get(updateSampleId)); + } +} diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalProteinLevelImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalProteinLevelImport.java new file mode 100644 index 00000000..f3933b27 --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalProteinLevelImport.java @@ -0,0 +1,122 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportProfileData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertNoChange;
+import static org.mskcc.cbio.portal.integrationTest.incremental.GeneticAlterationsTestHelper.assertPriorDataState;
+
+/**
+ * Tests Incremental Import of PROTEIN_LEVEL Data.
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = {"classpath:/applicationContext-dao.xml"})
+@Rollback
+@Transactional
+public class TestIncrementalProteinLevelImport {
+
+    /**
+     * Test incremental upload of PROTEIN_LEVEL: the upload has to add exactly one gene (CDK1) and one sample, and the gene absent from the upload (ARAF) has to get empty values for the uploaded samples
+     */
+    @Test
+    public void testRppa() throws DaoException {
+        /**
+         * Prior checks
+         */
+        // Hugo_Symbol: CDK1
+        final long newGeneEntrezId = 983L;
+        // Gene that is part of the platform, but absent during the incremental upload
+        // Hugo_Symbol: ARAF
+        final long absentGeneEntrezId = 369L;
+        final Set<Long> noChangeEntrezIds = Set.of(10000L, 207L, 208L, 3265L, 3845L, 472L, 4893L, 672L, 673L, 675L);
+        final Set<Long> beforeEntrezIds = new HashSet<>(noChangeEntrezIds);
+        beforeEntrezIds.add(absentGeneEntrezId);
+
+        // stable_id: TCGA-A1-A0SB-01
+        final int newSampleId = 1;
+        // stable_id: TCGA-A1-A0SD-01
+        final int updateSampleId = 2;
+        final Set<Integer> noChangeSampleIds = Set.of(3, 6, 8, 9, 10, 12, 13);
+        final Set<Integer> beforeSampleIds = new HashSet<>(noChangeSampleIds);
+        beforeSampleIds.add(updateSampleId);
+
+        GeneticProfile rppaProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_rppa");
+        assertNotNull(rppaProfile);
+
+        HashMap<Long, HashMap<Integer, String>> beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(rppaProfile.getGeneticProfileId(), null);
+        assertPriorDataState(beforeResult, beforeEntrezIds, beforeSampleIds);
+
+        File dataFolder = new File("src/test/resources/incremental/protein_level/");
+        File metaFile = new File(dataFolder, "meta_rppa.txt");
+        File dataFile = new File(dataFolder, "data_rppa.txt");
+
+        /**
+         * Test
+         */
+        new ImportProfileData(new String[] {
+                "--loadMode", "bulkLoad",
+                "--meta", metaFile.getAbsolutePath(),
+                "--data", dataFile.getAbsolutePath(),
+                "--overwrite-existing",
+        }).run();
+
+        /**
+         * After test assertions
+         */
+        HashMap<Long, HashMap<Integer, String>> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(rppaProfile.getGeneticProfileId(), null);
+        assertEquals("After result should get exactly one new gene", beforeEntrezIds.size() + 1,
+                afterResult.size());
+        afterResult.values()
+                .forEach(sampleToValue ->
+                        assertEquals("Each gene row has to get one extra sample", beforeSampleIds.size() + 1, sampleToValue.size()));
+        assertNoChange(beforeResult, afterResult, noChangeEntrezIds, noChangeSampleIds);
+        assertEquals("-0.141047088398489", afterResult.get(newGeneEntrezId).get(newSampleId));
+        assertEquals("1.61253243564957", afterResult.get(newGeneEntrezId).get(updateSampleId));
+        assertEquals("", afterResult.get(absentGeneEntrezId).get(newSampleId));
+        assertEquals("", afterResult.get(absentGeneEntrezId).get(updateSampleId));
+    }
+
+    @Before
+    public void setUp() throws DaoException {
+        DaoCancerStudy.reCacheAll();
+    }
+
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTabDelimDataTransaction.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTabDelimDataTransaction.java
new file mode 100644
index 00000000..f149d959
--- /dev/null
+++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTabDelimDataTransaction.java
@@ -0,0 +1,119 @@
+/*
+ * This file is part of cBioPortal.
+ *
+ * cBioPortal is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package org.mskcc.cbio.portal.integrationTest.incremental;
+
+import org.cbioportal.model.CNA;
+import org.jetbrains.annotations.NotNull;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.runner.RunWith;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.mskcc.cbio.portal.dao.DaoCancerStudy;
+import org.mskcc.cbio.portal.dao.DaoCnaEvent;
+import org.mskcc.cbio.portal.dao.DaoException;
+import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
+import org.mskcc.cbio.portal.dao.DaoGeneticAlteration;
+import org.mskcc.cbio.portal.dao.DaoGeneticEntity;
+import org.mskcc.cbio.portal.dao.DaoGeneticProfile;
+import org.mskcc.cbio.portal.model.CnaEvent;
+import org.mskcc.cbio.portal.model.GeneticAlterationType;
+import org.mskcc.cbio.portal.model.GeneticProfile;
+import org.mskcc.cbio.portal.scripts.ImportTabDelimData;
+import org.springframework.test.annotation.Rollback;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+import org.springframework.transaction.annotation.Propagation;
+import org.springframework.transaction.annotation.Transactional;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.mock;
+
+/**
+ * Tests Transaction for Incremental Import of Tab Delimited Data.
+ *
+ * @author Ruslan Forostianov
+ * @author Pieter Lukasse
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
+public class TestIncrementalTabDelimDataTransaction {
+
+    /**
+     * Test transaction: an import that fails with a DAO error must leave the stored genetic alterations of the profile unchanged
+     */
+    @Test
+    // MySQL does not support nested transactions.
+    // That is why the outer transaction is disabled for this test.
+    @Transactional(propagation = Propagation.NOT_SUPPORTED)
+    public void testTransaction() throws Exception {
+        GeneticProfile mrnaProfile = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_mrna");
+        assertNotNull(mrnaProfile);
+
+        File dataFolder = new File("src/test/resources/incremental/tab_delim_data/");
+        File dataFile = new File(dataFolder, "data_expression_Zscores.txt");
+
+        HashMap<Long, HashMap<Integer, String>> beforeResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null);
+
+        DaoGeneticAlteration mockedDao = mock(DaoGeneticAlteration.class);
+
+        // Stub the DAO: the first two deleteAllRecordsInGeneticProfile calls do nothing,
+        // the third one throws a DaoException to simulate a failure during the import.
+        doNothing().doNothing().doThrow(new DaoException("Simulated dao error"))
+            .when(mockedDao).deleteAllRecordsInGeneticProfile(anyLong(), anyLong());
+        /**
+         * Test: the import has to fail with a RuntimeException
+         */
+        assertThrows("Import has to fail", RuntimeException.class, () ->
+            new ImportTabDelimData(dataFile,
+                mrnaProfile.getGeneticProfileId(),
+                null,
+                true,
+                mockedDao,
+                DaoGeneOptimized.getInstance()).importData());
+
+        /**
+         * After test assertions
+         * The alteration map read after the failed import has to equal the one read before it
+         */
+        HashMap<Long, HashMap<Integer, String>> afterResult = DaoGeneticAlteration.getInstance().getGeneticAlterationMap(mrnaProfile.getGeneticProfileId(), null);
+        assertEquals(beforeResult, afterResult);
+    }
+
+    @Before
+    public void setUp() throws DaoException {
+        DaoCancerStudy.reCacheAll();
+    }
+}
diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java
index d317aa03..916a16cd 100644
---
a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportCnaDiscreteLongData.java @@ -180,7 +180,7 @@ public void testImportCnaDiscreteLongDataAddsCnaEvents() throws Exception { @Test public void testImportCnaDiscreteLongDataAddsGeneticAlterations() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(beforeGeneticAlterations.size(), 42); + assertEquals(57, beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( @@ -205,7 +205,7 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterations() throws Excepti @Test public void testImportCnaDiscreteLongDataAddsMissingGeneticAlterations() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(beforeGeneticAlterations.size(), 42); + assertEquals(57, beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test_with_cna_events_missing.txt"); new ImportCnaDiscreteLongData( @@ -233,7 +233,7 @@ public void testImportCnaDiscreteLongDataAddsMissingGeneticAlterations() throws @Test public void testImportCnaDiscreteLongDataAddsGeneticAlterationsAndProfileSamplesInCorrectOrder() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(beforeGeneticAlterations.size(), 42); + assertEquals(57, beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( @@ -260,7 +260,7 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterationsAndProfileSamples @Test public void testImportCnaDiscreteLongDataHandlesEntriesWithoutEntrezButWithHugo() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(beforeGeneticAlterations.size(), 42); + assertEquals(57, 
beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test_without_entrez_with_hugo.txt"); new ImportCnaDiscreteLongData( @@ -283,7 +283,7 @@ public void testImportCnaDiscreteLongDataHandlesEntriesWithoutEntrezButWithHugo( @Test public void testImportCnaDiscreteLongDataHandlesEntriesWithWrongEntrezAndCorrectHugo() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(beforeGeneticAlterations.size(), 42); + assertEquals(57, beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test_with_wrong_entrez_and_correct_hugo.txt"); new ImportCnaDiscreteLongData( @@ -306,7 +306,7 @@ public void testImportCnaDiscreteLongDataHandlesEntriesWithWrongEntrezAndCorrect @Test public void testImportCnaDiscreteLongDataAddsGeneticAlterationsFromNonCnaEvents() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(beforeGeneticAlterations.size(), 42); + assertEquals(57, beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( @@ -334,7 +334,7 @@ public void testImportCnaDiscreteLongDataAddsGeneticAlterationsFromNonCnaEvents( @Test public void testImportCnaDiscreteLongDataIgnoresLineWithDuplicateGene() throws Exception { List beforeGeneticAlterations = getAllGeneticAlterations(); - assertEquals(beforeGeneticAlterations.size(), 42); + assertEquals(57, beforeGeneticAlterations.size()); File file = new File("src/test/resources/data_cna_discrete_import_test.txt"); new ImportCnaDiscreteLongData( diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java index a0a33c6d..fa7e0449 100644 --- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java +++ 
b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayData.java @@ -95,10 +95,11 @@ public void testImportGenericAssayData() throws Exception { // Open mutational signature test data file File file = new File("src/test/resources/data_mutational_signature.txt"); - + int numRecordsForGenericAssayBefore = getNumRecordsForGenericAssay(); + // import data and test all mutational signatures were added ImportGenericAssayEntity.importData(file, GeneticAlterationType.GENERIC_ASSAY, "name,description", false); - assertEquals(60, getNumRecordsForGenericAssay()); + assertEquals(numRecordsForGenericAssayBefore + 60, getNumRecordsForGenericAssay()); // test wether a record can be retrieved via stable id GenericAssayMeta genericAssayMeta1 = DaoGenericAssay.getGenericAssayMetaByStableId("mean_1"); diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java index 123715f8..480e9a61 100644 --- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportGenericAssayPatientLevelData.java @@ -53,7 +53,6 @@ import org.mskcc.cbio.portal.model.Patient; import org.mskcc.cbio.portal.model.Sample; import org.mskcc.cbio.portal.scripts.ImportGenericAssayPatientLevelData; -import org.mskcc.cbio.portal.util.FileUtil; import org.mskcc.cbio.portal.util.ProgressMonitor; import org.springframework.test.annotation.Rollback; import org.springframework.test.context.ContextConfiguration; @@ -151,8 +150,7 @@ private void runImportGenericAssayPatientLevelData() throws DaoException, IOExce File file = new File("src/test/resources/tabDelimitedData/data_patient_generic_assay.txt"); ImportGenericAssayPatientLevelData parser = new ImportGenericAssayPatientLevelData(file, null, geneticProfileId, null, 
"name,description"); - int numLines = FileUtil.getNumLines(file); - parser.importData(numLines); + parser.importData(); HashMap> geneticAlterationMap = daoGeneticAlteration.getGeneticAlterationMapForEntityIds(geneticProfileId, Arrays.asList(geneticEntity1.getId(), geneticEntity2.getId())); diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java index 33779cd3..f8bcc335 100644 --- a/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/scripts/TestImportTabDelimData.java @@ -38,7 +38,6 @@ import org.mskcc.cbio.portal.dao.DaoCancerStudy; import org.mskcc.cbio.portal.dao.DaoException; import org.mskcc.cbio.portal.dao.DaoGeneOptimized; -import org.mskcc.cbio.portal.dao.DaoGeneset; import org.mskcc.cbio.portal.dao.DaoGeneticAlteration; import org.mskcc.cbio.portal.dao.DaoGeneticProfile; import org.mskcc.cbio.portal.dao.DaoPatient; @@ -48,15 +47,12 @@ import org.mskcc.cbio.portal.model.CancerStudy; import org.mskcc.cbio.portal.model.CanonicalGene; import org.mskcc.cbio.portal.model.CopyNumberStatus; -import org.mskcc.cbio.portal.model.Geneset; import org.mskcc.cbio.portal.model.GeneticAlterationType; import org.mskcc.cbio.portal.model.GeneticProfile; import org.mskcc.cbio.portal.model.Patient; import org.mskcc.cbio.portal.model.Sample; -import org.mskcc.cbio.portal.scripts.ImportGenesetData; import org.mskcc.cbio.portal.scripts.ImportTabDelimData; import org.mskcc.cbio.portal.util.ConsoleUtil; -import org.mskcc.cbio.portal.util.FileUtil; import org.mskcc.cbio.portal.util.ProgressMonitor; import org.springframework.test.annotation.Rollback; import org.springframework.test.context.ContextConfiguration; @@ -171,9 +167,8 @@ private void runImportCnaData() throws DaoException, IOException{ ProgressMonitor.setConsoleMode(false); // TBD: change this to 
use getResourceAsStream() File file = new File("src/test/resources/cna_test.txt"); - ImportTabDelimData parser = new ImportTabDelimData(file, "Barry", geneticProfileId, null, DaoGeneticAlteration.getInstance()); - int numLines = FileUtil.getNumLines(file); - parser.importData(numLines); + ImportTabDelimData parser = new ImportTabDelimData(file, "Barry", geneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance()); + parser.importData(); String value = dao.getGeneticAlteration(geneticProfileId, sample1, 999999207); assertEquals ("0", value); @@ -236,9 +231,8 @@ private void runImportCnaData2() throws DaoException, IOException{ ProgressMonitor.setConsoleMode(false); // TBD: change this to use getResourceAsStream() File file = new File("src/test/resources/cna_test2.txt"); - ImportTabDelimData parser = new ImportTabDelimData(file, geneticProfileId, null, DaoGeneticAlteration.getInstance()); - int numLines = FileUtil.getNumLines(file); - parser.importData(numLines); + ImportTabDelimData parser = new ImportTabDelimData(file, geneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance()); + parser.importData(); String value = dao.getGeneticAlteration(geneticProfileId, sample1, 207); assertEquals (value, "0"); @@ -321,9 +315,8 @@ private void runImportRnaData1() throws DaoException, IOException{ // TBD: change this to use getResourceAsStream() File file = new File("src/test/resources/mrna_test.txt"); addTestPatientAndSampleRecords(file); - ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, DaoGeneticAlteration.getInstance()); - int numLines = FileUtil.getNumLines(file); - parser.importData(numLines); + ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance()); + parser.importData(); ConsoleUtil.showMessages(); int sampleId = 
DaoSample.getSampleByCancerStudyAndSampleId(studyId, "DD639").getInternalId(); @@ -375,9 +368,8 @@ public void testImportmRnaData2() throws Exception { // TBD: change this to use getResourceAsStream() File file = new File("src/test/resources/tabDelimitedData/data_expression2.txt"); addTestPatientAndSampleRecords(file); - ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, DaoGeneticAlteration.getInstance()); - int numLines = FileUtil.getNumLines(file); - parser.importData(numLines); + ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance()); + parser.importData(); // check if expected warnings are given: ArrayList warnings = ProgressMonitor.getWarnings(); @@ -468,9 +460,8 @@ public void testImportRppaData() throws Exception { // TBD: change this to use getResourceAsStream() File file = new File("src/test/resources/tabDelimitedData/data_rppa.txt"); addTestPatientAndSampleRecords(file); - ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, DaoGeneticAlteration.getInstance()); - int numLines = FileUtil.getNumLines(file); - parser.importData(numLines); + ImportTabDelimData parser = new ImportTabDelimData(file, newGeneticProfileId, null, false, DaoGeneticAlteration.getInstance(), DaoGeneOptimized.getInstance()); + parser.importData(); ConsoleUtil.showMessages(); int sampleId = DaoSample.getSampleByCancerStudyAndSampleId(studyId, "SAMPLE1").getInternalId(); diff --git a/src/test/resources/incremental/copy_number_alteration/data_cna_discrete.txt b/src/test/resources/incremental/copy_number_alteration/data_cna_discrete.txt new file mode 100644 index 00000000..7664e868 --- /dev/null +++ b/src/test/resources/incremental/copy_number_alteration/data_cna_discrete.txt @@ -0,0 +1,17 @@ +Hugo_Symbol Entrez_Gene_Id TCGA-XX-0800-01 TCGA-AB-CDEF-10-BLOOD_DERIVED_NORMAL TCGA-A1-A0SO-01 +AKT3 10000 0 -2 -2 +AKT1 207 -1 2 2 
+# Everything after the pipe has to be removed +AKT2|TEST 208 -2 2 -1 +HRAS 3265 2 2 0 +KRAS 3845 0 -2 2 +# This gene is absent from this file, but it is still part of the profile and has to be updated +#ATM 472 +# This line is missing the Hugo symbol, so the gene has to be detected by Entrez ID + 4893 -2 -2 -1 +# This line is missing the Entrez ID, so the gene has to be detected by Hugo symbol +BRCA1 2 2 0 +BRAF 673 2 -2 -2 +BRCA2 675 -1.5 2 0 +# This gene is new! Empty values should be set for the samples that already exist in the database +CDK1 983 -2 -2 2 diff --git a/src/test/resources/incremental/copy_number_alteration/data_cna_pd_annotations.txt b/src/test/resources/incremental/copy_number_alteration/data_cna_pd_annotations.txt new file mode 100644 index 00000000..3fbcfc58 --- /dev/null +++ b/src/test/resources/incremental/copy_number_alteration/data_cna_pd_annotations.txt @@ -0,0 +1,7 @@ +SAMPLE_ID Entrez_Gene_Id cbp_driver cbp_driver_annotation cbp_driver_tiers cbp_driver_tiers_annotation +TCGA-A1-A0SO-01 3845 Putative_Passenger Test passenger Class 2 Class annotation +TCGA-A1-A0SO-01 208 Putative_Driver Test driver Class 1 Class annotation +TCGA-A1-A0SO-01 983 Putative_Passenger Test passenger +TCGA-XX-0800-01 3845 Class 2 Class annotation +TCGA-XX-0800-01 208 Class 1 Class annotation +TCGA-XX-0800-01 983 Putative_Driver diff --git a/src/test/resources/incremental/copy_number_alteration/meta_cna_discrete.txt b/src/test/resources/incremental/copy_number_alteration/meta_cna_discrete.txt new file mode 100644 index 00000000..827c31dd --- /dev/null +++ b/src/test/resources/incremental/copy_number_alteration/meta_cna_discrete.txt @@ -0,0 +1,10 @@ +cancer_study_identifier: study_tcga_pub +genetic_alteration_type: COPY_NUMBER_ALTERATION +datatype: DISCRETE +stable_id: gistic +show_profile_in_analysis_tab: true +profile_description: Putative copy-number from GISTIC 2.0.
Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification. +profile_name: Putative copy-number alterations from GISTIC +data_filename: data_cna_discrete.txt +pd_annotations_filename: data_cna_pd_annotations.txt +namespaces: CustomNamespace diff --git a/src/test/resources/incremental/generic_assay/data_treatment_ic50.txt b/src/test/resources/incremental/generic_assay/data_treatment_ic50.txt new file mode 100644 index 00000000..79606fbf --- /dev/null +++ b/src/test/resources/incremental/generic_assay/data_treatment_ic50.txt @@ -0,0 +1,8 @@ +ENTITY_STABLE_ID NAME DESCRIPTION URL TCGA-A1-A0SB-01 TCGA-A1-A0SD-01 +Erlotinib Name of Erlotinib Desc of Erlotinib Url of Erlotinib >8 7.5 +Irinotecan Name of Irinotecan Desc of Irinotecan Url of Irinotecan 0.081 +# The database has this entity, but not the file +#L-685458 +Lapatinib Name of Lapatinib Desc of Lapatinib Url of Lapatinib 6.2 7.848 +#The entity will be added +LBW242 Name of LBW242 Desc of LBW242 Url of LBW242 0.1 >~8 diff --git a/src/test/resources/incremental/generic_assay/data_treatment_ic50_patient_level.txt b/src/test/resources/incremental/generic_assay/data_treatment_ic50_patient_level.txt new file mode 100644 index 00000000..34753bba --- /dev/null +++ b/src/test/resources/incremental/generic_assay/data_treatment_ic50_patient_level.txt @@ -0,0 +1,8 @@ +ENTITY_STABLE_ID NAME DESCRIPTION URL TCGA-A1-A0SB TCGA-A1-A0SD +Erlotinib Name of Erlotinib Desc of Erlotinib Url of Erlotinib >8 7.5 +Irinotecan Name of Irinotecan Desc of Irinotecan Url of Irinotecan 0.081 +# The database has this entity, but not the file +#L-685458 +Lapatinib Name of Lapatinib Desc of Lapatinib Url of Lapatinib 6.2 7.848 +#The entity will be added +LBW242 Name of LBW242 Desc of LBW242 Url of LBW242 0.1 >~8 diff --git a/src/test/resources/incremental/generic_assay/meta_treatment_ic50.txt b/src/test/resources/incremental/generic_assay/meta_treatment_ic50.txt new file mode 100644 
index 00000000..6ec6cdc5 --- /dev/null +++ b/src/test/resources/incremental/generic_assay/meta_treatment_ic50.txt @@ -0,0 +1,12 @@ +cancer_study_identifier: study_tcga_pub +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: TREATMENT_RESPONSE +datatype: LIMIT-VALUE +stable_id: treatment_ic50 +profile_name: IC50 values of compounds on cellular phenotype readout +profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines. +data_filename: data_treatment_ic50.txt +show_profile_in_analysis_tab: true +pivot_threshold_value: 0.1 +value_sort_order: ASC +generic_entity_meta_properties: NAME,DESCRIPTION,URL diff --git a/src/test/resources/incremental/generic_assay/meta_treatment_ic50_patient_level.txt b/src/test/resources/incremental/generic_assay/meta_treatment_ic50_patient_level.txt new file mode 100644 index 00000000..181899f5 --- /dev/null +++ b/src/test/resources/incremental/generic_assay/meta_treatment_ic50_patient_level.txt @@ -0,0 +1,13 @@ +cancer_study_identifier: study_tcga_pub +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: TREATMENT_RESPONSE +datatype: LIMIT-VALUE +stable_id: treatment_ic50 +profile_name: IC50 values of compounds on cellular phenotype readout +profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines. 
+data_filename: data_treatment_ic50_patient_level.txt +show_profile_in_analysis_tab: true +pivot_threshold_value: 0.1 +value_sort_order: ASC +generic_entity_meta_properties: NAME,DESCRIPTION,URL +patient_level: true diff --git a/src/test/resources/incremental/mrna_expression/data_expression_Zscores.txt b/src/test/resources/incremental/mrna_expression/data_expression_Zscores.txt new file mode 100644 index 00000000..dc189cec --- /dev/null +++ b/src/test/resources/incremental/mrna_expression/data_expression_Zscores.txt @@ -0,0 +1,34 @@ +Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-AB-CDEF-10-BLOOD_DERIVED_NORMAL TCGA-A1-A0SD-01 +AKT3 10000 0.6393 0.1 0.5377 +AKT1 207 0.785 0.1 0.0426 +# All after the pipe has to be removed +AKT2|TEST 208 1.0741 0.1 0.718 +HRAS 3265 -0.1735 0.1 -0.6412 +# This gene absent in this file, but it's still part of the profile and has to be updated +#ARAF 369 +KRAS 3845 0.785 0.1 0.0426 +ATM 472 1.0741 0.1 0.718 +# This line missing the hugo symbol and the gene has to be detected by entrez id + 4893 -0.1735 0.1 -0.6412 +# This line missing the entrez id and the gene has to be detected by hugo symbol +BRCA1 0.6393 0.1 0.5377 +BRAF 673 0.785 0.1 0.0426 +# Duplicate lines should be ignored +BRAF 673 0.7851 0.1 0.0427 +# Although this row has 2 extra columns, we are ok with that as they contain blank values +BRCA2 675 1.0741 0.1 0.718 +# This gene is new! 
the empty values should be set for the already existing samples in the database +CDK1 983 -0.1735 0.1 -0.6412 +# These lines have to be skipped +# One column too much +FGFR3 2261 0.045 0.1 0.675 0.0224575 +# No sample columns +PIEZO1 9780 +# invalid entrez id +P2RY10 -1 0.741 0.1 0.685 +# Multigene sign +/// 369 0.6393 0.1 0.5377 +# Unknown gene sign +--- 3845 0.785 0.1 0.0426 +# Empty gene info + 1.0741 0.1 0.718 diff --git a/src/test/resources/incremental/mrna_expression/meta_expression_Zscores.txt b/src/test/resources/incremental/mrna_expression/meta_expression_Zscores.txt new file mode 100644 index 00000000..e761fed3 --- /dev/null +++ b/src/test/resources/incremental/mrna_expression/meta_expression_Zscores.txt @@ -0,0 +1,8 @@ +cancer_study_identifier: study_tcga_pub +genetic_alteration_type: MRNA_EXPRESSION +datatype: Z-SCORE +stable_id: mrna +profile_description: Expression levels (Agilent microarray). +show_profile_in_analysis_tab: false +profile_name: mRNA expression (microarray) +data_filename: data_expression_Zscores.txt diff --git a/src/test/resources/incremental/protein_level/data_rppa.txt b/src/test/resources/incremental/protein_level/data_rppa.txt new file mode 100644 index 00000000..0953ce99 --- /dev/null +++ b/src/test/resources/incremental/protein_level/data_rppa.txt @@ -0,0 +1,24 @@ +Composite.Element.REF TCGA-A1-A0SB-01 TCGA-A1-A0SD-01 +AKT3|akt3 1.26122710480548 0.037186254715365 +# Multiple gene symbols joined by space +AKT1 AKT2 AKT3|akt1 1.61253243664957 -0.141077088398489 +# All after the pipe has to be removed +AKT2|TEST 5.4424238579025E-05 0.062264661774981 +HRAS|hras 0.37624053370992 0.270399126328659 +# This gene absent in this file, but it's still part of the profile and has to be updated 0.407622077164699 -0.326522823583974 +#ARAF +KRAS|kras -0.335040546938807 0.00730643372831408 +ATM|atm 0.037186254715365 1.26122710480548 +# This line missing the entrez id and the gene has to be detected by hugo symbol 0.062264661774981 
5.4424238579025E-05 +BRCA1|brca1 0.270399126328659 0.37624053370992 +BRAF|braf -0.326522823583974 0.407622077164699 +# Duplicate lines should be ignored 0.218650367364756 0.383702820778609 +BRAF|braf 0.00730643372831408 -0.335040546938807 +BRCA2|brca2 -0.141077088398489 1.61253243664957 +# This gene is new! the empty values should be set for the already existing samples in the database +CDK1|cdk1 -0.141047088398489 1.61253243564957 +# These lines have to be skipped +/// -0.335040546938807 0.00730643372831408 +--- 0.037186254715365 1.26122710480548 + 0.064 0.644 +NA|K-Ras 0.062264661774981 5.4424238579025E-05 diff --git a/src/test/resources/incremental/protein_level/meta_rppa.txt b/src/test/resources/incremental/protein_level/meta_rppa.txt new file mode 100644 index 00000000..f6481c7d --- /dev/null +++ b/src/test/resources/incremental/protein_level/meta_rppa.txt @@ -0,0 +1,7 @@ +cancer_study_identifier: study_tcga_pub +genetic_alteration_type: PROTEIN_LEVEL +datatype: LOG2-VALUE +stable_id: rppa +profile_name: Test RPPA +profile_description: Test protein level data +data_filename: data_rppa.txt diff --git a/src/test/resources/seed_mini.sql b/src/test/resources/seed_mini.sql index 3dfd5ff9..552db83e 100644 --- a/src/test/resources/seed_mini.sql +++ b/src/test/resources/seed_mini.sql @@ -199,6 +199,11 @@ INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYP INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE'); SET @max_entity_id = (Select MAX(ID) from genetic_entity); INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE") VALUES (@max_entity_id,2261,'FGFR3','protein-coding'); +-- Generic genetic entities +INSERT INTO "genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'Erlotinib'); +INSERT INTO "genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'Irinotecan'); +INSERT INTO "genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'L-685458'); +INSERT INTO 
"genetic_entity" ("ENTITY_TYPE", "STABLE_ID") VALUES ('GENERIC_ASSAY', 'Lapatinib'); -- cna_event INSERT INTO "cna_event" ("CNA_EVENT_ID","ENTREZ_GENE_ID","ALTERATION") VALUES (20093,207,-2); @@ -328,6 +333,8 @@ INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (6,'study_tcga_pub_mutations',1,'MUTATION_EXTENDED','MAF','Mutations','Mutation data from whole exome sequencing.','1'); INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (7,'study_tcga_pub_structural_variants',1,'STRUCTURAL_VARIANT','SV','Structural Variants','Structural Variants test data.','1'); INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (8,'study_tcga_pub_cna_long',1,'COPY_NUMBER_ALTERATION','DISCRETE_LONG','CNA values','CNA values dummy data','1'); +INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (9,'study_tcga_pub_rppa',1,'PROTEIN_LEVEL','LOG2-VALUE','RPPA values','RPPA values dummy data','0'); +INSERT INTO "genetic_profile" ("GENETIC_PROFILE_ID","STABLE_ID","CANCER_STUDY_ID","GENETIC_ALTERATION_TYPE","DATATYPE","NAME","DESCRIPTION","SHOW_PROFILE_IN_ANALYSIS_TAB") VALUES (10,'study_tcga_pub_treatment_ic50',1,'GENERIC_ASSAY','LIMIT-VALUE','test treatment values','treatment values dummy data','0'); -- gene_panel INSERT INTO gene_panel (INTERNAL_ID,STABLE_ID,DESCRIPTION) VALUES (1,'TESTPANEL_CNA_DISCRETE_LONG_FORMAT','Some test panel'); @@ -375,12 +382,31 @@ INSERT INTO "genetic_alteration" 
("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALU INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (5,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 672),'0.066638638,'); INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (5,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 673),'0.020369562,'); INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (5,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 675),'0.793930197,'); +-- RPPA +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 10000),'-0.472,1.514,0.145,-0.183,0.913,-0.665,-1.700,0.976,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 207),'-1.102,-0.243,0.018,-0.154,0.330,1.005,0.681,-0.664,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 208),'-1.221,-0.592,-0.176,-0.310,-1.198,-0.670,0.077,-0.302,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 3265),'0.061,-0.055,-0.165,0.517,2.021,0.381,-0.728,0.944,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 369),'-1.129,-0.306,0.180,-0.601,0.166,0.402,0.243,-0.999,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 3845),'0.177,0.404,0.188,0.428,1.676,0.238,0.469,2.161,'); +INSERT INTO "genetic_alteration" 
("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 472),'-1.503,-1.925,-1.755,-1.576,-1.029,-1.401,-1.514,-2.074,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 4893),'-1.914,-2.059,-1.228,-1.322,-4.166,-1.187,0.284,-0.130,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 672),'-1.661,-1.392,-1.924,-1.656,-0.361,-1.998,-0.136,-0.709,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 673),'0.233,0.561,-0.106,-0.085,-0.012,0.143,0.141,0.609,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (9,(Select "GENETIC_ENTITY_ID" from "gene" where "ENTREZ_GENE_ID" = 675),'-0.570,-1.340,-1.544,-0.404,0.632,-1.231,0.771,-0.036,'); +-- Generic assay data +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Erlotinib'),'5.2,>8,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Irinotecan'),'>8,7.1,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'L-685458'),'>4.6,7.2,'); +INSERT INTO "genetic_alteration" ("GENETIC_PROFILE_ID","GENETIC_ENTITY_ID","VALUES") VALUES (10,(Select "ID" from "genetic_entity" where "STABLE_ID" = 'Lapatinib'),'6.9,>~8,'); -- genetic_profile_samples INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (2,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,'); INSERT INTO 
"genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (3,'2,3,6,8,9,10,12,13,'); INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (4,'1,2,3,4,5,6,7,8,9,10,11,12,13,14,'); INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (5,'2,'); +INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (9,'2,3,6,8,9,10,12,13,'); +INSERT INTO "genetic_profile_samples" ("GENETIC_PROFILE_ID","ORDERED_SAMPLE_LIST") VALUES (10,'2,3,'); -- patient INSERT INTO "patient" ("INTERNAL_ID","STABLE_ID","CANCER_STUDY_ID") VALUES (1,'TCGA-A1-A0SB',1); diff --git a/tests/system_tests_import_data.py b/tests/system_tests_import_data.py index 097e6c01..64361571 100755 --- a/tests/system_tests_import_data.py +++ b/tests/system_tests_import_data.py @@ -107,6 +107,16 @@ def test_incremental_load(self, run_java, locate_jar): '--meta', f'{data_directory}/meta_clinical_samples.txt', '--loadMode', 'bulkload', '--data', f'{data_directory}/data_clinical_samples.txt', '--noprogress') mutation_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', '--meta', f'{data_directory}/meta_mutations_extended.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_mutations_extended.maf', '--noprogress') + cna_discrete_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', + '--meta', f'{data_directory}/meta_cna_discrete.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_cna_discrete.txt', '--noprogress') + cna_log2_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', + '--meta', f'{data_directory}/meta_cna_log2.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_cna_log2.txt', '--noprogress') + expression_median_call = 
call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', + '--meta', f'{data_directory}/meta_expression_median.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_expression_median.txt', '--noprogress') + methylation_hm27_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', + '--meta', f'{data_directory}/meta_methylation_hm27.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_methylation_hm27.txt', '--noprogress') + treatment_ic50_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', + '--meta', f'{data_directory}/meta_treatment_ic50.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_treatment_ic50.txt', '--noprogress') case_list_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.UpdateCaseListsSampleIds', '--meta', f'{data_directory}/meta_clinical_samples.txt', '--case-lists', f'{data_directory}/case_lists') @@ -115,6 +125,11 @@ def test_incremental_load(self, run_java, locate_jar): clinical_patient_call, clinical_sample_call, mutation_call, + cna_discrete_call, + cna_log2_call, + expression_median_call, + methylation_hm27_call, + treatment_ic50_call, case_list_call, ]) diff --git a/tests/test_data/study_es_0_inc/data_cna_discrete.txt b/tests/test_data/study_es_0_inc/data_cna_discrete.txt new file mode 100644 index 00000000..7915f45b --- /dev/null +++ b/tests/test_data/study_es_0_inc/data_cna_discrete.txt @@ -0,0 +1,10 @@ +Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW +ACAP3 116983 0 0 -1 + 375790 -1 -1 0 +ATAD3A 55210 0 0 -2 +ATAD3B 83858 -2 -1 0 +ATAD3C 219293 0 0 0 +#AURKAIP1 54998 +ERCC5 2073 0 -1 -2 +ACP3 55 0 0 0 +TP53 -1 0 -2 diff --git a/tests/test_data/study_es_0_inc/data_cna_log2.txt b/tests/test_data/study_es_0_inc/data_cna_log2.txt new file mode 100644 index 
00000000..0eb820a7 --- /dev/null +++ b/tests/test_data/study_es_0_inc/data_cna_log2.txt @@ -0,0 +1,10 @@ +Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW +ACAP3 116983 0.751 0.533 0.114 + 375790 0.062 0.071 0.948 +ATAD3A 55210 0.487 0.695 0.364 +ATAD3B 83858 0.150 0.492 0.300 +ATAD3C 219293 0.995 0.170 0.654 +#AURKAIP1 54998 +ERCC5 2073 0.816 0.514 0.165 +ACP3 55 0.252 0.713 0.513 +TP53 0.360 0.538 0.891 diff --git a/tests/test_data/study_es_0_inc/data_expression_median.txt b/tests/test_data/study_es_0_inc/data_expression_median.txt new file mode 100644 index 00000000..d5c4a9a0 --- /dev/null +++ b/tests/test_data/study_es_0_inc/data_expression_median.txt @@ -0,0 +1,10 @@ +Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW +ACAP3 116983 0.096 0.826 0.032 + 375790 0.309 0.399 0.680 +ATAD3A 55210 0.569 0.189 0.266 +ATAD3B 83858 0.829 0.473 0.611 +ATAD3C 219293 0.307 0.445 0.045 +#AURKAIP1 54998 +ERCC5 2073 0.171 0.766 0.590 +ACP3 55 0.422 0.870 0.745 +TP53 0.179 0.694 0.808 diff --git a/tests/test_data/study_es_0_inc/data_methylation_hm27.txt b/tests/test_data/study_es_0_inc/data_methylation_hm27.txt new file mode 100644 index 00000000..d2c67abc --- /dev/null +++ b/tests/test_data/study_es_0_inc/data_methylation_hm27.txt @@ -0,0 +1,10 @@ +Hugo_Symbol Entrez_Gene_Id TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW +ACAP3 116983 0.022 0.681 0.790 + 375790 0.435 0.340 0.321 +ATAD3A 55210 0.229 0.946 0.439 +ATAD3B 83858 0.885 0.707 0.664 +ATAD3C 219293 0.660 0.315 0.694 +#AURKAIP1 54998 +ERCC5 2073 0.436 0.749 0.345 +ACP3 55 0.622 0.396 0.029 +TP53 0.563 0.686 0.607 diff --git a/tests/test_data/study_es_0_inc/data_treatment_ic50.txt b/tests/test_data/study_es_0_inc/data_treatment_ic50.txt new file mode 100644 index 00000000..2a507cef --- /dev/null +++ b/tests/test_data/study_es_0_inc/data_treatment_ic50.txt @@ -0,0 +1,11 @@ +ENTITY_STABLE_ID NAME DESCRIPTION URL TCGA-A1-A0SB-01 TCGA-A1-A0SB-03 TCGA-BH-NEW +17-AAG Name of 17-AAG Desc 
of 17-AAG Url of 17-AAG 0.315 0.329701692 0.053038094 +AEW541 Name of AEW541 Desc of AEW541 Url of AEW541 >8 2.353 2.68212986 +AZD0530 Name of AZD0530 Desc of AZD0530 Url of AZD0530 0.234 >8 4.597949505 +AZD6244 Name of AZD6244 Desc of AZD6244 Url of AZD6244 >8 >8 >8 +Erlotinib Name of Erlotinib Desc of Erlotinib Url of Erlotinib >8 >8 >8 +Irinotecan Name of Irinotecan Desc of Irinotecan Url of Irinotecan NA 0.083 NA +L-685458 Name of L-685458 Desc of L-685458 Url of L-685458 >8 >8 3.267752409 +#Lapatinib Name of Lapatinib Desc of Lapatinib Url of Lapatinib +LBW242 Name of LBW242 Desc of LBW242 Url of LBW242 NA >8 >8 +Nilotinib Name of Nilotinib Desc of Nilotinib Url of Nilotinib >8 >8 NA diff --git a/tests/test_data/study_es_0_inc/meta_cna_discrete.txt b/tests/test_data/study_es_0_inc/meta_cna_discrete.txt new file mode 100644 index 00000000..f6ea8bea --- /dev/null +++ b/tests/test_data/study_es_0_inc/meta_cna_discrete.txt @@ -0,0 +1,10 @@ +cancer_study_identifier: study_es_0 +genetic_alteration_type: COPY_NUMBER_ALTERATION +datatype: DISCRETE +stable_id: gistic +show_profile_in_analysis_tab: true +profile_description: Putative copy-number from GISTIC 2.0. Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification. +profile_name: Putative copy-number alterations from GISTIC +data_filename: data_cna_discrete.txt +pd_annotations_filename: data_cna_pd_annotations.txt +namespaces: CustomNamespace diff --git a/tests/test_data/study_es_0_inc/meta_cna_log2.txt b/tests/test_data/study_es_0_inc/meta_cna_log2.txt new file mode 100644 index 00000000..74a07b8e --- /dev/null +++ b/tests/test_data/study_es_0_inc/meta_cna_log2.txt @@ -0,0 +1,8 @@ +cancer_study_identifier: study_es_0 +genetic_alteration_type: COPY_NUMBER_ALTERATION +datatype: LOG2-VALUE +stable_id: log2CNA +show_profile_in_analysis_tab: false +profile_description: Log2 copy-number values for each gene (from Affymetrix SNP6). 
+profile_name: Log2 copy-number values +data_filename: data_cna_log2.txt diff --git a/tests/test_data/study_es_0_inc/meta_expression_median.txt b/tests/test_data/study_es_0_inc/meta_expression_median.txt new file mode 100644 index 00000000..1e2fc6a7 --- /dev/null +++ b/tests/test_data/study_es_0_inc/meta_expression_median.txt @@ -0,0 +1,8 @@ +cancer_study_identifier: study_es_0 +genetic_alteration_type: MRNA_EXPRESSION +datatype: CONTINUOUS +stable_id: mrna +profile_description: Expression levels (Agilent microarray). +show_profile_in_analysis_tab: false +profile_name: mRNA expression (microarray) +data_filename: data_expression_median.txt diff --git a/tests/test_data/study_es_0_inc/meta_methylation_hm27.txt b/tests/test_data/study_es_0_inc/meta_methylation_hm27.txt new file mode 100644 index 00000000..582b12e9 --- /dev/null +++ b/tests/test_data/study_es_0_inc/meta_methylation_hm27.txt @@ -0,0 +1,8 @@ +cancer_study_identifier: study_es_0 +genetic_alteration_type: METHYLATION +datatype: CONTINUOUS +stable_id: methylation_hm27 +profile_description: Methylation beta-values (HM27 platform). For genes with multiple methylation probes, the probe least correlated with expression is selected. +show_profile_in_analysis_tab: false +profile_name: Methylation (HM27) +data_filename: data_methylation_hm27.txt diff --git a/tests/test_data/study_es_0_inc/meta_treatment_ic50.txt b/tests/test_data/study_es_0_inc/meta_treatment_ic50.txt new file mode 100644 index 00000000..0d3281cd --- /dev/null +++ b/tests/test_data/study_es_0_inc/meta_treatment_ic50.txt @@ -0,0 +1,12 @@ +cancer_study_identifier: study_es_0 +genetic_alteration_type: GENERIC_ASSAY +generic_assay_type: TREATMENT_RESPONSE +datatype: LIMIT-VALUE +stable_id: treatment_ic50 +profile_name: IC50 values of compounds on cellular phenotype readout +profile_description: IC50 (compound concentration resulting in half maximal inhibition) of compounds on cellular phenotype readout of cultured mutant cell lines. 
+data_filename: data_treatment_ic50.txt +show_profile_in_analysis_tab: true +pivot_threshold_value: 0.1 +value_sort_order: ASC +generic_entity_meta_properties: NAME,DESCRIPTION,URL \ No newline at end of file