diff --git a/scripts/importer/cbioportal_common.py b/scripts/importer/cbioportal_common.py index 798174ee..c68f68e3 100644 --- a/scripts/importer/cbioportal_common.py +++ b/scripts/importer/cbioportal_common.py @@ -380,6 +380,7 @@ class MetaFileTypes(object): MetaFileTypes.GENERIC_ASSAY_CONTINUOUS, MetaFileTypes.GENERIC_ASSAY_BINARY, MetaFileTypes.GENERIC_ASSAY_CATEGORICAL, + MetaFileTypes.TIMELINE, ] IMPORTER_CLASSNAME_BY_META_TYPE = { diff --git a/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalEvent.java b/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalEvent.java index 21722902..d5045721 100644 --- a/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalEvent.java +++ b/src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalEvent.java @@ -32,6 +32,9 @@ package org.mskcc.cbio.portal.dao; +import org.apache.commons.lang3.StringUtils; +import org.mskcc.cbio.portal.model.ClinicalEvent; + import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -40,8 +43,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.commons.lang3.StringUtils; -import org.mskcc.cbio.portal.model.ClinicalEvent; /** * @@ -52,7 +53,7 @@ private DaoClinicalEvent() {} public static int addClinicalEvent(ClinicalEvent clinicalEvent) { if (!MySQLbulkLoader.isBulkLoad()) { - throw new IllegalStateException("Only buld load mode is allowed for importing clinical events"); + throw new IllegalStateException("Only bulk load mode is allowed for importing clinical events"); } MySQLbulkLoader.getMySQLbulkLoader("clinical_event").insertRecord( @@ -202,6 +203,26 @@ public static void deleteByCancerStudyId(int cancerStudyId) throws DaoException JdbcUtil.closeAll(DaoClinicalEvent.class, con, pstmt, rs); } } + + public static void deleteByPatientId(int patientId) throws DaoException { + Connection con = null; + PreparedStatement pstmt = null; + ResultSet rs = null; + try { + con = JdbcUtil.getDbConnection(DaoClinicalEvent.class); + + pstmt 
= con.prepareStatement("DELETE clinical_event, clinical_event_data" + + " FROM clinical_event" + + " LEFT JOIN clinical_event_data ON clinical_event_data.CLINICAL_EVENT_ID = clinical_event.CLINICAL_EVENT_ID" + + " WHERE clinical_event.PATIENT_ID = ?"); + pstmt.setInt(1, patientId); + pstmt.executeUpdate(); + } catch (SQLException e) { + throw new DaoException(e); + } finally { + JdbcUtil.closeAll(DaoClinicalEvent.class, con, pstmt, rs); + } + } public static void deleteAllRecords() throws DaoException { Connection con = null; diff --git a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java index 4b12a431..c16eba21 100644 --- a/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java +++ b/src/main/java/org/mskcc/cbio/portal/scripts/ImportTimelineData.java @@ -48,8 +48,10 @@ import java.io.FileReader; import java.io.IOException; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Properties; +import java.util.Set; /** * Imports timeline data for display in patient view @@ -58,7 +60,7 @@ */ public class ImportTimelineData extends ConsoleRunnable { - private static void importData(String dataFile, int cancerStudyId) throws IOException, DaoException { + private static void importData(String dataFile, int cancerStudyId, boolean overwriteExisting) throws IOException, DaoException { MySQLbulkLoader.bulkLoadOn(); ProgressMonitor.setCurrentMessage("Reading file " + dataFile); @@ -81,9 +83,10 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce throw new RuntimeException("The first line must start with\n'PATIENT_ID\tSTART_DATE\tEVENT_TYPE'\nor\n" + "PATIENT_ID\tSTART_DATE\tSTOP_DATE\tEVENT_TYPE"); } - + long clinicalEventId = DaoClinicalEvent.getLargestClinicalEventId(); - + Set processedPatientIds = new HashSet<>(); + while ((line = buff.readLine()) != null) { line = line.trim(); @@ -99,6 +102,9 @@ private 
static void importData(String dataFile, int cancerStudyId) throws IOExce ProgressMonitor.logWarning("Patient " + patientId + " not found in study " + cancerStudyId + ". Skipping entry."); continue; } + if (overwriteExisting && processedPatientIds.add(patient.getInternalId())) { + DaoClinicalEvent.deleteByPatientId(patient.getInternalId()); + } ClinicalEvent event = new ClinicalEvent(); event.setClinicalEventId(++clinicalEventId); event.setPatientId(patient.getInternalId()); @@ -128,17 +134,18 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce public void run() { try { String description = "Import 'timeline' data"; - - OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true); + + OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, false); String dataFile = (String) options.valueOf("data"); File descriptorFile = new File((String) options.valueOf("meta")); + boolean overwriteExisting = options.has("overwrite-existing"); Properties properties = new TrimmedProperties(); properties.load(new FileInputStream(descriptorFile)); int cancerStudyInternalId = ValidationUtils.getInternalStudyId(properties.getProperty("cancer_study_identifier")); - importData(dataFile, cancerStudyInternalId); + importData(dataFile, cancerStudyInternalId, overwriteExisting); } catch (RuntimeException e) { throw e; } catch (IOException|DaoException e) { diff --git a/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java b/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java index 39fedd5f..7eba9610 100644 --- a/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java +++ b/src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java @@ -138,8 +138,10 @@ public static OptionSet parseStandardDataAndMetaOptions(String[] args, String de parser.accepts( "loadMode", "direct (per record) or bulk load of data" ) .withRequiredArg().describedAs( "[directLoad|bulkLoad (default)]" ).ofType( String.class ); } + 
parser.accepts("overwrite-existing", + "Enables re-uploading molecular data that already exist for the given profile and sample.").withOptionalArg().describedAs("overwrite-existing").ofType(String.class); String progName = "importScript"; - + OptionSet options = null; try { options = parser.parse( args ); diff --git a/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTimelineImport.java b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTimelineImport.java new file mode 100644 index 00000000..c077c58b --- /dev/null +++ b/src/test/java/org/mskcc/cbio/portal/integrationTest/incremental/TestIncrementalTimelineImport.java @@ -0,0 +1,115 @@ +/* + * This file is part of cBioPortal. + * + * cBioPortal is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +package org.mskcc.cbio.portal.integrationTest.incremental; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mskcc.cbio.portal.dao.DaoCancerStudy; +import org.mskcc.cbio.portal.dao.DaoClinicalEvent; +import org.mskcc.cbio.portal.dao.DaoException; +import org.mskcc.cbio.portal.dao.DaoPatient; +import org.mskcc.cbio.portal.dao.MySQLbulkLoader; +import org.mskcc.cbio.portal.model.CancerStudy; +import org.mskcc.cbio.portal.model.ClinicalEvent; +import org.mskcc.cbio.portal.model.Patient; +import org.mskcc.cbio.portal.scripts.ImportTimelineData; +import org.springframework.test.annotation.Rollback; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +/** + * Tests Incremental Import of Timeline Data. 
+ * + * @author Ruslan Forostianov + * @author Pieter Lukasse + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" }) +@Rollback +@Transactional +public class TestIncrementalTimelineImport { + + public static final String STUDY_ID = "study_tcga_pub"; + private CancerStudy cancerStudy; + + @Before + public void setUp() throws DaoException { + cancerStudy = DaoCancerStudy.getCancerStudyByStableId(STUDY_ID); + } + + @Test + public void testTimelineDataReloading() throws DaoException { + MySQLbulkLoader.bulkLoadOn(); + ClinicalEvent event = new ClinicalEvent(); + event.setClinicalEventId(1L); + Patient sbPatient = DaoPatient.getPatientByCancerStudyAndPatientId(cancerStudy.getInternalId(), "TCGA-A1-A0SB"); + event.setPatientId(sbPatient.getInternalId()); + event.setStartDate(5L); + event.setEventType("SPECIMEN"); + event.setEventData(Map.of("SPECIMEN_SITE", "specimen_site_to_erase")); // pre-existing event the --overwrite-existing import must replace + DaoClinicalEvent.addClinicalEvent(event); + MySQLbulkLoader.flushAll(); + + File singleTcgaSampleFolder = new File("src/test/resources/incremental/clinical/"); + File metaFile = new File(singleTcgaSampleFolder, "meta_timeline.txt"); + File dataFile = new File(singleTcgaSampleFolder, "data_timeline.txt"); + + ImportTimelineData importTimelineData = new ImportTimelineData(new String[] { + "--meta", metaFile.getAbsolutePath(), + "--data", dataFile.getAbsolutePath(), + "--overwrite-existing", + }); + importTimelineData.run(); + + List<ClinicalEvent> sbClinicalEvents = DaoClinicalEvent.getClinicalEvent(sbPatient.getInternalId()); + assertEquals(2, sbClinicalEvents.size()); + ClinicalEvent sbSpecimen = sbClinicalEvents.stream().filter(ce -> ce.getEventType().equals("SPECIMEN")).findFirst().get(); + assertEquals(20L, sbSpecimen.getStartDate()); + assertEquals(60L, sbSpecimen.getStopDate()); + assertEquals(Map.of( + "SPECIMEN_SITE", "test_specimen_site_1", + "SPECIMEN_TYPE", "test_specimen_type", + "SOURCE", "test_source_3" + ), 
sbSpecimen.getEventData()); + ClinicalEvent sbStatus = sbClinicalEvents.stream().filter(ce -> ce.getEventType().equals("STATUS")).findFirst().get(); + assertEquals(10L, sbStatus.getStartDate()); + assertEquals(20L, sbStatus.getStopDate()); + assertEquals(Map.of("SOURCE", "test_source_4"), sbStatus.getEventData()); + + Patient sdPatient = DaoPatient.getPatientByCancerStudyAndPatientId(cancerStudy.getInternalId(), "TCGA-A1-A0SD"); + List<ClinicalEvent> sdClinicalEvents = DaoClinicalEvent.getClinicalEvent(sdPatient.getInternalId()); + assertEquals(1, sdClinicalEvents.size()); + ClinicalEvent sdStatus = sdClinicalEvents.stream().filter(ce -> ce.getEventType().equals("STATUS")).findFirst().get(); + assertEquals(45L, sdStatus.getStartDate()); + assertNull(sdStatus.getStopDate()); + assertEquals(Map.of("SOURCE", "test_source_2"), sdStatus.getEventData()); + + Patient nonexistentPatient = DaoPatient.getPatientByCancerStudyAndPatientId(cancerStudy.getInternalId(), "NONEXISTENT_PATIENT"); + assertNull(nonexistentPatient); // the import must not create patients it does not find in the study + } + +} diff --git a/src/test/resources/incremental/clinical/data_timeline.txt b/src/test/resources/incremental/clinical/data_timeline.txt new file mode 100644 index 00000000..679a9da5 --- /dev/null +++ b/src/test/resources/incremental/clinical/data_timeline.txt @@ -0,0 +1,5 @@ +PATIENT_ID START_DATE STOP_DATE EVENT_TYPE SPECIMEN_SITE SPECIMEN_TYPE SOURCE +TCGA-A1-A0SB 20 60 SPECIMEN test_specimen_site_1 test_specimen_type test_source_3 +TCGA-A1-A0SB 10 20 STATUS test_source_4 +TCGA-A1-A0SD 45 STATUS test_source_2 +NONEXISTENT_PATIENT 100 200 STATUS test_source_1 diff --git a/src/test/resources/incremental/clinical/meta_timeline.txt b/src/test/resources/incremental/clinical/meta_timeline.txt new file mode 100644 index 00000000..bacded8c --- /dev/null +++ b/src/test/resources/incremental/clinical/meta_timeline.txt @@ -0,0 +1,4 @@ +cancer_study_identifier: study_tcga_pub +genetic_alteration_type: CLINICAL +datatype: TIMELINE +data_filename: data_timeline.txt diff --git 
a/tests/system_tests_import_data.py b/tests/system_tests_import_data.py index 64361571..5fd45a69 100755 --- a/tests/system_tests_import_data.py +++ b/tests/system_tests_import_data.py @@ -117,6 +117,8 @@ def test_incremental_load(self, run_java, locate_jar): '--meta', f'{data_directory}/meta_methylation_hm27.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_methylation_hm27.txt', '--noprogress') treatment_ic50_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing', '--meta', f'{data_directory}/meta_treatment_ic50.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_treatment_ic50.txt', '--noprogress') + timeline_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportTimelineData', '--overwrite-existing', + '--meta', f'{data_directory}/meta_timeline.txt', '--loadMode', 'bulkload', '--data', f'{data_directory}/data_timeline.txt', '--noprogress') case_list_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.UpdateCaseListsSampleIds', '--meta', f'{data_directory}/meta_clinical_samples.txt', '--case-lists', f'{data_directory}/case_lists') @@ -130,6 +132,7 @@ def test_incremental_load(self, run_java, locate_jar): expression_median_call, methylation_hm27_call, treatment_ic50_call, + timeline_call, case_list_call, ]) diff --git a/tests/test_data/study_es_0_inc/data_timeline.txt b/tests/test_data/study_es_0_inc/data_timeline.txt new file mode 100644 index 00000000..e950603c --- /dev/null +++ b/tests/test_data/study_es_0_inc/data_timeline.txt @@ -0,0 +1,4 @@ +PATIENT_ID START_DATE STOP_DATE EVENT_TYPE SPECIMEN_SITE SPECIMEN_TYPE SOURCE +TCGA-BH-A18K 20 60 SPECIMEN test_specimen_site_1 test_specimen_type test_source_3 +TCGA-BH-A18K 10 20 STATUS test_source_4 +TCGA-BH-NEW 100 200 STATUS test_source_1 diff --git a/tests/test_data/study_es_0_inc/meta_timeline.txt b/tests/test_data/study_es_0_inc/meta_timeline.txt new file mode 100644 index 
00000000..51a46508 --- /dev/null +++ b/tests/test_data/study_es_0_inc/meta_timeline.txt @@ -0,0 +1,4 @@ +cancer_study_identifier: study_es_0 +genetic_alteration_type: CLINICAL +datatype: TIMELINE +data_filename: data_timeline.txt