Skip to content

Commit

Permalink
Merge pull request #43 from cBioPortal/inc-timeline-uploader
Browse files Browse the repository at this point in the history
(3/7) RFC79: Implement incremental upload for timeline data
  • Loading branch information
forus authored Jun 19, 2024
2 parents 074372f + fb75d7c commit a5ac232
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 10 deletions.
1 change: 1 addition & 0 deletions scripts/importer/cbioportal_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ class MetaFileTypes(object):
MetaFileTypes.GENERIC_ASSAY_CONTINUOUS,
MetaFileTypes.GENERIC_ASSAY_BINARY,
MetaFileTypes.GENERIC_ASSAY_CATEGORICAL,
MetaFileTypes.TIMELINE,
]

IMPORTER_CLASSNAME_BY_META_TYPE = {
Expand Down
27 changes: 24 additions & 3 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalEvent.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@

package org.mskcc.cbio.portal.dao;

import org.apache.commons.lang3.StringUtils;
import org.mskcc.cbio.portal.model.ClinicalEvent;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
Expand All @@ -40,8 +43,6 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.mskcc.cbio.portal.model.ClinicalEvent;

/**
*
Expand All @@ -52,7 +53,7 @@ private DaoClinicalEvent() {}

public static int addClinicalEvent(ClinicalEvent clinicalEvent) {
if (!MySQLbulkLoader.isBulkLoad()) {
throw new IllegalStateException("Only buld load mode is allowed for importing clinical events");
throw new IllegalStateException("Only bulk load mode is allowed for importing clinical events");
}

MySQLbulkLoader.getMySQLbulkLoader("clinical_event").insertRecord(
Expand Down Expand Up @@ -202,6 +203,26 @@ public static void deleteByCancerStudyId(int cancerStudyId) throws DaoException
JdbcUtil.closeAll(DaoClinicalEvent.class, con, pstmt, rs);
}
}

/**
 * Removes every clinical event stored for one patient, together with the
 * event data rows attached to those events.
 *
 * A single multi-table DELETE is issued: {@code clinical_event} rows are
 * selected by {@code PATIENT_ID} and LEFT JOINed to {@code clinical_event_data}
 * on {@code CLINICAL_EVENT_ID}, so events without any data rows are removed too.
 *
 * @param patientId value bound to {@code clinical_event.PATIENT_ID}
 * @throws DaoException wrapping any {@link SQLException} raised while
 *         obtaining the connection, preparing or executing the statement
 */
public static void deleteByPatientId(int patientId) throws DaoException {
    final String deleteSql =
            "DELETE clinical_event, clinical_event_data FROM clinical_event"
            + " LEFT JOIN clinical_event_data"
            + " ON clinical_event_data.CLINICAL_EVENT_ID = clinical_event.CLINICAL_EVENT_ID"
            + " WHERE clinical_event.PATIENT_ID = ?";

    Connection connection = null;
    PreparedStatement statement = null;
    // No result set is produced by a DELETE; the reference stays null and is
    // only handed to closeAll to satisfy its signature.
    ResultSet noResults = null;
    try {
        connection = JdbcUtil.getDbConnection(DaoClinicalEvent.class);
        statement = connection.prepareStatement(deleteSql);
        statement.setInt(1, patientId);
        statement.executeUpdate();
    } catch (SQLException sqlFailure) {
        throw new DaoException(sqlFailure);
    } finally {
        JdbcUtil.closeAll(DaoClinicalEvent.class, connection, statement, noResults);
    }
}

public static void deleteAllRecords() throws DaoException {
Connection con = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

/**
* Imports timeline data for display in patient view
Expand All @@ -58,7 +60,7 @@
*/
public class ImportTimelineData extends ConsoleRunnable {

private static void importData(String dataFile, int cancerStudyId) throws IOException, DaoException {
private static void importData(String dataFile, int cancerStudyId, boolean overwriteExisting) throws IOException, DaoException {
MySQLbulkLoader.bulkLoadOn();

ProgressMonitor.setCurrentMessage("Reading file " + dataFile);
Expand All @@ -81,9 +83,10 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce
throw new RuntimeException("The first line must start with\n'PATIENT_ID\tSTART_DATE\tEVENT_TYPE'\nor\n"
+ "PATIENT_ID\tSTART_DATE\tSTOP_DATE\tEVENT_TYPE");
}

long clinicalEventId = DaoClinicalEvent.getLargestClinicalEventId();

Set<Integer> processedPatientIds = new HashSet<>();

while ((line = buff.readLine()) != null) {
line = line.trim();

Expand All @@ -99,6 +102,9 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce
ProgressMonitor.logWarning("Patient " + patientId + " not found in study " + cancerStudyId + ". Skipping entry.");
continue;
}
if (overwriteExisting && processedPatientIds.add(patient.getInternalId())) {
DaoClinicalEvent.deleteByPatientId(patient.getInternalId());
}
ClinicalEvent event = new ClinicalEvent();
event.setClinicalEventId(++clinicalEventId);
event.setPatientId(patient.getInternalId());
Expand Down Expand Up @@ -128,17 +134,18 @@ private static void importData(String dataFile, int cancerStudyId) throws IOExce
public void run() {
try {
String description = "Import 'timeline' data";
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);

OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, false);
String dataFile = (String) options.valueOf("data");
File descriptorFile = new File((String) options.valueOf("meta"));
boolean overwriteExisting = options.has("overwrite-existing");

Properties properties = new TrimmedProperties();
properties.load(new FileInputStream(descriptorFile));

int cancerStudyInternalId = ValidationUtils.getInternalStudyId(properties.getProperty("cancer_study_identifier"));

importData(dataFile, cancerStudyInternalId);
importData(dataFile, cancerStudyInternalId, overwriteExisting);
} catch (RuntimeException e) {
throw e;
} catch (IOException|DaoException e) {
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,10 @@ public static OptionSet parseStandardDataAndMetaOptions(String[] args, String de
parser.accepts( "loadMode", "direct (per record) or bulk load of data" )
.withRequiredArg().describedAs( "[directLoad|bulkLoad (default)]" ).ofType( String.class );
}
parser.accepts("overwrite-existing",
"Enables re-uploading molecular data that already exist for the given profile and sample.").withOptionalArg().describedAs("overwrite-existing").ofType(String.class);
String progName = "importScript";

OptionSet options = null;
try {
options = parser.parse( args );
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* This file is part of cBioPortal.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.mskcc.cbio.portal.integrationTest.incremental;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoClinicalEvent;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoPatient;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.ClinicalEvent;
import org.mskcc.cbio.portal.model.Patient;
import org.mskcc.cbio.portal.scripts.ImportTimelineData;
import org.springframework.test.annotation.Rollback;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.transaction.annotation.Transactional;

import java.io.File;
import java.util.List;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

/**
* Tests Incremental Import of Timeline Data.
*
* @author Ruslan Forostianov
* @author Pieter Lukasse
*/
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
@Rollback
@Transactional
public class TestIncrementalTimelineImport {

    public static final String STUDY_ID = "study_tcga_pub";
    private CancerStudy cancerStudy;

    /** Resolves the study under test once per test method. */
    @Before
    public void setUp() throws DaoException {
        cancerStudy = DaoCancerStudy.getCancerStudyByStableId(STUDY_ID);
    }

    /**
     * Seeds one pre-existing SPECIMEN event for patient TCGA-A1-A0SB, runs
     * {@link ImportTimelineData} with {@code --overwrite-existing} against the
     * incremental fixture files, and then checks that:
     * <ul>
     *   <li>TCGA-A1-A0SB ends up with exactly two events (SPECIMEN and STATUS)
     *       carrying the expected dates and data, i.e. the seeded event is gone;</li>
     *   <li>TCGA-A1-A0SD has exactly one STATUS event with no stop date;</li>
     *   <li>no patient named NONEXISTENT_PATIENT exists in the study.</li>
     * </ul>
     */
    @Test
    public void testTimelineDataReloading() throws DaoException {
        // Arrange: DaoClinicalEvent.addClinicalEvent is called in bulk-load mode.
        MySQLbulkLoader.bulkLoadOn();
        Patient sbPatient = lookUpPatient("TCGA-A1-A0SB");
        seedSpecimenEventToBeReplaced(sbPatient);

        File timelineFixtureDir = new File("src/test/resources/incremental/clinical/");
        File metaFile = new File(timelineFixtureDir, "meta_timeline.txt");
        File dataFile = new File(timelineFixtureDir, "data_timeline.txt");

        // Act
        String[] importerArgs = {
            "--meta", metaFile.getAbsolutePath(),
            "--data", dataFile.getAbsolutePath(),
            "--overwrite-existing",
        };
        new ImportTimelineData(importerArgs).run();

        // Assert: events of the patient that already had timeline data.
        List<ClinicalEvent> sbClinicalEvents = DaoClinicalEvent.getClinicalEvent(sbPatient.getInternalId());
        assertEquals(2, sbClinicalEvents.size());

        ClinicalEvent sbSpecimen = firstEventOfType(sbClinicalEvents, "SPECIMEN");
        assertEquals(20L, sbSpecimen.getStartDate());
        assertEquals(60L, sbSpecimen.getStopDate());
        assertEquals(Map.of(
                "SPECIMEN_SITE", "test_specimen_site_1",
                "SPECIMEN_TYPE", "test_specimen_type",
                "SOURCE", "test_source_3"
        ), sbSpecimen.getEventData());

        ClinicalEvent sbStatus = firstEventOfType(sbClinicalEvents, "STATUS");
        assertEquals(10L, sbStatus.getStartDate());
        assertEquals(20L, sbStatus.getStopDate());
        assertEquals(Map.of("SOURCE", "test_source_4"), sbStatus.getEventData());

        // Assert: events of the second patient mentioned in the data file.
        Patient sdPatient = lookUpPatient("TCGA-A1-A0SD");
        List<ClinicalEvent> sdClinicalEvents = DaoClinicalEvent.getClinicalEvent(sdPatient.getInternalId());
        assertEquals(1, sdClinicalEvents.size());

        ClinicalEvent sdStatus = firstEventOfType(sdClinicalEvents, "STATUS");
        assertEquals(45L, sdStatus.getStartDate());
        assertNull(sdStatus.getStopDate());
        assertEquals(Map.of("SOURCE", "test_source_2"), sdStatus.getEventData());

        // Assert: the import did not bring an unknown patient into existence.
        Patient nonexistentPatient = lookUpPatient("NONEXISTENT_PATIENT");
        assertNull(nonexistentPatient);
    }

    /** Looks up a patient of the study under test by its stable patient id. */
    private Patient lookUpPatient(String stablePatientId) throws DaoException {
        return DaoPatient.getPatientByCancerStudyAndPatientId(cancerStudy.getInternalId(), stablePatientId);
    }

    /**
     * Stores a SPECIMEN event (id 1, start date 5) for the given patient and
     * flushes the bulk loader so the row exists before the import runs.
     */
    private void seedSpecimenEventToBeReplaced(Patient patient) throws DaoException {
        ClinicalEvent staleEvent = new ClinicalEvent();
        staleEvent.setClinicalEventId(1L);
        staleEvent.setPatientId(patient.getInternalId());
        staleEvent.setStartDate(5L);
        staleEvent.setEventType("SPECIMEN");
        staleEvent.setEventData(Map.of("SPECIMEN_SITE", "specimen_site_to_erase"));
        DaoClinicalEvent.addClinicalEvent(staleEvent);
        MySQLbulkLoader.flushAll();
    }

    /**
     * Returns the first event in the list whose type equals {@code eventType};
     * fails with the exception of {@code Optional.get()} when there is none.
     */
    private static ClinicalEvent firstEventOfType(List<ClinicalEvent> events, String eventType) {
        return events.stream()
                .filter(candidate -> candidate.getEventType().equals(eventType))
                .findFirst()
                .get();
    }

}
5 changes: 5 additions & 0 deletions src/test/resources/incremental/clinical/data_timeline.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
PATIENT_ID START_DATE STOP_DATE EVENT_TYPE SPECIMEN_SITE SPECIMEN_TYPE SOURCE
TCGA-A1-A0SB 20 60 SPECIMEN test_specimen_site_1 test_specimen_type test_source_3
TCGA-A1-A0SB 10 20 STATUS test_source_4
TCGA-A1-A0SD 45 STATUS test_source_2
NONEXISTENT_PATIENT 100 200 STATUS test_source_1
4 changes: 4 additions & 0 deletions src/test/resources/incremental/clinical/meta_timeline.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
cancer_study_identifier: study_tcga_pub
genetic_alteration_type: CLINICAL
datatype: TIMELINE
data_filename: data_timeline.txt
3 changes: 3 additions & 0 deletions tests/system_tests_import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ def test_incremental_load(self, run_java, locate_jar):
'--meta', f'{data_directory}/meta_methylation_hm27.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_methylation_hm27.txt', '--noprogress')
treatment_ic50_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
'--meta', f'{data_directory}/meta_treatment_ic50.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_treatment_ic50.txt', '--noprogress')
timeline_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportTimelineData', '--overwrite-existing',
'--meta', f'{data_directory}/meta_timeline.txt', '--loadMode', 'bulkload', '--data', f'{data_directory}/data_timeline.txt', '--noprogress')
case_list_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.UpdateCaseListsSampleIds',
'--meta', f'{data_directory}/meta_clinical_samples.txt', '--case-lists', f'{data_directory}/case_lists')

Expand All @@ -130,6 +132,7 @@ def test_incremental_load(self, run_java, locate_jar):
expression_median_call,
methylation_hm27_call,
treatment_ic50_call,
timeline_call,
case_list_call,
])

Expand Down
4 changes: 4 additions & 0 deletions tests/test_data/study_es_0_inc/data_timeline.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
PATIENT_ID START_DATE STOP_DATE EVENT_TYPE SPECIMEN_SITE SPECIMEN_TYPE SOURCE
TCGA-BH-A18K 20 60 SPECIMEN test_specimen_site_1 test_specimen_type test_source_3
TCGA-BH-A18K 10 20 STATUS test_source_4
TCGA-BH-NEW 100 200 STATUS test_source_1
4 changes: 4 additions & 0 deletions tests/test_data/study_es_0_inc/meta_timeline.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
cancer_study_identifier: study_es_0
genetic_alteration_type: CLINICAL
datatype: TIMELINE
data_filename: data_timeline.txt

0 comments on commit a5ac232

Please sign in to comment.