Skip to content

Commit

Permalink
Merge pull request #44 from cBioPortal/make_study_es_0_to_load
Browse files Browse the repository at this point in the history
(2/7) RFC79: Make study_es_0_inc data pass validation
  • Loading branch information
forus authored Jun 19, 2024
2 parents 52714d6 + 90cc928 commit 074372f
Show file tree
Hide file tree
Showing 40 changed files with 505 additions and 193 deletions.
31 changes: 29 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,36 @@ Build docker image with:
docker build -t cbioportal-core .
```

Example of how to load a whole study:
### Example of how to load `study_es_0` study

Import gene panels

```bash
docker run -it -v $(pwd)/tests/test_data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenePanel.pl --data /data/study_es_0/data_gene_panel_testpanel1.txt
docker run -it -v $(pwd)/tests/test_data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenePanel.pl --data /data/study_es_0/data_gene_panel_testpanel2.txt
```

Import gene sets and supplementary data

```bash
docker run -it -v $(pwd)/src/test/resources/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenesetData.pl --data /data/genesets/study_es_0_genesets.gmt --new-version msigdb_7.5.1 --supp /data/genesets/study_es_0_supp-genesets.txt
```

Import gene set hierarchy data

```bash
docker run -it -v $(pwd)/src/test/resources/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
perl importGenesetHierarchy.pl --data /data/genesets/study_es_0_tree.yaml
```

Import study

```bash
docker run -it -v $(pwd)/data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core python importer/metaImport.py -s /data/study_es_0 -p /data/api_json -o
docker run -it -v $(pwd)/tests/test_data/:/data/ -v $(pwd)/application.properties:/application.properties cbioportal-core \
python importer/metaImport.py -s /data/study_es_0 -p /data/api_json_system_tests -o
```

### Incremental upload of data
Expand Down
23 changes: 17 additions & 6 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoCancerStudy.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,24 @@

package org.mskcc.cbio.portal.dao;

import java.sql.*;
import java.text.*;
import java.util.*;
import org.apache.commons.lang3.StringUtils;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.CancerStudyTags;
import org.mskcc.cbio.portal.model.ReferenceGenome;
import org.mskcc.cbio.portal.model.TypeOfCancer;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
* Analogous to and replaces the old DaoCancerType. A CancerStudy has a NAME and
Expand All @@ -61,7 +73,6 @@ public static enum Status {
private static final Map<Integer,CancerStudy> byInternalId = new HashMap<Integer,CancerStudy>();

static {
SpringUtil.initDataSource();
reCacheAll();
}

Expand Down
16 changes: 11 additions & 5 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoGeneticProfile.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,17 @@

package org.mskcc.cbio.portal.dao;

import java.sql.*;
import java.util.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.SpringUtil;
import org.mskcc.cbio.portal.model.GeneticAlterationType;
import org.mskcc.cbio.portal.model.GeneticProfile;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Analogous to and replaces the old DaoCancerType. A CancerStudy has a NAME and
Expand All @@ -52,7 +59,6 @@ private DaoGeneticProfile() {}
private static final Map<Integer,List<GeneticProfile>> byStudy = new HashMap<Integer,List<GeneticProfile>>();

static {
SpringUtil.initDataSource();
reCache();
}

Expand Down
13 changes: 8 additions & 5 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoReferenceGenome.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@

package org.mskcc.cbio.portal.dao;

import java.sql.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.SpringUtil;
import org.mskcc.cbio.portal.model.ReferenceGenome;

import java.util.*;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.Map;


/**
Expand All @@ -36,7 +40,6 @@ public final class DaoReferenceGenome {
private static final Map<String, Integer> genomeInternalIds = new HashMap<String,Integer>();

static {
SpringUtil.initDataSource();
reCache();
}

Expand Down
9 changes: 3 additions & 6 deletions src/main/java/org/mskcc/cbio/portal/scripts/AddCaseList.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,17 @@

package org.mskcc.cbio.portal.scripts;

import java.util.ArrayList;
import java.util.List;

import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.DaoSampleList;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.model.SampleList;
import org.mskcc.cbio.portal.model.SampleListCategory;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.SpringUtil;

import java.util.ArrayList;
import java.util.List;

/**
* Command-line tool that adds new case lists by generating them based on some rules.
Expand Down Expand Up @@ -131,7 +129,6 @@ public void run() {
throw new UsageException(progName, null, argSpec,
"cancer_study_identifier is not specified.");
}
SpringUtil.initDataSource();
CancerStudy theCancerStudy = DaoCancerStudy.getCancerStudyByStableId(cancerStudyIdentifier);
if (theCancerStudy == null) {
throw new IllegalArgumentException("cancer study identified by cancer_study_identifier '"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,11 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.CancerStudyTags;
import org.mskcc.cbio.portal.util.CancerStudyReader;
import org.mskcc.cbio.portal.util.CancerStudyTagsReader;
import org.mskcc.cbio.portal.util.ProgressMonitor;

import java.io.File;

Expand All @@ -53,7 +56,6 @@ public void run() {
}

File file = new File(args[0]);
SpringUtil.initDataSource();
CancerStudy cancerStudy = CancerStudyReader.loadCancerStudy(file);
CancerStudyTags cancerStudyTags = CancerStudyTagsReader.loadCancerStudyTags(file, cancerStudy);
String message = "Loaded the following cancer study:" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,40 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.*;
import joptsimple.OptionException;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import org.apache.commons.collections4.map.MultiKeyMap;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoClinicalAttributeMeta;
import org.mskcc.cbio.portal.dao.DaoClinicalData;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoPatient;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.ClinicalAttribute;
import org.mskcc.cbio.portal.model.Patient;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.StableIdUtil;
import org.mskcc.cbio.portal.util.SurvivalAttributeUtil;
import org.mskcc.cbio.portal.util.SurvivalAttributeUtil.SurvivalStatusAttributes;

import java.io.*;
import joptsimple.*;
import java.util.*;
import java.util.regex.*;
import org.apache.commons.collections4.map.MultiKeyMap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;

public class ImportClinicalData extends ConsoleRunnable {

Expand Down Expand Up @@ -677,7 +701,6 @@ public void run() {
overwriteExisting = true;

}
SpringUtil.initDataSource();
CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByStableId(cancerStudyStableId);
if (cancerStudy == null) {
throw new IllegalArgumentException("Unknown cancer study: " + cancerStudyStableId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,30 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.*;

import joptsimple.*;

import java.io.*;
import joptsimple.OptionSet;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegment;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegmentFile;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.CopyNumberSegment;
import org.mskcc.cbio.portal.model.CopyNumberSegmentFile;
import org.mskcc.cbio.portal.model.ReferenceGenome;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.mskcc.cbio.portal.util.StableIdUtil;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.*;
import java.util.Properties;

/**
* Imports segment data into the database.
Expand Down Expand Up @@ -118,7 +133,6 @@ public void run() {

ProgressMonitor.setCurrentMessage("Reading data from: " + dataFile);

SpringUtil.initDataSource();
CancerStudy cancerStudy = getCancerStudy(properties);

if (segmentDataExistsForCancerStudy(cancerStudy)) {
Expand Down
23 changes: 16 additions & 7 deletions src/main/java/org/mskcc/cbio/portal/scripts/ImportCosmicData.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,26 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.dao.DaoCosmicData;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CanonicalGene;
import org.mskcc.cbio.portal.model.CosmicMutationFrequency;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.MutationKeywordUtils;
import org.mskcc.cbio.portal.util.ProgressMonitor;
import org.springframework.util.Assert;

import java.io.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;

public class ImportCosmicData {
Expand Down Expand Up @@ -180,7 +190,6 @@ public static void main(String[] args) throws Exception {
System.out.println("command line usage: importCosmicData.pl <CosmicCodingMuts.vcf>");
return;
}
SpringUtil.initDataSource();
DaoCosmicData.deleteAllRecords();
ProgressMonitor.setConsoleMode(true);

Expand Down
13 changes: 9 additions & 4 deletions src/main/java/org/mskcc/cbio/portal/scripts/ImportDrugs.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,17 @@

package org.mskcc.cbio.portal.scripts;

import org.mskcc.cbio.portal.util.*;
import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoGeneOptimized;
import org.mskcc.cbio.portal.model.CanonicalGene;
import org.mskcc.cbio.portal.util.ConsoleUtil;
import org.mskcc.cbio.portal.util.FileUtil;
import org.mskcc.cbio.portal.util.ProgressMonitor;

import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

/**
* Command-line tool to import background drug information.
Expand Down Expand Up @@ -79,7 +85,6 @@ public static void main(String[] args) throws Exception {
return;
}
ProgressMonitor.setConsoleMode(true);
SpringUtil.initDataSource();

File file = new File(args[0]);
System.out.println("Reading drug data from: " + file.getAbsolutePath());
Expand Down
Loading

0 comments on commit 074372f

Please sign in to comment.