Skip to content

Commit

Permalink
Make validation pass for CNA long and study_es_0_inc data
Browse files Browse the repository at this point in the history
  • Loading branch information
forus committed May 28, 2024
1 parent 0bf6bf2 commit cc80e56
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 12 deletions.
2 changes: 1 addition & 1 deletion scripts/importer/validateData.py
Original file line number Diff line number Diff line change
Expand Up @@ -4718,7 +4718,7 @@ def process_metadata_files(directory, portal_instance, logger, relaxed_mode, str
if stable_id in stable_ids:
# stable id already used in other meta file, give error:
logger.error(
'stable_id repeated. It should be unique across all files in a study',
'stable_id repeated. It should be unique across all files in a directory',
extra={'filename_': filename,
'cause': stable_id})
else:
Expand Down
3 changes: 0 additions & 3 deletions tests/system_tests_import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,6 @@ def test_incremental_load(self, run_java, locate_jar):
'--meta', f'{data_directory}/meta_mutations_extended.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_mutations_extended.maf', '--noprogress')
cna_discrete_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
'--meta', f'{data_directory}/meta_cna_discrete.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_cna_discrete.txt', '--noprogress')
cna_discrete_long_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
'--meta', f'{data_directory}/meta_cna_discrete_long.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_cna_discrete_long.txt', '--noprogress')
cna_log2_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
'--meta', f'{data_directory}/meta_cna_log2.txt', '--loadMode', 'bulkload', '--update-info', 'False', '--data', f'{data_directory}/data_cna_log2.txt', '--noprogress')
expression_median_call = call(*common_part, 'org.mskcc.cbio.portal.scripts.ImportProfileData', '--overwrite-existing',
Expand All @@ -130,7 +128,6 @@ def test_incremental_load(self, run_java, locate_jar):
clinical_sample_call,
mutation_call,
cna_discrete_call,
cna_discrete_long_call,
cna_log2_call,
expression_median_call,
methylation_hm27_call,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ AKT3 10000 TCGA-AO-A129-01 -2
AKT1 207 TCGA-C8-A12K-01 -1
AKT1 207 TCGA-BH-NON-EXIST 2
AKT1 207 TCGA-AO-A129-01 2
# Everything after the pipe has to be removed
AKT2|TEST 208 TCGA-C8-A12K-01 -2
AKT2|TEST 208 TCGA-BH-NON-EXIST 2
AKT2|TEST 208 TCGA-AO-A129-01 -1 Putative_Driver Test driver Class 1 Class annotation
Expand All @@ -15,13 +14,9 @@ HRAS 3265 TCGA-AO-A129-01 0
KRAS 3845 TCGA-C8-A12K-01 0 Class 2 Class annotation
KRAS 3845 TCGA-BH-NON-EXIST -2
KRAS 3845 TCGA-AO-A129-01 2 Putative_Passenger Test passenger Class 2 Class annotation
# This gene is absent from this file, but it is still part of the profile and has to be updated
#ATM 472
# This line is missing the Hugo symbol, so the gene has to be detected by its Entrez ID
4893 TCGA-C8-A12K-01 -2
4893 TCGA-BH-NON-EXIST -2
4893 TCGA-AO-A129-01 -1
# This line is missing the Entrez ID, so the gene has to be detected by its Hugo symbol
BRCA1 TCGA-C8-A12K-01 2
BRCA1 TCGA-BH-NON-EXIST 2
BRCA1 TCGA-AO-A129-01 0
Expand All @@ -31,7 +26,6 @@ BRAF 673 TCGA-AO-A129-01 -2
BRCA2 675 TCGA-C8-A12K-01 -1.5
BRCA2 675 TCGA-BH-NON-EXIST 2
BRCA2 675 TCGA-AO-A129-01 0
# This gene is new! Empty values should be set for the samples that already exist in the database
CDK1 983 TCGA-C8-A12K-01 -2 Putative_Driver
CDK1 983 TCGA-BH-NON-EXIST -2
CDK1 983 TCGA-AO-A129-01 2 Putative_Passenger Test passenger
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
cancer_study_identifier: study_es_0
genetic_alteration_type: COPY_NUMBER_ALTERATION
datatype: DISCRETE
datatype: DISCRETE_LONG
stable_id: gistic
show_profile_in_analysis_tab: true
profile_description: Putative copy-number from GISTIC 2.0. Values: -2 = homozygous deletion; -1 = hemizygous deletion; 0 = neutral / no change; 1 = gain; 2 = high level amplification.
profile_name: Putative copy-number alterations from GISTIC
data_filename: data_cna_discrete_long.txt
namespaces: CustomNamespace

0 comments on commit cc80e56

Please sign in to comment.