Skip to content

Commit

Permalink
updates, fixes, and new set_update_process_state
Browse files Browse the repository at this point in the history
  • Loading branch information
importer system account committed Oct 10, 2024
1 parent b140fae commit aa61d3b
Show file tree
Hide file tree
Showing 10 changed files with 265 additions and 107 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,13 @@ function set_database_table_list() {
return 0
}

# Cleanup hook invoked by shutdown_main_and_clean_up.
# Currently a no-op: it touches nothing and always reports success (status 0).
delete_output_stream_files() {
    return 0
}

function shutdown_main_and_clean_up() {
#TODO restore
#shutdown_sling_command_line_functions
shutdown_sling_command_line_functions
delete_output_stream_files
unset my_properties
unset database_table_list
Expand Down Expand Up @@ -161,5 +165,3 @@ function main() {
}

main "$1" "$2"

exit 0
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ properties_arg=$1
database_arg=$2
chosen_database_name=""

#TODO : generalize this code and add proper error handling

if [ "$database_arg" == "blue" ] ; then
chosen_database_name="$DESTINATION_DATABASE_BLUE"
Expand All @@ -31,8 +30,7 @@ done < "$DERIVED_TABLE_STATEMENT_FILE"
# Execute every entry of statement_list in order, one client invocation per
# statement, feeding the statement text to the client on standard input.
# statement_list is filled before this point (not shown here) and the target
# database is the one selected into chosen_database_name above.
statement_list_length=${#statement_list[@]}
pos=0
while [ $pos -lt $statement_list_length ] ; do
#TODO do not pass password on the command line like this. Use a configuration file instead
clickhouse client --host clickhouse_service_hostname_goes_here --port clickhouse_service_port_goes_here --database="$chosen_database_name" --user clickhouse_username_goes_here --password="$password" <<< "${statement_list[$pos]}"
# NOTE(review): the line below sends the same statement a second time, using a
# client binary under a home directory and a hard-coded host and user. It looks
# like the replacement for the invocation above rather than an addition to it -
# confirm that only one of the two is meant to run. The password is still
# passed as a command line argument here, so the TODO above applies to it too.
~/rob/setting_up_clickhouse/clickhouse client --host ip-10-0-7-23.ec2.internal --port 9000 --database="$chosen_database_name" --user cgds_admin --password="$password" <<< "${statement_list[$pos]}"
pos=$(($pos+1))
done

Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@ INSERT INTO TABLE clinical_data_derived SELECT sm.sample_unique_id AS sample_uni
INSERT INTO TABLE clinical_data_derived SELECT '' AS sample_unique_id, concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, cam.attr_id AS attribute_name, ifNull(clinpat.attr_value, '') AS attribute_value, cs.cancer_study_identifier AS cancer_study_identifier, 'patient' AS type FROM patient AS p INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id FULL OUTER JOIN clinical_attribute_meta AS cam ON cs.cancer_study_id = cam.cancer_study_id FULL OUTER JOIN clinical_patient AS clinpat ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) WHERE cam.patient_attribute = 1;
CREATE TABLE clinical_event_derived ( patient_unique_id String, key String, value String, start_date Int32, stop_date Int32 DEFAULT 0, event_type LowCardinality(String), cancer_study_identifier LowCardinality(String)) ENGINE = MergeTree ORDER BY (event_type, patient_unique_id, cancer_study_identifier);
INSERT INTO clinical_event_derived SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, ced.key AS key, ced.value AS value, ce.start_date AS start_date, ifNull(ce.stop_date, 0) AS stop_date, ce.event_type AS event_type, cs.cancer_study_identifier FROM clinical_event ce LEFT JOIN clinical_event_data ced ON ce.clinical_event_id = ced.clinical_event_id INNER JOIN patient p ON ce.patient_id = p.internal_id INNER JOIN cancer_study cs ON p.cancer_study_id = cs.cancer_study_id;
CREATE TABLE IF NOT EXISTS genetic_alteration_cna_derived ( sample_unique_id String, cancer_study_identifier LowCardinality(String), hugo_gene_symbol String, profile_type LowCardinality(String), alteration_value Nullable(Float32)) ENGINE = MergeTree() ORDER BY (profile_type, cancer_study_identifier, sample_unique_id, hugo_gene_symbol);
INSERT INTO TABLE genetic_alteration_cna_derived SELECT sample_unique_id, cancer_study_identifier, hugo_gene_symbol, replaceOne(stable_id, concat(sd.cancer_study_identifier, '_'), '') as profile_type, alteration_value FROM (SELECT sample_id, hugo_gene_symbol, stable_id, alteration_value FROM (SELECT g.hugo_gene_symbol AS hugo_gene_symbol, gp.stable_id as stable_id, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value, arrayMap(x -> (x = '' ? NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id WHERE gp.genetic_alteration_type = 'COPY_NUMBER_ALTERATION') ARRAY JOIN alteration_value, sample_id WHERE alteration_value != 'NA') AS subquery JOIN sample_derived sd ON sd.internal_id = subquery.sample_id;
CREATE TABLE IF NOT EXISTS genetic_alteration_numerical_derived ( sample_unique_id String, cancer_study_identifier LowCardinality(String), hugo_gene_symbol String, profile_type LowCardinality(String), alteration_value String) ENGINE = MergeTree() ORDER BY (profile_type, cancer_study_identifier, hugo_gene_symbol, sample_unique_id );
INSERT INTO TABLE genetic_alteration_numerical_derived SELECT sample_unique_id, cancer_study_identifier, hugo_gene_symbol, profile_type, alteration_value FROM (SELECT sample_id, hugo_gene_symbol, profile_type, alteration_value FROM (SELECT g.hugo_gene_symbol AS hugo_gene_symbol, replaceOne(stable_id, concat(cs.cancer_study_identifier, '_'), '') as profile_type, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value, arrayMap(x -> (x = '' ? NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN cancer_study cs ON cs.cancer_study_id = gp.cancer_study_id JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id WHERE gp.genetic_alteration_type != 'COPY_NUMBER_ALTERATION') ARRAY JOIN alteration_value, sample_id) AS subquery JOIN sample_derived sd ON sd.internal_id = subquery.sample_id;
CREATE TABLE IF NOT EXISTS genetic_alteration_derived ( sample_unique_id String, cancer_study_identifier LowCardinality(String), hugo_gene_symbol String, profile_type LowCardinality(String), alteration_value Nullable(String)) ENGINE = MergeTree() ORDER BY (profile_type, cancer_study_identifier, sample_unique_id, hugo_gene_symbol);
INSERT INTO TABLE genetic_alteration_derived SELECT sample_unique_id, cancer_study_identifier, hugo_gene_symbol, replaceOne(stable_id, concat(sd.cancer_study_identifier, '_'), '') as profile_type, alteration_value FROM (SELECT sample_id, hugo_gene_symbol, stable_id, alteration_value FROM (SELECT g.hugo_gene_symbol AS hugo_gene_symbol, gp.stable_id as stable_id, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value, arrayMap(x -> (x = '' ? NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id WHERE gp.genetic_alteration_type NOT IN ('GENERIC_ASSAY', 'MUTATION_EXTENDED', 'STRUCTURAL_VARIANT')) ARRAY JOIN alteration_value, sample_id WHERE alteration_value != 'NA') AS subquery JOIN sample_derived sd ON sd.internal_id = subquery.sample_id;
CREATE TABLE IF NOT EXISTS generic_assay_data_derived ( sample_unique_id String, genetic_entity_id String, value String, generic_assay_type String, profile_stable_id String, entity_stable_id String, datatype String, patient_level NUMERIC, profile_type String) ENGINE = MergeTree() ORDER BY (profile_type, entity_stable_id, sample_unique_id);
INSERT INTO TABLE generic_assay_data_derived SELECT sd.sample_unique_id as sample_unique_id, genetic_entity_id, value, generic_assay_type, profile_stable_id, entity_stable_id, datatype, patient_level, replaceOne(profile_stable_id, concat(cs.cancer_study_identifier, '_'), '') as profile_type FROM (SELECT sample_id, genetic_entity_id, value, cancer_study_id, generic_assay_type, genetic_profile_id, profile_stable_id, entity_stable_id, patient_level, datatype FROM (SELECT sample_id as sample_unique_id, gp.cancer_study_id AS cancer_study_id, ga.genetic_entity_id as genetic_entity_id, gp.genetic_profile_id as genetic_profile_id, gp.generic_assay_type as generic_assay_type, gp.stable_id as profile_stable_id, ge.stable_id as entity_stable_id, gp.datatype as datatype, gp.patient_level as patient_level, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS value, arrayMap(x -> (x = '' ? NULL : toInt64(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN genetic_entity ge on ga.genetic_entity_id = ge.id WHERE gp.generic_assay_type IS NOT NULL) ARRAY JOIN value, sample_id) AS subquery JOIN cancer_study cs ON cs.cancer_study_id = subquery.cancer_study_id JOIN sample_derived sd ON sd.internal_id = subquery.sample_id;
OPTIMIZE TABLE sample_to_gene_panel_derived;
Expand All @@ -29,8 +27,7 @@ OPTIMIZE TABLE sample_derived;
OPTIMIZE TABLE genomic_event_derived;
OPTIMIZE TABLE clinical_data_derived;
OPTIMIZE TABLE clinical_event_derived;
OPTIMIZE TABLE genetic_alteration_cna_derived;
OPTIMIZE TABLE genetic_alteration_numerical_derived;
OPTIMIZE TABLE genetic_alteration_derived;
OPTIMIZE TABLE generic_assay_data_derived;
DROP TABLE IF EXISTS sample_list_columnstore;
DROP VIEW IF EXISTS sample_list_columnstore_mv;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,59 @@
#!/usr/bin/env bash

# load dependencies
# Both function libraries are looked up in the directory that holds this script
# (after resolving symbolic links). The script cannot work without them, so a
# failed load prints an error to stderr and terminates with status 1.
unset this_script_dir
# "$0" is quoted so that a script path containing whitespace or glob characters
# reaches readlink as one intact argument.
this_script_dir="$(dirname "$(readlink -f "$0")")"
if ! source "$this_script_dir/parse_property_file_functions.sh" ; then
    echo "Error : unable to load dependency : $this_script_dir/parse_property_file_functions.sh" >&2
    exit 1
fi
if ! source "$this_script_dir/clickhouse_client_command_line_functions.sh" ; then
    echo "Error : unable to load dependency : $this_script_dir/clickhouse_client_command_line_functions.sh" >&2
    exit 1
fi
# only needed while loading; do not leave it behind in the environment
unset this_script_dir

# Script-wide state shared by the functions in this file.
# Files written in the current working directory while the script runs.
database_table_list_filepath="$(pwd)/dtcd_database_table_list.txt"
drop_table_result_filepath="$(pwd)/dtcd_drop_table_result.txt"
# Discard anything inherited from the calling environment, then re-declare
# each variable with the type the functions rely on.
unset my_properties database_table_list database_name
declare -A my_properties        # associative array, handed by name to parse_property_file
declare -a database_table_list  # indexed array
database_name=""                # assigned by initialize_main

# Print the command line synopsis for this script on stderr.
# Takes no arguments and writes nothing to stdout.
function usage() {
    printf '%s\n' \
        "usage: drop_tables_in_clickhouse_database.sh properties_filepath database" \
        " database must be in {blue, green}" >&2
}

# Prepare the script for work: load the property file, initialize the
# clickhouse client helper functions, and resolve which database to operate on.
# Arguments:
#   $1 - path of the properties file
#   $2 - database selector, either "blue" or "green"
# Globals:
#   my_properties - filled through parse_property_file, then read
#   database_name - set to the configured name of the selected database
# Returns:
#   0 on success; 1 (after printing usage) when the properties cannot be
#   parsed, the client functions cannot be initialized, or the selector is
#   neither "blue" nor "green".
function initialize_main() {
    local properties_filepath=$1
    local database_to_drop_tables_from=$2
    if ! parse_property_file "$properties_filepath" my_properties ; then
        usage
        return 1
    fi
    if ! initialize_clickhouse_client_command_line_functions ; then
        usage
        return 1
    fi
    remove_credentials_from_properties my_properties # no longer needed - remove for security
    case "$database_to_drop_tables_from" in
        blue)
            database_name="${my_properties['clickhouse_blue_database_name']}"
            ;;
        green)
            database_name="${my_properties['clickhouse_green_database_name']}"
            ;;
        *)
            echo "Error : database must be one of {blue, green}" >&2
            usage
            return 1
            ;;
    esac
    return 0
}

DESTINATION_DATABASE="name_of_clickhouse_blue_database"
# Prompt for the clickhouse password.
#   -r keeps backslashes in the password literal instead of treating them as escapes
#   -s stops the terminal from echoing the password while it is typed
# The password is deliberately never printed back: doing so would expose it on
# the terminal and in any captured output or log.
read -r -s -p 'enter clickhouse password: ' password
# With -s the newline typed by the user is not echoed, so finish the prompt
# line by hand when running interactively.
if [ -t 0 ] ; then
    echo >&2
fi
Expand Down Expand Up @@ -80,3 +134,21 @@ while [ $pos -lt 1 ] ; do
clickhouse client --host clickhouse_hostname_goes_here --port clickhouse_port_goes_here --user username_goes_here --password="$password" <<< "DROP MATERIALIZED VIEW $DESTINATION_DATABASE.${view_name[$pos]}"
pos=$(($pos+1))
done


# Entry point: run every step of the table drop in order, stopping at the
# first step that fails, and always run the shutdown / clean up step afterwards.
# Arguments:
#   $1 - path of the properties file
#   $2 - database selector passed on to initialize_main
# Returns:
#   0 when every step succeeded, 1 when any step failed.
function main() {
    local properties_filepath=$1
    local database_to_drop_tables_from=$2
    local exit_status=0
    if ! {
        initialize_main "$properties_filepath" "$database_to_drop_tables_from" &&
        selected_database_exists &&
        set_database_table_list &&
        drop_all_database_tables &&
        selected_database_is_empty
    } ; then
        exit_status=1
    fi
    # clean up runs regardless of the outcome above
    shutdown_main_and_clean_up
    return $exit_status
}

main "$1" "$2"

This file was deleted.

14 changes: 4 additions & 10 deletions scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ drop_table_result_filepath="$(pwd)/dtmd_drop_table_result.txt"

# Print the command line synopsis for this script on stderr.
# Exactly one list of accepted database selectors is printed; a second,
# contradictory line that still advertised "shelved" has been dropped.
function usage() {
    echo "usage: drop_tables_in_mysql_database.sh properties_filepath database" >&2
    echo " database must be in {blue, green}" >&2
}

function initialize_main() {
Expand All @@ -46,14 +46,9 @@ function initialize_main() {
if [ "$database_to_drop_tables_from" == "green" ] ; then
database_name="${my_properties['mysql_green_database_name']}"
else
if [ "$database_to_drop_tables_from" == "shelved" ] ; then
database_name="${my_properties['shelved_database_name']}"
else
echo "Error : database must be one of {blue, green, shelved}" >&2
usage
return 1

fi
echo "Error : database must be one of {blue, green}" >&2
usage
return 1
fi
fi
return 0
Expand Down Expand Up @@ -127,7 +122,6 @@ function drop_all_database_tables() {
fi
pos=$(($pos+1))
done
# TODO : add check that database has no tables
return 0
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ mysql_server_disk_usage_invisible_to_user_megabytes=
mysql_server_disk_consumption_anticipated_during_import_megabytes=
mysql_blue_database_name=
mysql_green_database_name=
mysql_shelved_database_name=
mysql_update_management_database=
clickhouse_server_username=
clickhouse_server_password=
clickhouse_server_host_name=
Expand Down
14 changes: 14 additions & 0 deletions scripts/clickhouse_import_support/mysql_command_line_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ configured_mysql_defaults_config_file_path=""
sql_data_field_value=""
declare -a sql_data_array
database_exists_filepath="$(pwd)/mclf_database_exists.txt"
table_exists_filepath="$(pwd)/mclf_table_exists.txt"
database_table_list_filepath="$(pwd)/mclf_database_table_list.txt"

function purge_mysql_credentials_from_environment_variables() {
Expand Down Expand Up @@ -37,11 +38,13 @@ function initialize_mysql_command_line_functions() {
# Release everything the mysql command line functions hold: delete the
# defaults config file and the scratch output files, then drop the variables
# of this function library from the environment.
# Missing files are not an error (rm -f).
function shutdown_mysql_command_line_functions() {
    rm -f "$configured_mysql_defaults_config_file_path" \
        "$database_exists_filepath" \
        "$table_exists_filepath" \
        "$database_table_list_filepath"
    unset configured_mysql_defaults_config_file_path \
        sql_data_field_value \
        sql_data_array \
        database_exists_filepath \
        table_exists_filepath \
        database_table_list_filepath
}

Expand Down Expand Up @@ -170,6 +173,17 @@ function database_exists() {
return 0
}

# Check for a table by running a DESCRIBE TABLE statement against it.
# Arguments:
#   $1 - database name
#   $2 - table name
# Globals:
#   table_exists_filepath - passed to execute_sql_statement_via_mysql as its
#                           second argument
# Returns:
#   0 when the statement executes successfully; 1 otherwise, after writing a
#   warning to stderr.
function table_exists() {
    local database_name=$1
    local table_name=$2
    local statement
    statement="DESCRIBE TABLE \`${database_name}\`.\`${table_name}\`"
    if execute_sql_statement_via_mysql "$statement" "$table_exists_filepath" ; then
        return 0
    fi
    echo "Warning : unable to find table $table_name in $database_name using : $statement" >&2
    return 1
}

function database_is_empty() {
local database_name=$1
local statement="SHOW TABLES IN \`$database_name\`"
Expand Down
Loading

0 comments on commit aa61d3b

Please sign in to comment.