From aa61d3baa95d41a045b968c74ca964ff907b29f7 Mon Sep 17 00:00:00 2001 From: importer system account Date: Wed, 9 Oct 2024 16:20:21 -0400 Subject: [PATCH] updates, fixes, and new set_update_process_state --- ...opy_mysql_database_tables_to_clickhouse.sh | 8 +- ...e_derived_tables_in_clickhouse_database.sh | 4 +- .../derived_table_construction_commands.sql | 9 +- ... => drop_tables_in_clickhouse_database.sh} | 72 ++++++++ ...rop_tables_in_clickhouse_database_green.sh | 82 --------- .../drop_tables_in_mysql_database.sh | 14 +- ...anage_cbioportal_databases_tool.properties | 2 +- .../mysql_command_line_functions.sh | 14 ++ .../set_update_process_state.sh | 163 ++++++++++++++++++ .../sling_command_line_functions.sh | 4 +- 10 files changed, 265 insertions(+), 107 deletions(-) rename scripts/clickhouse_import_support/{drop_tables_in_clickhouse_database_blue.sh => drop_tables_in_clickhouse_database.sh} (57%) delete mode 100755 scripts/clickhouse_import_support/drop_tables_in_clickhouse_database_green.sh create mode 100755 scripts/clickhouse_import_support/set_update_process_state.sh diff --git a/scripts/clickhouse_import_support/copy_mysql_database_tables_to_clickhouse.sh b/scripts/clickhouse_import_support/copy_mysql_database_tables_to_clickhouse.sh index c5d02dca..5441044e 100755 --- a/scripts/clickhouse_import_support/copy_mysql_database_tables_to_clickhouse.sh +++ b/scripts/clickhouse_import_support/copy_mysql_database_tables_to_clickhouse.sh @@ -79,9 +79,13 @@ function set_database_table_list() { return 0 } +function delete_output_stream_files() { + return 0 +} + function shutdown_main_and_clean_up() { #TODO restore - #shutdown_sling_command_line_functions + shutdown_sling_command_line_functions delete_output_stream_files unset my_properties unset database_table_list @@ -161,5 +165,3 @@ function main() { } main "$1" "$2" - -exit 0 diff --git a/scripts/clickhouse_import_support/create_derived_tables_in_clickhouse_database.sh 
b/scripts/clickhouse_import_support/create_derived_tables_in_clickhouse_database.sh index fff18fe7..8ea26fc7 100755 --- a/scripts/clickhouse_import_support/create_derived_tables_in_clickhouse_database.sh +++ b/scripts/clickhouse_import_support/create_derived_tables_in_clickhouse_database.sh @@ -7,7 +7,6 @@ properties_arg=$1 database_arg=$2 chosen_database_name="" -#TODO : generalize this code and add proper error handling if [ "$database_arg" == "blue" ] ; then chosen_database_name="$DESTINATION_DATABASE_BLUE" @@ -31,8 +30,7 @@ done < "$DERIVED_TABLE_STATEMENT_FILE" statement_list_length=${#statement_list[@]} pos=0 while [ $pos -lt $statement_list_length ] ; do - #TODO do not pass password on the command line like this. Use a configuration file instead - clickhouse client --host clickhouse_service_hostname_goes_here --port clickhouse_service_port_goes_here --database="$chosen_database_name" --user clickhouse_username_goes_here --password="$password" <<< "${statement_list[$pos]}" + ~/rob/setting_up_clickhouse/clickhouse client --host ip-10-0-7-23.ec2.internal --port 9000 --database="$chosen_database_name" --user cgds_admin --password="$password" <<< "${statement_list[$pos]}" pos=$(($pos+1)) done diff --git a/scripts/clickhouse_import_support/derived_table_construction_commands.sql b/scripts/clickhouse_import_support/derived_table_construction_commands.sql index 780addbe..3539cff2 100644 --- a/scripts/clickhouse_import_support/derived_table_construction_commands.sql +++ b/scripts/clickhouse_import_support/derived_table_construction_commands.sql @@ -17,10 +17,8 @@ INSERT INTO TABLE clinical_data_derived SELECT sm.sample_unique_id AS sample_uni INSERT INTO TABLE clinical_data_derived SELECT '' AS sample_unique_id, concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, cam.attr_id AS attribute_name, ifNull(clinpat.attr_value, '') AS attribute_value, cs.cancer_study_identifier AS cancer_study_identifier, 'patient' AS type FROM patient AS p INNER JOIN 
cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id FULL OUTER JOIN clinical_attribute_meta AS cam ON cs.cancer_study_id = cam.cancer_study_id FULL OUTER JOIN clinical_patient AS clinpat ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) WHERE cam.patient_attribute = 1; CREATE TABLE clinical_event_derived ( patient_unique_id String, key String, value String, start_date Int32, stop_date Int32 DEFAULT 0, event_type LowCardinality(String), cancer_study_identifier LowCardinality(String)) ENGINE = MergeTree ORDER BY (event_type, patient_unique_id, cancer_study_identifier); INSERT INTO clinical_event_derived SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, ced.key AS key, ced.value AS value, ce.start_date AS start_date, ifNull(ce.stop_date, 0) AS stop_date, ce.event_type AS event_type, cs.cancer_study_identifier FROM clinical_event ce LEFT JOIN clinical_event_data ced ON ce.clinical_event_id = ced.clinical_event_id INNER JOIN patient p ON ce.patient_id = p.internal_id INNER JOIN cancer_study cs ON p.cancer_study_id = cs.cancer_study_id; -CREATE TABLE IF NOT EXISTS genetic_alteration_cna_derived ( sample_unique_id String, cancer_study_identifier LowCardinality(String), hugo_gene_symbol String, profile_type LowCardinality(String), alteration_value Nullable(Float32)) ENGINE = MergeTree() ORDER BY (profile_type, cancer_study_identifier, sample_unique_id, hugo_gene_symbol); -INSERT INTO TABLE genetic_alteration_cna_derived SELECT sample_unique_id, cancer_study_identifier, hugo_gene_symbol, replaceOne(stable_id, concat(sd.cancer_study_identifier, '_'), '') as profile_type, alteration_value FROM (SELECT sample_id, hugo_gene_symbol, stable_id, alteration_value FROM (SELECT g.hugo_gene_symbol AS hugo_gene_symbol, gp.stable_id as stable_id, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value, arrayMap(x -> (x = '' ? 
NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id WHERE gp.genetic_alteration_type = 'COPY_NUMBER_ALTERATION') ARRAY JOIN alteration_value, sample_id WHERE alteration_value != 'NA') AS subquery JOIN sample_derived sd ON sd.internal_id = subquery.sample_id; -CREATE TABLE IF NOT EXISTS genetic_alteration_numerical_derived ( sample_unique_id String, cancer_study_identifier LowCardinality(String), hugo_gene_symbol String, profile_type LowCardinality(String), alteration_value String) ENGINE = MergeTree() ORDER BY (profile_type, cancer_study_identifier, hugo_gene_symbol, sample_unique_id ); -INSERT INTO TABLE genetic_alteration_numerical_derived SELECT sample_unique_id, cancer_study_identifier, hugo_gene_symbol, profile_type, alteration_value FROM (SELECT sample_id, hugo_gene_symbol, profile_type, alteration_value FROM (SELECT g.hugo_gene_symbol AS hugo_gene_symbol, replaceOne(stable_id, concat(cs.cancer_study_identifier, '_'), '') as profile_type, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value, arrayMap(x -> (x = '' ? 
NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN cancer_study cs ON cs.cancer_study_id = gp.cancer_study_id JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id WHERE gp.genetic_alteration_type != 'COPY_NUMBER_ALTERATION') ARRAY JOIN alteration_value, sample_id) AS subquery JOIN sample_derived sd ON sd.internal_id = subquery.sample_id; +CREATE TABLE IF NOT EXISTS genetic_alteration_derived ( sample_unique_id String, cancer_study_identifier LowCardinality(String), hugo_gene_symbol String, profile_type LowCardinality(String), alteration_value Nullable(String)) ENGINE = MergeTree() ORDER BY (profile_type, cancer_study_identifier, sample_unique_id, hugo_gene_symbol); +INSERT INTO TABLE genetic_alteration_derived SELECT sample_unique_id, cancer_study_identifier, hugo_gene_symbol, replaceOne(stable_id, concat(sd.cancer_study_identifier, '_'), '') as profile_type, alteration_value FROM (SELECT sample_id, hugo_gene_symbol, stable_id, alteration_value FROM (SELECT g.hugo_gene_symbol AS hugo_gene_symbol, gp.stable_id as stable_id, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS alteration_value, arrayMap(x -> (x = '' ? 
NULL : toInt32(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id WHERE gp.genetic_alteration_type NOT IN ('GENERIC_ASSAY', 'MUTATION_EXTENDED', 'STRUCTURAL_VARIANT')) ARRAY JOIN alteration_value, sample_id WHERE alteration_value != 'NA') AS subquery JOIN sample_derived sd ON sd.internal_id = subquery.sample_id; CREATE TABLE IF NOT EXISTS generic_assay_data_derived ( sample_unique_id String, genetic_entity_id String, value String, generic_assay_type String, profile_stable_id String, entity_stable_id String, datatype String, patient_level NUMERIC, profile_type String) ENGINE = MergeTree() ORDER BY (profile_type, entity_stable_id, sample_unique_id); INSERT INTO TABLE generic_assay_data_derived SELECT sd.sample_unique_id as sample_unique_id, genetic_entity_id, value, generic_assay_type, profile_stable_id, entity_stable_id, datatype, patient_level, replaceOne(profile_stable_id, concat(cs.cancer_study_identifier, '_'), '') as profile_type FROM (SELECT sample_id, genetic_entity_id, value, cancer_study_id, generic_assay_type, genetic_profile_id, profile_stable_id, entity_stable_id, patient_level, datatype FROM (SELECT sample_id as sample_unique_id, gp.cancer_study_id AS cancer_study_id, ga.genetic_entity_id as genetic_entity_id, gp.genetic_profile_id as genetic_profile_id, gp.generic_assay_type as generic_assay_type, gp.stable_id as profile_stable_id, ge.stable_id as entity_stable_id, gp.datatype as datatype, gp.patient_level as patient_level, arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS value, arrayMap(x -> (x = '' ? 
NULL : toInt64(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id FROM genetic_profile gp JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id JOIN genetic_entity ge on ga.genetic_entity_id = ge.id WHERE gp.generic_assay_type IS NOT NULL) ARRAY JOIN value, sample_id) AS subquery JOIN cancer_study cs ON cs.cancer_study_id = subquery.cancer_study_id JOIN sample_derived sd ON sd.internal_id = subquery.sample_id; OPTIMIZE TABLE sample_to_gene_panel_derived; @@ -29,8 +27,7 @@ OPTIMIZE TABLE sample_derived; OPTIMIZE TABLE genomic_event_derived; OPTIMIZE TABLE clinical_data_derived; OPTIMIZE TABLE clinical_event_derived; -OPTIMIZE TABLE genetic_alteration_cna_derived; -OPTIMIZE TABLE genetic_alteration_numerical_derived; +OPTIMIZE TABLE genetic_alteration_derived; OPTIMIZE TABLE generic_assay_data_derived; DROP TABLE IF EXISTS sample_list_columnstore; DROP VIEW IF EXISTS sample_list_columnstore_mv; diff --git a/scripts/clickhouse_import_support/drop_tables_in_clickhouse_database_blue.sh b/scripts/clickhouse_import_support/drop_tables_in_clickhouse_database.sh similarity index 57% rename from scripts/clickhouse_import_support/drop_tables_in_clickhouse_database_blue.sh rename to scripts/clickhouse_import_support/drop_tables_in_clickhouse_database.sh index 48031a34..76f12a31 100755 --- a/scripts/clickhouse_import_support/drop_tables_in_clickhouse_database_blue.sh +++ b/scripts/clickhouse_import_support/drop_tables_in_clickhouse_database.sh @@ -1,5 +1,59 @@ #!/usr/bin/env bash +# load dependencies +unset this_script_dir +this_script_dir="$(dirname "$(readlink -f $0)")" +if ! source "$this_script_dir/parse_property_file_functions.sh" ; then + echo "Error : unable to load dependency : $this_script_dir/parse_property_file_functions.sh" >&2 + exit 1 +fi +if ! 
source "$this_script_dir/clickhouse_client_command_line_functions.sh" ; then + echo "Error : unable to load dependency : $this_script_dir/clickhouse_client_command_line_functions.sh" >&2 + exit 1 +fi +unset this_script_dir + +# non-local environment variables in use +unset my_properties +unset database_table_list +unset database_name +declare -A my_properties +declare -a database_table_list +database_name="" +database_table_list_filepath="$(pwd)/dtcd_database_table_list.txt" +drop_table_result_filepath="$(pwd)/dtcd_drop_table_result.txt" + +function usage() { + echo "usage: drop_tables_in_clickhouse_database.sh properties_filepath database" >&2 + echo " database must be in {blue, green}" >&2 +} + +function initialize_main() { + local properties_filepath=$1 + local database_to_drop_tables_from=$2 + if ! parse_property_file "$properties_filepath" my_properties ; then + usage + return 1 + fi + if ! initialize_clickhouse_client_command_line_functions ; then + usage + return 1 + fi + remove_credentials_from_properties my_properties # no longer needed - remove for security + if [ "$database_to_drop_tables_from" == "blue" ] ; then + database_name="${my_properties['clickhouse_blue_database_name']}" + else + if [ "$database_to_drop_tables_from" == "green" ] ; then + database_name="${my_properties['clickhouse_green_database_name']}" + else + echo "Error : database must be one of {blue, green}" >&2 + usage + return 1 + fi + fi + return 0 +} + DESTINATION_DATABASE="name_of_clickhouse_blue_database" read -p 'enter clickhouse password: ' password echo "password was $password" @@ -80,3 +134,21 @@ while [ $pos -lt 1 ] ; do clickhouse client --host clickhouse_hostname_goes_here --port clickhouse_port_goes_here --user username_goes_here --password="$password" <<< "DROP MATERIALIZED VIEW $DESTINATION_DATABASE.${view_name[$pos]}" pos=$(($pos+1)) done + + +function main() { + local properties_filepath=$1 + local database_to_drop_tables_from=$2 + local exit_status=0 + if ! 
initialize_main "$properties_filepath" "$database_to_drop_tables_from" || + ! selected_database_exists || + ! set_database_table_list || + ! drop_all_database_tables || + ! selected_database_is_empty ; then + exit_status=1 + fi + shutdown_main_and_clean_up + return $exit_status +} + +main "$1" "$2" diff --git a/scripts/clickhouse_import_support/drop_tables_in_clickhouse_database_green.sh b/scripts/clickhouse_import_support/drop_tables_in_clickhouse_database_green.sh deleted file mode 100755 index e1d68ff2..00000000 --- a/scripts/clickhouse_import_support/drop_tables_in_clickhouse_database_green.sh +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env bash - -DESTINATION_DATABASE="name_of_clickhouse_green_database" -read -p 'enter clickhouse password: ' password -echo "password was $password" - -#TODO : read the table name list out of clickhouse and include any constructed views -table_name=() -table_name+=('allele_specific_copy_number') -table_name+=('alteration_driver_annotation') -table_name+=('authorities') -table_name+=('cancer_study') -table_name+=('cancer_study_tags') -table_name+=('clinical_attribute_meta') -table_name+=('clinical_event') -table_name+=('clinical_event_data') -table_name+=('clinical_patient') -table_name+=('clinical_sample') -table_name+=('cna_event') -table_name+=('copy_number_seg') -table_name+=('copy_number_seg_file') -table_name+=('cosmic_mutation') -table_name+=('data_access_tokens') -table_name+=('fraction_genome_altered') -table_name+=('gene') -table_name+=('gene_alias') -table_name+=('gene_panel') -table_name+=('gene_panel_list') -table_name+=('generic_entity_properties') -table_name+=('geneset') -table_name+=('geneset_gene') -table_name+=('geneset_hierarchy_leaf') -table_name+=('geneset_hierarchy_node') -table_name+=('genetic_alteration') -table_name+=('genetic_entity') -table_name+=('genetic_profile') -table_name+=('genetic_profile_link') -table_name+=('genetic_profile_samples') -table_name+=('gistic') -table_name+=('gistic_to_gene') 
-table_name+=('info') -table_name+=('mut_sig') -table_name+=('mutation') -table_name+=('mutation_count') -table_name+=('mutation_count_by_keyword') -table_name+=('mutation_event') -table_name+=('patient') -table_name+=('reference_genome') -table_name+=('reference_genome_gene') -table_name+=('resource_definition') -table_name+=('resource_patient') -table_name+=('resource_sample') -table_name+=('resource_study') -table_name+=('sample') -table_name+=('sample_cna_event') -table_name+=('sample_list') -table_name+=('sample_list_list') -table_name+=('sample_profile') -table_name+=('structural_variant') -table_name+=('type_of_cancer') -table_name+=('users') -table_name+=('sample_to_gene_panel_derived') -table_name+=('gene_panel_to_gene_derived') -table_name+=('sample_derived') -table_name+=('genomic_event_derived') -table_name+=('clinical_data_derived') -table_name+=('clinical_event_derived') -table_name+=('genetic_alteration_cna_derived') -table_name+=('genetic_alteration_numerical_derived') -table_name+=('generic_assay_data_derived') -table_name+=('sample_list_columnstore') -#TODO a separate command is needed for dropping views, so make sure to keep the table list and the view list distinct (read both out of the clickhouse service) -#view_name+=('sample_list_columnstore_mv") -pos=0 -while [ $pos -lt 63 ] ; do - clickhouse client --host clickhouse_hostname_goes_here --port clickhouse_port_goes_here --user username_goes_here --password="$password" <<< "DROP TABLE $DESTINATION_DATABASE.${table_name[$pos]}" - pos=$(($pos+1)) -done -while [ $pos -lt 1 ] ; do - clickhouse client --host clickhouse_hostname_goes_here --port clickhouse_port_goes_here --user username_goes_here --password="$password" <<< "DROP MATERIALIZED VIEW $DESTINATION_DATABASE.${view_name[$pos]}" - pos=$(($pos+1)) -done diff --git a/scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh b/scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh index ccb9c9a1..a1e419d6 100755 --- 
a/scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh +++ b/scripts/clickhouse_import_support/drop_tables_in_mysql_database.sh @@ -25,7 +25,7 @@ drop_table_result_filepath="$(pwd)/dtmd_drop_table_result.txt" function usage() { echo "usage: drop_tables_in_mysql_database.sh properties_filepath database" >&2 - echo " database must be in {blue, green, shelved}" >&2 + echo " database must be in {blue, green}" >&2 } function initialize_main() { @@ -46,14 +46,9 @@ function initialize_main() { if [ "$database_to_drop_tables_from" == "green" ] ; then database_name="${my_properties['mysql_green_database_name']}" else - if [ "$database_to_drop_tables_from" == "shelved" ] ; then - database_name="${my_properties['shelved_database_name']}" - else - echo "Error : database must be one of {blue, green, shelved}" >&2 - usage - return 1 - - fi + echo "Error : database must be one of {blue, green}" >&2 + usage + return 1 fi fi return 0 @@ -127,7 +122,6 @@ function drop_all_database_tables() { fi pos=$(($pos+1)) done - # TODO : add check that database has no tables return 0 } diff --git a/scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties b/scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties index 43a15449..3938317a 100644 --- a/scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties +++ b/scripts/clickhouse_import_support/manage_cbioportal_databases_tool.properties @@ -25,7 +25,7 @@ mysql_server_disk_usage_invisible_to_user_megabytes= mysql_server_disk_consumption_anticipated_during_import_megabytes= mysql_blue_database_name= mysql_green_database_name= -mysql_shelved_database_name= +mysql_update_management_database= clickhouse_server_username= clickhouse_server_password= clickhouse_server_host_name= diff --git a/scripts/clickhouse_import_support/mysql_command_line_functions.sh b/scripts/clickhouse_import_support/mysql_command_line_functions.sh index 99a6bbc1..e049f8a8 100644 --- 
a/scripts/clickhouse_import_support/mysql_command_line_functions.sh +++ b/scripts/clickhouse_import_support/mysql_command_line_functions.sh @@ -7,6 +7,7 @@ configured_mysql_defaults_config_file_path="" sql_data_field_value="" declare -a sql_data_array database_exists_filepath="$(pwd)/mclf_database_exists.txt" +table_exists_filepath="$(pwd)/mclf_table_exists.txt" database_table_list_filepath="$(pwd)/mclf_database_table_list.txt" function purge_mysql_credentials_from_environment_variables() { @@ -37,11 +38,13 @@ function initialize_mysql_command_line_functions() { function shutdown_mysql_command_line_functions() { rm -f "$configured_mysql_defaults_config_file_path" rm -f "$database_exists_filepath" + rm -f "$table_exists_filepath" rm -f "$database_table_list_filepath" unset configured_mysql_defaults_config_file_path unset sql_data_field_value unset sql_data_array unset database_exists_filepath + unset table_exists_filepath unset database_table_list_filepath } @@ -170,6 +173,17 @@ function database_exists() { return 0 } +function table_exists() { + local database_name=$1 + local table_name=$2 + local statement="DESCRIBE TABLE \`$database_name\`.\`$table_name\`" + if ! execute_sql_statement_via_mysql "$statement" "$table_exists_filepath" ; then + echo "Warning : unable to find table $table_name in $database_name using : $statement" >&2 + return 1 + fi + return 0 +} + function database_is_empty() { local database_name=$1 local statement="SHOW TABLES IN \`$database_name\`" diff --git a/scripts/clickhouse_import_support/set_update_process_state.sh b/scripts/clickhouse_import_support/set_update_process_state.sh new file mode 100755 index 00000000..73ed3cc6 --- /dev/null +++ b/scripts/clickhouse_import_support/set_update_process_state.sh @@ -0,0 +1,165 @@ +#!/usr/bin/env bash + +# load dependencies +unset this_script_dir +this_script_dir="$(dirname "$(readlink -f $0)")" +if ! 
source "$this_script_dir/parse_property_file_functions.sh" ; then + echo "Error : unable to load dependency : $this_script_dir/parse_property_file_functions.sh" >&2 + exit 1 +fi +if ! source "$this_script_dir/mysql_command_line_functions.sh" ; then + echo "Error : unable to load dependency : $this_script_dir/mysql_command_line_functions.sh" >&2 + exit 1 +fi +unset this_script_dir + +# other non-local environment variables in use +unset my_properties +unset update_management_database_name +update_management_database_name="" +unset database_currently_in_production +database_currently_in_production="" +declare -A my_properties +record_count_filepath="$(pwd)/update_status_record_count.txt" +update_status_filepath="$(pwd)/update_status.txt" +current_production_database_filepath="$(pwd)/current_production_database.txt" + +function usage() { + echo "usage: set_update_process_state.sh properties_filepath state" >&2 + echo " state must be in {running, complete, abandoned}" >&2 +} + +function initialize_main() { + local properties_filepath=$1 + local state=$2 + if ! parse_property_file "$properties_filepath" my_properties ; then + usage + return 1 + fi + if ! initialize_mysql_command_line_functions ; then + usage + return 1 + fi + update_management_database_name="${my_properties['mysql_update_management_database']}" + ### TODO : fix this + ### remove_credentials_from_properties my_properties # no longer needed - remove for security + if ! [ "$state" == "running" ] && ! [ "$state" == "complete" ] && ! 
[ "$state" == "abandoned" ] ; then + echo "Error : state must be one of {running, complete, abandoned}" >&2 + usage + return 1 + fi + return 0 +} + +function delete_output_stream_files() { + rm -f "$record_count_filepath" + rm -f "$update_status_filepath" + rm -f "$current_production_database_filepath" +} + +function shutdown_main_and_clean_up() { + shutdown_mysql_command_line_functions + delete_output_stream_files + unset my_properties + unset record_count_filepath + unset update_status_filepath + unset current_production_database_filepath +} + +# Checks that the update management database and its update_status table exist and that the table holds exactly one row; returns 1 on any failure. +function process_state_table_is_valid() { + if ! database_exists "$update_management_database_name" ; then + echo "Error : could not proceed with setting update status because database does not exist: $update_management_database_name" >&2 + return 1 + fi + if ! table_exists "$update_management_database_name" "update_status" ; then + echo "Error : could not proceed with setting update status because table 'update_status' does not exist in database : $update_management_database_name" >&2 + return 1 + fi + local get_record_count_statement="SELECT count(*) AS record_count from \`$update_management_database_name\`.update_status;" + if ! execute_sql_statement_via_mysql "$get_record_count_statement" "$record_count_filepath" ; then + echo "Error : could not validate process_state table. Mysql statement failed to execute properly : $get_record_count_statement" >&2 + return 1 + fi + set_sql_data_array_from_file "$record_count_filepath" 0 + local rowcount="${sql_data_array[0]}" + if [[ "$rowcount" -ne 1 ]] ; then + echo "Error : table update_status in database $update_management_database_name contains $rowcount rows instead of exactly 1 row as expected." 
>&2 + return 1 + fi + return 0 +} + +function set_state_in_status_table() { + local state=$1 + local update_status_statement="" + if [ "$state" == "running" ] ; then + update_status_statement="UPDATE \`$update_management_database_name\`.update_status SET update_process_status = 'running' WHERE update_process_status = 'complete'; SELECT ROW_COUNT()" + else + if [ "$state" == "complete" ] ; then + update_status_statement="UPDATE \`$update_management_database_name\`.update_status SET update_process_status = 'complete', time_of_last_update_process_completion = NOW(), current_database_in_production = IF(current_database_in_production = 'blue', 'green', 'blue') WHERE update_process_status = 'running'; SELECT ROW_COUNT()" + else + # handle abandoned attempts to import (keep previous color and timestamp but return to state "completed") + update_status_statement="UPDATE \`$update_management_database_name\`.update_status SET update_process_status = 'complete' WHERE update_process_status = 'running'; SELECT ROW_COUNT()" + fi + fi + if ! execute_sql_statement_via_mysql "$update_status_statement" "$update_status_filepath" ; then + echo "Error : failed to execute SQL statement \"$update_status_statement\"" >&2 + return 1 + fi + set_sql_data_array_from_file "$update_status_filepath" 0 + local rowcount="${sql_data_array[0]}" + if [ "$rowcount" -eq 1 ] ; then + echo "Status table updated" + return 0 + fi + if [ "$rowcount" -eq 0 ] ; then + if [ "$state" == "running" ] ; then + echo "Error : cannot set process status to running because it appears to already be in a running status. You should determine which database is actually in production currently, and manually correct the update_status table in database $update_management_database_name" >&2 + return 1 + else + echo "Warning : cannot set process status to complete because it appears to already be complete. 
You should determine which database is actually in production currently, and validate the update_status table in database $update_management_database_name" >&2 + return 0 + fi + fi + if [ "$rowcount" -eq -1 ] ; then + echo "Error : an unexpected error occurred trying to execute this mysql statement : $update_status_statement" >&2 + return 1 + fi + return 0 +} + +function set_database_currently_in_production() { + local get_current_database_statement="SELECT current_database_in_production FROM \`$update_management_database_name\`.update_status;" + if ! execute_sql_statement_via_mysql "$get_current_database_statement" "$current_production_database_filepath" ; then + echo "Error : could not retrieve the current database in production. Mysql statement failed to execute properly : $get_current_database_statement" >&2 + return 1 + fi + set_sql_data_array_from_file "$current_production_database_filepath" 0 + database_currently_in_production="${sql_data_array[0]}" + return 0 +} + +function output_database_currently_in_production() { + local state=$1 + if [ "$state" == "running" ] ; then + echo "$database_currently_in_production : current production database" + fi +} + +function main() { + local properties_filepath=$1 + local state=$2 + local exit_status=0 + if ! initialize_main "$properties_filepath" "$state" || + ! process_state_table_is_valid || + ! set_database_currently_in_production || + ! 
set_state_in_status_table "$state" ; then + exit_status=1 + fi + output_database_currently_in_production "$state" + shutdown_main_and_clean_up + return $exit_status +} + +main "$1" "$2" diff --git a/scripts/clickhouse_import_support/sling_command_line_functions.sh b/scripts/clickhouse_import_support/sling_command_line_functions.sh index e60cefc1..4936e5b5 100644 --- a/scripts/clickhouse_import_support/sling_command_line_functions.sh +++ b/scripts/clickhouse_import_support/sling_command_line_functions.sh @@ -51,7 +51,6 @@ function write_selected_clickhouse_connection_to_env_file() { echo " password: ${my_properties['clickhouse_server_password']}" >> "$env_file" echo " port: \"${my_properties['clickhouse_server_port']}\"" >> "$env_file" echo " user: ${my_properties['clickhouse_server_username']}" >> "$env_file" -# echo " http_url: https://$uname:$pw@$clickhost:${clickport}/${db_name}$additional_args" >> "$env_file" echo >> "$env_file" } @@ -75,7 +74,8 @@ function write_sling_env_file() { write_selected_mysql_connection_to_env_file "$env_file" "$database_to_transfer" write_selected_clickhouse_connection_to_env_file "$env_file" "$database_to_transfer" echo "variables: {}" >> "$env_file" - if ! [ "$(cat $env_file | wc -l)" == "22" ] ; then +### TODO: ADJUST LINECOUNT + if ! [ "$(cat $env_file | wc -l)" == "23" ] ; then echo "Error : could not successfully write default mysql properties to file $env_file" >&2 return 1 fi