Skip to content

Commit

Permalink
add sling copy scripts, properties adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
importer system account committed Oct 2, 2024
1 parent 019b40a commit 901afad
Show file tree
Hide file tree
Showing 7 changed files with 465 additions and 17 deletions.
6 changes: 6 additions & 0 deletions scripts/clickhouse_import_support/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,9 @@ exist in a mysql database. This will occur at the end of an import process in or
clear the data from the prior production database (or the backup copy database) in order
to make the database empty and available for reuse during the next cycle of cancer study
import.

## copy\_mysql\_database\_tables\_to\_clickhouse.sh
This bash script uses the *sling* command line interface tool to copy data from all tables
present in the selected mysql database (green or blue) into the corresponding clickhouse
database. Tables that fail to copy are retried on subsequent attempts. Copy results are
validated by record counts.
30 changes: 20 additions & 10 deletions scripts/clickhouse_import_support/clone_mysql_database.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
#!/usr/bin/env bash

# bash declaration dependencies
source parse_property_file_functions.sh
source mysql_command_line_functions.sh

# non-local environment variables in use
# load dependencies
unset this_script_dir
this_script_dir="$(dirname "$(readlink -f $0)")"
if ! source "$this_script_dir/parse_property_file_functions.sh" ; then
echo "Error : unable to load dependency : $this_script_dir/parse_property_file_functions.sh" >&2
exit 1
fi
if ! source "$this_script_dir/mysql_command_line_functions.sh" ; then
echo "Error : unable to load dependency : $this_script_dir/mysql_command_line_functions.sh" >&2
exit 1
fi
unset this_script_dir

# other non-local environment variables in use
unset my_properties
unset database_table_list
unset source_database_name
Expand Down Expand Up @@ -32,26 +41,27 @@ function initialize_main() {
usage
return 1
fi
if ! initialize_mysql_command_line_functions ; then # this also purges the mysql credentials from the environment for security
if ! initialize_mysql_command_line_functions ; then
usage
return 1
fi
remove_credentials_from_properties my_properties # no longer needed - remove for security
if [ "$database_to_clone_tables_from" == "blue" ] ; then
source_database_name="${my_properties['blue_database_name']}"
source_database_name="${my_properties['mysql_blue_database_name']}"
else
if [ "$database_to_clone_tables_from" == "green" ] ; then
source_database_name="${my_properties['green_database_name']}"
source_database_name="${my_properties['mysql_green_database_name']}"
else
echo "Error : database_to_clone_tables_from must be one of {blue, green}" >&2
usage
return 1
fi
fi
if [ "$database_to_clone_tables_to" == "blue" ] ; then
destination_database_name="${my_properties['blue_database_name']}"
destination_database_name="${my_properties['mysql_blue_database_name']}"
else
if [ "$database_to_clone_tables_to" == "green" ] ; then
destination_database_name="${my_properties['green_database_name']}"
destination_database_name="${my_properties['mysql_green_database_name']}"
else
echo "Error : database_to_clone_tables_to must be one of {blue, green}" >&2
usage
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env bash

# load dependencies
# The function libraries are resolved relative to the real location of this
# script, so the script can be started from any working directory.
unset this_script_dir
# "$0" is quoted so that a script path containing whitespace still resolves
this_script_dir="$(dirname "$(readlink -f "$0")")"
if ! source "$this_script_dir/parse_property_file_functions.sh" ; then
    echo "Error : unable to load dependency : $this_script_dir/parse_property_file_functions.sh" >&2
    exit 1
fi
if ! source "$this_script_dir/sling_command_line_functions.sh" ; then
    echo "Error : unable to load dependency : $this_script_dir/sling_command_line_functions.sh" >&2
    exit 1
fi
# the directory is only needed while loading; do not leave it in the environment
unset this_script_dir

# Write the one-line command synopsis for this script to stdout.
function usage() {
    printf '%s\n' "usage: copy_mysql_database_tables_to_clickhouse.sh properties_file database"
}

# other non-local environment variables in use
# Any values inherited from the calling environment are discarded first, then
# the variables are re-created with the types the functions below expect.
unset my_properties
unset database_table_list
unset clickhouse_destination_database_name
unset mysql_source_database_name
# property name -> value, filled by parse_property_file in initialize_main
declare -A my_properties
# names of the tables to copy, filled by set_database_table_list
declare -a database_table_list
# table name -> "true" once a table has been copied and verified
declare -A table_has_been_copied_and_verified
# selected in initialize_main according to the requested color (blue or green)
clickhouse_destination_database_name=""
mysql_source_database_name=""
# file in the directory the script was started from; set_database_table_list
# hands it to execute_sql_statement_via_sling and then reads it back
database_table_list_filepath="$(pwd)/cmd_database_table_list.txt"

# Validate the requested database color, load the properties file, select the
# source (mysql) and destination (clickhouse) database names for that color,
# and initialize the sling helper functions.
# Globals:   my_properties (written)
#            clickhouse_destination_database_name (written)
#            mysql_source_database_name (written)
# Arguments: $1 - path to the properties file
#            $2 - database to transfer : 'blue' or 'green'
# Returns:   0 on success, non-zero on failure (a message goes to stderr)
function initialize_main() {
    # Use the arguments that are actually passed in, rather than depending on
    # the caller's local variables being visible through dynamic scoping.
    local properties_filepath=$1
    local database_to_transfer=$2
    if [ "$database_to_transfer" != "blue" ] && [ "$database_to_transfer" != "green" ] ; then
        echo "Error : argument for database_to_transfer must be either 'blue' or 'green'" >&2
        return 1
    fi
    if ! parse_property_file "$properties_filepath" my_properties ; then
        usage
        return 1
    fi
    if [ "$database_to_transfer" == "blue" ] ; then
        clickhouse_destination_database_name="${my_properties['clickhouse_blue_database_name']}"
        mysql_source_database_name="${my_properties['mysql_blue_database_name']}"
    else
        clickhouse_destination_database_name="${my_properties['clickhouse_green_database_name']}"
        mysql_source_database_name="${my_properties['mysql_green_database_name']}"
    fi
    # An empty name would make the later table-list query match no schema, so
    # refuse to continue when the properties file does not supply both names.
    if [ -z "$clickhouse_destination_database_name" ] || [ -z "$mysql_source_database_name" ] ; then
        echo "Error : properties file does not define the mysql and clickhouse database names for '$database_to_transfer'" >&2
        return 1
    fi
    if ! initialize_sling_command_line_functions "$database_to_transfer" ; then
        usage
        return 1
    fi
    # credentials are not used by this script after sling initialization
    remove_credentials_from_properties my_properties
}

# Confirm that the clickhouse destination database is ready to receive a copy.
# Globals: clickhouse_destination_database_name (read)
# Returns: 0 when the database exists and is empty
#          1 when clickhouse_database_exists reports failure
#          2 when clickhouse_database_is_empty reports failure
function destination_database_exists_and_is_empty() {
    if clickhouse_database_exists "$clickhouse_destination_database_name" ; then
        if clickhouse_database_is_empty "$clickhouse_destination_database_name" ; then
            return 0
        fi
        echo "Error : could not proceed with database copying because destination database is not empty: $clickhouse_destination_database_name" >&2
        return 2
    fi
    echo "Error : could not proceed with database copying because destination database does not exist: $clickhouse_destination_database_name" >&2
    return 1
}

# Query the mysql source database for its base tables and store their names
# in database_table_list.
# Globals: mysql_source_database_name (read), database_table_list_filepath (read),
#          sql_data_array (reset, then filled by set_sql_data_array_from_file),
#          database_table_list (written)
# Returns: 0 on success, 1 when the query or the parsing of its output fails
function set_database_table_list() {
    local statement="SELECT table_name FROM INFORMATION_SCHEMA.tables WHERE table_type='BASE TABLE' AND table_schema='$mysql_source_database_name'"
    local table_name
    # start from a clean output file so stale results are never read back
    rm -f "$database_table_list_filepath"
    if ! execute_sql_statement_via_sling "$statement" "mysql" "$database_table_list_filepath" ; then
        echo "Warning : failed to execute mysql statement : $statement" >&2
        return 1
    fi
    unset sql_data_array
    if ! set_sql_data_array_from_file "$database_table_list_filepath" 0 ; then
        return 1
    fi
    # Copy element by element. An unquoted array expansion would word-split
    # the names and expand any glob characters against the files in the
    # working directory. Empty entries are still dropped, as before.
    database_table_list=()
    for table_name in "${sql_data_array[@]}" ; do
        if [ -n "$table_name" ] ; then
            database_table_list+=("$table_name")
        fi
    done
    return 0
}

# Release the state held by this script: call delete_output_stream_files
# (defined outside this block), then drop the script's global variables.
function shutdown_main_and_clean_up() {
    #TODO restore
    #shutdown_sling_command_line_functions
    delete_output_stream_files
    # data structures
    unset my_properties database_table_list table_has_been_copied_and_verified
    # file paths
    unset database_table_list_filepath record_count_comparison_filepath
}

# Report whether a table has already been copied and verified.
# Globals:   table_has_been_copied_and_verified (read)
# Arguments: $1 - table name
# Returns:   0 when the flag for the table is "true", 1 otherwise
function successful_copy_verified_flag_has_been_set() {
    local queried_table=$1
    if [ "${table_has_been_copied_and_verified[$queried_table]}" != "true" ] ; then
        return 1
    fi
    return 0
}

# Record that a table has been copied and verified.
# Globals:   table_has_been_copied_and_verified (written)
# Arguments: $1 - table name
function set_successful_copy_verified_flag() {
    local verified_table=$1
    table_has_been_copied_and_verified["$verified_table"]="true"
}

# Make one pass over database_table_list, copying each table that has not yet
# been copied and verified. Tables verified on an earlier pass are skipped.
# Globals: database_table_list (read), mysql_source_database_name (read),
#          clickhouse_destination_database_name (read),
#          table_has_been_copied_and_verified (written through
#          set_successful_copy_verified_flag)
# Outputs: one progress line per attempted table on stdout; warnings on stderr
# Returns: 0 when every attempted table was copied and verified, 1 otherwise
function copy_all_database_tables_with_sling() {
    local table_name
    local exit_status=0
    # Iterating over the elements (rather than advancing an index by hand)
    # guarantees that skipping a table still moves on to the next one; an
    # index that is not advanced before 'continue' would loop forever.
    for table_name in "${database_table_list[@]}" ; do
        if successful_copy_verified_flag_has_been_set "$table_name" ; then
            # table successfully copied on a previous pass
            continue
        fi
        echo "attempting to copy data in table $table_name using sling"
        if ! transfer_table_data_via_sling "$mysql_source_database_name" "$clickhouse_destination_database_name" "$table_name" "TODOdeletefile" ; then
            echo "Warning : failure to copy table $table_name" >&2
            exit_status=1 # any failed table copies cause an eventual failure status to be returned
        elif ! destination_table_matches_source_table "$table_name" ; then
            echo "Warning : failure to verify copy of table $table_name" >&2
            exit_status=1 # any failed table copies cause an eventual failure status to be returned
        else
            set_successful_copy_verified_flag "$table_name"
        fi
    done
    return $exit_status
}

# Run copy_all_database_tables_with_sling up to three times, stopping at the
# first pass that succeeds.
# Returns: 0 as soon as a pass succeeds, 1 when all three passes fail
function copy_all_database_tables_with_sling_allow_retry() {
    local tries_left
    for (( tries_left = 3; tries_left > 0; tries_left-- )) ; do
        #TODO record iteration start timestamp
        copy_all_database_tables_with_sling && return 0
        #TODO pause for the minimum try duration (5 minutes?)
    done
    return 1
}

# Entry point: initialize, check the destination database, build the table
# list, and copy the tables (with retries). Clean-up always runs, whether or
# not the earlier steps succeeded.
# Arguments: $1 - path to the properties file
#            $2 - database to transfer : 'blue' or 'green'
# Returns:   0 when every step succeeded, 1 otherwise
function main() {
    local properties_filepath=$1
    local database_to_transfer=$2
    local exit_status=0
    # each step runs only when the previous one succeeded
    initialize_main "$properties_filepath" "$database_to_transfer" &&
        destination_database_exists_and_is_empty &&
        set_database_table_list &&
        copy_all_database_tables_with_sling_allow_retry ||
        exit_status=1
    shutdown_main_and_clean_up
    return $exit_status
}

# Run the copy and hand main's status to the calling process, so that a failed
# copy is not reported as success.
main "$1" "$2"
exit $?
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,18 @@
mysql_server_username=
mysql_server_password=
mysql_server_host_name=
mysql_server_port=
mysql_server_additional_args=
disk_capacity_mysql_data_filesystem_megabytes=
disk_usage_invisible_to_mysql_user_megabytes=
disk_consumption_anticipated_during_import_megabytes=
blue_database_name=
green_database_name=
shelved_database_name=
mysql_server_disk_capacity_megabytes=
mysql_server_disk_usage_invisible_to_user_megabytes=
mysql_server_disk_consumption_anticipated_during_import_megabytes=
mysql_blue_database_name=
mysql_green_database_name=
mysql_shelved_database_name=
clickhouse_server_username=
clickhouse_server_password=
clickhouse_server_host_name=
clickhouse_server_port=
clickhouse_server_additional_args=
clickhouse_blue_database_name=
clickhouse_green_database_name=
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ function set_sql_data_field_value_from_record() {
while [ $pos -lt $record_string_length ] ; do
local character_at_position="${record_string:$pos:1}"
# a newline should occur at the end of the read line, and only there. Embedded newlines are encoded with '\n'
if [ "$character_at_position" == "$NL" ] ; then
if [ "$character_at_position" == "$LF" ] ; then
field_index=$((field_index+1))
if [ "$field_index" -gt "$column_number" ] ; then
# field has been completely parsed
Expand Down
12 changes: 12 additions & 0 deletions scripts/clickhouse_import_support/parse_property_file_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,15 @@ function parse_property_file() {
done < $property_file_path
return 0
}

# Delete the mysql and clickhouse username/password entries from a properties
# associative array so they do not linger in the environment.
# Arguments: $1 - name of the associative array to scrub
# Returns:   0 on success, 1 when $1 does not name an associative array
function remove_credentials_from_properties() {
    # working variables are local so that nothing leaks into the caller's scope
    local associative_array_name=$1 # array names must be proper identifiers (no spaces)
    local key_name
    if ! variable_name_refers_to_an_associative_array "$associative_array_name" ; then
        echo "error: variable name '$associative_array_name' was passed to function remove_credentials_from_properties() but was not available in the environment, or did not refer to a created associative array." >&2
        return 1
    fi
    for key_name in "mysql_server_username" "mysql_server_password" "clickhouse_server_username" "clickhouse_server_password" ; do
        # The quoted "name[key]" form removes a single element without eval
        # and without exposing the subscript to pathname expansion.
        unset "${associative_array_name}[$key_name]"
    done
    return 0
}
Loading

0 comments on commit 901afad

Please sign in to comment.