From cacb480576362573dc0ec66b6a6e45bd1f3f7662 Mon Sep 17 00:00:00 2001 From: Nitin Garg Date: Tue, 17 Sep 2024 05:53:36 +0000 Subject: [PATCH 1/4] Move code to access bucket iam role to backend --- .../testing_on_gke/examples/dlio/run_tests.py | 12 +++- .../testing_on_gke/examples/fio/run_tests.py | 14 ++++- .../testing_on_gke/examples/run-gke-tests.sh | 14 +---- .../examples/utils/run_tests_common.py | 58 +++++++++++++++++++ 4 files changed, 83 insertions(+), 15 deletions(-) diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py index 2fc859782f..7be36bcdb5 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/run_tests.py @@ -30,7 +30,7 @@ # local imports from other directories sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils')) -from run_tests_common import escape_commas_in_string, parse_args, run_command +from run_tests_common import escape_commas_in_string, parse_args, run_command, add_iam_role_for_buckets # local imports from same directory import dlio_workload @@ -79,6 +79,16 @@ def main(args) -> None: args.instance_id, args.machine_type, ) + buckets = [dlioWorkload.bucket for dlioWorkload in dlioWorkloads] + role = 'roles/storage.objectUser' + add_iam_role_for_buckets( + buckets, + role, + args.project_id, + args.project_number, + args.namespace, + args.ksa, + ) for helmInstallCommand in helmInstallCommands: print(f'{helmInstallCommand}') if not args.dry_run: diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py b/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py index 0d946d80ab..599852ae78 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/run_tests.py @@ -29,7 +29,7 @@ # local imports from other directories sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'utils')) -from run_tests_common import escape_commas_in_string, parse_args, run_command +from run_tests_common import escape_commas_in_string, parse_args, run_command, add_iam_role_for_buckets # local imports from same directory import fio_workload @@ -80,6 +80,16 @@ def main(args) -> None: args.instance_id, args.machine_type, ) + buckets = (fioWorkload.bucket for fioWorkload in fioWorkloads) + role = 'roles/storage.objectUser' + add_iam_role_for_buckets( + buckets, + role, + args.project_id, + args.project_number, + args.namespace, + args.ksa, + ) for helmInstallCommand in helmInstallCommands: print(f'{helmInstallCommand}') if not args.dry_run: @@ -88,4 +98,4 @@ def main(args) -> None: if __name__ == '__main__': args = parse_args() -main(args) + main(args) diff --git a/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh b/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh index fde7f4e3f6..29157f1e02 100755 --- a/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh +++ b/perfmetrics/scripts/testing_on_gke/examples/run-gke-tests.sh @@ -433,15 +433,6 @@ function createKubernetesServiceAccountForCluster() { kubectl config view --minify | grep namespace: } -function addGCSAccessPermissions() { - test -f "${workload_config}" - grep -wh '\"bucket\"' "${workload_config}" | cut -d: -f2 | cut -d, -f1 | cut -d \" -f2 | sort | uniq | grep -v ' ' | while read workload_bucket; do - gcloud storage buckets add-iam-policy-binding gs://${workload_bucket} \ - --member "principal://iam.googleapis.com/projects/${project_number}/locations/global/workloadIdentityPools/${project_id}.svc.id.goog/subject/ns/${appnamespace}/sa/${ksa}" \ - --role "roles/storage.objectUser" - done -} - function ensureGcsfuseCode() { echo "Ensuring we have gcsfuse code ..." # clone gcsfuse code if needed @@ -519,12 +510,12 @@ function deleteAllPods() { function deployAllFioHelmCharts() { echo "Deploying all fio helm charts ..." - cd "${gke_testing_dir}"/examples/fio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --machine-type="${machine_type}" && cd - + cd "${gke_testing_dir}"/examples/fio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --machine-type="${machine_type}" --project-id=${project_id} --project-number=${project_number} --namespace=${appnamespace} --ksa=${ksa} && cd - } function deployAllDlioHelmCharts() { echo "Deploying all dlio helm charts ..." - cd "${gke_testing_dir}"/examples/dlio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --machine-type="${machine_type}" && cd - + cd "${gke_testing_dir}"/examples/dlio && python3 ./run_tests.py --workload-config "${workload_config}" --instance-id ${instance_id} --machine-type="${machine_type}" --project-id=${project_id} --project-number=${project_number} --namespace=${appnamespace} --ksa=${ksa} && cd - } function listAllHelmCharts() { @@ -620,7 +611,6 @@ createKubernetesServiceAccountForCluster ensureGcsfuseCode # GCP/GKE configuration dependent on GCSFuse/CSI driver source code -addGCSAccessPermissions createCustomCsiDriverIfNeeded # Run latest workload configuration diff --git a/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py b/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py index 8317ec587e..322c9c6b45 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py +++ b/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py @@ -70,6 +70,33 @@ def parse_args(): help='Machine-type of the GCE VM or GKE cluster node e.g. n2-standard-32', required=True, ) + parser.add_argument( + '--project-id', + metavar='project-id of the user gke cluster', + help='project-id of the user gke cluster e.g. gcs-fuse-test', + required=True, + ) + parser.add_argument( + '--project-number', + metavar='project-number of the user gke cluster', + help='project-number of the user gke cluster e.g. 927584127901', + required=True, + type=int, + ) + parser.add_argument( + '--namespace', + metavar='kubectl namespace of the user', + help='kubectl namespace of the user e.g. default', + required=False, + default='default', + ) + parser.add_argument( + '--ksa', + metavar='kubernetes service account of the user', + help='kubernetest service account of the user e.g. default', + required=False, + default='default', + ) parser.add_argument( '-n', '--dry-run', @@ -84,6 +111,9 @@ def parse_args(): for argument in [ 'instance_id', 'machine_type', + 'project_id', + 'namespace', + 'ksa', ]: value = getattr(args, argument) if not value.strip(): @@ -98,3 +128,31 @@ def parse_args(): ) return args + + +def add_iam_role_for_buckets( + buckets: set, + role: str, + project_id: str, + project_number: str, + namespace: str, + ksa: str, +): + print( + f'Adding role {role} to all the relevant buckets to' + f' ksa={ksa} in namespace={namespace} ...\n\n' + ) + for bucket in buckets: + command = ( + f'gcloud storage buckets add-iam-policy-binding gs://{bucket} --member' + f' principal://iam.googleapis.com/projects/{project_number}/locations/global/workloadIdentityPools/{project_id}.svc.id.goog/subject/ns/{namespace}/sa/{ksa} --role' + f' {role}' + ) + print(command) + ret = run_command(command) + if ret != 0: + raise Exception( + f'Failed to add role {role} for {bucket}: exit-code={ret}' + ) + + pass From 89f122c6e6a7d0feeec4fb68e2e761e7d75895df Mon Sep 17 00:00:00 2001 From: Nitin Garg Date: Wed, 25 Sep 2024 12:17:37 +0000 Subject: [PATCH 2/4] s/parseLogParserArguments/parse_arguments/g --- .../scripts/testing_on_gke/examples/dlio/parse_logs.py | 4 ++-- perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py | 4 ++-- .../testing_on_gke/examples/utils/parse_logs_common.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py index f8f4fc7a6a..7595f7c172 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py @@ -27,7 +27,7 @@ sys.path.append("../") import dlio_workload from utils.utils import get_memory, get_cpu, unix_to_timestamp, standard_timestamp, is_mash_installed, get_memory_from_monitoring_api, get_cpu_from_monitoring_api, timestamp_to_epoch -from utils.parse_logs_common import ensureDir, download_gcs_objects, parseLogParserArguments, SUPPORTED_SCENARIOS +from utils.parse_logs_common import ensureDir, download_gcs_objects, parse_arguments, SUPPORTED_SCENARIOS _LOCAL_LOGS_LOCATION = "../../bin/dlio-logs/logs" @@ -296,7 +296,7 @@ def writeRecordsToCsvOutputFile(output: dict, output_file_path: str): if __name__ == "__main__": - args = parseLogParserArguments() + args = parse_arguments() ensureDir(_LOCAL_LOGS_LOCATION) dlioWorkloads = dlio_workload.ParseTestConfigForDlioWorkloads( diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py b/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py index fc4ee31562..f597095147 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py @@ -27,7 +27,7 @@ sys.path.append("../") import fio_workload from utils.utils import get_memory, get_cpu, unix_to_timestamp, is_mash_installed, get_memory_from_monitoring_api, get_cpu_from_monitoring_api -from utils.parse_logs_common import ensureDir, download_gcs_objects, parseLogParserArguments, SUPPORTED_SCENARIOS +from utils.parse_logs_common import ensureDir, download_gcs_objects, parse_arguments, SUPPORTED_SCENARIOS _LOCAL_LOGS_LOCATION = "../../bin/fio-logs" @@ -323,7 +323,7 @@ def writeRecordsToCsvOutputFile(output: dict, output_file_path: str): if __name__ == "__main__": - args = parseLogParserArguments() + args = parse_arguments() ensureDir(_LOCAL_LOGS_LOCATION) fioWorkloads = fio_workload.ParseTestConfigForFioWorkloads( diff --git a/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py b/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py index 90a65e6a6a..e06f3c175e 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py +++ b/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py @@ -55,7 +55,7 @@ def download_gcs_objects(src: str, dst: str) -> Tuple[int, str]: return result.returncode, "" -def parseLogParserArguments() -> object: +def parse_arguments() -> object: parser = argparse.ArgumentParser( prog="DLIO Unet3d test output parser", description=( From 91b686ee0ba4a1a855b703ec6391bfb388e3d77e Mon Sep 17 00:00:00 2001 From: Nitin Garg Date: Wed, 25 Sep 2024 12:24:41 +0000 Subject: [PATCH 3/4] s/ensureDir/ensure_directory_exists/g --- .../scripts/testing_on_gke/examples/dlio/parse_logs.py | 6 +++--- .../scripts/testing_on_gke/examples/fio/parse_logs.py | 8 ++++---- .../testing_on_gke/examples/utils/parse_logs_common.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py b/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py index 7595f7c172..8f43dbedf3 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py +++ b/perfmetrics/scripts/testing_on_gke/examples/dlio/parse_logs.py @@ -27,7 +27,7 @@ sys.path.append("../") import dlio_workload from utils.utils import get_memory, get_cpu, unix_to_timestamp, standard_timestamp, is_mash_installed, get_memory_from_monitoring_api, get_cpu_from_monitoring_api, timestamp_to_epoch -from utils.parse_logs_common import ensureDir, download_gcs_objects, parse_arguments, SUPPORTED_SCENARIOS +from utils.parse_logs_common import ensure_directory_exists, download_gcs_objects, parse_arguments, SUPPORTED_SCENARIOS _LOCAL_LOGS_LOCATION = "../../bin/dlio-logs/logs" @@ -297,7 +297,7 @@ def writeRecordsToCsvOutputFile(output: dict, output_file_path: str): if __name__ == "__main__": args = parse_arguments() - ensureDir(_LOCAL_LOGS_LOCATION) + ensure_directory_exists(_LOCAL_LOGS_LOCATION) dlioWorkloads = dlio_workload.ParseTestConfigForDlioWorkloads( args.workload_config @@ -313,7 +313,7 @@ def writeRecordsToCsvOutputFile(output: dict, output_file_path: str): output_file_path = args.output_file # Create the parent directory of output_file_path if doesn't # exist already. - ensureDir(os.path.dirname(output_file_path)) + ensure_directory_exists(os.path.dirname(output_file_path)) writeRecordsToCsvOutputFile(output, output_file_path) print( "\n\nSuccessfully published outputs of DLIO test runs to" diff --git a/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py b/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py index f597095147..7d5b5f2074 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py +++ b/perfmetrics/scripts/testing_on_gke/examples/fio/parse_logs.py @@ -27,7 +27,7 @@ sys.path.append("../") import fio_workload from utils.utils import get_memory, get_cpu, unix_to_timestamp, is_mash_installed, get_memory_from_monitoring_api, get_cpu_from_monitoring_api -from utils.parse_logs_common import ensureDir, download_gcs_objects, parse_arguments, SUPPORTED_SCENARIOS +from utils.parse_logs_common import ensure_directory_exists, download_gcs_objects, parse_arguments, SUPPORTED_SCENARIOS _LOCAL_LOGS_LOCATION = "../../bin/fio-logs" @@ -73,7 +73,7 @@ def downloadFioOutputs(fioWorkloads: set, instanceId: str) -> int: dstDir = ( _LOCAL_LOGS_LOCATION + "/" + instanceId + "/" + fioWorkload.fileSize ) - ensureDir(dstDir) + ensure_directory_exists(dstDir) srcObjects = f"gs://{fioWorkload.bucket}/fio-output/{instanceId}/*" print(f"Downloading FIO outputs from {srcObjects} ...") @@ -324,7 +324,7 @@ def writeRecordsToCsvOutputFile(output: dict, output_file_path: str): if __name__ == "__main__": args = parse_arguments() - ensureDir(_LOCAL_LOGS_LOCATION) + ensure_directory_exists(_LOCAL_LOGS_LOCATION) fioWorkloads = fio_workload.ParseTestConfigForFioWorkloads( args.workload_config @@ -340,7 +340,7 @@ def writeRecordsToCsvOutputFile(output: dict, output_file_path: str): output_file_path = args.output_file # Create the parent directory of output_file_path if doesn't # exist already. - ensureDir(os.path.dirname(output_file_path)) + ensure_directory_exists(os.path.dirname(output_file_path)) writeRecordsToCsvOutputFile(output, output_file_path) print( "\n\nSuccessfully published outputs of FIO test runs to" diff --git a/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py b/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py index e06f3c175e..936efc9da7 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py +++ b/perfmetrics/scripts/testing_on_gke/examples/utils/parse_logs_common.py @@ -28,7 +28,7 @@ ] -def ensureDir(dirpath: str): +def ensure_directory_exists(dirpath: str): try: os.makedirs(dirpath) except FileExistsError: From 967c68d6fe2cf858e646b54d866d67c779665f1d Mon Sep 17 00:00:00 2001 From: Nitin Garg Date: Thu, 3 Oct 2024 00:13:01 +0000 Subject: [PATCH 4/4] address review comments --- .../scripts/testing_on_gke/examples/utils/run_tests_common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py b/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py index 322c9c6b45..f207bb1ca2 100644 --- a/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py +++ b/perfmetrics/scripts/testing_on_gke/examples/utils/run_tests_common.py @@ -93,7 +93,7 @@ def parse_args(): parser.add_argument( '--ksa', metavar='kubernetes service account of the user', - help='kubernetest service account of the user e.g. default', + help='kubernetes service account of the user e.g. default', required=False, default='default', ) @@ -154,5 +154,3 @@ def add_iam_role_for_buckets( raise Exception( f'Failed to add role {role} for {bucket}: exit-code={ret}' ) - - pass