From 8288d454393c5309ff19d8148f2f401eb39d1ff0 Mon Sep 17 00:00:00 2001 From: anushka <78717608+anushka567@users.noreply.github.com> Date: Fri, 23 Aug 2024 13:15:23 +0530 Subject: [PATCH] Get rename metrics for non nested scenario for hns bucket (#2339) * added check directory functions and its unit tests * adding function to parse config file and generate dir in bucket * exit_code set to 1 * correct format * refactors * test formatting * function to avoid code repetition * added check directory functions and its unit tests * moving mount functions to utils * compute metrics from time of operation * uploading metrics to gsheet * nits * testing non nested scenario for gcs bucket * unit tests * using single config file flag along with bucket type * correcting header checks * refactor to avoid code repetition --- .github/header-checker-lint.yml | 1 - .../config-flat.json | 93 +++++++++++++++ .../{config.json => config-hns.json} | 2 +- .../renaming_benchmark.py | 86 ++++++++------ .../renaming_benchmark_test.py | 112 +++++++++++++++++- 5 files changed, 255 insertions(+), 39 deletions(-) create mode 100644 perfmetrics/scripts/hns_rename_folders_metrics/config-flat.json rename perfmetrics/scripts/hns_rename_folders_metrics/{config.json => config-hns.json} (98%) diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml index 2b85216072..c45e076386 100644 --- a/.github/header-checker-lint.yml +++ b/.github/header-checker-lint.yml @@ -23,7 +23,6 @@ allowedLicenses: sourceFileExtensions: - 'go' - 'Makefile' - - 'json' - 'yml' - 'txt' - 'py' diff --git a/perfmetrics/scripts/hns_rename_folders_metrics/config-flat.json b/perfmetrics/scripts/hns_rename_folders_metrics/config-flat.json new file mode 100644 index 0000000000..e5bf607a77 --- /dev/null +++ b/perfmetrics/scripts/hns_rename_folders_metrics/config-flat.json @@ -0,0 +1,93 @@ +{ + "name": "hns-rename-benchmark-flat" , + "folders" : { + "num_folders": 3, + "folder_structure" : [ + { + "name": 
"1k_files_rename_test_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "5k_files_rename_test_0" , + "num_files": 5000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "10k_files_rename_test_0" , + "num_files": 10000 , + "file_name_prefix": "file" , + "file_size": "1kb" + } + ] + }, + "nested_folders": { + "folder_name": "nested_folder_rename_test", + "num_folders": 10, + "folder_structure" : [ + { + "name": "nested_folder_1_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_2_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_3_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_4_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_5_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_6_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_7_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_8_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_9_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + }, + { + "name": "nested_folder_10_0" , + "num_files": 1000 , + "file_name_prefix": "file" , + "file_size": "1kb" + } + ] + + } +} diff --git a/perfmetrics/scripts/hns_rename_folders_metrics/config.json b/perfmetrics/scripts/hns_rename_folders_metrics/config-hns.json similarity index 98% rename from perfmetrics/scripts/hns_rename_folders_metrics/config.json rename to perfmetrics/scripts/hns_rename_folders_metrics/config-hns.json index ad0512cce1..f0c3705e85 100644 --- 
a/perfmetrics/scripts/hns_rename_folders_metrics/config.json +++ b/perfmetrics/scripts/hns_rename_folders_metrics/config-hns.json @@ -1,5 +1,5 @@ { - "name": "hns-rename-folders-test" , + "name": "hns-rename-benchmark-hns" , "folders" : { "num_folders": 3, "folder_structure" : [ diff --git a/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark.py b/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark.py index 749bcb2726..523c9cdb19 100644 --- a/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark.py +++ b/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark.py @@ -13,7 +13,8 @@ # limitations under the License. # To run the script,run in terminal: -# python3 renaming_benchmark.py [--upload_gs] [--num_samples NUM_SAMPLES] +# python3 renaming_benchmark.py config.json bucket_type [--upload_gs] \ +# [--num_samples NUM_SAMPLES] # where dir-config.json file contains the directory structure details for the test. import os @@ -33,6 +34,7 @@ from gsheet import gsheet WORKSHEET_NAME_FLAT = 'rename_metrics_flat' +WORKSHEET_NAME_HNS = 'rename_metrics_hns' SPREADSHEET_ID = '1UVEvsf49eaDJdTGLQU1rlNTIAxg8PZoNQCy_GX6Nw-A' logging.basicConfig( @@ -239,7 +241,7 @@ def _record_time_of_operation(mount_point, dir, num_samples): return results -def _perform_testing(dir, test_type, num_samples, results): +def _perform_testing(dir, test_type, num_samples): """ This function performs rename operations and records time of operation . Args: @@ -272,31 +274,39 @@ def _perform_testing(dir, test_type, num_samples, results): } test_type : flat or hns. num_samples: Number of samples to collect for each test. - results: Dictionary to store the results corresponding to each test type """ if test_type == "hns": - # TODO add mount function for test type hns - return + # Creating config file for mounting with hns enabled. 
+ with open("/tmp/config.yml",'w') as mount_config: + mount_config.write("enable-hns: true") + mount_flags="--config-file=/tmp/config.yml" + else : + mount_flags = "--implicit-dirs --rename-dir-limit=1000000" # Mounting the gcs bucket. - flat_mount_flags = "--implicit-dirs --rename-dir-limit=1000000" - flat_bucket_name = mount_gcs_bucket(dir["name"], flat_mount_flags, log) - + bucket_name = mount_gcs_bucket(dir["name"], mount_flags, log) # Record time of operation and populate the results dict. - flat_results = _record_time_of_operation(flat_bucket_name, dir, num_samples) - results["flat"] = flat_results - + results = _record_time_of_operation(bucket_name, dir, num_samples) + # Unmounting the bucket. unmount_gcs_bucket(dir["name"], log) + return results + def _parse_arguments(argv): argv = sys.argv parser = argparse.ArgumentParser() parser.add_argument( - 'dir_config_file', - help='Provide path of the config file.', - action='store' + 'config_file', + help='Provide path of the config file for GCS bucket.', + action='store', + ) + parser.add_argument( + 'bucket_type', + help='Provide bucket type - hns or flat ', + action='store', + choices=['hns','flat'] ) parser.add_argument( '--upload_gs', @@ -317,17 +327,8 @@ def _parse_arguments(argv): return parser.parse_args(argv[1:]) -if __name__ == '__main__': - argv = sys.argv - if len(argv) < 2: - raise TypeError('Incorrect number of arguments.\n' - 'Usage: ' - 'python3 renaming_benchmark.py [--upload_gs] [--num_samples NUM_SAMPLES] config_file ') - - args = _parse_arguments(argv) - check_dependencies(['gcloud', 'gcsfuse'], log) - - with open(os.path.abspath(args.dir_config_file)) as file: +def _run_rename_benchmark(test_type,dir_config,num_samples,upload_gs): + with open(os.path.abspath(dir_config)) as file: dir_str = json.load(file) exit_code = _check_for_config_file_inconsistency(dir_str) @@ -339,20 +340,37 @@ def _parse_arguments(argv): dir_structure_present = _check_if_dir_structure_exists(dir_str) if not 
dir_structure_present: log.error("Test data does not exist.To create test data, run : \ - python3 generate_folders_and_files.py ") + python3 generate_folders_and_files.py {} ".format(dir_config)) sys.exit(1) - results = dict() # Dict object to store the results corresonding to the test types. - _perform_testing(dir_str, "flat", args.num_samples, results) - flat_parsed_metrics = _parse_results(dir_str, results['flat'], args.num_samples) - upload_values_flat = _get_values_to_export(dir_str, flat_parsed_metrics, - "flat") + results=_perform_testing(dir_str, test_type, num_samples) + parsed_metrics = _parse_results(dir_str, results, num_samples) + upload_values = _get_values_to_export(dir_str, parsed_metrics, + test_type) - if args.upload_gs: + if upload_gs: log.info('Uploading files to the Google Sheet\n') - exit_code = _upload_to_gsheet(WORKSHEET_NAME_FLAT, upload_values_flat, + if test_type == "flat": + worksheet= WORKSHEET_NAME_FLAT + else: + worksheet= WORKSHEET_NAME_HNS + + exit_code = _upload_to_gsheet(worksheet, upload_values, SPREADSHEET_ID) if exit_code != 0: log.error("Upload to gsheet failed!") else: - print(upload_values_flat) + print(upload_values) + + +if __name__ == '__main__': + argv = sys.argv + if len(argv) < 3: + raise TypeError('Incorrect number of arguments.\n' + 'Usage: ' + 'python3 renaming_benchmark.py [--upload_gs] [--num_samples NUM_SAMPLES] config_file bucket_type') + + args = _parse_arguments(argv) + check_dependencies(['gcloud', 'gcsfuse'], log) + _run_rename_benchmark(args.bucket_type, args.config_file, args.num_samples, + args.upload_gs) diff --git a/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark_test.py b/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark_test.py index b1103f6d53..2abee74f83 100644 --- a/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark_test.py +++ b/perfmetrics/scripts/hns_rename_folders_metrics/renaming_benchmark_test.py @@ -13,7 +13,7 @@ # limitations under the 
License. import unittest import renaming_benchmark -from mock import patch, call +from mock import patch, call, mock_open class TestRenamingBenchmark(unittest.TestCase): @@ -98,9 +98,44 @@ def test_perform_testing_flat(self, mock_log, mock_record_time_of_operation, mount_flags = "--implicit-dirs --rename-dir-limit=1000000" mock_mount_gcs_bucket.return_value="flat_bucket" mock_record_time_of_operation.return_value = {"test_folder": [0.1, 0.2, 0.3, 0.4]} - expected_results = {"flat": {"test_folder": [0.1, 0.2, 0.3, 0.4]}} + expected_results = {"test_folder": [0.1, 0.2, 0.3, 0.4]} - renaming_benchmark._perform_testing(dir, test_type, num_samples, results) + results= renaming_benchmark._perform_testing(dir, test_type, num_samples) + + self.assertEqual(results, expected_results) + # Verify calls to other functions. + mock_mount_gcs_bucket.assert_called_once_with(dir["name"], mount_flags, mock_log) + mock_record_time_of_operation.assert_called_once_with(mock_mount_gcs_bucket.return_value, dir, num_samples) + mock_unmount_gcs_bucket.assert_called_once_with(dir["name"], mock_log) + mock_log.error.assert_not_called() # No errors should be logged + + @patch('renaming_benchmark.unmount_gcs_bucket') + @patch('renaming_benchmark.mount_gcs_bucket') + @patch('renaming_benchmark._record_time_of_operation') + @patch('renaming_benchmark.log') + def test_perform_testing_hns(self, mock_log, mock_record_time_of_operation, + mock_mount_gcs_bucket, mock_unmount_gcs_bucket): + dir = { + "name":"hns_bucket", + "folders":{ + "num_folders":1, + "folder_structure":{ + 'name': "test_folder", + "num_files": 1, + "file_name_prefix": "file", + "file_size": "1kb" + } + } + } + test_type = "hns" + num_samples = 4 + results = {} + mount_flags = "--config-file=/tmp/config.yml" + mock_mount_gcs_bucket.return_value="hns_bucket" + mock_record_time_of_operation.return_value = {"test_folder": [0.1, 0.2, 0.3, 0.4]} + expected_results = {"test_folder": [0.1, 0.2, 0.3, 0.4]} + + results= 
renaming_benchmark._perform_testing(dir, test_type, num_samples) self.assertEqual(results, expected_results) # Verify calls to other functions. @@ -177,6 +212,77 @@ def test_upload_to_gsheet_no_spreadsheet_id_passed(self,mock_log,mock_os): self.assertEqual(exit_code,1) mock_log.error.assert_called_once_with('Empty spreadsheet id passed!') + @patch('builtins.open', new_callable=mock_open) + @patch('renaming_benchmark.log') + @patch('renaming_benchmark._check_for_config_file_inconsistency') + @patch('renaming_benchmark.json.load') + def test_run_rename_benchmark_error_config_inconsistency(self,mock_json,mock_inconsistency,mock_log,mock_open): + test_type="flat" + dir_config="test-config.json" + num_samples=10 + results=dict() + upload_gs=True + mock_inconsistency.return_value=1 + mock_json.return_value={} + + with self.assertRaises(SystemExit): + renaming_benchmark._run_rename_benchmark(test_type,dir_config,num_samples,upload_gs) + + mock_log.error.assert_called_once_with('Exited with code 1') + + @patch('builtins.open', new_callable=mock_open) + @patch('renaming_benchmark.log') + @patch('renaming_benchmark._check_for_config_file_inconsistency') + @patch('renaming_benchmark._check_if_dir_structure_exists') + @patch('renaming_benchmark.json.load') + def test_run_rename_benchmark_error_dir_does_not_exist(self,mock_json,mock_check_dir_exists,mock_inconsistency,mock_log,mock_open): + test_type="flat" + dir_config="test-config.json" + num_samples=10 + results=dict() + upload_gs=True + mock_inconsistency.return_value=0 + mock_check_dir_exists.return_value=False + mock_json.return_value={} + + with self.assertRaises(SystemExit) : + renaming_benchmark._run_rename_benchmark(test_type,dir_config,num_samples,upload_gs) + + mock_log.error.assert_called_once_with("Test data does not exist.To create test data, run : \ + python3 generate_folders_and_files.py {} ".format(dir_config)) + + @patch('renaming_benchmark.SPREADSHEET_ID','temp-gsheet-id') + 
@patch('renaming_benchmark.WORKSHEET_NAME_FLAT','flat-sheet') + @patch('builtins.open', new_callable=mock_open) + @patch('renaming_benchmark.log') + @patch('renaming_benchmark._check_for_config_file_inconsistency') + @patch('renaming_benchmark._check_if_dir_structure_exists') + @patch('renaming_benchmark._perform_testing') + @patch('renaming_benchmark._parse_results') + @patch('renaming_benchmark._get_values_to_export') + @patch('renaming_benchmark._upload_to_gsheet') + @patch('renaming_benchmark.json.load') + def test_run_rename_benchmark_upload_true(self,mock_json,mock_upload,mock_get_values,mock_parse_results,mock_perform_testing,mock_check_dir_exists,mock_inconsistency,mock_log,mock_open): + test_type="flat" + dir_config="test-config.json" + num_samples=10 + results={'flat':''} + upload_gs=True + worksheet= 'flat-sheet' + spreadsheet_id='temp-gsheet-id' + mock_inconsistency.return_value=0 + mock_check_dir_exists.return_value=True + mock_parse_results.return_value={'key':'val'} + mock_get_values.return_value=[['testdata','testdata2']] + mock_upload.return_value=0 + mock_json.return_value={} + + + renaming_benchmark._run_rename_benchmark(test_type,dir_config,num_samples,upload_gs) + + mock_log.info.assert_called_with('Uploading files to the Google Sheet\n') + mock_upload.assert_called_with(worksheet,[['testdata','testdata2']],spreadsheet_id) + if __name__ == '__main__': unittest.main()