diff --git a/perfmetrics/scripts/ls_metrics/listing_benchmark.py b/perfmetrics/scripts/ls_metrics/listing_benchmark.py
index 50d90706c0..08f5fe2db9 100644
--- a/perfmetrics/scripts/ls_metrics/listing_benchmark.py
+++ b/perfmetrics/scripts/ls_metrics/listing_benchmark.py
@@ -46,7 +46,7 @@
 Note: This python script is dependent on generate_files.py.
 
 Note: This script currently skips folder with 1000000 files to facilitate periodic kokoro tests
-without timeout .To run that test case, comment out lines [124-126],[180-182],[259-261],[317-319],[377-379]
+without timeout. To run that test case, run the script with the --run_1m_test flag.
 """
 
 import argparse
@@ -78,6 +78,7 @@
 handlers=[logging.StreamHandler(sys.stdout)],
 )
 log = logging.getLogger()
+RUN_1M_TEST = False
 
 WORKSHEET_NAME_GCS = 'ls_metrics_gcsfuse'
 WORKSHEET_NAME_PD = 'ls_metrics_persistent_disk'
@@ -121,7 +122,7 @@ def _get_values_to_export(folders, metrics, command) -> list:
   list_metrics_data = []
 
   for testing_folder in folders:
-    if testing_folder.name == "1KB_1000000files_0subdir":
+    if not RUN_1M_TEST and testing_folder.name == "1KB_1000000files_0subdir":
       # Excluding test case with 1m files from HNS in daily periodic tests.
       continue
     num_files, num_folders = _count_number_of_files_and_folders(
@@ -177,7 +178,7 @@ def _parse_results(folders, results_list, message, num_samples) -> dict:
   metrics = dict()
 
   for testing_folder in folders:
-    if testing_folder.name == "1KB_1000000files_0subdir":
+    if not RUN_1M_TEST and testing_folder.name == "1KB_1000000files_0subdir":
       # Excluding test case with 1m files from HNS in daily periodic tests.
       continue
     metrics[testing_folder.name] = dict()
@@ -256,7 +257,7 @@ def _perform_testing(
   persistent_disk_results = {}
 
   for testing_folder in folders:
-    if testing_folder.name == "1KB_1000000files_0subdir":
+    if not RUN_1M_TEST and testing_folder.name == "1KB_1000000files_0subdir":
       # Excluding test case with 1m files from HNS in daily periodic tests.
       continue
 
@@ -314,7 +315,7 @@ def _create_directory_structure(
   result = 0
 
   for folder in directory_structure.folders:
-    if folder.name == "1KB_1000000files_0subdir":
+    if not RUN_1M_TEST and folder.name == "1KB_1000000files_0subdir":
       # Excluding test case with 1m files from HNS in daily periodic tests.
       continue
     result += _create_directory_structure(gcs_bucket_url + folder.name + '/',
@@ -374,7 +375,7 @@ def _compare_directory_structure(url, directory_structure) -> bool:
   result = True
 
   for folder in directory_structure.folders:
-    if folder.name == "1KB_1000000files_0subdir":
+    if not RUN_1M_TEST and folder.name == "1KB_1000000files_0subdir":
       # Excluding test case with 1m files from HNS in daily periodic tests.
       continue
     new_url = url + folder.name + '/'
@@ -475,6 +476,14 @@
       action='store',
       required=False,
   )
+
+  parser.add_argument(
+      '--run_1m_test',
+      help='Also run the listing benchmark on the 1M-files directory (skipped by default).',
+      action='store_true',
+      default=False,
+      required=False,
+  )
   # Ignoring the first parameter, as it is the path of this python
   # script itself.
   return parser.parse_args(argv[1:])
@@ -571,6 +580,7 @@ def _export_to_bigquery(test_type, config_id, start_time_build, ls_data):
 
   gcs_bucket = mount_gcs_bucket(directory_structure.name, args.gcsfuse_flags[0],log)
 
+  RUN_1M_TEST = args.run_1m_test
   gcs_bucket_results, persistent_disk_results = _perform_testing(
       directory_structure.folders, gcs_bucket, persistent_disk,
       int(args.num_samples[0]), args.command[0])
diff --git a/perfmetrics/scripts/ls_metrics/listing_benchmark_test.py b/perfmetrics/scripts/ls_metrics/listing_benchmark_test.py
index c163557b3f..f4916f4e16 100644
--- a/perfmetrics/scripts/ls_metrics/listing_benchmark_test.py
+++ b/perfmetrics/scripts/ls_metrics/listing_benchmark_test.py
@@ -128,6 +128,28 @@
     ]
 }
 
+DIRECTORY_STRUCTURE4 = {
+    'name': 'fake_bucket',
+    'num_folders': 2,
+    'num_files': 1,
+    'file_size': '1kb',
+    'file_name_prefix': 'file',
+    'folders': [
+        {
+            'name': '2KB_3files_0subdir',
+            'num_files': 3,
+            'file_name_prefix': 'file',
+            'file_size': '2kb'
+        },
+        {
+            'name': '1KB_1000000files_0subdir',
+            'num_files': 1000000,
+            'file_size': '1kb',
+            'file_name_prefix': 'file'
+        }
+    ]
+}
+
 # List of latencies (msec) of list operation to test _parse_results method.
 METRICS1 = [1.234, 0.995, 0.121, 0.222, 0.01709]
 METRICS2 = [90.45, 1.95, 0.334, 7.090, 0.001]
@@ -206,6 +228,8 @@
     DIRECTORY_STRUCTURE2, directory_proto.Directory())
 DIRECTORY_STRUCTURE3 = ParseDict(
     DIRECTORY_STRUCTURE3, directory_proto.Directory())
+DIRECTORY_STRUCTURE4 = ParseDict(
+    DIRECTORY_STRUCTURE4, directory_proto.Directory())
 
 WORKSHEET_NAME = 'ls_metrics_gcsfuse'
 
@@ -573,6 +597,41 @@ def test_compare_directory_structure_false_file_folder_multi_level_dir(
         'fake_bucket/', DIRECTORY_STRUCTURE3)
     self.assertFalse(result)
 
+  @patch('listing_benchmark.RUN_1M_TEST', False)
+  @patch('listing_benchmark._record_time_of_operation')
+  def test_skip_1m_folder_when_flag_false(self, mock_record_time_of_operation):
+    mock_record_time_of_operation.return_value = [1, 1]
+    expected_calls = [call('ls -R', './fake_disk/2KB_3files_0subdir/', 2),
+                      call('ls -R', './fake_bucket/2KB_3files_0subdir/', 2)]
+
+    gcs_bucket_results, persistent_disk_results = listing_benchmark._perform_testing(
+        DIRECTORY_STRUCTURE4.folders, 'fake_bucket', 'fake_disk', 2, 'ls -R')
+
+    self.assertEqual(gcs_bucket_results, persistent_disk_results)
+    mock_record_time_of_operation.assert_has_calls(expected_calls)
+    self.assertEqual(gcs_bucket_results, {
+        '2KB_3files_0subdir': [1, 1],
+    })
+
+  @patch('listing_benchmark.RUN_1M_TEST', True)
+  @patch('listing_benchmark._record_time_of_operation')
+  def test_run_1m_folder_when_flag_true(self, mock_record_time_of_operation):
+    mock_record_time_of_operation.return_value = [1, 1]
+    expected_calls = [call('ls -R', './fake_disk/2KB_3files_0subdir/', 2),
+                      call('ls -R', './fake_bucket/2KB_3files_0subdir/', 2),
+                      call('ls -R', './fake_disk/1KB_1000000files_0subdir/', 2),
+                      call('ls -R', './fake_bucket/1KB_1000000files_0subdir/', 2)]
+
+    gcs_bucket_results, persistent_disk_results = listing_benchmark._perform_testing(
+        DIRECTORY_STRUCTURE4.folders, 'fake_bucket', 'fake_disk', 2, 'ls -R')
+
+    self.assertEqual(gcs_bucket_results, persistent_disk_results)
+    mock_record_time_of_operation.assert_has_calls(expected_calls)
+    self.assertEqual(gcs_bucket_results, {
+        '2KB_3files_0subdir': [1, 1],
+        '1KB_1000000files_0subdir': [1, 1]
+    })
+
 
 if __name__ == '__main__':
   unittest.main()
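
For readers skimming the patch, below is a minimal, self-contained sketch of the mechanism it introduces: an argparse store_true flag that flips a module-level switch, which the benchmark helpers consult before the expensive 1M-files case. Only the names RUN_1M_TEST, --run_1m_test, and the folder name 1KB_1000000files_0subdir come from the patch; everything else (perform_testing, the printed output) is illustrative, not the benchmark's real code.

import argparse

RUN_1M_TEST = False  # module-level default, mirroring listing_benchmark.py


def perform_testing(folders):
  """Hypothetical stand-in for _perform_testing with the same skip guard."""
  for folder in folders:
    if not RUN_1M_TEST and folder == '1KB_1000000files_0subdir':
      continue  # skip the 1M-files case unless explicitly enabled
    print(f'benchmarking {folder}')


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument('--run_1m_test', action='store_true', default=False)
  args = parser.parse_args()
  # No `global` statement is needed: as in the patched script, this
  # assignment runs at module scope, so it rebinds the module global
  # that perform_testing reads.
  RUN_1M_TEST = args.run_1m_test
  perform_testing(['2KB_3files_0subdir', '1KB_1000000files_0subdir'])

Run without the flag, this prints only the 2KB folder; with --run_1m_test, both folders are benchmarked.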
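
The new tests work by letting unittest.mock.patch replace the module-level switch for the duration of each test, so both branches of the guard can be exercised without a real bucket or a million files. A hedged, self-contained sketch of that pattern follows; the helper folders_to_test is hypothetical, and only RUN_1M_TEST and the folder names come from the patch.

import unittest
from unittest.mock import patch

RUN_1M_TEST = False  # stands in for listing_benchmark.RUN_1M_TEST

FOLDERS = ['2KB_3files_0subdir', '1KB_1000000files_0subdir']


def folders_to_test(folders):
  """Hypothetical helper mirroring the skip guard added by the patch."""
  return [f for f in folders
          if RUN_1M_TEST or f != '1KB_1000000files_0subdir']


class SkipGuardTest(unittest.TestCase):

  # patch() swaps the module attribute for the test's duration only,
  # so the default is restored automatically afterwards.
  @patch(f'{__name__}.RUN_1M_TEST', False)
  def test_1m_folder_skipped_when_flag_false(self):
    self.assertEqual(folders_to_test(FOLDERS), ['2KB_3files_0subdir'])

  @patch(f'{__name__}.RUN_1M_TEST', True)
  def test_1m_folder_included_when_flag_true(self):
    self.assertEqual(folders_to_test(FOLDERS), FOLDERS)


if __name__ == '__main__':
  unittest.main()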