adding flag to make 1m files testcase run optional (#2439)
anushka567 authored Sep 5, 2024
1 parent 0f73917 commit 913d5a6
Showing 2 changed files with 75 additions and 6 deletions.
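
For readers skimming the diff, here is a condensed, illustrative sketch of the pattern the commit introduces (the helper names below are hypothetical; the real script spreads the guard across several functions): a module-level RUN_1M_TEST flag that defaults to False, an opt-in --run_1m_test argument, and a check in each folder loop that skips the 1KB_1000000files_0subdir case unless the flag is set.

import argparse
import sys

RUN_1M_TEST = False


def _parse_arguments(argv):
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--run_1m_test',
      help='Also run the listing benchmark on the 1m-files directory.',
      action='store_true',
      default=False,
  )
  return parser.parse_args(argv[1:])


def _folders_to_run(folders):
  # Mirrors the guard added to _perform_testing, _parse_results and friends.
  for folder in folders:
    if not RUN_1M_TEST and folder == '1KB_1000000files_0subdir':
      continue  # skipped in periodic runs to avoid kokoro timeouts
    yield folder


if __name__ == '__main__':
  args = _parse_arguments(sys.argv)
  RUN_1M_TEST = args.run_1m_test  # rebind the module-level flag at script entry
  print(list(_folders_to_run(['2KB_3files_0subdir', '1KB_1000000files_0subdir'])))
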
22 changes: 16 additions & 6 deletions perfmetrics/scripts/ls_metrics/listing_benchmark.py
@@ -46,7 +46,7 @@
Note: This python script is dependent on generate_files.py.
Note: This script currently skips folder with 1000000 files to facilitate periodic kokoro tests
without timeout .To run that test case, comment out lines [124-126],[180-182],[259-261],[317-319],[377-379]
without timeout. To run that test case, run the script with the --run_1m_test flag.
"""

import argparse
@@ -78,6 +78,7 @@
handlers=[logging.StreamHandler(sys.stdout)],
)
log = logging.getLogger()
RUN_1M_TEST=False

WORKSHEET_NAME_GCS = 'ls_metrics_gcsfuse'
WORKSHEET_NAME_PD = 'ls_metrics_persistent_disk'
@@ -121,7 +122,7 @@ def _get_values_to_export(folders, metrics, command) -> list:

list_metrics_data = []
for testing_folder in folders:
if testing_folder.name == "1KB_1000000files_0subdir":
if not RUN_1M_TEST and testing_folder.name == "1KB_1000000files_0subdir":
# Excluding test case with 1m files from HNS in daily periodic tests.
continue
num_files, num_folders = _count_number_of_files_and_folders(
@@ -177,7 +178,7 @@ def _parse_results(folders, results_list, message, num_samples) -> dict:
metrics = dict()

for testing_folder in folders:
if testing_folder.name == "1KB_1000000files_0subdir":
if not RUN_1M_TEST and testing_folder.name == "1KB_1000000files_0subdir":
# Excluding test case with 1m files from HNS in daily periodic tests.
continue
metrics[testing_folder.name] = dict()
@@ -256,7 +257,7 @@ def _perform_testing(
persistent_disk_results = {}

for testing_folder in folders:
if testing_folder.name == "1KB_1000000files_0subdir":
if not RUN_1M_TEST and testing_folder.name == "1KB_1000000files_0subdir":
# Excluding test case with 1m files from HNS in daily periodic tests.
continue

@@ -314,7 +315,7 @@ def _create_directory_structure(

result = 0
for folder in directory_structure.folders:
if folder.name == "1KB_1000000files_0subdir":
if not RUN_1M_TEST and folder.name == "1KB_1000000files_0subdir":
# Excluding test case with 1m files from HNS in daily periodic tests.
continue
result += _create_directory_structure(gcs_bucket_url + folder.name + '/',
@@ -374,7 +375,7 @@ def _compare_directory_structure(url, directory_structure) -> bool:

result = True
for folder in directory_structure.folders:
if folder.name == "1KB_1000000files_0subdir":
if not RUN_1M_TEST and folder.name == "1KB_1000000files_0subdir":
# Excluding test case with 1m files from HNS in daily periodic tests.
continue
new_url = url + folder.name + '/'
@@ -475,6 +476,14 @@ def _parse_arguments(argv):
action='store',
required=False,
)

parser.add_argument(
'--run_1m_test',
help='Perform listing benchmark on 1m files directory? [True/False]',
action='store_true',
default=False,
required=False,
)
# Ignoring the first parameter, as it is the path of this python
# script itself.
return parser.parse_args(argv[1:])
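
A minimal sketch of how the new argument behaves: with action='store_true' the flag takes no value on the command line, so args.run_1m_test is True only when --run_1m_test is passed and otherwise stays at the default False (the "[True/False]" wording in the help string is informal; the flag is simply present or absent).

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--run_1m_test', action='store_true', default=False)

print(parser.parse_args([]).run_1m_test)                 # False
print(parser.parse_args(['--run_1m_test']).run_1m_test)  # True
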
@@ -571,6 +580,7 @@ def _export_to_bigquery(test_type, config_id, start_time_build, ls_data):
gcs_bucket = mount_gcs_bucket(directory_structure.name,
args.gcsfuse_flags[0],log)

RUN_1M_TEST=args.run_1m_test
gcs_bucket_results, persistent_disk_results = _perform_testing(
directory_structure.folders, gcs_bucket, persistent_disk,
int(args.num_samples[0]), args.command[0])
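One scoping detail in the hunk above: RUN_1M_TEST = args.run_1m_test rebinds the module-level flag only if it executes at module scope (presumably the script's __main__ block); inside a function, the same statement would create a local name unless the function declared global RUN_1M_TEST. A small, repo-independent illustration:

FLAG = False

def set_locally(value):
  FLAG = value  # creates a function-local name; module-level FLAG is untouched

def set_globally(value):
  global FLAG
  FLAG = value  # rebinds the module-level name that other functions read

set_locally(True)
print(FLAG)  # False
set_globally(True)
print(FLAG)  # True
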
59 changes: 59 additions & 0 deletions perfmetrics/scripts/ls_metrics/listing_benchmark_test.py
@@ -128,6 +128,28 @@
]
}

DIRECTORY_STRUCTURE4 = {
'name': 'fake_bucket',
'num_folders': 2,
'num_files': 1,
'file_size': '1kb',
'file_name_prefix': 'file',
'folders': [
{
'name': '2KB_3files_0subdir',
'num_files': 3,
'file_name_prefix': 'file',
'file_size': '2kb'
},
{
'name': '1KB_1000000files_0subdir',
'num_files': 1000000,
'file_size': '1kb',
'file_name_prefix': 'file'
}
]
}

# List of latencies (msec) of list operation to test _parse_results method.
METRICS1 = [1.234, 0.995, 0.121, 0.222, 0.01709]
METRICS2 = [90.45, 1.95, 0.334, 7.090, 0.001]
@@ -206,6 +228,8 @@
DIRECTORY_STRUCTURE2, directory_proto.Directory())
DIRECTORY_STRUCTURE3 = ParseDict(
DIRECTORY_STRUCTURE3, directory_proto.Directory())
DIRECTORY_STRUCTURE4 = ParseDict(
DIRECTORY_STRUCTURE4, directory_proto.Directory())

WORKSHEET_NAME = 'ls_metrics_gcsfuse'
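
The DIRECTORY_STRUCTURE* fixtures are plain dicts that ParseDict converts into the repo's Directory proto. Directory itself is project-specific, so the sketch below shows the same ParseDict mechanics against the well-known Struct type instead (note that Struct stores all numbers as floats):

from google.protobuf import struct_pb2
from google.protobuf.json_format import ParseDict

msg = ParseDict({'name': 'fake_bucket', 'num_files': 1}, struct_pb2.Struct())
print(msg['name'])       # fake_bucket
print(msg['num_files'])  # 1.0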

@@ -573,6 +597,41 @@ def test_compare_directory_structure_false_file_folder_multi_level_dir(
'fake_bucket/', DIRECTORY_STRUCTURE3)
self.assertFalse(result)

@patch('listing_benchmark.RUN_1M_TEST',False)
@patch('listing_benchmark._record_time_of_operation', return_value=[1])
def test_skip_1m_test_false(self, mock_record_time_of_operation):
mock_record_time_of_operation.return_value = [1, 1]
expected_calls=[call('ls -R','./fake_disk/2KB_3files_0subdir/',2),
call('ls -R','./fake_bucket/2KB_3files_0subdir/',2)]

gcs_bucket_results, persistent_disk_results = listing_benchmark._perform_testing(
DIRECTORY_STRUCTURE4.folders, 'fake_bucket', 'fake_disk', 2, 'ls -R')

self.assertEqual(gcs_bucket_results, persistent_disk_results)
mock_record_time_of_operation.assert_has_calls(expected_calls)
self.assertEqual(gcs_bucket_results, {
'2KB_3files_0subdir': [1, 1],
})

@patch('listing_benchmark.RUN_1M_TEST',True)
@patch('listing_benchmark._record_time_of_operation', return_value=[1])
def test_skip_1m_test_true(self, mock_record_time_of_operation):
mock_record_time_of_operation.return_value = [1, 1]
expected_calls=[call('ls -R','./fake_disk/2KB_3files_0subdir/',2),
call('ls -R','./fake_bucket/2KB_3files_0subdir/',2),
call('ls -R','./fake_disk/1KB_1000000files_0subdir/',2),
call('ls -R','./fake_bucket/1KB_1000000files_0subdir/',2),]

gcs_bucket_results, persistent_disk_results = listing_benchmark._perform_testing(
DIRECTORY_STRUCTURE4.folders, 'fake_bucket', 'fake_disk', 2, 'ls -R')

self.assertEqual(gcs_bucket_results, persistent_disk_results)
mock_record_time_of_operation.assert_has_calls(expected_calls)
self.assertEqual(gcs_bucket_results, {
'2KB_3files_0subdir': [1, 1],
'1KB_1000000files_0subdir' : [1,1]
})


if __name__ == '__main__':
unittest.main()
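
The two new tests pin the flag per test with unittest.mock.patch, which temporarily rebinds a module-level attribute for the duration of the test. A self-contained sketch of that mechanism (the names below are stand-ins, not the repo's code); both tests pass when the file is run directly:

import unittest
from unittest.mock import patch

RUN_1M_TEST = False  # stand-in for listing_benchmark.RUN_1M_TEST


def folders_to_test(folders):
  return [f for f in folders if RUN_1M_TEST or f != '1KB_1000000files_0subdir']


class FlagPatchTest(unittest.TestCase):

  @patch('__main__.RUN_1M_TEST', False)
  def test_1m_folder_skipped_by_default(self):
    self.assertEqual(
        folders_to_test(['2KB_3files_0subdir', '1KB_1000000files_0subdir']),
        ['2KB_3files_0subdir'])

  @patch('__main__.RUN_1M_TEST', True)
  def test_1m_folder_included_when_flag_patched(self):
    self.assertEqual(
        folders_to_test(['2KB_3files_0subdir', '1KB_1000000files_0subdir']),
        ['2KB_3files_0subdir', '1KB_1000000files_0subdir'])


if __name__ == '__main__':
  unittest.main()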
