diff --git a/common/experiment_utils.py b/common/experiment_utils.py index 3911751ac..604d0218f 100644 --- a/common/experiment_utils.py +++ b/common/experiment_utils.py @@ -97,17 +97,6 @@ def get_custom_seed_corpora_filestore_path(): 'custom_seed_corpora') -def get_oss_fuzz_corpora_unarchived_path(): - """Returns path containing the user-provided seed corpora.""" - return posixpath.join(get_experiment_filestore_path(), - 'oss_fuzz_unarchived') - - -def get_random_corpora_filestore_path(): - """Returns path containing seed corpora for the target fuzzing experiment.""" # pylint: disable=line-too-long - return posixpath.join(get_experiment_filestore_path(), 'random_corpora') - - def get_dispatcher_instance_name(experiment: str) -> str: """Returns a dispatcher instance name for an experiment.""" return f'd-{experiment}' @@ -149,11 +138,6 @@ def is_local_experiment(): return bool(environment.get('LOCAL_EXPERIMENT')) -def is_micro_experiment(): - """Returns True if running a micro experiment.""" - return bool(environment.get('MICRO_EXPERIMENT')) - - def get_trial_dir(fuzzer, benchmark, trial_id): """Returns the unique directory for |fuzzer|, |benchmark|, and |trial_id|.""" diff --git a/common/random_corpus_fuzzing_utils.py b/common/random_corpus_fuzzing_utils.py deleted file mode 100644 index 1bca7561e..000000000 --- a/common/random_corpus_fuzzing_utils.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Utility functions for micro-experiment run.""" - -import random -import os -import tempfile -import multiprocessing -import zipfile -from typing import List - -from common import experiment_utils -from common import filesystem -from common import logs - -MAX_SOURCE_CORPUS_FILES = 1 -CORPUS_ELEMENT_BYTES_LIMIT = 1 * 1024 * 1024 - - -def initialize_random_corpus_fuzzing(benchmarks: List[str], num_trials: int): - """Prepare corpus for micro experiment.""" - pool_args = () - with multiprocessing.Pool(*pool_args) as pool: - pool.starmap(prepare_benchmark_random_corpus, - [(benchmark, num_trials) for benchmark in benchmarks]) - logs.info('Done preparing corpus for micro experiment') - - -# pylint: disable=too-many-locals -def prepare_benchmark_random_corpus(benchmark: str, num_trials: int): - """Prepare corpus for given benchmark.""" - # Temporary location to park corpus files before get picked randomly. - benchmark_unarchived_corpora = os.path.join( - experiment_utils.get_oss_fuzz_corpora_unarchived_path(), benchmark) - filesystem.create_directory(benchmark_unarchived_corpora) - - # Unzip oss fuzz corpus. - corpus_archive_filename = f'{benchmark}.zip' - oss_fuzz_corpus_archive_path = os.path.join( - experiment_utils.get_oss_fuzz_corpora_filestore_path(), - corpus_archive_filename) - with zipfile.ZipFile(oss_fuzz_corpus_archive_path) as zip_file: - idx = 0 - for seed_corpus_file in zip_file.infolist(): - if seed_corpus_file.filename.endswith('/'): - # Ignore directories. - continue - # Allow callers to opt-out of unpacking large files. - if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT: - continue - output_filename = f'{idx:016d}' - output_file_path = os.path.join(benchmark_unarchived_corpora, - output_filename) - zip_file.extract(seed_corpus_file, output_file_path) - idx += 1 - - # Path used to store and feed seed corpus for benchmark runner - # each trial group will have the same seed input(s). - benchmark_random_corpora = os.path.join( - experiment_utils.get_random_corpora_filestore_path(), benchmark) - filesystem.create_directory(benchmark_random_corpora) - - with tempfile.TemporaryDirectory() as tmp_dir: - all_corpus_files = [] - for root, _, files in os.walk(benchmark_unarchived_corpora): - for filename in files: - file_path = os.path.join(root, filename) - all_corpus_files.append(file_path) - - all_corpus_files.sort() - trial_group_num = 0 - # All trials in the same group will start with the same - # set of randomly selected seed files. - while trial_group_num < num_trials: - trial_group_subdir = f'trial-group-{trial_group_num}' - custom_corpus_trial_dir = os.path.join(benchmark_random_corpora, - trial_group_subdir) - src_dir = os.path.join(tmp_dir, 'source') - filesystem.recreate_directory(src_dir) - - source_files = random.sample(all_corpus_files, - MAX_SOURCE_CORPUS_FILES) - for file in source_files: - filesystem.copy(file, src_dir) - - # Copy only the src directory. - filesystem.copytree(src_dir, custom_corpus_trial_dir) - trial_group_num += 1 - - return [] diff --git a/database/models.py b/database/models.py index 02bfff7a9..7cf902397 100644 --- a/database/models.py +++ b/database/models.py @@ -54,7 +54,6 @@ class Trial(Base): # Columns used for preemptible experiments. preemptible = Column(Boolean, default=False, nullable=False) preempted = Column(Boolean, default=False, nullable=False) - trial_group_num = Column(Integer, nullable=True) # Every trial has snapshots which is basically the saved state of that trial # at a given time. The snapshots field here and the trial field on Snapshot, diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 9f442d755..796c796b8 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -24,7 +24,6 @@ import time from typing import List -from common import random_corpus_fuzzing_utils from common import experiment_path as exp_path from common import experiment_utils from common import logs @@ -90,7 +89,7 @@ def _initialize_trials_in_db(trials: List[models.Trial]): db_utils.bulk_save(trials) -class Experiment: # pylint: disable=too-many-instance-attributes +class Experiment: """Class representing an experiment.""" def __init__(self, experiment_config_filepath: str): @@ -102,7 +101,6 @@ def __init__(self, experiment_config_filepath: str): self.experiment_name = self.config['experiment'] self.git_hash = self.config['git_hash'] self.preemptible = self.config.get('preemptible_runners') - self.micro_experiment = self.config.get('micro_experiment') def build_images_for_trials(fuzzers: List[str], benchmarks: List[str], @@ -125,8 +123,7 @@ def build_images_for_trials(fuzzers: List[str], benchmarks: List[str], models.Trial(fuzzer=fuzzer, experiment=experiment_name, benchmark=benchmark, - preemptible=preemptible, - trial_group_num=trial) for trial in range(num_trials) + preemptible=preemptible) for _ in range(num_trials) ] trials.extend(fuzzer_benchmark_trials) return trials @@ -153,10 +150,6 @@ def dispatcher_main(): experiment.preemptible) _initialize_trials_in_db(trials) - if experiment.micro_experiment: - random_corpus_fuzzing_utils.initialize_random_corpus_fuzzing( - experiment.benchmarks, experiment.num_trials) - create_work_subdirs(['experiment-folders', 'measurement-folders']) # Start measurer and scheduler in seperate threads/processes. diff --git a/experiment/resources/runner-startup-script-template.sh b/experiment/resources/runner-startup-script-template.sh index 5ef1e40bf..79f84c22c 100644 --- a/experiment/resources/runner-startup-script-template.sh +++ b/experiment/resources/runner-startup-script-template.sh @@ -42,8 +42,6 @@ docker run \ -e BENCHMARK={{benchmark}} \ -e EXPERIMENT={{experiment}} \ -e TRIAL_ID={{trial_id}} \ --e TRIAL_GROUP_NUM={{trial_group_num}} \ --e MICRO_EXPERIMENT={{micro_experiment}} \ -e MAX_TOTAL_TIME={{max_total_time}} \ -e SNAPSHOT_PERIOD={{snapshot_period}} \ -e NO_SEEDS={{no_seeds}} \ diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 5f61ad6a8..4771dfebf 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -74,7 +74,6 @@ def _set_default_config_values(config: Dict[str, Union[int, str, bool]], config['snapshot_period'] = config.get( 'snapshot_period', experiment_utils.DEFAULT_SNAPSHOT_SECONDS) config['private'] = config.get('private', False) - config['micro_experiment'] = config.get('micro_experiment', False) def _validate_config_parameters( @@ -188,8 +187,6 @@ def read_and_validate_experiment_config(config_filename: str) -> Dict: Requirement(False, int, False, ''), 'runner_memory': Requirement(False, str, False, ''), - 'micro_experiment': - Requirement(False, bool, False, ''), } all_params_valid = _validate_config_parameters(config, config_requirements) diff --git a/experiment/runner.py b/experiment/runner.py index b955ff665..a4efc5b6f 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -101,18 +101,6 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path): return seed_corpus_path if os.path.exists(seed_corpus_path) else None -def _unpack_random_corpus(corpus_directory): - shutil.rmtree(corpus_directory) - - benchmark = environment.get('BENCHMARK') - trial_group_num = environment.get('TRIAL_GROUP_NUM', 0) - random_corpora_dir = experiment_utils.get_random_corpora_filestore_path() - random_corpora_sub_dir = f'trial-group-{int(trial_group_num)}' - random_corpus_dir = posixpath.join(random_corpora_dir, benchmark, - random_corpora_sub_dir) - filestore_utils.cp(random_corpus_dir, corpus_directory, recursive=True) - - def _copy_custom_seed_corpus(corpus_directory): """Copy custom seed corpus provided by user""" shutil.rmtree(corpus_directory) @@ -269,9 +257,7 @@ def set_up_corpus_directories(self): FUZZ_TARGET_DIR, fuzz_target_name) input_corpus = environment.get('SEED_CORPUS_DIR') os.makedirs(input_corpus, exist_ok=True) - if environment.get('MICRO_EXPERIMENT'): - _unpack_random_corpus(input_corpus) - elif not environment.get('CUSTOM_SEED_CORPUS_DIR'): + if not environment.get('CUSTOM_SEED_CORPUS_DIR'): _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus) else: _copy_custom_seed_corpus(input_corpus) diff --git a/experiment/scheduler.py b/experiment/scheduler.py index b189eabb6..0d9da0b22 100644 --- a/experiment/scheduler.py +++ b/experiment/scheduler.py @@ -689,7 +689,7 @@ def start_trials(trials, experiment_config: dict, pool, core_allocation=None): return started_trials -class TrialProxy: # pylint: disable=too-many-instance-attributes +class TrialProxy: """A proxy object for a model.Trial. TrialProxy's allow these fields to be set and retreived without making any database calls.""" @@ -701,7 +701,6 @@ def __init__(self, trial): self.time_ended = trial.time_ended self.preemptible = trial.preemptible self.cpuset = None - self.trial_group_num = trial.trial_group_num def _initialize_logs(experiment): @@ -730,7 +729,7 @@ def _start_trial(trial: TrialProxy, experiment_config: dict, cpuset=None): logger.info('Start trial %d.', trial.id) started = create_trial_instance(trial.fuzzer, trial.benchmark, trial.id, experiment_config, trial.preemptible, - cpuset, trial.trial_group_num) + cpuset) if started: trial.time_started = datetime_now() trial.cpuset = cpuset @@ -744,7 +743,6 @@ def render_startup_script_template( # pylint: disable=too-many-arguments fuzzer: str, benchmark: str, trial_id: int, - trial_group_num: int, experiment_config: dict, cpuset=None): """Render the startup script using the template and the parameters @@ -762,8 +760,6 @@ def render_startup_script_template( # pylint: disable=too-many-arguments 'experiment': experiment, 'fuzzer': fuzzer, 'trial_id': trial_id, - 'trial_group_num': trial_group_num, - 'micro_experiment': experiment_config['micro_experiment'], 'max_total_time': experiment_config['max_total_time'], 'snapshot_period': experiment_config['snapshot_period'], 'experiment_filestore': experiment_config['experiment_filestore'], @@ -794,15 +790,13 @@ def create_trial_instance( # pylint: disable=too-many-arguments trial_id: int, experiment_config: dict, preemptible: bool, - cpuset=None, - trial_group_num: int = 0) -> bool: + cpuset=None) -> bool: """Create or start a trial instance for a specific trial_id,fuzzer,benchmark.""" instance_name = experiment_utils.get_trial_instance_name( experiment_config['experiment'], trial_id) startup_script = render_startup_script_template(instance_name, fuzzer, benchmark, trial_id, - trial_group_num, experiment_config, cpuset) startup_script_path = f'/tmp/{instance_name}-start-docker.sh' with open(startup_script_path, 'w', encoding='utf-8') as file_handle: diff --git a/experiment/test_data/experiment-config.yaml b/experiment/test_data/experiment-config.yaml index deabcee9a..4cecd0e6e 100644 --- a/experiment/test_data/experiment-config.yaml +++ b/experiment/test_data/experiment-config.yaml @@ -40,4 +40,3 @@ measurers_cpus: null runner_num_cpu_cores: 1 runner_machine_type: 'n1-standard-1' private: false -micro_experiment: false \ No newline at end of file diff --git a/experiment/test_data/local-experiment-config.yaml b/experiment/test_data/local-experiment-config.yaml index 8f57cc739..adf30ea11 100644 --- a/experiment/test_data/local-experiment-config.yaml +++ b/experiment/test_data/local-experiment-config.yaml @@ -22,4 +22,3 @@ report_filestore: /tmp/web-reports local_experiment: true benchmarks: "benchmark-1,benchmark-2" git_hash: "git-hash" -micro_experiment: false diff --git a/experiment/test_scheduler.py b/experiment/test_scheduler.py index 1a5e85e8b..7f6f3b5f8 100644 --- a/experiment/test_scheduler.py +++ b/experiment/test_scheduler.py @@ -114,8 +114,6 @@ def test_create_trial_instance(benchmark, expected_image, expected_target, -e BENCHMARK={benchmark} \\ -e EXPERIMENT=test-experiment \\ -e TRIAL_ID=9 \\ --e TRIAL_GROUP_NUM=0 \\ --e MICRO_EXPERIMENT=False \\ -e MAX_TOTAL_TIME=86400 \\ -e SNAPSHOT_PERIOD=900 \\ -e NO_SEEDS=False \\ diff --git a/service/gcbrun_experiment.py b/service/gcbrun_experiment.py index f19ab493d..b092f7e53 100644 --- a/service/gcbrun_experiment.py +++ b/service/gcbrun_experiment.py @@ -16,6 +16,7 @@ """Entrypoint for gcbrun into run_experiment. This script will get the command from the last PR comment containing "/gcbrun" and pass it to run_experiment.py which will run an experiment.""" +# a dummy change. import logging import os