diff --git a/varats-core/varats/experiment/experiment_util.py b/varats-core/varats/experiment/experiment_util.py index bad60ba6f..a30f42c8b 100644 --- a/varats-core/varats/experiment/experiment_util.py +++ b/varats-core/varats/experiment/experiment_util.py @@ -10,6 +10,7 @@ from collections import defaultdict from pathlib import Path from types import TracebackType +import yaml from benchbuild import source from benchbuild.experiment import Experiment diff --git a/varats-core/varats/report/gnu_time_report.py b/varats-core/varats/report/gnu_time_report.py index 88200ba52..1991701e7 100644 --- a/varats-core/varats/report/gnu_time_report.py +++ b/varats-core/varats/report/gnu_time_report.py @@ -74,8 +74,6 @@ def __init__(self, path: Path) -> None: TimeReport._parse_involuntary_ctx_switches(line) continue - # print("Not matched: ", line) - @property def command_name(self) -> str: """Name of the command that was executed.""" @@ -284,7 +282,7 @@ def summary(self) -> str: class WLTimeReportAggregate( WorkloadSpecificReportAggregate[TimeReport], - shorthand=TimeReport.SHORTHAND + ReportAggregate.SHORTHAND, + shorthand="WL" + TimeReport.SHORTHAND + ReportAggregate.SHORTHAND, file_type=ReportAggregate.FILE_TYPE ): """Context Manager for parsing multiple time reports stored inside a zip diff --git a/varats-core/varats/report/report.py b/varats-core/varats/report/report.py index ccbffcdbc..be4b65b36 100644 --- a/varats-core/varats/report/report.py +++ b/varats-core/varats/report/report.py @@ -655,7 +655,6 @@ def is_correct_report_type(cls, file_name: str) -> bool: except ValueError: return False - class ReportSpecification(): """Groups together multiple report types into a specification that can be used, e.g., by experiments, to request multiple reports.""" diff --git a/varats-core/varats/report/tef_report.py b/varats-core/varats/report/tef_report.py index 44bfb8e36..618e4dbaa 100644 --- a/varats-core/varats/report/tef_report.py +++ b/varats-core/varats/report/tef_report.py @@ -18,21 +18,21 @@ class TraceEventType(Enum): value: str # pylint: disable=invalid-name - DURATION_EVENT_BEGIN = 'B' - DURATION_EVENT_END = 'E' - COMPLETE_EVENT = 'X' - INSTANT_EVENT = 'i' - COUNTER_EVENT = 'C' - ASYNC_EVENT_START = 'b' - ASYNC_EVENT_INSTANT = 'n' - ASYNC_EVENT_END = 'e' - FLOW_EVENT_START = 's' - FLOW_EVENT_STEP = 't' - FLOW_EVENT_END = 'f' - SAMPLE_EVENT = 'P' + DURATION_EVENT_BEGIN = "B" + DURATION_EVENT_END = "E" + COMPLETE_EVENT = "X" + INSTANT_EVENT = "i" + COUNTER_EVENT = "C" + ASYNC_EVENT_START = "b" + ASYNC_EVENT_INSTANT = "n" + ASYNC_EVENT_END = "e" + FLOW_EVENT_START = "s" + FLOW_EVENT_STEP = "t" + FLOW_EVENT_END = "f" + SAMPLE_EVENT = "P" @staticmethod - def parse_event_type(raw_event_type: str) -> 'TraceEventType': + def parse_event_type(raw_event_type: str) -> "TraceEventType": """Parses a raw string that represents a trace-format event type and converts it to the corresponding enum value.""" for trace_event_type in TraceEventType: @@ -45,7 +45,7 @@ def __str__(self) -> str: return str(self.value) -class TraceEvent(): +class TraceEvent: """Represents a trace event that was captured during the analysis of a target program.""" @@ -56,9 +56,7 @@ def __init__( self.__name_id_mapper = name_id_mapper self.__name_id = name_id self.__category = str(json_trace_event["cat"]) - self.__event_type = TraceEventType.parse_event_type( - json_trace_event["ph"] - ) + self.__event_type = TraceEventType.parse_event_type(json_trace_event["ph"]) self.__tracing_clock_timestamp = int(json_trace_event["ts"]) self.__pid = 
int(json_trace_event["pid"]) self.__tid = int(json_trace_event["tid"]) @@ -128,9 +126,7 @@ def trace_events(self) -> tp.List[TraceEvent]: @property def stack_frames(self) -> None: - raise NotImplementedError( - "Stack frame parsing is currently not implemented!" - ) + raise NotImplementedError("Stack frame parsing is currently not implemented!") def _parse_json(self) -> None: trace_events: tp.List[TraceEvent] = list() @@ -172,7 +168,7 @@ def _parse_json(self) -> None: class TEFReportAggregate( ReportAggregate[TEFReport], shorthand=TEFReport.SHORTHAND + ReportAggregate.SHORTHAND, - file_type=ReportAggregate.FILE_TYPE + file_type=ReportAggregate.FILE_TYPE, ): """Context Manager for parsing multiple TEF reports stored inside a zip file.""" diff --git a/varats/varats/data/reports/dynamic_overhead_report.py b/varats/varats/data/reports/dynamic_overhead_report.py new file mode 100644 index 000000000..58dd4b2a7 --- /dev/null +++ b/varats/varats/data/reports/dynamic_overhead_report.py @@ -0,0 +1,53 @@ +from varats.report.report import BaseReport +from pathlib import Path +from collections import defaultdict + + +class DynamicOverheadReport( + BaseReport, shorthand="DynOverhead", file_type="txt" +): + + class RegionCounter: + + def __init__(self): + self.__in = 0 + self.__out = 0 + + def enter(self): + self.__in += 1 + + def leave(self): + self.__out += 1 + + def isvalid(self): + return self.__in == self.__out + + def count_visited(self): + return self.__in + + def __init__(self, path: Path): + super().__init__(path) + self.__entries = defaultdict(DynamicOverheadReport.RegionCounter) + + for line in open(path, "r"): + try: + command, id = line.split() + if command == "Entering": + self.__entries[id].enter() + elif command == "Leaving": + self.__entries[id].leave() + except ValueError: + continue + + self.__total_region_count = 0 + + # Generate report + for region in self.__entries.values(): + if region.isvalid(): + self.__total_region_count += region.count_visited() + + def isvalid(self) -> bool: + return all(v.isvalid() for v in self.__entries.values()) + + def regions_visited(self): + return self.__total_region_count diff --git a/varats/varats/data/reports/instrumentation_verifier_report.py b/varats/varats/data/reports/instrumentation_verifier_report.py index 2bb27e98a..47b6c1b18 100644 --- a/varats/varats/data/reports/instrumentation_verifier_report.py +++ b/varats/varats/data/reports/instrumentation_verifier_report.py @@ -3,6 +3,7 @@ import typing as tp from pathlib import Path from zipfile import ZipFile +import yaml from varats.report.report import BaseReport @@ -15,10 +16,15 @@ def __init__(self, report_path: Path) -> None: super().__init__(report_path) self.__report_data = {} + self.__metadata = {} with ZipFile(report_path, "r") as archive: for file in archive.namelist(): + if file == "metadata.yml": + with archive.open(file, "r") as f: + self.__metadata = yaml.load(f, yaml.Loader) + if not file.endswith(".ivr"): continue @@ -58,7 +64,9 @@ def __init__(self, report_path: Path) -> None: unclosed_enter_begin = content.index( 'Unclosed Region-ID(s):' ) + 1 - wrong_leaves = content[wrong_leaves_begin:-1] + failure_begin = content.index("Finalization: Failure") + wrong_leaves = content[ + wrong_leaves_begin:failure_begin - 1] unclosed_regions = content[ unclosed_enter_begin:wrong_leaves_begin - 1] @@ -137,9 +145,13 @@ def num_unentered_leaves(self, binary: str) -> int: def states(self) -> tp.Dict[str, str]: return { - binary: data['state'] # type: ignore + binary: + data['state'] # type: ignore 
for binary, data in self.__report_data.items() } def state(self, binary: str) -> str: return self.__report_data[binary]['state'] # type: ignore + + def metadata(self) -> tp.Dict[tp.Any, tp.Any]: + return self.__metadata diff --git a/varats/varats/experiments/base/time_workloads.py b/varats/varats/experiments/base/time_workloads.py index aae0d68cb..1d8d8c44c 100644 --- a/varats/varats/experiments/base/time_workloads.py +++ b/varats/varats/experiments/base/time_workloads.py @@ -1,6 +1,7 @@ """Implements an experiment that times the execution of all project binaries.""" import typing as tp +import textwrap from pathlib import Path from benchbuild import Project @@ -37,11 +38,16 @@ class TimeProjectWorkloads(OutputFolderStep): project: VProject def __init__( - self, project: Project, num: int, binary: ProjectBinaryWrapper + self, + project: Project, + num: int, + binary: ProjectBinaryWrapper, + categories: tp.List[WorkloadCategory] = [WorkloadCategory.EXAMPLE] ): super().__init__(project=project) self.__num = num self.__binary = binary + self.__workload_categories = categories def call_with_output_folder(self, tmp_dir: Path) -> actions.StepResult: return self.analyze(tmp_dir) @@ -50,9 +56,11 @@ def analyze(self, tmp_dir: Path) -> actions.StepResult: """Only create a report file.""" with local.cwd(self.project.builddir): + print(f"Step {self.__num}") for prj_command in workload_commands( - self.project, self.__binary, [WorkloadCategory.EXAMPLE] + self.project, self.__binary, self.__workload_categories ): + print("Running workload") pb_cmd = prj_command.command.as_plumbum(project=self.project) run_report_name = tmp_dir / create_workload_specific_filename( @@ -61,11 +69,21 @@ def analyze(self, tmp_dir: Path) -> actions.StepResult: run_cmd = time['-v', '-o', f'{run_report_name}', pb_cmd] + print("\t", run_cmd) + with cleanup(prj_command): - run_cmd() + run_cmd(retcode=self.__binary.valid_exit_codes) + + print("Done") return actions.StepResult.OK + def __str__(self, indent: int = 0) -> str: + return textwrap.indent( + f"* Run workloads of categories {', '.join(str(x) for x in self.__workload_categories)} " + f"for binary {self.__binary.name} ({self.__num})", indent * " " + ) + class TimeWorkloads(VersionExperiment, shorthand="TWL"): """Generates time report files.""" @@ -95,6 +113,7 @@ def actions_for_project( binary = project.binaries[0] measurement_repetitions = 2 + result_filepath = create_new_success_result_filepath( self.get_handle(), self.get_handle().report_spec().main_report, project, binary diff --git a/varats/varats/experiments/vara/compare_traced_untraced.py b/varats/varats/experiments/vara/compare_traced_untraced.py new file mode 100644 index 000000000..4875af442 --- /dev/null +++ b/varats/varats/experiments/vara/compare_traced_untraced.py @@ -0,0 +1,221 @@ +"""Module for feature performance experiments that instrument and measure the +execution performance of each binary that is produced by a project.""" +from abc import abstractmethod +import typing as tp + +from benchbuild.utils import actions + +from varats.experiment.experiment_util import ( + create_new_success_result_filepath, ZippedExperimentSteps +) +from varats.experiment.workload_util import WorkloadCategory +from varats.report.report import ReportSpecification +from varats.experiments.base.time_workloads import TimeProjectWorkloads +from varats.report.gnu_time_report import WLTimeReportAggregate + +from varats.project.varats_project import VProject + +from varats.experiments.vara.dynamic_overhead_analysis import 
OptimizerPolicyType +from varats.experiments.vara.feature_experiment import FeatureExperiment, FeatureInstrType + +MEASUREMENT_REPS = 10 + + +class RunUntraced(FeatureExperiment, shorthand="RU"): + """Build and run the untraced version of the binary""" + + NAME = "RunUntraced" + + REPORT_SPEC = ReportSpecification(WLTimeReportAggregate) + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + actions = [] + + for binary in project.binaries: + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary + ) + actions.append( + ZippedExperimentSteps( + result_filepath, [ + TimeProjectWorkloads( + project, + num, + binary, + categories=[ + WorkloadCategory.EXAMPLE, WorkloadCategory.SMALL + ] + ) for num in range(MEASUREMENT_REPS) + ] + ) + ) + + return self.get_common_tracing_actions( + project, FeatureInstrType.NONE, actions, save_temps=True + ) + + +class RunTraced(FeatureExperiment, shorthand="RT"): + """Build and run the traced version of the binary""" + + NAME = "RunTraced" + REPORT_SPEC = ReportSpecification(WLTimeReportAggregate) + + @property + @abstractmethod + def optimizer_policy(self) -> OptimizerPolicyType: + return OptimizerPolicyType.NONE + + @property + @abstractmethod + def budget(self) -> tp.Optional[int]: + return 0 + + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + + project.cflags += [ + "-mllvm", + f"-vara-optimizer-policy={self.optimizer_policy.value}", + "-mllvm", + f"-vara-optimizer-starting-budget={self.budget}", + ] + + actions = [] + for binary in project.binaries: + result_filepath = create_new_success_result_filepath( + self.get_handle(), + self.get_handle().report_spec().main_report, project, binary + ) + actions.append( + ZippedExperimentSteps( + result_filepath, [ + TimeProjectWorkloads( + project, + num, + binary, + categories=[ + WorkloadCategory.EXAMPLE, WorkloadCategory.SMALL + ] + ) for num in range(MEASUREMENT_REPS) + ] + ) + ) + + return self.get_common_tracing_actions( + project, FeatureInstrType.TEF, actions, save_temps=True + ) + + +class RunTracedNaive(RunTraced, shorthand=RunTraced.SHORTHAND + "N"): + """Build and run the traced version of the binary""" + + NAME = "RunTracedNaive" + + @property + @abstractmethod + def optimizer_policy(self) -> OptimizerPolicyType: + return OptimizerPolicyType.NAIVE + + +class RunTracedNaive20( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "20" +): + NAME = "RunTracedNaive20" + + @property + @abstractmethod + def budget(self) -> int: + return 20 + + +class RunTracedNaive40( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "40" +): + NAME = "RunTracedNaive40" + + @property + @abstractmethod + def budget(self) -> int: + return 40 + + +class RunTracedNaive60( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "60" +): + NAME = "RunTracedNaive60" + + @property + @abstractmethod + def budget(self) -> int: + return 60 + + +class RunTracedNaive80( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "80" +): + NAME = "RunTracedNaive80" + + @property + @abstractmethod + def budget(self) -> int: + return 80 + + +class RunTracedNaive100( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "100" +): + NAME = "RunTracedNaive100" + + @property + @abstractmethod + def budget(self) -> int: + return 100 + + +class RunTracedNaive200( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "200" +): + NAME = "RunTracedNaive200" + + @property + 
@abstractmethod + def budget(self) -> int: + return 200 + + +class RunTracedNaive500( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "500" +): + NAME = "RunTracedNaive500" + + @property + @abstractmethod + def budget(self) -> int: + return 500 + + +class RunTracedNaive1000( + RunTracedNaive, shorthand=RunTracedNaive.SHORTHAND + "1000" +): + NAME = "RunTracedNaive1000" + + @property + @abstractmethod + def budget(self) -> int: + return 1000 + + +class RunTracedAlternating(RunTraced, shorthand=RunTraced.SHORTHAND + "A"): + """Build and run the traced version of the binary""" + + NAME = "RunTracedAlternating" + + @property + @abstractmethod + def optimizer_policy(self) -> OptimizerPolicyType: + return OptimizerPolicyType.ALTERNATING diff --git a/varats/varats/experiments/vara/dynamic_overhead_analysis.py b/varats/varats/experiments/vara/dynamic_overhead_analysis.py new file mode 100644 index 000000000..933ab3528 --- /dev/null +++ b/varats/varats/experiments/vara/dynamic_overhead_analysis.py @@ -0,0 +1,130 @@ +from enum import Enum +from typing import MutableSequence +from varats.experiment.experiment_util import Step + +from varats.project.varats_project import VProject + +from varats.experiments.vara.instrumentation_verifier import RunInstrVerifier, RunInstrVerifierBudget + + +class OptimizerPolicyType(Enum): + NONE = "none" + NAIVE = "naive" + ALTERNATING = "alternating" + + +class RunInstrVerifierNaive20( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N20" +): + NAME = "RunInstrVerifierNaive20" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=20" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaive40( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N40" +): + NAME = "RunInstrVerifierNaive40" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=40" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaive60( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N60" +): + NAME = "RunInstrVerifierNaive60" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=60" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaive80( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N80" +): + NAME = "RunInstrVerifierNaive80" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=80" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaive100( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N100" +): + NAME = "RunInstrVerifierNaive100" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=100" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaive200( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N200" +): + NAME = "RunInstrVerifierNaive200" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + 
project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=200" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaive500( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N500" +): + NAME = "RunInstrVerifierNaive500" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=500" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaive1000( + RunInstrVerifier, shorthand=RunInstrVerifier.SHORTHAND + "N1000" +): + NAME = "RunInstrVerifierNaive1000" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-vara-optimizer-starting-budget=1000" + ] + return super().actions_for_project(project) + + +class RunInstrVerifierNaiveBudget( + RunInstrVerifierBudget, shorthand=RunInstrVerifierBudget.SHORTHAND + "N" +): + NAME = "RunInstrVerifierNaiveBudget" + + def actions_for_project(self, project: VProject) -> MutableSequence[Step]: + project.cflags += [ + "-mllvm", "-vara-optimizer-policy=naive", "-mllvm", + "-debug-only=OPT,IRT,InstrMark" + ] + return super().actions_for_project(project) diff --git a/varats/varats/experiments/vara/feature_experiment.py b/varats/varats/experiments/vara/feature_experiment.py index 52d2ffa16..0316c9190 100644 --- a/varats/varats/experiments/vara/feature_experiment.py +++ b/varats/varats/experiments/vara/feature_experiment.py @@ -39,6 +39,7 @@ FeatureModelProvider, ) from varats.report.report import ReportSpecification +import yaml class FeatureInstrType(Enum): @@ -85,6 +86,7 @@ def get_common_tracing_actions( analysis_actions: tp.List[Step], save_temps: bool = False, instruction_threshold: tp.Optional[int] = None, + lto: bool = False, ) -> tp.MutableSequence[Step]: """ Set common options and return a list of common actions for feature @@ -103,9 +105,9 @@ def get_common_tracing_actions( """ project.cflags += self.get_vara_feature_cflags(project) project.cflags += self.get_vara_tracing_cflags( - instr_type, save_temps, instruction_threshold=instruction_threshold + instr_type, save_temps, instruction_threshold=instruction_threshold, lto=lto ) - project.ldflags += self.get_vara_tracing_ldflags() + project.ldflags += self.get_vara_tracing_ldflags(lto=lto) # runtime and compiler extensions project.runtime_extension = run.RuntimeExtension(project, self) \ @@ -166,7 +168,8 @@ def get_vara_tracing_cflags( instr_type: FeatureInstrType, save_temps: bool = False, project: tp.Optional[VProject] = None, - instruction_threshold: tp.Optional[int] = None + instruction_threshold: tp.Optional[int] = None, + lto: bool = True ) -> tp.List[str]: """ Returns the cflags needed to trace projects with VaRA, using the @@ -183,9 +186,14 @@ def get_vara_tracing_cflags( c_flags = [] if instr_type != FeatureInstrType.NONE: c_flags += ["-fsanitize=vara", f"-fvara-instr={instr_type.value}"] + + if lto: + c_flags += ["-flto"] + c_flags += [ - "-flto", "-fuse-ld=lld", "-flegacy-pass-manager", - "-fno-omit-frame-pointer" + "-fuse-ld=lld", + "-flegacy-pass-manager", + "-fno-omit-frame-pointer", ] if instruction_threshold is not None: # For test projects, do not exclude small regions @@ -193,19 +201,20 @@ def get_vara_tracing_cflags( instruction_threshold = 1 c_flags += [f"-fvara-instruction-threshold={instruction_threshold}"] + if save_temps: c_flags += 
["-Wl,-plugin-opt=save-temps"] return c_flags @staticmethod - def get_vara_tracing_ldflags() -> tp.List[str]: + def get_vara_tracing_ldflags(lto: bool = True) -> tp.List[str]: """ Returns the ldflags needed to instrument projects with VaRA during LTO. Returns: ldflags for VaRA LTO support """ - return ["-flto"] + return ["-flto"] if lto else [] class RunVaRATracedWorkloads(ProjectStep): # type: ignore @@ -220,11 +229,15 @@ def __init__( self, project: VProject, experiment_handle: ExperimentHandle, - report_file_ending: str = "json" + report_file_ending: str = "json", + workload_categories: tp.List[WorkloadCategory] = [ + WorkloadCategory.EXAMPLE + ] ): super().__init__(project=project) self.__experiment_handle = experiment_handle self.__report_file_ending = report_file_ending + self.__workload_categories = workload_categories def __call__(self) -> StepResult: return self.run_traced_code() @@ -250,8 +263,17 @@ def run_traced_code(self) -> StepResult: with local.cwd(local.path(self.project.builddir)): with ZippedReportFolder(result_filepath.full_path()) as tmp_dir: for prj_command in workload_commands( - self.project, binary, [WorkloadCategory.EXAMPLE] + self.project, binary, self.__workload_categories ): + metadata_obj = { + "cflags": self.project.cflags, + "ldflags": self.project.ldflags + } + + local_metadata_path = Path(tmp_dir) / "metadata.yml" + with open(local_metadata_path, "w") as f: + f.write(yaml.dump(metadata_obj)) + local_tracefile_path = Path( tmp_dir ) / f"trace_{prj_command.command.label}" \ @@ -260,9 +282,6 @@ def run_traced_code(self) -> StepResult: pb_cmd = prj_command.command.as_plumbum( project=self.project ) - print( - f"Running example {prj_command.command.label}" - ) extra_options = get_extra_config_options( self.project diff --git a/varats/varats/experiments/vara/feature_perf_runner.py b/varats/varats/experiments/vara/feature_perf_runner.py index ad6c3b424..c2f3b9d78 100644 --- a/varats/varats/experiments/vara/feature_perf_runner.py +++ b/varats/varats/experiments/vara/feature_perf_runner.py @@ -19,7 +19,6 @@ from varats.report.report import ReportSpecification from varats.report.tef_report import TEFReport - class FeaturePerfRunner(FeatureExperiment, shorthand="FPR"): """Test runner for feature performance.""" @@ -48,8 +47,9 @@ def actions_for_project( project.ldflags += self.get_vara_tracing_ldflags() # Add the required runtime extensions to the project(s). - project.runtime_extension = run.RuntimeExtension(project, self) \ - << time.RunWithTime() + project.runtime_extension = ( + run.RuntimeExtension(project, self) << time.RunWithTime() + ) # Add the required compiler extensions to the project(s). 
project.compiler_extension = compiler.RunCompiler(project, self) \ diff --git a/varats/varats/experiments/vara/feature_tracing_stats.py b/varats/varats/experiments/vara/feature_tracing_stats.py index 873236204..3273facc3 100644 --- a/varats/varats/experiments/vara/feature_tracing_stats.py +++ b/varats/varats/experiments/vara/feature_tracing_stats.py @@ -11,6 +11,8 @@ from benchbuild.utils import actions from benchbuild.utils.cmd import bpftrace, sudo from plumbum import BG + +from plumbum import local from plumbum.commands.modifiers import Future from varats.data.reports.feature_tracing_stats_report import ( @@ -57,50 +59,51 @@ def __call__(self) -> actions.StepResult: continue # get workload to use - workloads = workload_commands( - self.project, binary, [WorkloadCategory.MEDIUM] - ) - if len(workloads) == 0: - print( - f"No workload for project={self.project.name} " - f"binary={binary.name}. Skipping." + with local.cwd(local.path(self.project.builddir)): + workloads = workload_commands( + self.project, binary, [WorkloadCategory.SMALL, WorkloadCategory.EXAMPLE] ) - continue - if len(workloads) > 1: - raise RuntimeError( - "Currently, only a single workload is supported. " - f"project={self.project.name} binary={binary.name}" + if len(workloads) == 0: + print( + f"No workload for project={self.project.name} " + f"binary={binary.name}. Skipping." + ) + continue + if len(workloads) > 1: + raise RuntimeError( + "Currently, only a single workload is supported. " + f"project={self.project.name} binary={binary.name}" + ) + workload = workloads[0] + + # report path + stats_report = create_new_success_result_filepath( + self.__experiment_handle, FeatureTracingStatsReport, + self.project, binary ) - workload = workloads[0] - - # report path - stats_report = create_new_success_result_filepath( - self.__experiment_handle, FeatureTracingStatsReport, - self.project, binary - ) - - # attach bpftrace script - bpftrace_script = Path( - VaRA.install_location(), - "share/vara/perf_bpf_tracing/UsdtExecutionStats.bt" - ) - - # assertion: Can be run without sudo password prompt - bpftrace_cmd = bpftrace["-f", "json", "-o", stats_report, - bpftrace_script, - self.project.source_of_primary / - binary.path] - bpftrace_cmd = sudo[bpftrace_cmd] - bpftrace_runner: Future = bpftrace_cmd & BG - sleep(3) # give bpftrace time to start up - - # execute binary with workload - run_cmd = workload.command.as_plumbum(project=self.project) - with cleanup(workload): - bb.watch(run_cmd)() - - # Wait for bpftrace running in background to exit. - bpftrace_runner.wait() + + # attach bpftrace script + bpftrace_script = Path( + VaRA.install_location(), + "share/vara/perf_bpf_tracing/UsdtExecutionStats.bt" + ) + + # assertion: Can be run without sudo password prompt + bpftrace_cmd = bpftrace["-f", "json", "-o", stats_report, + bpftrace_script, + self.project.source_of_primary / + binary.path] + bpftrace_cmd = sudo[bpftrace_cmd] + bpftrace_runner: Future = bpftrace_cmd & BG + sleep(3) # give bpftrace time to start up + + # execute binary with workload + run_cmd = workload.command.as_plumbum(project=self.project) + with cleanup(workload): + bb.watch(run_cmd)(retcode=binary.valid_exit_codes) + + # Wait for bpftrace running in background to exit. 
+ bpftrace_runner.wait() return actions.StepResult.OK diff --git a/varats/varats/experiments/vara/instrumentation_verifier.py b/varats/varats/experiments/vara/instrumentation_verifier.py index ed54cf05f..c653f8b4a 100644 --- a/varats/varats/experiments/vara/instrumentation_verifier.py +++ b/varats/varats/experiments/vara/instrumentation_verifier.py @@ -2,16 +2,12 @@ is used during execution to check if regions are correctly opend/closed.""" import typing as tp -from benchbuild.extensions import compiler, run from benchbuild.utils import actions +from varats.experiment.workload_util import WorkloadCategory from varats.data.reports.instrumentation_verifier_report import ( InstrVerifierReport, ) -from varats.experiment.experiment_util import ( - get_default_compile_error_wrapped, - WithUnlimitedStackSize, -) from varats.experiments.vara.feature_experiment import ( FeatureExperiment, RunVaRATracedWorkloads, @@ -19,6 +15,7 @@ ) from varats.project.varats_project import VProject from varats.report.report import ReportSpecification +from varats.experiments.vara.multi_compile_experiment import VaryingStartingBudgetExperiment class RunInstrVerifier(FeatureExperiment, shorthand="RIV"): @@ -38,40 +35,59 @@ def actions_for_project( Args: project: to analyze """ - project.cflags += self.get_vara_feature_cflags(project) - - project.cflags += self.get_vara_tracing_cflags( - FeatureInstrType.VERIFY, True + analysis_actions = [ + RunVaRATracedWorkloads( + project, + self.get_handle(), + report_file_ending="ivr", + workload_categories=[ + WorkloadCategory.EXAMPLE, WorkloadCategory.SMALL + ] + ) + ] + + return self.get_common_tracing_actions( + project, + FeatureInstrType.VERIFY, + analysis_actions, + save_temps=True, + instruction_threshold=0 ) - # Ensure that we detect all regions, when verifying - project.cflags += ["-fvara-instruction-threshold=0"] - - # Add debug information, so traces can be better interpreted - project.cflags += ["-g"] - project.ldflags += self.get_vara_tracing_ldflags() +class RunInstrVerifierBudget(VaryingStartingBudgetExperiment, shorthand="RIVB"): + """Test runner for feature performance.""" - # Add the required runtime extensions to the project(s). - project.runtime_extension = run.RuntimeExtension(project, self) + NAME = "RunInstrVerifierBudget" - # Add the required compiler extensions to the project(s). - project.compiler_extension = compiler.RunCompiler(project, self) \ - << WithUnlimitedStackSize() + REPORT_SPEC = ReportSpecification(InstrVerifierReport) - # Add own error handler to compile step. - project.compile = get_default_compile_error_wrapped( - self.get_handle(), project, self.REPORT_SPEC.main_report - ) + def actions_for_project( + self, project: VProject + ) -> tp.MutableSequence[actions.Step]: + """ + Returns the specified steps to run the project(s) specified in the call + in a fixed order. 
- analysis_actions = [] + Args: + project: to analyze + """ - analysis_actions.append(actions.Compile(project)) - analysis_actions.append( + analysis_actions = [ RunVaRATracedWorkloads( - project, self.get_handle(), report_file_ending="ivr" + project, + self.get_handle(), + report_file_ending="ivr", + workload_categories=[ + WorkloadCategory.EXAMPLE, WorkloadCategory.SMALL + ], ) + ] + + return self.get_common_tracing_actions( + project, + FeatureInstrType.VERIFY, + analysis_actions, + save_temps=True, + instruction_threshold=0 ) - analysis_actions.append(actions.Clean(project)) - - return analysis_actions diff --git a/varats/varats/experiments/vara/multi_compile_experiment.py b/varats/varats/experiments/vara/multi_compile_experiment.py new file mode 100644 index 000000000..156ca372e --- /dev/null +++ b/varats/varats/experiments/vara/multi_compile_experiment.py @@ -0,0 +1,144 @@ +"""Base class experiment and utilities for experiments that work with +features.""" +import typing as tp +from abc import abstractmethod + +from benchbuild.utils.actions import ( + Step, +) +from benchbuild.project import build_dir +import benchbuild.utils.actions as actns +from varats.experiment.experiment_util import Project +from varats.project.varats_project import VProject +from varats.report.report import ReportSpecification +from varats.experiments.vara.feature_experiment import FeatureExperiment +from benchbuild.experiment import Actions + + +class Flags: + + def __init__( + self, + cflags: tp.Optional[tp.List[str]] = None, + ldflags: tp.Optional[tp.List[str]] = None, + result_folder_name: tp.Optional[str] = None + ): + self.__cflags = cflags or [] + self.__ldflags = ldflags or [] + self.__result_folder_name = result_folder_name + + @property + def cflags(self) -> tp.List[str]: + return self.__cflags + + @property + def ldflags(self) -> tp.List[str]: + return self.__ldflags + + @property + def result_folder_name(self) -> tp.Optional[str]: + return self.__result_folder_name + + def __str__(self): + return f"Flags(cflags={self.cflags}, ldflags={self.ldflags}, result_folder_name={self.result_folder_name})" + + __repr__ = __str__ + + +class MultiCompileExperiment(FeatureExperiment, shorthand=""): + """Base class experiment for feature specific experiments.""" + + NAME = "MultiCompileExperiment" + + REPORT_SPEC = ReportSpecification() + + @abstractmethod + def actions_for_project(self, project: VProject, + flags: Flags) -> tp.MutableSequence[Step]: + """Get the actions a project wants to run.""" + + def get_flags(self) -> tp.List[Flags]: + """Get a list of flags that should be changed for every compilation attempt""" + return [Flags()] + + def actions(self) -> Actions: + actions: Actions = [] + + def new_actions(self, proj: Project, flags: Flags) -> Actions: + atomic_actions: Actions = [ + tp.cast(Step, actns.Clean(proj)), + actns.MakeBuildDir(proj), + actns.Echo( + message=f"Selected {proj.name} with version {version_str}" + ), + ] + if flags.cflags: + atomic_actions.append( + actns.Echo(message=f"Set additional cflags {flags.cflags}") + ) + if flags.ldflags: + atomic_actions.append( + actns.Echo( + message=f"Set additional ldflags {flags.ldflags}" + ) + ) + if flags.result_folder_name: + atomic_actions.append( + actns.Echo( + message= + f"Set result folder name override {flags.result_folder_name}" + ) + ) + atomic_actions.append(actns.ProjectEnvironment(proj)) + atomic_actions.extend(self.actions_for_project(proj)) + return [tp.cast(Step, actns.RequireAll(actions=atomic_actions))] + + for prj_cls in 
self.projects: + prj_actions: Actions = [] + + for revision in self.sample(prj_cls): + version_str = str(revision) + + p = prj_cls(revision) + + for flags in self.get_flags(): + p_clone = p.clone() + + p_clone.cflags = flags.cflags + p_clone.ldflags = flags.ldflags + result_folder = flags.result_folder_name or str(p.run_uuid) + p_clone.builddir = build_dir(self, p_clone) / result_folder + + prj_actions = new_actions(self, p_clone, flags) + actions.extend(prj_actions) + + if actions: + actions.append(actns.CleanExtra()) + + return actions + + +STARTING_BUDGET = 0 +END_BUDGET = 100 +BUDGET_STEP = 20 + + +class VaryingStartingBudgetExperiment(MultiCompileExperiment, shorthand=""): + NAME = "VaryingStartingBudgetExperiment" + + REPORT_SPEC = ReportSpecification() + + @abstractmethod + def actions_for_project(self, project: VProject, + flags: Flags) -> tp.MutableSequence[Step]: + """Get the actions a project wants to run.""" + + def get_flags(self) -> tp.List[Flags]: + flags = [] + for budget in range(STARTING_BUDGET, END_BUDGET, BUDGET_STEP): + f = Flags( + cflags=["-mllvm", f"-vara-optimizer-starting-budget={budget}"], + result_folder_name=f"starting_budget_{budget}" + ) + flags.append(f) + return flags diff --git a/varats/varats/experiments/vara/output_verifier.py b/varats/varats/experiments/vara/output_verifier.py new file mode 100644 index 000000000..e69de29bb diff --git a/varats/varats/plots/compare_traced.py b/varats/varats/plots/compare_traced.py new file mode 100644 index 000000000..d57d5bd5d --- /dev/null +++ b/varats/varats/plots/compare_traced.py @@ -0,0 +1,96 @@ +"""Example table that uses different workloads and visualizes the time it took +to run them.""" +import typing as tp + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +import numpy as np + +from varats.paper.paper_config import get_loaded_paper_config +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.report.gnu_time_report import WLTimeReportAggregate +from varats.revision.revisions import get_processed_revisions_files +from varats.ts_utils.click_param_types import REQUIRE_MULTI_EXPERIMENT_TYPE +from varats.utils.git_util import FullCommitHash + +# TODO: Is there a better way to include revisions of all workloads than to use +# only_newest=False ? +# Maybe the result files are not defined correctly. We should be able to find +# the revision files for all workloads with only_newest=True... 
+ + +class CompareRuntimesPlot(Plot, plot_name="compare_runtimes"): + + def plot(self, view_mode: bool) -> None: + case_studies = get_loaded_paper_config().get_all_case_studies() + + df = pd.DataFrame() + + for case_study in case_studies: + project_name = case_study.project_name + print(project_name) + + for experiment in self.plot_kwargs["experiment_type"]: + print(experiment.NAME) + report_files = get_processed_revisions_files( + project_name, + experiment, + WLTimeReportAggregate, + get_case_study_file_name_filter(case_study), + only_newest=False + ) + + for report_filepath in report_files: + agg_time_report = WLTimeReportAggregate( + report_filepath.full_path() + ) + report_file = agg_time_report.filename + + for workload_name in agg_time_report.workload_names(): + print(workload_name) + for wall_clock_time in \ + agg_time_report.measurements_wall_clock_time( + workload_name + ): + new_row = { + "Binary": + report_file.binary_name, + "Experiment": + experiment.NAME, + "Mean wall time (msecs)": + wall_clock_time * 1000, + } + + df = pd.concat([df, pd.DataFrame([new_row])], + ignore_index=True) + # df = df.append(new_row, ignore_index=True) + + fig, ax = plt.subplots() + fig.set_size_inches(11.7, 8.27) + sns.barplot( + x="Binary", + y="Mean wall time (msecs)", + hue="Experiment", + estimator=np.mean, + data=df, + ax=ax, + ) + sns.despine() + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise NotImplementedError + + +class CompareRuntimesPlotGenerator( + PlotGenerator, + generator_name="compare-runtimes", + options=[REQUIRE_MULTI_EXPERIMENT_TYPE] +): + + def generate(self) -> tp.List[Plot]: + return [CompareRuntimesPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/plots/compare_traced_budget.py b/varats/varats/plots/compare_traced_budget.py new file mode 100644 index 000000000..8887b4a72 --- /dev/null +++ b/varats/varats/plots/compare_traced_budget.py @@ -0,0 +1,113 @@ +"""Example table that uses different workloads and visualizes the time it took +to run them.""" +import typing as tp +import re + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +import numpy as np + +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.report.gnu_time_report import WLTimeReportAggregate +from varats.revision.revisions import get_processed_revisions_files +from varats.ts_utils.click_param_types import REQUIRE_CASE_STUDY, REQUIRE_MULTI_EXPERIMENT_TYPE +from varats.utils.git_util import FullCommitHash + +starting_budget_command_regex = re.compile("RunTracedNaive([0-9]+)") + + +class CompareRuntimesBudgetPlot(Plot, plot_name="compare_runtimes_budget"): + + def plot(self, view_mode: bool) -> None: + df = pd.DataFrame() + + case_study = self.plot_kwargs["case_study"] + project_name = case_study.project_name + + experiments = self.plot_kwargs["experiment_type"] + + for experiment in experiments: + report_files = get_processed_revisions_files( + project_name, + experiment, + WLTimeReportAggregate, + get_case_study_file_name_filter(case_study), + only_newest=False + ) + + budget = "0" + if ( + m := re.search(starting_budget_command_regex, experiment.NAME) + ) is not None: + budget = m.group(1) + elif experiment.NAME == "RunUntraced": + budget = "Untraced" + + for report_filepath in report_files: + agg_time_report = WLTimeReportAggregate( + report_filepath.full_path() + ) + + for workload_name in 
agg_time_report.workload_names(): + for report in agg_time_report.reports(workload_name): + new_row = { + "Workload": + workload_name, + "Budget": + budget, + "Mean wall time (secs)": + report.wall_clock_time.total_seconds() + } + + df = pd.concat([df, pd.DataFrame([new_row])], + ignore_index=True) + + df = df.drop(df[df["Workload"] == "example.cnf"].index) + workloads = df["Workload"].unique() + + fig, axs = plt.subplots((1 + len(workloads)) // 2, + 2 - len(workloads) % 2, + constrained_layout=True) + + for i, workload in enumerate(workloads): + if len(workloads) == 1: + ax = axs + elif len(workloads) == 2: + ax = axs[i % 2] + else: + x, y = divmod(i, 2) + ax = axs[(x, y)] + + d = df[df["Workload"] == workload] + + sns.barplot( + x="Budget", + y="Mean wall time (secs)", + estimator=np.mean, + data=d, + ax=ax, + ) + ax.set_xticks( + ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha='right' + ) + ax.set_title(workload) + + fig.suptitle(f"Runtimes by budget for {case_study.project_name}") + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise NotImplementedError + + +class CompareRuntimesBudgetPlotCSGenerator( + PlotGenerator, + generator_name="compare-runtimes-budget", + options=[REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_CASE_STUDY] +): + + def generate(self) -> tp.List[Plot]: + return [CompareRuntimesBudgetPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/plots/compare_traced_cs.py b/varats/varats/plots/compare_traced_cs.py new file mode 100644 index 000000000..76bdacb6b --- /dev/null +++ b/varats/varats/plots/compare_traced_cs.py @@ -0,0 +1,90 @@ +"""Example table that uses different workloads and visualizes the time it took +to run them.""" +import typing as tp + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +import numpy as np + +from varats.paper.paper_config import get_loaded_paper_config +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.report.gnu_time_report import WLTimeReportAggregate +from varats.revision.revisions import get_processed_revisions_files +from varats.ts_utils.click_param_types import REQUIRE_MULTI_CASE_STUDY, REQUIRE_MULTI_EXPERIMENT_TYPE +from varats.utils.git_util import FullCommitHash + +# TODO: Is there a better way to include revisions of all workloads than to use +# only_newest=False ? +# Maybe the result files are not defined correctly. We should be able to find +# the revision files for all workloads with only_newest=True... 
+ + +class CompareRuntimesCSPlot(Plot, plot_name="compare_runtimes_cs"): + + def plot(self, view_mode: bool) -> None: + df = pd.DataFrame() + + print(self.plot_kwargs["case_study"]) + for case_study in self.plot_kwargs["case_study"]: + project_name = case_study.project_name + + for experiment in self.plot_kwargs["experiment_type"]: + report_files = get_processed_revisions_files( + project_name, + experiment, + WLTimeReportAggregate, + get_case_study_file_name_filter(case_study), + only_newest=False + ) + + for report_filepath in report_files: + agg_time_report = WLTimeReportAggregate( + report_filepath.full_path() + ) + + for workload_name in agg_time_report.workload_names(): + for wall_clock_time in \ + agg_time_report.measurements_wall_clock_time( + workload_name + ): + new_row = { + "Workload": + workload_name, + "Experiment": + experiment.NAME, + "Mean wall time (msecs)": + wall_clock_time * 1000, + } + + df = pd.concat([df, pd.DataFrame([new_row])], + ignore_index=True) + + fig, ax = plt.subplots() + fig.set_size_inches(11.7, 8.27) + sns.barplot( + x="Workload", + y="Mean wall time (msecs)", + hue="Experiment", + estimator=np.mean, + data=df, + ax=ax, + ) + sns.despine() + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise NotImplementedError + + +class CompareRuntimesPlotCSGenerator( + PlotGenerator, + generator_name="compare-runtimes-cs", + options=[REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_MULTI_CASE_STUDY] +): + + def generate(self) -> tp.List[Plot]: + return [CompareRuntimesCSPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/plots/compare_traced_cs_budget_labels.py b/varats/varats/plots/compare_traced_cs_budget_labels.py new file mode 100644 index 000000000..a38ae64b2 --- /dev/null +++ b/varats/varats/plots/compare_traced_cs_budget_labels.py @@ -0,0 +1,90 @@ +"""Example table that uses different workloads and visualizes the time it took +to run them.""" +import typing as tp + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns +import numpy as np + +from varats.paper.paper_config import get_loaded_paper_config +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.report.gnu_time_report import WLTimeReportAggregate +from varats.revision.revisions import get_processed_revisions_files +from varats.ts_utils.click_param_types import REQUIRE_MULTI_CASE_STUDY, REQUIRE_MULTI_EXPERIMENT_TYPE +from varats.utils.git_util import FullCommitHash + +# TODO: Is there a better way to include revisions of all workloads than to use +# only_newest=False ? +# Maybe the result files are not defined correctly. We should be able to find +# the revision files for all workloads with only_newest=True... 
+ + +class CompareRuntimesCSBPlot(Plot, plot_name="compare_runtimes_csb"): + + def plot(self, view_mode: bool) -> None: + df = pd.DataFrame() + + print(self.plot_kwargs["case_study"]) + for case_study in self.plot_kwargs["case_study"]: + project_name = case_study.project_name + + for experiment in self.plot_kwargs["experiment_type"]: + report_files = get_processed_revisions_files( + project_name, + experiment, + WLTimeReportAggregate, + get_case_study_file_name_filter(case_study), + only_newest=True + ) + + for report_filepath in report_files: + agg_time_report = WLTimeReportAggregate( + report_filepath.full_path() + ) + + for workload_name in agg_time_report.workload_names(): + for wall_clock_time in \ + agg_time_report.measurements_wall_clock_time( + workload_name + ): + new_row = { + "Workload": + workload_name, + "Experiment": + experiment.NAME, + "Mean wall time (msecs)": + wall_clock_time * 1000, + } + + df = pd.concat([df, pd.DataFrame([new_row])], + ignore_index=True) + + fig, ax = plt.subplots() + fig.set_size_inches(11.7, 8.27) + sns.barplot( + x="Workload", + y="Mean wall time (msecs)", + hue="Experiment", + estimator=np.mean, + data=df, + ax=ax, + ) + sns.despine() + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise NotImplementedError + + +class CompareRuntimesPlotCSBGenerator( + PlotGenerator, + generator_name="compare-runtimes-csb", + options=[REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_MULTI_CASE_STUDY] +): + + def generate(self) -> tp.List[Plot]: + return [CompareRuntimesCSPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/plots/dynamic_overhead_plot.py b/varats/varats/plots/dynamic_overhead_plot.py new file mode 100644 index 000000000..54caea5e6 --- /dev/null +++ b/varats/varats/plots/dynamic_overhead_plot.py @@ -0,0 +1,73 @@ +import typing as tp + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns + +from varats.paper.paper_config import get_loaded_paper_config +from varats.paper_mgmt.case_study import get_case_study_file_name_filter +from varats.plot.plot import Plot +from varats.plot.plots import PlotGenerator +from varats.data.reports.dynamic_overhead_report import DynamicOverheadReport +from varats.revision.revisions import get_processed_revisions_files +from varats.ts_utils.click_param_types import REQUIRE_MULTI_EXPERIMENT_TYPE +from varats.utils.git_util import FullCommitHash + + +class DynamicOverheadPlot(Plot, plot_name="dynamic_overhead"): + + def plot(self, view_mode: bool) -> None: + case_studies = get_loaded_paper_config().get_all_case_studies() + + df = pd.DataFrame() + + for case_study in case_studies: + project_name = case_study.project_name + + for experiment in self.plot_kwargs["experiment_type"]: + + report_files = get_processed_revisions_files( + project_name, + experiment, + DynamicOverheadReport, + get_case_study_file_name_filter(case_study), + only_newest=False + ) + + + for report_filepath in report_files: + report = DynamicOverheadReport(report_filepath.full_path()) + + new_row = { + "Name": report.filename.binary_name, + "Visited regions": report.regions_visited(), + } + + df = pd.concat([df, pd.DataFrame([new_row])], + ignore_index=True) + + fig, ax = plt.subplots() + fig.set_size_inches(11.7, 8.27) + sns.barplot( + x="Name", + y="Visited regions", + hue="Name", + data=df, + ax=ax, + ) + sns.despine() + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise NotImplementedError + + +class 
DynamicOverheadPlotGenerator( + PlotGenerator, + generator_name="dynamic-overhead", + options=[REQUIRE_MULTI_EXPERIMENT_TYPE] +): + + def generate(self) -> tp.List[Plot]: + return [DynamicOverheadPlot(self.plot_config, **self.plot_kwargs)] diff --git a/varats/varats/plots/instrumentation_verifier_compare_experiments_overview.py b/varats/varats/plots/instrumentation_verifier_compare_experiments_overview.py new file mode 100644 index 000000000..c1c55174f --- /dev/null +++ b/varats/varats/plots/instrumentation_verifier_compare_experiments_overview.py @@ -0,0 +1,139 @@ +"""Generate graphs that show an overview of the instrumentation verifier +experiment state for all case studies in the paper config.""" + +import typing as tp + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +import varats.paper.paper_config as PC +from varats.data.reports.instrumentation_verifier_report import ( + InstrVerifierReport, +) +from varats.plot.plot import Plot, PlotDataEmpty +from varats.plot.plots import PlotGenerator +from varats.report.report import ReportFilepath +from varats.revision.revisions import get_all_revisions_files +from varats.ts_utils.click_param_types import REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_MULTI_CASE_STUDY +from varats.utils.exceptions import UnsupportedOperation +from varats.utils.git_util import FullCommitHash + + +class InstrumentationOverviewCompareExperimentsPlot( + Plot, plot_name="instrumentation_overview_compare_experiments_plot" +): + """ + Plot configuration for the instrumentation verifier experiment. + + This plot shows an overview of the instrumentation verifier state for all + case studies in the paper config. + """ + + def plot(self, view_mode: bool) -> None: + self._generate_plot(**self.plot_kwargs) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation + + @staticmethod + def _generate_plot(**kwargs: tp.Any) -> None: + case_study = kwargs['case_study'] + + rows = [] + + for experiment in kwargs["experiment_type"]: + revisions_files: tp.List[ReportFilepath] = get_all_revisions_files( + case_study.project_name, experiment, only_newest=False + ) + + reports: tp.List[InstrVerifierReport] = [ + InstrVerifierReport(rev_file.full_path()) + for rev_file in revisions_files + ] + + if len(reports) == 0: + raise PlotDataEmpty() + + for report in reports: + for binary in report.binaries(): + rows.append({ + "experiment": experiment.NAME, + "binary": binary, + "enters": report.num_enters(binary), + "leaves": report.num_leaves(binary), + "unclosed_enters": report.num_unclosed_enters(binary), + "unentered_leaves": report.num_unentered_leaves(binary) + }) + + df = pd.DataFrame(rows) + binaries = df["binary"].unique() + experiments = df["experiment"].unique() + fig, axs = plt.subplots((1 + len(binaries)) // 2, + 2 - len(binaries) % 2, + constrained_layout=True) + + for i, binary in enumerate(binaries): + if len(binaries) == 1: + ax = axs + elif len(binaries) == 2: + ax = axs[i % 2] + else: + x, y = divmod(i, 2) + ax = axs[(x, y)] + + d = df[df["binary"] == binary] + + num_enters = np.array(d["enters"]) + num_leaves = np.array(d["leaves"]) + num_unclosed_enters = np.array(d["unclosed_enters"]) + num_unentered_leaves = np.array(d["unentered_leaves"]) + + ax.bar(experiments, num_enters) + ax.bar( + experiments, + num_leaves, + bottom=num_enters, + ) + ax.bar( + experiments, + num_unclosed_enters, + bottom=num_enters + num_leaves + ) + ax.bar( + experiments, + num_unentered_leaves, + 
bottom=num_enters + num_leaves + num_unclosed_enters + ) + + ax.set_ylabel("Number of events") + ax.set_xticks( + ax.get_xticks(), ax.get_xticklabels() + ) + ax.set_title(binary) + + fig.suptitle( + f"Instrumentation Verifier " + f"Overview for {case_study.project_name}" + ) + fig.legend( + labels=["Enters", "Leaves", "Unclosed enters", "Unentered leaves"] + ) + + +class VerifierExperimentCompareOverviewGenerator( + PlotGenerator, + generator_name="iv-ce-overview-plot", + options=[REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_MULTI_CASE_STUDY] +): + """Generates a single pc-overview plot for the current paper config.""" + + def generate(self) -> tp.List[Plot]: + case_studies = self.plot_kwargs.pop("case_study") + return [ + InstrumentationOverviewCompareExperimentsPlot( + self.plot_config, cse_study=cs, **self.plot_kwargs + ) for cs in case_studies + ] diff --git a/varats/varats/plots/instrumentation_verifier_compare_experiments_overview_budget_labels.py b/varats/varats/plots/instrumentation_verifier_compare_experiments_overview_budget_labels.py new file mode 100644 index 000000000..6535dadec --- /dev/null +++ b/varats/varats/plots/instrumentation_verifier_compare_experiments_overview_budget_labels.py @@ -0,0 +1,155 @@ +"""Generate graphs that show an overview of the instrumentation verifier +experiment state for all case studies in the paper config.""" + +import typing as tp + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +import varats.paper.paper_config as PC +from varats.data.reports.instrumentation_verifier_report import ( + InstrVerifierReport, +) +from varats.plot.plot import Plot, PlotDataEmpty +from varats.plot.plots import PlotGenerator +from varats.report.report import ReportFilepath +from varats.revision.revisions import get_all_revisions_files +from varats.ts_utils.click_param_types import REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_MULTI_CASE_STUDY +from varats.utils.exceptions import UnsupportedOperation +from varats.utils.git_util import FullCommitHash +import re +from varats.paper_mgmt.case_study import get_case_study_file_name_filter + +starting_budget_command_regex = re.compile("RunInstrVerifierNaive([0-9]+)") + + +class InstrumentationOverviewCompareExperimentsBudgetLabelsPlot( + Plot, + plot_name="instrumentation_overview_compare_experiments_budget_labels_plot" +): + """ + Plot configuration for the instrumentation verifier experiment. + + This plot shows an overview of the instrumentation verifier state for all + case studies in the paper config. 
+ """ + + def plot(self, view_mode: bool) -> None: + self._generate_plot(**self.plot_kwargs) + + def calc_missing_revisions( + self, boundary_gradient: float + ) -> tp.Set[FullCommitHash]: + raise UnsupportedOperation + + @staticmethod + def _generate_plot(**kwargs: tp.Any) -> None: + case_study = kwargs['case_study'] + + rows = [] + + for experiment in kwargs["experiment_type"]: + revisions_files: tp.List[ReportFilepath] = get_all_revisions_files( + case_study.project_name, + experiment, + InstrVerifierReport, + get_case_study_file_name_filter(case_study), + only_newest=False + ) + + reports: tp.List[InstrVerifierReport] = [ + InstrVerifierReport(rev_file.full_path()) + for rev_file in revisions_files + ] + + if len(reports) == 0: + raise PlotDataEmpty() + + budget = 0 + if ( + m := re.search(starting_budget_command_regex, experiment.NAME) + ) is not None: + budget = int(m.group(1)) + + for report in reports: + for binary in report.binaries(): + rows.append({ + "experiment": str(budget), + "binary": binary, + "enters": report.num_enters(binary), + "leaves": report.num_leaves(binary), + "unclosed_enters": report.num_unclosed_enters(binary), + "unentered_leaves": report.num_unentered_leaves(binary) + }) + + df = pd.DataFrame(rows) + df = df.drop(df[df["binary"] == "example"].index) + binaries = df["binary"].unique() + + experiments = df["experiment"].unique() + + fig, axs = plt.subplots((1 + len(binaries)) // 2, + 2 - len(binaries) % 2, + constrained_layout=True) + + for i, binary in enumerate(binaries): + if len(binaries) == 1: + ax = axs + elif len(binaries) == 2: + ax = axs[i % 2] + else: + x, y = divmod(i, 2) + ax = axs[(x, y)] + + d = df[df["binary"] == binary] + + num_enters = np.array(d["enters"]) + num_leaves = np.array(d["leaves"]) + num_unclosed_enters = np.array(d["unclosed_enters"]) + num_unentered_leaves = np.array(d["unentered_leaves"]) + + ax.bar(experiments, num_enters) + ax.bar( + experiments, + num_leaves, + bottom=num_enters, + ) + ax.bar( + experiments, + num_unclosed_enters, + bottom=num_enters + num_leaves + ) + ax.bar( + experiments, + num_unentered_leaves, + bottom=num_enters + num_leaves + num_unclosed_enters + ) + + ax.set_ylabel("Number of events") + ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45) + ax.set_title(binary) + + fig.suptitle( + f"Instrumentation Verifier " + f"Overview for {case_study.project_name}" + ) + fig.legend( + labels=["Enters", "Leaves", "Unclosed enters", "Unentered leaves"] + ) + + +class VerifierExperimentCompareBudgetLabelsOverviewGenerator( + PlotGenerator, + generator_name="iv-ceb-overview-plot", + options=[REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_MULTI_CASE_STUDY] +): + """Generates a single pc-overview plot for the current paper config.""" + + def generate(self) -> tp.List[Plot]: + case_studies = self.plot_kwargs.pop("case_study") + return [ + InstrumentationOverviewCompareExperimentsBudgetLabelsPlot( + self.plot_config, case_study=cs, **self.plot_kwargs + ) for cs in case_studies + ] diff --git a/varats/varats/plots/instrumentation_verifier_overview.py b/varats/varats/plots/instrumentation_verifier_overview.py index 092fc48fb..470a7d227 100644 --- a/varats/varats/plots/instrumentation_verifier_overview.py +++ b/varats/varats/plots/instrumentation_verifier_overview.py @@ -102,7 +102,7 @@ def _generate_plot(**kwargs: tp.Any) -> None: f"Overview for {case_study.project_name}" ) ax.legend() - plt.xticks(rotation=90, ha='right') + plt.xticks(ha='right') plt.subplots_adjust(bottom=0.25) diff --git 
diff --git a/varats/varats/plots/instrumentation_verifier_overview_budget.py b/varats/varats/plots/instrumentation_verifier_overview_budget.py
new file mode 100644
index 000000000..24d5ae0ad
--- /dev/null
+++ b/varats/varats/plots/instrumentation_verifier_overview_budget.py
@@ -0,0 +1,141 @@
+"""Plot that shows the instrumentation verifier results of a case study broken
+down by instrumentation budget."""
+import typing as tp
+import re
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+import numpy as np
+
+import varats.paper.paper_config as PC
+from varats.plot.plot import Plot, PlotDataEmpty
+from varats.plot.plots import PlotGenerator
+from varats.data.reports.instrumentation_verifier_report import (
+    InstrVerifierReport,
+)
+from varats.report.report import ReportFilepath
+from varats.revision.revisions import get_all_revisions_files
+from varats.ts_utils.click_param_types import REQUIRE_EXPERIMENT_TYPE
+from varats.utils.git_util import FullCommitHash
+
+
+class InstrumentationVerifierOverviewBudgetPlot(
+    Plot, plot_name="instrumentation_verifier_overview_budget"
+):
+
+    def plot(self, view_mode: bool) -> None:
+        self._generate_plot(**self.plot_kwargs)
+
+    @staticmethod
+    def _generate_plot(**kwargs: tp.Any) -> None:
+        case_study = kwargs["case_study"]
+        experiment = kwargs["experiment_type"]
+
+        revisions_files: tp.List[ReportFilepath] = get_all_revisions_files(
+            case_study.project_name, experiment, only_newest=False
+        )
+
+        reports: tp.List[InstrVerifierReport] = [
+            InstrVerifierReport(rev_file.full_path())
+            for rev_file in revisions_files
+        ]
+
+        if len(reports) == 0:
+            raise PlotDataEmpty()
+
+        rows = []
+
+        for report in reports:
+            budget = 0
+            for cf in report.metadata()["cflags"]:
+                if "budget" not in cf:
+                    continue
+
+                budget = int(cf.split("=")[1])
+
+            for binary in report.binaries():
+                rows.append({
+                    "binary": binary,
+                    "budget": budget,
+                    "enters": report.num_enters(binary),
+                    "leaves": report.num_leaves(binary),
+                    "unclosed_enters": report.num_unclosed_enters(binary),
+                    "unentered_leaves": report.num_unentered_leaves(binary)
+                })
+
+        df = pd.DataFrame(rows)
+
+        binaries = df["binary"].unique()
+        fig, axs = plt.subplots((1 + len(binaries)) // 2, 2 - len(binaries) % 2, constrained_layout=True)
+        fig.suptitle(
+            f"Results of {experiment.NAME} by budget for case study {case_study.project_name}"
+        )
+
+        for i, binary in enumerate(binaries):
+            if len(binaries) == 1:
+                ax = axs
+            elif len(binaries) == 2:
+                ax = axs[i % 2]
+            else:
+                x, y = divmod(i, 2)
+                ax = axs[x, y]
+
+            d = df[df["binary"] == binary].sort_values("budget")
+
+            num_enters_arr = np.array(d["enters"])
+            num_leaves_arr = np.array(d["leaves"])
+            num_unclosed_enters_arr = np.array(d["unclosed_enters"])
+            num_unentered_leaves_arr = np.array(d["unentered_leaves"])
+
+            num_enters_arr = num_enters_arr - num_unclosed_enters_arr
+            num_leaves_arr = num_leaves_arr - num_unentered_leaves_arr
+
+            X = np.arange(len(d["budget"]))
+
+            ax.bar(X, num_enters_arr, label="#Enters")
+            ax.bar(X, num_leaves_arr, label="#Leaves", bottom=num_enters_arr)
+            ax.bar(
+                X,
+                num_unclosed_enters_arr,
+                label="#Unclosed Enters",
+                bottom=num_enters_arr + num_leaves_arr
+            )
+            ax.bar(
+                X,
+                num_unentered_leaves_arr,
+                label="#Unentered Leaves",
+                bottom=num_enters_arr + num_leaves_arr + num_unclosed_enters_arr
+            )
+
+            ax.set_ylabel("# Events")
+            ax.set_xlabel("Budget")
+            ax.set_xticks(X, labels=d["budget"])
+            ax.set_title(binary)
+
+        fig.legend(
+            labels=[
+                "Closed enters", "Entered leaves", "Unclosed enters",
+                "Unentered leaves"
+            ]
+        )
+        sns.despine()
+
+    def calc_missing_revisions(
+        self, boundary_gradient: float
+    ) -> tp.Set[FullCommitHash]:
+        raise NotImplementedError
+
+
+class CompareRuntimesBudgetPlotCSGenerator(
+    PlotGenerator,
+    generator_name="iv-ce-overview-budget-plot",
+    options=[REQUIRE_EXPERIMENT_TYPE]
+):
+
+    def generate(self) -> tp.List[Plot]:
+        return [
+            InstrumentationVerifierOverviewBudgetPlot(
+                self.plot_config, case_study=cs, **self.plot_kwargs
+            ) for cs in PC.get_paper_config().get_all_case_studies()
+        ]
diff --git a/varats/varats/projects/c_projects/bzip2.py b/varats/varats/projects/c_projects/bzip2.py
index fa9de73d7..27efc1111 100644
--- a/varats/varats/projects/c_projects/bzip2.py
+++ b/varats/varats/projects/c_projects/bzip2.py
@@ -28,6 +28,9 @@
 )
 from varats.utils.settings import bb_cfg
 
+from varats.experiment.workload_util import RSBinary, WorkloadCategory
+from benchbuild.command import Command, SourceRoot, WorkloadSet
+
 
 class Bzip2(VProject):
     """Compression and decompression tool bzip2 (fetched by Git)"""
@@ -79,6 +82,23 @@ class Bzip2(VProject):
     ]
 
     WORKLOADS = {
+        WorkloadSet(WorkloadCategory.SMALL): [
+            Command(
+                SourceRoot("bzip2") / RSBinary("bzip2"),
+                "--compress",
+                "--best",
+                "-vvv",
+                "--keep",
+                # bzip2 compresses very fast even on the best setting; this
+                # small workload therefore only uses the smallest (1m) input
+                # file
+                "geo-maps/countries-land-1m.geo.json",
+                label="countries-land-1m",
+                creates=[
+                    "geo-maps/countries-land-1m.geo.json.bz2",
+                ]
+            )
+        ],
         WorkloadSet(WorkloadCategory.MEDIUM): [
             Command(
                 SourceRoot("bzip2") / RSBinary("bzip2"),
@@ -92,6 +112,7 @@ class Bzip2(VProject):
                 "geo-maps/countries-land-1m.geo.json",
                 "geo-maps/countries-land-10m.geo.json",
                 "geo-maps/countries-land-100m.geo.json",
+                label="countries-land-1m-10m-100m",
                 creates=[
                     "geo-maps/countries-land-1m.geo.json.bz2",
                     "geo-maps/countries-land-10m.geo.json.bz2",
diff --git a/varats/varats/projects/c_projects/gzip.py b/varats/varats/projects/c_projects/gzip.py
index f3b2a106d..e2d9f03e3 100644
--- a/varats/varats/projects/c_projects/gzip.py
+++ b/varats/varats/projects/c_projects/gzip.py
@@ -106,8 +106,10 @@ class Gzip(VProject, ReleaseProviderHook):
                 "--best",
                 "--force",  # needed because BB creates symlinks for the inputs
                 "geo-maps/countries-land-1m.geo.json",
-                label="geo-maps/countries-land-1m",
-                creates=["geo-maps/countries-land-1m.geo.json.gz"]
+                label="countries-land-1m",
+                creates=[
+                    "geo-maps/countries-land-1m.geo.json.gz"
+                ]
             )
         ],
     }
diff --git a/varats/varats/projects/c_projects/lrzip.py b/varats/varats/projects/c_projects/lrzip.py
index ce1cccb48..e9ddca593 100644
--- a/varats/varats/projects/c_projects/lrzip.py
+++ b/varats/varats/projects/c_projects/lrzip.py
@@ -7,6 +7,7 @@
 from benchbuild.utils.cmd import make
 from benchbuild.utils.settings import get_number_of_jobs
 from plumbum import local
+from varats.project.sources import FeatureSource
 
 from varats.containers.containers import get_base_image, ImageBase
 from varats.experiment.workload_util import RSBinary, WorkloadCategory
@@ -39,6 +40,7 @@ class Lrzip(VProject):
             limit=None,
             shallow=False
         ),
+        FeatureSource(),
         # TODO: auto unzipper for BB?
         HTTP(
             local="countries-land-1km.geo.json",
diff --git a/varats/varats/projects/c_projects/picosat.py b/varats/varats/projects/c_projects/picosat.py
index 71de307e6..c42087a63 100644
--- a/varats/varats/projects/c_projects/picosat.py
+++ b/varats/varats/projects/c_projects/picosat.py
@@ -76,13 +76,20 @@
                 "download/picoSAT-965/traffic_kkb_unknown.cnf.tar.gz"
             }
         ),
+        HTTPUntar(
+            local="SAT_H_instances_childsnack_p08.hddl_2.cnf",
+            remote={
+                "1.0":
+                    "https://github.com/se-sic/picoSAT-mirror/releases/"
+                    "download/picoSAT-965/SAT_H_instances_childsnack_p08.hddl_2.cnf.tar.gz"
+            }
+        ),
         HTTPUntar(
             local="UNSAT_H_instances_childsnack_p11.hddl_1.cnf",
             remote={
                 "1.0":
                     "https://github.com/se-sic/picoSAT-mirror/releases/"
-                    "download/picoSAT-965/"
-                    "UNSAT_H_instances_childsnack_p11.hddl_1.cnf.tar.gz"
+                    "download/picoSAT-965/UNSAT_H_instances_childsnack_p11.hddl_1.cnf.tar.gz"
             }
         ),
         HTTPUntar(
@@ -107,9 +114,9 @@ class PicoSAT(VProject, ReleaseProviderHook):
         WorkloadSet(WorkloadCategory.SMALL): [
             Command(
                 SourceRoot("picosat") / RSBinary("picosat"),
-                "aim-100-1_6-no-1.cnf",
-                label="aim-100-1-6-no-1.cnf",
-            )
+                "SAT_H_instances_childsnack_p08.hddl_2.cnf/SAT_H_instances_childsnack_p08.hddl_2.cnf",
+                label="SAT-H-instances-childsnack-p08.hddl-2.cnf",
+            ),
         ],
         WorkloadSet(WorkloadCategory.MEDIUM): [
             Command(
diff --git a/varats/varats/projects/c_projects/xz.py b/varats/varats/projects/c_projects/xz.py
index 3d1a580ed..1fac7c349 100644
--- a/varats/varats/projects/c_projects/xz.py
+++ b/varats/varats/projects/c_projects/xz.py
@@ -2,7 +2,7 @@
 import typing as tp
 
 import benchbuild as bb
-from benchbuild.command import SourceRoot, WorkloadSet
+from benchbuild.command import Command, SourceRoot, WorkloadSet
 from benchbuild.source import HTTPMultiple
 from benchbuild.utils.cmd import autoreconf, make
 from benchbuild.utils.revision_ranges import (
@@ -24,7 +24,6 @@
     verify_binaries,
 )
 from varats.project.sources import FeatureSource
-from varats.project.varats_command import VCommand
 from varats.project.varats_project import VProject
 from varats.utils.git_util import (
     ShortCommitHash,
@@ -85,19 +84,16 @@ class Xz(VProject):
 
     WORKLOADS = {
         WorkloadSet(WorkloadCategory.EXAMPLE): [
-            VCommand(
+            Command(
                 SourceRoot("xz") / RSBinary("xz"),
                 "-k",
-                # Use output_param to ensure input file
-                # gets appended after all arguments.
-                output_param=["{output}"],
-                output=SourceRoot("geo-maps/countries-land-250m.geo.json"),
+                "geo-maps/countries-land-1km.geo.json",
                 label="countries-land-1km",
                 creates=["geo-maps/countries-land-1km.geo.json.xz"]
             )
         ],
         WorkloadSet(WorkloadCategory.MEDIUM): [
-            VCommand(
+            Command(
                 SourceRoot("xz") / RSBinary("xz"),
                 "-k",
                 "-9e",
@@ -105,13 +101,9 @@
                 "--compress",
                 "--threads=1",
                 "--format=xz",
                 "-vv",
-                # Use output_param to ensure input file
-                # gets appended after all arguments.
-                output_param=["{output}"],
-                output=SourceRoot("geo-maps/countries-land-250m.geo.json"),
+                "geo-maps/countries-land-250m.geo.json",
                 label="countries-land-250m",
-                creates=["geo-maps/countries-land-250m.geo.json.xz"],
-                requires_all_args={"--compress"},
+                creates=["geo-maps/countries-land-250m.geo.json.xz"]
             )
         ],
     }
diff --git a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
index 0674170ee..d1b711420 100644
--- a/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
+++ b/varats/varats/projects/perf_tests/feature_perf_cs_collection.py
@@ -79,6 +79,15 @@ class FeaturePerfCSCollection(VProject):
 
     WORKLOADS = {
         WorkloadSet(WorkloadCategory.EXAMPLE): [
+            Command(
+                SourceRoot("FeaturePerfCSCollection") /
+                RSBinary("SimpleBusyLoop"),
+                "--iterations",
+                str(10**4),
+                "--count_to",
+                str(5 * 10**3),
+                label="SBL-iterations-10K-count-to-5K"
+            ),
             Command(
                 SourceRoot("FeaturePerfCSCollection") /
                 RSBinary("SingleLocalSimple"),
@@ -97,7 +106,7 @@ class FeaturePerfCSCollection(VProject):
                 label="SFI-enc-compress"
             )
         ],
-        WorkloadSet(WorkloadCategory.MEDIUM): [
+        WorkloadSet(WorkloadCategory.SMALL): [
             Command(
                 SourceRoot("FeaturePerfCSCollection") /
                 RSBinary("SimpleBusyLoop"),
diff --git a/varats/varats/tables/instrumentation_verifier_table.py b/varats/varats/tables/instrumentation_verifier_table.py
index fe7f058d5..cdfacac0d 100644
--- a/varats/varats/tables/instrumentation_verifier_table.py
+++ b/varats/varats/tables/instrumentation_verifier_table.py
@@ -15,6 +15,7 @@
 from varats.table.table import Table, TableDataEmpty
 from varats.table.table_utils import dataframe_to_table
 from varats.table.tables import TableFormat, TableGenerator
+from varats.ts_utils.click_param_types import REQUIRE_MULTI_EXPERIMENT_TYPE
 
 
 class InstrumentationVerifierTable(
@@ -31,37 +32,38 @@ class InstrumentationVerifierTable(
     def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
         variables = [
-            "Workload name", "State", "ConfigID", "#Enters", "#Leaves",
-            "#Unclosed Enters", "#Unentered Leaves"
+            "Experiment type", "Workload name", "State", "ConfigID", "#Enters",
+            "#Leaves", "#Unclosed Enters", "#Unentered Leaves"
         ]
 
-        experiment_type = RunInstrVerifier
+        data = []
         project_name: str = self.table_kwargs['case_study'].project_name
 
-        data = []
+        for experiment in self.table_kwargs["experiment_type"]:
 
-        revision_files = get_all_revisions_files(
-            project_name, experiment_type, only_newest=False
-        )
+            revision_files = get_all_revisions_files(
+                project_name, experiment, only_newest=False
+            )
 
-        reports = [
-            InstrVerifierReport(rev_file.full_path())
-            for rev_file in revision_files
-        ]
+            reports = [
+                InstrVerifierReport(rev_file.full_path())
+                for rev_file in revision_files
+            ]
 
-        for report in reports:
-            for binary in report.binaries():
-                data.append([
-                    f"{report.filename.commit_hash} - {binary}",
-                    report.state(binary), report.filename.config_id,
-                    report.num_enters(binary),
-                    report.num_leaves(binary),
-                    report.num_unclosed_enters(binary),
-                    report.num_unentered_leaves(binary)
-                ])
+            for report in reports:
+                for binary in report.binaries():
+                    data.append([
+                        experiment.NAME,
+                        f"{report.filename.commit_hash} - {binary}",
+                        report.state(binary), report.filename.config_id,
+                        report.num_enters(binary),
+                        report.num_leaves(binary),
+                        report.num_unclosed_enters(binary),
+                        report.num_unentered_leaves(binary)
+                    ])
 
-        if len(data) == 0:
-            raise TableDataEmpty()
+        if len(data) == 0:
+            raise TableDataEmpty()
 
         pd_data = pd.DataFrame(columns=variables, data=np.array(data))
@@ -71,7 +73,9 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
 
 
 class InstrVerifierTableGenerator(
-    TableGenerator, generator_name="instrumentation-verifier-table", options=[]
+    TableGenerator,
+    generator_name="instrumentation-verifier-table",
+    options=[REQUIRE_MULTI_EXPERIMENT_TYPE]
 ):
     """Generates an overview table for the instrumentation verifier
     experiment."""
diff --git a/varats/varats/tables/time_workloads.py b/varats/varats/tables/time_workloads.py
index 4120c5210..a7ba4e6bf 100644
--- a/varats/varats/tables/time_workloads.py
+++ b/varats/varats/tables/time_workloads.py
@@ -35,7 +35,8 @@ def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
         report_files = get_processed_revisions_files(
             project_name, self.table_kwargs["experiment_type"][0],
             WLTimeReportAggregate,
-            get_case_study_file_name_filter(case_study)
+            get_case_study_file_name_filter(case_study),
+            only_newest=False
         )
 
         def wall_clock_time_in_msecs(
@@ -82,7 +83,7 @@ def wall_clock_time_in_msecs(
                 len(agg_time_report.reports(workload_name))
             }
 
-            df = df.append(new_row, ignore_index=True)
+            df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
 
         df.sort_values(["Project", "Binary"], inplace=True)
         df.set_index(
diff --git a/varats/varats/tables/time_workloads2.py b/varats/varats/tables/time_workloads2.py
new file mode 100644
index 000000000..dda5764ef
--- /dev/null
+++ b/varats/varats/tables/time_workloads2.py
@@ -0,0 +1,109 @@
+"""Module for the TimedWorkloadsTable."""
+import typing as tp
+
+import numpy as np
+import pandas as pd
+
+from varats.paper.paper_config import get_loaded_paper_config
+from varats.paper_mgmt.case_study import get_case_study_file_name_filter
+from varats.report.gnu_time_report import WLTimeReportAggregate
+from varats.revision.revisions import get_processed_revisions_files
+from varats.table.table import Table
+from varats.table.table_utils import dataframe_to_table
+from varats.table.tables import TableFormat, TableGenerator
+from varats.ts_utils.click_param_types import REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_CASE_STUDY
+
+
+import re
+
+budgetre = re.compile("RunTracedNaive([0-9]+)")
+
+def budget_from_experiment_name(name):
+    if (m := re.search(budgetre, name)) is not None:
+        return int(m.group(1))
+    elif name == "RunTraced":
+        return 0
+    elif name == "RunUntraced":
+        return -1
+
+
+class TimedWorkloadTable(Table, table_name="time_workloads_2"):
+    """Simple table to print the run-time and memory consumption of different
+    workloads."""
+
+    def tabulate(self, table_format: TableFormat, wrap_table: bool) -> str:
+        df = pd.DataFrame()
+
+        case_study = self.table_kwargs["case_study"]
+        project_name = case_study.project_name
+
+        experiments = self.table_kwargs["experiment_type"]
+
+        for experiment in experiments:
+
+            project_name = case_study.project_name
+
+            report_files = get_processed_revisions_files(
+                project_name, experiment,
+                WLTimeReportAggregate,
+                get_case_study_file_name_filter(case_study),
+                only_newest=False
+            )
+
+            def wall_clock_time_in_msecs(
+                agg_time_report: WLTimeReportAggregate
+            ) -> tp.List[float]:
+                return list(
+                    map(
+                        lambda x: x * 1000,
+                        agg_time_report.
+                        measurements_wall_clock_time(workload_name)
+                    )
+                )
+
+            for report_filepath in report_files:
+                agg_time_report = WLTimeReportAggregate(
+                    report_filepath.full_path()
+                )
+
+                for workload_name in agg_time_report.workload_names():
+                    new_row = {
+                        "Experiment":
+                            experiment.NAME,
+                        "Budget": budget_from_experiment_name(experiment.NAME),
+                        "Workload":
+                            workload_name,
+                        "Mean wall time (msecs)":
+                            np.mean(wall_clock_time_in_msecs(agg_time_report)),
+                        "StdDev":
+                            round(
+                                np.std(
+                                    wall_clock_time_in_msecs(agg_time_report)
+                                ), 2
+                            ),
+                        "Reps":
+                            len(agg_time_report.reports(workload_name))
+                    }
+
+                    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
+
+        df.sort_values(["Workload", "Budget"], inplace=True)
+
+        kwargs: tp.Dict[str, tp.Any] = {}
+        if table_format.is_latex():
+            kwargs["column_format"] = "llr|rr|r|r"
+
+        return dataframe_to_table(
+            df, table_format, wrap_table, wrap_landscape=True, **kwargs
+        )
+
+
+class TimedWorkloadTableGenerator(
+    TableGenerator,
+    generator_name="time-workloads-2",
+    options=[REQUIRE_MULTI_EXPERIMENT_TYPE, REQUIRE_CASE_STUDY]
+):
+    """Generator for `TimedWorkloadTable`."""
+
+    def generate(self) -> tp.List[Table]:
+        return [TimedWorkloadTable(self.table_config, **self.table_kwargs)]
diff --git a/varats/varats/tools/bb_config.py b/varats/varats/tools/bb_config.py
index 5d3b73bd2..0edd2f41d 100644
--- a/varats/varats/tools/bb_config.py
+++ b/varats/varats/tools/bb_config.py
@@ -110,6 +110,7 @@ def update_experiments(bb_cfg: s.Configuration) -> None:
         'varats.experiments.vara.blame_report_experiment',
         'varats.experiments.vara.blame_verifier_experiment',
         'varats.experiments.vara.commit_report_experiment',
+        'varats.experiments.vara.compare_traced_untraced',
         'varats.experiments.vara.feature_perf_runner',
         'varats.experiments.vara.feature_perf_sampling',
         'varats.experiments.vara.feature_perf_tracing',
diff --git a/varats/varats/ts_utils/__init__.py b/varats/varats/ts_utils/__init__.py
new file mode 100644
index 000000000..e69de29bb