diff --git a/api/docs/release.dox b/api/docs/release.dox index 237203d88c1..1fd5b2b7965 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -146,6 +146,18 @@ Further non-compatibility-affecting changes include: templates. Added similar options for the drmemtrace scheduler: #dynamorio::drmemtrace:: scheduler_tmpl_t::scheduler_options_t::kernel_syscall_trace_path, and #dynamorio:: drmemtrace::scheduler_tmpl_t::scheduler_options_t::kernel_syscall_reader. + - Added an initial implementation of a noise generator in the drmemtrace framework. The + noise generator produces synthetic records to schedule with one or more target traces. + To support noise generation we implemented: + - #dynamorio::drmemtrace::noise_generator_t scaffolding as a + #dynamorio::drmemtrace::reader_t to produce synthetic trace records. + - #dynamorio::drmemtrace::noise_generator_info_t, which contains the metadata + information to drive the generation of synthetic records. + - #dynamorio::drmemtrace::noise_generator_factory_t that creates + #dynamorio::drmemtrace::noise_generator_t, which can then be added to the input + workloads of #dynamorio::drmemtrace::scheduler_tmpl_t. + - The flag -noise_generator_enable as an analyzer (global) option to enable noise + generation alongside the scheduled traces. **************************************************
diff --git a/clients/drcachesim/CMakeLists.txt b/clients/drcachesim/CMakeLists.txt index 6b24cb3b43c..7c732f4fb1b 100644 --- a/clients/drcachesim/CMakeLists.txt +++ b/clients/drcachesim/CMakeLists.txt @@ -279,6 +279,7 @@ set(drcachesim_srcs scheduler/scheduler_replay.cpp scheduler/scheduler_fixed.cpp scheduler/speculator.cpp + scheduler/noise_generator.cpp analyzer.cpp analyzer_multi.cpp ${client_and_sim_srcs} @@ -346,6 +347,7 @@ add_exported_library(drmemtrace_analyzer STATIC scheduler/scheduler_replay.cpp scheduler/scheduler_fixed.cpp scheduler/speculator.cpp + scheduler/noise_generator.cpp common/trace_entry.cpp reader/reader.cpp reader/config_reader.cpp @@ -399,6 +401,7 @@ install_client_nonDR_header(drmemtrace tools/filter/record_filter.h) install_client_nonDR_header(drmemtrace tracer/raw2trace.h) install_client_nonDR_header(drmemtrace tracer/raw2trace_shared.h) install_client_nonDR_header(drmemtrace scheduler/scheduler.h) +install_client_nonDR_header(drmemtrace scheduler/noise_generator.h) install_client_nonDR_header(drmemtrace scheduler/flexible_queue.h) install_client_nonDR_header(drmemtrace scheduler/speculator.h) diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp index b6cf14a8c8a..6cab692c932 100644 --- a/clients/drcachesim/analyzer.cpp +++ b/clients/drcachesim/analyzer.cpp @@ -56,6 +56,7 @@ #endif #include "reader.h" #include "record_file_reader.h" +#include "noise_generator.h" #include "trace_entry.h" #ifdef HAS_ZIP # include "reader/zipfile_file_reader.h" @@ -295,6 +296,27 @@ analyzer_tmpl_t::init_scheduler_common( std::vector &workloads, typename sched_type_t::scheduler_options_t options) { + // Add noise generator to input workloads. + if (noise_generator_enabled_) { + // TODO i#7216: here can be a good place to analyze the workloads in order to + // tweak noise_generator_info_t parameters. For now we use noise_generator_info_t + // default values. 
+ noise_generator_info_t noise_generator_info; + typename sched_type_t::input_reader_t noise_generator_reader = + noise_generator_factory_.create_noise_generator(noise_generator_info); + // Check for errors. + error_string_ = error_string_ + noise_generator_factory_.get_error_string(); + if (!error_string_.empty()) { + return false; + } + // input_workload_t needs a vector of input_reader_t, so we create a vector with + // a single input_reader_t (the noise generator). + std::vector readers; + readers.emplace_back(std::move(noise_generator_reader)); + // Add the noise generator to the scheduler's input workloads. + workloads.emplace_back(std::move(readers)); + } + for (int i = 0; i < num_tools_; ++i) { if (parallel_ && !tools_[i]->parallel_shard_supported()) { parallel_ = false; diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h index c3440d39eb6..c95e75d4397 100644 --- a/clients/drcachesim/analyzer.h +++ b/clients/drcachesim/analyzer.h @@ -55,6 +55,7 @@ #include "analysis_tool.h" #include "memref.h" +#include "noise_generator.h" #include "reader.h" #include "record_file_reader.h" #include "scheduler.h" @@ -438,6 +439,11 @@ template class analyzer_tmpl_t { bool sched_by_time_ = false; typename sched_type_t::mapping_t sched_mapping_ = sched_type_t::MAP_TO_ANY_OUTPUT; + // Factory to create noise generators that can then be added to the scheduler's + // input workloads. 
+ noise_generator_factory_t noise_generator_factory_; + bool noise_generator_enabled_ = false; + private: bool serial_mode_supported(); diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp index ad77dad5f10..28f4eb8b6fd 100644 --- a/clients/drcachesim/analyzer_multi.cpp +++ b/clients/drcachesim/analyzer_multi.cpp @@ -36,6 +36,7 @@ #include "common/options.h" #include "common/utils.h" #include "common/directory_iterator.h" +#include "noise_generator.h" #include "tlb_simulator.h" #include "tracer/raw2trace_directory.h" #include "tracer/raw2trace.h" @@ -573,6 +574,11 @@ analyzer_multi_tmpl_t::analyzer_multi_tmpl_t() sched_ops.kernel_syscall_trace_path = op_sched_syscall_file.get_value(); + // Enable the noise generator before init_scheduler(), where we eventually add a + // noise generator as another input workload. + if (op_noise_generator_enable.get_value()) + this->noise_generator_enabled_ = true; + if (!indirs.empty()) { std::vector tracedirs; for (const std::string &indir : indirs) diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp index 945bb161da1..2d6f2555736 100644 --- a/clients/drcachesim/common/options.cpp +++ b/clients/drcachesim/common/options.cpp @@ -714,6 +714,12 @@ droption_t "Cache hierarchy configuration file", "The full path to the cache hierarchy configuration file."); +droption_t + op_noise_generator_enable(DROPTION_SCOPE_FRONTEND, "noise_generator_enable", false, + "Enables noise generation.", + "Enables the scheduler to interleave trace records with " + "synthetic records produced by a noise generator."); + // XXX: if we separate histogram + reuse_distance we should move this with them. 
droption_t op_report_top(DROPTION_SCOPE_FRONTEND, "report_top", 10, diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h index 50bbbadf43f..bd2b3fab9e9 100644 --- a/clients/drcachesim/common/options.h +++ b/clients/drcachesim/common/options.h @@ -178,6 +178,7 @@ extern dynamorio::droption::droption_t op_warmu extern dynamorio::droption::droption_t op_warmup_fraction; extern dynamorio::droption::droption_t op_sim_refs; extern dynamorio::droption::droption_t op_config_file; +extern dynamorio::droption::droption_t op_noise_generator_enable; extern dynamorio::droption::droption_t op_report_top; extern dynamorio::droption::droption_t op_reuse_distance_threshold; extern dynamorio::droption::droption_t op_reuse_distance_histogram; diff --git a/clients/drcachesim/scheduler/noise_generator.cpp b/clients/drcachesim/scheduler/noise_generator.cpp new file mode 100644 index 00000000000..d9d1f8720ef --- /dev/null +++ b/clients/drcachesim/scheduler/noise_generator.cpp @@ -0,0 +1,182 @@ +/* ********************************************************** + * Copyright (c) 2025 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#include +#include + +#include "noise_generator.h" +#include "memref.h" +#include "trace_entry.h" + +namespace dynamorio { +namespace drmemtrace { + +noise_generator_t::noise_generator_t() +{ +} + +noise_generator_t::noise_generator_t(noise_generator_info_t &info) + : num_records_to_generate_(info.num_records_to_generate) + , pid_(info.pid) + , tid_(info.tid) +{ +} + +noise_generator_t::~noise_generator_t() +{ +} + +bool +noise_generator_t::init() +{ + at_eof_ = false; + ++*this; + return true; +} + +std::string +noise_generator_t::get_stream_name() const +{ + return "noise_generator"; +} + +trace_entry_t +noise_generator_t::generate_trace_entry() +{ + // TODO i#7216: this is a temporary trace record that we use as a placeholder until + // the logic to generate noise records is in place. + trace_entry_t generated_entry = { TRACE_TYPE_READ, 4, { 0xdeadbeef } }; + return generated_entry; +} + +trace_entry_t * +noise_generator_t::read_next_entry() +{ + if (num_records_to_generate_ == 0) { + at_eof_ = true; + return nullptr; + } + + // Do not change the order for generating TRACE_TYPE_THREAD and TRACE_TYPE_PID. + // The scheduler expects a tid first and then a pid. 
+ if (!tid_generated_) { + entry_ = { TRACE_TYPE_THREAD, sizeof(int), { tid_ } }; + tid_generated_ = true; + return &entry_; + } + if (!pid_generated_) { + entry_ = { TRACE_TYPE_PID, sizeof(int), { pid_ } }; + pid_generated_ = true; + return &entry_; + } + // The scheduler expects a TRACE_MARKER_TYPE_TIMESTAMP for relative threads order. + // We provide one with a high value to indicate that noise generator threads have + // no dependencies with other threads. The scheduler will re-write these values. + if (!marker_timestamp_generated_) { + entry_ = { TRACE_TYPE_MARKER, + TRACE_MARKER_TYPE_TIMESTAMP, + { static_cast(ULONG_MAX - 1) } }; + marker_timestamp_generated_ = true; + return &entry_; + } + + entry_ = generate_trace_entry(); + + if (num_records_to_generate_ == 1) { + entry_ = { TRACE_TYPE_THREAD_EXIT, sizeof(int), { tid_ } }; + } + --num_records_to_generate_; + + return &entry_; +} + +template +std::string +noise_generator_factory_t::get_error_string() +{ + return error_string_; +} + +template +typename scheduler_tmpl_t::input_reader_t +noise_generator_factory_t::create_noise_generator( + noise_generator_info_t &info) +{ + std::unique_ptr noise_generator_begin = + create_noise_generator_begin(info); + std::unique_ptr noise_generator_end = create_noise_generator_end(); + typename sched_type_t::input_reader_t reader( + std::move(noise_generator_begin), std::move(noise_generator_end), info.tid); + return reader; +} + +template <> +std::unique_ptr +noise_generator_factory_t::create_noise_generator_begin( + noise_generator_info_t &info) +{ + return std::unique_ptr(new noise_generator_t(info)); +} + +template <> +std::unique_ptr +noise_generator_factory_t::create_noise_generator_end() +{ + return std::unique_ptr(new noise_generator_t()); +} + +template <> +std::unique_ptr +noise_generator_factory_t::create_noise_generator_begin( + noise_generator_info_t &info) +{ + // TODO i#7216: we'll need a record_reader_t noise generator to create core sharded + // traces via 
record_filter_t. + error_string_ = "Noise generator is not supported for record_reader_t"; + return std::unique_ptr(); +} + +template <> +std::unique_ptr +noise_generator_factory_t::create_noise_generator_end() +{ + // TODO i#7216: we'll need a record_reader_t noise generator to create core sharded + // traces via record_filter_t. + error_string_ = "Noise generator is not supported for record_reader_t"; + return std::unique_ptr(); +} + +template class noise_generator_factory_t; +template class noise_generator_factory_t; + +} // namespace drmemtrace +} // namespace dynamorio diff --git a/clients/drcachesim/scheduler/noise_generator.h b/clients/drcachesim/scheduler/noise_generator.h new file mode 100644 index 00000000000..490a0ee6bdd --- /dev/null +++ b/clients/drcachesim/scheduler/noise_generator.h @@ -0,0 +1,130 @@ +/* ********************************************************** + * Copyright (c) 2025 Google, Inc. All rights reserved. + * **********************************************************/ + +/* + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of Google, Inc. nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#ifndef _NOISE_GENERATOR_H_ +#define _NOISE_GENERATOR_H_ 1 + +#include "reader.h" +#include "scheduler.h" +#include "trace_entry.h" + +namespace dynamorio { +namespace drmemtrace { + +/** + * Contains metadata information to drive the noise generation. + */ +struct noise_generator_info_t { + noise_generator_info_t() {}; + noise_generator_info_t(addr_t pid, addr_t tid, uint64_t num_records_to_generate) + : pid(pid) + , tid(tid) + , num_records_to_generate(num_records_to_generate) + { + } + // TODO i#7216: temporary default values. + addr_t pid = 1; + addr_t tid = 1; + uint64_t num_records_to_generate = 1000; +}; + +/** + * Generates synthetic #dynamorio::drmemtrace::memref_t trace entries in a single thread + * and presents them via an iterator interface to the scheduler. + */ +class noise_generator_t : public reader_t { +public: + noise_generator_t(); + + noise_generator_t(noise_generator_info_t &info); + + virtual ~noise_generator_t(); + + bool + init() override; + + std::string + get_stream_name() const override; + +protected: + // Makes sure the noise records generated by generate_trace_entry() are preceded by + // TRACE_TYPE_THREAD, TRACE_TYPE_PID, TRACE_MARKER_TYPE_TIMESTAMP and followed by + // TRACE_TYPE_THREAD_EXIT. + virtual trace_entry_t * + read_next_entry() override; + + // Contains the main logic to generate noise records. 
+ virtual trace_entry_t + generate_trace_entry(); + + // This counter does not count TRACE_TYPE_THREAD, TRACE_TYPE_PID, and + // TRACE_MARKER_TYPE_TIMESTAMP. The idea is that when the user wants to generate at + // least one record, tid, pid, and timestamp always have to be there, otherwise + // the scheduler will report an error. + uint64_t num_records_to_generate_ = 0; + addr_t pid_ = 0; + addr_t tid_ = 0; + +private: + trace_entry_t entry_ = {}; + bool pid_generated_ = false; + bool tid_generated_ = false; + bool marker_timestamp_generated_ = false; +}; + +/** + * Factory to create noise_generator_t. + */ +template class noise_generator_factory_t { +public: + typedef scheduler_tmpl_t sched_type_t; + + std::string + get_error_string(); + + typename sched_type_t::input_reader_t + create_noise_generator(noise_generator_info_t &info); + +protected: + std::unique_ptr + create_noise_generator_begin(noise_generator_info_t &info); + + std::unique_ptr + create_noise_generator_end(); + + std::string error_string_; +}; + +} // namespace drmemtrace +} // namespace dynamorio + +#endif /* _NOISE_GENERATOR_H_ */ diff --git a/clients/drcachesim/tests/schedule_stats_noise_generator.templatex b/clients/drcachesim/tests/schedule_stats_noise_generator.templatex new file mode 100644 index 00000000000..133043ad2c9 --- /dev/null +++ b/clients/drcachesim/tests/schedule_stats_noise_generator.templatex @@ -0,0 +1,5 @@ +Schedule stats tool results: +Total counts: + *[1-9][0-9]* cores + 2 threads:.* +.* diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp index 2b3223ba314..84c51ee4603 100644 --- a/clients/drcachesim/tests/scheduler_unit_tests.cpp +++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp @@ -52,6 +52,7 @@ #include "mock_reader.h" #include "memref.h" #include "trace_entry.h" +#include "noise_generator.h" #ifdef HAS_ZIP # include "zipfile_istream.h" # include "zipfile_ostream.h" @@ -6908,6 +6909,101 @@
test_options_match() test.check_options(); } +// A mock noise generator that only generates TRACE_TYPE_READ records with +// the address passed to its constructor. +class mock_noise_generator_t : public noise_generator_t { +public: + mock_noise_generator_t() {}; + + mock_noise_generator_t(noise_generator_info_t &info, addr_t addr_to_generate) + : noise_generator_t(info) + , addr_to_generate_(addr_to_generate) {}; + +protected: + trace_entry_t + generate_trace_entry() override + { + trace_entry_t generated_entry = { TRACE_TYPE_READ, 4, { addr_to_generate_ } }; + return generated_entry; + } + +private: + addr_t addr_to_generate_ = 0x0; +}; + +static void +test_noise_generator() +{ + std::cerr << "\n----------------\nTesting noise generator\n"; + static constexpr addr_t noise_generator_addr_to_generate = 0xdeadbeef; + static constexpr memref_tid_t TID_A = 42; + static constexpr memref_tid_t TID_B = 99; + std::vector refs_A = { + /* clang-format off */ + make_thread(TID_A), + make_pid(1), + make_version(4), + make_timestamp(10), + make_instr(10), + make_timestamp(30), + make_instr(30), + make_timestamp(50), + make_instr(50), + make_exit(TID_A), + /* clang-format on */ + }; + std::vector refs_B = { + /* clang-format off */ + make_thread(TID_B), + make_pid(1), + make_version(4), + make_timestamp(20), + make_instr(20), + make_timestamp(40), + make_instr(40), + make_timestamp(60), + make_instr(60), + make_exit(TID_B), + /* clang-format on */ + }; + std::vector readers; + readers.emplace_back(std::unique_ptr(new mock_reader_t(refs_A)), + std::unique_ptr(new mock_reader_t()), TID_A); + readers.emplace_back(std::unique_ptr(new mock_reader_t(refs_B)), + std::unique_ptr(new mock_reader_t()), TID_B); + // Add noise.
+ noise_generator_info_t noise_generator_info = { 1, 1, 10 }; + readers.emplace_back( + std::unique_ptr(new mock_noise_generator_t( + noise_generator_info, noise_generator_addr_to_generate)), + std::unique_ptr(new mock_noise_generator_t()), + INVALID_THREAD_ID); + scheduler_t scheduler; + std::vector sched_inputs; + sched_inputs.emplace_back(std::move(readers)); + if (scheduler.init(sched_inputs, 1, + scheduler_t::make_scheduler_serial_options(/*verbosity=*/4)) != + scheduler_t::STATUS_SUCCESS) + assert(false); + auto *stream = scheduler.get_stream(0); + memref_t memref; + bool found_at_least_one_read = false; + for (scheduler_t::stream_status_t status = stream->next_record(memref); + status != scheduler_t::STATUS_EOF; status = stream->next_record(memref)) { + assert(status == scheduler_t::STATUS_OK); + // There is a single input workload, so its ordinal is always 0. + assert(stream->get_input_workload_ordinal() == 0); + // All TRACE_TYPE_READ records are generated by the noise generator and their + // addr is always the constant noise_generator_addr_to_generate.
+ if (memref.data.type == TRACE_TYPE_READ) { + assert(memref.data.addr == + static_cast(noise_generator_addr_to_generate)); + found_at_least_one_read = true; + } + } + assert(found_at_least_one_read); +} + int test_main(int argc, const char *argv[]) { @@ -6956,6 +7052,7 @@ test_main(int argc, const char *argv[]) test_exit_early(); test_marker_updates(); test_options_match(); + test_noise_generator(); dr_standalone_exit(); return 0; diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 7f26e36fb09..169e5d829da 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -4019,6 +4019,13 @@ if (BUILD_CLIENTS) torunonly_simtool(reuse_offline ${ci_shared_app} "-infile ${small_trace_file} -tool reuse_distance -reuse_distance_histogram" "") + # The schedule_stats tool triggers the scheduler, to which we add one noise + # generator process with a single thread (current default configuration). + # We test with a single-threaded trace for a total of 2 threads, which we check + # for in the tool's output. + torunonly_simtool(schedule_stats_noise_generator ${ci_shared_app} + "-infile ${small_trace_file} -tool schedule_stats -noise_generator_enable" "") + # Our multi-threaded sample trace is larger so we require gzip. if (ZLIB_FOUND) set(thread_trace_dir