From 2515ff641437a6b72a96ab8704f18d9ba2dd763d Mon Sep 17 00:00:00 2001 From: romnnn Date: Thu, 13 Jul 2023 20:43:22 +0200 Subject: [PATCH] box and playground: stats conversion --- Cargo.lock | 53 ++++++ Cargo.toml | 1 + playground/src/bridge/main.rs | 86 +++++---- playground/src/ref/bridge/stats.cc | 20 ++- src/ported/cache.rs | 8 +- src/ported/instruction.rs | 2 +- src/ported/interconn.rs | 89 +++++----- src/ported/ldst_unit.rs | 64 ++++++- src/ported/mem_fetch.rs | 26 ++- src/ported/mod.rs | 271 ++++++++++++++++++++++------- src/ported/stats.rs | 190 ++++++++++++++++++-- 11 files changed, 641 insertions(+), 169 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05e671cf..19db8638 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,6 +141,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "anyhow" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" + [[package]] name = "async-channel" version = "1.9.0" @@ -488,6 +494,7 @@ dependencies = [ "phf", "playground", "pretty_assertions", + "pretty_assertions_sorted", "pyo3", "regex", "rmp-serde", @@ -864,6 +871,18 @@ dependencies = [ "syn 2.0.25", ] +[[package]] +name = "darrentsung_debug_parser" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf488eca7807ce3c8e64bee95c3fbf8f1935c905b3b73835e75db16fc458fdc4" +dependencies = [ + "anyhow", + "html-escape", + "nom", + "ordered-float", +] + [[package]] name = "data-encoding" version = "2.4.0" @@ -1437,6 +1456,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "html-escape" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" +dependencies = [ + "utf8-width", +] + [[package]] name = "http" version = "0.2.9" @@ -2120,6 +2148,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "ordered-float" +version = "2.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" +dependencies = [ + "num-traits", +] + [[package]] name = "os_pipe" version = "1.1.4" @@ -2477,6 +2514,16 @@ dependencies = [ "yansi", ] +[[package]] +name = "pretty_assertions_sorted" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa95d32882f2adbdfd30312733271b83c527ee8007bf78dc21afe510463ac6a0" +dependencies = [ + "darrentsung_debug_parser", + "pretty_assertions", +] + [[package]] name = "prettyplease" version = "0.2.10" @@ -3625,6 +3672,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf8-width" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" + [[package]] name = "utf8parse" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index aa9e5858..660881fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,6 +71,7 @@ optional = true [dev-dependencies] pretty_assertions = "1" +pretty_assertions_sorted = "1" cxx = "1" # sanity check stats against playground diff --git a/playground/src/bridge/main.rs b/playground/src/bridge/main.rs index 1947633c..1a2e8535 100644 --- a/playground/src/bridge/main.rs +++ b/playground/src/bridge/main.rs @@ -12,15 +12,15 @@ pub struct CacheStats { pub accesses: HashMap<(AccessType, AccessStat), u64>, } -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum AccessStat { ReservationFailure(ReservationFailure), Status(RequestStatus), } -#[derive(Debug, Clone, Default)] -pub struct Stats { - // memory accesses +/// Memory accesses +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Accesses { pub num_mem_write: usize, pub num_mem_read: usize, pub num_mem_const: usize, @@ -29,7 +29,14 @@ pub struct Stats { pub num_mem_write_global: usize, pub num_mem_read_local: usize, pub num_mem_write_local: usize, - // instructions + pub num_mem_l2_writeback: usize, + pub num_mem_l1_write_allocate: usize, + pub num_mem_l2_write_allocate: usize, +} + +/// Instruction counts +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Instructions { pub num_load_instructions: usize, pub num_store_instructions: usize, pub num_shared_mem_instructions: usize, @@ -37,10 +44,16 @@ pub struct Stats { pub num_texture_instructions: usize, pub num_const_instructions: usize, pub num_param_instructions: usize, - // other stuff - pub num_mem_l2_writeback: usize, - pub num_mem_l1_write_allocate: usize, - pub num_mem_l2_write_allocate: usize, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct Other {} + +#[derive(Debug, Clone, Default)] +pub struct Stats { + pub accesses: Accesses, + pub instructions: Instructions, + pub other: Other, // per cache stats pub l1i_stats: HashMap, @@ -84,56 +97,60 @@ impl Stats { // memory accesses pub fn set_num_mem_write(&mut self, v: usize) { - self.num_mem_write = v; + self.accesses.num_mem_write = v; } pub fn set_num_mem_read(&mut self, v: usize) { - self.num_mem_read = v; + self.accesses.num_mem_read = v; } pub fn set_num_mem_const(&mut self, v: usize) { - self.num_mem_const = v; + self.accesses.num_mem_const = v; } pub fn set_num_mem_texture(&mut self, v: usize) { - self.num_mem_texture = v; + self.accesses.num_mem_texture = v; } pub fn set_num_mem_read_global(&mut self, v: usize) { - self.num_mem_read_global = v; + self.accesses.num_mem_read_global = v; } pub fn set_num_mem_write_global(&mut self, v: usize) { - self.num_mem_write_global = v; + self.accesses.num_mem_write_global = v; } pub fn set_num_mem_read_local(&mut self, v: usize) { - self.num_mem_read_local = v; + self.accesses.num_mem_read_local = v; } pub fn set_num_mem_write_local(&mut self, v: usize) { - self.num_mem_write_local = v; + self.accesses.num_mem_write_local = v; + } + pub fn set_num_mem_l2_writeback(&mut self, v: usize) { + self.accesses.num_mem_l2_writeback = v; + } + pub fn set_num_mem_l1_write_allocate(&mut self, v: usize) { + self.accesses.num_mem_l1_write_allocate = v; + } + pub fn set_num_mem_l2_write_allocate(&mut self, v: usize) { + self.accesses.num_mem_l2_write_allocate = v; } - // instructions + // instruction counts pub fn set_num_load_instructions(&mut self, v: usize) { - self.num_load_instructions = v; + self.instructions.num_load_instructions = v; } pub fn set_num_store_instructions(&mut self, v: usize) { - self.num_store_instructions = v; + self.instructions.num_store_instructions = v; } pub fn set_num_shared_mem_instructions(&mut self, v: usize) { - self.num_shared_mem_instructions = v; + self.instructions.num_shared_mem_instructions = v; } pub fn set_num_sstarr_instructions(&mut self, v: usize) { - self.num_sstarr_instructions = v; + self.instructions.num_sstarr_instructions = v; } pub fn set_num_texture_instructions(&mut self, v: usize) { - self.num_texture_instructions = v; + self.instructions.num_texture_instructions = v; } pub fn set_num_const_instructions(&mut self, v: usize) { - self.num_const_instructions = v; + self.instructions.num_const_instructions = v; } pub fn set_num_param_instructions(&mut self, v: usize) { - self.num_param_instructions = v; - } - - // other stuff - pub fn set_num_mem_l2_writeback(&mut self, v: usize) { - self.num_mem_l2_writeback = v; + self.instructions.num_param_instructions = v; } } @@ -173,8 +190,11 @@ mod default { fn set_num_mem_write_global(self: &mut Stats, v: usize); fn set_num_mem_read_local(self: &mut Stats, v: usize); fn set_num_mem_write_local(self: &mut Stats, v: usize); + fn set_num_mem_l2_writeback(self: &mut Stats, v: usize); + fn set_num_mem_l1_write_allocate(self: &mut Stats, v: usize); + fn set_num_mem_l2_write_allocate(self: &mut Stats, v: usize); - // instructions + // instruction counts fn set_num_load_instructions(self: &mut Stats, v: usize); fn set_num_store_instructions(self: &mut Stats, v: usize); fn set_num_shared_mem_instructions(self: &mut Stats, v: usize); @@ -182,10 +202,6 @@ mod default { fn set_num_texture_instructions(self: &mut Stats, v: usize); fn set_num_const_instructions(self: &mut Stats, v: usize); fn set_num_param_instructions(self: &mut Stats, v: usize); - - // fn set_num_mem_l2_writeback(self: &mut Stats, v: usize); - // fn set_num_mem_l1_write_allocate(self: &mut Stats, v: usize); - // fn set_num_mem_l2_write_allocate(self: &mut Stats, v: usize); } unsafe extern "C++" { diff --git a/playground/src/ref/bridge/stats.cc b/playground/src/ref/bridge/stats.cc index 0cce1a7d..16799bc6 100644 --- a/playground/src/ref/bridge/stats.cc +++ b/playground/src/ref/bridge/stats.cc @@ -27,10 +27,6 @@ void transfer_cache_stats(CacheKind cache, unsigned cache_id, } } -// class trace_gpgpu_sim_bridge : public trace_gpgpu_sim { -// public: -// using trace_gpgpu_sim::trace_gpgpu_sim; -// void trace_gpgpu_sim_bridge::transfer_stats(Stats &stats) { transfer_general_stats(stats); @@ -61,12 +57,19 @@ void trace_gpgpu_sim_bridge::transfer_general_stats(Stats &stats) { // see: m_shader_stats->print(stdout); // stats.set_num_stall_shared_mem(m_shader_stats->gpgpu_n_stall_shd_mem); - stats.set_num_mem_read_local(m_shader_stats->gpgpu_n_mem_read_local); - stats.set_num_mem_write_local(m_shader_stats->gpgpu_n_mem_write_local); + stats.set_num_mem_write(m_shader_stats->made_write_mfs); + stats.set_num_mem_read(m_shader_stats->made_read_mfs); + stats.set_num_mem_const(m_shader_stats->gpgpu_n_mem_const); + stats.set_num_mem_texture(m_shader_stats->gpgpu_n_mem_texture); stats.set_num_mem_read_global(m_shader_stats->gpgpu_n_mem_read_global); stats.set_num_mem_write_global(m_shader_stats->gpgpu_n_mem_write_global); - stats.set_num_mem_texture(m_shader_stats->gpgpu_n_mem_texture); - stats.set_num_mem_const(m_shader_stats->gpgpu_n_mem_const); + stats.set_num_mem_read_local(m_shader_stats->gpgpu_n_mem_read_local); + stats.set_num_mem_write_local(m_shader_stats->gpgpu_n_mem_write_local); + stats.set_num_mem_l2_writeback(m_shader_stats->gpgpu_n_mem_l2_writeback); + stats.set_num_mem_l1_write_allocate( + m_shader_stats->gpgpu_n_mem_l1_write_allocate); + stats.set_num_mem_l2_write_allocate( + m_shader_stats->gpgpu_n_mem_l2_write_allocate); stats.set_num_load_instructions(m_shader_stats->gpgpu_n_load_insn); stats.set_num_store_instructions(m_shader_stats->gpgpu_n_store_insn); @@ -144,4 +147,3 @@ void trace_gpgpu_sim_bridge::transfer_l2d_stats(Stats &stats) { }; } } -// }; diff --git a/src/ported/cache.rs b/src/ported/cache.rs index c0a93052..a3096544 100644 --- a/src/ported/cache.rs +++ b/src/ported/cache.rs @@ -3,7 +3,9 @@ use crate::config; use std::collections::VecDeque; use std::sync::{Arc, Mutex}; -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive( + Debug, strum::EnumIter, Clone, Copy, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize, +)] pub enum RequestStatus { HIT = 0, HIT_RESERVED, @@ -14,7 +16,9 @@ pub enum RequestStatus { // NUM_CACHE_REQUEST_STATUS, } -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive( + Debug, strum::EnumIter, Clone, Copy, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize, +)] pub enum ReservationFailure { /// all line are reserved LINE_ALLOC_FAIL = 0, diff --git a/src/ported/instruction.rs b/src/ported/instruction.rs index 691c6e9c..487dba7d 100644 --- a/src/ported/instruction.rs +++ b/src/ported/instruction.rs @@ -10,7 +10,7 @@ use bitvec::{array::BitArray, field::BitField, BitArr}; use std::collections::{HashMap, VecDeque}; use trace_model as trace; -#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub enum MemorySpace { // undefined_space = 0, // reg_space, diff --git a/src/ported/interconn.rs b/src/ported/interconn.rs index 1fcc6c13..e853853c 100644 --- a/src/ported/interconn.rs +++ b/src/ported/interconn.rs @@ -233,48 +233,53 @@ impl MemFetchInterface for CoreMemoryInterface { { let mut stats = self.stats.lock().unwrap(); - if fetch.is_write() { - stats.num_mem_write += 1; - } else { - stats.num_mem_read += 1; - } - - match fetch.access_kind() { - mem_fetch::AccessKind::CONST_ACC_R => { - stats.num_mem_const += 1; - } - mem_fetch::AccessKind::TEXTURE_ACC_R => { - stats.num_mem_texture += 1; - } - mem_fetch::AccessKind::GLOBAL_ACC_R => { - stats.num_mem_read_global += 1; - } - mem_fetch::AccessKind::GLOBAL_ACC_W => { - stats.num_mem_write_global += 1; - } - mem_fetch::AccessKind::LOCAL_ACC_R => { - stats.num_mem_read_local += 1; - } - mem_fetch::AccessKind::LOCAL_ACC_W => { - stats.num_mem_write_local += 1; - } - mem_fetch::AccessKind::INST_ACC_R => { - stats.num_mem_read_inst += 1; - } - mem_fetch::AccessKind::L1_WRBK_ACC => { - stats.num_mem_write_global += 1; - } - mem_fetch::AccessKind::L2_WRBK_ACC => { - stats.num_mem_l2_writeback += 1; - } - mem_fetch::AccessKind::L1_WR_ALLOC_R => { - stats.num_mem_l1_write_allocate += 1; - } - mem_fetch::AccessKind::L2_WR_ALLOC_R => { - stats.num_mem_l2_write_allocate += 1; - } - _ => {} - } + // let counters = &mut stats.counters; + // if fetch.is_write() { + // counters.num_mem_write += 1; + // } else { + // counters.num_mem_read += 1; + // } + // + let access_kind = *fetch.access_kind(); + debug_assert_eq!(fetch.is_write(), access_kind.is_write()); + stats.accesses.inc(access_kind, 1); + // match fetch.access_kind() { + // mem_fetch::AccessKind::CONST_ACC_R => { + // counters.num_mem_const += 1; + // } + // mem_fetch::AccessKind::TEXTURE_ACC_R => { + // counters.num_mem_texture += 1; + // } + // mem_fetch::AccessKind::GLOBAL_ACC_R => { + // counters.num_mem_read_global += 1; + // } + // mem_fetch::AccessKind::GLOBAL_ACC_W => { + // counters.num_mem_write_global += 1; + // } + // mem_fetch::AccessKind::LOCAL_ACC_R => { + // counters.num_mem_read_local += 1; + // } + // mem_fetch::AccessKind::LOCAL_ACC_W => { + // counters.num_mem_write_local += 1; + // } + // mem_fetch::AccessKind::INST_ACC_R => { + // // TODO: this is wrong + // counters.num_mem_load_instructions += 1; + // } + // mem_fetch::AccessKind::L1_WRBK_ACC => { + // counters.num_mem_write_global += 1; + // } + // mem_fetch::AccessKind::L2_WRBK_ACC => { + // counters.num_mem_l2_writeback += 1; + // } + // mem_fetch::AccessKind::L1_WR_ALLOC_R => { + // counters.num_mem_l1_write_allocate += 1; + // } + // mem_fetch::AccessKind::L2_WR_ALLOC_R => { + // counters.num_mem_l2_write_allocate += 1; + // } + // _ => {} + // } } let dest_sub_partition_id = fetch.sub_partition_id(); diff --git a/src/ported/ldst_unit.rs b/src/ported/ldst_unit.rs index ff3a2fff..31f74e8d 100644 --- a/src/ported/ldst_unit.rs +++ b/src/ported/ldst_unit.rs @@ -875,8 +875,70 @@ where } // inst->op_pipe = MEM__OP; - // // stat collection + // m_core->mem_instruction_stats(*inst); + if let Some(mem_space) = instr.memory_space { + let mut stats = self.stats.lock().unwrap(); + let active_count = instr.active_thread_count(); + stats + .instructions + .inc(mem_space, instr.is_store(), active_count); + // match instr.memory_space { + // Some(MemorySpace::Local | MemorySpace::Global) => { + // if instr.is_store() { + // stats.counters.num_mem_store_instructions += active_count; + // } else { + // stats.counters.num_mem_load_instructions += active_count; + // } + // } + // Some(MemorySpace::Shared) => { + // stats.counters.num_shared_mem_instructions += active_count; + // if instr.is_store() { + // stats.counters.num_shared_mem_store_instructions += active_count; + // } else { + // stats.counters.num_shared_mem_load_instructions += active_count; + // } + // } + // Some(MemorySpace::Texture) => {} + // stats.counters.num_tex_mem_instructions += active_count; + // Some(MemorySpace::Constant) => { + // stats.counters.num_const_mem_instructions += active_count; + // } + // None => {} + // } + } + + // switch (inst.space.get_type()) { + // case undefined_space: + // case reg_space: + // break; + // case shared_space: + // m_stats->gpgpu_n_shmem_insn += active_count; + // break; + // case sstarr_space: + // m_stats->gpgpu_n_sstarr_insn += active_count; + // break; + // case const_space: + // m_stats->gpgpu_n_const_insn += active_count; + // break; + // case param_space_kernel: + // case param_space_local: + // m_stats->gpgpu_n_param_insn += active_count; + // break; + // case tex_space: + // m_stats->gpgpu_n_tex_insn += active_count; + // break; + // case global_space: + // case local_space: + // if (inst.is_store()) + // m_stats->gpgpu_n_store_insn += active_count; + // else + // m_stats->gpgpu_n_load_insn += active_count; + // break; + // default: + // abort(); + // } + // m_core->incmem_stat(m_core->get_config()->warp_size, 1); self.pipelined_simd_unit.issue(instr); diff --git a/src/ported/mem_fetch.rs b/src/ported/mem_fetch.rs index e973e13a..eac4ba4e 100644 --- a/src/ported/mem_fetch.rs +++ b/src/ported/mem_fetch.rs @@ -61,7 +61,9 @@ pub enum Status { NUM_MEM_REQ_STAT, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] +#[derive( + Debug, strum::EnumIter, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize, +)] pub enum AccessKind { GLOBAL_ACC_R, LOCAL_ACC_R, @@ -74,7 +76,27 @@ pub enum AccessKind { INST_ACC_R, L1_WR_ALLOC_R, L2_WR_ALLOC_R, - NUM_MEM_ACCESS_TYPE, + // NUM_MEM_ACCESS_TYPE, +} + +impl AccessKind { + #[must_use] + pub fn is_write(&self) -> bool { + match self { + AccessKind::GLOBAL_ACC_R + | AccessKind::LOCAL_ACC_R + | AccessKind::CONST_ACC_R + | AccessKind::TEXTURE_ACC_R + | AccessKind::INST_ACC_R + | AccessKind::L1_WR_ALLOC_R + | AccessKind::L2_WR_ALLOC_R => false, + // | AccessKind::NUM_MEM_ACCESS_TYPE => false, + AccessKind::GLOBAL_ACC_W + | AccessKind::LOCAL_ACC_W + | AccessKind::L1_WRBK_ACC + | AccessKind::L2_WRBK_ACC => true, + } + } } #[derive(Clone, Debug, Hash, Eq, PartialEq)] diff --git a/src/ported/mod.rs b/src/ported/mod.rs index 138339c0..d54e4771 100644 --- a/src/ported/mod.rs +++ b/src/ported/mod.rs @@ -1045,29 +1045,38 @@ pub fn accelmain( break; } - let stats: Stats = sim.stats.lock().unwrap().clone(); - println!("STATS:\n{:#?}", &stats); - - let mut l1_inst_stats = stats::CacheStats::default(); - let mut l1_data_stats = stats::CacheStats::default(); + let mut stats: Stats = sim.stats.lock().unwrap().clone(); for cluster in sim.clusters { for core in cluster.cores.lock().unwrap().iter() { - l1_inst_stats += core.inner.instr_l1_cache.stats().lock().unwrap().clone(); + let core_id = core.inner.core_id; + stats.l1i_stats.insert( + core_id, + core.inner.instr_l1_cache.stats().lock().unwrap().clone(), + ); let ldst_unit = &core.inner.load_store_unit.lock().unwrap(); let data_l1 = ldst_unit.data_l1.as_ref().unwrap(); - l1_data_stats += data_l1.stats().lock().unwrap().clone(); + stats + .l1d_stats + .insert(core_id, data_l1.stats().lock().unwrap().clone()); + stats + .l1c_stats + .insert(core_id, stats::CacheStats::default()); + stats + .l1t_stats + .insert(core_id, stats::CacheStats::default()); } } - println!("L1 INST:\n{:#?}", &l1_inst_stats); - println!("L1 DATA:\n{:#?}", &l1_data_stats); - let mut l2_cache_stats = stats::CacheStats::default(); for sub in sim.mem_sub_partitions.iter() { let sub: &MemorySubPartition = &sub.as_ref().borrow(); let l2_cache = sub.l2_cache.as_ref().unwrap(); - l2_cache_stats += l2_cache.stats().lock().unwrap().clone(); + stats + .l2d_stats + .insert(sub.id, l2_cache.stats().lock().unwrap().clone()); } - println!("L2 DATA:\n{:#?}", &l2_cache_stats); + + println!("STATS:\n{:#?}", &stats); + log::info!("STATS:\n{:#?}", &stats); // save stats to file // let stats_file_path = stats_out_file @@ -1081,16 +1090,39 @@ pub fn accelmain( #[cfg(test)] mod tests { - use super::Stats; + use super::instruction::MemorySpace; + use super::mem_fetch; + use super::stats::{self, Stats}; use color_eyre::eyre; - use pretty_assertions::assert_eq as diff_assert_eq; + use pretty_assertions_sorted as diff; + use std::collections::HashMap; use std::path::PathBuf; + pub trait ConvertHashMap + where + IK: Into, + IV: Into, + { + fn convert(self) -> HashMap; + } + + impl ConvertHashMap for HashMap + where + IK: Into, + IV: Into, + K: Eq + std::hash::Hash, + { + fn convert(self) -> HashMap { + self.into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect() + } + } + #[test] fn test_vectoradd() -> eyre::Result<()> { let manifest_dir = PathBuf::from(std::env!("CARGO_MANIFEST_DIR")); let vec_add_trace_dir = manifest_dir.join("results/vectorAdd/vectorAdd-100-32"); - // manifest_dir.join("test-apps/vectoradd/traces/vectoradd-100-32-trace/"); let kernelslist = vec_add_trace_dir.join("accelsim-trace/kernelslist.g"); let gpgpusim_config = manifest_dir.join("accelsim/gtx1080/gpgpusim.config"); @@ -1103,9 +1135,12 @@ mod tests { assert!(trace_config.is_file()); assert!(inter_config.is_file()); - let stats = super::accelmain(&vec_add_trace_dir.join("trace"), None)?; + let start = std::time::Instant::now(); + let box_stats = super::accelmain(&vec_add_trace_dir.join("trace"), None)?; + let box_dur = start.elapsed(); - let ref_stats: playground::Stats = std::thread::spawn(move || { + let start = std::time::Instant::now(); + let play_stats: playground::Stats = { let mut args = vec![ "-trace", kernelslist.as_os_str().to_str().unwrap(), @@ -1116,59 +1151,173 @@ mod tests { "-inter_config_file", inter_config.as_os_str().to_str().unwrap(), ]; - // let kernelslist = kernelslist.to_string_lossy().to_string(); - // let gpgpusim_config = gpgpusim_config.to_string_lossy().to_string(); - // let trace_config = trace_config.to_string_lossy().to_string(); - // let inter_config = inter_config.to_string_lossy().to_string(); - // let mut args = vec![ - // "-trace", - // &kernelslist, - // "-config", - // &gpgpusim_config, - // "-config", - // &trace_config, - // "-inter_config_file", - // &inter_config, - // ]; - dbg!(&args); let config = playground::Config::default(); let ref_stats = playground::run(&config, &args)?; Ok::<_, eyre::Report>(ref_stats) - }) - .join() - .unwrap()?; + }?; + let playground_dur = start.elapsed(); + + // let ref_stats: Stats = ref_stats.clone().into(); + dbg!(&play_stats); + dbg!(&box_stats); - let ref_stats: Stats = ref_stats.clone().into(); - dbg!(&ref_stats); - dbg!(&stats); + dbg!(&playground_dur); + dbg!(&box_dur); // compare stats here - diff_assert_eq!(&ref_stats, &stats); + diff::assert_eq_sorted!( + &stats::PerCacheStats::from(play_stats.l1i_stats), + &box_stats.l1i_stats + ); + diff::assert_eq_sorted!( + &stats::PerCacheStats::from(play_stats.l1d_stats), + &box_stats.l1d_stats, + ); + diff::assert_eq_sorted!( + &stats::PerCacheStats::from(play_stats.l1t_stats), + &box_stats.l1t_stats, + ); + diff::assert_eq_sorted!( + &stats::PerCacheStats::from(play_stats.l1c_stats), + &box_stats.l1c_stats, + ); + diff::assert_eq_sorted!( + &stats::PerCacheStats::from(play_stats.l2d_stats), + &box_stats.l2d_stats, + ); + + let box_accesses = &box_stats.accesses; + diff::assert_eq_sorted!( + play_stats.accesses, + playground::Accesses { + num_mem_write: box_accesses.num_writes(), + num_mem_read: box_accesses.num_reads(), + num_mem_const: box_accesses + .get(&mem_fetch::AccessKind::CONST_ACC_R) + .copied() + .unwrap_or(0), + num_mem_texture: box_accesses + .get(&mem_fetch::AccessKind::TEXTURE_ACC_R) + .copied() + .unwrap_or(0), + num_mem_read_global: box_accesses + .get(&mem_fetch::AccessKind::GLOBAL_ACC_R) + .copied() + .unwrap_or(0), + num_mem_write_global: box_accesses + .get(&mem_fetch::AccessKind::GLOBAL_ACC_W) + .copied() + .unwrap_or(0), + num_mem_read_local: box_accesses + .get(&mem_fetch::AccessKind::LOCAL_ACC_R) + .copied() + .unwrap_or(0), + num_mem_write_local: box_accesses + .get(&mem_fetch::AccessKind::LOCAL_ACC_W) + .copied() + .unwrap_or(0), + num_mem_l2_writeback: box_accesses + .get(&mem_fetch::AccessKind::L2_WRBK_ACC) + .copied() + .unwrap_or(0), + num_mem_l1_write_allocate: box_accesses + .get(&mem_fetch::AccessKind::L1_WR_ALLOC_R) + .copied() + .unwrap_or(0), + num_mem_l2_write_allocate: box_accesses + .get(&mem_fetch::AccessKind::L2_WR_ALLOC_R) + .copied() + .unwrap_or(0), + } + ); + + dbg!(&play_stats.accesses); + dbg!(&box_stats.accesses); + + dbg!(&play_stats.instructions); + dbg!(&box_stats.instructions); + + let box_instructions = &box_stats.instructions; + let playground_instructions = { + let num_global_loads = box_instructions + .get(&(MemorySpace::Global, false)) + .copied() + .unwrap_or(0); + let num_local_loads = box_instructions + .get(&(MemorySpace::Local, false)) + .copied() + .unwrap_or(0); + let num_global_stores = box_instructions + .get(&(MemorySpace::Global, true)) + .copied() + .unwrap_or(0); + let num_local_stores = box_instructions + .get(&(MemorySpace::Local, true)) + .copied() + .unwrap_or(0); + let num_shmem = box_instructions.get_total(MemorySpace::Shared); + let num_tex = box_instructions.get_total(MemorySpace::Texture); + let num_const = box_instructions.get_total(MemorySpace::Constant); + + playground::Instructions { + num_load_instructions: num_local_loads + num_global_loads, + num_store_instructions: num_local_stores + num_global_stores, + num_shared_mem_instructions: num_shmem, + num_sstarr_instructions: 0, + num_texture_instructions: num_tex, + num_const_instructions: num_const, + num_param_instructions: 0, + // ..playground::Instructions::default() + } + }; + + diff::assert_eq_sorted!(&play_stats.instructions, &playground_instructions); + + assert!(false, "all good!"); Ok(()) } - impl From for Stats { - fn from(stats: playground::Stats) -> Self { - Self { - num_mem_write: stats.num_mem_write, - num_mem_read: stats.num_mem_read, - num_mem_const: stats.num_mem_const, - num_mem_texture: stats.num_mem_texture, - num_mem_read_global: stats.num_mem_read_global, - num_mem_write_global: stats.num_mem_write_global, - num_mem_read_local: stats.num_mem_read_global, - num_mem_write_local: stats.num_mem_write_local, - num_mem_read_inst: stats.num_load_instructions, - num_mem_l2_writeback: stats.num_mem_l2_writeback, - num_mem_l1_write_allocate: stats.num_mem_l1_write_allocate, - num_mem_l2_write_allocate: stats.num_mem_l2_write_allocate, - l1_data: super::stats::CacheStats::default(), - } + impl From> for super::stats::PerCacheStats { + fn from(stats: HashMap) -> Self { + Self(stats.convert()) } } + // impl From for super::stats::Counters { + // fn from(stats: playground::Stats) -> Self { + // Self { + // // num_mem_write: stats.num_mem_write, + // // num_mem_read: stats.num_mem_read, + // // num_mem_const: stats.num_mem_const, + // // num_mem_texture: stats.num_mem_texture, + // // num_mem_read_global: stats.num_mem_read_global, + // // num_mem_write_global: stats.num_mem_write_global, + // // num_mem_read_local: stats.num_mem_read_global, + // // num_mem_write_local: stats.num_mem_write_local, + // // num_mem_load_instructions: stats.num_load_instructions, + // // num_mem_store_instructions: stats.num_store_instructions, + // // num_mem_l2_writeback: stats.num_mem_l2_writeback, + // // num_mem_l1_write_allocate: stats.num_mem_l1_write_allocate, + // // num_mem_l2_write_allocate: stats.num_mem_l2_write_allocate, + // } + // } + // } + + // impl From for Stats { + // fn from(stats: playground::Stats) -> Self { + // Self { + // // counters: stats.clone().into(), + // l1i_stats: stats.l1i_stats.into(), + // l1c_stats: stats.l1c_stats.into(), + // l1t_stats: stats.l1t_stats.into(), + // l1d_stats: stats.l1d_stats.into(), + // l2d_stats: stats.l2d_stats.into(), + // } + // } + // } + impl From for super::cache::RequestStatus { fn from(status: playground::RequestStatus) -> Self { use super::cache::RequestStatus; @@ -1227,9 +1376,9 @@ mod tests { } } - impl From for super::mem_fetch::AccessKind { + impl From for mem_fetch::AccessKind { fn from(kind: playground::AccessType) -> Self { - use super::mem_fetch::AccessKind; + use mem_fetch::AccessKind; match kind { playground::AccessType::GLOBAL_ACC_R => AccessKind::GLOBAL_ACC_R, playground::AccessType::LOCAL_ACC_R => AccessKind::LOCAL_ACC_R, @@ -1242,7 +1391,9 @@ mod tests { playground::AccessType::INST_ACC_R => AccessKind::INST_ACC_R, playground::AccessType::L1_WR_ALLOC_R => AccessKind::L1_WR_ALLOC_R, playground::AccessType::L2_WR_ALLOC_R => AccessKind::L2_WR_ALLOC_R, - playground::AccessType::NUM_MEM_ACCESS_TYPE => AccessKind::NUM_MEM_ACCESS_TYPE, + other @ playground::AccessType::NUM_MEM_ACCESS_TYPE => { + panic!("bad mem access type: {:?}", other) + } } } } @@ -1336,7 +1487,7 @@ mod tests { let ref_stats: Vec<_> = ref_stats?; let ref_stats: playground::Stats = ref_stats[0].clone(); - let ref_stats: Stats = ref_stats.clone().into(); + // let ref_stats: Stats = ref_stats.clone().into(); dbg!(&ref_stats); } diff --git a/src/ported/stats.rs b/src/ported/stats.rs index d4aa5683..3e48fe61 100644 --- a/src/ported/stats.rs +++ b/src/ported/stats.rs @@ -3,8 +3,6 @@ use crate::config; use std::collections::HashMap; use std::sync::Mutex; -pub type CacheRequestStatusCounters = HashMap<(mem_fetch::AccessKind, cache::AccessStat), usize>; - #[derive(Clone, Default, Debug, serde::Serialize, serde::Deserialize)] pub struct DRAMStats { /// bank writes [shader id][dram chip id][bank id] @@ -34,29 +32,187 @@ impl DRAMStats { } #[derive(Clone, Default, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -pub struct Stats { - pub num_mem_write: usize, - pub num_mem_read: usize, - pub num_mem_const: usize, - pub num_mem_texture: usize, - pub num_mem_read_global: usize, - pub num_mem_write_global: usize, - pub num_mem_read_local: usize, - pub num_mem_write_local: usize, - pub num_mem_read_inst: usize, - pub num_mem_l2_writeback: usize, - pub num_mem_l1_write_allocate: usize, - pub num_mem_l2_write_allocate: usize, - - pub l1_data: CacheStats, +pub struct PerCacheStats(pub HashMap); + +impl PerCacheStats { + pub fn shave(&mut self) { + for stats in self.values_mut() { + stats.shave(); + } + } + + pub fn reduce(&self) -> CacheStats { + let mut out = CacheStats::default(); + for stats in self.0.values() { + out += stats.clone(); + } + out + } +} + +impl std::ops::Deref for PerCacheStats { + type Target = HashMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for PerCacheStats { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +// #[derive(Clone, Default, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +// pub struct Counters { +// // pub num_mem_write: usize, +// // pub num_mem_read: usize, +// // pub num_mem_const: usize, +// // pub num_mem_texture: usize, +// // pub num_mem_read_global: usize, +// // pub num_mem_write_global: usize, +// // pub num_mem_read_local: usize, +// // pub num_mem_write_local: usize, +// // pub num_mem_load_instructions: usize, +// // pub num_mem_store_instructions: usize, +// // pub num_mem_l2_writeback: usize, +// // pub num_mem_l1_write_allocate: usize, +// // pub num_mem_l2_write_allocate: usize, +// } + +#[derive(Clone, Default, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct MemAccesses(pub HashMap); + +impl MemAccesses { + pub fn num_writes(&self) -> usize { + self.0 + .iter() + .filter(|(kind, _)| kind.is_write()) + .map(|(_, count)| count) + .sum() + } + + pub fn num_reads(&self) -> usize { + self.0 + .iter() + .filter(|(kind, _)| !kind.is_write()) + .map(|(_, count)| count) + .sum() + } + + pub fn inc(&mut self, kind: mem_fetch::AccessKind, count: usize) { + *self.0.entry(kind).or_insert(0) += count; + } +} + +impl std::ops::Deref for MemAccesses { + type Target = HashMap; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for MemAccesses { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } } #[derive(Clone, Default, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct InstructionCounts(pub HashMap<(super::instruction::MemorySpace, bool), usize>); + +impl InstructionCounts { + pub fn get_total(&self, space: super::instruction::MemorySpace) -> usize { + let stores = self.0.get(&(space, true)).unwrap_or(&0); + let loads = self.0.get(&(space, false)).unwrap_or(&0); + stores + loads + } + + pub fn inc(&mut self, space: super::instruction::MemorySpace, is_store: bool, count: usize) { + *self.0.entry((space, is_store)).or_insert(0) += count; + } +} + +impl std::ops::Deref for InstructionCounts { + type Target = HashMap<(super::instruction::MemorySpace, bool), usize>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for InstructionCounts { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +#[derive(Clone, Default, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct Stats { + // pub counters: Counters, + pub accesses: MemAccesses, + pub instructions: InstructionCounts, + pub l1i_stats: PerCacheStats, + pub l1c_stats: PerCacheStats, + pub l1t_stats: PerCacheStats, + pub l1d_stats: PerCacheStats, + pub l2d_stats: PerCacheStats, +} + +pub type CacheRequestStatusCounters = HashMap<(mem_fetch::AccessKind, cache::AccessStat), usize>; + +#[derive(Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct CacheStats { pub accesses: CacheRequestStatusCounters, } +impl Default for CacheStats { + fn default() -> Self { + use strum::IntoEnumIterator; + let mut accesses = HashMap::new(); + for access_kind in mem_fetch::AccessKind::iter() { + for status in cache::RequestStatus::iter() { + accesses.insert((access_kind, cache::AccessStat::Status(status)), 0); + } + for failure in cache::ReservationFailure::iter() { + accesses.insert( + (access_kind, cache::AccessStat::ReservationFailure(failure)), + 0, + ); + } + } + Self { accesses } + } +} + +impl std::fmt::Debug for CacheStats { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let mut accesses: Vec<_> = self + .accesses + .iter() + .filter(|(_, &count)| count > 0) + .map(|((access_kind, access_stat), count)| match access_stat { + cache::AccessStat::Status(status) => { + format!("{:?}[{:?}]={}", access_kind, status, count) + } + cache::AccessStat::ReservationFailure(failure) => { + format!("{:?}[{:?}]={}", access_kind, failure, count) + } + }) + .collect(); + accesses.sort(); + + f.debug_list().entries(accesses).finish() + } +} + impl CacheStats { + pub fn shave(&mut self) { + self.accesses.retain(|_, v| *v > 0); + } + #[deprecated] pub fn sub_stats(&self) { use cache::{AccessStat, RequestStatus};