diff --git a/base_test/matmul_test/READE.md b/base_test/matmul_test/READE.md new file mode 100644 index 0000000..59ed02f --- /dev/null +++ b/base_test/matmul_test/READE.md @@ -0,0 +1,68 @@ +Matmul 自动化测试 +# 1. 脚本说明 +matmul 放置位置: +```shell +# mudnn_bench 默认存放在 /usr/local/musa/ 下 +mudnn_bench +├── bench_test_matmul.sh +├── bin +│ ├── mudnn_bench -> mudnn_bench-x.x.x +│ └── mudnn_bench-x.x.x +├── matmul_test +``` +mudnn_bench 示例: +**部分旧版本mudnn_bench和mudnn版本不支持混合精度测试,需要和开发者做确认.** +```shell + +# 示例 1:单卡,大矩阵,f32 +MUSA_VISIBLE_DEVICES=4 ./bin/mudnn_bench -m --mm_m 6144 --mm_n 3584 --mm_k 6144 --warmup 30 --tm i --tmv 1000 -p -t f32 + +# 示例 2:多卡,标准尺寸,bf16 +MUSA_VISIBLE_DEVICES=0,1 ./bin/mudnn_bench -m --mm_m 4096 --mm_n 4096 --mm_k 4096 --warmup 30 --tm i --tmv 1000 -p -t bf16 + +# 示例 3:单卡,特殊组合,int8 +MUSA_VISIBLE_DEVICES=2 ./bin/mudnn_bench -m --mm_m 8192 --mm_n 8192 --mm_k 768 --warmup 30 --tm i --tmv 1000 -p -t int8 + +# 示例 4:使用混合精度格式 +MUSA_VISIBLE_DEVICES=3 ./bin/mudnn_bench -m --mm_m 2048 --mm_n 2048 --mm_k 2048 --warmup 30 --tm i --tmv 1000 -p -t bf16:q4:bf16:bf16 +``` + +# 2. 测试 +可在测试脚本中自行批量配置测试MNK,warmup,iter等。 +## 2.1 fp64, tf32 测试 +注意:fp64和tf32 数据类型调用非 mudnn 接口 +```shell +# 1. 编译 +cd ./fp64_tf32_src + +bash build_gemm_tf32.sh +bash build_gemm_fp64.sh + +## 2. 
"""Summarize a mudnn_bench matmul log (f32/f16/bf16/q8/fp8 runs) into a CSV file."""
import csv
import os
import re
import sys
from typing import Dict, List, Optional

# Line that separates consecutive test records in the log.
BLOCK_SEPARATOR = "=" * 30

# Keys a block must contain before it is emitted as a record.
_REQUIRED_KEYS = ("datatype", "m", "n", "k", "elapsed_time", "throughput_tops")

# Decimal number with an optional fractional part, so "12" is accepted as
# well as "12.5".
_NUMBER = r"(\d+(?:\.\d+)?)"

_PATTERNS = {
    "datatype": re.compile(r"DataType (\w+)"),
    "mat_params": re.compile(r"m (\d+), n (\d+), k (\d+)"),
    "elapsed_time": re.compile(r"AverageElapsedTime\(ms\) : " + _NUMBER),
    "throughput_gops": re.compile(r"Throughput " + _NUMBER + r" GOPS"),
}

CSV_HEADERS = ["datatype", "shape", "Throughput(TOPS)", "AverageElapsedTime(ms)"]


def _build_record(block: Dict[str, str]) -> Optional[Dict[str, str]]:
    """Return the CSV record for *block*, or None if any required key is missing."""
    if not all(key in block for key in _REQUIRED_KEYS):
        return None
    return {
        "datatype": block["datatype"],
        # One shape format for every record, including the last one in the file.
        "shape": f"{block['m']}-{block['n']}-{block['k']}",
        "Throughput(TOPS)": block["throughput_tops"],
        "AverageElapsedTime(ms)": block["elapsed_time"],
    }


def extract_matmul_data(log_path: str) -> List[Dict[str, str]]:
    """Parse *log_path* and return one record per complete test block.

    A block is closed by a ``BLOCK_SEPARATOR`` line or by end of file.
    Blocks missing any required field are dropped. Throughput is converted
    from GOPS to TOPS and rounded to 4 decimals.

    Returns an empty list (after printing a message) if the file cannot be
    read or decoded.
    """
    extracted: List[Dict[str, str]] = []
    current_block: Dict[str, str] = {}

    try:
        with open(log_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()

                dt_match = _PATTERNS["datatype"].search(line)
                if dt_match:
                    current_block["datatype"] = dt_match.group(1)

                mp_match = _PATTERNS["mat_params"].search(line)
                if mp_match:
                    current_block["m"], current_block["n"], current_block["k"] = mp_match.groups()

                et_match = _PATTERNS["elapsed_time"].search(line)
                if et_match:
                    current_block["elapsed_time"] = et_match.group(1)

                tp_match = _PATTERNS["throughput_gops"].search(line)
                if tp_match:
                    tops = round(float(tp_match.group(1)) / 1000, 4)
                    current_block["throughput_tops"] = str(tops)

                if line == BLOCK_SEPARATOR and current_block:
                    record = _build_record(current_block)
                    if record is not None:
                        extracted.append(record)
                    current_block = {}

        # The log may end without a closing separator; flush what is left.
        record = _build_record(current_block)
        if record is not None:
            extracted.append(record)
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ 读取日志失败:{str(e)}")
        return []

    return extracted


def generate_csv(data: List[Dict[str, str]], output_path: str) -> bool:
    """Write *data* to *output_path* as CSV; return True on success.

    Returns False without touching the file when *data* is empty, and False
    after printing a message when writing fails.
    """
    if not data:
        print("⚠️ 未提取到有效数据,跳过CSV生成")
        return False

    try:
        # newline="" plus an explicit "\n" terminator keeps one-line-per-record
        # output; csv.writer emits plain "," so fields carry no leading space.
        with open(output_path, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f, lineterminator="\n")
            writer.writerow(CSV_HEADERS)
            writer.writerows([item[h] for h in CSV_HEADERS] for item in data)
    except (OSError, KeyError) as e:
        print(f"❌ 生成CSV失败:{str(e)}")
        return False

    print(f"✅ CSV生成成功:{output_path}")
    return True


def derive_csv_path(log_path: str) -> str:
    """Return the CSV path that sits next to *log_path* with the same stem.

    Only the final extension is replaced, so directory names containing
    ".log" are left alone and an input without ".log" can never map onto
    itself (which would overwrite the log).
    """
    root, ext = os.path.splitext(log_path)
    if ext.lower() == ".csv":
        return root + "_summary.csv"
    return root + ".csv"


def main(input_log: str, output_csv: Optional[str] = None):
    """Extract records from *input_log* and write them to *output_csv*.

    When *output_csv* is not given, ``<log stem>_summary.csv`` next to the
    log is used.
    """
    if not os.path.isfile(input_log):
        print(f"❌ 输入日志文件不存在:{input_log}")
        return

    if not output_csv:
        log_dir = os.path.dirname(input_log)
        log_name = os.path.splitext(os.path.basename(input_log))[0]
        output_csv = os.path.join(log_dir, f"{log_name}_summary.csv")

    print(f"📊 开始提取日志数据:{input_log}")
    matmul_data = extract_matmul_data(input_log)

    if not matmul_data:
        print("❌ 未提取到任何有效测试数据")
        return

    print(f"✅ 成功提取 {len(matmul_data)} 条测试记录")

    # Only report completion when the CSV was actually written.
    if generate_csv(matmul_data, output_csv):
        print("🎯 所有操作完成!")


if __name__ == "__main__":
    # sys.argv[0] is the script name; one input path is required.
    if len(sys.argv) < 2:
        script_name = os.path.basename(sys.argv[0])
        print("用法:")
        print(f" python {script_name} <输入日志文件路径>")
        print("示例:")
        print(f" python {script_name} bench.log")
        sys.exit(1)

    input_path = sys.argv[1]
    main(input_path, derive_csv_path(input_path))
def generate_csv(data: List[Dict[str, str]], output_path: str) -> bool:
    """Write the extracted FP64/TF32 records to *output_path* as CSV.

    Args:
        data: Records keyed by the column names in ``headers`` below.
        output_path: Destination file; it is created or truncated.

    Returns:
        True when the file was written. False when *data* is empty (nothing
        is written) or when writing fails (a message is printed).
    """
    # Local import: csv is not among this module's file-level imports.
    import csv

    if not data:
        print("⚠️ 未提取到有效数据,跳过CSV生成")
        return False

    headers = ["DataType", "shape", "Compute_ability(TFLOPS)", "AverageElapsedTime(ms)"]
    try:
        # One writer for header and rows, so both use the same "," delimiter
        # and header names do not pick up a leading space.
        with open(output_path, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f, lineterminator="\n")
            writer.writerow(headers)
            writer.writerows([item[h] for h in headers] for item in data)
    except (OSError, KeyError) as e:
        print(f"❌ 生成CSV失败:{str(e)}")
        return False

    print(f"✅ CSV生成成功:{output_path}")
    return True
"""Extract per-test time and throughput from a mixed-precision bench log into CSV."""
import csv
import os
import re
import sys

# Line announcing the parameters of the test that follows.
re_start = re.compile(r"测试:\s*M=(\d+),\s*N=(\d+),\s*K=(\d+),\s*Type=([\w:]+)")
# Line carrying the measured result of the most recently announced test.
re_result = re.compile(r"AverageElapsedTime\(ms\)\s*:\s*([\d\.]+)\s*,\s*Throughput\s*([\d\.]+)\s*GOPS")

FIELDNAMES = ["M", "N", "K", "Type", "AvgTime(ms)", "GOPS"]


def parse_mixed_log(log_file):
    """Return one record per (parameter line, result line) pair in *log_file*.

    A result line is only recorded when a parameter line precedes it; the
    pending parameters are cleared afterwards so a later stray result cannot
    be attributed to an earlier test.
    """
    records = []
    pending = None  # (M, N, K, Type) of the test awaiting its result

    with open(log_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()

            start = re_start.search(line)
            if start:
                pending = start.groups()
                continue

            result = re_result.search(line)
            if result and pending is not None:
                elapsed, gops = result.groups()
                records.append(dict(zip(FIELDNAMES, (*pending, elapsed, gops))))
                pending = None

    return records


def derive_csv_path(log_file):
    """Return the CSV path next to *log_file*, replacing only its final extension.

    Unlike ``str.replace(".log", ".csv")`` this never returns the input path
    itself (which would overwrite the log) and never rewrites directory names.
    """
    root, ext = os.path.splitext(log_file)
    if ext.lower() == ".csv":
        return root + "_summary.csv"
    return root + ".csv"


def write_csv(records, csv_path):
    """Write *records* to *csv_path* with a header row."""
    with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(records)


def main(argv=None):
    """Run the tool; return the process exit status.

    Returns 1 for a missing argument or a missing log file, and 0 otherwise,
    including when the log holds no records.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage: python summary_mixed_data.py <log_file>")
        return 1

    log_file = argv[1]
    print(f"📊 正在读取并解析日志:{log_file}")

    # isfile rather than exists: a directory must not reach open().
    if not os.path.isfile(log_file):
        print("❌ 日志文件不存在")
        return 1

    records = parse_mixed_log(log_file)
    if not records:
        print("⚠️ 未提取到任何有效数据")
        return 0

    csv_path = derive_csv_path(log_file)
    write_csv(records, csv_path)

    print(f"✅ 解析完成,共 {len(records)} 条数据")
    print(f"📄 CSV 已生成:{csv_path}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
mublasDgemm(handle, MUBLAS_OP_N, MUBLAS_OP_T, + M, N, K, &alpha, + d_A, M, + d_B, N, + &beta, + d_C, M); + } + + musaError_t syncError = musaDeviceSynchronize(); + auto start = std::chrono::high_resolution_clock::now(); + + if (syncError != musaSuccess) + { + std::cout << "MUSA error: " << musaGetErrorString(syncError) << std::endl; + } + + for (int i = 0; i < config.NUM_ITERATIONS; ++i) + { + mublasDgemm(handle, MUBLAS_OP_N, MUBLAS_OP_T, + M, N, K, &alpha, + d_A, M, + d_B, N, + &beta, + d_C, M); + } + syncError = musaDeviceSynchronize(); + auto end = std::chrono::high_resolution_clock::now(); + + if (syncError != musaSuccess) + { + std::cout << "MUSA error: " << musaGetErrorString(syncError) << std::endl; + } + auto duration = + std::chrono::duration_cast(end - start); + std::cout << "Average " << config.name << " Single Op Duration: " + << duration.count() / config.NUM_ITERATIONS << " us" << std::endl; + + double time_second = duration.count() / 1.0e6; + double flops = 2.0 * M * N * K * config.NUM_ITERATIONS; + double FLOPS = flops / time_second; + double TFLOPS = FLOPS / 1.0e12; + + std::cout << "[FlagPerf Result]" << "computation-FP64=" << TFLOPS << "TFLOPS" + << std::endl; + + musaMemcpy(h_C.data(), d_C, M * N * config.bytesPerElement, musaMemcpyDeviceToHost); + + musaFree(d_A); + musaFree(d_B); + musaFree(d_C); + + mublasDestroy(handle); +} + +int main(int argc, char* argv[]) { + + if (argc != 5) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + std::cerr << "Example: " << argv[0] << " 128 128 128 10" << std::endl; + return EXIT_FAILURE; + } + + int m = std::atoi(argv[1]); + int n = std::atoi(argv[2]); + int k = std::atoi(argv[3]); + int iter = std::atoi(argv[4]); + + std::cout << "========================================" << std::endl; + std::cout << "MatMul FP64 Test (MUSA)" << std::endl; + std::cout << "m = " << m << ", n = " << n << ", k = " << k << std::endl; + std::cout << "Test Iterations = " << iter << std::endl; + + M = m; + N = n; + K = 
k; + musaSetDevice(0); + PrecisionConfig fp64_PrecisionConfig = {sizeof(double), "FP64", iter, 40}; + + test(fp64_PrecisionConfig); + + return 0; +} diff --git a/base_test/matmul_test/fp64_tf32_src/gemm_tf32.cpp b/base_test/matmul_test/fp64_tf32_src/gemm_tf32.cpp new file mode 100644 index 0000000..6221eed --- /dev/null +++ b/base_test/matmul_test/fp64_tf32_src/gemm_tf32.cpp @@ -0,0 +1,678 @@ +/* Copyright @2020-2024 Moore Threads Technology Co., Ltd("Moore Threads"). All + * rights reserved. + * + * This software ("this software and its documentations" or "the software") is + * protected by Copyright and the information contained herein is confidential. + * + * The software contained herein is PROPRIETARY to Moore Threads and is being + * provided under the terms and conditions of a form of Moore Threads software + * license agreement by and between Moore Threads and Licensee ("License + * Agreement") or electronically accepted by Licensee. Notwithstanding any + * terms or conditions to the contrary in the License Agreement, copy or + * disclosure of the software to any third party without the express written + * consent of Moore Threads is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE + * AGREEMENT, MOORE THREADS MAKES NO REPRESENTATION ABOUT ANY WARRANTIES, + * INCLUDING BUT NOT LIMITED TO THE SUITABILITY OF THE SOFTWARE FOR ANY + * PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF + * ANY KIND. MOORE THREADS DISCLAIMS ALL WARRANTIES WITH REGARD TO THE + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL MOORE THREADS BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THE SOFTWARE. + */ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +using qint8 = int8_t; + +#define SHOW printf + +namespace Eigen { + struct half; + struct bfloat16; +} +using Eigen::bfloat16; +using Eigen::half; + + +struct MatMulParam { + bool split_k{ false }; + bool trans_a{ false }; + bool trans_b{ true }; + int batch{ 1 }; + int m{ 6144 }; + int n{ 8192 }; + int k{ 19200 }; + double alpha{ 1.0 }; + double beta{ 0.0 }; + double gamma{ 0.0 }; + int mode{ 0 }; // 0 tensor, 1 scalar +}; + +#define CHECK_MUSA(...) \ + do { \ + int err = CheckMusaError(__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__); \ + if (err) \ + exit(err); \ + } while (0) + +#define CHECK_ERR(...) 
\ + do { \ + int err = CheckError(__VA_ARGS__, #__VA_ARGS__, __FILE__, __LINE__); \ + if (err) \ + exit(err); \ + } while (0) + +int CheckMusaError(musaError_t code, const char* expr, const char* file, + int line) { + if (code) { + printf("MUSA error at %s:%d, code=%d (%s) in '%s'", file, line, (int)code, + musaGetErrorString(code), expr); + return 1; + } + return 0; +} + +int CheckError(bool code, const char* expr, const char* file, int line) { + if (code) { + printf("General error at %s:%d, code=%d (%s) in '%s'", file, line, + (int)code, "general error", expr); + return 1; + } + return 0; +} + +template +void GenerateRandom(Type* data, int64_t size, uint seed = 2333) { + // non-determistic seed source + // std::random_device rd; + // std::default_random_engine engine(rd()); + // constexpr auto seed = 2333; + std::default_random_engine engine(seed); + if (std::is_floating_point_v) { + std::uniform_real_distribution dist(0, 0); + for (auto i = 0; i < size; i++) { + data[i] = (Type)(dist(engine)); + } + } + else { + std::uniform_int_distribution dist(0, 0); + for (auto i = 0; i < size; i++) { + data[i] = (Type)(dist(engine)); + } + } +} + +void MemFree(void* ptr) { + if (ptr) { + musaFree(ptr); + } +} + +::musa::dnn::MemoryHandler MemoryFunc(size_t size) { + void* data = nullptr; + if (size) { + musaMalloc(&data, size); + musaMemset(data, 0, size); + } + return ::musa::dnn::MemoryHandler(data, MemFree); +} + +enum DType { + f32, + f16, + q8, + bf16, +}; + + +class TestMatMul { +public: + inline float F32MaskFormatTF32(float f) { + unsigned int t = 0; + std::memcpy(&t, &f, sizeof(f)); + // 1110 0000 0000 0000 + t = t & 0xffffe000; + std::memcpy(&f, &t, sizeof(f)); + return f; + } + + // Random num generator + + + TestMatMul(const musaStream_t& _stream, const int _device_id, const DType _dtype, const MatMulParam _param, const int _iters) + { + stream = _stream; + device_id = _device_id; + dtype = _dtype; + dtype_size = 4; + + switch (dtype) { + case DType::f32: + 
dtype_str = "float32"; + dtype_size = 4; + break; + case DType::f16: + dtype_str = "float16"; + dtype_size = 2; + break; + case DType::bf16: + dtype_str = "bfloat16"; + dtype_size = 2; + break; + case DType::q8: + dtype_str = "qint8"; + dtype_size = 1; + break; + default: + bool DType_Not_Suppoted = true; + CHECK_ERR(DType_Not_Suppoted); + break; + } + split_k = _param.split_k; + trans_a = _param.trans_a; + trans_b = _param.trans_b; + batch = _param.batch; + m = _param.m; + n = _param.n; + k = _param.k; + alpha = _param.alpha; + beta = _param.beta; + gamma = _param.gamma; + mode = _param.mode; + + iters = _iters; + + handle = new ::musa::dnn::Handle(device_id); + handle->SetStream(stream); + }; + ~TestMatMul() { +#define FREE_H(_PTR) \ + if (_PTR != nullptr) { \ + operator delete(_PTR); \ + } +#define FREE_D(_PTR) \ + if (_PTR != nullptr) { \ + CHECK_MUSA(musaFree(_PTR)); \ + } + + FREE_H(h_buf_a); + FREE_H(h_buf_b); + FREE_H(h_buf_c); + FREE_H(h_buf_o); + FREE_H(h_buf_z); + + FREE_D(d_a); + FREE_D(d_b); + FREE_D(d_c); + FREE_D(d_z); + + FREE_D(d_base); + FREE_D(d_bool); + FREE_D(d_nonz); + FREE_H(h_nonz); + +#undef FREE_H +#undef FREE_D + + if (handle) { + delete handle; + } + }; + + bool Test() { + // check parameters + CheckParams(); + // initial memory && dnn tensor op + Init(); + // warm up && prepare base golden + int warmup_iters = 40; + for (int i = 0; i < warmup_iters; i++) { + Exec(); + } + // main loop + float elapsed_ms = 0.f; + musaEvent_t start, stop; + if (performance) { + CHECK_MUSA(musaEventCreate(&start)); + CHECK_MUSA(musaEventCreate(&stop)); + CHECK_MUSA(musaEventRecord(start, stream)); + } + + std::chrono::milliseconds bubble_time(bubble); + std::chrono::milliseconds duration_time(duration); + std::chrono::milliseconds show_gap_time(60000); + int show_gap_count = 0; + auto start_time = std::chrono::steady_clock::now(); + auto current_time = start_time; + const bool blocking = (bubble > 0) || (iters == 0 && duration > 0); + int 
stable_check_gap_count = 1; + int run_iters_count = 0; + int i = 0; + while ((iters > 0 && i < iters) || + (iters == 0 && (current_time - start_time) <= duration_time)) { + // operator running + Exec(blocking); + + if (bubble > 0) { + // SHOW("sleeping %d ms\n", bubble); + std::this_thread::sleep_for(bubble_time); + } + current_time = std::chrono::steady_clock::now(); + if ((iters == 0 && duration > 0) && + (current_time - start_time) > show_gap_time * show_gap_count) { + std::cout << "--- now execution time passed " + << (show_gap_time * show_gap_count).count() << std::endl; + show_gap_count++; + } + // SHOW("run loop %d\n", run_iters_count); + i++, stable_check_gap_count++, run_iters_count++; + } + // performance testing and stability checking are mutually exclusive + if (performance) { + CHECK_MUSA(musaEventRecord(stop, stream)); + CHECK_MUSA(musaEventSynchronize(stop)); + CHECK_MUSA(musaEventElapsedTime(&elapsed_ms, start, stop)); + elapsed_ms = elapsed_ms / run_iters_count; + ShowPerformance(elapsed_ms, (size_t)m * n * k * 2 / elapsed_ms * 1e-6, + !stable_check); + CHECK_MUSA(musaEventDestroy(start)); + CHECK_MUSA(musaEventDestroy(stop)); + } + return true; + } + + void ShowPerformance(float t, float gops, bool credible) { + // SHOW("dev_time : %f, gops : %f %s\n", t, credible ? gops : 0.f, + // credible + // ? 
" " + // : " - the performance is not credible when enable stable checking"); + SHOW("Average TF32 Single Op Duration:%f us\n", t * 1.0e3); + SHOW("[FlagPerf Result]computation-TF32=%f TFLOPS\n", gops / 1.0e3); + + } + +private: + void* h_buf_a = nullptr; + void* h_buf_b = nullptr; + void* h_buf_c = nullptr; + void* h_buf_o = nullptr; + void* h_buf_z = nullptr; + + void* d_a = nullptr; + void* d_b = nullptr; + void* d_c = nullptr; + void* d_z = nullptr; + + void* d_base = nullptr; + void* d_bool = nullptr; + void* d_nonz = nullptr; + int64_t* h_nonz = nullptr; + + bool result_check = false; + bool stable_check = false; + bool stable_check_gpu = false; + bool performance = true; + bool verbose = false; + int iters = 1; + int duration = 0; + int bubble = 0; + int gap = 1; + uint seed = 2333; + + DType dtype = DType::f32; + std::string dtype_str = "float32"; + size_t dtype_size = 4; + bool split_k = false; + bool trans_a = false; + bool trans_b = false; + int batch = 1; + int m = 1; + int n = 1; + int k = 1; + double alpha = 1.0; + double beta = 0.0; + double gamma = 0.0; + int mode = 0; + + // qint8 variables + const float scale_a = 1.f / 32.f; + const float scale_b = 1.f / 32.f; + const float scale_c = 32.f; + + // mudnn variables + musaStream_t stream; + int device_id; + ::musa::dnn::Handle* handle; + ::musa::dnn::MatMul op; + + ::musa::dnn::Tensor tensor_a; + ::musa::dnn::Tensor tensor_b; + ::musa::dnn::Tensor tensor_c; + ::musa::dnn::Tensor tensor_z; + ::musa::dnn::Tensor tensor_base; + ::musa::dnn::Tensor tensor_bool; + ::musa::dnn::Tensor tensor_nonz; + +private: + + + ::musa::dnn::Tensor::Type GetmuDNNType(const std::string& dtype) { + using T = ::musa::dnn::Tensor::Type; + static std::map type_mapping = { + {"int8", T::INT8}, + {"int16", T::INT16}, + {"int32", T::INT32}, + + {"int", T::INT64}, + {"int64", T::INT64}, + + {"uint8", T::UINT8}, + {"uint16", T::UINT16}, + {"uint32", T::UINT32}, + + {"uint", T::UINT64}, + {"uint64", T::UINT64}, + + {"half", 
T::HALF}, + {"float16", T::HALF}, + {"bfloat16", T::BFLOAT16}, + + {"float32", T::FLOAT}, + {"qint8", T::QINT8}, + + {"float", T::FLOAT}, + {"float64", T::DOUBLE}, + {"double", T::DOUBLE}, + + {"bool", T::BOOL}, + }; + if (type_mapping.find(dtype) != type_mapping.end()) { + return type_mapping.at(dtype); + } + else { + std::cerr << "GetmuDNNType error : " << dtype << std::endl; + return type_mapping.at("float"); + } + } + bool CheckParams() { + bool pass = true; + // param checking + if (mode != 0 && mode != 1) { + std::cerr << "MatMul mode setting error, fallback 0" << std::endl; + mode = 0; + } + if (m <= 0 || n <= 0 || k <= 0) { + std::cerr << "MatMul param setting error, fallback 1" << std::endl; + m = m > 0 ? m : 1; + n = n > 0 ? n : 1; + k = k > 0 ? k : 1; + } + if (gamma != 0) { + std::cerr << "MatMul unsupported gamma != 0 temporarily, fallback 0" + << std::endl; + gamma = 0; + } + if (beta != 0) { + if (mode == 0) { + std::cerr << "MatMul unsupported beta != 0 when mode == 0, fallback 0" + << std::endl; + beta = 0; + } + + } + if (dtype == DType::q8) { + // To be removed when binary supports QINT8 + if (stable_check_gpu) { + std::cerr + << "MatMul unsupported qint8 for stable_check_gpu, fallback cpu " + << std::endl; + stable_check_gpu = false; + } + if (mode != 0) { + std::cerr << "MatMul mode must be 0 when qint8, fallback 0" + << std::endl; + mode = 0; + } + } + + return pass; + } + + bool Init() { + size_t nr_elem_a = (size_t)(m)*k; + size_t nr_elem_b = (size_t)(k)*n; + size_t nr_elem_c = (size_t)(m)*n; + size_t nr_elem_z = (size_t)(n); + + size_t size_a = nr_elem_a * dtype_size; + size_t size_b = nr_elem_b * dtype_size; + size_t size_c = nr_elem_c * dtype_size; + size_t size_z = nr_elem_z * dtype_size; + + size_t mem_total, mem_free; + CHECK_MUSA(musaMemGetInfo(&mem_free, &mem_total)); + size_t available_gpu_mem = mem_free; + size_t total_gpu_mem = mem_total; + size_t need_gpu_mem = size_a + size_b + size_c; + if (gamma != 0) { + need_gpu_mem += 
size_z; + } + if (stable_check && stable_check_gpu) { + need_gpu_mem += + size_c + sizeof(bool) * nr_elem_c + sizeof(int64_t) * m * n * 2; + } + if ((need_gpu_mem > available_gpu_mem) || verbose) { + SHOW("%s : Need Device Memory %.2f GiB, Available Device Memory %.2f GiB " + "(Total %.2f GiB)\n", + (need_gpu_mem > available_gpu_mem) ? "Error" : "Verbose", + need_gpu_mem / 1024.f / 1024 / 1024, + available_gpu_mem / 1024.f / 1024 / 1024, + total_gpu_mem / 1024.f / 1024 / 1024); + } + CHECK_ERR(need_gpu_mem > available_gpu_mem); + + // host buffer + h_buf_a = operator new(size_a); // new char[size_a](); + h_buf_b = operator new(size_b); // new char[size_b](); + h_buf_c = operator new(size_c); // new char[size_c](); + h_buf_o = operator new(size_c); // new char[size_c](); + + // host data initialization + if (dtype == DType::f16) { + GenerateRandom((half*)(h_buf_a), nr_elem_a, seed); + GenerateRandom((half*)(h_buf_b), nr_elem_b, seed); + GenerateRandom((half*)(h_buf_c), nr_elem_c, seed); + + } + else if (dtype == DType::bf16) { + GenerateRandom((bfloat16*)(h_buf_a), nr_elem_a, seed); + GenerateRandom((bfloat16*)(h_buf_b), nr_elem_b, seed); + GenerateRandom((bfloat16*)(h_buf_c), nr_elem_c, seed); + } + else if (dtype == DType::q8) { + GenerateRandom((qint8*)(h_buf_a), nr_elem_a, seed); + GenerateRandom((qint8*)(h_buf_b), nr_elem_b, seed); + GenerateRandom((qint8*)(h_buf_c), nr_elem_c, seed); + } + else { + GenerateRandom((float*)(h_buf_a), nr_elem_a, seed); + GenerateRandom((float*)(h_buf_b), nr_elem_b, seed); + GenerateRandom((float*)(h_buf_c), nr_elem_c, seed); + } + + // tensor float 32 format + if ((dtype == DType::f32) && mode == 0) { + for (size_t i = 0; i < nr_elem_a; i++) { + ((float*)h_buf_a)[i] = (float)F32MaskFormatTF32(((float*)h_buf_a)[i]); + } + for (size_t i = 0; i < nr_elem_b; i++) { + ((float*)h_buf_b)[i] = (float)F32MaskFormatTF32(((float*)h_buf_b)[i]); + } + for (size_t i = 0; i < nr_elem_c; i++) { + ((float*)h_buf_c)[i] = 
(float)F32MaskFormatTF32(((float*)h_buf_c)[i]); + } + } + + // device buffer + CHECK_MUSA(musaMalloc(&d_a, size_a)); + CHECK_MUSA(musaMalloc(&d_b, size_b)); + CHECK_MUSA(musaMalloc(&d_c, size_c)); + + // transfer host data to device + + CHECK_MUSA(musaMemcpy(d_a, h_buf_a, size_a, musaMemcpyHostToDevice)); + CHECK_MUSA(musaMemcpy(d_b, h_buf_b, size_b, musaMemcpyHostToDevice)); + CHECK_MUSA(musaMemcpy(d_c, h_buf_c, size_c, musaMemcpyHostToDevice)); + + // host and device buffer for gamma + if (gamma != 0) { + h_buf_z = new char[size_z](); + CHECK_MUSA(musaMalloc(&d_z, size_z)); + CHECK_MUSA(musaMemcpy(d_z, h_buf_z, size_z, musaMemcpyHostToDevice)); + if (dtype == DType::f16) { + GenerateRandom((half*)(h_buf_z), nr_elem_z, seed); + } + else if (dtype == DType::bf16) { + GenerateRandom((bfloat16*)(h_buf_z), nr_elem_z, seed); + } + else if (dtype == DType::q8) { + GenerateRandom((qint8*)(h_buf_z), nr_elem_z, seed); + } + else { + GenerateRandom((float*)(h_buf_z), nr_elem_z, seed); + } + } + + + ::musa::dnn::Tensor::Type ttype = GetmuDNNType(dtype_str); + tensor_a.SetAddr(d_a); + tensor_a.SetType(ttype); + if (DType::q8 == dtype) { + tensor_a.SetQuantizationInfo(scale_a); + } + if (trans_a) { + tensor_a.SetNdInfo({ k, m }); + } + else { + tensor_a.SetNdInfo({ m, k }); + } + + tensor_b.SetAddr(d_b); + tensor_b.SetType(ttype); + if (DType::q8 == dtype) { + tensor_b.SetQuantizationInfo(scale_b); + } + if (trans_b) { + tensor_b.SetNdInfo({ n, k }); + } + else { + tensor_b.SetNdInfo({ k, n }); + } + + tensor_c.SetAddr(d_c); + tensor_c.SetType(ttype); + tensor_c.SetNdInfo({ m, n }); + if (DType::q8 == dtype) { + tensor_c.SetQuantizationInfo(scale_c); + } + + tensor_z.SetAddr(d_z); + tensor_z.SetType(ttype); + tensor_z.SetNdInfo({ n }); + + CHECK_MUSA(musaStreamSynchronize(stream)); + CHECK_MUSA(musaDeviceSynchronize()); + + + op.SetTranspose(trans_a, trans_b); + // op.SetSplitK(split_k); + op.SetAlpha(alpha); + op.SetBeta(beta); + op.SetGamma(gamma); + 
op.SetComputeMode(static_cast<::musa::dnn::MatMul::ComputeMode>(mode)); + + return true; + } + + void Exec(bool sync = false) { + CHECK_ERR(::musa::dnn::Status::SUCCESS != + op.RunWithBiasAdd(*handle, tensor_c, tensor_a, tensor_b, tensor_z, MemoryFunc)); + CHECK_MUSA(musaGetLastError()); + if (sync) { + CHECK_MUSA(musaStreamSynchronize(stream)); + } + } +}; + +int RunMatMul() { + + + int device_id = 5; + CHECK_MUSA(musaGetDevice(&device_id)); + + MatMulParam param; + const int iters = 42000; + musaStream_t stream; + CHECK_MUSA(musaStreamCreate(&stream)); + TestMatMul test_mm(stream, device_id, DType::f32, param, iters); + bool ret = test_mm.Test(); + CHECK_MUSA(musaStreamDestroy(stream)); + return ret; +} + + +int main(int argc, char* argv[]) { + + if (argc != 5) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + std::cerr << "Example: " << argv[0] << " 128 128 128 10" << std::endl; + return EXIT_FAILURE; + } + + int m = std::atoi(argv[1]); + int n = std::atoi(argv[2]); + int k = std::atoi(argv[3]); + int iter = std::atoi(argv[4]); + + std::cout << "========================================" << std::endl; + std::cout << "MatMul TF32 Test (MUSA)" << std::endl; + std::cout << "m = " << m << ", n = " << n << ", k = " << k << std::endl; + std::cout << "Test Iterations = " << iter << std::endl; + + int device_id = 0; + CHECK_MUSA(musaGetDevice(&device_id)); + + MatMulParam param; + param.m = m; + param.n = n; + param.k = k; + const int iters = iter; + musaStream_t stream; + CHECK_MUSA(musaStreamCreate(&stream)); + TestMatMul test_mm(stream, device_id, DType::f32, param, iters); + bool ret = test_mm.Test(); + CHECK_MUSA(musaStreamDestroy(stream)); + return ret; +} diff --git a/base_test/matmul_test/test_gemm_f32_f16_bf16_q8_fp8.sh b/base_test/matmul_test/test_gemm_f32_f16_bf16_q8_fp8.sh new file mode 100644 index 0000000..736c237 --- /dev/null +++ b/base_test/matmul_test/test_gemm_f32_f16_bf16_q8_fp8.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +input_data=$(cat <<'EOF' 
+128 128 128
+256 256 256
+512 512 512
+1024 1024 1024
+2048 2048 2048
+4096 4096 4096
+8192 8192 8192
+4098 4098 4098
+8190 8190 8190
+EOF
+)
+test_iter=1000
+
+TEST_TYPES=("f32" "f16" "bf16" "q8" "float8_e4m3" "float8_e5m2")
+# TEST_TYPES=("f32")
+LOG_DIR="mudnn_bench_logs"
+mkdir -p "$LOG_DIR"
+log_file="${LOG_DIR}/bench_f32_f16_bf16_q8_fp8.log"
+> "$log_file"
+
+# Run every "M N K" row of input_data once per data type and append all
+# mudnn_bench output to $log_file.
+for type in "${TEST_TYPES[@]}"; do
+    echo "开始测试数据类型:$type"
+    # Use the default IFS so rows split on spaces and tabs alike. A tab-only
+    # IFS leaves n and k empty for space-separated rows, and the -n guard
+    # below then silently skips every size.
+    while read -r m n k; do
+        m=$(echo "$m" | tr -d ' ')
+        n=$(echo "$n" | tr -d ' ')
+        k=$(echo "$k" | tr -d ' ')
+        echo "$m $n $k"
+
+        if [[ -n "$m" && -n "$n" && -n "$k" ]]; then
+            # GPU selection: honor a caller-provided MUSA_VISIBLE_DEVICES and
+            # fall back to device 7.
+            MUSA_VISIBLE_DEVICES="${MUSA_VISIBLE_DEVICES:-7}" ../bin/mudnn_bench -m \
+                -t "$type" \
+                --mm_m="$m" --mm_n="$n" --mm_k="$k" \
+                --mm_mode=0 \
+                --tm i \
+                --tmv "$test_iter" \
+                -p \
+                >> "$log_file" 2>&1
+            sleep 2
+        fi
+    done < <(echo "$input_data")
+done
+
+python exetrct_log_tools/summarize_f32_f16_bf16_q8_fp8_log.py "$log_file"
+
+echo "所有测试完成!日志目录:$LOG_DIR"
diff --git a/base_test/matmul_test/test_gemm_fp64_tf32.sh b/base_test/matmul_test/test_gemm_fp64_tf32.sh
new file mode 100644
index 0000000..924c556
--- /dev/null
+++ b/base_test/matmul_test/test_gemm_fp64_tf32.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Matrix sizes to test, one "M N K" triple per line
+input_data=$(cat <<'EOF'
+128 128 128
+256 256 256
+512 512 512
+1024 1024 1024
+2048 2048 2048
+4096 4096 4096
+8192 8192 8192
+4098 4098 4098
+8190 8190 8190
+8192 768 8192
+EOF
+)
+
+# Iterations per test case
+test_iter=1000
+
+# Data types to test
+TEST_TYPES=("fp64" "tf32")
+
+# Directory holding the GEMM executables
+EXE_DIR="./fp64_tf32_src"
+
+# Log directory
+LOG_DIR="mudnn_bench_logs"
+mkdir -p "$LOG_DIR"
+ABS_LOG_DIR=$(realpath "$LOG_DIR")
+log_file="${ABS_LOG_DIR}/bench_fp64_tf32_types.log"
+> "$log_file"
+
+# Path of the Python log-summary script
+PYTHON_SUMMARIZE="exetrct_log_tools/summarize_fp64_tf32_log.py"
+
+for type in "${TEST_TYPES[@]}"; do
+    echo "=============================="
+    echo "开始测试:$type"
+    echo "=============================="
+
+    # Pick the executable that matches the data type
+    if [[ "$type" == "fp64" ]]; then
+        exe="${EXE_DIR}/gemm_fp64"
+    elif [[ "$type" == "tf32" ]]; then
+        exe="${EXE_DIR}/gemm_tf32"
+    else
+        echo "未知类型: $type"
+        continue
+    fi
+
+    # Skip this type when its executable has not been built
+    if [[ ! -f "$exe" ]]; then
+        echo "错误:找不到可执行文件 $exe"
+        continue
+    fi
+
+    # Iterate over the matrix sizes
+    while read -r m n k; do
+        # Strip any stray spaces left in the fields
+        m=$(echo "$m" | tr -d ' ')
+        n=$(echo "$n" | tr -d ' ')
+        k=$(echo "$k" | tr -d ' ')
+
+        echo "矩阵大小: M=$m, N=$n, K=$k"
+
+        if [[ -n "$m" && -n "$n" && -n "$k" ]]; then
+            # Run the GEMM test (GPU from MUSA_VISIBLE_DEVICES, default 7) and append its output to the log
+            MUSA_VISIBLE_DEVICES="${MUSA_VISIBLE_DEVICES:-7}" "$exe" "$m" "$n" "$k" "$test_iter" >> "$log_file" 2>&1
+            sleep 1
+        fi
+    done <<< "$input_data"
+
+done
+
+# Run the Python summary script when it is present
+if [[ -f "$PYTHON_SUMMARIZE" ]]; then
+    python "$PYTHON_SUMMARIZE" "$log_file"
+else
+    echo "警告:Python 分析脚本不存在: $PYTHON_SUMMARIZE"
+fi
+
+echo "所有测试完成!日志目录:$ABS_LOG_DIR"
+
diff --git a/base_test/matmul_test/test_gemm_mixed.sh b/base_test/matmul_test/test_gemm_mixed.sh
new file mode 100644
index 0000000..076a95d
--- /dev/null
+++ b/base_test/matmul_test/test_gemm_mixed.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+set -e
+
+input_data=$(cat <<'EOF'
+128 128 128
+256 256 256
+512 512 512
+1024 1024 1024
+2048 2048 2048
+4096 4096 4096
+8192 8192 8192
+4098 4098 4098
+8190 8190 8190
+8192 768 8192
+EOF
+)
+test_iter=1000
+
+TEST_TYPES=("f16:f16:f32:f32" "bf16:bf16:f32:f32" "f32" "int8" "q8:q8:f32:f32" "bf16:q4:bf16:bf16" "float8_e4m3:float8_e4m3:f16:f16")
+# TEST_TYPES=("f32")
+LOG_DIR="mudnn_bench_logs"
+mkdir -p "$LOG_DIR"
+log_file="${LOG_DIR}/bench_fix_matmul.log"
+> "$log_file"
+
+# Make sure the benchmark binary exists before starting
+if [ !
-f "../bin/mudnn_bench" ]; then
+    echo "错误:未找到 ../bin/mudnn_bench 可执行文件" | tee -a "$log_file"
+    exit 1
+fi
+
+echo "开始测试,日志文件:$log_file"
+
+# GPU to benchmark on: honor a caller-provided MUSA_VISIBLE_DEVICES and fall
+# back to device 7.
+device="${MUSA_VISIBLE_DEVICES:-7}"
+
+# Run every "M N K" row of input_data once per mixed-precision type string.
+# A failing benchmark is logged and the sweep continues with the next case.
+for type in "${TEST_TYPES[@]}"; do
+    echo "开始测试数据类型:$type" | tee -a "$log_file"
+
+    # Read the size table line by line
+    echo "$input_data" | while IFS= read -r line; do
+        # Skip blank lines
+        [ -z "$line" ] && continue
+
+        # Split the row into its three whitespace-separated numbers
+        read -r m n k <<< "$line"
+
+        echo "测试: M=$m, N=$n, K=$k, Type=$type" | tee -a "$log_file"
+
+        # Reject rows whose fields are not plain non-negative integers
+        if ! [[ "$m" =~ ^[0-9]+$ ]] || ! [[ "$n" =~ ^[0-9]+$ ]] || ! [[ "$k" =~ ^[0-9]+$ ]]; then
+            echo "错误:参数不是数字: m=$m, n=$n, k=$k" | tee -a "$log_file"
+            continue
+        fi
+
+        # Record the exact command line in the log for reproducibility
+        cmd="MUSA_VISIBLE_DEVICES=$device ../bin/mudnn_bench -m --mm_m=\"$m\" --mm_n=\"$n\" --mm_k=\"$k\" --warmup 30 --tm i --tmv \"$test_iter\" -p -c -t \"$type\""
+        echo "执行命令: $cmd" >> "$log_file"
+
+        # Run the benchmark inside `if` so a non-zero exit status is reported
+        # here instead of tripping `set -e`
+        if MUSA_VISIBLE_DEVICES="$device" ../bin/mudnn_bench -m \
+            --mm_m="$m" --mm_n="$n" --mm_k="$k" \
+            --warmup 30 \
+            --tm i \
+            --tmv "$test_iter" \
+            -p \
+            -c \
+            -t "$type" >> "$log_file" 2>&1; then
+            echo "测试成功: M=$m, N=$n, K=$k, Type=$type" | tee -a "$log_file"
+        else
+            exit_code=$?
+            echo "测试失败: M=$m, N=$n, K=$k, Type=$type, 退出码: $exit_code" | tee -a "$log_file"
+        fi
+
+        echo "----------------------------------------" >> "$log_file"
+        sleep 2
+    done
+done
+
+# Summarize the log. The tools directory is exetrct_log_tools/, the name used
+# by the other test scripts. A missing summary script only warns, so `set -e`
+# does not abort before the final messages.
+# NOTE(review): the script file name is kept from the original call -- confirm
+# that it exists under exetrct_log_tools/.
+PYTHON_SUMMARIZE="exetrct_log_tools/summary_mixed_data.py"
+if [[ -f "$PYTHON_SUMMARIZE" ]]; then
+    python "$PYTHON_SUMMARIZE" "$log_file"
+else
+    echo "警告:Python 分析脚本不存在: $PYTHON_SUMMARIZE"
+fi
+echo "所有测试完成!日志目录:$LOG_DIR"
+echo "查看日志:cat $log_file"
diff --git a/script/monitor/README.md b/base_test/monitor/README.md
similarity index 100%
rename from script/monitor/README.md
rename to base_test/monitor/README.md
diff --git a/script/monitor/monitor_gpu.sh b/base_test/monitor/monitor_gpu.sh
old mode 100755
new mode 100644
similarity index 100%
rename from script/monitor/monitor_gpu.sh
rename to base_test/monitor/monitor_gpu.sh