From b54598ebdbddeb25be7a48eb1e302502635f6891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BA=91=E5=BE=AE?= <1067852565@qq.com> Date: Sat, 17 Aug 2024 23:37:16 +1000 Subject: [PATCH] bench: add new micro benchmark about maps and read/write userspace memory (#334) * add read write result * Add new syscount bench * fix bench command * Add hash map op bench * update * add per cpu map op bench --- Makefile | 2 +- benchmark/.gitignore | 1 + benchmark/README.md | 331 ++++++++++++------ benchmark/hash_map/.gitignore | 10 - benchmark/hash_map/Makefile | 138 -------- benchmark/hash_map/README.md | 42 --- benchmark/hash_map/uprobe.bpf.c | 43 --- benchmark/hash_map/uprobe.c | 68 ---- benchmark/run_benchmark.py | 4 +- benchmark/syscount/Makefile | 2 + benchmark/syscount/read-sendmsg.c | 100 ++++++ benchmark/syscount/test.sh | 21 ++ benchmark/syscount/testfile.txt | 1 + benchmark/test.c | 67 ++-- benchmark/test_embed.c | 4 +- benchmark/tools/.gitignore | 1 - benchmark/tools/Makefile | 2 - benchmark/tools/driving.py | 108 ------ benchmark/tools/fig.py | 29 -- benchmark/tools/readlink.cpp | 15 - benchmark/uprobe/uprobe-override.c | 2 +- benchmark/uprobe/uprobe.bpf.c | 81 ++++- benchmark/uprobe/uprobe.c | 3 - example/minimal/README.md | 2 +- .../bpf_map/userspace/per_cpu_array_map.cpp | 2 +- 25 files changed, 477 insertions(+), 602 deletions(-) delete mode 100644 benchmark/hash_map/.gitignore delete mode 100644 benchmark/hash_map/Makefile delete mode 100644 benchmark/hash_map/README.md delete mode 100644 benchmark/hash_map/uprobe.bpf.c delete mode 100644 benchmark/hash_map/uprobe.c create mode 100644 benchmark/syscount/Makefile create mode 100644 benchmark/syscount/read-sendmsg.c create mode 100644 benchmark/syscount/test.sh create mode 100644 benchmark/syscount/testfile.txt delete mode 100644 benchmark/tools/.gitignore delete mode 100644 benchmark/tools/Makefile delete mode 100644 benchmark/tools/driving.py delete mode 100644 benchmark/tools/fig.py delete mode 100644 
benchmark/tools/readlink.cpp diff --git a/Makefile b/Makefile index 55ebb63d..884fd33f 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ release: ## build the release version release-with-llvm-jit: ## build the package, with llvm-jit cmake -Bbuild -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo \ - -DBPFTIME_LLVM_JIT=1 + -DBPFTIME_LLVM_JIT=1 \ -DBUILD_BPFTIME_DAEMON=1 cmake --build build --config RelWithDebInfo --target install -j$(JOBS) diff --git a/benchmark/.gitignore b/benchmark/.gitignore index 173b04ce..e904185d 100644 --- a/benchmark/.gitignore +++ b/benchmark/.gitignore @@ -1,3 +1,4 @@ test micro-bench benchmark-output.json +syscount/read-sendmsg \ No newline at end of file diff --git a/benchmark/README.md b/benchmark/README.md index 3d71c865..71ad1d1c 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -2,10 +2,13 @@ With userspace eBPF runntime, we can: -- speed up the uprobe and uretprobe by approximate 10x -- with out any kernel patch or modify the tracing eBPF program +- Speed up the uprobe and uretprobe by approximately `10x` +- Read and write user memory approximately `10x` faster than the kernel (~5ns vs ~50ns) +- Without any kernel patch or modification of the tracing eBPF program - No privilege is needed for running the eBPF tracing program. 
+Probes: + | Probe/Tracepoint Types | Kernel (ns) | Userspace (ns) | Insn Count | |------------------------|-------------:|---------------:|---------------:| | Uprobe | 3224.172760 | 314.569110 | 4 | @@ -13,6 +16,20 @@ With userspace eBPF runntime, we can: | Syscall Tracepoint | 151.82801 | 232.57691 | 4 | | Embedding runtime | Not avaliable | 110.008430 | 4 | +Read and write user memory: + +| Probe/Tracepoint Types | Kernel (ns) | Userspace (ns) | +|-------------------------|----------------:|---------------:| +| bpf_probe_read - uprobe | 46.820830 | 2.200530 | +| bpf_probe_write_user - uprobe | 45.004100 | 8.101980 | + +## Suggest build configuration + +```sh +cmake -Bbuild -DLLVM_DIR=/usr/lib/llvm-15/cmake -DCMAKE_BUILD_TYPE:STRING=RelWithDebInfo -DBPFTIME_LLVM_JIT=1 -DBPFTIME_ENABLE_LTO=1 +cmake --build build --config RelWithDebInfo --target install -j +``` + ## build and run at a click Build the agent first. In project root: @@ -34,20 +51,10 @@ cd benchmark python3 run_benchmark.py ``` -## test environment - -```console -$ uname -a -Linux yunwei37server 6.2.0-32-generic #32-Ubuntu SMP PREEMPT_DYNAMIC Mon Aug 14 10:03:50 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux -``` - ## base line -```console -$ benchmark/test -a[b] + c for 100000 times -Elapsed time: 0.000446995 seconds -avg function elapse time: 4.469950 ns +```sh +benchmark/test ``` The base line function elapsed time is 0.000243087 seconds, for the test function: @@ -72,22 +79,14 @@ make -C benchmark/uretprobe run the uprobe: -```console -$ sudo benchmark/uprobe/uprobe -libbpf: loading object 'uprobe_bpf' from buffer -libbpf: elf: section(2) .symtab, size 120, link 1, flags 0, type=2 -... -loaded ebpf program... -... 
+```sh +sudo benchmark/uprobe/uprobe ``` in another terminal, run the benchmark: -```console -$ benchmark/test -a[b] + c for 100000 times -Elapsed time: 0.322417276 seconds -avg function elapse time: 3224.172760 ns +```sh +benchmark/test ``` The uprobe or uretprobe function we used is like: @@ -100,53 +99,18 @@ int BPF_UPROBE(__benchmark_test_function, const char *a, int b, uint64_t c) } ``` -## kernel uretuprobe - -run the uretprobe: - -```console -$ sudo benchmark/uretprobe/uretprobe -libbpf: loading object 'uprobe_bpf' from buffer -libbpf: elf: section(2) .symtab, size 120, link 1, flags 0, type=2 -... -loaded ebpf program... -... - -in another terminal, run the benchmark: - -```console -$ benchmark/test -a[b] + c for 100000 times -Elapsed time: 0.589970682 seconds -avg function elapse time: 3996.799580 ns -``` - ## userspace uprobe run the uprobe: -```console -$ LD_PRELOAD=build/runtime/syscall-server/libbpftime-syscall-server.so benchmark/uprobe/uprobe -manager constructed -global_shm_open_type 0 for bpftime_maps_shm -Closing 3 -libbpf: loading object 'uprobe_bpf' from buffer -libbpf: elf: section(2) .symtab, size 120, link 1, flags 0, type=2 -... -loaded ebpf program... -... +```sh +LD_PRELOAD=build/runtime/syscall-server/libbpftime-syscall-server.so benchmark/uprobe/uprobe ``` in another terminal, run the benchmark: -```console -$ LD_PRELOAD=build/runtime/agent/libbpftime-agent.so benchmark/test -attaching prog 3 to fd 4 -Successfully attached - -a[b] + c for 100000 times -Elapsed time: 0.031456911 seconds -avg function elapse time: 314.569110 ns +```sh +LD_PRELOAD=build/runtime/agent/libbpftime-agent.so benchmark/test ``` If errors like: @@ -159,43 +123,10 @@ Aborted (core dumped) happpens, try to use `sudo` mode. 
-## userspace uretprobe - -run the uretprobe: - -```console -$ LD_PRELOAD=build/runtime/syscall-server/libbpftime-syscall-server.so benchmark/uretprobe/uretprobe -manager constructed -global_shm_open_type 0 for bpftime_maps_shm -Closing 3 -libbpf: loading object 'uprobe_bpf' from buffer -libbpf: elf: section(2) .symtab, size 120, link 1, flags 0, type=2 -... -loaded ebpf program... -... -``` - -in another terminal, run the benchmark: - -```console -$ LD_PRELOAD=build/runtime/agent/libbpftime-agent.so benchmark/test -attaching prog 3 to fd 4 -Successfully attached - -a[b] + c for 100000 times -Elapsed time: 0.038127027 seconds -avg function elapse time: 381.270270 ns -``` - ## embed runtime -```console -$ build/benchmark/simple-benchmark-with-embed-ebpf-calling -uprobe elf: /home/yunwei/bpftime/build/benchmark/uprobe_prog.bpf.o -uretprobe elf:/home/yunwei/bpftime/build/benchmark/uretprobe_prog.bpf.o -a[b] + c for 100000 times -Elapsed time: 0.011000843 seconds -avg function elapse time: 110.008430 ns +```sh +build/benchmark/simple-benchmark-with-embed-ebpf-calling ``` ## userspace syscall @@ -222,8 +153,200 @@ You can use python script to run the benchmark: python3 benchmark/tools/driving.py ``` -## Benchmark runner +## Test syscall trace and untrace with syscount + +run the test: + +```sh +bash ./benchmark/syscount/test.sh +``` + +result: + +```txt +# baseline, no trace syscall +Average read() time over 10 runs: 349 ns +Average sendmsg() time over 10 runs: 3640 ns +# trace with syscount +Average read() time over 10 runs: 437 ns +Average sendmsg() time over 10 runs: 3952 ns +# filter out the pid +Average read() time over 10 runs: 398 ns +Average sendmsg() time over 10 runs: 3690 ns +# trace with userspace syscall tracepoint +Average read() time over 10 runs: 531 ns +Average sendmsg() time over 10 runs: 3681 ns +``` + +## Results for uprobe, uretprobe, and syscall tracepoint + +| Probe/Tracepoint Types | Kernel (ns) | Userspace (ns) | Insn Count | 
+|------------------------|-------------:|---------------:|---------------:| +| Uprobe | 3224.172760 | 314.569110 | 4 | +| Uretprobe | 3996.799580 | 381.270270 | 2 | +| Syscall Tracepoint | 151.82801 | 232.57691 | 4 | +| Embedding runtime | Not available | 110.008430 | 4 | + +Tested on `6.2.0-32-generic` kernel and `Intel(R) Core(TM) i7-11800H CPU @ 2.30GHz`. + +## Results on another machine + +Tested on `kernel version 6.2` and `Intel(R) Xeon(R) Gold 5418Y` CPU. + +### Uprobe and read/write with `bpf_probe_write_user` and `bpf_probe_read_user` + +Kernel: + +```txt +Benchmarking __bench_uprobe_uretprobe in thread 1 +Average time usage 3060.196770 ns, iter 100000 times + +Benchmarking __bench_uretprobe in thread 1 +Average time usage 2958.493390 ns, iter 100000 times + +Benchmarking __bench_uprobe in thread 1 +Average time usage 1910.731360 ns, iter 100000 times + +Benchmarking __bench_read in thread 1 +Average time usage 1957.552190 ns, iter 100000 times + +Benchmarking __bench_write in thread 1 +Average time usage 1955.735460 ns, iter 100000 times +``` + +Userspace: + +```txt +Benchmarking __bench_uprobe_uretprobe in thread 1 +Average time usage 391.967450 ns, iter 100000 times + +Benchmarking __bench_uretprobe in thread 1 +Average time usage 383.851670 ns, iter 100000 times + +Benchmarking __bench_uprobe in thread 1 +Average time usage 380.935190 ns, iter 100000 times + +Benchmarking __bench_read in thread 1 +Average time usage 383.135720 ns, iter 100000 times + +Benchmarking __bench_write in thread 1 +Average time usage 389.037170 ns, iter 100000 times +``` + +### maps operations + +Run the map op 1000 times in one function. Userspace map op is also faster than the kernel in the current version. The current version is 10x faster than the old version. 
+ +```c +SEC("uprobe/benchmark/test:__bench_hash_map_lookup") +int test_lookup(struct pt_regs *ctx) +{ + for (int i = 0; i < 1000; i++) { + u32 key = i; + u64 value = i; + bpf_map_lookup_elem(&test_hash_map, &key); + } + return 0; +} +``` + +Kernel map op cost: + +```txt + +Benchmarking __bench_hash_map_update in thread 1 +Average time usage 64738.264680 ns, iter 100000 times + +Benchmarking __bench_hash_map_lookup in thread 1 +Average time usage 17805.898280 ns, iter 100000 times + +Benchmarking __bench_hash_map_delete in thread 1 +Average time usage 21795.665340 ns, iter 100000 times + +Benchmarking __bench_array_map_update in thread 1 +Average time usage 11449.295960 ns, iter 100000 times + +Benchmarking __bench_array_map_lookup in thread 1 +Average time usage 2093.886500 ns, iter 100000 times + +Benchmarking __bench_array_map_delete in thread 1 +Average time usage 2126.820310 ns, iter 100000 times + +Benchmarking __bench_per_cpu_hash_map_update in thread 1 +Average time usage 35050.915650 ns, iter 100000 times + +Benchmarking __bench_per_cpu_hash_map_lookup in thread 1 +Average time usage 15999.969590 ns, iter 100000 times + +Benchmarking __bench_per_cpu_hash_map_delete in thread 1 +Average time usage 21664.294940 ns, iter 100000 times + +Benchmarking __bench_per_cpu_array_map_update in thread 1 +Average time usage 10886.969860 ns, iter 100000 times + +Benchmarking __bench_per_cpu_array_map_lookup in thread 1 +Average time usage 2749.468760 ns, iter 100000 times + +Benchmarking __bench_per_cpu_array_map_delete in thread 1 +Average time usage 2778.679460 ns, iter 100000 times +``` + +Userspace map op cost: + +```txt +Benchmarking __bench_hash_map_update in thread 1 +Average time usage 30676.986820 ns, iter 100000 times + +Benchmarking __bench_hash_map_lookup in thread 1 +Average time usage 23486.304570 ns, iter 100000 times + +Benchmarking __bench_hash_map_delete in thread 1 +Average time usage 13435.901160 ns, iter 100000 times + +Benchmarking 
__bench_array_map_update in thread 1 +Average time usage 7081.922160 ns, iter 100000 times + +Benchmarking __bench_array_map_lookup in thread 1 +Average time usage 4685.450360 ns, iter 100000 times + +Benchmarking __bench_array_map_delete in thread 1 +Average time usage 6367.443010 ns, iter 100000 times + +Benchmarking __bench_per_cpu_hash_map_update in thread 1 +Average time usage 95918.602090 ns, iter 100000 times + +Benchmarking __bench_per_cpu_hash_map_lookup in thread 1 +Average time usage 63294.791110 ns, iter 100000 times + +Benchmarking __bench_per_cpu_hash_map_delete in thread 1 +Average time usage 460207.364100 ns, iter 100000 times + +Benchmarking __bench_per_cpu_array_map_update in thread 1 +Average time usage 26109.863360 ns, iter 100000 times + +Benchmarking __bench_per_cpu_array_map_lookup in thread 1 +Average time usage 9139.355980 ns, iter 100000 times + +Benchmarking __bench_per_cpu_array_map_delete in thread 1 +Average time usage 5203.339320 ns, iter 100000 times +``` -### Usage -- `make -C ./benchmark` -- `python3 ./benchmark/run_benchmark.py` +The benchmark without inlining the map op function: + +| Map Operation | Kernel (op - uprobe) (ns) | Userspace (op - uprobe) (ns) | +|------------------------------------|--------------------------:|-----------------------------:| +| __bench_hash_map_update | 62827.533320 | 30296.051630 | +| __bench_hash_map_lookup | 15895.166920 | 23005.369380 | +| __bench_hash_map_delete | 19884.933980 | 13054.965970 | +| __bench_array_map_update | 9538.564600 | 6701.987970 | +| __bench_array_map_lookup | 183.155140 | 4305.515170 | +| __bench_array_map_delete | 216.088950 | 5987.507820 | +| __bench_per_cpu_hash_map_update | 33140.184290 | 95537.666900 | +| __bench_per_cpu_hash_map_lookup | 14089.238230 | 62913.855920 | +| __bench_per_cpu_hash_map_delete | 19753.563580 | 459826.428910 | +| __bench_per_cpu_array_map_update | 8885.238500 | 25728.928170 | +| __bench_per_cpu_array_map_lookup | 1838.737400 | 8759.420790 | +| 
__bench_per_cpu_array_map_delete | 1867.948100 | 4802.404130 | + +- Some overhead can be reduced by inlining the map op function. +- We need to fix the performance issue of the per-cpu map in the userspace runtime. \ No newline at end of file diff --git a/benchmark/hash_map/.gitignore b/benchmark/hash_map/.gitignore deleted file mode 100644 index 3028a00f..00000000 --- a/benchmark/hash_map/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -.vscode -package.json -*.o -*.skel.json -*.skel.yaml -package.yaml -ecli -.output -test -uprobe diff --git a/benchmark/hash_map/Makefile b/benchmark/hash_map/Makefile deleted file mode 100644 index 87a6287c..00000000 --- a/benchmark/hash_map/Makefile +++ /dev/null @@ -1,138 +0,0 @@ -# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -OUTPUT := .output -CLANG ?= clang -LIBBPF_SRC := $(abspath ../../third_party/libbpf/src) -BPFTOOL_SRC := $(abspath ../../third_party/bpftool/src) -LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a) -BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool) -BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool -ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \ - | sed 's/arm.*/arm/' \ - | sed 's/aarch64/arm64/' \ - | sed 's/ppc64le/powerpc/' \ - | sed 's/mips.*/mips/' \ - | sed 's/riscv64/riscv/' \ - | sed 's/loongarch64/loongarch/') -VMLINUX := ../../third_party/vmlinux/$(ARCH)/vmlinux.h -# Use our own libbpf API headers and Linux UAPI headers distributed with -# libbpf to avoid dependency on system-wide headers, which could be missing or -# outdated -INCLUDES := -I$(OUTPUT) -I../../third_party/libbpf/include/uapi -I$(dir $(VMLINUX)) -CFLAGS := -g -Wall -ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS) - -APPS = uprobe # minimal minimal_legacy kprobe fentry usdt sockfilter tc ksyscall - -CARGO ?= $(shell which cargo) -ifeq ($(strip $(CARGO)),) -BZS_APPS := -else -BZS_APPS := # profile -APPS += $(BZS_APPS) -# Required by libblazesym -ALL_LDFLAGS += -lrt -ldl -lpthread -lm -endif - -# Get Clang's default includes on this system. 
We'll explicitly add these dirs -# to the includes list when compiling with `-target bpf` because otherwise some -# architecture-specific dirs will be "missing" on some architectures/distros - -# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h, -# sys/cdefs.h etc. might be missing. -# -# Use '-idirafter': Don't interfere with include mechanics except where the -# build would have failed anyways. -CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - &1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') - -ifeq ($(V),1) - Q = - msg = -else - Q = @ - msg = @printf ' %-8s %s%s\n' \ - "$(1)" \ - "$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \ - "$(if $(3), $(3))"; - MAKEFLAGS += --no-print-directory -endif - -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -$(call allow-override,CC,$(CROSS_COMPILE)cc) -$(call allow-override,LD,$(CROSS_COMPILE)ld) - -.PHONY: all -all: $(APPS) - -.PHONY: clean -clean: - $(call msg,CLEAN) - $(Q)rm -rf $(OUTPUT) $(APPS) - -$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT): - $(call msg,MKDIR,$@) - $(Q)mkdir -p $@ - -# Build libbpf -$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf - $(call msg,LIB,$@) - $(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \ - OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \ - INCLUDEDIR= LIBDIR= UAPIDIR= \ - install - -# Build bpftool -$(BPFTOOL): | $(BPFTOOL_OUTPUT) - $(call msg,BPFTOOL,$@) - $(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap - - -$(LIBBLAZESYM_SRC)/target/release/libblazesym.a:: - $(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release - -$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT) - $(call msg,LIB, $@) - $(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@ - 
-$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT) - $(call msg,LIB,$@) - $(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@ - -# Build BPF code -$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL) - $(call msg,BPF,$@) - $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \ - $(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \ - -c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@) - $(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@) - -# Generate BPF skeletons -$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL) - $(call msg,GEN-SKEL,$@) - $(Q)$(BPFTOOL) gen skeleton $< > $@ - -# Build user-space code -$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h - -$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT) - $(call msg,CC,$@) - $(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@ - -$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER) - -$(BZS_APPS): $(LIBBLAZESYM_OBJ) - -# Build application binary -$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT) - $(call msg,BINARY,$@) - $(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@ - -# delete failed targets -.DELETE_ON_ERROR: - -# keep intermediate (.skel.h, .bpf.o, etc) targets -.SECONDARY: diff --git a/benchmark/hash_map/README.md b/benchmark/hash_map/README.md deleted file mode 100644 index c0b80d5a..00000000 --- a/benchmark/hash_map/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# benchmark of hash maps - -- __benchmark_test_function1: hashmap bpf_map_lookup_elem -- __benchmark_test_function2: hashmap bpf_map_delete_elem -- __benchmark_test_function3: hashmap bpf_map_update_elem - -run the uprobe: - -```console -$ LD_PRELOAD=build/runtime/syscall-server/libbpftime-syscall-server.so benchmark/hash_map/uprobe -manager constructed -global_shm_open_type 0 for bpftime_maps_shm -Closing 3 -libbpf: loading object 'uprobe_bpf' from buffer -libbpf: elf: section(2) .symtab, size 120, link 1, flags 0, type=2 -... 
-loaded ebpf program... -... -``` - -in another terminal, run the benchmark: - -```console -$ LD_PRELOAD=build/runtime/agent/libbpftime-agent.so benchmark/test - -Benchmarking __benchmark_test_function1 -a[b] + c for 100000 times -Elapsed time: 0.038217773 seconds -Average time usage 382.177730 ns - -Benchmarking __benchmark_test_function2 -a[b] + c for 100000 times -Elapsed time: 0.020004455 seconds -Average time usage 200.044550 ns - -Benchmarking __benchmark_test_function3 -a[b] + c for 100000 times -Elapsed time: 0.047916014 seconds -Average time usage 479.160140 ns - -INFO [34534]: Global shm destructed -``` diff --git a/benchmark/hash_map/uprobe.bpf.c b/benchmark/hash_map/uprobe.bpf.c deleted file mode 100644 index ca9bc011..00000000 --- a/benchmark/hash_map/uprobe.bpf.c +++ /dev/null @@ -1,43 +0,0 @@ -#define BPF_NO_GLOBAL_DATA -#include -#include -#include - -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(max_entries, 1024); - __type(key, u32); - __type(value, u64); -} libc_malloc_calls_total SEC(".maps"); - -SEC("uprobe/benchmark/test:__benchmark_test_function3") -int test_update(struct pt_regs *ctx) -{ - u32 key = 0; - u64 value = 0; - bpf_map_update_elem(&libc_malloc_calls_total, &key, &value, 0); - - return 0; -} - -SEC("uprobe/benchmark/test:__benchmark_test_function2") -int test_delete(struct pt_regs *ctx) -{ - u32 key = 0; - u64 value = 0; - bpf_map_delete_elem(&libc_malloc_calls_total, &key); - - return 0; -} - -SEC("uprobe/benchmark/test:__benchmark_test_function1") -int test_lookup(struct pt_regs *ctx) -{ - u32 key = 0; - u64 value = 0; - bpf_map_lookup_elem(&libc_malloc_calls_total, &key); - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/benchmark/hash_map/uprobe.c b/benchmark/hash_map/uprobe.c deleted file mode 100644 index c0ed0303..00000000 --- a/benchmark/hash_map/uprobe.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -/* Copyright (c) 2020 Facebook */ -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#include "uprobe.skel.h" -#define warn(...) fprintf(stderr, __VA_ARGS__) - -static int libbpf_print_fn(enum libbpf_print_level level, const char *format, - va_list args) -{ - return vfprintf(stderr, format, args); -} - -static volatile bool exiting = false; - -static void sig_handler(int sig) -{ - exiting = true; -} - -int main(int argc, char **argv) -{ - struct uprobe_bpf *skel; - int err; - - /* Set up libbpf errors and debug info callback */ - libbpf_set_print(libbpf_print_fn); - - /* Cleaner handling of Ctrl-C */ - signal(SIGINT, sig_handler); - signal(SIGTERM, sig_handler); - - /* Load and verify BPF application */ - skel = uprobe_bpf__open(); - if (!skel) { - fprintf(stderr, "Failed to open and load BPF skeleton\n"); - return 1; - } - - /* Load & verify BPF programs */ - err = uprobe_bpf__load(skel); - if (err) { - fprintf(stderr, "Failed to load and verify BPF skeleton\n"); - goto cleanup; - } - err = uprobe_bpf__attach(skel); - if (err) { - fprintf(stderr, "Failed to attach BPF skeleton\n"); - goto cleanup; - } - - while (!exiting) { - sleep(1); - printf("loaded ebpf program...\n"); - } -cleanup: - /* Clean up */ - uprobe_bpf__destroy(skel); - - return err < 0 ? 
-err : 0; -} diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py index 5f1b964c..a2a78bea 100644 --- a/benchmark/run_benchmark.py +++ b/benchmark/run_benchmark.py @@ -84,7 +84,7 @@ async def run_userspace_uprobe_test(): server.stdout, should_exit, "SERVER", - (server_start_cb, "__benchmark_test_function3 is for uprobe only"), + (server_start_cb, "__bench_probe is for uprobe only"), ) ) await server_start_cb.wait() @@ -131,7 +131,7 @@ async def run_kernel_uprobe_test(): server.stdout, should_exit, "SERVER", - (server_start_cb, "__benchmark_test_function3 is for uprobe only"), + (server_start_cb, "__bench_probe is for uprobe only"), ) ) await server_start_cb.wait() diff --git a/benchmark/syscount/Makefile b/benchmark/syscount/Makefile new file mode 100644 index 00000000..75f7fc0b --- /dev/null +++ b/benchmark/syscount/Makefile @@ -0,0 +1,2 @@ +syscount-driver: read-sendmsg.c + gcc read-sendmsg.c -g -O3 -lpthread -o read-sendmsg diff --git a/benchmark/syscount/read-sendmsg.c b/benchmark/syscount/read-sendmsg.c new file mode 100644 index 00000000..1884beb0 --- /dev/null +++ b/benchmark/syscount/read-sendmsg.c @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NUM_ITERATIONS 1000000 + +void measure_read_time(int fd) { + char buffer[1024]; + struct timespec start, end; + long total_time_ns = 0; + + for (int i = 0; i < NUM_ITERATIONS; i++) { + clock_gettime(CLOCK_MONOTONIC, &start); + ssize_t bytes_read = read(fd, buffer, sizeof(buffer)); + clock_gettime(CLOCK_MONOTONIC, &end); + + if (bytes_read == -1) { + perror("read"); + exit(EXIT_FAILURE); + } + + long time_ns = (end.tv_sec - start.tv_sec) * 1e9 + (end.tv_nsec - start.tv_nsec); + total_time_ns += time_ns; + } + + printf("Average read() time: %ld ns\n", total_time_ns / NUM_ITERATIONS); +} + +void measure_sendmsg_time(int sockfd) { + struct msghdr msg; + struct iovec iov; + char buffer[1024] = "test message"; + struct 
timespec start, end; + long total_time_ns = 0; + + memset(&msg, 0, sizeof(msg)); + iov.iov_base = buffer; + iov.iov_len = sizeof(buffer); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + struct sockaddr_in dest_addr; + memset(&dest_addr, 0, sizeof(dest_addr)); + dest_addr.sin_family = AF_INET; + dest_addr.sin_port = htons(12345); // Arbitrary port number + inet_pton(AF_INET, "127.0.0.1", &dest_addr.sin_addr); // Loopback address + + msg.msg_name = &dest_addr; + msg.msg_namelen = sizeof(dest_addr); + + for (int i = 0; i < NUM_ITERATIONS; i++) { + clock_gettime(CLOCK_MONOTONIC, &start); + ssize_t bytes_sent = sendmsg(sockfd, &msg, 0); + clock_gettime(CLOCK_MONOTONIC, &end); + + if (bytes_sent == -1) { + perror("sendmsg"); + exit(EXIT_FAILURE); + } + + long time_ns = (end.tv_sec - start.tv_sec) * 1e9 + (end.tv_nsec - start.tv_nsec); + total_time_ns += time_ns; + } + + printf("Average sendmsg() time: %ld ns\n", total_time_ns / NUM_ITERATIONS); +} + +int main() { + // Open a file for reading + int fd = open("./benchmark/syscount/testfile.txt", O_RDONLY); + if (fd == -1) { + perror("open"); + exit(EXIT_FAILURE); + } + + // Create a socket for sendmsg() + int sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd == -1) { + perror("socket"); + close(fd); + exit(EXIT_FAILURE); + } + + // Measure read() time + measure_read_time(fd); + + // Measure sendmsg() time + measure_sendmsg_time(sockfd); + + close(fd); + close(sockfd); + return 0; +} diff --git a/benchmark/syscount/test.sh b/benchmark/syscount/test.sh new file mode 100644 index 00000000..c3369ecd --- /dev/null +++ b/benchmark/syscount/test.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +total_read_time=0 +total_sendmsg_time=0 + +for i in {1..10} +do + # output=$(sudo AGENT_SO=/tmp/bpftime/runtime/agent/libbpftime-agent.so LD_PRELOAD=/tmp/bpftime/attach/text_segment_transformer/libbpftime-agent-transformer.so ./benchmark/syscount/read-sendmsg) + output=$(sudo ./benchmark/syscount/read-sendmsg) + read_time=$(echo "$output" | grep 
"Average read() time" | awk '{print $4}') + sendmsg_time=$(echo "$output" | grep "Average sendmsg() time" | awk '{print $4}') + + total_read_time=$((total_read_time + read_time)) + total_sendmsg_time=$((total_sendmsg_time + sendmsg_time)) +done + +avg_read_time=$((total_read_time / 10)) +avg_sendmsg_time=$((total_sendmsg_time / 10)) + +echo "Average read() time over 10 runs: $avg_read_time ns" +echo "Average sendmsg() time over 10 runs: $avg_sendmsg_time ns" diff --git a/benchmark/syscount/testfile.txt b/benchmark/syscount/testfile.txt new file mode 100644 index 00000000..0fd26800 --- /dev/null +++ b/benchmark/syscount/testfile.txt @@ -0,0 +1 @@ +hhhhhhhhhhhhhhhhhh diff --git a/benchmark/test.c b/benchmark/test.c index ae654c58..73418af0 100644 --- a/benchmark/test.c +++ b/benchmark/test.c @@ -4,27 +4,31 @@ #include #include -__attribute_noinline__ uint64_t __benchmark_test_function3(const char *a, int b, - uint64_t c) -{ - return a[b] + c; -} - -__attribute_noinline__ uint64_t __benchmark_test_function2(const char *a, int b, - uint64_t c) -{ - static int i = 0; - __sync_fetch_and_add(&i, 1); - return a[b] + c; -} - -__attribute_noinline__ uint64_t __benchmark_test_function1(const char *a, int b, - uint64_t c) -{ - return a[b] + c; +#define BENCH_FUNC(name) \ +__attribute_noinline__ uint64_t name(char *a, int b, uint64_t c) \ +{ \ + return a[b] + c; \ } -typedef uint64_t (*benchmark_test_function_t)(const char *, int, uint64_t); +BENCH_FUNC(__bench_array_map_lookup) +BENCH_FUNC(__bench_array_map_delete) +BENCH_FUNC(__bench_array_map_update) +BENCH_FUNC(__bench_hash_map_lookup) +BENCH_FUNC(__bench_hash_map_delete) +BENCH_FUNC(__bench_hash_map_update) +BENCH_FUNC(__bench_per_cpu_hash_map_lookup) +BENCH_FUNC(__bench_per_cpu_hash_map_delete) +BENCH_FUNC(__bench_per_cpu_hash_map_update) +BENCH_FUNC(__bench_per_cpu_array_map_lookup) +BENCH_FUNC(__bench_per_cpu_array_map_delete) +BENCH_FUNC(__bench_per_cpu_array_map_update) +BENCH_FUNC(__bench_read) 
+BENCH_FUNC(__bench_write) +BENCH_FUNC(__bench_uprobe) +BENCH_FUNC(__bench_uretprobe) +BENCH_FUNC(__bench_uprobe_uretprobe) + +typedef uint64_t (*benchmark_test_function_t)(char *, int, uint64_t); void start_timer(struct timespec *start_time) { @@ -53,9 +57,10 @@ static double get_function_time(benchmark_test_function_t func, int iter) // The timespec struct holds seconds and nanoseconds struct timespec start_time, end_time; start_timer(&start_time); + char buffer[20] = "hello world"; // test base line for (int i = 0; i < iter; i++) { - func("hello", i % 4, i); + func(buffer, i % 4, i); } end_timer(&end_time); double time = get_elapsed_time(start_time, end_time); @@ -74,7 +79,7 @@ void do_benchmark_userspace(benchmark_test_function_t func, const char *name, #define do_benchmark_func(func, iter, id) \ do { \ - do_benchmark_userspace(func, #func, iter, id); \ + do_benchmark_userspace(func, #func, iter, id); \ } while (0) int iter = 100 * 1000; @@ -83,9 +88,23 @@ void *run_bench_functions(void *id_ptr) { int id = *(int *)id_ptr; printf("id: %d\n", id); - do_benchmark_func(__benchmark_test_function1, iter, id); - do_benchmark_func(__benchmark_test_function2, iter, id); - do_benchmark_func(__benchmark_test_function3, iter, id); + do_benchmark_func(__bench_uprobe_uretprobe, iter, id); + do_benchmark_func(__bench_uretprobe, iter, id); + do_benchmark_func(__bench_uprobe, iter, id); + do_benchmark_func(__bench_read, iter, id); + do_benchmark_func(__bench_write, iter, id); + do_benchmark_func(__bench_hash_map_update, iter, id); + do_benchmark_func(__bench_hash_map_lookup, iter, id); + do_benchmark_func(__bench_hash_map_delete, iter, id); + do_benchmark_func(__bench_array_map_update, iter, id); + do_benchmark_func(__bench_array_map_lookup, iter, id); + do_benchmark_func(__bench_array_map_delete, iter, id); + do_benchmark_func(__bench_per_cpu_hash_map_update, iter, id); + do_benchmark_func(__bench_per_cpu_hash_map_lookup, iter, id); + 
do_benchmark_func(__bench_per_cpu_hash_map_delete, iter, id); + do_benchmark_func(__bench_per_cpu_array_map_update, iter, id); + do_benchmark_func(__bench_per_cpu_array_map_lookup, iter, id); + do_benchmark_func(__bench_per_cpu_array_map_delete, iter, id); return NULL; } diff --git a/benchmark/test_embed.c b/benchmark/test_embed.c index 4fc90266..ee595972 100644 --- a/benchmark/test_embed.c +++ b/benchmark/test_embed.c @@ -76,7 +76,7 @@ void end_timer() clock_gettime(CLOCK_MONOTONIC_RAW, &end_time); } -__attribute_noinline__ uint64_t __benchmark_test_function3(const char *a, int b, +__attribute_noinline__ uint64_t __bench_probe(const char *a, int b, uint64_t c) { return a[b] + c; @@ -93,7 +93,7 @@ uint64_t test_func_wrapper(const char *a, int b, uint64_t c) PT_REGS_PARM3(®s) = c; ebpf_exec(begin_vm, ®s, sizeof(regs), &ret); } - uint64_t hook_func_ret = __benchmark_test_function3(a, b, c); + uint64_t hook_func_ret = __bench_probe(a, b, c); if (enable_ebpf) { memset(®s, 0, sizeof(regs)); PT_REGS_PARM1(®s) = hook_func_ret; diff --git a/benchmark/tools/.gitignore b/benchmark/tools/.gitignore deleted file mode 100644 index b66ea85e..00000000 --- a/benchmark/tools/.gitignore +++ /dev/null @@ -1 +0,0 @@ -readlink diff --git a/benchmark/tools/Makefile b/benchmark/tools/Makefile deleted file mode 100644 index 3c81d575..00000000 --- a/benchmark/tools/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -readlink: readlink.cpp - g++ readlink.cpp -o readlink diff --git a/benchmark/tools/driving.py b/benchmark/tools/driving.py deleted file mode 100644 index fe75c3c1..00000000 --- a/benchmark/tools/driving.py +++ /dev/null @@ -1,108 +0,0 @@ -import re -import json -import numpy as np -import subprocess -import os -import signal -import time - -def run_command(cmd): - """Run a command in the background and return its process.""" - process = subprocess.Popen(cmd, shell=True) - return process - - -def kill_process(process): - """Kill a given process.""" - os.kill(process.pid, signal.SIGKILL) - 
print("Process killed") - # Give the process some time to terminate. - time.sleep(1) - - # Check if the process has really terminated. If it has, poll() should return the exit code. - if process.poll() is None: - print(f"Process {process.pid} was not killed, forcing kill.") - os.kill(process.pid, signal.SIGKILL) - else: - print(f"Process {process.pid} was successfully killed.") - - -# Function to run the command and extract the average write time -def run_command_and_extract_time(name: str, library: str): - print("run_command_and_extract_time") - try: - result = subprocess.check_output( - [ - "sudo", - library, - name, - ], - universal_newlines=True, - ) - match = re.search(r"Average time usage (\d+\.\d+)ns,", result) - print(float(match.group(1))) - if match: - return float(match.group(1)) - else: - print("Warning: No match found in the output") - return None - except Exception as e: - print(f"Error during command execution: {e}") - return None - - -def save_micro_benchmark_data(name: str, library: str, output_file: str): - # Run the command 100 times and collect the average write times - times = [run_command_and_extract_time(name, library) for _ in range(20)] - times = [time for time in times if time is not None] # Filter out None values - - # Compute metrics - mean_time = np.mean(times) - median_time = np.median(times) - min_time = np.min(times) - max_time = np.max(times) - std_dev_time = np.std(times) - - # Prepare the data for the JSON file - data = { - "raw_times": times, - "mean": mean_time, - "median": median_time, - "min": min_time, - "max": max_time, - "std_dev": std_dev_time, - } - - # Save the data to a JSON file - with open(output_file, "w") as f: - json.dump(data, f, indent=4) - -def run_kernel_syscall_tracepoint_test(): - server = run_command("sudo benchmark/syscall/syscall") - save_micro_benchmark_data( - "benchmark/syscall/victim", "A=B", "benchmark/micro-bench/kernel-syscall.json" - ) - kill_process(server) - run_command("pkill syscall/syscall") - 
-def run_userspace_syscall_tracepoint_test(): - server = run_command( - "sudo LD_PRELOAD=build/runtime/syscall-server/libbpftime-syscall-server.so benchmark/syscall/syscall" - ) - save_micro_benchmark_data( - "benchmark/syscall/victim", - "LD_PRELOAD=build/runtime/agent/libbpftime-agent.so", - "benchmark/micro-bench/userspace-syscall.json", - ) - kill_process(server) - run_command("pkill syscall/syscall") - -def run_syscall_baseline_test(): - save_micro_benchmark_data( - "benchmark/syscall/victim", - "LD_PRELOAD=build/runtime/agent/libbpftime-agent.so", - "benchmark/micro-bench/baseline-syscall.json", - ) - - -run_syscall_baseline_test() \ No newline at end of file diff --git a/benchmark/tools/fig.py b/benchmark/tools/fig.py deleted file mode 100644 index 0a33c99b..00000000 --- a/benchmark/tools/fig.py +++ /dev/null @@ -1,29 +0,0 @@ -import matplotlib.pyplot as plt - -# Categories -categories = ["Syscall Tracepoint", "Uprobe", "Uretprobe"] - -# Time values for Kernel and Userspace -kernel_times = [1499.47708, 4751.462610, 5899.706820] -userspace_times = [1489.04251, 445.169770, 472.972220] - -bar_width = 0.35 -index = range(len(categories)) - -plt.figure(figsize=(12, 7)) - -# Plot bars for Kernel and Userspace -bar1 = plt.bar(index, kernel_times, bar_width, color='b', label='Kernel') -bar2 = plt.bar([i + bar_width for i in index], userspace_times, bar_width, color='r', label='Userspace') - -# Labeling the figure -plt.xlabel('Probe/Tracepoint Types') -plt.ylabel('Avg Time (ns)') -plt.title('Comparison of Kernel vs. 
Userspace for Different Probe/Tracepoint Types') -plt.xticks([i + bar_width/2 for i in index], categories) -plt.legend() - -plt.tight_layout() -plt.grid(True, which="both", ls="--", c="0.65") -plt.savefig("trace_overhead.png") -plt.show() diff --git a/benchmark/tools/readlink.cpp b/benchmark/tools/readlink.cpp deleted file mode 100644 index 62bd4bcc..00000000 --- a/benchmark/tools/readlink.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include -#include - -int main() { - char execPath[PATH_MAX]; - ssize_t len = readlink("/proc/self/exe", execPath, sizeof(execPath) - 1); - if (len != -1) { - execPath[len] = '\0'; // Null-terminate the string - std::cout << "Executable Path: " << execPath << std::endl; - } else { - std::cerr << "Error retrieving executable path" << std::endl; - } - return 0; -} \ No newline at end of file diff --git a/benchmark/uprobe/uprobe-override.c b/benchmark/uprobe/uprobe-override.c index a16003f2..96b9f5da 100644 --- a/benchmark/uprobe/uprobe-override.c +++ b/benchmark/uprobe/uprobe-override.c @@ -55,7 +55,7 @@ int main(int argc, char **argv) } err = bpf_prog_attach_uprobe_with_override( bpf_program__fd(skel->progs.do_uprobe_override_patch), "benchmark/test", - "__benchmark_test_function1"); + "__bench_uprobe_uretprobe"); if (err) { fprintf(stderr, "Failed to attach BPF program\n"); goto cleanup; diff --git a/benchmark/uprobe/uprobe.bpf.c b/benchmark/uprobe/uprobe.bpf.c index 7ac7471e..a91e0ef9 100644 --- a/benchmark/uprobe/uprobe.bpf.c +++ b/benchmark/uprobe/uprobe.bpf.c @@ -3,25 +3,92 @@ #include #include -SEC("uprobe/benchmark/test:__benchmark_test_function3") -int BPF_UPROBE(__benchmark_test_function3, const char *a, int b, uint64_t c) +#define DEFINE_MAP_OPERATIONS(map_name, map_type) \ +struct { \ + __uint(type, map_type); \ + __uint(max_entries, 1024); \ + __type(key, u32); \ + __type(value, u64); \ +} map_name SEC(".maps"); \ +\ +SEC("uprobe/benchmark/test:__bench_" #map_name "_update") \ +int map_name##_update(struct pt_regs *ctx) \ 
+{ \ + for (int i = 0; i < 1000; i++) { \ + u32 key = i; \ + u64 value = i; \ + bpf_map_update_elem(&map_name, &key, &value, BPF_ANY); \ + } \ + return 0; \ +} \ +\ +SEC("uprobe/benchmark/test:__bench_" #map_name "_delete") \ +int map_name##_delete(struct pt_regs *ctx) \ +{ \ + for (int i = 0; i < 1000; i++) { \ + u32 key = i; \ + bpf_map_delete_elem(&map_name, &key); \ + } \ + return 0; \ +} \ +\ +SEC("uprobe/benchmark/test:__bench_" #map_name "_lookup") \ +int map_name##_lookup(struct pt_regs *ctx) \ +{ \ + for (int i = 0; i < 1000; i++) { \ + u32 key = i; \ + bpf_map_lookup_elem(&map_name, &key); \ + } \ + return 0; \ +} + +// Define operations for an array map +DEFINE_MAP_OPERATIONS(array_map, BPF_MAP_TYPE_ARRAY) + +// Define operations for a hash map +DEFINE_MAP_OPERATIONS(hash_map, BPF_MAP_TYPE_HASH) + +// Define operations for a per-cpu hash map +DEFINE_MAP_OPERATIONS(per_cpu_hash_map, BPF_MAP_TYPE_PERCPU_HASH) + +// Define operations for a per-cpu array map +DEFINE_MAP_OPERATIONS(per_cpu_array_map, BPF_MAP_TYPE_PERCPU_ARRAY) + +SEC("uprobe/benchmark/test:__bench_write") +int BPF_UPROBE(__bench_write, char *a, int b, uint64_t c) +{ + char buffer[5] = "text"; + bpf_probe_write_user(a, buffer, sizeof(buffer)); + return b + c; +} + +SEC("uprobe/benchmark/test:__bench_read") +int BPF_UPROBE(__bench_read, char *a, int b, uint64_t c) +{ + char buffer[5]; + int res = bpf_probe_read_user(buffer, sizeof(buffer), a); + return b + c + res + buffer[1]; +} + +SEC("uprobe/benchmark/test:__bench_uprobe") +int BPF_UPROBE(__bench_uprobe, char *a, int b, uint64_t c) { return b + c; } -SEC("uretprobe/benchmark/test:__benchmark_test_function2") -int BPF_URETPROBE(__benchmark_test_function2, int ret) +SEC("uretprobe/benchmark/test:__bench_uretprobe") +int BPF_URETPROBE(__bench_uretprobe, int ret) { return ret; } -SEC("uprobe/benchmark/test:__benchmark_test_function1") -int BPF_UPROBE(__benchmark_test_function1_1, const char *a, int b, uint64_t c) 
+SEC("uprobe/benchmark/test:__bench_uprobe_uretprobe") +int BPF_UPROBE(__bench_uprobe_uretprobe_1, char *a, int b, uint64_t c) { return b + c; } -SEC("uretprobe/benchmark/test:__benchmark_test_function1") +SEC("uretprobe/benchmark/test:__bench_uprobe_uretprobe") int BPF_URETPROBE(__benchmark_test_function_1_2, int ret) { return ret; diff --git a/benchmark/uprobe/uprobe.c b/benchmark/uprobe/uprobe.c index 38e52abb..9850abc1 100644 --- a/benchmark/uprobe/uprobe.c +++ b/benchmark/uprobe/uprobe.c @@ -57,9 +57,6 @@ int main(int argc, char **argv) } printf("Successfully started! Press Ctrl+C to stop.\n"); - printf("__benchmark_test_function1 is for both uprobe and uretprobe\n"); - printf("__benchmark_test_function2 is for uretprobe only\n"); - printf("__benchmark_test_function3 is for uprobe only\n"); fflush(stdout); while (!exiting) { sleep(1); diff --git a/example/minimal/README.md b/example/minimal/README.md index 33d89c71..528a4756 100644 --- a/example/minimal/README.md +++ b/example/minimal/README.md @@ -121,5 +121,5 @@ client ```sh sudo ~/.bpftime/bpftime start -s ./victim -# or AGENT_SO=build/runtime/agent/libbpftime-agent.so LD_PRELOAD=build/runtime/agent-transformer/libbpftime-agent-transformer.so ./victim +# or AGENT_SO=build/runtime/agent/libbpftime-agent.so LD_PRELOAD=build/attach/text_segment_transformer/libbpftime-agent-transformer.so ./victim ``` diff --git a/runtime/src/bpf_map/userspace/per_cpu_array_map.cpp b/runtime/src/bpf_map/userspace/per_cpu_array_map.cpp index 7b0f81e0..b4114f9a 100644 --- a/runtime/src/bpf_map/userspace/per_cpu_array_map.cpp +++ b/runtime/src/bpf_map/userspace/per_cpu_array_map.cpp @@ -69,7 +69,7 @@ long per_cpu_array_map_impl::elem_update(const void *key, const void *value, long per_cpu_array_map_impl::elem_delete(const void *key) { errno = ENOTSUP; - SPDLOG_ERROR("Deleting of per cpu array is not supported"); + SPDLOG_DEBUG("Deleting of per cpu array is not supported"); return -1; }