From a0bbf4cb415083e1007e1271f66c0526e1b674b3 Mon Sep 17 00:00:00 2001 From: Luis Augenstein Date: Thu, 4 Dec 2025 10:23:37 +0100 Subject: [PATCH] added strace kernel build script Signed-off-by: Luis Augenstein --- README.md | 1 + sbom_analysis/strace_kernel_build/.gitignore | 9 ++ sbom_analysis/strace_kernel_build/README.md | 29 ++++++ .../strace_kernel_build.sh | 94 +++++++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 sbom_analysis/strace_kernel_build/.gitignore create mode 100644 sbom_analysis/strace_kernel_build/README.md create mode 100755 sbom_analysis/strace_kernel_build/strace_kernel_build.sh diff --git a/README.md b/README.md index 3e4d2ab..6b8ef2d 100644 --- a/README.md +++ b/README.md @@ -291,6 +291,7 @@ flowchart TD - `sbom_analysis/` - Additional scripts for analyzing the outputs produced by the main script. - [sbom_analysis/cmd_graph_based_kernel_build/](sbom_analysis/cmd_graph_based_kernel_build/README.md) - Validation of cmd graph completeness by rebuilding the linux kernel only with files referenced in the cmd graph. - [sbom_analysis/cmd_graph_visualization/](sbom_analysis/cmd_graph_visualization/README.md) - Interactive visualization of the cmd graph + - [sbom_analysis/strace_kernel_build/](sbom_analysis/strace_kernel_build/README.md) - build kernel with strace and compare touched files with files found with cmd graph - `testdata_generation/` - Describes how the precompiled kernel builds in [KernelSbom-TestData](https://fileshare.tngtech.com/library/98e7e6f8-bffe-4a55-a8d2-817d4f3e51e8/KernelSbom-TestData/) were generated. The main contribution of this repository is the content of the `sbom` directory which eventually should be moved into the `linux/tools/` directory in the official [linux](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git) source tree. 
diff --git a/sbom_analysis/strace_kernel_build/.gitignore b/sbom_analysis/strace_kernel_build/.gitignore new file mode 100644 index 0000000..b83b6cf --- /dev/null +++ b/sbom_analysis/strace_kernel_build/.gitignore @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# SPDX-FileCopyrightText: 2025 TNG Technology Consulting GmbH + +strace.log +files_touched.txt +source_files_touched.txt +filtered_source_files_touched.txt +strace_only.txt +sbom_used_files_only.txt diff --git a/sbom_analysis/strace_kernel_build/README.md b/sbom_analysis/strace_kernel_build/README.md new file mode 100644 index 0000000..289ebab --- /dev/null +++ b/sbom_analysis/strace_kernel_build/README.md @@ -0,0 +1,29 @@ + + +# Strace Kernel Build + +The script `strace_kernel_build.sh` runs a Linux kernel build with `strace` to track all files accessed during the build. These files provide an upper bound on what should be captured by the `cmd graph`. + +## Usage + +1. Prepare a clean kernel source tree with a `.config` in a dedicated object tree, for example: + ```bash + make tinyconfig O=kernel_build + ``` + Ensure all dependencies are installed so that `make O=kernel_build` succeeds. + +2. Run the script, providing the source tree (relative to the script), the object tree (relative to the source tree) and the path to the `sbom.used-files.txt` generated by the main script: + ```bash + ./strace_kernel_build.sh "../../../linux" "kernel_build" "../../sbom.used-files.txt" + ``` + + The script generates the following output files (always placed next to the script): + - `strace.log` — full raw strace output of the build + - `files_touched.txt` — all files the build attempted to open + - `source_files_touched.txt` — subset of files inside the source tree + - `filtered_source_files_touched.txt` — final source files, excluding files not tracked by the cmd graph (`Makefile*`, `Kbuild*`, `Kconfig*`, `tools/*`, `scripts/*`) + - `strace_only.txt` — list of files found with strace but not with the cmd graph. 
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-only
# SPDX-FileCopyrightText: 2025 TNG Technology Consulting GmbH
#
# Builds a Linux kernel under strace and compares the source files the build
# opened with the files listed in sbom.used-files.txt (from the cmd graph).
#
# Usage:
#   strace_kernel_build.sh [SRC_TREE [OBJ_TREE [SBOM_USED_FILES]]]
#
#   SRC_TREE         kernel source tree, relative to this script (or absolute)
#   OBJ_TREE         object tree, relative to SRC_TREE (or absolute)
#   SBOM_USED_FILES  sbom.used-files.txt, relative to this script (or absolute)
#
# Output files (always written next to this script):
#   strace.log                         raw strace output of the build
#   files_touched.txt                  unique paths passed to open()/openat()
#   source_files_touched.txt           existing regular files inside SRC_TREE
#                                      but outside OBJ_TREE (absolute paths)
#   filtered_source_files_touched.txt  the above, relative to SRC_TREE, minus
#                                      Kbuild*/Makefile*/Kconfig* files and
#                                      anything under tools/, scripts/, .git/
#   strace_only.txt                    found by strace but not in the SBOM list.
#                                      Should be empty; otherwise investigate
#                                      whether these files belong in the cmd
#                                      graph (e.g. remove them from the source
#                                      tree and check the kernel still builds).
#   sbom_used_files_only.txt           in the SBOM list but not found by strace.
#                                      Should be empty; otherwise either exclude
#                                      the files from the cmd graph or find out
#                                      why strace did not see them.
set -euo pipefail

# Prints an error message to stderr and aborts the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Prints the canonical form of path $2; a relative $2 is resolved against $1.
resolve_path() {
  local base=$1 path=$2
  [[ "$path" == /* ]] || path="$base/$path"
  realpath "$path"
}

# Prints the sorted, unique first double-quoted string of every open()/openat()
# line in the strace log $1.
extract_opened_paths() {
  awk -F\" '/open(at)?\(/ {print $2}' "$1" | sort -u
}

# Reads paths on stdin and prints (sorted, unique, canonical) those that are
# existing regular files inside source tree $1 but not inside object tree $2.
select_source_files() {
  local src_tree=$1 obj_tree=$2 path
  while IFS= read -r path; do
    # Logged paths are either absolute or treated as relative to the object
    # tree; make them all absolute.
    [[ "$path" == /* ]] || path="$obj_tree/$path"
    # Skips failed open attempts, directories and files deleted by the build.
    [[ -f "$path" ]] || continue
    path=$(realpath "$path")
    if [[ "$path" == "$src_tree/"* && "$path" != "$obj_tree/"* ]]; then
      printf '%s\n' "$path"
    fi
  done | sort -u
}

# Reads absolute source paths on stdin and prints them relative to source tree
# $1 (sorted, unique), dropping files the cmd graph does not track.
drop_untracked_files() {
  local src_tree=$1 path
  while IFS= read -r path; do
    case "${path##*/}" in
      Kbuild* | Makefile* | Kconfig*) continue ;;
    esac
    path=${path#"$src_tree"/}
    case "$path" in
      tools/* | scripts/* | .git/*) continue ;;
    esac
    printf '%s\n' "$path"
  done | sort -u
}

main() {
  local src_tree=${1:-../../../linux.v6.17.tinyconfig.x86}
  local obj_tree=${2:-kernel_build}
  local sbom_used_files=${3:-../../sbom.used-files.txt}

  local script_dir src_tree_abs obj_tree_abs sbom_used_files_abs
  script_dir=$(dirname "$(realpath "$0")")
  src_tree_abs=$(resolve_path "$script_dir" "$src_tree")
  obj_tree_abs=$(resolve_path "$src_tree_abs" "$obj_tree")
  sbom_used_files_abs=$(resolve_path "$script_dir" "$sbom_used_files")

  # Fail before the (long) kernel build rather than after it.
  command -v strace > /dev/null || die "strace is not installed"
  [[ -d "$src_tree_abs" ]] || die "source tree not found: $src_tree_abs"
  [[ -f "$sbom_used_files_abs" ]] \
    || die "SBOM used-files list not found: $sbom_used_files_abs"

  local strace_log="$script_dir/strace.log"
  local files_touched="$script_dir/files_touched.txt"
  local source_files="$script_dir/source_files_touched.txt"
  local filtered_source_files="$script_dir/filtered_source_files_touched.txt"
  local strace_only="$script_dir/strace_only.txt"
  local sbom_used_files_only="$script_dir/sbom_used_files_only.txt"

  # Run the kernel build with strace, following child processes (-f) and
  # logging file-related syscalls only.
  cd "$src_tree_abs"
  strace -f -e trace=file -o "$strace_log" \
    make -j"$(nproc)" O="$obj_tree_abs"

  extract_opened_paths "$strace_log" > "$files_touched"
  echo "Files touched: $(wc -l < "$files_touched")"

  select_source_files "$src_tree_abs" "$obj_tree_abs" \
    < "$files_touched" > "$source_files"
  echo "Source files touched: $(wc -l < "$source_files")"

  drop_untracked_files "$src_tree_abs" \
    < "$source_files" > "$filtered_source_files"
  echo "Filtered source files touched: $(wc -l < "$filtered_source_files")"

  # Compare strace files with sbom.used-files.txt generated from the cmd graph.
  # comm needs both inputs sorted; the filtered list already is.
  comm -23 "$filtered_source_files" <(sort "$sbom_used_files_abs") \
    > "$strace_only"
  comm -13 "$filtered_source_files" <(sort "$sbom_used_files_abs") \
    > "$sbom_used_files_only"

  echo "Files in strace but not in cmd graph: $(wc -l < "$strace_only")"
  echo "Files in sbom.used-files but not in strace: $(wc -l < "$sbom_used_files_only")"
}

main "$@"