diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7cba398a..b41694b9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.15)
project(csdiff CXX)
enable_testing()
-# C/C++ sources
+# source code
add_subdirectory(src)
# regression tests
diff --git a/make-srpm.sh b/make-srpm.sh
index 1a696c67..0363a768 100755
--- a/make-srpm.sh
+++ b/make-srpm.sh
@@ -202,6 +202,7 @@ make version.cc
%doc README
%license COPYING
%{_bindir}/csdiff
+%{_bindir}/csfilter-kfp
%{_bindir}/csgrep
%{_bindir}/cshtml
%{_bindir}/cslinker
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e8ea9c18..174de791 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -82,6 +82,11 @@ install(TARGETS
cstrans-df-run
DESTINATION ${CMAKE_INSTALL_BINDIR})
+# csfilter-kfp is a script, hence install(PROGRAMS) rather than install(TARGETS)
+install(
+    PROGRAMS csfilter-kfp
+    DESTINATION ${CMAKE_INSTALL_BINDIR})
+
# optionally build statically linked csgrep-static
option(CSGREP_STATIC "Set to ON to build the csgrep-static executable" OFF)
if(CSGREP_STATIC)
diff --git a/src/csfilter-kfp b/src/csfilter-kfp
new file mode 100755
index 00000000..5f919609
--- /dev/null
+++ b/src/csfilter-kfp
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2024 Red Hat, Inc.
+#
+# This file is part of csdiff.
+#
+# csdiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# csdiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with csdiff.  If not, see <http://www.gnu.org/licenses/>.
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+
+# if neither --kfp-dir nor --kfp-git-url is specified, use the known-false-positives RPM package
+DEFAULT_KFP_DIR = "/usr/share/csmock/known-false-positives.d"
+DEFAULT_KFP_JSON = "/usr/share/csmock/known-false-positives.js"
+
+
+def construct_init_cmd(args):
+    """Return the bash prologue (strict mode, temp dir ${td}, optional XTRACE)."""
+    lines = [
+        # exit on error and propagate the exit code from piped commands
+        'set -e',
+        'set -o pipefail',
+        # make globs that match nothing expand to nothing (not to themselves)
+        'shopt -s nullglob',
+        # assign separately from `export` so that `set -e` notices a failing mktemp
+        'td=$(mktemp -d /tmp/tmp-csfilter-kfp.XXXXXXXXXX)',
+        'export td',
+        # remove the temporary directory automatically when the shell exits
+        'trap "rm -fr \'${td}\'" EXIT',
+    ]
+    if args.verbose:
+        # run shell in XTRACE mode
+        lines.append('set -x')
+
+    return ''.join(line + '\n' for line in lines)
+
+
+def construct_git_cmd(kfp_git_url):
+    """Return shell code that clones `URL[#REVISION]` into ${td}/kfp."""
+    from shlex import quote  # local import; URLs may contain `&`, `?`, `;`, spaces, ...
+    m = re.match("^(.*)#([0-9a-f]+)", kfp_git_url)
+    if m:
+        # clone without --depth, then reset to the requested revision
+        return f'git clone {quote(m.group(1))} "${{td}}/kfp"\n' \
+            f'(cd "${{td}}/kfp" && git reset -q --hard {m.group(2)})\n'
+    # no revision given -> shallow clone of the default branch
+    return f'git clone --depth 1 {quote(kfp_git_url)} "${{td}}/kfp"\n'
+
+
+def construct_prep_cmd(args):
+    """Return shell code creating ${td}/kfp and ${td}/kfp.json; RuntimeError if no KFP source."""
+    from shlex import quote  # local import; --kfp-dir may contain shell metacharacters
+    aggregate = True
+    if args.kfp_git_url:
+        # clone git repo
+        cmd = construct_git_cmd(args.kfp_git_url)
+    elif args.kfp_dir:
+        # the symlink lives in ${td}, so a relative target would dangle -> make it absolute
+        cmd = f'ln -s {quote(os.path.abspath(args.kfp_dir))} "${{td}}/kfp"\n'
+    elif os.path.isfile(DEFAULT_KFP_JSON):
+        # create symlinks to the known-false-positives RPM package installed on the system
+        cmd = f'ln -s "{DEFAULT_KFP_DIR}" "${{td}}/kfp"\n' \
+            f'ln -s "{DEFAULT_KFP_JSON}" "${{td}}/kfp.json"\n'
+        aggregate = False
+    else:
+        raise RuntimeError("no source of KFP specified, please use --kfp-dir or --kfp-git-url"
+                           " (or install the known-false-positives RPM package)")
+
+    if aggregate:
+        # create all-in-one kfp.json; empty.err stays in ${td} so the KFP dir is not written to
+        cmd += '(cd "${td}/kfp" && touch "${td}/empty.err" && csgrep --mode=json --remove-duplicates'
+        cmd += ' "${td}/empty.err" */ignore.err */true-positives-ignore.err >"${td}/kfp.json")\n'
+    return cmd
+
+
+def construct_path_filter(args):
+    """Return the body of the path_filter() shell function; RuntimeError on invalid project name."""
+    if args.project_nvr is None:
+        # TODO: read project_nvr from scan properties if available
+        return '    cat\n'
+
+    # cut off the `-version-release` or `-version` suffix to obtain package name where `version` can be
+    # a number optionally prefixed by `v` or a full-size SHA1 hash encoded in lowercase as, for example,
+    # in `project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466`
+    proj = re.sub("-(([v]?[0-9][^-]*)|([0-9a-f]{40}))(-[0-9][^-]*)?$", "", args.project_nvr)
+
+    # the name gets embedded in a shell path -> accept safe characters only (no trailing newline)
+    if not re.fullmatch("[A-Za-z0-9_-]+", proj):
+        raise RuntimeError(f"invalid project name: {proj}")
+    # at run-time, join the lines of exclude-paths.txt (minus `#` comments and blank lines) into
+    # one alternation; `read -r` keeps the backslashes of each regex intact
+    return f'    ep="${{td}}/kfp/{proj}/exclude-paths.txt"\n' \
+        '    re=\n' \
+        '    while read -r line; do\n' \
+        '        re="${re}|(${line})"\n' \
+        '    done < <(grep -Esv \'^(#|[[:space:]]*$)\' "$ep")\n' \
+        '    if test -n "$re"; then\n' \
+        '        csgrep --mode=json --invert-match --path="${re#|}"\n' \
+        '    else\n' \
+        '        cat\n' \
+        '    fi\n'
+
+
+def construct_filter_cmd(args):
+    """Return the complete bash script filtering the input according to `args` (see main())."""
+    from shlex import quote  # local import: user-supplied strings must be shell-quoted
+
+    # set shell options and create a temporary directory ${td}
+    cmd = construct_init_cmd(args)
+
+    # prepare the KFP data from the specified source
+    cmd += construct_prep_cmd(args)
+
+    # read the whole input (the optional input file, or else stdin) into a JSON file
+    input_file = vars(args).get("input-file")  # argparse keeps the dash in this dest
+    cmd += 'csgrep --mode=json'
+    if input_file:
+        cmd += f' {quote(input_file)}'
+    cmd += ' >"${td}/input.json"\n'
+
+    # define path-based filter
+    path_filter = construct_path_filter(args)
+    cmd += f'path_filter() {{\n{path_filter}}}\n'
+
+    # exclude individual findings, then exclude paths in the scan results
+    cmd += 'csdiff --show-internal "${td}/kfp.json" "${td}/input.json"'
+    cmd += ' | path_filter >"${td}/output.json"\n'
+
+    if args.record_excluded:
+        # record excluded findings to the specified file
+        cmd += 'csdiff "${td}/output.json" "${td}/input.json"'
+        cmd += f' >{quote(args.record_excluded)}\n'
+
+    if not args.json_output:
+        # export plain-text format
+        return cmd + 'csgrep "${td}/output.json"\n'
+
+    # export JSON format, optionally recording the source of known-false-positives
+    cmd += 'csgrep --mode=json "${td}/output.json"'
+    if args.kfp_dir:
+        cmd += ' --set-scan-prop=' + quote(f'known-false-positives-dir:{args.kfp_dir}')
+    elif args.kfp_git_url:
+        cmd += ' --set-scan-prop=' + quote(f'known-false-positives-git-url:{args.kfp_git_url}')
+    return cmd + '\n'
+
+
+def main():
+    """Parse the command line, then print (--dry-run) or execute the filtering script."""
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "input-file", nargs="?",
+        help="optional name of the input file (standard input is used by default)")
+
+    # source of known-false-positives
+    kfp_source = parser.add_mutually_exclusive_group()
+    kfp_source.add_argument(
+        "--kfp-dir",
+        help="known false positives directory")
+    kfp_source.add_argument(
+        "--kfp-git-url",
+        help="known false positives git URL (optionally taking a revision delimited by #)")
+
+    parser.add_argument(
+        "--project-nvr",
+        help="Name-Version-Release (NVR) of the scanned project, used to match path exclusions")
+
+    parser.add_argument(
+        "--record-excluded",
+        help="file to store all excluded findings to")
+
+    parser.add_argument(
+        "--json-output", action="store_true", default=(not os.isatty(sys.stdout.fileno())),
+        help="produce JSON output (default if stdout is not connected to a terminal)")
+
+    parser.add_argument(
+        "-v", "--verbose", action="store_true",
+        help="run shell in XTRACE mode while executing the filtering script")
+
+    parser.add_argument(
+        "-n", "--dry-run", action="store_true",
+        help="do not execute anything, only print the shell script that would be executed")
+
+    args = parser.parse_args()
+
+    # if --kfp-dir is used, check that a directory was given
+    if args.kfp_dir and not os.path.isdir(args.kfp_dir):
+        parser.error(f"'{args.kfp_dir}' given to --kfp-dir is not a directory")
+
+    # construct the command to filter
+    try:
+        cmd = construct_filter_cmd(args)
+    except RuntimeError as e:
+        parser.error(e)
+
+    if args.dry_run:
+        # print the command and exit successfully
+        print(cmd)
+        sys.exit(0)
+
+    # the script relies on bash-only features (pipefail, shopt, process substitution),
+    # so run it with bash explicitly rather than with /bin/sh picked by shell=True
+    try:
+        subprocess.run(["bash", "-c", cmd], check=True)
+    except subprocess.CalledProcessError as e:
+        sys.exit(e.returncode)
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+    main()
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 41a12f9e..e68a5447 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -29,8 +29,8 @@ set(jsfilter "sed -e 's|\"version\": \"[^\"]*\"|\"version\": \"\"|g'")
macro(add_test_wrap test_name cmd)
add_test("${test_name}" bash -c "${cmd}")
- set_tests_properties(${test_name} PROPERTIES
- ENVIRONMENT "PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
+ set_tests_properties(${test_name} PROPERTIES ENVIRONMENT
+ "PATH=${CMAKE_BINARY_DIR}/src:$ENV{PATH};PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
set_tests_properties(${test_name} PROPERTIES COST ${test_cost})
math(EXPR test_cost "${test_cost} - 1")
@@ -45,6 +45,7 @@ endmacro()
set(test_cost 1048576)
add_subdirectory(csdiff)
+add_subdirectory(csfilter-kfp)
add_subdirectory(csgrep)
add_subdirectory(cshtml)
add_subdirectory(cslinker)
diff --git a/tests/csfilter-kfp/0001-args.txt b/tests/csfilter-kfp/0001-args.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/csfilter-kfp/0001-stdout.txt b/tests/csfilter-kfp/0001-stdout.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/csfilter-kfp/CMakeLists.txt b/tests/csfilter-kfp/CMakeLists.txt
new file mode 100644
index 00000000..cd952334
--- /dev/null
+++ b/tests/csfilter-kfp/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright (C) 2024 Red Hat, Inc.
+#
+# This file is part of csdiff.
+#
+# csdiff is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# csdiff is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with csdiff.  If not, see <http://www.gnu.org/licenses/>.
+
+# a generic template for csfilter-kfp tests
+macro(test_csfilter_kfp tst)
+    set(test_data_prefix "${CMAKE_CURRENT_SOURCE_DIR}/${tst}")
+    file(READ ${test_data_prefix}-args.txt args)
+    string(REPLACE "\n" "" args "${args}")
+    # feed <tst>-stdin.txt to the script and diff its output against <tst>-stdout.txt
+    string(CONCAT cmd "${CMAKE_SOURCE_DIR}/src/csfilter-kfp ${args}"
+        " <${test_data_prefix}-stdin.txt | ${diffcmd} ${test_data_prefix}-stdout.txt -")
+    add_test_wrap("csfilter-kfp-${tst}" "${cmd}")
+endmacro()
+
+# csfilter-kfp tests
+test_csfilter_kfp(0001)