Skip to content

Commit

Permalink
csfilter-kfp: script to filter known false positives
Browse files Browse the repository at this point in the history
  • Loading branch information
kdudka committed Aug 28, 2024
1 parent 007e64e commit ec2c96b
Show file tree
Hide file tree
Showing 17 changed files with 51,590 additions and 4 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.15)
project(csdiff CXX)
enable_testing()

# C/C++ sources
# source code
add_subdirectory(src)

# regression tests
Expand Down
1 change: 1 addition & 0 deletions make-srpm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ make version.cc
%doc README
%license COPYING
%{_bindir}/csdiff
%{_bindir}/csfilter-kfp
%{_bindir}/csgrep
%{_bindir}/cshtml
%{_bindir}/cslinker
Expand Down
5 changes: 5 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ install(TARGETS
cstrans-df-run
DESTINATION ${CMAKE_INSTALL_BINDIR})

# install the csfilter-kfp script
install(PROGRAMS
csfilter-kfp
DESTINATION ${CMAKE_INSTALL_BINDIR})

# optionally build statically linked csgrep-static
option(CSGREP_STATIC "Set to ON to build the csgrep-static executable" OFF)
if(CSGREP_STATIC)
Expand Down
230 changes: 230 additions & 0 deletions src/csfilter-kfp
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
#!/usr/bin/env python3

# Copyright (C) 2024 Red Hat, Inc.
#
# This file is part of csdiff.
#
# csdiff is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# csdiff is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with csdiff. If not, see <http://www.gnu.org/licenses/>.

import argparse
import os
import re
import shlex
import subprocess
import sys


# if neither --kfp-dir nor --kfp-git-url is specified, use the known-false-positives RPM package
# (DEFAULT_KFP_DIR is symlinked as the per-project KFP directory, DEFAULT_KFP_JSON is used
# as the ready-made all-in-one list of known false positives)
DEFAULT_KFP_DIR = "/usr/share/csmock/known-false-positives.d"
DEFAULT_KFP_JSON = "/usr/share/csmock/known-false-positives.js"


def construct_init_cmd(args):
    """Return the bash prologue of the filtering script.

    The prologue sets strict shell options, creates the temporary directory
    ${td} (removed again by an EXIT trap) and, if args.verbose is set,
    enables XTRACE mode.
    """
    # make bash exit on error
    cmd = 'set -e\n'

    # make bash propagate exit code from piped commands
    cmd += 'set -o pipefail\n'

    # make bash expand empty globs
    cmd += 'shopt -s nullglob\n'

    # create a temporary directory with an automatic destructor
    # (assign first and export separately: `export td=$(...)` would return
    # the exit status of `export` and hide a failure of mktemp from `set -e`)
    cmd += 'td=$(mktemp -d /tmp/tmp-csfilter-kfp.XXXXXXXXXX)\n'
    cmd += 'export td\n'
    cmd += 'trap "rm -fr \'${td}\'" EXIT\n'

    if args.verbose:
        # run shell in XTRACE mode
        cmd += 'set -x\n'

    return cmd


def construct_git_cmd(kfp_git_url):
    """Return shell commands cloning the KFP git repository into ${td}/kfp.

    kfp_git_url is either a plain clone URL (the default branch is cloned
    shallowly) or `URL#REVISION` where REVISION consists of lowercase
    hexadecimal digits (a full clone is made and reset to that revision).
    """
    # split kfp_git_url into the clone URL and (optional) revision; the whole
    # suffix after `#` has to be a hexadecimal revision, otherwise a prefix of
    # it would be silently used as the revision
    m = re.fullmatch(r"(.*)#([0-9a-f]+)", kfp_git_url)
    if m:
        # checkout a specific revision
        url = shlex.quote(m.group(1))
        rev = m.group(2)
        return f"git clone {url} ${{td}}/kfp\n" \
            f"(cd ${{td}}/kfp && git reset -q --hard {rev})\n"

    # shallow clone of the default branch (the URL is quoted so that the shell
    # cannot interpret any special characters in it)
    return f"git clone --depth 1 {shlex.quote(kfp_git_url)} ${{td}}/kfp\n"


def construct_prep_cmd(args):
    """Return shell commands that make the KFP data available in ${td}.

    The source is chosen in this order: args.kfp_git_url, args.kfp_dir, the
    system-wide files DEFAULT_KFP_DIR/DEFAULT_KFP_JSON.  After the returned
    commands have run, ${td}/kfp refers to the KFP directory and
    ${td}/kfp.json to the all-in-one list of known false positives.

    Raises RuntimeError if no source of known false positives is available.
    """
    # check which KFP will be used
    have_kfp_json = False
    if args.kfp_git_url:
        # clone git repo
        cmd = construct_git_cmd(args.kfp_git_url)
    elif args.kfp_dir:
        # symlink an absolute path to the directory (quoted for the shell so
        # that `$`, backticks or quotes in the path are taken literally)
        kfp_abs = os.path.realpath(args.kfp_dir)
        cmd = f'ln -s {shlex.quote(kfp_abs)} "${{td}}/kfp"\n'
    elif os.path.isfile(DEFAULT_KFP_JSON):
        # create symlinks to the known-false-positives RPM package installed on the system
        cmd = f'ln -s "{DEFAULT_KFP_DIR}" "${{td}}/kfp"\n' \
            f'ln -s "{DEFAULT_KFP_JSON}" "${{td}}/kfp.json"\n'
        have_kfp_json = True
    else:
        raise RuntimeError("no source of KFP specified, please use --kfp-dir or --kfp-git-url"
                           " (or install the known-false-positives RPM package)")

    if not have_kfp_json:
        # create all-in-one kfp.json file from files in ${td}/kfp; the empty
        # file keeps csgrep supplied with an input file even if the globs
        # expand to nothing (nullglob is enabled by the init commands)
        cmd += 'touch "${td}/empty.err"\n'
        cmd += '(cd "${td}/kfp" && csgrep --mode=json --remove-duplicates "${td}/empty.err"'
        cmd += ' */ignore.err */true-positives-ignore.err >"${td}/kfp.json")\n'

    return cmd


def construct_path_filter(args):
    """Return the body of the `path_filter` shell function.

    Without args.project_nvr the body is a plain `cat`.  Otherwise the
    project name is derived from the NVR and the body reads
    ${td}/kfp/<project>/exclude-paths.txt at run-time, joins its lines into
    one alternation of regular expressions and passes it to
    `csgrep --invert-match --path`.

    Raises RuntimeError if the derived project name contains characters other
    than ASCII letters, digits, `-` and `_`.
    """
    if args.project_nvr is None:
        # TODO: read project_nvr from scan properties if available
        return ' cat\n'

    # cut off the `-version-release` or `-version` suffix to obtain package name where `version` can be
    # a number optionally prefixed by `v` or a full-size SHA1 hash encoded in lowercase as, for example,
    # in `project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466`
    proj = re.sub(r"-(([v]?[0-9][^-]*)|([0-9a-f]{40}))(-[0-9][^-]*)?$", "", args.project_nvr)

    # validate the resulting project name (fullmatch also rejects a trailing
    # newline, which `$` in a plain match would let through)
    if not re.fullmatch(r"[A-Za-z0-9_-]+", proj):
        raise RuntimeError(f"invalid project name: {proj}")

    # generate a script that will construct the filter at run-time
    cmd = f' ep="${{td}}/kfp/{proj}/exclude-paths.txt"\n'
    cmd += ' re=\n'
    # `read -r` keeps backslashes of the regular expressions intact
    cmd += ' while read -r line; do\n'
    cmd += ' re="${re}|(${line})"\n'
    # skip comments and empty lines; an empty line would otherwise add `|()`
    # to the expression, which matches every path
    cmd += ' done < <(grep -Esv \'^(#|$)\' "$ep")\n'
    cmd += ' if test -n "$re"; then\n'
    cmd += ' csgrep --mode=json --invert-match --path="${re#|}"\n'
    cmd += ' else\n'
    cmd += ' cat\n'
    cmd += ' fi\n'
    return cmd


def construct_filter_cmd(args):
    """Return the complete bash script that filters known false positives.

    The script is composed of the init commands, the KFP preparation
    commands, conversion of the input to JSON, the `path_filter` shell
    function and the final csdiff/csgrep pipeline.  User-supplied file names
    and scan property values are quoted for the shell.

    Propagates RuntimeError raised while constructing the individual parts.
    """
    # set shell options and create a temporary directory ${td}
    cmd = construct_init_cmd(args)

    # prepare the KFP data from the specified source
    cmd += construct_prep_cmd(args)

    # read the whole input into a JSON file
    cmd += 'csgrep --mode=json'
    if args.input_file:
        cmd += f' {shlex.quote(args.input_file)}'
    cmd += ' >"${td}/input.json"\n'

    # define path-based filter
    path_filter = construct_path_filter(args)
    cmd += f'path_filter() {{\n{path_filter}}}\n'

    # exclude individual findings
    cmd += 'csdiff --show-internal "${td}/kfp.json" "${td}/input.json"'

    # exclude paths in the scan results
    cmd += ' | path_filter >"${td}/output.json"\n'

    if args.record_excluded:
        # record excluded findings to the specified file
        cmd += 'csdiff "${td}/output.json" "${td}/input.json"'
        cmd += f' >{shlex.quote(args.record_excluded)}\n'

    if not args.json_output:
        # export plain-text format
        cmd += 'csgrep "${td}/output.json"\n'
        return cmd

    # export JSON format
    cmd += 'csgrep --mode=json "${td}/output.json"'

    # optionally record the source of known-false-positives
    if args.kfp_dir:
        prop = shlex.quote(f"known-false-positives-dir:{args.kfp_dir}")
        cmd += f' --set-scan-prop={prop}'
    elif args.kfp_git_url:
        prop = shlex.quote(f"known-false-positives-git-url:{args.kfp_git_url}")
        cmd += f' --set-scan-prop={prop}'
    cmd += '\n'

    return cmd


def main():
    """Parse command-line arguments, then print or run the filtering script.

    With --dry-run the generated bash script is printed and the process exits
    with 0.  Otherwise the script is executed by /bin/bash and, if it fails,
    the process exits with the exit code of the script.  Invalid arguments
    are reported through the argument parser.
    """
    # initialize argument parser
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "input_file", nargs="?",
        help="optional name of the input file (standard input is used by default)")

    # source of known-false-positives
    kfp_source = parser.add_mutually_exclusive_group()
    kfp_source.add_argument(
        "--kfp-dir",
        help="known false positives directory")
    kfp_source.add_argument(
        "--kfp-git-url",
        help="known false positives git URL (optionally taking a revision delimited by #)")

    parser.add_argument(
        "--project-nvr",
        help="Name-Version-Release (NVR) of the scanned project, used to match path exclusions")

    parser.add_argument(
        "--record-excluded",
        help="file to store all excluded findings to")

    # sys.stdout.isatty() also works if stdout was replaced by an object
    # without a file descriptor, where sys.stdout.fileno() would raise
    parser.add_argument(
        "--json-output", action="store_true", default=(not sys.stdout.isatty()),
        help="produce JSON output (default if stdout is not connected to a terminal)")

    parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="run shell in XTRACE mode while executing the filtering script")

    parser.add_argument(
        "-n", "--dry-run", action="store_true",
        help="do not execute anything, only print the shell script that would be executed")

    # parse command-line arguments
    args = parser.parse_args()

    # if --kfp-dir is used, check that a directory was given
    if args.kfp_dir and not os.path.isdir(args.kfp_dir):
        parser.error(f"'{args.kfp_dir}' given to --kfp-dir is not a directory")

    # construct the command to filter
    try:
        cmd = construct_filter_cmd(args)
    except RuntimeError as e:
        # parser.error() prints the message and exits, it never returns
        parser.error(str(e))

    if args.dry_run:
        # print the command and exit successfully
        print(cmd, end='')
        sys.exit(0)

    # run the command
    try:
        subprocess.run(cmd, shell=True, check=True, executable='/bin/bash')
    except subprocess.CalledProcessError as e:
        sys.exit(e.returncode)


if __name__ == "__main__":
    main()
6 changes: 4 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ set(diffcmd "diff -up")

# eliminate csdiff's version string in the output
set(jsfilter "sed -e 's|\"version\": \"[^\"]*\"|\"version\": \"\"|g'")
set(jsfilter "${jsfilter} -e 's|${CMAKE_SOURCE_DIR}/tests/csfilter-kfp/|\$PROJECT_ROOT/tests/csfilter-kfp/|'")

macro(add_test_wrap test_name cmd)
add_test("${test_name}" bash -c "${cmd}")
set_tests_properties(${test_name} PROPERTIES
ENVIRONMENT "PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
set_tests_properties(${test_name} PROPERTIES ENVIRONMENT
"PATH=${CMAKE_BINARY_DIR}/src:$ENV{PATH};PROJECT_ROOT=${CMAKE_SOURCE_DIR}")

set_tests_properties(${test_name} PROPERTIES COST ${test_cost})
math(EXPR test_cost "${test_cost} - 1")
Expand All @@ -45,6 +46,7 @@ endmacro()
set(test_cost 1048576)

add_subdirectory(csdiff)
add_subdirectory(csfilter-kfp)
add_subdirectory(csgrep)
add_subdirectory(cshtml)
add_subdirectory(cslinker)
Expand Down
1 change: 1 addition & 0 deletions tests/csfilter-kfp/0001-args.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--kfp-dir "$PROJECT_ROOT/tests/csfilter-kfp/0001-kfp" --project-nvr project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.*/test/.*
.*/tests/.*
.*testing/.*
.*/[^/]*test_[^/]*\.py$
.*docker-compose.*
34 changes: 34 additions & 0 deletions tests/csfilter-kfp/0001-kfp/project-koku-koku/ignore.err
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
Error: SNYK_CODE_WARNING (CWE-89):
project-koku-koku-5c7647f/koku/masu/api/db_performance/dbp_views.py:483:24: error[python/Sqli]: Unsanitized input from the HTTP request body flows into execute, where it is used in an SQL query. This may result in an SQL Injection vulnerability.
# 481| with DBPerformanceStats(get_identity_username(request), CONFIGURATOR) as dbp:
# 482| try:
# 483|-> data = dbp.explain_sql(query_params["sql_statement"])
# 484| except ProgrammingError as e:
# 485| data = {"query_plan": f"{type(e).__name__}: {str(e)}"}
# dbp.explain_sql parses and sanitizes the inputted query params. The query is then run through a read-only db connection.

Error: IDENTIFIER_TYPO (CWE-688):
project-koku-koku-30de2cf/koku/api/settings/tags/mapping/utils.py:96: identifier_typo: Using "provider__uuid" appears to be a typo:
* Identifier "provider__uuid" is only known to be referenced here, or in copies of this code.
* Identifier "provider_uuid" is referenced elsewhere at least 216 times.
project-koku-koku-30de2cf/koku/api/settings/tags/mapping/utils.py:96: remediation: Should identifier "provider__uuid" be replaced by "provider_uuid"?
project-koku-koku-30de2cf/dev/scripts/trino_query.py:11: identifier_use: Example 1: Using identifier "provider_uuid" (2 total uses in this function).
project-koku-koku-30de2cf/koku/api/provider/models.py:224: identifier_use: Example 2: Using identifier "provider_uuid".
project-koku-koku-30de2cf/koku/api/report/ocp/query_handler.py:147: identifier_use: Example 3: Using identifier "provider_uuid".
project-koku-koku-30de2cf/koku/api/test_utils.py:303: identifier_use: Example 4: Using identifier "provider_uuid".
project-koku-koku-30de2cf/koku/cost_models/cost_model_manager.py:123: identifier_use: Example 5: Using identifier "provider_uuid".
# 94| provider_uuids = (
# 95| OCPUsageReportPeriod.objects.filter(cluster_id__in=clusters, report_period_start=start_date)
# 96|-> .values_list("provider__uuid", flat=True)
# 97| .distinct()
# 98| )
# This is Django syntax to query through a foreign key.

Error: SNYK_CODE_WARNING (CWE-89):
project-koku-koku-cf77b7a/koku/masu/api/trino.py:56:13: error[python/Sqli]: Unsanitized input from the HTTP request body flows into execute, where it is used in an SQL query. This may result in an SQL Injection vulnerability.
# 54| ) as conn:
# 55| cur = conn.cursor()
# 56|-> cur.execute(query)
# 57| cols = [des[0] for des in cur.description]
# 58| rows = cur.fetchall()
# this trino connection has been converted to a READONLY connection
Loading

0 comments on commit ec2c96b

Please sign in to comment.