From edc46973cf4fe298abd175f5687c696391b47231 Mon Sep 17 00:00:00 2001 From: Joe Chacko <143064+joe-chacko@users.noreply.github.com> Date: Fri, 31 May 2024 18:36:12 +0100 Subject: [PATCH 1/4] OPS: make copyright check less chatty --- .github/workflows/gradle.yml | 2 +- scripts/check-copyright.sh | 229 ++++++++++++++++++++--------------- 2 files changed, 132 insertions(+), 99 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index a2c74cd0..3effeb31 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -41,7 +41,7 @@ jobs: if: github.event_name == 'pull_request' run: | git fetch --depth=1 origin ${{github.base_ref}} - scripts/check-copyright.sh origin/${{github.base_ref}} + scripts/check-copyright.sh -- origin/${{github.base_ref}} - name: Build with Gradle run: ./gradlew build - name: Upload test reports diff --git a/scripts/check-copyright.sh b/scripts/check-copyright.sh index a58ac167..4b18f411 100755 --- a/scripts/check-copyright.sh +++ b/scripts/check-copyright.sh @@ -16,103 +16,136 @@ # # SPDX-License-Identifier: Apache-2.0 -# Stop on first unexpected error -set -e - -die() { - echo "$@" >&2 - exit 1 -} - -# Check that git is a command -command -v git > /dev/null 2>&1 || die "Can not find 'git' command." - -# Check that base ref has been specified -[ -n "$1" ] || die "Missing required first parameter: git-base-ref" -BASE="$1" - -# Check it is a valid ref -git rev-parse --quiet --verify "$BASE" > /dev/null || die "Specified git base ref '$BASE' is not a valid ref in this repository." - -# git uses the following one-character change status codes -# A - Added -# C - Copied -# D - Deleted -# M - Modified -# R - Renamed - -# Keep track of how many files failed the copyright check so we can find them all and return 0 if there were none -FAILED=0 - -# Copy stdout to another file descriptor for logging -exec 3>&1 -log() { echo>&3 "$@"; } - -# Look for unsupported changes with Broken (B), changed (T), Unmerged (U), or Unknown (X) status -BAD_FILES="$(git diff --name-status --diff-filter=BTUX "$BASE")" - -[ -z "$BAD_FILES" ] || { - echo "‼️ This script ($0) may need fixing to deal with more types of change." >&2 - echo "$BAD_FILES" | sed 's/^/🤯 Unsupported change type: /' - FAILED=$(( FAILED + $(echo "$BAD_FILES"|wc -l) )) -} - -# Print deleted files, just for completeness -git diff --name-only --diff-filter=D "$BASE" | sed 's/^/🫥 Ignoring deleted file: /' - - -# Function to print each file from stdin to stdout unless it has good copyright -badCopyrightFilter() { - while read filePath; do - [ -f "$filePath" ] || die "Cannot check copyright in non-existent file: '$filePath'" - grep -Eq "SPDX-License-Identifier: Apache-2.0" "$filePath" || { - log "👿 License identifier not found: $filePath" - echo "$filePath" - continue - } - yearModified=`git log -1 --pretty=format:%cd --date=format:%Y -- "$filePath"` - grep -Eq "Copyright $yearModified IBM Corporation and" "$filePath" && log "😅 Copyright OK: $filePath" || { - existingModifiedYear="$(grep -Eo 'Copyright [0-9]{4} IBM Corporation and' "$filePath" | cut -d ' ' -f 2 )" - case "$existingModifiedYear" in - "$yearModified") continue ;; - "") log "🤬 No copyright year in '$filePath': expected '$yearModified'." ;; - *) log "😡 Wrong copyright year in '$filePath': expected '$yearModified' but found '$existingModifiedYear'." ;; - esac - echo "$filePath" - } - done -} - -echo "Checking added and modified files..." -FAILED=$((FAILED + $(git diff --name-only --find-copies-harder --diff-filter=AM "$BASE" | badCopyrightFilter | wc -l))) - - -# Renamed (R) and copied (C) files are more complicated. -# They can report as less than 100% identical even when their contents are the same. -# This is apparently due to metadata changes. Shrug. -# So check whether the contents have changed significantly. - -# Define how to compare a file against its origin for significant content changes. -# Succeed if there are differences, and print the filename. -isReallyDifferent() { ! git diff --ignore-all-space --quiet "$1" "$2" 2>/dev/null && echo "$2"; } - -# Read status, source, and destination as separate records (lines). -# Check the status is R... or C... (otherwise it was parsed incorrectly). -# Then compare the source and dest for significant differences. -# Lastly, if they were different, check them for copyright. -badCopyrightFilter2() { - while read status && read src && read dst - do - case "$status" in - R*) isReallyDifferent "$BASE:$src" "$dst" || log "🫥 Ignoring renamed file: $src -> $dst" ;; - C*) isReallyDifferent "$BASE:$src" "$dst" || log "🫥 Ignoring copied file: $src -> $dst" ;; - *) die "Unexpected status while parsing git diff output: status='$status' src='$src' dst='$dst'" ;; +# Enforce top-level subshell to avoid leaking environment changes (in case script is sourced) +( + # Stop on first unexpected error + set -e + + usage() { + echo "usage:\t$0 [-q|--quiet|-t|--terse|-v|--verbose] git-base-ref" + echo "\t-h,--help\tprints this usage info" + echo "\t-t,--terse\tsuppresses all informational outpout" + echo "\t-q,--quiet\tsuppresses all non-error outpout" + echo "\t-v,--verbose\tenables verbose output" + } + + # Copy stdout and stderr to other file descriptors for logging and error reporting + exec 3>/dev/null 4>&1 5>&1 6>&2 + # Define semantic functions to echo or cat messages to the various file descriptors + echocat() { { [ $# -gt 0 ] && echo "$@"; } || cat; } + log() { echocat "$@" >&3; } + inf() { echocat "$@" >&4; } + wrn() { echocat "$@" >&5; } + err() { echocat "$@" >&6; } + # Define a Perl-style "die" function to emit an error message and exit with an error code + die() { + err "$@" + exit 1 + } + + # Parse script options + while [ $# -gt 0 ]; do + case "$1" in + # -t disables logging and info + -h|--help) usage; exit 0;; + # -t disables logging and info + -t|--terse) exec 3>/dev/null 4>/dev/null 5>&2; shift;; + # -q disables logging, info, and warning + -q|--quiet) exec 3>/dev/null 4>/dev/null 5>/dev/null; shift;; + # -v enables logging + -v|--verbose) exec 3>&1 4>&1 5>&2; shift;; + # -- indicates the explicit end of options, so consume it and exit the loop + --) shift; break;; + # any other option-like string is an error + -*) err "$0: unknown option '$1'"; usage | die;; + # any non-option-like string indicates the end of the options + *) break;; esac - done | badCopyrightFilter -} - -echo "Checking renamed and copied files..." -OUTPUT="$(git diff --name-status --find-copies-harder --diff-filter=CR -z "$BASE" 2>/dev/null | tr '\0' '\n')" -FAILED=$((FAILED + $(echo "$OUTPUT"| badCopyrightFilter2 | wc -l))) + done -exit $FAILED + # Check that git is a command + command -v git > /dev/null 2>&1 || die "Can not find 'git' command." + + # Check that base ref has been specified + [ -n "$1" ] || die "Missing required first parameter: git-base-ref" + BASE="$1" + + # Check it is a valid ref + git rev-parse --quiet --verify "$BASE" > /dev/null || die "Specified git base ref '$BASE' is not a valid ref in this repository." + + # Git uses the following one-character change status codes + # A - Added + # C - Copied + # D - Deleted + # M - Modified + # R - Renamed + + # Keep track of how many files failed the copyright check so we can find them all and return 0 if there were none + FAILED=0 + + # Look for unsupported changes with Broken (B), changed (T), Unmerged (U), or Unknown (X) status + BAD_FILES="$(git diff --name-status --diff-filter=BTUX "$BASE")" + + [ -z "$BAD_FILES" ] || { + err "‼️ This script ($0) may need fixing to deal with more types of change." + echo "$BAD_FILES" | sed 's/^/🤯 Unsupported change type: /' | err + FAILED=$(( FAILED + $(echo "$BAD_FILES"|wc -l) )) + } + + # Log deleted files + git diff --name-only --diff-filter=D "$BASE" | sed 's/^/🫥 Ignoring deleted file: /' | log + + # Function to print each file from stdin to stdout unless it has good copyright + badCopyrightFilter() { + while read filePath; do + [ -f "$filePath" ] || die "Cannot check copyright in non-existent file: '$filePath'" + grep -Eq "SPDX-License-Identifier: Apache-2.0" "$filePath" || { + wrn "👿 License identifier not found: $filePath" + echo "$filePath" + continue + } + yearModified=`git log -1 --pretty=format:%cd --date=format:%Y -- "$filePath"` + grep -Eq "Copyright $yearModified IBM Corporation and" "$filePath" && inf "😅 Copyright OK: $filePath" || { + existingModifiedYear="$(grep -Eo 'Copyright [0-9]{4} IBM Corporation and' "$filePath" | cut -d ' ' -f 2 )" + case "$existingModifiedYear" in + "$yearModified") continue ;; + "") wrn "🤬 No copyright year in '$filePath': expected '$yearModified'." ;; + *) wrn "😡 Wrong copyright year in '$filePath': expected '$yearModified' but found '$existingModifiedYear'." ;; + esac + echo "$filePath" + } + done + } + + inf "Checking added and modified files..." + FAILED=$((FAILED + $(git diff --name-only --find-copies-harder --diff-filter=AM "$BASE" | badCopyrightFilter | wc -l))) + + # Renamed (R) and copied (C) files are more complicated. + # They can report as less than 100% identical even when their contents are the same. + # This is apparently due to metadata changes. Shrug. + # So check whether the contents have changed significantly. + + # Define how to compare a file against its origin for significant content changes. + # Succeed if there are differences, and print the filename. + isReallyDifferent() { ! git diff --ignore-all-space --quiet "$1" "$2" 2>/dev/null && echo "$2"; } + + # Read status, source, and destination as separate records (lines). + # Check the status is R... or C... (otherwise it was parsed incorrectly). + # Then compare the source and dest for significant differences. + # Lastly, if they were different, check them for copyright. + badCopyrightFilter2() { + while read status && read src && read dst + do + case "$status" in + R*) isReallyDifferent "$BASE:$src" "$dst" || log "🫥 Ignoring renamed file: $src -> $dst" ;; + C*) isReallyDifferent "$BASE:$src" "$dst" || log "🫥 Ignoring copied file: $src -> $dst" ;; + *) die "Unexpected status while parsing git diff output: status='$status' src='$src' dst='$dst'" ;; + esac + done | badCopyrightFilter + } + + inf "Checking renamed and copied files..." + OUTPUT="$(git diff --name-status --find-copies-harder --diff-filter=CR -z "$BASE" 2>/dev/null | tr '\0' '\n')" + FAILED=$((FAILED + $(echo "$OUTPUT"| badCopyrightFilter2 | wc -l))) + exit $FAILED +) From 8ea70693649489f7788365d6dd5b31743190dbd7 Mon Sep 17 00:00:00 2001 From: Joe Chacko <143064+joe-chacko@users.noreply.github.com> Date: Fri, 31 May 2024 20:07:52 +0100 Subject: [PATCH 2/4] OPS: avoid git diff if 100% similar --- scripts/check-copyright.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/check-copyright.sh b/scripts/check-copyright.sh index 4b18f411..94cae7a4 100755 --- a/scripts/check-copyright.sh +++ b/scripts/check-copyright.sh @@ -137,6 +137,8 @@ while read status && read src && read dst do case "$status" in + R100) log "🫥 Ignoring renamed file: $src -> $dst" ;; + C100) log "🫥 Ignoring copied file: $src -> $dst" ;; R*) isReallyDifferent "$BASE:$src" "$dst" || log "🫥 Ignoring renamed file: $src -> $dst" ;; C*) isReallyDifferent "$BASE:$src" "$dst" || log "🫥 Ignoring copied file: $src -> $dst" ;; *) die "Unexpected status while parsing git diff output: status='$status' src='$src' dst='$dst'" ;; From a6555ef5cafaccf5726fb1d5b3dc035ad6cd01fc Mon Sep 17 00:00:00 2001 From: Joe Chacko <143064+joe-chacko@users.noreply.github.com> Date: Fri, 31 May 2024 23:40:31 +0100 Subject: [PATCH 3/4] OPS: print only files in check-copyright.sh -t --- scripts/check-copyright.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/check-copyright.sh b/scripts/check-copyright.sh index 94cae7a4..b7898183 100755 --- a/scripts/check-copyright.sh +++ b/scripts/check-copyright.sh @@ -49,11 +49,11 @@ # -t disables logging and info -h|--help) usage; exit 0;; # -t disables logging and info - -t|--terse) exec 3>/dev/null 4>/dev/null 5>&2; shift;; + -t|--terse) exec 3>/dev/null 4>/dev/null 5>&1; wrn() { for arg in "$@"; do :; done; echo "$arg" >&5; }; shift;; # -q disables logging, info, and warning -q|--quiet) exec 3>/dev/null 4>/dev/null 5>/dev/null; shift;; # -v enables logging - -v|--verbose) exec 3>&1 4>&1 5>&2; shift;; + -v|--verbose) exec 3>&1 4>&1 5>&1; shift;; # -- indicates the explicit end of options, so consume it and exit the loop --) shift; break;; # any other option-like string is an error @@ -100,7 +100,7 @@ while read filePath; do [ -f "$filePath" ] || die "Cannot check copyright in non-existent file: '$filePath'" grep -Eq "SPDX-License-Identifier: Apache-2.0" "$filePath" || { - wrn "👿 License identifier not found: $filePath" + wrn "👿 License identifier not found:" "$filePath" echo "$filePath" continue } @@ -109,8 +109,8 @@ existingModifiedYear="$(grep -Eo 'Copyright [0-9]{4} IBM Corporation and' "$filePath" | cut -d ' ' -f 2 )" case "$existingModifiedYear" in "$yearModified") continue ;; - "") wrn "🤬 No copyright year in '$filePath': expected '$yearModified'." ;; - *) wrn "😡 Wrong copyright year in '$filePath': expected '$yearModified' but found '$existingModifiedYear'." ;; + "") wrn "🤬 No copyright year (expected '$yearModified'):" "$filePath" ;; + *) wrn "😡 Wrong copyright year (expected '$yearModified' but was '$existingModifiedYear'):" "$filePath" ;; esac echo "$filePath" } From 82d2335f9b478fb917a5bcf7d724c07b2547bf21 Mon Sep 17 00:00:00 2001 From: Joe Chacko <143064+joe-chacko@users.noreply.github.com> Date: Mon, 3 Jun 2024 13:30:44 +0100 Subject: [PATCH 4/4] OPS: minor tidying after review --- scripts/check-copyright.sh | 48 +++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/scripts/check-copyright.sh b/scripts/check-copyright.sh index b7898183..2e3fc314 100755 --- a/scripts/check-copyright.sh +++ b/scripts/check-copyright.sh @@ -23,10 +23,10 @@ usage() { echo "usage:\t$0 [-q|--quiet|-t|--terse|-v|--verbose] git-base-ref" - echo "\t-h,--help\tprints this usage info" - echo "\t-t,--terse\tsuppresses all informational outpout" - echo "\t-q,--quiet\tsuppresses all non-error outpout" - echo "\t-v,--verbose\tenables verbose output" + echo "\t-h,--help\tprint this usage info" + echo "\t-t,--terse\tprint only the failing file paths" + echo "\t-q,--quiet\tsuppress all non-error output" + echo "\t-v,--verbose\tenable verbose output" } # Copy stdout and stderr to other file descriptors for logging and error reporting @@ -46,18 +46,40 @@ # Parse script options while [ $# -gt 0 ]; do case "$1" in - # -t disables logging and info - -h|--help) usage; exit 0;; - # -t disables logging and info - -t|--terse) exec 3>/dev/null 4>/dev/null 5>&1; wrn() { for arg in "$@"; do :; done; echo "$arg" >&5; }; shift;; + # -h print a usage message and exits successfully + -h|--help) + usage + exit 0 + ;; + # -t disable logging and info + # print only last arg to warning + # (this should be just the pathname to make it easy to open in an editor) + -t|--terse) + exec 3>/dev/null 4>/dev/null 5>&1 + wrn() { [ $# -le 1 ] || shift $(($#-1)); echocat "$@" >&5; } + shift + ;; # -q disables logging, info, and warning - -q|--quiet) exec 3>/dev/null 4>/dev/null 5>/dev/null; shift;; + -q|--quiet) + exec 3>/dev/null 4>/dev/null 5>/dev/null; + shift + ;; # -v enables logging - -v|--verbose) exec 3>&1 4>&1 5>&1; shift;; + -v|--verbose) + exec 3>&1 4>&1 5>&1 + shift + ;; # -- indicates the explicit end of options, so consume it and exit the loop - --) shift; break;; + --) + shift + break + ;; # any other option-like string is an error - -*) err "$0: unknown option '$1'"; usage | die;; + # print error and usage and exit with an error code + -*) + err "$0: unknown option '$1'"; + usage | die + ;; # any non-option-like string indicates the end of the options *) break;; esac @@ -95,7 +117,7 @@ # Log deleted files git diff --name-only --diff-filter=D "$BASE" | sed 's/^/🫥 Ignoring deleted file: /' | log - # Function to print each file from stdin to stdout unless it has good copyright + # Function to print each pathname from stdin to stdout unless it has good copyright badCopyrightFilter() { while read filePath; do [ -f "$filePath" ] || die "Cannot check copyright in non-existent file: '$filePath'"