Skip to content

Commit

Permalink
add utils to clean history
Browse files Browse the repository at this point in the history
  • Loading branch information
Skitionek committed Jan 5, 2024
1 parent c24496d commit 19a571c
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 0 deletions.
9 changes: 9 additions & 0 deletions .gitleaks.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# gitleaks configuration used by the history-cleaning tooling.
# NOTE(review): comments below assume gitleaks v8 config semantics — confirm
# against the gitleaks version actually in use.

# Build on top of gitleaks' built-in default rules rather than replacing them.
[extend]
useDefault = true

# Paths whose findings are ignored. Judging by their names these are fixture /
# mock / seed data files — presumably not real credentials; verify before
# relying on this list.
# NOTE(review): entries are presumably interpreted as regular expressions, in
# which case the unescaped dots match any character — confirm.
[allowlist]
paths = [
'''client/src/app/drawing-tool/services/mock_data/sample-annotation.ts''',
'''appserver/fixtures/seed.json''',
'''appserver/migrations/upgrade_data/initial_project''',
]
22 changes: 22 additions & 0 deletions .gitleaksignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
f1b5abe6ca54f38ca67cecb8df28d0723c236311:appserver/tests/api/drawing_tool_test.py:hashicorp-tf-password:54
f1b5abe6ca54f38ca67cecb8df28d0723c236311:appserver/tests/api/drawing_tool_test.py:hashicorp-tf-password:18
e5a87dfb0aea632407638b86d34cf64151d842ef:appserver/tests/database/services/annotations/annotations_test.py:generic-api-key:2232
e5a87dfb0aea632407638b86d34cf64151d842ef:appserver/tests/database/services/annotations/annotations_test.py:generic-api-key:2019
9820fadd89469c1406446cffe527ffb85e7f8cfe:logstash/pipeline/beats.conf:hashicorp-tf-password:14
9820fadd89469c1406446cffe527ffb85e7f8cfe:docker-compose.override.yml:hashicorp-tf-password:204
69351c9f22b36c992f2bb67bb9518ceb8e04448f:docker-compose.override.yml:hashicorp-tf-password:215
d1b73cc5c04057634718ff40bbe784c856ddc371:docker-compose.middleware.yml:hashicorp-tf-password:47
921709df8283c32e3422543ba2feb2850922ba82:docker-compose.override.yml:hashicorp-tf-password:215
a7eddbb0195f18c538923f940bb5f05ca04d0601:docker-scripts/docker-compose.override.yml:hashicorp-tf-password:109
f0f102a5acbf35d1cd443c93ab3a5f7caf18b884:helm/lifelike/examples/external-db.yaml:hashicorp-tf-password:27
6749f05a9522b5b1c231fd3414051210e5c8fde1:helm/lifelike/examples/external-db.yaml:hashicorp-tf-password:27
c47f8693c24b5c2128bab0d6d5301db8c072b00d:client/src/app/shared/components/form/password-input.component.ts:hashicorp-tf-password:15
c47f8693c24b5c2128bab0d6d5301db8c072b00d:appserver/neo4japp/constants.py:hashicorp-tf-password:90
19e78377c66e96d42771e48c83004351f382ef16:appserver/neo4japp/constants.py:hashicorp-tf-password:90
59d67cd18c6f11f0ff8160ede51686b865a9fed0:client/src/app/shared/components/form/password-input.component.ts:hashicorp-tf-password:15
7ab3a7d47eea78eb121d68ef79442c1b4b94459a:docker-compose.local.yml:hashicorp-tf-password:329
7b1ff75bb7d95492b1d607632bab3f2685702f76:docker-compose.yml:hashicorp-tf-password:326
80a392671fe2f8bbff734c9b0950a9ee522e884c:docker-compose.local.yml:hashicorp-tf-password:348
87dbe3bcca6cd28c33c43d0ce59413e050d1e03e:docker-compose.local.yml:hashicorp-tf-password:329
f22a62517eb0ed280b0d8b906eb2ee81e77567c5:docker-compose.yml:hashicorp-tf-password:326
11613a33702a1c5d80c499a9de8f440c8926d0e9:docker-compose.local.yml:hashicorp-tf-password:348
167 changes: 167 additions & 0 deletions go-open.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/bin/bash
# -----------------------------------------------------------------------------------------
# go-open.sh - Git Repository History Cleansing
# -----------------------------------------------------------------------------------------
# Directory that is wiped before, and read after, `git filter-repo --analyze`.
ANALYSIS_PATH=.git/filter-repo/analysis
# -----------------------------------------------------------------------------------------

# `return` at top level only succeeds while a file is being sourced; probing it
# inside a subshell reveals how this script was started without side effects.
if (return 0 2>/dev/null); then
  SOURCED=1
else
  SOURCED=0
fi

# Abort on the first failing command only when executed directly —
# presumably so that a failure cannot terminate a shell that sourced this file.
case $SOURCED in
  0) set -e ;;
esac

# Echo all arguments to stdout, but only when $VERBOSE is set to a non-empty
# value; otherwise do nothing. Always returns success when silent.
verbose() {
  if [[ -n $VERBOSE ]]; then
    echo "$@"
  fi
}

# Temporary directory that holds every intermediate list/report of this script.
# An explicit XXXXXX template is used because `mktemp -t <prefix>` is BSD-only;
# GNU mktemp rejects a template without X's.
# NOTE(review): the directory is never removed and later holds decrypted
# secrets — confirm whether a cleanup step is wanted once the dry-run output
# has been inspected.
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/go-open.XXXXXX")
if [[ -z $TMP_DIR || ! -d $TMP_DIR ]]; then
  echo "go-open.sh: could not create a temporary directory" >&2
  # `return` works when sourced; fall back to `exit` when executed.
  return 1 2>/dev/null || exit 1
fi
verbose "TMP_DIR: $TMP_DIR"

#######################################
# Open a file in vi, optionally showing a temporary instruction header.
#
# The header is written in front of the file content for the duration of the
# editing session and stripped again afterwards, provided it is still intact
# at the very top of the file (if the user changed or removed it, the file is
# left exactly as the user saved it).
#
# The header is handled as literal bytes instead of being spliced into a sed
# expression, so characters such as `/`, `&`, `.` or `(` are safe, multi-line
# headers are removed correctly, and no BSD-only `sed -i ''` is needed.
#
# Arguments:
#   $1 - path of the file to edit
#   $2 - optional header; backslash escapes such as "\n" are expanded
# Returns:
#   vi's exit status, or non-zero if the file could not be prepared/restored.
#######################################
edit_file() {
  local file=$1
  local header=${2-}
  local header_text='' header_size scratch status=0

  if [[ -n $header ]]; then
    # Callers pass literal "\n" sequences; turn them into real newlines.
    printf -v header_text '%b' "$header"
  fi

  if [[ -z $header_text ]]; then
    vi "$file"
    return
  fi

  # Byte length of the header (arithmetic strips the padding BSD wc emits).
  header_size=$(printf '%s' "$header_text" | wc -c) || return
  header_size=$((header_size))

  scratch=$(mktemp) || return

  # Prepend the header. Content is copied back with `cat >` so the original
  # file keeps its inode and permissions.
  if ! { printf '%s' "$header_text" && cat -- "$file"; } > "$scratch" \
    || ! cat -- "$scratch" > "$file"; then
    rm -f -- "$scratch"
    return 1
  fi

  vi "$file" || status=$?

  # Strip the header only when the file still starts with it byte-for-byte.
  if cmp -s <(head -c "$header_size" -- "$file") <(printf '%s' "$header_text"); then
    if ! { tail -c "+$((header_size + 1))" -- "$file" > "$scratch" \
      && cat -- "$scratch" > "$file"; }; then
      status=1
    fi
  fi

  rm -f -- "$scratch"
  return "$status"
}

#######################################
# Ask the user whether to proceed and loop until a valid answer is given.
#
# "yes" returns to the caller; "no" terminates the shell via `exit`.
# Any other answer is rejected and the question is asked again.
# If standard input reaches end-of-file before a valid answer arrives, the
# shell exits with status 1 instead of re-prompting forever.
#
# Outputs: status messages on stdout, the EOF diagnostic on stderr.
#######################################
confirm() {
  local answer
  while true; do
    answer=''
    # `read` fails on EOF; a non-empty $answer then is a final line that
    # lacked a trailing newline and is still processed below.
    if ! read -r -p "Do you want to proceed? (yes/no) " answer \
      && [[ -z $answer ]]; then
      echo "no answer received, exiting..." >&2
      exit 1
    fi

    case $answer in
      yes)
        echo ok, we will proceed
        break
        ;;
      no)
        echo exiting...
        exit
        ;;
      *)
        echo invalid response
        ;;
    esac
  done
}

#######################################
# Dry-run removal of binary files that were deleted from the repository.
#
# Steps:
#   1. Regenerate the `git filter-repo --analyze` reports.
#   2. Extract the deleted paths from path-deleted-sizes.txt.
#   3. Collect every binary path in history (numstat reports "-" for binary
#      files), keep only those that are also in the deleted list, and drop
#      .jpg/.png/.ico files.
#   4. Let the user prune the list in an editor.
#   5. Run `git filter-repo --dry-run --invert-paths` with the resulting list.
#
# Globals:
#   ANALYSIS_PATH          (read)    filter-repo analysis directory
#   TMP_DIR                (read)    scratch directory
#   DELETED_PATHS          (written) file listing deleted paths
#   DELETED_BINARIES_PATH  (written) file listing paths selected for removal
#######################################
delete_deleted_binaries() {
  DELETED_PATHS="$TMP_DIR/deleted-paths.txt"
  DELETED_BINARIES_PATH="$TMP_DIR/deleted-binaries.txt"

  verbose "Delete old analysis reports"
  # :? aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${ANALYSIS_PATH:?}"

  verbose "Analyze repository history"
  git filter-repo --analyze

  verbose "Get list of deleted files"
  # Skip the two header lines; after squeezing blanks the path starts at the
  # fifth space-separated field (the first field is empty due to indentation).
  tail -n +3 "$ANALYSIS_PATH/path-deleted-sizes.txt" |
    tr -s ' ' |
    cut -d ' ' -f 5- > "$DELETED_PATHS"

  verbose "Get list of deleted binary files"
  # git log does not read stdin, so the deleted-path list has to be applied as
  # an explicit filter (grep -Fx -f); otherwise binaries that still exist in
  # HEAD would be selected as well.
  # core.quotePath=false keeps non-ASCII paths unquoted so they can match.
  # grep exits 1 when nothing matches; that is a valid (empty) result here and
  # must not abort the script under `set -e`.
  git -c core.quotePath=false log --all --numstat |
    grep '^-' |
    cut -f3 |
    sed -E 's|(.*)\{(.*) => (.*)\}(.*)|\1\2\4\n\1\3\4|g' |
    sort -u |
    { grep -Fx -f "$DELETED_PATHS" || [[ $? -eq 1 ]]; } |
    { grep -ivE '\.(jpg|png|ico)$' || [[ $? -eq 1 ]]; } > "$DELETED_BINARIES_PATH"

  verbose "Let user delete paths to files that should not be deleted from history"
  edit_file "$DELETED_BINARIES_PATH" "# Delete the paths to files that should not be deleted from history\n\n"

  # printf instead of `echo -e` so backslashes in paths are printed verbatim.
  printf 'Selected paths to be deleted from history:\n%s\n' "$(cat -- "$DELETED_BINARIES_PATH")"

  verbose "git filter-repo --dry-run --force --invert-paths --paths-from-file $DELETED_BINARIES_PATH"
  git filter-repo --dry-run --force --invert-paths --paths-from-file "$DELETED_BINARIES_PATH"
}

#######################################
# Dry-run masking of the secrets currently stored in the deployment vaults.
#
# Steps:
#   1. Initialise the `deployment` submodule and run `make ansible-secrets`.
#   2. Decrypt every `vault.yml` below `deployment` into one file.
#   3. Deinitialise the submodule again.
#   4. Let the user comment out values that must not be masked.
#   5. Rewrite the file in place into "value==>***key***" lines with yq.
#   6. Run `git filter-repo --dry-run --replace-text` with that mapping.
#
# Globals:
#   TMP_DIR                  (read)    scratch directory
#   DEPLOYMENT_SECRETS_PATH  (written) decrypted secrets / mapping file
#
# NOTE(review): the decrypted secrets stay on disk in $TMP_DIR after this
# function returns — confirm that this is acceptable.
#######################################
clean_history_from_currently_used_deployment_secrets() {
  DEPLOYMENT_SECRETS_PATH="$TMP_DIR/deployment-secrets.txt"

  verbose "Checkout deployment submodule"
  git submodule update --init deployment

  verbose "Pull encryption key"
  make ansible-secrets

  verbose "Decrypt deployment secrets"
  # -n 1: exactly one vault file per ansible-vault invocation (NUL-delimited
  # input, so file names with blanks are safe).
  find deployment -type f -name vault.yml -print0 |
    xargs -0 -n 1 \
      ansible-vault decrypt --output - --vault-password-file=deployment/ansible/.vault_secrets_pw \
      > "$DEPLOYMENT_SECRETS_PATH"

  verbose "Deinitialize deployment submodule"
  git submodule deinit deployment

  verbose "Let user comment out deployment secrets"
  edit_file "$DEPLOYMENT_SECRETS_PATH" "# Please comment out any commonly useed values (like \"root\" - which should not be in secrets in first place)\n\n"

  verbose "List deployment secrets keys"
  # printf instead of `echo -e` so backslashes in keys are printed verbatim.
  printf 'Selected deployment secrets:\n%s\n' "$(yq '.* | key' "$DEPLOYMENT_SECRETS_PATH")"

  verbose "Parse deployment secrets into a mask mapping (value ==> **key**)"
  yq -i '.* | ( . + "==>" + "***" + key + "***" )' "$DEPLOYMENT_SECRETS_PATH"

  verbose "git filter-repo --dry-run --force --replace-text $DEPLOYMENT_SECRETS_PATH"
  git filter-repo --dry-run --force --replace-text "$DEPLOYMENT_SECRETS_PATH"
}

#######################################
# Dry-run masking of the secrets reported by gitleaks.
#
# Steps:
#   1. Run `gitleaks detect` and store its JSON report.
#   2. Extract the unique `Secret` values from the report.
#   3. Let the user review the list in an editor.
#   4. Run `git filter-repo --dry-run --replace-text` with that list.
#
# Globals:
#   TMP_DIR                (read)    scratch directory
#   GITLEAKS_REPORT_PATH   (written) gitleaks JSON report
#   GITLEAKS_SECRETS_PATH  (written) one secret per line
#######################################
clean_history_from_gitleaks_secrets() {
  GITLEAKS_REPORT_PATH="$TMP_DIR/gitleaks-report.txt"
  GITLEAKS_SECRETS_PATH="$TMP_DIR/gitleaks-secrets.txt"

  verbose "Run gitleaks in search for secrets"
  # By default gitleaks exits with 1 when leaks are found, which would abort
  # the script under `set -e` in exactly the case this function exists for.
  # --exit-code=0 makes "leaks found" a success.
  gitleaks detect --exit-code=0 --report-path="$GITLEAKS_REPORT_PATH" -f json

  verbose "Parse gitleaks report into a list of secrets"
  jq -r '.[].Secret' "$GITLEAKS_REPORT_PATH" | sort -u > "$GITLEAKS_SECRETS_PATH"

  verbose "Let user comment out secrets"
  edit_file "$GITLEAKS_SECRETS_PATH" "# Please check this list, adjust .gitleaks.toml or .gitleaksignore to modify it\n\n"

  verbose "git filter-repo --dry-run --force --replace-text $GITLEAKS_SECRETS_PATH"
  git filter-repo --dry-run --force --replace-text "$GITLEAKS_SECRETS_PATH"
}

#######################################
# Dry-run masking of the secrets reported by trufflehog.
#
# Steps:
#   1. Scan the repository in the current directory with trufflehog, skipping
#      paths matched by trufflehog-exclude-patterns.txt.
#   2. Extract the unique `Raw` values from the JSON output.
#   3. Let the user review the list in an editor.
#   4. Run `git filter-repo --dry-run --replace-text` with that list.
#
# Globals:
#   TMP_DIR                  (read)    scratch directory
#   TRUFFLEHOG_REPORT_PATH   (written) trufflehog JSON output
#   TRUFFLEHOG_SECRETS_PATH  (written) one secret per line
#######################################
clean_history_from_trufflehog_secrets() {
  TRUFFLEHOG_REPORT_PATH="$TMP_DIR/trufflehog-report.txt"
  TRUFFLEHOG_SECRETS_PATH="$TMP_DIR/trufflehog-secrets.txt"

  verbose "Run trufflehog in search for secrets"
  trufflehog git "file://$PWD" --json --exclude-paths trufflehog-exclude-patterns.txt > "$TRUFFLEHOG_REPORT_PATH"

  verbose "Parse trufflehog report into a list of secrets"
  # `// empty` drops records without a Raw value; plain `.Raw` would print the
  # literal word "null", which would then be used as a text to replace.
  jq -r '.Raw // empty' "$TRUFFLEHOG_REPORT_PATH" | sort -u > "$TRUFFLEHOG_SECRETS_PATH"

  verbose "Let user comment out secrets"
  # The hint names the exclude file exactly as it is passed to trufflehog above
  # (no leading dot).
  edit_file "$TRUFFLEHOG_SECRETS_PATH" "# Please check this list, adjust trufflehog-exclude-patterns.txt to modify it\n\n"

  verbose "git filter-repo --dry-run --force --replace-text $TRUFFLEHOG_SECRETS_PATH"
  git filter-repo --dry-run --force --replace-text "$TRUFFLEHOG_SECRETS_PATH"
}

#######################################
# Run the four history-cleaning stages one after another, announcing each
# stage on stdout and explaining it through verbose().
#
# NOTE(review): nothing in the visible part of this file calls main —
# confirm whether it is meant to be invoked manually after sourcing.
#######################################
main() {
  # Stage I: binaries that were deleted from the repository.
  echo -e "\nI. Clean history from deleted binary files (excluding .jpg, .png, .ico)"
  verbose "This operation will delete all traces of binary files that once existed, but were later removed."
  verbose "This is done to clean history from potentially copyrighted materials like publications or DB dumps."
  delete_deleted_binaries

  # Stage II: secrets currently stored in the deployment vaults.
  echo -e "\nII. Clean history from currently used deployment secrets"
  verbose "This operation will delete all traces of secrets that are currently used in the deployment."
  verbose "This is done to clean history from deployment secrets that are not supposed to be stored in the repository."
  clean_history_from_currently_used_deployment_secrets

  # Stage III: secrets detected by gitleaks.
  echo -e "\nIII. Clean history from secrets found using gitleaks"
  verbose "This operation will delete all traces of secrets that were found using gitleaks."
  verbose "This is done to clean history from old secrets that are not currently used in deployment but still might contain valid credentials."
  clean_history_from_gitleaks_secrets

  # Stage IV: secrets detected by trufflehog.
  echo -e "\nIV. Cleanup history from secrets found using trufflehog"
  verbose "This operation will delete all traces of secrets that were found using trufflehog."
  verbose "This is done to clean history from any secrets that might have been missed by gitleaks."
  clean_history_from_trufflehog_secrets
}
3 changes: 3 additions & 0 deletions trufflehog-exclude-patterns.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
(.*/)?package-lock.json
(.*/)?Pipfile.lock
(.*/)?yarn.lock

0 comments on commit 19a571c

Please sign in to comment.