diff --git a/Dockerfile b/Dockerfile
index a149feab..2ae508d7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,13 @@
 FROM --platform=linux/x86_64 public.ecr.aws/lambda/python:3.9
+
+WORKDIR /opt/app
+
 # Set up working directories
-RUN mkdir -p /opt/app
 RUN mkdir -p /opt/app/build
 RUN mkdir -p /opt/app/bin/
 
 # Copy in the lambda source
-WORKDIR /opt/app
 COPY ./*.py /opt/app/
 COPY requirements.txt /opt/app/requirements.txt
diff --git a/clamav.py b/clamav.py
index 6575e367..2c033951 100644
--- a/clamav.py
+++ b/clamav.py
@@ -189,7 +189,7 @@ def scan_file(path):
     av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH
     print("Starting clamscan of %s." % path)
     av_proc = subprocess.Popen(
-        [CLAMSCAN_PATH, "-v", "-a", "--stdout", "-d", AV_DEFINITION_PATH, path],
+        [CLAMSCAN_PATH, "-v", "-a", "--alert-macros=yes", "--stdout", "-d", AV_DEFINITION_PATH, path],
         stderr=subprocess.STDOUT,
         stdout=subprocess.PIPE,
         env=av_env,
@@ -205,6 +205,6 @@ def scan_file(path):
         signature = summary.get(path, AV_SIGNATURE_UNKNOWN)
         return AV_STATUS_INFECTED, signature
     else:
-        msg = "Unexpected exit code from clamscan: %s.\n" % av_proc.returncode
+        msg = "Unexpected exit code from clamscan for %s: %s.\n" % (path, av_proc.returncode)
         print(msg)
         raise Exception(msg)
diff --git a/retrigger.sh b/retrigger.sh
new file mode 100755
index 00000000..f98c689e
--- /dev/null
+++ b/retrigger.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+INPUT=$1
+
+BUCKET=`echo $INPUT | cut -d'/' -f1`
+
+FILE_AND_PATH=${INPUT#"$BUCKET/"}
+
+echo $BUCKET
+echo $FILE_AND_PATH
+
+JSON_STRING=$( jq -n \
+    --arg b "$BUCKET" \
+    --arg f "$FILE_AND_PATH" \
+    '{ "Records": [{ "s3": { "bucket": { "name": $b }, "object": { "key": $f } } } ] }')
+
+echo $JSON_STRING
+
+FILE=`basename "$FILE_AND_PATH"`
+FOLDER=`dirname "$FILE_AND_PATH"`
+
+echo $FILE
+URL_ENCODED_FILE=`php -r "echo urlencode('$FILE');"`
+
+echo "https://s3.console.aws.amazon.com/s3/buckets/$BUCKET/$FOLDER/?region=eu-west-1&tab=overview&prefixSearch=${URL_ENCODED_FILE}"
+
+aws lambda invoke --cli-binary-format raw-in-base64-out --function-name bucket-antivirus-function --payload "$JSON_STRING" /tmp/lambda_invoke
diff --git a/scan.py b/scan.py
index cb9ba3a8..a9fe1bf7 100644
--- a/scan.py
+++ b/scan.py
@@ -19,6 +19,7 @@
 from urllib.parse import unquote_plus
 
 import boto3
+import botocore
 
 import clamav
 import metrics
@@ -211,6 +212,13 @@ def lambda_handler(event, context):
     print("Script starting at %s\n" % (start_time))
 
     s3_object = event_object(event, event_source=EVENT_SOURCE)
+
+    if s3_object.key.endswith("/"):
+        # We often see that creating "directories" with no file data triggers
+        # an error in the lambda, which in turn triggers an alert in CloudWatch.
+        # So if the object was created as a "directory", skip the scan and
+        # circuit-break early.
+        return
 
     if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY):
         verify_s3_object_version(s3, s3_object)
@@ -221,7 +229,20 @@
     file_path = get_local_path(s3_object, "/tmp")
     create_dir(os.path.dirname(file_path))
-    s3_object.download_file(file_path)
+
+    try:
+        s3_object.download_file(file_path)
+    except botocore.exceptions.ClientError:
+        print("Unexpected exception for file %s.\n" % file_path)
+        if s3_object.key.startswith("test"):
+            # I hypothesise that the test bucket has lots of create/delete operations
+            # and the deletes cause this exception to be raised. If this is the case,
+            # then we should exit without doing anything.
+            print("Ignoring file in test folder %s.\n" % file_path)
+            return
+        else:
+            # But if it's not, re-raise the exception.
+            raise
 
     to_download = clamav.update_defs_from_s3(
         s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX
diff --git a/update.py b/update.py
index 80aa46d1..671f7657 100644
--- a/update.py
+++ b/update.py
@@ -24,6 +24,7 @@
 from common import CLAMAVLIB_PATH
 from common import S3_ENDPOINT
 from common import get_timestamp
+import shutil
 
 
 def lambda_handler(event, context):
@@ -31,16 +32,24 @@ def lambda_handler(event, context):
     s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT)
 
     print("Script starting at %s\n" % (get_timestamp()))
+
+    for root, dirs, files in os.walk(AV_DEFINITION_PATH):
+        for f in files:
+            os.unlink(os.path.join(root, f))
+        for d in dirs:
+            shutil.rmtree(os.path.join(root, d))
+
     to_download = clamav.update_defs_from_s3(
         s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX
    )
-    for download in to_download.values():
-        s3_path = download["s3_path"]
-        local_path = download["local_path"]
-        print("Downloading definition file %s from s3://%s" % (local_path, s3_path))
-        s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path)
-        print("Downloading definition file %s complete!" % (local_path))
+    print("Skipping clamav definition download %s\n" % (get_timestamp()))
+    # for download in to_download.values():
+    #     s3_path = download["s3_path"]
+    #     local_path = download["local_path"]
+    #     print("Downloading definition file %s from s3://%s" % (local_path, s3_path))
+    #     s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path)
+    #     print("Downloading definition file %s complete!" % (local_path))
     clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH)
     # If main.cvd gets updated (very rare), we will need to force freshclam
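
For reference, a usage sketch of the new retrigger.sh (the bucket and object key below are hypothetical): the script takes a single "bucket/key" argument, splits it into a bucket name and object key, wraps them in a minimal S3 event record with jq, and replays that event against the bucket-antivirus-function Lambda via aws lambda invoke, writing the invocation result to /tmp/lambda_invoke.

    # Hypothetical bucket and key, shown only to illustrate the argument format.
    ./retrigger.sh my-example-bucket/uploads/invoice.pdf

This sends a payload of the form {"Records": [{"s3": {"bucket": {"name": "my-example-bucket"}, "object": {"key": "uploads/invoice.pdf"}}}]}, which matches the event shape that scan.py's event_object() expects from a real S3 notification.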