"""
filter.py — Log filtering stage

Reads raw log files from LOG_DIR and writes only security-relevant lines
to LOG_DIR/important/<name>.log in real-time.

Flask log format (Gunicorn access log):
    <ip> - - [<time>] "<method> <path> HTTP/x.x" <status> <size> "<referer>" "<agent>"

Spring log format:
    <time> <level> --- [<thread>] <logger> : <message>
"""

import os
import re
import time
import signal
import logging
import threading
from pathlib import Path

# Directory layout and polling cadence are environment-configurable so the
# same image works in compose and bare-metal deployments.
LOG_DIR = Path(os.environ.get("LOG_DIR", "/app/logs"))
IMPORTANT_DIR = LOG_DIR / "important"
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "5"))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger(__name__)

# shutdown_event is injected by main.py when used as a module;
# falls back to a local event when run standalone
shutdown_event = threading.Event()


def _standalone_signal_handler(sig, frame):
    """Standalone-mode signal handler: request a graceful shutdown."""
    shutdown_event.set()


if __name__ == "__main__":
    # Only install handlers when running standalone; as a module, main.py
    # owns signal handling and shares its own shutdown event.
    signal.signal(signal.SIGTERM, _standalone_signal_handler)
    signal.signal(signal.SIGINT, _standalone_signal_handler)


# ---------------------------------------------------------------------------
# Flask filter rules
# Match lines that contain security-relevant paths or non-2xx status codes
# ---------------------------------------------------------------------------

# Paths that are always important regardless of status
FLASK_IMPORTANT_PATHS = re.compile(
    r'"(?:POST|PUT|DELETE|PATCH) (?:'
    r'/users/reset_password/\d+'            # password reset
    r'|/users/delete/\d+'                   # user deletion
    r'|/delete_user/[^"]*'                  # kasm user deletion
    r'|/update_user/[^"]*'                  # user update
    r'|/api/user'                           # user creation
    r'|/login'                              # login attempts
    r')'
)

# Any response that is 4xx or 5xx is worth logging
FLASK_ERROR_STATUS = re.compile(r'" [45]\d\d ')


def is_flask_important(line: str) -> bool:
    """Return True when a Flask access-log line is security-relevant."""
    return bool(FLASK_IMPORTANT_PATHS.search(line) or FLASK_ERROR_STATUS.search(line))


# ---------------------------------------------------------------------------
# Spring filter rules
# ---------------------------------------------------------------------------

SPRING_IMPORTANT_PATTERNS = re.compile(
    r'(?:'
    r'ERROR'                                # any ERROR level log
    r'|password'                            # password-related operations
    r'|/api/person.*(?:POST|PUT|DELETE)'    # user CRUD
    r'|delete'                              # deletion operations
    r'|migration'                           # schema migrations
    r'|Exception'                           # exceptions
    r'|WARN.*(?:auth|login|token|jwt|forbidden|unauthorized)'  # auth warnings
    r')',  # BUG FIX: this closing paren was missing, so re.compile raised
           # "missing ), unterminated subpattern" at import time
    re.IGNORECASE,
)


def is_spring_important(line: str) -> bool:
    """Return True when a Spring log line is security-relevant."""
    return bool(SPRING_IMPORTANT_PATTERNS.search(line))


# ---------------------------------------------------------------------------
# Per-source filter dispatch
# ---------------------------------------------------------------------------

FILTERS = {
    "flask": is_flask_important,
    "spring": is_spring_important,
}


def detect_source(filename: str) -> str:
    """Classify a log file as 'flask', 'spring', or 'unknown' by its name."""
    name = filename.lower()
    if "flask" in name:
        return "flask"
    if "spring" in name or "java" in name:
        return "spring"
    return "unknown"


def filter_source_matches(line: str) -> bool:
    """Fallback: keep ERROR/WARN lines from any unknown source."""
    return bool(re.search(r'\b(?:ERROR|WARN|Exception)\b', line))


# ---------------------------------------------------------------------------
# File tail + filter loop
# ---------------------------------------------------------------------------

def tail_and_filter(raw_log: Path):
    """Tail `raw_log` forever, appending important lines to IMPORTANT_DIR.

    Only lines written AFTER this thread starts are considered (we seek to
    EOF first), so restarting the service never re-emits history.
    """
    source = detect_source(raw_log.name)
    is_important = FILTERS.get(source, filter_source_matches)

    out_path = IMPORTANT_DIR / raw_log.name
    out_path.parent.mkdir(parents=True, exist_ok=True)

    log.info(f"Filtering {raw_log.name} ({source}) -> {out_path}")

    # Wait for the raw log to exist
    while not raw_log.exists() and not shutdown_event.is_set():
        shutdown_event.wait(POLL_INTERVAL)

    # BUG FIX: if shutdown was requested before the file ever appeared,
    # the original fell through and open() raised FileNotFoundError.
    if shutdown_event.is_set() or not raw_log.exists():
        return

    with open(raw_log, "r") as infile, open(out_path, "a") as outfile:
        # Seek to end so we only process new lines going forward
        infile.seek(0, 2)

        while not shutdown_event.is_set():
            line = infile.readline()
            if not line:
                # No new data yet; short sleep that still wakes on shutdown
                shutdown_event.wait(0.2)
                continue
            if is_important(line):
                outfile.write(line)
                outfile.flush()  # real-time visibility for downstream readers


def watch_for_new_logs():
    """Watch LOG_DIR for new *.log files and spin up filter threads for them."""
    known: set[str] = set()
    threads: list[threading.Thread] = []

    while not shutdown_event.is_set():
        current = {p.name for p in LOG_DIR.glob("*.log")}
        new = current - known
        for name in new:
            raw_log = LOG_DIR / name
            t = threading.Thread(
                target=tail_and_filter,
                args=(raw_log,),
                name=f"filter-{name}",
                daemon=True,
            )
            t.start()
            threads.append(t)
            known.add(name)
        shutdown_event.wait(POLL_INTERVAL)

    # Give tailer threads a bounded chance to drain before exiting
    for t in threads:
        t.join(timeout=5)


if __name__ == "__main__":
    IMPORTANT_DIR.mkdir(parents=True, exist_ok=True)
    log.info(f"Filter service starting — watching {LOG_DIR} for *.log files")
    watch_for_new_logs()
    log.info("Filter service stopped.")
LOG_DIR for new *.log files and filters them + filter_thread = threading.Thread(target=log_filter.watch_for_new_logs, name="filter", daemon=True) + filter_thread.start() + threads.append(filter_thread) + # Keep main thread alive until shutdown while not shutdown_event.is_set(): # Log a heartbeat every minute so operators know the service is running From 5c29c0e0928d5f57365d593de6ba1aff13128e76 Mon Sep 17 00:00:00 2001 From: illuminati1618 Date: Fri, 20 Feb 2026 13:26:37 -0800 Subject: [PATCH 2/2] Add snapshot functionality for Aurora/RDS and SQLite databases --- requirements.txt | 1 + snapshot.py | 247 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 snapshot.py diff --git a/requirements.txt b/requirements.txt index 0fb3792..6d768b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ docker==7.1.0 +boto3==1.35.0 diff --git a/snapshot.py b/snapshot.py new file mode 100644 index 0000000..297aaa5 --- /dev/null +++ b/snapshot.py @@ -0,0 +1,247 @@ +""" +snapshot.py — Automated database snapshot system + +Handles two database types: + 1. Aurora/RDS (Flask production) — uses boto3 to create AWS snapshots + 2. SQLite (Spring production) — copies the .db file to a timestamped backup + +Can be run via cron, manually, or imported as a module (for future API use). 
+""" + +import os +import sys +import shutil +import logging +from datetime import datetime, timedelta +from pathlib import Path + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) +log = logging.getLogger(__name__) + +# --- Configuration --- +AWS_REGION = os.environ.get("AWS_REGION", "us-west-2") +RDS_INSTANCE_ID = os.environ.get("RDS_INSTANCE_ID", "") +SPRING_SQLITE_PATH = Path(os.environ.get("SPRING_SQLITE_PATH", "/spring-volumes/sqlite.db")) +BACKUP_DIR = Path(os.environ.get("BACKUP_DIR", "/app/backups")) + +RETENTION_DAILY = int(os.environ.get("RETENTION_DAILY", "7")) +RETENTION_WEEKLY = int(os.environ.get("RETENTION_WEEKLY", "4")) +RETENTION_MONTHLY = int(os.environ.get("RETENTION_MONTHLY", "3")) + + +# --------------------------------------------------------------------------- +# Aurora/RDS snapshots +# --------------------------------------------------------------------------- + +def snapshot_aurora(trigger: str = "scheduled") -> bool: + """Create an RDS snapshot with metadata tags.""" + if not RDS_INSTANCE_ID: + log.warning("RDS_INSTANCE_ID not set, skipping Aurora snapshot") + return False + + try: + import boto3 + except ImportError: + log.error("boto3 not installed, cannot create Aurora snapshot") + return False + + now = datetime.utcnow() + snapshot_id = f"{RDS_INSTANCE_ID}-{now.strftime('%Y%m%d-%H%M%S')}" + + try: + rds = boto3.client("rds", region_name=AWS_REGION) + log.info(f"Creating RDS snapshot: {snapshot_id}") + + rds.create_db_snapshot( + DBSnapshotIdentifier=snapshot_id, + DBInstanceIdentifier=RDS_INSTANCE_ID, + Tags=[ + {"Key": "trigger", "Value": trigger}, + {"Key": "created_by", "Value": "db-automator"}, + {"Key": "timestamp", "Value": now.isoformat()}, + ], + ) + log.info(f"Aurora snapshot '{snapshot_id}' creation initiated") + return True + + except Exception as e: + log.error(f"Failed to create Aurora snapshot: {e}") + return False + + +def cleanup_aurora(): + """Delete Aurora snapshots 
beyond retention policy.""" + if not RDS_INSTANCE_ID: + return + + try: + import boto3 + except ImportError: + return + + try: + rds = boto3.client("rds", region_name=AWS_REGION) + response = rds.describe_db_snapshots( + DBInstanceIdentifier=RDS_INSTANCE_ID, + SnapshotType="manual", + ) + snapshots = response.get("DBSnapshots", []) + + # Only manage snapshots created by us + our_snapshots = [] + for snap in snapshots: + tags_resp = rds.list_tags_for_resource(ResourceName=snap["DBSnapshotArn"]) + tags = {t["Key"]: t["Value"] for t in tags_resp.get("TagList", [])} + if tags.get("created_by") == "db-automator": + our_snapshots.append({ + "id": snap["DBSnapshotIdentifier"], + "time": snap["SnapshotCreateTime"], + }) + + our_snapshots.sort(key=lambda s: s["time"], reverse=True) + _apply_retention(our_snapshots, delete_fn=lambda s: _delete_aurora_snapshot(rds, s["id"])) + + except Exception as e: + log.error(f"Aurora cleanup failed: {e}") + + +def _delete_aurora_snapshot(rds, snapshot_id: str): + log.info(f"Deleting Aurora snapshot: {snapshot_id}") + rds.delete_db_snapshot(DBSnapshotIdentifier=snapshot_id) + + +# --------------------------------------------------------------------------- +# SQLite snapshots +# --------------------------------------------------------------------------- + +def snapshot_sqlite(trigger: str = "scheduled") -> bool: + """Copy the Spring SQLite database to a timestamped backup.""" + if not SPRING_SQLITE_PATH.exists(): + log.warning(f"SQLite file not found at {SPRING_SQLITE_PATH}, skipping") + return False + + now = datetime.utcnow() + dest_dir = BACKUP_DIR / "spring" + dest_dir.mkdir(parents=True, exist_ok=True) + + filename = f"sqlite_{now.strftime('%Y%m%d_%H%M%S')}.db" + dest = dest_dir / filename + + try: + log.info(f"Copying SQLite database: {SPRING_SQLITE_PATH} -> {dest}") + shutil.copy2(SPRING_SQLITE_PATH, dest) + + # Also copy WAL and SHM files if they exist (for consistency) + for suffix in ["-wal", "-shm"]: + wal = 
SPRING_SQLITE_PATH.parent / (SPRING_SQLITE_PATH.name + suffix) + if wal.exists(): + shutil.copy2(wal, dest_dir / (filename + suffix)) + + size_mb = dest.stat().st_size / (1024 * 1024) + log.info(f"SQLite snapshot saved: {dest} ({size_mb:.1f} MB)") + return True + + except Exception as e: + log.error(f"Failed to create SQLite snapshot: {e}") + return False + + +def cleanup_sqlite(): + """Delete old SQLite backups beyond retention policy.""" + backup_dir = BACKUP_DIR / "spring" + if not backup_dir.exists(): + return + + backups = sorted(backup_dir.glob("sqlite_*.db"), key=lambda p: p.stat().st_mtime, reverse=True) + + snapshots = [{"id": p.name, "time": datetime.fromtimestamp(p.stat().st_mtime), "path": p} for p in backups] + + def delete_sqlite_backup(snap): + path = snap["path"] + log.info(f"Deleting old SQLite backup: {path.name}") + path.unlink() + # Clean up associated WAL/SHM files + for suffix in ["-wal", "-shm"]: + companion = path.parent / (path.name + suffix) + if companion.exists(): + companion.unlink() + + _apply_retention(snapshots, delete_fn=delete_sqlite_backup) + + +# --------------------------------------------------------------------------- +# Retention logic +# --------------------------------------------------------------------------- + +def _apply_retention(snapshots: list[dict], delete_fn): + """ + Keep the most recent RETENTION_DAILY snapshots, + plus one per week for RETENTION_WEEKLY weeks, + plus one per month for RETENTION_MONTHLY months. + Delete the rest. 
+ """ + if not snapshots: + return + + now = datetime.utcnow() + keep = set() + + # Keep the N most recent (daily) + for snap in snapshots[:RETENTION_DAILY]: + keep.add(snap["id"]) + + # Keep one per week for the last N weeks + for weeks_ago in range(RETENTION_WEEKLY): + week_start = now - timedelta(weeks=weeks_ago + 1) + week_end = now - timedelta(weeks=weeks_ago) + for snap in snapshots: + t = snap["time"] + # Handle timezone-aware datetimes + if hasattr(t, 'tzinfo') and t.tzinfo is not None: + t = t.replace(tzinfo=None) + if week_start <= t < week_end: + keep.add(snap["id"]) + break + + # Keep one per month for the last N months + for months_ago in range(RETENTION_MONTHLY): + month_start = now - timedelta(days=30 * (months_ago + 1)) + month_end = now - timedelta(days=30 * months_ago) + for snap in snapshots: + t = snap["time"] + if hasattr(t, 'tzinfo') and t.tzinfo is not None: + t = t.replace(tzinfo=None) + if month_start <= t < month_end: + keep.add(snap["id"]) + break + + # Delete everything not in the keep set + for snap in snapshots: + if snap["id"] not in keep: + delete_fn(snap) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def run_all(trigger: str = "scheduled"): + """Run all snapshot + cleanup operations.""" + log.info(f"=== Snapshot run starting (trigger={trigger}) ===") + + aurora_ok = snapshot_aurora(trigger=trigger) + sqlite_ok = snapshot_sqlite(trigger=trigger) + + cleanup_aurora() + cleanup_sqlite() + + log.info(f"=== Snapshot run complete (aurora={'OK' if aurora_ok else 'SKIP'}, sqlite={'OK' if sqlite_ok else 'SKIP'}) ===") + return aurora_ok or sqlite_ok + + +if __name__ == "__main__": + trigger = sys.argv[1] if len(sys.argv) > 1 else "manual" + run_all(trigger=trigger)