"""
filter.py — Log filtering stage

Reads raw log files from LOG_DIR and writes only security-relevant lines
to LOG_DIR/important/<name>.log in real-time.

Flask log format (Gunicorn access log):
    <ip> - - [<time>] "<method> <path> HTTP/x.x" <status> <size> "<referer>" "<agent>"

Spring log format:
    <time> <level> --- [<thread>] <logger> : <message>
"""

import os
import re
import time
import signal
import logging
import threading
from pathlib import Path

# Directory layout and polling cadence are environment-configurable so the
# same image works in compose and bare-metal deployments.
LOG_DIR = Path(os.environ.get("LOG_DIR", "/app/logs"))
IMPORTANT_DIR = LOG_DIR / "important"
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "5"))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)
log = logging.getLogger(__name__)

# shutdown_event is injected by main.py when used as a module;
# falls back to a local event when run standalone
shutdown_event = threading.Event()


def _standalone_signal_handler(sig, frame):
    """Standalone-mode signal handler: request a graceful shutdown."""
    shutdown_event.set()


if __name__ == "__main__":
    # Only install handlers when running standalone; as a module, main.py
    # owns signal handling and shares its own shutdown event.
    signal.signal(signal.SIGTERM, _standalone_signal_handler)
    signal.signal(signal.SIGINT, _standalone_signal_handler)


# ---------------------------------------------------------------------------
# Flask filter rules
# Match lines that contain security-relevant paths or non-2xx status codes
# ---------------------------------------------------------------------------

# Paths that are always important regardless of status
FLASK_IMPORTANT_PATHS = re.compile(
    r'"(?:POST|PUT|DELETE|PATCH) (?:'
    r'/users/reset_password/\d+'            # password reset
    r'|/users/delete/\d+'                   # user deletion
    r'|/delete_user/[^"]*'                  # kasm user deletion
    r'|/update_user/[^"]*'                  # user update
    r'|/api/user'                           # user creation
    r'|/login'                              # login attempts
    r')'
)

# Any response that is 4xx or 5xx is worth logging
FLASK_ERROR_STATUS = re.compile(r'" [45]\d\d ')


def is_flask_important(line: str) -> bool:
    """Return True when a Flask access-log line is security-relevant."""
    return bool(FLASK_IMPORTANT_PATHS.search(line) or FLASK_ERROR_STATUS.search(line))


# ---------------------------------------------------------------------------
# Spring filter rules
# ---------------------------------------------------------------------------

SPRING_IMPORTANT_PATTERNS = re.compile(
    r'(?:'
    r'ERROR'                                # any ERROR level log
    r'|password'                            # password-related operations
    r'|/api/person.*(?:POST|PUT|DELETE)'    # user CRUD
    r'|delete'                              # deletion operations
    r'|migration'                           # schema migrations
    r'|Exception'                           # exceptions
    r'|WARN.*(?:auth|login|token|jwt|forbidden|unauthorized)'  # auth warnings
    r')',  # BUG FIX: this closing paren was missing, so re.compile raised
           # "missing ), unterminated subpattern" at import time
    re.IGNORECASE,
)


def is_spring_important(line: str) -> bool:
    """Return True when a Spring log line is security-relevant."""
    return bool(SPRING_IMPORTANT_PATTERNS.search(line))


# ---------------------------------------------------------------------------
# Per-source filter dispatch
# ---------------------------------------------------------------------------

FILTERS = {
    "flask": is_flask_important,
    "spring": is_spring_important,
}


def detect_source(filename: str) -> str:
    """Classify a log file as 'flask', 'spring', or 'unknown' by its name."""
    name = filename.lower()
    if "flask" in name:
        return "flask"
    if "spring" in name or "java" in name:
        return "spring"
    return "unknown"


def filter_source_matches(line: str) -> bool:
    """Fallback: keep ERROR/WARN lines from any unknown source."""
    return bool(re.search(r'\b(?:ERROR|WARN|Exception)\b', line))


# ---------------------------------------------------------------------------
# File tail + filter loop
# ---------------------------------------------------------------------------

def tail_and_filter(raw_log: Path):
    """Tail `raw_log` forever, appending important lines to IMPORTANT_DIR.

    Only lines written AFTER this thread starts are considered (we seek to
    EOF first), so restarting the service never re-emits history.
    """
    source = detect_source(raw_log.name)
    is_important = FILTERS.get(source, filter_source_matches)

    out_path = IMPORTANT_DIR / raw_log.name
    out_path.parent.mkdir(parents=True, exist_ok=True)

    log.info(f"Filtering {raw_log.name} ({source}) -> {out_path}")

    # Wait for the raw log to exist
    while not raw_log.exists() and not shutdown_event.is_set():
        shutdown_event.wait(POLL_INTERVAL)

    # BUG FIX: if shutdown was requested before the file ever appeared,
    # the original fell through and open() raised FileNotFoundError.
    if shutdown_event.is_set() or not raw_log.exists():
        return

    with open(raw_log, "r") as infile, open(out_path, "a") as outfile:
        # Seek to end so we only process new lines going forward
        infile.seek(0, 2)

        while not shutdown_event.is_set():
            line = infile.readline()
            if not line:
                # No new data yet; short sleep that still wakes on shutdown
                shutdown_event.wait(0.2)
                continue
            if is_important(line):
                outfile.write(line)
                outfile.flush()  # real-time visibility for downstream readers


def watch_for_new_logs():
    """Watch LOG_DIR for new *.log files and spin up filter threads for them."""
    known: set[str] = set()
    threads: list[threading.Thread] = []

    while not shutdown_event.is_set():
        current = {p.name for p in LOG_DIR.glob("*.log")}
        new = current - known
        for name in new:
            raw_log = LOG_DIR / name
            t = threading.Thread(
                target=tail_and_filter,
                args=(raw_log,),
                name=f"filter-{name}",
                daemon=True,
            )
            t.start()
            threads.append(t)
            known.add(name)
        shutdown_event.wait(POLL_INTERVAL)

    # Give tailer threads a bounded chance to drain before exiting
    for t in threads:
        t.join(timeout=5)


if __name__ == "__main__":
    IMPORTANT_DIR.mkdir(parents=True, exist_ok=True)
    log.info(f"Filter service starting — watching {LOG_DIR} for *.log files")
    watch_for_new_logs()
    log.info("Filter service stopped.")
LOG_DIR for new *.log files and filters them + filter_thread = threading.Thread(target=log_filter.watch_for_new_logs, name="filter", daemon=True) + filter_thread.start() + threads.append(filter_thread) + # Keep main thread alive until shutdown while not shutdown_event.is_set(): # Log a heartbeat every minute so operators know the service is running From 5c29c0e0928d5f57365d593de6ba1aff13128e76 Mon Sep 17 00:00:00 2001 From: illuminati1618 Date: Fri, 20 Feb 2026 13:26:37 -0800 Subject: [PATCH 2/2] Add snapshot functionality for Aurora/RDS and SQLite databases --- requirements.txt | 1 + snapshot.py | 247 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 248 insertions(+) create mode 100644 snapshot.py diff --git a/requirements.txt b/requirements.txt index 0fb3792..6d768b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ docker==7.1.0 +boto3==1.35.0 diff --git a/snapshot.py b/snapshot.py new file mode 100644 index 0000000..297aaa5 --- /dev/null +++ b/snapshot.py @@ -0,0 +1,247 @@ +""" +snapshot.py — Automated database snapshot system + +Handles two database types: + 1. Aurora/RDS (Flask production) — uses boto3 to create AWS snapshots + 2. SQLite (Spring production) — copies the .db file to a timestamped backup + +Can be run via cron, manually, or imported as a module (for future API use). 
+""" + +import os +import sys +import shutil +import logging +from datetime import datetime, timedelta +from pathlib import Path + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", +) +log = logging.getLogger(__name__) + +# --- Configuration --- +AWS_REGION = os.environ.get("AWS_REGION", "us-west-2") +RDS_INSTANCE_ID = os.environ.get("RDS_INSTANCE_ID", "") +SPRING_SQLITE_PATH = Path(os.environ.get("SPRING_SQLITE_PATH", "/spring-volumes/sqlite.db")) +BACKUP_DIR = Path(os.environ.get("BACKUP_DIR", "/app/backups")) + +RETENTION_DAILY = int(os.environ.get("RETENTION_DAILY", "7")) +RETENTION_WEEKLY = int(os.environ.get("RETENTION_WEEKLY", "4")) +RETENTION_MONTHLY = int(os.environ.get("RETENTION_MONTHLY", "3")) + + +# --------------------------------------------------------------------------- +# Aurora/RDS snapshots +# --------------------------------------------------------------------------- + +def snapshot_aurora(trigger: str = "scheduled") -> bool: + """Create an RDS snapshot with metadata tags.""" + if not RDS_INSTANCE_ID: + log.warning("RDS_INSTANCE_ID not set, skipping Aurora snapshot") + return False + + try: + import boto3 + except ImportError: + log.error("boto3 not installed, cannot create Aurora snapshot") + return False + + now = datetime.utcnow() + snapshot_id = f"{RDS_INSTANCE_ID}-{now.strftime('%Y%m%d-%H%M%S')}" + + try: + rds = boto3.client("rds", region_name=AWS_REGION) + log.info(f"Creating RDS snapshot: {snapshot_id}") + + rds.create_db_snapshot( + DBSnapshotIdentifier=snapshot_id, + DBInstanceIdentifier=RDS_INSTANCE_ID, + Tags=[ + {"Key": "trigger", "Value": trigger}, + {"Key": "created_by", "Value": "db-automator"}, + {"Key": "timestamp", "Value": now.isoformat()}, + ], + ) + log.info(f"Aurora snapshot '{snapshot_id}' creation initiated") + return True + + except Exception as e: + log.error(f"Failed to create Aurora snapshot: {e}") + return False + + +def cleanup_aurora(): + """Delete Aurora snapshots 
beyond retention policy.""" + if not RDS_INSTANCE_ID: + return + + try: + import boto3 + except ImportError: + return + + try: + rds = boto3.client("rds", region_name=AWS_REGION) + response = rds.describe_db_snapshots( + DBInstanceIdentifier=RDS_INSTANCE_ID, + SnapshotType="manual", + ) + snapshots = response.get("DBSnapshots", []) + + # Only manage snapshots created by us + our_snapshots = [] + for snap in snapshots: + tags_resp = rds.list_tags_for_resource(ResourceName=snap["DBSnapshotArn"]) + tags = {t["Key"]: t["Value"] for t in tags_resp.get("TagList", [])} + if tags.get("created_by") == "db-automator": + our_snapshots.append({ + "id": snap["DBSnapshotIdentifier"], + "time": snap["SnapshotCreateTime"], + }) + + our_snapshots.sort(key=lambda s: s["time"], reverse=True) + _apply_retention(our_snapshots, delete_fn=lambda s: _delete_aurora_snapshot(rds, s["id"])) + + except Exception as e: + log.error(f"Aurora cleanup failed: {e}") + + +def _delete_aurora_snapshot(rds, snapshot_id: str): + log.info(f"Deleting Aurora snapshot: {snapshot_id}") + rds.delete_db_snapshot(DBSnapshotIdentifier=snapshot_id) + + +# --------------------------------------------------------------------------- +# SQLite snapshots +# --------------------------------------------------------------------------- + +def snapshot_sqlite(trigger: str = "scheduled") -> bool: + """Copy the Spring SQLite database to a timestamped backup.""" + if not SPRING_SQLITE_PATH.exists(): + log.warning(f"SQLite file not found at {SPRING_SQLITE_PATH}, skipping") + return False + + now = datetime.utcnow() + dest_dir = BACKUP_DIR / "spring" + dest_dir.mkdir(parents=True, exist_ok=True) + + filename = f"sqlite_{now.strftime('%Y%m%d_%H%M%S')}.db" + dest = dest_dir / filename + + try: + log.info(f"Copying SQLite database: {SPRING_SQLITE_PATH} -> {dest}") + shutil.copy2(SPRING_SQLITE_PATH, dest) + + # Also copy WAL and SHM files if they exist (for consistency) + for suffix in ["-wal", "-shm"]: + wal = 
SPRING_SQLITE_PATH.parent / (SPRING_SQLITE_PATH.name + suffix) + if wal.exists(): + shutil.copy2(wal, dest_dir / (filename + suffix)) + + size_mb = dest.stat().st_size / (1024 * 1024) + log.info(f"SQLite snapshot saved: {dest} ({size_mb:.1f} MB)") + return True + + except Exception as e: + log.error(f"Failed to create SQLite snapshot: {e}") + return False + + +def cleanup_sqlite(): + """Delete old SQLite backups beyond retention policy.""" + backup_dir = BACKUP_DIR / "spring" + if not backup_dir.exists(): + return + + backups = sorted(backup_dir.glob("sqlite_*.db"), key=lambda p: p.stat().st_mtime, reverse=True) + + snapshots = [{"id": p.name, "time": datetime.fromtimestamp(p.stat().st_mtime), "path": p} for p in backups] + + def delete_sqlite_backup(snap): + path = snap["path"] + log.info(f"Deleting old SQLite backup: {path.name}") + path.unlink() + # Clean up associated WAL/SHM files + for suffix in ["-wal", "-shm"]: + companion = path.parent / (path.name + suffix) + if companion.exists(): + companion.unlink() + + _apply_retention(snapshots, delete_fn=delete_sqlite_backup) + + +# --------------------------------------------------------------------------- +# Retention logic +# --------------------------------------------------------------------------- + +def _apply_retention(snapshots: list[dict], delete_fn): + """ + Keep the most recent RETENTION_DAILY snapshots, + plus one per week for RETENTION_WEEKLY weeks, + plus one per month for RETENTION_MONTHLY months. + Delete the rest. 
+ """ + if not snapshots: + return + + now = datetime.utcnow() + keep = set() + + # Keep the N most recent (daily) + for snap in snapshots[:RETENTION_DAILY]: + keep.add(snap["id"]) + + # Keep one per week for the last N weeks + for weeks_ago in range(RETENTION_WEEKLY): + week_start = now - timedelta(weeks=weeks_ago + 1) + week_end = now - timedelta(weeks=weeks_ago) + for snap in snapshots: + t = snap["time"] + # Handle timezone-aware datetimes + if hasattr(t, 'tzinfo') and t.tzinfo is not None: + t = t.replace(tzinfo=None) + if week_start <= t < week_end: + keep.add(snap["id"]) + break + + # Keep one per month for the last N months + for months_ago in range(RETENTION_MONTHLY): + month_start = now - timedelta(days=30 * (months_ago + 1)) + month_end = now - timedelta(days=30 * months_ago) + for snap in snapshots: + t = snap["time"] + if hasattr(t, 'tzinfo') and t.tzinfo is not None: + t = t.replace(tzinfo=None) + if month_start <= t < month_end: + keep.add(snap["id"]) + break + + # Delete everything not in the keep set + for snap in snapshots: + if snap["id"] not in keep: + delete_fn(snap) + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def run_all(trigger: str = "scheduled"): + """Run all snapshot + cleanup operations.""" + log.info(f"=== Snapshot run starting (trigger={trigger}) ===") + + aurora_ok = snapshot_aurora(trigger=trigger) + sqlite_ok = snapshot_sqlite(trigger=trigger) + + cleanup_aurora() + cleanup_sqlite() + + log.info(f"=== Snapshot run complete (aurora={'OK' if aurora_ok else 'SKIP'}, sqlite={'OK' if sqlite_ok else 'SKIP'}) ===") + return aurora_ok or sqlite_ok + + +if __name__ == "__main__": + trigger = sys.argv[1] if len(sys.argv) > 1 else "manual" + run_all(trigger=trigger)