diff --git a/CLAUDE.md b/CLAUDE.md index cbeb430..8669495 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -17,7 +17,7 @@ SDLC Code Scanner is a Python-based security assessment utility for AWS infrastr **Repository Detection** - `repo_detector.py`: Automatically identifies IaC frameworks and languages in a repository -- Detects: Terraform (.tf), CloudFormation (.yaml/.json/.template), Python (.py), npm (package.json) +- Detects: Terraform (.tf), CloudFormation (.yaml/.json/.template), Python (.py), npm (package.json), Dockerfiles - Returns list of applicable scanners to run for that repository **Report Aggregation Pipeline** @@ -45,6 +45,9 @@ SDLC Code Scanner is a Python-based security assessment utility for AWS infrastr **Secrets Detection** - `secrets_scanner.py`: Gitleaks +**Container Security** +- `container_scanner.py`: Trivy image vulnerability scanning (builds and scans Docker images) + ## Development Commands ### Docker Operations diff --git a/LICENSE b/LICENSE index 223467b..1500044 100644 --- a/LICENSE +++ b/LICENSE @@ -131,6 +131,6 @@ of your licenses. 
--- -Required Notice: Copyright (c) 2025 Crofton Cloud (https://crofton.cloud) +Required Notice: Copyright (c) 2026 Crofton Cloud (https://crofton.cloud) For licensing inquiries, contact: licensing@crofton.cloud diff --git a/config/config.yaml b/config/config.yaml index 7521330..1316896 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -120,6 +120,21 @@ tools: gitleaks: - "generic-api-key" # High false positive rate + # Container Image Scanning + container: + enabled: true + build_images: true # Build images from Dockerfiles before scanning + cleanup_images: true # Remove built images after scanning + image_name_prefix: "scan-" # Prefix for built image names + severities: "UNKNOWN,LOW,MEDIUM,HIGH,CRITICAL" # Severity levels to report + ignore_unfixed: false # Skip vulnerabilities without fixes + build_timeout: 600 # Timeout for docker build (seconds) + scan_timeout: 300 # Timeout for trivy scan (seconds) + # images: [] # Optional: scan specific pre-built images instead of building + # Rule exclusions/exceptions + exclude_rules: + trivy: [] # Example: ["CVE-2021-44228"] + # Tool-Specific Configuration tool_config: checkov: diff --git a/src/config_loader.py b/src/config_loader.py index ca8e4e9..e0224da 100644 --- a/src/config_loader.py +++ b/src/config_loader.py @@ -79,6 +79,16 @@ def _get_default_config(self) -> Dict[str, Any]: "enabled": True, "gitleaks": True, }, + "container": { + "enabled": True, + "build_images": True, + "cleanup_images": True, + "image_name_prefix": "scan-", + "severities": "UNKNOWN,LOW,MEDIUM,HIGH,CRITICAL", + "ignore_unfixed": False, + "build_timeout": 600, + "scan_timeout": 300, + }, }, "severity": { "fail_on": "HIGH", diff --git a/src/main.py b/src/main.py index 702d792..95db26d 100644 --- a/src/main.py +++ b/src/main.py @@ -20,6 +20,7 @@ from src.scanners.secrets_scanner import SecretsScanner from src.scanners.python_scanner import PythonScanner from src.scanners.npm_scanner import NPMScanner +from src.scanners.container_scanner 
def has_dockerfile(self) -> bool:
    """Check if the repository contains at least one Dockerfile.

    A file counts as a Dockerfile when its name, compared
    case-insensitively, is ``Dockerfile`` / ``Containerfile``, starts with
    ``Dockerfile.`` / ``Containerfile.`` (e.g. ``Dockerfile.dev``) or ends
    with ``.dockerfile`` / ``.containerfile`` (e.g. ``api.Dockerfile``).

    Returns:
        True as soon as one matching file is found under ``self.repo_path``,
        False when the walk finishes without a match.
    """
    exact_names = ("dockerfile", "containerfile")
    name_prefixes = ("dockerfile.", "containerfile.")
    name_suffixes = (".dockerfile", ".containerfile")

    for _root, dirs, files in os.walk(self.repo_path):
        # Prune git metadata in place so os.walk never descends into it:
        # a branch called "Dockerfile" is stored as
        # .git/refs/heads/Dockerfile and would be a false positive.
        dirs[:] = [d for d in dirs if d != ".git"]

        for file_name in files:
            lowered = file_name.lower()
            # "<Dockerfile name>.dockerignore" is a per-Dockerfile ignore
            # file, not a build recipe, even though it starts with
            # "Dockerfile.".
            if lowered.endswith(".dockerignore"):
                continue
            if (
                lowered in exact_names
                or lowered.startswith(name_prefixes)
                or lowered.endswith(name_suffixes)
            ):
                return True
    return False
class ContainerScanner(ScannerBase):
    """Scanner for Docker container images using Trivy.

    Depending on ``tools.container.build_images`` the scanner either builds
    an image from every Dockerfile found in the repository and scans it, or
    scans the pre-built images listed under ``tools.container.images``.

    Command execution, path exclusion, finding filtering and severity
    mapping are delegated to helpers inherited from ``ScannerBase``
    (``execute_command``, ``is_path_excluded``, ``filter_excluded_findings``,
    ``severity_from_string``); their exact semantics are defined there.
    """

    # Lower-cased file-name patterns recognised as Dockerfiles.
    _DOCKERFILE_NAMES = ("dockerfile", "containerfile")
    _DOCKERFILE_PREFIXES = ("dockerfile.", "containerfile.")
    _DOCKERFILE_SUFFIXES = (".dockerfile", ".containerfile")
    # Directories that are never descended into while searching.
    _SKIPPED_DIRS = (".git",)
    # Characters kept verbatim when deriving an image name from a directory.
    _TAG_CHARS = frozenset("abcdefghijklmnopqrstuvwxyz0123456789")
    _DEFAULT_SEVERITIES = "UNKNOWN,LOW,MEDIUM,HIGH,CRITICAL"

    def __init__(self, config: Dict[str, Any], logger: Optional[logging.Logger] = None):
        """Store the ``tools.container`` section of *config* for later lookups."""
        super().__init__(config, logger)
        # ``or {}`` tolerates sections that are present but empty (null) in YAML.
        self.container_config = (config.get("tools") or {}).get("container") or {}

    def is_applicable(self, path: str) -> bool:
        """Check if the repository at *path* contains Dockerfiles."""
        return self._has_dockerfile(path)

    def is_enabled(self) -> bool:
        """Check if container scanning is enabled (defaults to True)."""
        return self.container_config.get("enabled", True)

    @classmethod
    def _is_dockerfile_name(cls, file_name: str) -> bool:
        """Return True when *file_name* looks like a Dockerfile.

        Matching is case-insensitive. ``*.dockerignore`` files are rejected
        explicitly because ``Dockerfile.dockerignore`` is an ignore file that
        would otherwise match the ``Dockerfile.`` prefix and fail to build.
        """
        lowered = file_name.lower()
        if lowered.endswith(".dockerignore"):
            return False
        return (
            lowered in cls._DOCKERFILE_NAMES
            or lowered.startswith(cls._DOCKERFILE_PREFIXES)
            or lowered.endswith(cls._DOCKERFILE_SUFFIXES)
        )

    def _iter_dockerfiles(self, path: str):
        """Yield the path of every Dockerfile below *path*.

        Directories and files are visited in sorted order so results are
        reproducible between runs.
        """
        for root, dirs, files in os.walk(path):
            # Prune in place: a branch named "Dockerfile" lives at
            # .git/refs/heads/Dockerfile and must not be treated as one.
            dirs[:] = sorted(d for d in dirs if d not in self._SKIPPED_DIRS)
            # Skip excluded paths (sub-directories are still checked
            # individually on later iterations).
            if self.is_path_excluded(root):
                continue
            for file_name in sorted(files):
                if self._is_dockerfile_name(file_name):
                    yield os.path.join(root, file_name)

    def _has_dockerfile(self, path: str) -> bool:
        """Check if the repository contains at least one Dockerfile."""
        return next(self._iter_dockerfiles(path), None) is not None

    def _find_dockerfiles(self, path: str) -> List[str]:
        """Find all Dockerfiles in the repository."""
        return list(self._iter_dockerfiles(path))

    def run(self, path: str) -> List[Finding]:
        """Run container image vulnerability scanning.

        Args:
            path: Root of the repository to scan.

        Returns:
            The findings collected during this run (also stored on
            ``self.findings``). Empty when scanning is disabled, a required
            tool is missing, or there is nothing to scan.
        """
        self.findings = []

        if not self.is_enabled():
            self.logger.info("Container scanning is disabled")
            return self.findings

        # Check for trivy availability
        trivy_check, _, _ = self.execute_command(["trivy", "--version"])
        if trivy_check != 0:
            self.logger.warning("Trivy not found, skipping container scanning")
            return self.findings

        if not self.container_config.get("build_images", True):
            # Pre-built images do not depend on Dockerfiles being present,
            # so this branch must run before any Dockerfile discovery.
            images = self.container_config.get("images") or []
            if not images:
                self.logger.info(
                    "Image building is disabled and no images are configured"
                )
            for image in images:
                self.findings.extend(self._scan_image(image))
            return self.findings

        dockerfiles = self._find_dockerfiles(path)
        if not dockerfiles:
            self.logger.info("No Dockerfiles found")
            return self.findings

        self.logger.info("Found %d Dockerfile(s)", len(dockerfiles))

        # Check for docker availability when building images
        docker_check, _, _ = self.execute_command(["docker", "--version"])
        if docker_check != 0:
            self.logger.warning("Docker not found, skipping container image building")
            return self.findings

        image_name_prefix = self.container_config.get("image_name_prefix", "scan-")
        for dockerfile in dockerfiles:
            self.findings.extend(
                self._build_and_scan(path, dockerfile, image_name_prefix)
            )

        return self.findings

    @classmethod
    def _image_tag(cls, prefix: str, relative_dir: str) -> str:
        """Derive an image tag from a Dockerfile directory relative to the repo.

        The repository root maps to ``<prefix>root:latest``. Any other
        directory is lower-cased and every character outside ``a-z0-9`` is
        replaced by a hyphen, with leading/trailing hyphens removed.
        """
        if relative_dir in ("", os.curdir):
            return f"{prefix}root:latest"
        safe_name = "".join(
            c if c in cls._TAG_CHARS else "-" for c in relative_dir.lower()
        ).strip("-")
        # A directory made only of punctuation would leave an empty name
        # and therefore an invalid tag such as "scan-:latest".
        return f"{prefix}{safe_name or 'root'}:latest"

    def _build_and_scan(
        self, repo_path: str, dockerfile: str, prefix: str
    ) -> List[Finding]:
        """Build a Docker image from *dockerfile* and scan it.

        Args:
            repo_path: Repository root; used as the working directory of the
                build and to derive the image name.
            dockerfile: Path of the Dockerfile as discovered by the walk.
            prefix: Prefix prepended to the generated image name.

        Returns:
            Vulnerability findings for the built image, or a single
            ``BUILD_FAILURE`` finding when the build does not succeed.
        """
        findings: List[Finding] = []

        # Work with absolute paths: the build runs with cwd=repo_path, so a
        # Dockerfile path relative to the process cwd would be resolved twice.
        dockerfile_abs = os.path.abspath(dockerfile)
        build_context = os.path.dirname(dockerfile_abs)
        # Comparing normalised paths (instead of raw strings) keeps the root
        # detection correct for inputs such as "repo/" or "./repo".
        relative_dir = os.path.relpath(build_context, os.path.abspath(repo_path))
        image_tag = self._image_tag(prefix, relative_dir)

        self.logger.info("Building image %s from %s", image_tag, dockerfile)

        build_cmd = [
            "docker",
            "build",
            "-t",
            image_tag,
            "-f",
            dockerfile_abs,
            build_context,
        ]

        return_code, _stdout, stderr = self.execute_command(
            build_cmd,
            cwd=repo_path,
            timeout=self.container_config.get("build_timeout", 600),
        )

        if return_code != 0:
            self.logger.error("Failed to build image from %s: %s", dockerfile, stderr)
            error_text = (stderr or "")[:500]
            # Add a finding for build failure
            findings.append(
                Finding(
                    tool="trivy-container",
                    severity=Severity.HIGH,
                    rule_id="BUILD_FAILURE",
                    title="Docker image build failed",
                    description=f"Failed to build Docker image from {dockerfile}: {error_text}",
                    file_path=dockerfile,
                )
            )
            return findings

        try:
            findings.extend(self._scan_image(image_tag, dockerfile))
        finally:
            # Remove the image even when scanning raises, so failed runs do
            # not leave built images behind.
            if self.container_config.get("cleanup_images", True):
                self.execute_command(["docker", "rmi", "-f", image_tag])

        return findings

    def _scan_image(
        self, image: str, dockerfile: Optional[str] = None
    ) -> List[Finding]:
        """Scan a Docker image with Trivy.

        Args:
            image: Image reference passed to ``trivy image``.
            dockerfile: Dockerfile the image was built from, if any; recorded
                as the ``file_path`` of each finding.

        Returns:
            Findings parsed from Trivy's JSON report after being passed
            through ``filter_excluded_findings``.
        """
        findings: List[Finding] = []
        self.logger.info("Scanning image: %s", image)

        # "--exit-code 0" keeps Trivy from signalling found vulnerabilities
        # through its exit status; results are read from the JSON instead.
        cmd = ["trivy", "image", "--format", "json", "--exit-code", "0", "--quiet"]

        severities = self.container_config.get("severities", self._DEFAULT_SEVERITIES)
        if isinstance(severities, (list, tuple)):
            # Accept a YAML list as well as the comma-separated string form.
            severities = ",".join(str(level) for level in severities)
        cmd.extend(["--severity", severities])

        # Skip unfixed vulnerabilities if configured
        if self.container_config.get("ignore_unfixed", False):
            cmd.append("--ignore-unfixed")

        cmd.append(image)

        return_code, stdout, stderr = self.execute_command(
            cmd, timeout=self.container_config.get("scan_timeout", 300)
        )

        if return_code != 0 and not stdout:
            self.logger.error("Trivy scan failed for %s: %s", image, stderr)
            return findings

        result = None
        if stdout:
            try:
                result = json.loads(stdout)
            except json.JSONDecodeError as e:
                self.logger.error("Failed to parse Trivy output: %s", e)

        # Valid JSON that is not an object (e.g. "null") carries no results.
        if isinstance(result, dict):
            findings.extend(self._parse_trivy_output(result, image, dockerfile))

        return self.filter_excluded_findings(findings, "trivy", "container")

    def _parse_trivy_output(
        self, result: Dict, image: str, dockerfile: Optional[str] = None
    ) -> List[Finding]:
        """Parse Trivy JSON output for container vulnerabilities.

        Args:
            result: Decoded Trivy JSON report.
            image: Image reference that was scanned.
            dockerfile: Dockerfile the image was built from, if any.

        Returns:
            One finding per entry in each result's ``Vulnerabilities`` list.
        """
        findings: List[Finding] = []

        # ``or []`` covers keys that are present but null, which would
        # otherwise raise TypeError when iterated.
        for res in result.get("Results") or []:
            target = res.get("Target", image)
            target_type = res.get("Type", "")

            for vuln in res.get("Vulnerabilities") or []:
                vuln_id = vuln.get("VulnerabilityID", "UNKNOWN")
                pkg_name = vuln.get("PkgName", "")
                installed_version = vuln.get("InstalledVersion", "")
                fixed_version = vuln.get("FixedVersion", "")

                title = f"{vuln_id}: {pkg_name}"
                if installed_version:
                    title += f" ({installed_version})"

                if fixed_version:
                    remediation = f"Upgrade {pkg_name} to version {fixed_version}"
                else:
                    remediation = "No fix available yet"

                findings.append(
                    Finding(
                        tool="trivy-container",
                        severity=self.severity_from_string(
                            vuln.get("Severity", "MEDIUM")
                        ),
                        rule_id=vuln_id,
                        title=title,
                        description=vuln.get("Description", ""),
                        file_path=dockerfile,
                        resource=f"{target} ({target_type})" if target_type else target,
                        remediation=remediation,
                        metadata={
                            "image": image,
                            "package": pkg_name,
                            "installed_version": installed_version,
                            "fixed_version": fixed_version,
                            "references": vuln.get("References", []),
                            "cvss": vuln.get("CVSS", {}),
                        },
                    )
                )

        return findings

    def parse_output(self, output: str, stderr: str, return_code: int) -> List[Finding]:
        """Parse Trivy output - implemented for interface compliance.

        Always returns an empty list; the actual parsing happens in
        ``_parse_trivy_output``, which ``_scan_image`` calls directly.
        """
        return []