diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d8906ad..e736362 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -81,12 +81,15 @@ jobs:
     needs: [test, lint]
     steps:
       - name: Clear Space
-        if: github.event_name == 'pull_request'
+        # Note: Manual cleanup sufficient for CPU images (python:3.12-slim base ~150MB)
+        # Only CUDA images need aggressive cleanup due to their ~10-15GB size
         run: |
           rm -rf /usr/share/dotnet
           rm -rf /opt/ghc
           rm -rf "/usr/local/share/boost"
           rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -132,13 +135,20 @@ jobs:
     runs-on: ubuntu-latest
     needs: [test, lint]
     steps:
-      - name: Clear Space
-        if: github.event_name == 'pull_request'
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -179,13 +189,20 @@ jobs:
     runs-on: ubuntu-latest
     needs: [test, lint]
     steps:
-      - name: Clear Space
-        if: github.event_name == 'pull_request'
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -247,12 +264,20 @@ jobs:
     needs: [test, lint, docker-test, release]
     if: github.ref == 'refs/heads/main' && github.event_name == 'push' && !needs.release.outputs.release_created
     steps:
-      - name: Clear Space
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -306,6 +331,8 @@ jobs:
           rm -rf /opt/ghc
           rm -rf "/usr/local/share/boost"
           rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -353,12 +380,20 @@ jobs:
     needs: [test, lint, docker-test, docker-test-lb, release]
     if: github.ref == 'refs/heads/main' && github.event_name == 'push' && !needs.release.outputs.release_created
     steps:
-      - name: Clear Space
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -406,12 +441,20 @@ jobs:
     needs: [test, lint, docker-test, docker-test-lb-cpu, release]
     if: github.ref == 'refs/heads/main' && github.event_name == 'push' && !needs.release.outputs.release_created
     steps:
-      - name: Clear Space
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -459,12 +502,20 @@ jobs:
     needs: [release]
     if: needs.release.outputs.release_created
     steps:
-      - name: Clear Space
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -528,6 +579,8 @@ jobs:
           rm -rf /opt/ghc
           rm -rf "/usr/local/share/boost"
           rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -585,12 +638,20 @@ jobs:
     needs: [release]
     if: needs.release.outputs.release_created
     steps:
-      - name: Clear Space
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
@@ -648,12 +709,20 @@ jobs:
     needs: [release]
     if: needs.release.outputs.release_created
     steps:
-      - name: Clear Space
+      - name: Free Disk Space
+        uses: jlumbroso/free-disk-space@v1.3.1
+        with:
+          tool-cache: true
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: true
+
+      - name: Additional cleanup and report
         run: |
-          rm -rf /usr/share/dotnet
-          rm -rf /opt/ghc
-          rm -rf "/usr/local/share/boost"
-          rm -rf "$AGENT_TOOLSDIRECTORY"
+          docker system prune -af
+          df -h

       - name: Checkout repository
         uses: actions/checkout@v4
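The workflow hunks above replace the blanket rm -rf cleanup with the jlumbroso/free-disk-space action for the image-building jobs, then prune Docker and print df -h so the remaining headroom shows up in the job log. As a rough local illustration only (this helper is not part of the PR; the path and the 20 GiB threshold are made up), the same check can be expressed in Python:

# Hypothetical helper, not part of this PR: a Python equivalent of eyeballing
# the `df -h` report step before building a large (~10-15GB CUDA) image.
import shutil


def free_gib(path: str = "/") -> float:
    """Return the free space at `path` in GiB."""
    return shutil.disk_usage(path).free / (1024 ** 3)


if __name__ == "__main__":
    free = free_gib()
    # 20 GiB is an arbitrary illustrative threshold, not a value from the workflow.
    if free < 20:
        print(f"Warning: only {free:.1f} GiB free; a large image build may fail")
    else:
        print(f"{free:.1f} GiB free")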
diff --git a/Dockerfile b/Dockerfile
index cb60949..de8c9b8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,8 +17,11 @@ RUN mkdir -p /root/.cache/apt/archives/partial \
     && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache

 # Install system dependencies and uv
+# Note: build-essential not pre-installed to reduce image size (400MB savings)
+# Automatic detection will install it when needed (no manual action required)
+# Advanced: Users can pre-install via system_dependencies=["build-essential"]
 RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
-    build-essential curl ca-certificates nala git \
+    curl ca-certificates nala git \
     && curl -LsSf https://astral.sh/uv/install.sh | sh \
     && cp ~/.local/bin/uv /usr/local/bin/uv \
     && chmod +x /usr/local/bin/uv \
diff --git a/Dockerfile-cpu b/Dockerfile-cpu
index 65add8c..37e1081 100644
--- a/Dockerfile-cpu
+++ b/Dockerfile-cpu
@@ -12,8 +12,11 @@ RUN mkdir -p /root/.cache/apt/archives/partial \
     && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache

 # Install system dependencies and uv
+# Note: build-essential not pre-installed to reduce image size (400MB savings)
+# Automatic detection will install it when needed (no manual action required)
+# Advanced: Users can pre-install via system_dependencies=["build-essential"]
 RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
-    build-essential curl ca-certificates nala git \
+    curl ca-certificates nala git \
     && curl -LsSf https://astral.sh/uv/install.sh | sh \
     && cp ~/.local/bin/uv /usr/local/bin/uv \
     && chmod +x /usr/local/bin/uv \
diff --git a/Dockerfile-lb b/Dockerfile-lb
index 40c7999..2cc8952 100644
--- a/Dockerfile-lb
+++ b/Dockerfile-lb
@@ -17,8 +17,11 @@ RUN mkdir -p /root/.cache/apt/archives/partial \
     && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache

 # Install system dependencies and uv
+# Note: build-essential not pre-installed to reduce image size (400MB savings)
+# Automatic detection will install it when needed (no manual action required)
+# Advanced: Users can pre-install via system_dependencies=["build-essential"]
 RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
-    build-essential curl ca-certificates nala git \
+    curl ca-certificates nala git \
     && curl -LsSf https://astral.sh/uv/install.sh | sh \
     && cp ~/.local/bin/uv /usr/local/bin/uv \
     && chmod +x /usr/local/bin/uv \
diff --git a/Dockerfile-lb-cpu b/Dockerfile-lb-cpu
index af6ea3d..f39f321 100644
--- a/Dockerfile-lb-cpu
+++ b/Dockerfile-lb-cpu
@@ -12,8 +12,11 @@ RUN mkdir -p /root/.cache/apt/archives/partial \
     && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache

 # Install system dependencies and uv
+# Note: build-essential not pre-installed to reduce image size (400MB savings)
+# Automatic detection will install it when needed (no manual action required)
+# Advanced: Users can pre-install via system_dependencies=["build-essential"]
 RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
-    build-essential curl ca-certificates nala git \
+    curl ca-certificates nala git \
     && curl -LsSf https://astral.sh/uv/install.sh | sh \
     && cp ~/.local/bin/uv /usr/local/bin/uv \
     && chmod +x /usr/local/bin/uv \
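All four Dockerfiles drop build-essential from the base image (roughly 400MB apiece) and rely on the installer's auto-detection shown in the next diff. For callers who know their packages compile C extensions, the comments suggest pre-installing the toolchain. A minimal sketch of that path, assuming the module layout used by the tests (dependency_installer importable, DependencyInstaller() constructible with no arguments); the example package and the accelerate_downloads keyword spelling for install_system_dependencies are assumptions:

# Sketch only: pre-installing build tools instead of waiting for auto-detection.
# DependencyInstaller, install_system_dependencies and install_dependencies all
# appear in this diff; the package name below is purely illustrative.
from dependency_installer import DependencyInstaller

installer = DependencyInstaller()

# Roughly what declaring system_dependencies=["build-essential"] up front does.
build_tools = installer.install_system_dependencies(
    ["build-essential"], accelerate_downloads=True
)
if not build_tools.success:
    raise RuntimeError(f"Could not install build tools: {build_tools.error}")

# Packages with C extensions can now compile without triggering the retry path.
result = installer.install_dependencies(["pycairo"])
print(result.success, result.error)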
via system_dependencies=["build-essential"] RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \ - build-essential curl ca-certificates nala git \ + curl ca-certificates nala git \ && curl -LsSf https://astral.sh/uv/install.sh | sh \ && cp ~/.local/bin/uv /usr/local/bin/uv \ && chmod +x /usr/local/bin/uv \ diff --git a/Dockerfile-lb b/Dockerfile-lb index 40c7999..2cc8952 100644 --- a/Dockerfile-lb +++ b/Dockerfile-lb @@ -17,8 +17,11 @@ RUN mkdir -p /root/.cache/apt/archives/partial \ && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache # Install system dependencies and uv +# Note: build-essential not pre-installed to reduce image size (400MB savings) +# Automatic detection will install it when needed (no manual action required) +# Advanced: Users can pre-install via system_dependencies=["build-essential"] RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \ - build-essential curl ca-certificates nala git \ + curl ca-certificates nala git \ && curl -LsSf https://astral.sh/uv/install.sh | sh \ && cp ~/.local/bin/uv /usr/local/bin/uv \ && chmod +x /usr/local/bin/uv \ diff --git a/Dockerfile-lb-cpu b/Dockerfile-lb-cpu index af6ea3d..f39f321 100644 --- a/Dockerfile-lb-cpu +++ b/Dockerfile-lb-cpu @@ -12,8 +12,11 @@ RUN mkdir -p /root/.cache/apt/archives/partial \ && echo 'Dir::Cache "/root/.cache/apt";' > /etc/apt/apt.conf.d/01cache # Install system dependencies and uv +# Note: build-essential not pre-installed to reduce image size (400MB savings) +# Automatic detection will install it when needed (no manual action required) +# Advanced: Users can pre-install via system_dependencies=["build-essential"] RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \ - build-essential curl ca-certificates nala git \ + curl ca-certificates nala git \ && curl -LsSf https://astral.sh/uv/install.sh | sh \ && cp ~/.local/bin/uv /usr/local/bin/uv \ && chmod +x /usr/local/bin/uv \ diff --git a/src/dependency_installer.py b/src/dependency_installer.py index 85c018d..cf23d17 100644 --- a/src/dependency_installer.py +++ b/src/dependency_installer.py @@ -21,7 +21,8 @@ def install_dependencies( self, packages: List[str], accelerate_downloads: bool = True ) -> FunctionResponse: """ - Install Python packages using uv or regular pip + Install Python packages using uv or regular pip. + Automatically installs build-essential if compilation is required. Args: packages: List of package names or package specifications @@ -48,13 +49,51 @@ def install_dependencies( operation_name = f"Installing Python packages ({'accelerated' if accelerate_downloads else 'standard'})" try: - return run_logged_subprocess( + result = run_logged_subprocess( command=command, logger=self.logger, operation_name=operation_name, timeout=300, env=os.environ.copy(), ) + + # Check if installation failed due to missing compiler + if not result.success and self._needs_compilation(result): + self.logger.info( + "Package compilation required but build tools missing. " + "Auto-installing build-essential..." + ) + + # Install build-essential + build_result = self.install_system_dependencies( + ["build-essential"], accelerate_downloads + ) + + if not build_result.success: + return FunctionResponse( + success=False, + error=( + f"Failed to install build tools: {build_result.error}\n\n" + "Troubleshooting:\n" + "1. Try manually specifying: system_dependencies=['build-essential']\n" + "2. 
Check system package repositories are accessible\n" + "3. Verify sufficient disk space for build tools (~400MB)" + ), + stdout=result.stdout, + ) + + # Retry package installation with fresh timeout budget + self.logger.info("Retrying package installation with build tools...") + result = run_logged_subprocess( + command=command, + logger=self.logger, + operation_name=f"{operation_name} (retry with build tools)", + timeout=300, # Fresh 300s timeout for retry (compilation may take longer) + env=os.environ.copy(), + ) + + return result + except Exception as e: return FunctionResponse(success=False, error=str(e)) @@ -117,6 +156,53 @@ def _check_nala_available(self) -> bool: return self._nala_available + def _needs_compilation(self, result: FunctionResponse) -> bool: + """ + Detect if a package installation failure was due to missing compilation tools. + + Args: + result: FunctionResponse from failed pip installation + + Returns: + True if the error indicates missing compiler/build tools, False otherwise + """ + # Common error patterns indicating missing compiler + # Note: Patterns are specific to avoid false positives on package names + error_indicators = [ + "gcc: command not found", + "gcc: error", + "g++: command not found", + "g++: error", + "cc: command not found", + "cc: error", + "c++: command not found", + "c++: error", + "command 'gcc' failed", + "command 'g++' failed", + "command 'cc' failed", + "command 'c++' failed", + "unable to execute 'gcc'", + "unable to execute 'g++'", + "unable to execute 'cc'", + "unable to execute 'c++'", + "error: command 'gcc'", + "error: command 'cc'", + "no such file or directory: 'gcc'", + "no such file or directory: 'cc'", + "no such file or directory: 'g++'", + "no such file or directory: 'c++'", + "_distutils_hack", + "distutils.errors.compileerror", + "distutils.errors.distutilsexecerror", + ] + + error_text = (result.error or "") + (result.stdout or "") + error_text_lower = error_text.lower() + + return any( + indicator.lower() in error_text_lower for indicator in error_indicators + ) + def _is_docker_environment(self) -> bool: """ Detect if we're running in a Docker container. 
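The installer change turns a hard failure into a single detect-and-retry cycle: run the install, and if it fails with output that looks like a missing C/C++ toolchain, install build-essential (via nala when available, apt-get otherwise) and run the install once more. The detection itself is plain substring matching. Below is a standalone, simplified illustration of that idea, not the repository's code: it uses a shorter pattern list than _needs_compilation and no FunctionResponse dependency.

# Simplified, self-contained sketch of the detection used by _needs_compilation:
# case-insensitive substring matching against known compiler-failure messages.
COMPILER_ERROR_PATTERNS = (
    "gcc: command not found",
    "unable to execute 'gcc'",
    "error: command 'gcc'",
    "distutils.errors.compileerror",
)


def looks_like_missing_compiler(error_text: str, stdout: str = "") -> bool:
    """Return True if the combined output matches a known compiler-failure pattern."""
    text = (error_text + stdout).lower()
    return any(pattern in text for pattern in COMPILER_ERROR_PATTERNS)


print(looks_like_missing_compiler(
    "error: command 'gcc' failed: No such file or directory"
))  # True
print(looks_like_missing_compiler("Network error: Could not fetch package"))  # False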
diff --git a/tests/unit/test_dependency_installer.py b/tests/unit/test_dependency_installer.py
index 5f132d4..a8181a9 100644
--- a/tests/unit/test_dependency_installer.py
+++ b/tests/unit/test_dependency_installer.py
@@ -295,3 +295,204 @@ def test_install_dependencies_timeout(self, mock_subprocess):

         assert result.success is False
         assert "timed out after 300 seconds" in result.error
+
+
+class TestCompilationAutoRetry:
+    """Test automatic build-essential installation when compilation needed."""
+
+    def setup_method(self):
+        """Setup for each test method."""
+        self.installer = DependencyInstaller()
+
+    def test_needs_compilation_gcc_not_found(self):
+        """Test detection of missing gcc compiler."""
+        result = FunctionResponse(
+            success=False,
+            error="error: command 'gcc' failed: No such file or directory",
+        )
+
+        assert self.installer._needs_compilation(result) is True
+
+    def test_needs_compilation_unable_to_execute_gcc(self):
+        """Test detection of unable to execute gcc."""
+        result = FunctionResponse(
+            success=False,
+            error="unable to execute 'gcc': No such file or directory",
+        )
+
+        assert self.installer._needs_compilation(result) is True
+
+    def test_needs_compilation_distutils_error(self):
+        """Test detection of distutils compilation errors."""
+        result = FunctionResponse(
+            success=False,
+            error="distutils.errors.CompileError: command 'gcc' failed",
+        )
+
+        assert self.installer._needs_compilation(result) is True
+
+    def test_needs_compilation_cc_command_failed(self):
+        """Test detection of cc command failure."""
+        result = FunctionResponse(
+            success=False, stdout="error: command 'cc' failed with exit code 1"
+        )
+
+        assert self.installer._needs_compilation(result) is True
+
+    def test_needs_compilation_gxx_missing(self):
+        """Test detection of missing g++ compiler."""
+        result = FunctionResponse(
+            success=False, error="unable to execute 'g++': No such file or directory"
+        )
+
+        assert self.installer._needs_compilation(result) is True
+
+    def test_needs_compilation_false_for_unrelated_error(self):
+        """Test that unrelated errors don't trigger compilation detection."""
+        result = FunctionResponse(
+            success=False, error="Network error: Could not find package"
+        )
+
+        assert self.installer._needs_compilation(result) is False
+
+    def test_needs_compilation_false_for_success(self):
+        """Test that successful installations don't trigger compilation detection."""
+        result = FunctionResponse(success=True, stdout="Successfully installed")
+
+        assert self.installer._needs_compilation(result) is False
+
+    @patch("platform.system")
+    @patch("dependency_installer.run_logged_subprocess")
+    def test_auto_retry_installs_build_essential_on_gcc_error(
+        self, mock_subprocess, mock_platform
+    ):
+        """Test auto-retry automatically installs build-essential when gcc missing."""
+        mock_platform.return_value = "Linux"
+
+        # First call: pip install fails with gcc error
+        # Second call: nala check (not available)
+        # Third call: apt-get update
+        # Fourth call: apt-get install build-essential
+        # Fifth call: pip install retry (succeeds)
+        mock_subprocess.side_effect = [
+            FunctionResponse(
+                success=False, error="error: command 'gcc' failed: No such file"
+            ),
+            FunctionResponse(success=False),  # nala not available
+            FunctionResponse(success=True, stdout="Updated"),  # apt-get update
+            FunctionResponse(
+                success=True, stdout="Installed build-essential"
+            ),  # apt-get install
+            FunctionResponse(success=True, stdout="Successfully installed package"),
+        ]
+
+        result = self.installer.install_dependencies(["some-package-needing-gcc"])
+
+        assert result.success is True
+        assert "Successfully installed package" in result.stdout
+        assert mock_subprocess.call_count == 5
+
+    @patch("dependency_installer.run_logged_subprocess")
+    def test_auto_retry_no_retry_for_non_compilation_errors(self, mock_subprocess):
+        """Test that non-compilation errors don't trigger auto-retry."""
+        # Pip install fails with network error (not compilation related)
+        mock_subprocess.return_value = FunctionResponse(
+            success=False, error="Network error: Could not fetch package"
+        )
+
+        result = self.installer.install_dependencies(["some-package"])
+
+        assert result.success is False
+        assert "Network error" in result.error
+        # Should only be called once (no retry)
+        assert mock_subprocess.call_count == 1
+
+    @patch("platform.system")
+    @patch("dependency_installer.run_logged_subprocess")
+    def test_auto_retry_fails_if_build_essential_install_fails(
+        self, mock_subprocess, mock_platform
+    ):
+        """Test that if build-essential installation fails, the error is returned."""
+        mock_platform.return_value = "Linux"
+
+        # First call: pip install fails with gcc error
+        # Second call: nala check (not available)
+        # Third call: apt-get update (fails)
+        mock_subprocess.side_effect = [
+            FunctionResponse(
+                success=False, error="error: command 'gcc' failed: No such file"
+            ),
+            FunctionResponse(success=False),  # nala not available
+            FunctionResponse(success=False, error="apt-get update failed"),
+        ]
+
+        result = self.installer.install_dependencies(["some-package-needing-gcc"])
+
+        assert result.success is False
+        assert "Failed to install build tools" in result.error
+
+    @patch("platform.system")
+    @patch("dependency_installer.run_logged_subprocess")
+    def test_auto_retry_with_nala_acceleration(self, mock_subprocess, mock_platform):
+        """Test auto-retry uses nala when available for build-essential installation."""
+        mock_platform.return_value = "Linux"
+
+        # First call: pip install fails with gcc error
+        # Second call: nala check (available)
+        # Third call: nala update
+        # Fourth call: nala install build-essential
+        # Fifth call: pip install retry (succeeds)
+        mock_subprocess.side_effect = [
+            FunctionResponse(
+                success=False, error="error: command 'gcc' failed: No such file"
+            ),
+            FunctionResponse(success=True, stdout="/usr/bin/nala"),  # nala available
+            FunctionResponse(success=True, stdout="Updated with nala"),
+            FunctionResponse(
+                success=True, stdout="Installed build-essential with nala"
+            ),
+            FunctionResponse(success=True, stdout="Successfully installed package"),
+        ]
+
+        result = self.installer.install_dependencies(["some-package-needing-gcc"])
+
+        assert result.success is True
+        assert "Successfully installed package" in result.stdout
+        assert mock_subprocess.call_count == 5
+
+    @patch("platform.system")
+    @patch("dependency_installer.run_logged_subprocess")
+    def test_auto_retry_succeeds_with_warnings_no_infinite_loop(
+        self, mock_subprocess, mock_platform
+    ):
+        """Test that warnings mentioning gcc in retry output don't trigger another retry."""
+        mock_platform.return_value = "Linux"
+
+        # First call: pip install fails with gcc error
+        # Second call: nala check (not available)
+        # Third call: apt-get update
+        # Fourth call: apt-get install build-essential
+        # Fifth call: pip install retry (succeeds but has warning mentioning gcc)
+        mock_subprocess.side_effect = [
+            FunctionResponse(
+                success=False,
+                error="error: command 'gcc' failed: No such file or directory",
+            ),
+            FunctionResponse(success=False),  # nala not available
+            FunctionResponse(success=True, stdout="Updated"),  # apt-get update
+            FunctionResponse(
+                success=True, stdout="Installed build-essential"
+            ),  # apt-get install
+            FunctionResponse(
+                success=True,
+                stdout="Successfully installed package\nWarning: gcc was used for compilation",
+            ),
+        ]
+
+        result = self.installer.install_dependencies(["some-package-needing-gcc"])
+
+        assert result.success is True
+        assert "Successfully installed package" in result.stdout
+        assert "Warning: gcc was used" in result.stdout
+        # Should only be called 5 times (no infinite retry loop)
+        assert mock_subprocess.call_count == 5
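The new test class covers both halves of the feature: the _needs_compilation pattern checks and the end-to-end retry flow driven through mocked run_logged_subprocess calls (pip failure, nala probe, apt/nala update and install, pip retry). One possible follow-up, not part of this diff, would be to parametrize the pattern checks; the sketch below avoids the FunctionResponse import (its location sits outside the visible hunk) by using a stand-in object with the same error/stdout attributes.

# Sketch of a possible follow-up test, not included in this PR: parametrizing
# the _needs_compilation checks. A SimpleNamespace stands in for FunctionResponse
# because _needs_compilation only reads the error and stdout attributes.
from types import SimpleNamespace

import pytest

from dependency_installer import DependencyInstaller


@pytest.mark.parametrize(
    "error_text",
    [
        "g++: command not found",
        "no such file or directory: 'cc'",
        "distutils.errors.DistutilsExecError: command 'gcc' failed",
    ],
)
def test_needs_compilation_parametrized(error_text):
    installer = DependencyInstaller()
    fake_result = SimpleNamespace(error=error_text, stdout="")
    assert installer._needs_compilation(fake_result) is True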