diff --git a/.github/workflows/container-build-push.yaml b/.github/workflows/container-build-push.yaml
index 701717b1..dd253e7f 100644
--- a/.github/workflows/container-build-push.yaml
+++ b/.github/workflows/container-build-push.yaml
@@ -1,3 +1,4 @@
+---
 name: Container Build and Push
 
 on:
@@ -8,15 +9,145 @@ on:
       - v*
   pull_request:
 
+defaults:
+  run:
+    shell: bash
+
 permissions:
   contents: read
   packages: write
-  # This is used to complete the identity challenge
-  # with sigstore/fulcio.
+  # This is used to complete the identity challenge with sigstore/fulcio.
   id-token: write
 
+env:
+  # Use docker.io for Docker Hub if empty
+  REGISTRY: ghcr.io
+  # github.repository as <account>/<repo>
+  IMAGE_NAME: ${{ github.repository }}
+
 jobs:
   build-push:
-    uses: darbiadev/.github/.github/workflows/docker-build-push.yaml@142663fca1c211af6a7dccf3a57cac48cfc3c017 # v13.0.5
-    with:
-      file-name: Dockerfile
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - linux/amd64
+
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
+
+      - name: Install cosign
+        uses: sigstore/cosign-installer@4959ce089c160fddf62f7b42464195ba1a56d382 # v3.6.0
+
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
+        with:
+          platforms: ${{ matrix.platform }}
+
+      - name: Log in to container registry (${{ env.REGISTRY }})
+        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract Docker metadata
+        id: docker_meta
+        uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=edge
+            # FIXME: Remove explicit `latest` tag once we start tagging releases
+            type=raw,value=latest,enable={{is_default_branch}}
+            type=ref,event=tag
+            type=sha,format=long
+
+      - name: Build and push Docker image
+        id: docker_build_push
+        uses: docker/build-push-action@4f58ea79222b3b9dc2c8bbdd6debcef730109a75 # v6.9.0
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            git_sha=${{ github.sha }}
+          cache-from: type=gha,scope=${{ matrix.platform }}
+          cache-to: type=gha,mode=max,scope=${{ matrix.platform }}
+          labels: ${{ steps.docker_meta.outputs.labels }}
+          platforms: ${{ matrix.platform }}
+          push: ${{ github.ref == 'refs/heads/main' || startswith(github.event.ref, 'refs/tags/v') }}
+          tags: ${{ steps.docker_meta.outputs.tags }}
+
+      # Sign the resulting Docker image digest.
+      # This will only write to the public Rekor transparency log when the Docker repository is public to avoid leaking
+      # data. If you would like to publish transparency data even for private images, pass --force to cosign below.
+      # https://github.com/sigstore/cosign
+      - name: Sign the published Docker image
+        if: ${{ github.ref == 'refs/heads/main' || startswith(github.event.ref, 'refs/tags/v') }}
+        # This step uses the identity token to provision an ephemeral certificate against the sigstore community Fulcio
+        # instance.
+        run: cosign sign --yes ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ steps.docker_build_push.outputs.digest }}
+
+      - name: Export digest
+        if: ${{ github.ref == 'refs/heads/main' || startswith(github.event.ref, 'refs/tags/v') }}
+        run: |
+          mkdir -p /tmp/digests
+          digest='${{ steps.docker_build_push.outputs.digest }}'
+          touch "/tmp/digests/${digest#sha256:}"
+
+      - name: Upload digest
+        if: ${{ github.ref == 'refs/heads/main' || startswith(github.event.ref, 'refs/tags/v') }}
+        uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0
+        with:
+          if-no-files-found: error
+          name: digests
+          path: /tmp/digests/*
+          retention-days: 1
+
+  merge:
+    if: ${{ github.ref == 'refs/heads/main' || startswith(github.event.ref, 'refs/tags/v') }}
+    needs:
+      - build-push
+
+    runs-on: ubuntu-24.04
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          name: digests
+          path: /tmp/digests
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
+
+      - name: Log in to container registry (${{ env.REGISTRY }})
+        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract Docker metadata
+        id: docker_meta
+        uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=edge
+            # FIXME: Remove explicit `latest` tag once we start tagging releases
+            type=raw,value=latest,enable={{is_default_branch}}
+            type=ref,event=tag
+            type=sha,format=long
+
+      - name: Create manifest list and push
+        working-directory: /tmp/digests
+        run: >
+          docker buildx imagetools create
+          $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${DOCKER_METADATA_OUTPUT_JSON}")
+          $(printf ' ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@sha256:%s ' *)
+
+      - name: Inspect image
+        run: >-
+          docker buildx imagetools inspect
+          '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.docker_meta.outputs.version }}'
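The trickiest line in the merge job is the `jq` call that turns the metadata-action output into `-t` flags for `imagetools create`. A minimal Python sketch of the same transformation, assuming `DOCKER_METADATA_OUTPUT_JSON` carries the `tags` array that docker/metadata-action documents (`ghcr.io/acme/app` is a made-up image name):

```python
import json

# Hypothetical payload shaped like docker/metadata-action's DOCKER_METADATA_OUTPUT_JSON.
metadata = json.loads('{"tags": ["ghcr.io/acme/app:edge", "ghcr.io/acme/app:latest"]}')

# Equivalent of: jq -cr '.tags | map("-t " + .) | join(" ")'
tag_flags = " ".join(f"-t {tag}" for tag in metadata["tags"])
print(tag_flags)  # -t ghcr.io/acme/app:edge -t ghcr.io/acme/app:latest
```

The `printf '...@sha256:%s ' *` that follows expands each digest file exported by the build job into a full image reference, so the new manifest list points at every per-platform image.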
diff --git a/.github/workflows/generate-migration-sql.yaml b/.github/workflows/generate-migration-sql.yaml
index 1ba5012c..8b8e1b96 100644
--- a/.github/workflows/generate-migration-sql.yaml
+++ b/.github/workflows/generate-migration-sql.yaml
@@ -22,7 +22,7 @@ jobs:
     runs-on: ubuntu-24.04
     steps:
       - name: Checkout (base)
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
         with:
           ref: ${{ github.event.pull_request.base.sha }}
 
@@ -50,7 +50,7 @@ jobs:
           echo "BASE_MIGRATION_REVISION=${base_head}" >>"${GITHUB_ENV}"
 
       - name: Checkout (HEAD)
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
         with:
          clean: false
           ref: ${{ github.event.pull_request.head.sha }}
diff --git a/.github/workflows/lint-test.yaml b/.github/workflows/lint-test.yaml
index 92650a57..4e021dac 100644
--- a/.github/workflows/lint-test.yaml
+++ b/.github/workflows/lint-test.yaml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: "Checkout repository"
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
 
       - name: "Setup PDM"
         uses: pdm-project/setup-pdm@568ddd69406b30de1774ec0044b73ae06e716aa4 # v4
diff --git a/.github/workflows/publish-docs.yaml b/.github/workflows/publish-docs.yaml
index 330dc5b8..c95b690e 100644
--- a/.github/workflows/publish-docs.yaml
+++ b/.github/workflows/publish-docs.yaml
@@ -21,7 +21,7 @@ jobs:
 
     steps:
       - name: "Checkout repository"
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
 
       - name: "Setup PDM"
         uses: pdm-project/setup-pdm@568ddd69406b30de1774ec0044b73ae06e716aa4 # v4
diff --git a/.github/workflows/sentry-release.yaml b/.github/workflows/sentry-release.yaml
index 024199cb..efe4116b 100644
--- a/.github/workflows/sentry-release.yaml
+++ b/.github/workflows/sentry-release.yaml
@@ -11,7 +11,7 @@ jobs:
 
     steps:
       - name: "Checkout repository"
-        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
 
       - name: "Create Sentry release"
         uses: getsentry/action-release@e769183448303de84c5a06aaaddf9da7be26d6c7 # v1.7.0
diff --git a/Dockerfile b/Dockerfile
index 90463699..081915a0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.12-slim@sha256:740d94a19218c8dd584b92f804b1158f85b0d241e5215ea26ed2dcade2b9d138 as builder
+FROM python:3.12-slim@sha256:af4e85f1cac90dd3771e47292ea7c8a9830abfabbe4faa5c53f158854c2e819d as builder
 
 RUN pip install -U pip setuptools wheel
 RUN pip install pdm
@@ -22,7 +22,7 @@ ENV GIT_SHA="testing"
 
 CMD ["pdm", "run", "coverage"]
 
-FROM python:3.12-slim@sha256:740d94a19218c8dd584b92f804b1158f85b0d241e5215ea26ed2dcade2b9d138 as prod
+FROM python:3.12-slim@sha256:af4e85f1cac90dd3771e47292ea7c8a9830abfabbe4faa5c53f158854c2e819d as prod
 
 # Define Git SHA build argument for sentry
 ARG git_sha="development"
diff --git a/alembic/versions/587c186d91ee_better_match_information.py b/alembic/versions/587c186d91ee_better_match_information.py
deleted file mode 100644
index cacffaf8..00000000
--- a/alembic/versions/587c186d91ee_better_match_information.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""better-match-information
-
-Revision ID: 587c186d91ee
-Revises: 6991bcb18f89
-Create Date: 2024-07-27 19:51:33.408128
-
-"""
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision = "587c186d91ee"
-down_revision = "6991bcb18f89"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.add_column("scans", sa.Column("files", postgresql.JSONB(), nullable=True))
-    # ### end Alembic commands ###
-
-
-def downgrade() -> None:
-    # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_column("scans", "files")
-    # ### end Alembic commands ###
diff --git a/alembic/versions/a62a93704798_add_distributions.py b/alembic/versions/a62a93704798_add_distributions.py
deleted file mode 100644
index ea7407b3..00000000
--- a/alembic/versions/a62a93704798_add_distributions.py
+++ /dev/null
@@ -1,23 +0,0 @@
-"""add distributions
-
-Revision ID: a62a93704798
-Revises: 587c186d91ee
-Create Date: 2024-08-11 08:12:42.354151
-
-"""
-
-from alembic import op
-
-# revision identifiers, used by Alembic.
-revision = "a62a93704798"
-down_revision = "587c186d91ee"
-branch_labels = None
-depends_on = None
-
-
-def upgrade() -> None:
-    op.alter_column("scans", "files", new_column_name="distributions")
-
-
-def downgrade() -> None:
-    op.alter_column("scans", "distributions", new_column_name="files")
diff --git a/docs/source/database_schema.rst b/docs/source/database_schema.rst
index faf3828b..9d49e73c 100644
--- a/docs/source/database_schema.rst
+++ b/docs/source/database_schema.rst
@@ -44,8 +44,7 @@ Database Schema
         pending_by text,
         finished_by text,
         commit_hash text,
-        fail_reason text,
-        files jsonb
+        fail_reason text
     );
 
     ALTER TABLE ONLY public.download_urls
diff --git a/logging/development.toml b/logging/development.toml
index a576bc1f..f2451ef4 100644
--- a/logging/development.toml
+++ b/logging/development.toml
@@ -11,9 +11,9 @@ propagate = true
 [loggers."sqlalchemy.engine"]
 handlers = ["default"]
 level = "INFO"
-propagate = true
+propagate = false
 
 [loggers."sqlalchemy.pool"]
 handlers = ["default"]
 level = "DEBUG"
-propagate = true
+propagate = false
diff --git a/pdm.lock b/pdm.lock
index c96e0dd8..c4389830 100644
--- a/pdm.lock
+++ b/pdm.lock
@@ -7,6 +7,7 @@ strategy = ["inherit_metadata"]
 lock_version = "4.5.0"
 content_hash = "sha256:422d739ccd9902dc7ffbf11845f29004da4a9171ed6539c79ccc847c07c81773"
+
 [[metadata.targets]]
 requires_python = ">=3.12,<3.13"
@@ -651,6 +652,30 @@ files = [
     {file = "pre_commit-3.7.1.tar.gz", hash = "sha256:8ca3ad567bc78a4972a3f1a477e94a79d4597e8140a6e0b651c5e33899c3654a"},
 ]
 
+[[package]]
+name = "prometheus-client"
+version = "0.21.0"
+requires_python = ">=3.8"
+summary = "Python client for the Prometheus monitoring system."
+files = [
+    {file = "prometheus_client-0.21.0-py3-none-any.whl", hash = "sha256:4fa6b4dd0ac16d58bb587c04b1caae65b8c5043e85f778f42f5f632f6af2e166"},
+    {file = "prometheus_client-0.21.0.tar.gz", hash = "sha256:96c83c606b71ff2b0a433c98889d275f51ffec6c5e267de37c7a2b5c9aa9233e"},
+]
+
+[[package]]
+name = "prometheus-fastapi-instrumentator"
+version = "7.0.0"
+requires_python = ">=3.8.1,<4.0.0"
+summary = "Instrument your FastAPI with Prometheus metrics."
+dependencies = [
+    "prometheus-client<1.0.0,>=0.8.0",
+    "starlette<1.0.0,>=0.30.0",
+]
+files = [
+    {file = "prometheus_fastapi_instrumentator-7.0.0-py3-none-any.whl", hash = "sha256:96030c43c776ee938a3dae58485ec24caed7e05bfc60fe067161e0d5b5757052"},
+    {file = "prometheus_fastapi_instrumentator-7.0.0.tar.gz", hash = "sha256:5ba67c9212719f244ad7942d75ded80693b26331ee5dfc1e7571e4794a9ccbed"},
+]
+
 [[package]]
 name = "psycopg2-binary"
 version = "2.9.9"
diff --git a/pyproject.toml b/pyproject.toml
index 4d10d4a1..8f4dcb49 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,8 @@ dependencies = [
     "structlog-sentry==2.1.0",
     "structlog==24.4.0",
     "uvicorn[standard]==0.30.3",
+    "prometheus-client>=0.21.0",
+    "prometheus-fastapi-instrumentator>=7.0.0",
 ]
 dynamic = ["version"]
 
@@ -133,4 +135,3 @@ omit = [
 
 [tool.coverage.report]
 fail_under = 100
-exclude_also = ["if TYPE_CHECKING:"]
diff --git a/src/mainframe/dependencies.py b/src/mainframe/dependencies.py
index 32eb5ac0..438292f4 100644
--- a/src/mainframe/dependencies.py
+++ b/src/mainframe/dependencies.py
@@ -18,6 +18,10 @@ def get_pypi_client() -> PyPIServices:
     return PyPIServices(http_client)
 
 
+def get_httpx_client(request: Request) -> httpx.Client:
+    return request.app.state.http_session
+
+
 def get_rules(request: Request) -> Rules:
     return request.app.state.rules
diff --git a/src/mainframe/endpoints/package.py b/src/mainframe/endpoints/package.py
index 71f7b835..1a6f1554 100644
--- a/src/mainframe/endpoints/package.py
+++ b/src/mainframe/endpoints/package.py
@@ -26,6 +26,8 @@
     QueuePackageResponse,
 )
 
+from mainframe.metrics import packages_ingested, packages_in_queue, packages_fail, packages_success
+
 router = APIRouter(tags=["package"])
 logger: structlog.stdlib.BoundLogger = structlog.get_logger()
 
@@ -64,6 +66,8 @@ def submit_results(
         log.error(
             f"Scan {name}@{version} already in a FINISHED state", error_message=error.detail, tag="already_finished"
         )
+        packages_fail.inc()
+        packages_in_queue.dec()
         raise error
 
     with session, session.begin():
@@ -80,7 +84,6 @@ def submit_results(
         scan.score = result.score
         scan.finished_by = auth.subject
         scan.commit_hash = result.commit
-        scan.distributions = result.distributions
 
         # These are the rules that already have an entry in the database
         rules = session.scalars(select(Rule).where(Rule.name.in_(result.rules_matched))).all()
@@ -107,6 +110,9 @@ def submit_results(
         tag="scan_submitted",
     )
 
+    packages_success.inc()
+    packages_in_queue.dec()
+
 
 @router.get(
     "/package",
@@ -234,6 +240,9 @@ def batch_queue_package(
 
         session.add(scan)
 
+    packages_ingested.inc(len(packages))
+    packages_in_queue.inc(len(packages))
+
 
 @router.post(
     "/package",
@@ -302,4 +311,7 @@ def queue_package(
         tag="package_added",
     )
 
+    packages_ingested.inc()
+    packages_in_queue.inc()
+
     return QueuePackageResponse(id=str(new_package.scan_id))
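Because the endpoints now bump module-level metrics, their effect can be asserted through `prometheus_client`'s default registry. A minimal sketch, not part of this PR, assuming the default registry is in use (the client suffixes counter sample names with `_total`):

```python
from prometheus_client import REGISTRY

from mainframe.metrics import packages_ingested


def sample(name: str) -> float:
    # Series that have never been incremented read as None; treat them as 0.
    return REGISTRY.get_sample_value(name) or 0.0


before = sample("packages_ingested_total")
packages_ingested.inc()
assert sample("packages_ingested_total") == before + 1
```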
diff --git a/src/mainframe/endpoints/report.py b/src/mainframe/endpoints/report.py
index 250a2499..4eeed521 100644
--- a/src/mainframe/endpoints/report.py
+++ b/src/mainframe/endpoints/report.py
@@ -5,13 +5,12 @@
 import structlog
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.encoders import jsonable_encoder
-from letsbuilda.pypi import PackageNotFoundError, PyPIServices
 from sqlalchemy import select
 from sqlalchemy.orm import Session, joinedload
 
 from mainframe.constants import mainframe_settings
 from mainframe.database import get_db
-from mainframe.dependencies import get_pypi_client, validate_token
+from mainframe.dependencies import get_httpx_client, validate_token
 from mainframe.json_web_token import AuthenticationData
 from mainframe.models.orm import Scan
 from mainframe.models.schemas import (
@@ -22,6 +21,8 @@
     ReportPackageBody,
 )
 
+from mainframe.metrics import packages_reported
+
 logger: structlog.stdlib.BoundLogger = structlog.get_logger()
@@ -138,12 +139,11 @@ def _validate_additional_information(body: ReportPackageBody, scan: Scan):
         raise error
 
 
-def _validate_pypi(name: str, version: str, pypi_client: PyPIServices):
+def _validate_pypi(name: str, version: str, http_client: httpx.Client):
     log = logger.bind(package={"name": name, "version": version})
 
-    try:
-        pypi_client.get_package_metadata(name, version)
-    except PackageNotFoundError:
+    response = http_client.get(f"https://pypi.org/project/{name}")
+    if response.status_code == 404:
         error = HTTPException(404, detail="Package not found on PyPI")
         log.error("Package not found on PyPI", tag="package_not_found_pypi")
         raise error
@@ -160,7 +160,7 @@ def report_package(
     body: ReportPackageBody,
     session: Annotated[Session, Depends(get_db)],
     auth: Annotated[AuthenticationData, Depends(validate_token)],
-    pypi_client: Annotated[PyPIServices, Depends(get_pypi_client)],
+    httpx_client: Annotated[httpx.Client, Depends(get_httpx_client)],
 ):
     """
     Report a package to PyPI.
@@ -206,7 +206,7 @@ def report_package(
 
     # If execution reaches here, we must have found a matching scan in our
     # database. Check if the package we want to report exists on PyPI.
-    _validate_pypi(name, version, pypi_client)
+    _validate_pypi(name, version, httpx_client)
 
     rules_matched: list[str] = [rule.name for rule in scan.rules]
 
@@ -220,7 +220,7 @@ def report_package(
             additional_information=body.additional_information,
         )
 
-        httpx.post(f"{mainframe_settings.reporter_url}/report/email", json=jsonable_encoder(report))
+        httpx_client.post(f"{mainframe_settings.reporter_url}/report/email", json=jsonable_encoder(report))
     else:
         # We previously checked this condition, but the typechecker isn't smart
         # enough to figure that out
@@ -233,7 +233,7 @@ def report_package(
             extra=dict(yara_rules=rules_matched),
         )
 
-        httpx.post(f"{mainframe_settings.reporter_url}/report/{name}", json=jsonable_encoder(report))
+        httpx_client.post(f"{mainframe_settings.reporter_url}/report/{name}", json=jsonable_encoder(report))
 
     with session.begin():
         scan.reported_by = auth.subject
@@ -253,3 +253,5 @@ def report_package(
         },
         reported_by=auth.subject,
     )
+
+    packages_reported.inc()
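One behavioral note on this change: the new `_validate_pypi` requests `https://pypi.org/project/{name}` and therefore only confirms that the project exists; the `version` argument no longer affects the request, whereas the removed `get_package_metadata(name, version)` call validated the specific release. If version-level validation is ever wanted again, one option is PyPI's JSON API, which also returns 404 for unknown versions. A hedged sketch (`pypi_release_exists` is a hypothetical helper, not part of this PR):

```python
import httpx


def pypi_release_exists(http_client: httpx.Client, name: str, version: str) -> bool:
    # Stricter variant of _validate_pypi: the JSON API 404s for unknown
    # versions as well as unknown projects.
    response = http_client.get(f"https://pypi.org/pypi/{name}/{version}/json")
    return response.status_code != 404
```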
diff --git a/src/mainframe/endpoints/stats.py b/src/mainframe/endpoints/stats.py
deleted file mode 100644
index 6a2755c3..00000000
--- a/src/mainframe/endpoints/stats.py
+++ /dev/null
@@ -1,52 +0,0 @@
-from datetime import datetime, timedelta, timezone
-from typing import Annotated
-
-from fastapi import APIRouter, Depends
-from sqlalchemy import func, select
-from sqlalchemy.orm import Session
-
-from mainframe.database import get_db
-from mainframe.dependencies import validate_token
-from mainframe.models.orm import Scan, Status
-from mainframe.models.schemas import StatsResponse
-
-router = APIRouter(tags=["stats"])
-
-
-def _get_package_ingest(session: Session) -> int:
-    scalar_result = session.scalars(
-        select(func.count()).select_from(Scan).where(Scan.queued_at > datetime.now(timezone.utc) - timedelta(hours=24))
-    )
-    return scalar_result.one()
-
-
-def _get_average_scan_time(session: Session) -> float:
-    scalar_result = session.scalars(
-        select(func.avg(Scan.finished_at - Scan.pending_at))
-        .where(Scan.pending_at.is_not(None))
-        .where(Scan.finished_at.is_not(None))
-        .where(Scan.queued_at > datetime.now(timezone.utc) - timedelta(hours=24))
-    )
-
-    return scalar_result.one().total_seconds()
-
-
-def _get_failed_packages(session: Session) -> int:
-    scalar_result = session.scalars(
-        select(func.count())
-        .select_from(Scan)
-        .where(Scan.status == Status.FAILED)
-        .where(Scan.queued_at > datetime.now(timezone.utc) - timedelta(hours=24))
-    )
-
-    return scalar_result.one()
-
-
-@router.get("/stats", dependencies=[Depends(validate_token)])
-def get_stats(session: Annotated[Session, Depends(get_db)]) -> StatsResponse:
-    with session, session.begin():
-        return StatsResponse(
-            ingested=_get_package_ingest(session),
-            average_scan_time=_get_average_scan_time(session),
-            failed=_get_failed_packages(session),
-        )
diff --git a/src/mainframe/metrics.py b/src/mainframe/metrics.py
new file mode 100644
index 00000000..54ee6d84
--- /dev/null
+++ b/src/mainframe/metrics.py
@@ -0,0 +1,14 @@
+from prometheus_client import Counter, Gauge
+
+
+packages_ingested = Counter("packages_ingested", "Total number of packages ingested")
+
+packages_in_queue = Gauge(
+    "packages_in_queue",
+    "Packages that are currently waiting to be scanned. Includes queued and pending packages.",
+)
+
+packages_success = Counter("packages_success", "Number of packages scanned successfully")
+packages_fail = Counter("packages_fail", "Number of packages that failed scanning")
+
+packages_reported = Counter("packages_reported", "Number of packages reported")
diff --git a/src/mainframe/models/__init__.py b/src/mainframe/models/__init__.py
index a0f2e447..7bb09cc1 100644
--- a/src/mainframe/models/__init__.py
+++ b/src/mainframe/models/__init__.py
@@ -1,27 +1 @@
 """Database models."""
-
-from typing import Optional, Any, Type
-from pydantic import BaseModel
-from sqlalchemy import Dialect, TypeDecorator
-from sqlalchemy.dialects.postgresql import JSONB
-
-
-class Pydantic[T: BaseModel](TypeDecorator[T]):
-    """TypeDecorator to convert between Pydantic models and JSONB."""
-
-    impl = JSONB
-    cache_ok = True
-
-    def __init__(self, pydantic_type: Type[T]):
-        super().__init__()
-        self.pydantic_type = pydantic_type
-
-    def process_bind_param(self, value: Optional[T], dialect: Dialect) -> dict[str, Any]:
-        if value:
-            return value.model_dump()
-        else:
-            return {}
-
-    def process_result_value(self, value: Any, dialect: Dialect) -> Optional[T]:
-        if value:
-            return self.pydantic_type.model_validate(value)
diff --git a/src/mainframe/models/orm.py b/src/mainframe/models/orm.py
index a8211bbc..b2c6265f 100644
--- a/src/mainframe/models/orm.py
+++ b/src/mainframe/models/orm.py
@@ -27,9 +27,6 @@
     relationship,
 )
 
-from mainframe.models import Pydantic
-from mainframe.models.schemas import Distributions
-
 
 class Base(MappedAsDataclass, DeclarativeBase, kw_only=True):
     pass
@@ -102,8 +99,6 @@ class Scan(Base):
 
     commit_hash: Mapped[Optional[str]] = mapped_column(default=None)
 
-    distributions: Mapped[Optional[Distributions]] = mapped_column(Pydantic(Distributions), default=None)
-
 
 Index(None, Scan.status, postgresql_where=or_(Scan.status == Status.QUEUED, Scan.status == Status.PENDING))
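The metric objects in `metrics.py` register with `prometheus_client`'s default registry at import time, so their exposition output can be inspected directly. A small sketch (note that the client appends `_total` to counter names in the exposition format):

```python
from prometheus_client import generate_latest

from mainframe.metrics import packages_in_queue, packages_ingested

packages_ingested.inc(3)
packages_in_queue.inc()

# Prints Prometheus exposition text, including lines such as:
#   packages_ingested_total 3.0
#   packages_in_queue 1.0
print(generate_latest().decode())
```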
diff --git a/src/mainframe/models/schemas.py b/src/mainframe/models/schemas.py
index a27a11ff..cad4d420 100644
--- a/src/mainframe/models/schemas.py
+++ b/src/mainframe/models/schemas.py
@@ -1,62 +1,10 @@
-from __future__ import annotations
-
 import datetime
 from enum import Enum
-from typing import TYPE_CHECKING, Annotated, Any, Optional
-
-from pydantic import BaseModel, Field, field_serializer, ConfigDict, RootModel
-
-if TYPE_CHECKING:
-    from mainframe.models.orm import Scan
-
-type MetaValue = int | float | bool | str | bytes
-
-
-class Range(BaseModel):
-    """Represents the inclusive range in the source file that was matched."""
-
-    start: int
-    end: int
-
-
-class Match(BaseModel):
-    """Represents a specific match by a pattern in a rule."""
-
-    range: Range
-    data: list[Annotated[int, Field(ge=0, lt=256)]]
-
-
-class PatternMatch(BaseModel):
-    """Represents the data matched by a pattern inside a rule."""
+from typing import Any, Optional
 
-    identifier: str
-    matches: list[Match]
+from pydantic import BaseModel, Field, field_serializer, ConfigDict
 
-
-class RuleMatch(BaseModel):
-    """Represents the matches of a rule on a file"""
-
-    identifier: str
-    patterns: list[PatternMatch]
-    metadata: dict[str, MetaValue]
-
-
-class File(BaseModel):
-    """Represents a file and the rule matches for it."""
-
-    path: str
-    matches: list[RuleMatch]
-
-
-Files = list[File]
-
-
-class Distribution(BaseModel):
-    download_url: str
-    files: Files
-
-
-Distributions = RootModel[list[Distribution]]
+from .orm import Scan
 
 
 class ServerMetadata(BaseModel):
@@ -96,8 +44,6 @@ class Package(BaseModel):
 
     commit_hash: Optional[str]
 
-    distributions: Optional[Distributions]
-
     @classmethod
     def from_db(cls, scan: Scan):
         return cls(
@@ -118,7 +64,6 @@ def from_db(cls, scan: Scan):
             finished_at=scan.finished_at,
             finished_by=scan.finished_by,
             commit_hash=scan.commit_hash,
-            distributions=scan.distributions,
         )
 
     @field_serializer(
@@ -187,7 +132,6 @@ class PackageScanResult(PackageSpecifier):
     score: int = 0
     inspector_url: Optional[str] = None
     rules_matched: list[str] = []
-    distributions: Optional[Distributions] = None
 
 
 class PackageScanResultFail(PackageSpecifier):
diff --git a/src/mainframe/server.py b/src/mainframe/server.py
index bd702549..fd699664 100644
--- a/src/mainframe/server.py
+++ b/src/mainframe/server.py
@@ -21,6 +21,8 @@
 from mainframe.models.schemas import ServerMetadata
 from mainframe.rules import Rules, fetch_rules
 
+from prometheus_fastapi_instrumentator import Instrumentator
+
 from . import __version__
@@ -73,6 +75,8 @@ async def lifespan(app_: FastAPI):
     version=__version__,
 )
 
+Instrumentator().instrument(app).expose(app)
+
 if GIT_SHA in ("development", "testing"):
     app.dependency_overrides[validate_token] = validate_token_override
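`Instrumentator().instrument(app).expose(app)` attaches the instrumentator's default HTTP-metrics middleware and mounts a `GET /metrics` endpoint on the app. A self-contained sketch of the same wiring on a bare FastAPI app (the `/ping` route is a made-up placeholder):

```python
from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator

app = FastAPI()


@app.get("/ping")
def ping() -> dict[str, str]:
    return {"ping": "pong"}


# instrument() adds the metrics middleware; expose() mounts GET /metrics.
Instrumentator().instrument(app).expose(app)
```

Since the instrumentator and `mainframe.metrics` both use the default registry, the `/metrics` endpoint serves the custom package counters alongside the standard HTTP metrics.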
diff --git a/tests/test_package.py b/tests/test_package.py
index eeeef6f3..ba18a3dc 100644
--- a/tests/test_package.py
+++ b/tests/test_package.py
@@ -20,18 +20,10 @@
 from mainframe.json_web_token import AuthenticationData
 from mainframe.models.orm import Scan, Status
 from mainframe.models.schemas import (
-    Distribution,
-    Distributions,
-    File,
-    Files,
-    Match,
     Package,
     PackageScanResult,
     PackageScanResultFail,
     PackageSpecifier,
-    PatternMatch,
-    Range,
-    RuleMatch,
 )
 from mainframe.rules import Rules
 
@@ -98,32 +90,6 @@ def test_package_lookup_rejects_invalid_combinations(
     assert e.value.status_code == 400
 
 
-def test_package_lookup_files(db_session: Session):
-    """Test that `lookup_package_info` returns detailed file information."""
-
-    range_ = Range(start=0, end=4)
-    match = Match(range=range_, data=[0xDE, 0xAD, 0xBE, 0xEF])
-    pattern = PatternMatch(identifier="$pat", matches=[match])
-    rule = RuleMatch(identifier="rule1", patterns=[pattern], metadata={"author": "remmy", "score": 5})
-    file = File(path="dist1/a/b.py", matches=[rule])
-    files = Files([file])
-    distros = Distributions([Distribution(download_url="http://example.com", files=files)])
-    scan = Scan(
-        name="abc",
-        version="1.0.0",
-        status=Status.FINISHED,
-        queued_by="remmy",
-        distributions=distros,
-    )
-
-    with db_session.begin():
-        db_session.add(scan)
-
-    package = lookup_package_info(db_session, name="abc", version="1.0.0")[0]
-
-    assert package.distributions == distros
-
-
 def test_handle_success(db_session: Session, test_data: list[Scan], auth: AuthenticationData, rules_state: Rules):
     job = get_jobs(db_session, auth, rules_state, batch=1)
 
@@ -132,14 +98,6 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen
         name = job.name
         version = job.version
 
-        range_ = Range(start=0, end=4)
-        match = Match(range=range_, data=[0xDE, 0xAD, 0xBE, 0xEF])
-        pattern = PatternMatch(identifier="$pat", matches=[match])
-        rule = RuleMatch(identifier="rule1", patterns=[pattern], metadata={"author": "remmy", "score": 5})
-        file = File(path="dist1/a/b.py", matches=[rule])
-        files = Files([file])
-        distros = Distributions([Distribution(download_url="http://example.com", files=files)])
-
         body = PackageScanResult(
             name=job.name,
             version=job.version,
@@ -147,7 +105,6 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen
             score=2,
             inspector_url="test inspector url",
             rules_matched=["a", "b", "c"],
-            distributions=distros,
         )
 
         submit_results(body, db_session, auth)
@@ -160,7 +117,6 @@ def test_handle_success(db_session: Session, test_data: list[Scan], auth: Authen
         assert record.score == 2
         assert record.inspector_url == "test inspector url"
         assert {rule.name for rule in record.rules} == {"a", "b", "c"}
-        assert record.distributions == distros
     else:
         assert all(scan.status != Status.QUEUED for scan in test_data)
diff --git a/tests/test_report.py b/tests/test_report.py
index 92619d89..fde95f05 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -7,9 +7,6 @@
 import pytest
 from fastapi import HTTPException
 from fastapi.encoders import jsonable_encoder
-from letsbuilda.pypi import PyPIServices
-from letsbuilda.pypi.exceptions import PackageNotFoundError
-from pytest import MonkeyPatch
 from sqlalchemy import select
 from sqlalchemy.orm import Session, sessionmaker
@@ -79,7 +76,6 @@ def test_report(
     sm: sessionmaker[Session],
     db_session: Session,
     auth: AuthenticationData,
-    pypi_client: PyPIServices,
     body: ReportPackageBody,
     url: str,
     expected: EmailReport | ObservationReport,
@@ -107,11 +103,11 @@ def test_report(
     with db_session.begin():
         db_session.add(scan)
 
-    httpx.post = MagicMock()
+    mock_httpx_client = MagicMock()
 
-    report_package(body, sm(), auth, pypi_client)
+    report_package(body, sm(), auth, mock_httpx_client)
 
-    httpx.post.assert_called_once_with(url, json=jsonable_encoder(expected))
+    mock_httpx_client.post.assert_called_once_with(url, json=jsonable_encoder(expected))
 
     with sm() as sess, sess.begin():
         s = sess.scalar(select(Scan).where(Scan.name == "c").where(Scan.version == "1.0.0"))
@@ -121,18 +117,12 @@ def test_report(
         assert s.reported_at is not None
 
 
-def test_report_package_not_on_pypi(
-    pypi_client: PyPIServices,
-    monkeypatch: MonkeyPatch,
-):
-    # Make get_package_metadata always throw PackageNotFoundError to simulate an invalid package
-    def _side_effect(name: str, version: str):
-        raise PackageNotFoundError(name, version)
-
-    monkeypatch.setattr(pypi_client, "get_package_metadata", _side_effect)
+def test_report_package_not_on_pypi():
+    mock_httpx_client = MagicMock(spec=httpx.Client)
+    mock_httpx_client.configure_mock(**{"get.return_value.status_code": 404})
 
     with pytest.raises(HTTPException) as e:
-        _validate_pypi("c", "1.0.0", pypi_client)
+        _validate_pypi("c", "1.0.0", mock_httpx_client)
 
     assert e.value.status_code == 404
diff --git a/tests/test_stats.py b/tests/test_stats.py
deleted file mode 100644
index f8a4d79c..00000000
--- a/tests/test_stats.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from datetime import datetime, timedelta
-from math import isclose
-
-from sqlalchemy.orm import Session
-
-from mainframe.endpoints.stats import get_stats
-from mainframe.models.orm import DownloadURL, Rule, Scan, Status
-
-
-def test_stats(db_session: Session):
-    scan = Scan(
-        name="c",
-        version="1.0.0",
-        status=Status.FINISHED,
-        score=5,
-        inspector_url="test inspector url",
-        rules=[Rule(name="test rule")],
-        download_urls=[DownloadURL(url="test download url")],
-        queued_at=datetime.now() - timedelta(seconds=60),
-        queued_by="remmy",
-        pending_at=datetime.now() - timedelta(seconds=30),
-        pending_by="remmy",
-        finished_at=datetime.now(),
-        finished_by="remmy",
-        reported_at=None,
-        reported_by=None,
-        fail_reason=None,
-        commit_hash="test commit hash",
-    )
-    with db_session.begin():
-        db_session.add(scan)
-
-    stats = get_stats(db_session)
-    assert stats.ingested == 1
-    assert isclose(stats.average_scan_time, 30, rel_tol=0.01)  # float precision is ridiculous
-    assert stats.failed == 0
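With the `/stats` endpoint and its test removed, the 24-hour aggregates it computed are presumably expected to come from the metrics backend instead, e.g. a PromQL query along the lines of `increase(packages_ingested_total[24h])` once a Prometheus server scrapes `/metrics`; that scrape configuration is an assumption about the deployment, not something this diff sets up.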