From 841dac153aa5f875640631903ed65e6f7b9fdc1f Mon Sep 17 00:00:00 2001
From: Lucas Parzianello
Date: Wed, 12 Mar 2025 14:23:00 -0400
Subject: [PATCH 1/4] gwy: returning a more readable message on missing files for capture creation and update

---
 .../api_methods/views/capture_endpoints.py | 91 +++++++++++++------
 sdk/pyproject.toml                         |  2 +-
 2 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/gateway/sds_gateway/api_methods/views/capture_endpoints.py b/gateway/sds_gateway/api_methods/views/capture_endpoints.py
index 151e6c98..5c9a5a63 100644
--- a/gateway/sds_gateway/api_methods/views/capture_endpoints.py
+++ b/gateway/sds_gateway/api_methods/views/capture_endpoints.py
@@ -2,20 +2,20 @@
 import tempfile
 import uuid
 from pathlib import Path
-from typing import Any
-from typing import cast
+from typing import Any, cast
 
 from django.db.models import QuerySet
 from django.shortcuts import get_object_or_404
 from drf_spectacular.types import OpenApiTypes
-from drf_spectacular.utils import OpenApiExample
-from drf_spectacular.utils import OpenApiParameter
-from drf_spectacular.utils import OpenApiResponse
-from drf_spectacular.utils import extend_schema
+from drf_spectacular.utils import (
+    OpenApiExample,
+    OpenApiParameter,
+    OpenApiResponse,
+    extend_schema,
+)
 from loguru import logger as log
 from opensearchpy import exceptions as os_exceptions
-from rest_framework import status
-from rest_framework import viewsets
+from rest_framework import status, viewsets
 from rest_framework.pagination import PageNumberPagination
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.request import Request
@@ -26,16 +26,19 @@
 from sds_gateway.api_methods.helpers.extract_drf_metadata import (
     validate_metadata_by_channel,
 )
-from sds_gateway.api_methods.helpers.index_handling import UnknownIndexError
-from sds_gateway.api_methods.helpers.index_handling import index_capture_metadata
-from sds_gateway.api_methods.helpers.reconstruct_file_tree import find_rh_metadata_file
-from sds_gateway.api_methods.helpers.reconstruct_file_tree import reconstruct_tree
+from sds_gateway.api_methods.helpers.index_handling import (
+    UnknownIndexError,
+    index_capture_metadata,
+)
+from sds_gateway.api_methods.helpers.reconstruct_file_tree import (
+    find_rh_metadata_file,
+    reconstruct_tree,
+)
 from sds_gateway.api_methods.helpers.rh_schema_generator import load_rh_file
 from sds_gateway.api_methods.helpers.search_captures import search_captures
-from sds_gateway.api_methods.models import Capture
-from sds_gateway.api_methods.models import CaptureType
-from sds_gateway.api_methods.serializers.capture_serializers import CaptureGetSerializer
+from sds_gateway.api_methods.models import Capture, CaptureType
 from sds_gateway.api_methods.serializers.capture_serializers import (
+    CaptureGetSerializer,
     CapturePostSerializer,
 )
 from sds_gateway.api_methods.utils.metadata_schemas import infer_index_name
@@ -123,6 +126,8 @@ def ingest_capture(
             requester: The user making the request
             rh_scan_group: Optional scan group UUID for RH captures
             top_level_dir: Path to directory containing files to connect to capture
+        Raises:
+            FileNotFoundError: If there are no files to connect to this capture
         """
         # check if the top level directory was passed
         if not top_level_dir:
@@ -147,6 +152,14 @@ def ingest_capture(
             verbose=True,
         )
 
+        if not files_to_connect:
+            msg = (
+                f"No files found for '{top_level_dir}' "
+                f"matching capture '{capture.capture_type}'"
+            )
+            log.warning(msg)
+            raise FileNotFoundError(msg)
+
         # try to validate and index metadata before connecting files
         self._validate_and_index_metadata(
             capture=capture,
@@ -279,21 +292,31 @@ def create(self, request: Request) -> Response:  # noqa: PLR0911
                 requester=requester,
                 top_level_dir=requested_top_level_dir,
             )
-        except UnknownIndexError as e:
-            user_msg = f"Unknown index: '{e}'. Try recreating this capture."
+        except UnknownIndexError as err:
+            user_msg = f"Unknown index: '{err}'. Try recreating this capture."
             server_msg = (
-                f"Unknown index: '{e}'. Try running the init_indices "
+                f"Unknown index: '{err}'. Try running the init_indices "
                 "subcommand if this is index should exist."
             )
             log.error(server_msg)
             capture.soft_delete()
             return Response({"detail": user_msg}, status=status.HTTP_400_BAD_REQUEST)
-        except ValueError as e:
-            user_msg = f"Error handling metadata for capture '{capture.uuid}': {e}"
+        except FileNotFoundError as err:
+            user_msg = (
+                "Could not find relevant files to create capture. "
+                f"Please check if your files are in '{unsafe_top_level_dir}' "
+                "under SDS. You can list the files in this directory to verify."
+                f" {err}"
+            )
+            log.warning(user_msg)
             capture.soft_delete()
             return Response({"detail": user_msg}, status=status.HTTP_400_BAD_REQUEST)
-        except os_exceptions.ConnectionError as e:
-            user_msg = f"Error connecting to OpenSearch: {e}"
+        except ValueError as err:
+            user_msg = f"Error handling metadata for capture '{capture.uuid}': {err}"
+            capture.soft_delete()
+            return Response({"detail": user_msg}, status=status.HTTP_400_BAD_REQUEST)
+        except os_exceptions.ConnectionError as err:
+            user_msg = f"Error connecting to OpenSearch: {err}"
             log.error(user_msg)
             capture.soft_delete()
             return Response(status=status.HTTP_503_SERVICE_UNAVAILABLE)
@@ -530,19 +553,29 @@ def update(self, request: Request, pk: str | None = None) -> Response:
                 requester=owner,
                 top_level_dir=requested_top_level_dir,
             )
-        except UnknownIndexError as e:
-            user_msg = f"Unknown index: '{e}'. Try recreating this capture."
+        except UnknownIndexError as err:
+            user_msg = f"Unknown index: '{err}'. Try recreating this capture."
             server_msg = (
-                f"Unknown index: '{e}'. Try running the init_indices "
+                f"Unknown index: '{err}'. Try running the init_indices "
                 "subcommand if this is index should exist."
             )
             log.error(server_msg)
             return Response({"detail": user_msg}, status=status.HTTP_400_BAD_REQUEST)
-        except ValueError as e:
-            msg = f"Error handling metadata for capture '{target_capture.uuid}': {e}"
+        except FileNotFoundError as err:
+            user_msg = (
+                "Could not find relevant files to update capture. "
+                "Please check if your files are still in "
+                f"'{target_capture.top_level_dir}' "
+                "under SDS. You can list the files in this directory to verify."
+                f" {err}"
+            )
+            log.warning(user_msg)
+            return Response({"detail": user_msg}, status=status.HTTP_400_BAD_REQUEST)
+        except ValueError as err:
+            msg = f"Error handling metadata for capture '{target_capture.uuid}': {err}"
             return Response({"detail": msg}, status=status.HTTP_400_BAD_REQUEST)
-        except os_exceptions.ConnectionError as e:
-            msg = f"Error connecting to OpenSearch: {e}"
+        except os_exceptions.ConnectionError as err:
+            msg = f"Error connecting to OpenSearch: {err}"
             log.error(msg)
             return Response(status=status.HTTP_503_SERVICE_UNAVAILABLE)
 
diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
index 38c6c8dd..2d80f75a 100644
--- a/sdk/pyproject.toml
+++ b/sdk/pyproject.toml
@@ -153,13 +153,13 @@
   # https://docs.astral.sh/ruff/settings/#lint_ignore
   "COM812", # disabled following ruff's recommendation
   "ISC001", # disabled following ruff's recommendation
-  # "N811", # Constant imports aliased to non-constant-style names (false positives with django)
   "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar`
   "S101", # Use of assert detected https://docs.astral.sh/ruff/rules/assert/
   "S104", # Possible binding to all interfaces
   "SIM102", # sometimes it's better to nest
   "UP038", # Checks for uses of isinstance/issubclass that take a tuple
   # of types for comparison.
+  # "N811", # Constant imports aliased to non-constant-style names (false positives with django)
   # UP038 deactivated because it can make the code slow:
   # https://github.com/astral-sh/ruff/issues/7871
 ]

From 473584f72ab3435c84a4c6b4c39c0373533c3f06 Mon Sep 17 00:00:00 2001
From: Lucas Parzianello
Date: Wed, 2 Apr 2025 13:51:07 -0400
Subject: [PATCH 2/4] gwy: silencing too many return statements

---
 gateway/sds_gateway/api_methods/views/capture_endpoints.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gateway/sds_gateway/api_methods/views/capture_endpoints.py b/gateway/sds_gateway/api_methods/views/capture_endpoints.py
index 5c9a5a63..2732cdc8 100644
--- a/gateway/sds_gateway/api_methods/views/capture_endpoints.py
+++ b/gateway/sds_gateway/api_methods/views/capture_endpoints.py
@@ -519,7 +519,7 @@ def list(self, request: Request) -> Response:
         ),
         summary="Update Capture",
     )
-    def update(self, request: Request, pk: str | None = None) -> Response:
+    def update(self, request: Request, pk: str | None = None) -> Response:  # noqa: PLR0911
         """Update a capture by adding files or re-indexing metadata."""
         if pk is None:
             return Response(

From 7a65da71fd66d6c106dc2668723aaedc7e67317f Mon Sep 17 00:00:00 2001
From: Lucas Parzianello
Date: Wed, 2 Apr 2025 13:51:40 -0400
Subject: [PATCH 3/4] gwy: ci: added pre-commit make target

---
 gateway/makefile | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/gateway/makefile b/gateway/makefile
index 045cd72c..71796220 100644
--- a/gateway/makefile
+++ b/gateway/makefile
@@ -1,7 +1,7 @@
 # Build and manage a local deploy of the SDS Gateway - NOT PRODUCTION READY
 
-.PHONY: all redeploy build build-full up logs logs-once down pre-commit restart \
-	serve-coverage test update
+.PHONY: all redeploy build build-full down logs logs-once pre-commit restart \
+	serve-coverage test up update
 
 all: build up logs
 redeploy: build down up logs
 
 APP_CONTAINER := sds-gateway-local-app
 COMPOSE_FILE := compose.local.yaml
 ENV_FILE := .envs/local/opensearch.env
 
-build-full:
-	@echo "Pulling and building sds-gateway WITHOUT CACHE"
-	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) pull --ignore-buildable
-	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) build --no-cache $(ARGS)
-
 build:
 	@echo "Pulling and building sds-gateway"
 	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) pull --ignore-buildable
 	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) build $(ARGS)
 
-up:
-	@echo "Starting sds-gateway"
-	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) up -d --remove-orphans $(ARGS)
+build-full:
+	@echo "Pulling and building sds-gateway WITHOUT CACHE"
+	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) pull --ignore-buildable
+	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) build --no-cache $(ARGS)
+
+down:
+	@echo "Stopping sds-gateway"
+	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) down $(ARGS)
 
 logs:
 	@echo "Showing sds-gateway logs…"
 	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) logs --follow $(ARGS)
 
 logs-once:
 	@echo "Showing gateway logs once…"
 	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) logs $(ARGS)
 
-down:
-	@echo "Stopping sds-gateway"
-	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) down $(ARGS)
-
 pre-commit:
 	uv run --dev pre-commit run --all-files
 
@@ -59,6 +55,10 @@ test:
 	@# Django's test runner: obsolete, subset of pytest tests, left as reference.
 	@# @COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) run $(APP_CONTAINER) python manage.py test --no-input --force-color --verbosity 0
 
+up:
+	@echo "Starting sds-gateway"
+	@COMPOSE_FILE=$(COMPOSE_FILE) docker compose --env-file $(ENV_FILE) up -d --remove-orphans $(ARGS)
+
 update:
 	@# uv sync --upgrade # re-enable when uv integration is done
 	uv run pre-commit autoupdate

From 9f060ea9b550e414446bdbc86722e81db2a5a390 Mon Sep 17 00:00:00 2001
From: Lucas Parzianello
Date: Sun, 8 Jun 2025 11:34:02 -0400
Subject: [PATCH 4/4] kwargs and linting rules

---
 .../api_methods/tests/test_opensearch.py   | 23 ++++++++----
 .../api_methods/views/capture_endpoints.py | 35 +++++++++----------
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/gateway/sds_gateway/api_methods/tests/test_opensearch.py b/gateway/sds_gateway/api_methods/tests/test_opensearch.py
index f976b727..a053536d 100644
--- a/gateway/sds_gateway/api_methods/tests/test_opensearch.py
+++ b/gateway/sds_gateway/api_methods/tests/test_opensearch.py
@@ -83,7 +83,9 @@ def setUp(self) -> None:
 
         # Setup test data and create initial capture
         self._setup_test_data()
-        self.capture = self._create_test_capture(self.user, self.top_level_dir)
+        self.capture = self._create_test_capture(
+            owner=self.user, top_level_dir=self.top_level_dir
+        )
 
         self._initialize_test_index()
         self._index_test_capture(self.capture)
@@ -152,6 +154,7 @@ def _setup_test_data(self) -> None:
 
     def _create_test_capture(self, owner: User, top_level_dir: str) -> Capture:
         """Create and index a test capture."""
+        # TODO: create files for this capture
         return Capture.objects.create(
             owner=owner,
             scan_group=self.scan_group,
@@ -165,7 +168,7 @@ def _create_test_file(self, owner: User) -> File:
         json_content = json.dumps(self.json_file).encode("utf-8")
         self.uploaded_file = SimpleUploadedFile(
             "test.rh.json",
-            json_content,
+            content=json_content,
             content_type="application/json",
         )
 
@@ -388,7 +391,9 @@ def test_duplicate_capture_deletion(self) -> None:
         assert initial_response["hits"]["total"]["value"] == 1
 
         # Create duplicate capture
-        duplicate_capture = self._create_test_capture(self.user, self.top_level_dir)
+        duplicate_capture = self._create_test_capture(
+            owner=self.user, top_level_dir=self.top_level_dir
+        )
         self._index_test_capture(duplicate_capture)
 
         # Verify duplicate capture was created
@@ -508,7 +513,9 @@ def setUp(self) -> None:
 
         # setup test data and create initial capture
         self._setup_test_data()
-        self.capture = self._create_test_capture(self.user, self.top_level_dir)
+        self.capture = self._create_test_capture(
+            owner=self.user, top_level_dir=self.top_level_dir
+        )
 
         self._initialize_test_index()
         self._index_test_capture(self.capture)
@@ -792,7 +799,9 @@ def test_duplicate_capture_deletion(self) -> None:
         assert initial_response["hits"]["total"]["value"] == 1
 
         # Create duplicate capture
-        duplicate_capture = self._create_test_capture(self.user, self.top_level_dir)
+        duplicate_capture = self._create_test_capture(
+            owner=self.user, top_level_dir=self.top_level_dir
+        )
         self._index_test_capture(duplicate_capture)
 
         # Verify duplicate capture was created
@@ -832,7 +841,9 @@ def test_no_capture_deletion_multiple_owners(self) -> None:
         other_top_level_dir = f"/files/{other_user.email}/{self.channel}"
         expected_count = 2
         # Create a duplicate capture for the second user
-        duplicate_capture = self._create_test_capture(other_user, other_top_level_dir)
+        duplicate_capture = self._create_test_capture(
+            owner=other_user, top_level_dir=other_top_level_dir
+        )
         self._index_test_capture(duplicate_capture)
 
         # Get initial document
diff --git a/gateway/sds_gateway/api_methods/views/capture_endpoints.py b/gateway/sds_gateway/api_methods/views/capture_endpoints.py
index 2732cdc8..6ab0d179 100644
--- a/gateway/sds_gateway/api_methods/views/capture_endpoints.py
+++ b/gateway/sds_gateway/api_methods/views/capture_endpoints.py
@@ -2,20 +2,20 @@
 import tempfile
 import uuid
 from pathlib import Path
-from typing import Any, cast
+from typing import Any
+from typing import cast
 
 from django.db.models import QuerySet
 from django.shortcuts import get_object_or_404
 from drf_spectacular.types import OpenApiTypes
-from drf_spectacular.utils import (
-    OpenApiExample,
-    OpenApiParameter,
-    OpenApiResponse,
-    extend_schema,
-)
+from drf_spectacular.utils import OpenApiExample
+from drf_spectacular.utils import OpenApiParameter
+from drf_spectacular.utils import OpenApiResponse
+from drf_spectacular.utils import extend_schema
 from loguru import logger as log
 from opensearchpy import exceptions as os_exceptions
-from rest_framework import status, viewsets
+from rest_framework import status
+from rest_framework import viewsets
 from rest_framework.pagination import PageNumberPagination
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.request import Request
@@ -26,19 +26,16 @@
 from sds_gateway.api_methods.helpers.extract_drf_metadata import (
     validate_metadata_by_channel,
 )
-from sds_gateway.api_methods.helpers.index_handling import (
-    UnknownIndexError,
-    index_capture_metadata,
-)
-from sds_gateway.api_methods.helpers.reconstruct_file_tree import (
-    find_rh_metadata_file,
-    reconstruct_tree,
-)
+from sds_gateway.api_methods.helpers.index_handling import UnknownIndexError
+from sds_gateway.api_methods.helpers.index_handling import index_capture_metadata
+from sds_gateway.api_methods.helpers.reconstruct_file_tree import find_rh_metadata_file
+from sds_gateway.api_methods.helpers.reconstruct_file_tree import reconstruct_tree
 from sds_gateway.api_methods.helpers.rh_schema_generator import load_rh_file
 from sds_gateway.api_methods.helpers.search_captures import search_captures
-from sds_gateway.api_methods.models import Capture, CaptureType
+from sds_gateway.api_methods.models import Capture
+from sds_gateway.api_methods.models import CaptureType
+from sds_gateway.api_methods.serializers.capture_serializers import CaptureGetSerializer
 from sds_gateway.api_methods.serializers.capture_serializers import (
-    CaptureGetSerializer,
     CapturePostSerializer,
 )
 from sds_gateway.api_methods.utils.metadata_schemas import infer_index_name
@@ -216,7 +213,7 @@
         ),
         summary="Create Capture",
     )
-    def create(self, request: Request) -> Response:  # noqa: PLR0911
+    def create(self, request: Request) -> Response:  # noqa: C901, PLR0911
         """Create a capture object, connecting files and indexing the metadata."""
         drf_channel = request.data.get("channel", None)
         rh_scan_group = request.data.get("scan_group", None)