Skip to content

Commit

Permalink
Feature/django control file directly (#996)
Browse files Browse the repository at this point in the history
* remove unused code

* remove unused tests

* direct file control

* check index exists first

* removed http mocking

* wip

* delete unused files

* formatting

* tests passing

* remove static

* reinstated mocking unstructured

* moved s3_client to conftest

* core_file.uuid now random

* core_file.uuid now random

---------

Co-authored-by: George Burton <g.e.c.cburton@gmail.com>
  • Loading branch information
gecBurton and George Burton authored Sep 3, 2024
1 parent e0d9bc6 commit d78e1d4
Show file tree
Hide file tree
Showing 11 changed files with 59 additions and 160 deletions.
3 changes: 0 additions & 3 deletions django_app/redbox_app/redbox_core/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,18 @@
import json
import logging

from django.conf import settings
from django.contrib import admin
from django.db.models import QuerySet
from django.http import HttpResponse
from django_q.tasks import async_task
from import_export.admin import ExportMixin, ImportExportMixin

from redbox_app.redbox_core.client import CoreApiClient
from redbox_app.worker import ingest

from . import models
from .serializers import UserSerializer

logger = logging.getLogger(__name__)
core_api = CoreApiClient(host=settings.CORE_API_HOST, port=settings.CORE_API_PORT)


class UserAdmin(ImportExportMixin, admin.ModelAdmin):
Expand Down
67 changes: 0 additions & 67 deletions django_app/redbox_app/redbox_core/client.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
from django.utils import timezone
from requests.exceptions import RequestException

from redbox_app.redbox_core.client import CoreApiClient
from redbox_app.redbox_core.models import INACTIVE_STATUSES, Chat, File, StatusEnum

logger = logging.getLogger(__name__)
core_api = CoreApiClient(host=settings.CORE_API_HOST, port=settings.CORE_API_PORT)


def post_summary_to_slack(message):
Expand Down Expand Up @@ -58,16 +56,16 @@ def handle(self, *_args, **_kwargs):
)

try:
core_api.delete_file(file.core_file_uuid, file.user)
file.delete_from_elastic()
file.delete_from_s3()

except RequestException as e:
logger.exception("Error deleting file object %s using core-api", file, exc_info=e)
except BotoCoreError as e:
logger.exception("Error deleting file object %s from storage", file, exc_info=e)
file.status = StatusEnum.errored
file.save()
failure_counter += 1
except BotoCoreError as e:
logger.exception("Error deleting file object %s from storage", file, exc_info=e)
except Exception as e:
logger.exception("Error deleting file object %s", file, exc_info=e)
file.status = StatusEnum.errored
file.save()
failure_counter += 1
Expand Down
13 changes: 13 additions & 0 deletions django_app/redbox_app/redbox_core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,17 @@
from jose import jwt
from yarl import URL

from redbox.models import Settings
from redbox_app.redbox_core import prompts
from redbox_app.redbox_core.utils import get_date_group

logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
logger = logging.getLogger(__name__)

env = Settings()

es_client = env.elasticsearch_client()


class UUIDPrimaryKeyBase(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
Expand Down Expand Up @@ -295,6 +300,14 @@ def delete_from_s3(self):
"""Manually deletes the file from S3 storage."""
self.original_file.delete(save=False)

def delete_from_elastic(self):
index = f"{env.elastic_root_index}-chunk"
if es_client.indices.exists(index=index):
es_client.delete_by_query(
index=index,
body={"query": {"term": {"metadata.file_name.keyword": self.unique_name}}},
)

def update_status_from_core(self, status_label):
match status_label:
case "complete":
Expand Down
2 changes: 0 additions & 2 deletions django_app/redbox_app/redbox_core/views/chat_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,9 @@
from django.views import View
from yarl import URL

from redbox_app.redbox_core.client import CoreApiClient
from redbox_app.redbox_core.models import Chat, ChatMessage, ChatRoleEnum, File

logger = logging.getLogger(__name__)
core_api = CoreApiClient(host=settings.CORE_API_HOST, port=settings.CORE_API_PORT)


class ChatsView(View):
Expand Down
3 changes: 0 additions & 3 deletions django_app/redbox_app/redbox_core/views/citation_views.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
import logging
import uuid

from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.http import HttpRequest, HttpResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.urls import reverse
from django.utils.decorators import method_decorator
from django.views import View

from redbox_app.redbox_core.client import CoreApiClient
from redbox_app.redbox_core.models import ChatMessage, File

logger = logging.getLogger(__name__)
core_api = CoreApiClient(host=settings.CORE_API_HOST, port=settings.CORE_API_PORT)


class CitationsView(View):
Expand Down
23 changes: 3 additions & 20 deletions django_app/redbox_app/redbox_core/views/document_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from collections.abc import MutableSequence, Sequence
from pathlib import Path

from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.core.exceptions import FieldError, ValidationError
from django.core.files.uploadedfile import UploadedFile
Expand All @@ -16,12 +15,10 @@
from django_q.tasks import async_task
from requests.exceptions import RequestException

from redbox_app.redbox_core.client import CoreApiClient
from redbox_app.redbox_core.models import File, StatusEnum, User
from redbox_app.worker import ingest

logger = logging.getLogger(__name__)
core_api = CoreApiClient(host=settings.CORE_API_HOST, port=settings.CORE_API_PORT)
CHUNK_SIZE = 1024
# move this somewhere
APPROVED_FILE_EXTENSIONS = [
Expand Down Expand Up @@ -133,7 +130,6 @@ def validate_uploaded_file(uploaded_file: UploadedFile) -> Sequence[str]:

@staticmethod
def ingest_file(uploaded_file: UploadedFile, user: User) -> Sequence[str]:
errors: MutableSequence[str] = []
try:
logger.info("getting file from s3")
file = File.objects.create(
Expand All @@ -142,23 +138,11 @@ def ingest_file(uploaded_file: UploadedFile, user: User) -> Sequence[str]:
original_file=uploaded_file,
original_file_name=uploaded_file.name,
)
file.save()
except (ValueError, FieldError, ValidationError) as e:
logger.exception("Error creating File model object for %s.", uploaded_file, exc_info=e)
errors.append(e.args[0])
return e.args
else:
try:
logger.info("pushing file to core")
upload_file_response = core_api.upload_file(file.unique_name, user)
except RequestException as e:
logger.exception("Error uploading file object %s.", file, exc_info=e)
file.delete()
errors.append("failed to connect to core-api")
else:
file.core_file_uuid = upload_file_response.uuid
file.save()
async_task(ingest, file.id, task_name=file.unique_name, group="ingest")
return errors
async_task(ingest, file.id, task_name=file.unique_name, group="ingest")


@login_required
Expand All @@ -168,11 +152,10 @@ def remove_doc_view(request, doc_id: uuid):

if request.method == "POST":
try:
core_api.delete_file(file.core_file_uuid, request.user)
file.delete_from_elastic()
except RequestException as e:
logger.exception("Error deleting file object %s.", file, exc_info=e)
errors.append("There was an error deleting this file")

else:
logger.info("Removing document: %s", request.POST["doc_id"])
file.delete_from_s3()
Expand Down
1 change: 0 additions & 1 deletion django_app/redbox_app/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def ingest(file_id: UUID):
key=file.unique_name,
bucket=settings.BUCKET_NAME,
creator_user_uuid=file.user.id,
uuid=file.core_file_uuid,
)
core_file.ingest_status = ProcessingStatusEnum.embedding
if error := ingest_file(core_file):
Expand Down
37 changes: 31 additions & 6 deletions django_app/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
from datetime import UTC, datetime, timedelta
from pathlib import Path

import boto3
import pytest
from botocore.exceptions import ClientError
from django.conf import settings
from django.core.files.uploadedfile import SimpleUploadedFile, UploadedFile
from django.core.management import call_command
from django.utils import timezone
from freezegun import freeze_time

from redbox_app.redbox_core import client
from redbox_app.redbox_core.models import (
AISettings,
Chat,
Expand All @@ -26,6 +28,34 @@
logger = logging.getLogger(__name__)


@pytest.fixture()
def s3_client():
if settings.OBJECT_STORE == "s3":
client = boto3.client(
"s3",
aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_S3_SECRET_ACCESS_KEY,
region_name=settings.AWS_S3_REGION_NAME,
)
else:
client = boto3.client(
"s3",
aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
aws_secret_access_key=settings.AWS_S3_SECRET_ACCESS_KEY,
endpoint_url=f"http://{settings.MINIO_HOST}:{settings.MINIO_PORT}",
)

try:
client.create_bucket(
Bucket=settings.BUCKET_NAME,
CreateBucketConfiguration={"LocationConstraint": settings.AWS_S3_REGION_NAME},
)
except ClientError as e:
if e.response["Error"]["Code"] != "BucketAlreadyOwnedByYou":
raise
return client


@pytest.fixture(autouse=True, scope="session")
def _collect_static():
call_command("collectstatic", "--no-input")
Expand Down Expand Up @@ -118,11 +148,6 @@ def file_py_path() -> Path:
return Path(__file__).parent / "data" / "py" / "test_data.py"


@pytest.fixture()
def s3_client():
return client.s3_client()


@pytest.fixture()
def chat(alice: User) -> Chat:
session_id = uuid.uuid4()
Expand Down
Loading

0 comments on commit d78e1d4

Please sign in to comment.