Commit

bugfx/int-envvvars (#1004)
* fixed int envvars

* reduce retries by a factor of 30

* embedding_max_retries=1

* upped worker_ingest_min_chunk_size

* Update infrastructure/aws/variables.tf

* rationalised envvars

* setting log level

* fixed test

* switched to BM25Strategy

* don't put secrets in logs

* remove hard coded image

* set workers to 1

* revert changes to chunk size

* added filter to Token Admin

---------

Co-authored-by: George Burton <g.e.c.cburton@gmail.com>
gecBurton and George Burton authored Aug 30, 2024
1 parent 1438a69 commit a072a2f
Showing 15 changed files with 39 additions and 55 deletions.
3 changes: 2 additions & 1 deletion core-api/core_api/dependencies.py
@@ -1,4 +1,5 @@
import logging
import os
from functools import lru_cache
from typing import Annotated

@@ -11,7 +12,7 @@
from redbox.models import Settings
from redbox.chains.components import get_embeddings

logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
log = logging.getLogger()


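The hard-coded logging.INFO level is swapped for an environment-driven one here and in several files below (core-api, the Django app, redbox-core and the tests). A minimal sketch of the pattern, assuming LOG_LEVEL is an optional variable holding a standard level name such as "DEBUG" or "WARNING":

    import logging
    import os

    # basicConfig accepts a level name string ("INFO", "DEBUG", ...) as well as a
    # logging constant, so the environment value can be passed straight through.
    logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
    log = logging.getLogger()
    log.debug("only emitted when LOG_LEVEL=DEBUG")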
3 changes: 2 additions & 1 deletion core-api/core_api/routes/chat.py
@@ -1,4 +1,5 @@
import logging
import os
from typing import Annotated
from uuid import UUID

@@ -19,7 +20,7 @@

# === Logging ===

logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
log = logging.getLogger()


3 changes: 2 additions & 1 deletion django_app/redbox_app/redbox_core/admin.py
@@ -94,7 +94,8 @@ class ChatMessageTokenUseInline(admin.StackedInline):


class ChatMessageTokenUseAdmin(ExportMixin, admin.ModelAdmin):
pass
list_display = ["chat_message", "use_type", "model_name", "token_count"]
list_filter = ["use_type", "model_name"]


class ChatMessageAdmin(ExportMixin, admin.ModelAdmin):
8 changes: 4 additions & 4 deletions django_app/redbox_app/redbox_core/consumers.py
@@ -127,12 +127,12 @@ async def handle_route(self, response: ClientResponse, show_route: bool) -> str:

async def handle_metadata(self, current_metadata: MetadataDetail, metadata_event: MetadataDetail):
result = current_metadata.model_copy(deep=True)
for model,token_count in metadata_event.input_tokens.items():
result.input_tokens[model] = current_metadata.input_tokens.get(model, 0) + token_count
for model,token_count in metadata_event.output_tokens.items():
for model, token_count in metadata_event.input_tokens.items():
result.input_tokens[model] = current_metadata.input_tokens.get(model, 0) + token_count
for model, token_count in metadata_event.output_tokens.items():
result.output_tokens[model] = current_metadata.output_tokens.get(model, 0) + token_count
return result

async def handle_error(self, response: ClientResponse) -> str:
match response.data.code:
case "no-document-selected":
4 changes: 3 additions & 1 deletion django_app/redbox_app/redbox_core/models.py
@@ -1,4 +1,5 @@
import logging
import os
import uuid
from collections.abc import Collection, Sequence
from datetime import UTC, date, datetime, timedelta
@@ -20,6 +21,7 @@
from redbox_app.redbox_core import prompts
from redbox_app.redbox_core.utils import get_date_group

logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
logger = logging.getLogger(__name__)


@@ -496,4 +498,4 @@ class UseTypeEnum(models.TextChoices):
token_count = models.PositiveIntegerField(null=True, blank=True)

def __str__(self) -> str:
return f"{self.chat_message} {self.model_name} {self.use_type}"
return f"{self.model_name} {self.use_type}"
7 changes: 4 additions & 3 deletions django_app/redbox_app/settings.py
@@ -337,11 +337,12 @@

Q_CLUSTER = {
"name": "redbox_django",
"timeout": os.environ.get("Q_TIMEOUT", 300),
"retry": os.environ.get("Q_RETRY", 900),
"max_attempts": os.environ.get("Q_MAX_ATTEMPTS", 3),
"timeout": env.int("Q_TIMEOUT", 300),
"retry": env.int("Q_RETRY", 900),
"max_attempts": env.int("Q_MAX_ATTEMPTS", 1),
"catch_up": False,
"orm": "default",
"workers": 1,
}

UNSTRUCTURED_HOST = env.str("UNSTRUCTURED_HOST")
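This hunk is the heart of the "int envvars" fix: os.environ.get always returns strings, so any Q_TIMEOUT, Q_RETRY or Q_MAX_ATTEMPTS set in the environment reached Django Q as text, whereas env.int (the env helper already used for UNSTRUCTURED_HOST just below) casts and validates the value. The defaults also tighten to a single attempt and a single worker. A small sketch of the difference, using a hypothetical exported Q_TIMEOUT:

    import os

    os.environ["Q_TIMEOUT"] = "600"                   # environment values are always strings

    timeout = os.environ.get("Q_TIMEOUT", 300)
    print(type(timeout), timeout)                     # <class 'str'> 600

    timeout = int(os.environ.get("Q_TIMEOUT", 300))   # roughly what env.int("Q_TIMEOUT", 300) does
    print(type(timeout), timeout)                     # <class 'int'> 600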
2 changes: 1 addition & 1 deletion django_app/tests/management/test_commands.py
@@ -301,4 +301,4 @@ def test_reingest_files_unstructured_fail(uploaded_file: File, requests_mock: Mo
# Then
uploaded_file.refresh_from_db()
assert uploaded_file.status == StatusEnum.errored
assert uploaded_file.ingest_error == "Unstructured failed to extract text for this file"
assert uploaded_file.ingest_error == "<class 'ValueError'>: Unstructured failed to extract text for this file"
2 changes: 2 additions & 0 deletions django_app/tests/test_consumers.py
@@ -1,5 +1,6 @@
import json
import logging
import os
from asyncio import CancelledError
from collections.abc import Sequence
from datetime import UTC, datetime
@@ -17,6 +18,7 @@
from redbox_app.redbox_core.models import Chat, ChatMessage, ChatMessageTokenUse, ChatRoleEnum, File, User
from redbox_app.redbox_core.prompts import CHAT_MAP_QUESTION_PROMPT

logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
logger = logging.getLogger(__name__)


22 changes: 1 addition & 21 deletions infrastructure/aws/data.tf
@@ -9,26 +9,6 @@ locals {
{
"EMBEDDING_DOCUMENT_FIELD_NAME" : var.embedding_document_field_name,
"AZURE_OPENAI_MODEL" : var.azure_openai_model,
"AI__MAX_DOCUMENT_TOKENS" : var.max_document_tokens,
"AI__CONTEXT_WINDOW_SIZE" : var.context_window_size,
"AI__LLM_MAX_TOKENS" : var.llm_max_tokens,
"AI__RAG_K" : var.rag_k,
"AI__RAG_NUM_CANDIDATES" : var.rag_num_candidates,
"AI__RAG_DESIRED_CHUNK_SIZE" : var.rag_desired_chunk_size,
"AI__ELBOW_FILTER_ENABLED" : var.elbow_filter_enabled,
"AI__CHAT_SYSTEM_PROMPT" : var.chat_system_prompt,
"AI__CHAT_QUESTION_PROMPT" : var.chat_question_prompt,
"AI__STUFF_CHUNK_CONTEXT_RATIO" : var.stuff_chunk_context_ratio,
"AI__CHAT_WITH_DOCS_SYSTEM_PROMPT" : var.chat_with_docs_system_prompt,
"AI__CHAT_WITH_DOCS_QUESTION_PROMPT" : var.chat_with_docs_question_prompt,
"AI__CHAT_WITH_DOCS_REDUCE_SYSTEM_PROMPT" : var.chat_with_docs_reduce_system_prompt,
"AI__CHAT_WITH_DOCS_REDUCE_QUESTION_PROMPT" : var.chat_with_docs_reduce_question_prompt,
"AI__RETRIEVAL_SYSTEM_PROMPT" : var.retrieval_system_prompt,
"AI__RETRIEVAL_QUESTION_PROMPT" : var.retrieval_question_prompt,
"AI__CONDENSE_SYSTEM_PROMPT" : var.condense_system_prompt,
"AI__CONDENSE_QUESTION_PROMPT" : var.condense_question_prompt,
"AI__SUMMARISATION_SYSTEM_PROMPT" : var.summarisation_system_prompt,
"AI__SUMMARISATION_QUESTION_PROMPT" : var.summarisation_question_prompt,
}
)
@@ -54,7 +34,7 @@ locals {
"FROM_EMAIL" : var.from_email,
"GOVUK_NOTIFY_PLAIN_EMAIL_TEMPLATE_ID" : var.govuk_notify_plain_email_template_id,
"EMAIL_BACKEND_TYPE" : "GOVUKNOTIFY",
"DJANGO_LOG_LEVEL" : "DEBUG",
"DJANGO_LOG_LEVEL" : "INFO",
"CONTACT_EMAIL" : var.contact_email,
"FILE_EXPIRY_IN_DAYS" : 30,
"MAX_SECURITY_CLASSIFICATION" : "OFFICIAL_SENSITIVE",
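The deleted AI__* entries mirror fields on AISettings (see redbox-core/redbox/models/chain.py further down), so removing them from Terraform leaves the library defaults in charge unless a deployment overrides them; this is the "rationalised envvars" change, and DJANGO_LOG_LEVEL also drops from DEBUG to INFO. A rough sketch of how double-underscore variables map onto nested settings, assuming pydantic-settings with env_nested_delimiter="__" and an ai field on Settings (field names come from the diff, the exact wiring is an assumption):

    from pydantic import BaseModel
    from pydantic_settings import BaseSettings, SettingsConfigDict

    class AISettings(BaseModel):
        rag_k: int = 30
        rag_desired_chunk_size: int = 300

    class Settings(BaseSettings):
        model_config = SettingsConfigDict(env_nested_delimiter="__")
        ai: AISettings = AISettings()

    # With AI__RAG_K=50 exported, Settings().ai.rag_k == 50; unset variables fall back to the defaults above.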
8 changes: 4 additions & 4 deletions infrastructure/aws/variables.tf
@@ -445,19 +445,19 @@ variable "embedding_document_field_name" {

variable "embedding_max_retries" {
type = number
default = 10
default = 1
description = "Number of retries to external embedding services (rate limiting)"
}

variable "embedding_retry_min_seconds" {
type = number
default = 5
default = 120
description = "Number of seconds to wait before retry to external embedding services (rate limiting)"
}

variable "embedding_retry_max_seconds" {
type = number
default = 120
default = 300
description = "Maximum number of seconds to wait before retry to external embedding services (rate limiting)"
}

@@ -499,6 +499,6 @@ variable "django_queue_retry" {

variable "django_queue_max_attempts" {
type = number
default = 3
default = 1
description = "How many attempts to run unstructured task"
}
8 changes: 3 additions & 5 deletions redbox-core/redbox/loader/ingester.py
@@ -2,7 +2,7 @@
from typing import TYPE_CHECKING

from langchain_core.runnables import RunnableParallel
from langchain_elasticsearch.vectorstores import BM25RetrievalStrategy, ElasticsearchStore
from langchain_elasticsearch.vectorstores import BM25Strategy, ElasticsearchStore

from redbox.chains.components import get_embeddings
from redbox.chains.ingest import ingest_from_loader
@@ -33,9 +33,7 @@ def get_elasticsearch_store(es, es_index_name: str):


def get_elasticsearch_store_without_embeddings(es, es_index_name: str):
return ElasticsearchStore(
index_name=es_index_name, es_connection=es, query_field="text", strategy=BM25RetrievalStrategy()
)
return ElasticsearchStore(index_name=es_index_name, es_connection=es, query_field="text", strategy=BM25Strategy())


def get_elasticsearch_storage_handler(es):
@@ -75,7 +73,7 @@ def ingest_file(core_file: File) -> str | None:
except Exception as e:
logging.exception("Error while processing file [%s]", core_file)
core_file.ingest_status = ProcessingStatusEnum.failed
return str(e.args[0])
return f"{type(e)}: {e.args[0]}"

finally:
storage_handler.update_item(core_file)
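Returning f"{type(e)}: {e.args[0]}" keeps the exception class in the stored ingest error, which is what the updated assertion in django_app/tests/management/test_commands.py above now expects. A standalone illustration of the resulting format:

    try:
        raise ValueError("Unstructured failed to extract text for this file")
    except Exception as e:
        message = f"{type(e)}: {e.args[0]}"

    print(message)  # <class 'ValueError'>: Unstructured failed to extract text for this file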
2 changes: 1 addition & 1 deletion redbox-core/redbox/models/chain.py
@@ -95,7 +95,7 @@ class AISettings(BaseModel):

rag_k: int = 30
rag_num_candidates: int = 10
rag_desired_chunk_size: int = 300
rag_desired_chunk_size: int = 300 # this is superseded by worker_ingest_min_chunk_size
elbow_filter_enabled: bool = False
chat_system_prompt: str = CHAT_SYSTEM_PROMPT
chat_question_prompt: str = CHAT_QUESTION_PROMPT
16 changes: 6 additions & 10 deletions redbox-core/redbox/models/settings.py
@@ -1,4 +1,5 @@
import logging
import os
from functools import lru_cache
from typing import Literal

@@ -7,6 +8,8 @@
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
log = logging.getLogger()


@@ -66,9 +69,9 @@ class Settings(BaseSettings):
llm_max_tokens: int = 1024

embedding_backend: Literal["azure", "openai", "fake"] = "azure"
embedding_max_retries: int = 10
embedding_retry_min_seconds: int = 10
embedding_retry_max_seconds: int = 120
embedding_max_retries: int = 1
embedding_retry_min_seconds: int = 120 # Azure uses 60s
embedding_retry_max_seconds: int = 300
embedding_max_batch_size: int = 512
embedding_document_field_name: str = "embedding"

@@ -120,8 +123,6 @@ class Settings(BaseSettings):
@lru_cache(1)
def elasticsearch_client(self) -> Elasticsearch:
if isinstance(self.elastic, ElasticLocalSettings):
log.info("Connecting to self managed Elasticsearch")
log.info("Elasticsearch host = %s", self.elastic.host)
return Elasticsearch(
hosts=[
{
@@ -132,11 +133,6 @@ def elasticsearch_client(self) -> Elasticsearch:
],
basic_auth=(self.elastic.user, self.elastic.password),
)

log.info("Connecting to Elastic Cloud Cluster")
log.info("Cloud ID = %s", self.elastic.cloud_id)
log.info("Elastic Cloud API Key = %s", self.elastic.api_key)

return Elasticsearch(cloud_id=self.elastic.cloud_id, api_key=self.elastic.api_key)

def s3_client(self):
3 changes: 2 additions & 1 deletion redbox-core/redbox/storage/elasticsearch.py
@@ -1,4 +1,5 @@
import logging
import os
from collections.abc import Sequence
from uuid import UUID

@@ -10,7 +11,7 @@
from redbox.models.base import PersistableModel
from redbox.storage.storage_handler import BaseStorageHandler

logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
log = logging.getLogger()


3 changes: 2 additions & 1 deletion tests/test_journey.py
@@ -1,4 +1,5 @@
import logging
import os
import string
import subprocess
from pathlib import Path
@@ -13,8 +14,8 @@
if TYPE_CHECKING:
from collections.abc import Sequence

logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO"))
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

BASE_URL = URL("http://localhost:8090/")
TEST_ROOT = Path(__file__).parent
