Skip to content

Commit

Permalink
Fix Model Server (onyx-dot-app#1320)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhongsun96 authored Apr 11, 2024
1 parent f346c2f commit b599128
Show file tree
Hide file tree
Showing 18 changed files with 48 additions and 53 deletions.
8 changes: 1 addition & 7 deletions backend/Dockerfile.model_server
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,13 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \

WORKDIR /app

# Needed for model configs and defaults
COPY ./danswer/configs /app/danswer/configs
COPY ./danswer/dynamic_configs /app/danswer/dynamic_configs

# Utils used by model server
COPY ./danswer/utils/logger.py /app/danswer/utils/logger.py
COPY ./danswer/utils/timing.py /app/danswer/utils/timing.py
COPY ./danswer/utils/telemetry.py /app/danswer/utils/telemetry.py

# Place to fetch version information
COPY ./danswer/__init__.py /app/danswer/__init__.py

# Request/Response models
# Shared between Danswer Backend and Model Server
COPY ./shared_configs /app/shared_configs

# Model Server main code
Expand Down
6 changes: 3 additions & 3 deletions backend/danswer/background/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT
from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from danswer.configs.app_configs import INDEXING_MODEL_SERVER_HOST
from danswer.configs.app_configs import LOG_LEVEL
from danswer.configs.app_configs import MODEL_SERVER_PORT
from danswer.configs.app_configs import NUM_INDEXING_WORKERS
from danswer.db.connector import fetch_connectors
from danswer.db.connector_credential_pair import get_connector_credential_pairs
Expand Down Expand Up @@ -46,6 +43,9 @@
from danswer.db.models import IndexModelStatus
from danswer.search.search_nlp_models import warm_up_encoders
from danswer.utils.logger import setup_logger
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import LOG_LEVEL
from shared_configs.configs import MODEL_SERVER_PORT

logger = setup_logger()

Expand Down
16 changes: 1 addition & 15 deletions backend/danswer/configs/app_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,19 +209,6 @@
)


#####
# Model Server Configs
#####
MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost"
MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0"
MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000")
# Model server for indexing should use a separate one to not allow indexing to introduce delay
# for inference
INDEXING_MODEL_SERVER_HOST = (
os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST
)


#####
# Miscellaneous
#####
Expand All @@ -246,8 +233,7 @@
)
# Anonymous usage telemetry
DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
# notset, debug, info, warning, error, or critical
LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")

TOKEN_BUDGET_GLOBALLY_ENABLED = (
os.environ.get("TOKEN_BUDGET_GLOBALLY_ENABLED", "").lower() == "true"
)
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
from danswer.search.models import OptionalSearchSetting
from danswer.search.models import RetrievalDetails
from danswer.utils.logger import setup_logger
from shared_configs.nlp_model_configs import ENABLE_RERANKING_ASYNC_FLOW
from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW

logger_base = setup_logger()

Expand Down
4 changes: 2 additions & 2 deletions backend/danswer/danswerbot/slack/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
from slack_sdk.socket_mode.response import SocketModeResponse
from sqlalchemy.orm import Session

from danswer.configs.app_configs import MODEL_SERVER_HOST
from danswer.configs.app_configs import MODEL_SERVER_PORT
from danswer.configs.constants import MessageType
from danswer.configs.danswerbot_configs import DANSWER_BOT_RESPOND_EVERY_CHANNEL
from danswer.configs.danswerbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
Expand Down Expand Up @@ -47,6 +45,8 @@
from danswer.search.search_nlp_models import warm_up_encoders
from danswer.server.manage.models import SlackBotTokens
from danswer.utils.logger import setup_logger
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT

logger = setup_logger()

Expand Down
4 changes: 2 additions & 2 deletions backend/danswer/indexing/embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from sqlalchemy.orm import Session

from danswer.configs.app_configs import ENABLE_MINI_CHUNK
from danswer.configs.app_configs import INDEXING_MODEL_SERVER_HOST
from danswer.configs.app_configs import MODEL_SERVER_PORT
from danswer.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from danswer.db.embedding_model import get_current_db_embedding_model
Expand All @@ -20,6 +18,8 @@
from danswer.search.search_nlp_models import EmbeddingModel
from danswer.utils.batching import batch_list
from danswer.utils.logger import setup_logger
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT


logger = setup_logger()
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from langchain.schema.messages import SystemMessage
from tiktoken.core import Encoding

from danswer.configs.app_configs import LOG_LEVEL
from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY
from danswer.configs.constants import GEN_AI_DETECTED_MODEL
from danswer.configs.constants import MessageType
Expand All @@ -37,6 +36,7 @@
from danswer.indexing.models import InferenceChunk
from danswer.llm.interfaces import LLM
from danswer.utils.logger import setup_logger
from shared_configs.configs import LOG_LEVEL

if TYPE_CHECKING:
from danswer.llm.answering.models import PreviousMessage
Expand Down
6 changes: 3 additions & 3 deletions backend/danswer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@
from danswer.configs.app_configs import AUTH_TYPE
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
from danswer.configs.app_configs import MODEL_SERVER_HOST
from danswer.configs.app_configs import MODEL_SERVER_PORT
from danswer.configs.app_configs import OAUTH_CLIENT_ID
from danswer.configs.app_configs import OAUTH_CLIENT_SECRET
from danswer.configs.app_configs import SECRET
Expand Down Expand Up @@ -81,7 +79,9 @@
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
from danswer.utils.variable_functionality import fetch_versioned_implementation
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT


logger = setup_logger()
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from danswer.db.models import Persona
from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import SearchType
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW


MAX_METRICS_CONTENT = (
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/search/preprocessing/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from danswer.utils.threadpool_concurrency import FunctionCall
from danswer.utils.threadpool_concurrency import run_functions_in_parallel
from danswer.utils.timing import log_function_time
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW


logger = setup_logger()
Expand Down
4 changes: 2 additions & 2 deletions backend/danswer/search/retrieval/search_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from sqlalchemy.orm import Session

from danswer.chat.models import LlmDoc
from danswer.configs.app_configs import MODEL_SERVER_HOST
from danswer.configs.app_configs import MODEL_SERVER_PORT
from danswer.configs.chat_configs import HYBRID_ALPHA
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.db.embedding_model import get_current_db_embedding_model
Expand All @@ -26,6 +24,8 @@
from danswer.utils.logger import setup_logger
from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel
from danswer.utils.timing import log_function_time
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT


logger = setup_logger()
Expand Down
4 changes: 2 additions & 2 deletions backend/danswer/search/search_nlp_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
import requests
from transformers import logging as transformer_logging # type:ignore

from danswer.configs.app_configs import MODEL_SERVER_HOST
from danswer.configs.app_configs import MODEL_SERVER_PORT
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL
from danswer.search.enums import EmbedTextType
from danswer.utils.logger import setup_logger
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
from shared_configs.model_server_models import EmbedRequest
from shared_configs.model_server_models import EmbedResponse
from shared_configs.model_server_models import IntentRequest
Expand Down
2 changes: 1 addition & 1 deletion backend/danswer/utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections.abc import MutableMapping
from typing import Any

from danswer.configs.app_configs import LOG_LEVEL
from shared_configs.configs import LOG_LEVEL


class IndexAttemptSingleton:
Expand Down
6 changes: 3 additions & 3 deletions backend/model_server/custom_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@

from model_server.constants import MODEL_WARM_UP_STRING
from model_server.utils import simple_log_function_time
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import INTENT_MODEL_CONTEXT_SIZE
from shared_configs.configs import INTENT_MODEL_VERSION
from shared_configs.model_server_models import IntentRequest
from shared_configs.model_server_models import IntentResponse
from shared_configs.nlp_model_configs import INDEXING_ONLY
from shared_configs.nlp_model_configs import INTENT_MODEL_CONTEXT_SIZE
from shared_configs.nlp_model_configs import INTENT_MODEL_VERSION


router = APIRouter(prefix="/custom")
Expand Down
6 changes: 3 additions & 3 deletions backend/model_server/encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
from danswer.utils.logger import setup_logger
from model_server.constants import MODEL_WARM_UP_STRING
from model_server.utils import simple_log_function_time
from shared_configs.configs import CROSS_EMBED_CONTEXT_SIZE
from shared_configs.configs import CROSS_ENCODER_MODEL_ENSEMBLE
from shared_configs.configs import INDEXING_ONLY
from shared_configs.model_server_models import EmbedRequest
from shared_configs.model_server_models import EmbedResponse
from shared_configs.model_server_models import RerankRequest
from shared_configs.model_server_models import RerankResponse
from shared_configs.nlp_model_configs import CROSS_EMBED_CONTEXT_SIZE
from shared_configs.nlp_model_configs import CROSS_ENCODER_MODEL_ENSEMBLE
from shared_configs.nlp_model_configs import INDEXING_ONLY

logger = setup_logger()

Expand Down
12 changes: 6 additions & 6 deletions backend/model_server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@
from transformers import logging as transformer_logging # type:ignore

from danswer import __version__
from danswer.configs.app_configs import MODEL_SERVER_ALLOWED_HOST
from danswer.configs.app_configs import MODEL_SERVER_PORT
from danswer.utils.logger import setup_logger
from model_server.custom_models import router as custom_models_router
from model_server.custom_models import warm_up_intent_model
from model_server.encoders import router as encoders_router
from model_server.encoders import warm_up_cross_encoders
from shared_configs.nlp_model_configs import ENABLE_RERANKING_ASYNC_FLOW
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
from shared_configs.nlp_model_configs import INDEXING_ONLY
from shared_configs.nlp_model_configs import MIN_THREADS_ML_MODELS
from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW
from shared_configs.configs import INDEXING_ONLY
from shared_configs.configs import MIN_THREADS_ML_MODELS
from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST
from shared_configs.configs import MODEL_SERVER_PORT

os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
Expand Down
1 change: 1 addition & 0 deletions backend/requirements/model_server.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
fastapi==0.109.2
h5py==3.9.0
pydantic==1.10.7
safetensors==0.4.2
sentence-transformers==2.6.1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
import os


MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost"
MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0"
MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000")
# Indexing should use a separate model server so that indexing load does not introduce delay
# for inference
INDEXING_MODEL_SERVER_HOST = (
os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST
)

# Danswer custom Deep Learning Models
INTENT_MODEL_VERSION = "danswer/intent-model"
INTENT_MODEL_CONTEXT_SIZE = 256
Expand All @@ -23,4 +32,9 @@
# model. If torch finds more threads on its own, this value is not used.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)

# A model server that has INDEXING_ONLY set will throw an exception if it is used for reranking
# or intent classification
INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"

# Log level: one of notset, debug, info, warning, error, or critical
LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")

0 comments on commit b599128

Please sign in to comment.