Add support for Azure OpenAI

zylon-ai · Mar 10, 2024 · 4174a2d · 4174a2d
1 parent 1b03b36
commit 4174a2d
Show file tree

Hide file tree

Showing 7 changed files with 363 additions and 3 deletions.
diff --git a/fern/docs/pages/manual/llms.mdx b/fern/docs/pages/manual/llms.mdx
@@ -98,6 +98,43 @@ to run an OpenAI compatible server. Then, you can run PrivateGPT using the `sett
 
 `PGPT_PROFILES=vllm make run`
 
+### Using Azure OpenAI
+
+If you cannot run a local model (because you don't have a GPU, for example) or for testing purposes, you may
+decide to run PrivateGPT using Azure OpenAI as the LLM and Embeddings model.
+
+In order to do so, create a profile `settings-azopenai.yaml` with the following contents:
+
+```yaml
+llm:
+  mode: azopenai
+
+embedding:
+  mode: azopenai
+
+azopenai:
+  api_key: <your_azopenai_api_key>  # You could skip this configuration and use the AZ_OPENAI_API_KEY env var instead
+  azure_endpoint: <your_azopenai_endpoint> # You could skip this configuration and use the AZ_OPENAI_ENDPOINT env var instead
+  api_version: <api_version> # Optional API version in YYYY-MM-DD format, quoted so YAML keeps it a string (e.g. "2023-05-15"). Defaults to the value in settings.yaml
+  embedding_deployment_name: <your_embedding_deployment_name> # You could skip this configuration and use the AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME env var instead
+  embedding_model: <openai_embeddings_to_use> # Optional model to use. Default is "text-embedding-ada-002" 
+  llm_deployment_name: <your_model_deployment_name> # You could skip this configuration and use the AZ_OPENAI_LLM_DEPLOYMENT_NAME env var instead
+  llm_model: <openai_model_to_use> # Optional model to use. Default is "gpt-35-turbo"
+```
+
+And run PrivateGPT loading that profile you just created:
+
+`PGPT_PROFILES=azopenai make run`
+
+or
+
+`PGPT_PROFILES=azopenai poetry run python -m private_gpt`
+
+Once the server has started, it prints the log line *Application startup complete*.
+Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
+You'll notice that the speed and quality of the responses are higher, since Azure OpenAI's servers handle the heavy
+computation.
+
 ### Using AWS Sagemaker
 
 For a fully private & performant setup, you can choose to have both your LLM and Embeddings model deployed using Sagemaker.

diff --git a/poetry.lock b/poetry.lock
diff --git a/private_gpt/components/embedding/embedding_component.py b/private_gpt/components/embedding/embedding_component.py
@@ -72,6 +72,22 @@ def __init__(self, settings: Settings) -> None:
                     model_name=ollama_settings.embedding_model,
                     base_url=ollama_settings.api_base,
                 )
+            case "azopenai":
+                try:
+                    from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Azure OpenAI dependencies not found, install with `poetry install --extras embeddings-azopenai`"
+                    ) from e
+
+                azopenai_settings = settings.azopenai
+                self.embedding_model = AzureOpenAIEmbedding(
+                    model=azopenai_settings.embedding_model,
+                    deployment_name=azopenai_settings.embedding_deployment_name,
+                    api_key=azopenai_settings.api_key,
+                    azure_endpoint=azopenai_settings.azure_endpoint,
+                    api_version=azopenai_settings.api_version,
+                )
             case "mock":
                 # Not a random number, is the dimensionality used by
                 # the default embedding model

diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
@@ -111,5 +111,21 @@ def __init__(self, settings: Settings) -> None:
                 self.llm = Ollama(
                     model=ollama_settings.llm_model, base_url=ollama_settings.api_base
                 )
+            case "azopenai":
+                try:
+                    from llama_index.llms.azure_openai import AzureOpenAI  # type: ignore
+                except ImportError as e:
+                    raise ImportError(
+                        "Azure OpenAI dependencies not found, install with `poetry install --extras llms-azopenai`"
+                    ) from e
+
+                azopenai_settings = settings.azopenai
+                self.llm = AzureOpenAI(
+                    model=azopenai_settings.llm_model,
+                    deployment_name=azopenai_settings.llm_deployment_name,
+                    api_key=azopenai_settings.api_key,
+                    azure_endpoint=azopenai_settings.azure_endpoint,
+                    api_version=azopenai_settings.api_version,
+                )
             case "mock":
                 self.llm = MockLLM()
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):
 
 
 class LLMSettings(BaseModel):
-    mode: Literal["llamacpp", "openai", "openailike", "sagemaker", "mock", "ollama"]
+    mode: Literal["llamacpp", "openai", "openailike", "azopenai", "sagemaker", "mock", "ollama"]
     max_new_tokens: int = Field(
         256,
         description="The maximum number of token that the LLM is authorized to generate in one completion.",
@@ -127,7 +127,7 @@ class HuggingFaceSettings(BaseModel):
 
 
 class EmbeddingSettings(BaseModel):
-    mode: Literal["huggingface", "openai", "sagemaker", "ollama", "mock"]
+    mode: Literal["huggingface", "openai", "azopenai", "sagemaker", "ollama", "mock"]
     ingest_mode: Literal["simple", "batch", "parallel"] = Field(
         "simple",
         description=(
@@ -185,6 +185,23 @@ class OllamaSettings(BaseModel):
         description="Model to use. Example: 'nomic-embed-text'.",
     )
 
+class AzureOpenAISettings(BaseModel):
+    api_key: str
+    azure_endpoint: str
+    api_version: str = Field(
+        "2023_05_15",
+        description="The API version to use for this operation. This follows the YYYY-MM-DD format.",
+    )
+    embedding_deployment_name: str
+    embedding_model: str = Field(
+        "text-embedding-ada-002",
+        description="OpenAI Model to use. Example: 'text-embedding-ada-002'.",
+    )
+    llm_deployment_name: str
+    llm_model: str = Field(
+        "gpt-35-turbo",
+        description="OpenAI Model to use. Example: 'gpt-4'.",
+    )
 
 class UISettings(BaseModel):
     enabled: bool
@@ -304,6 +321,7 @@ class Settings(BaseModel):
     sagemaker: SagemakerSettings
     openai: OpenAISettings
     ollama: OllamaSettings
+    azopenai: AzureOpenAISettings
     vectorstore: VectorstoreSettings
     qdrant: QdrantSettings | None = None
     pgvector: PGVectorSettings | None = None

diff --git a/pyproject.toml b/pyproject.toml
@@ -21,9 +21,11 @@ llama-index-llms-llama-cpp = {version = "^0.1.3", optional = true}
 llama-index-llms-openai = {version = "^0.1.6", optional = true}
 llama-index-llms-openai-like = {version ="^0.1.3", optional = true}
 llama-index-llms-ollama = {version ="^0.1.2", optional = true}
+llama-index-llms-azure-openai = {version ="^0.1.5", optional = true}
 llama-index-embeddings-ollama = {version ="^0.1.2", optional = true}
 llama-index-embeddings-huggingface = {version ="^0.1.4", optional = true}
 llama-index-embeddings-openai = {version ="^0.1.6", optional = true}
+llama-index-embeddings-azure-openai = {version ="^0.1.6", optional = true}
 llama-index-vector-stores-qdrant = {version ="^0.1.3", optional = true}
 llama-index-vector-stores-chroma = {version ="^0.1.4", optional = true}
 llama-index-vector-stores-postgres = {version ="^0.1.2", optional = true}
@@ -39,10 +41,12 @@ llms-openai = ["llama-index-llms-openai"]
 llms-openai-like = ["llama-index-llms-openai-like"]
 llms-ollama = ["llama-index-llms-ollama"]
 llms-sagemaker = ["boto3"]
+llms-azopenai = ["llama-index-llms-azure-openai"]
 embeddings-ollama = ["llama-index-embeddings-ollama"]
 embeddings-huggingface = ["llama-index-embeddings-huggingface"]
 embeddings-openai = ["llama-index-embeddings-openai"]
 embeddings-sagemaker = ["boto3"]
+embeddings-azopenai = ["llama-index-embeddings-azure-openai"]
 vector-stores-qdrant = ["llama-index-vector-stores-qdrant"]
 vector-stores-chroma = ["llama-index-vector-stores-chroma"]
 vector-stores-postgres = ["llama-index-vector-stores-postgres"]

diff --git a/settings.yaml b/settings.yaml
@@ -81,3 +81,13 @@ ollama:
   llm_model: llama2
   embedding_model: nomic-embed-text
   api_base: http://localhost:11434
+
+
+azopenai:
+  api_key: ${AZ_OPENAI_API_KEY:}
+  azure_endpoint: ${AZ_OPENAI_ENDPOINT:}
+  embedding_deployment_name: ${AZ_OPENAI_EMBEDDING_DEPLOYMENT_NAME:}
+  llm_deployment_name: ${AZ_OPENAI_LLM_DEPLOYMENT_NAME:}
+  api_version: "2023-05-15"  # Keep quoted: YAML 1.1 loaders (e.g. PyYAML) read bare 2023_05_15 as an int and bare 2023-05-15 as a date
+  embedding_model: text-embedding-ada-002
+  llm_model: gpt-35-turbo