From 33d9aeb3484ff7ca44ea53a407d68243d9a5a927 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Mon, 12 Aug 2024 10:34:06 -0400 Subject: [PATCH 1/3] x --- .../langchain_openai/embeddings/base.py | 83 +++++++++++++++++-- 1 file changed, 75 insertions(+), 8 deletions(-) diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py index 8894364b24fcb..28a1e4137bc15 100644 --- a/libs/partners/openai/langchain_openai/embeddings/base.py +++ b/libs/partners/openai/langchain_openai/embeddings/base.py @@ -99,23 +99,90 @@ def _process_batched_chunked_embeddings( class OpenAIEmbeddings(BaseModel, Embeddings): - """OpenAI embedding models. + """OpenAI embedding model integration. - To use, you should have the - environment variable ``OPENAI_API_KEY`` set with your API key or pass it - as a named parameter to the constructor. + Setup: + Install ``langchain_openai`` and set environment variable ``OPENAI_API_KEY``. - In order to use the library with Microsoft Azure endpoints, use - AzureOpenAIEmbeddings. + .. code-block:: bash - Example: + pip install -U langchain_openai + export OPENAI_API_KEY="your-api-key" + + Key init args — embedding params: + model: str + Name of OpenAI model to use. + dimensions: Optional[int] = None + The number of dimensions the resulting output embeddings should have. + Only supported in `text-embedding-3` and later models. + + Key init args — client params: + api_key: Optional[SecretStr] = None + OpenAI API key. + organization: Optional[str] = None + OpenAI organization ID. If not passed in will be read + from env var OPENAI_ORG_ID. + max_retries: int = 2 + Maximum number of retries to make when generating. + request_timeout: Optional[Union[float, Tuple[float, float], Any]] = None + Timeout for requests to OpenAI completion API + + See full list of supported init args and their descriptions in the params section. + + Instantiate: .. 
code-block:: python from langchain_openai import OpenAIEmbeddings - model = OpenAIEmbeddings(model="text-embedding-3-large") + embed = OpenAIEmbeddings( + model="text-embedding-3-large", + # With the `text-embedding-3` class + # of models, you can specify the size + # of the embeddings you want returned. + # dimensions=1024 + ) + + Embed multiple texts: + + .. code-block:: python + + vectors = embeddings.embed_documents(['hello', 'goodbye']) + # Showing only the first 3 coordinates + print(len(vectors)) + print(vectors[0][:3]) + + .. code-block:: python + + 2 + [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] + + Embed single text: + + .. code-block:: python + + input_text = "The meaning of life is 42" + vector = embeddings.embed_query('hello') + print(vector[:3]) + + .. code-block:: python + + [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] + + Async: + .. code-block:: python + + await embed.aembed_query(input_text) + print(vector[:3]) + + # multiple: + # await embed.aembed_documents(input_texts) + + .. 
code-block:: python + + [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188] """ + client: Any = Field(default=None, exclude=True) #: :meta private: async_client: Any = Field(default=None, exclude=True) #: :meta private: model: str = "text-embedding-ada-002" From 2f3ab8f1e37f093105a02df42930f7b1c2e687a9 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Mon, 12 Aug 2024 10:34:22 -0400 Subject: [PATCH 2/3] x --- .../langchain_openai/embeddings/base.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py index 28a1e4137bc15..9e9ccb49c9e33 100644 --- a/libs/partners/openai/langchain_openai/embeddings/base.py +++ b/libs/partners/openai/langchain_openai/embeddings/base.py @@ -142,30 +142,30 @@ class OpenAIEmbeddings(BaseModel, Embeddings): # dimensions=1024 ) - Embed multiple texts: + Embed single text: .. code-block:: python - vectors = embeddings.embed_documents(['hello', 'goodbye']) - # Showing only the first 3 coordinates - print(len(vectors)) - print(vectors[0][:3]) + input_text = "The meaning of life is 42" + vector = embeddings.embed_query('hello') + print(vector[:3]) .. code-block:: python - 2 [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] - Embed single text: + Embed multiple texts: .. code-block:: python - input_text = "The meaning of life is 42" - vector = embeddings.embed_query('hello') - print(vector[:3]) + vectors = embeddings.embed_documents(['hello', 'goodbye']) + # Showing only the first 3 coordinates + print(len(vectors)) + print(vectors[0][:3]) .. 
code-block:: python + 2 [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] Async: From 1d437df6a80cf145456bbc373037f21bd7d6c0a3 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Mon, 12 Aug 2024 10:51:46 -0400 Subject: [PATCH 3/3] update --- .../langchain_openai/embeddings/base.py | 129 +++++++++--------- 1 file changed, 64 insertions(+), 65 deletions(-) diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py index 9e9ccb49c9e33..645242ac14594 100644 --- a/libs/partners/openai/langchain_openai/embeddings/base.py +++ b/libs/partners/openai/langchain_openai/embeddings/base.py @@ -101,87 +101,86 @@ def _process_batched_chunked_embeddings( class OpenAIEmbeddings(BaseModel, Embeddings): """OpenAI embedding model integration. - Setup: - Install ``langchain_openai`` and set environment variable ``OPENAI_API_KEY``. - - .. code-block:: bash - - pip install -U langchain_openai - export OPENAI_API_KEY="your-api-key" - - Key init args — embedding params: - model: str - Name of OpenAI model to use. - dimensions: Optional[int] = None - The number of dimensions the resulting output embeddings should have. - Only supported in `text-embedding-3` and later models. - - Key init args — client params: - api_key: Optional[SecretStr] = None - OpenAI API key. - organization: Optional[str] = None - OpenAI organization ID. If not passed in will be read - from env var OPENAI_ORG_ID. - max_retries: int = 2 - Maximum number of retries to make when generating. - request_timeout: Optional[Union[float, Tuple[float, float], Any]] = None - Timeout for requests to OpenAI completion API - - See full list of supported init args and their descriptions in the params section. - - Instantiate: - .. 
code-block:: python - - from langchain_openai import OpenAIEmbeddings - - embed = OpenAIEmbeddings( - model="text-embedding-3-large", - # With the `text-embedding-3` class - # of models, you can specify the size - # of the embeddings you want returned. - # dimensions=1024 - ) + Setup: + Install ``langchain_openai`` and set environment variable ``OPENAI_API_KEY``. - Embed single text: + .. code-block:: bash - .. code-block:: python + pip install -U langchain_openai + export OPENAI_API_KEY="your-api-key" - input_text = "The meaning of life is 42" - vector = embeddings.embed_query('hello') - print(vector[:3]) + Key init args — embedding params: + model: str + Name of OpenAI model to use. + dimensions: Optional[int] = None + The number of dimensions the resulting output embeddings should have. + Only supported in `text-embedding-3` and later models. - .. code-block:: python + Key init args — client params: + api_key: Optional[SecretStr] = None + OpenAI API key. + organization: Optional[str] = None + OpenAI organization ID. If not passed in will be read + from env var OPENAI_ORG_ID. + max_retries: int = 2 + Maximum number of retries to make when generating embeddings. + request_timeout: Optional[Union[float, Tuple[float, float], Any]] = None + Timeout for requests to the OpenAI embeddings API. - [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] + See full list of supported init args and their descriptions in the params section. - Embed multiple texts: + Instantiate: + .. code-block:: python - .. code-block:: python + from langchain_openai import OpenAIEmbeddings - vectors = embeddings.embed_documents(['hello', 'goodbye']) - # Showing only the first 3 coordinates - print(len(vectors)) - print(vectors[0][:3]) + embed = OpenAIEmbeddings( + model="text-embedding-3-large", + # With the `text-embedding-3` class + # of models, you can specify the size + # of the embeddings you want returned. + # dimensions=1024 + ) - .. 
code-block:: python + Embed single text: - 2 - [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] + .. code-block:: python - Async: - .. code-block:: python + input_text = "The meaning of life is 42" + vector = embed.embed_query("hello") + print(vector[:3]) - await embed.aembed_query(input_text) - print(vector[:3]) + .. code-block:: python - # multiple: - # await embed.aembed_documents(input_texts) + [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] - .. code-block:: python + Embed multiple texts: - [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188] - """ + .. code-block:: python + + vectors = embed.embed_documents(["hello", "goodbye"]) + # Showing only the first 3 coordinates + print(len(vectors)) + print(vectors[0][:3]) + + .. code-block:: python + + 2 + [-0.024603435769677162, -0.007543657906353474, 0.0039630369283258915] + Async: + .. code-block:: python + + vector = await embed.aembed_query(input_text) + print(vector[:3]) + + # multiple: + # await embed.aembed_documents(input_texts) + + .. code-block:: python + + [-0.009100092574954033, 0.005071679595857859, -0.0029193938244134188] + """ client: Any = Field(default=None, exclude=True) #: :meta private: async_client: Any = Field(default=None, exclude=True) #: :meta private: