diff --git a/README.md b/README.md
index c12d381..6f33b09 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,11 @@ If you wish to use `SentenceTransformerEncoder` dense encoder, you will need to
 pip install pinecone-text[dense]
 ```
 
+If you wish to use `OpenAIEncoder` dense encoder, you will need to install the `openai` extra:
+```bash
+pip install pinecone-text[openai]
+```
+
 ## Sparse Encoding
 
 To convert your own text corpus to sparse vectors, you can either use [BM25](https://www.pinecone.io/learn/semantic-search/#bm25) or [SPLADE](https://www.pinecone.io/learn/splade/).
@@ -41,7 +46,7 @@ To encode your documents and queries using BM25 as vector for dot product search
 > When conducting a search, you may come across queries that contain terms not found in the training corpus but are present in the database. To address this scenario, BM25Encoder uses a default document frequency value of 1 when encoding such terms.
 
 #### Usage
-For an end-to-end example, you can refer to our Quora dataset generation with BM25 [notebook](https://github.com/pinecone-io/examples/blob/master/pinecone/sparse/bm25/bm25-vector-generation.ipynb).
+For an end-to-end example, you can refer to our Quora dataset generation with BM25 [notebook](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/search/semantic-search/sparse/bm25/bm25-vector-generation.ipynb).
 
 ```python
 from pinecone_text.sparse import BM25Encoder
@@ -91,7 +96,7 @@ Currently the `SpladeEncoder` class supprts only the [naver/splade-cocondenser-e
 
 #### Usage
 
-For an end-to-end example, you can refer to our Quora dataset generation with SPLADE [notebook](https://github.com/pinecone-io/examples/blob/master/pinecone/sparse/splade/splade-vector-generation.ipynb).
+For an end-to-end example, you can refer to our Quora dataset generation with SPLADE [notebook](https://colab.research.google.com/github/pinecone-io/examples/blob/master/learn/search/semantic-search/sparse/splade/splade-vector-generation.ipynb).
 
 ```python
 from pinecone_text.sparse import SpladeEncoder
@@ -138,8 +143,9 @@ encoder.encode_queries(["Who jumped over the lazy dog?"])
 
 ### OpenAI models
 
-When using the `OpenAIEncoder`, you need to provide an API key for the OpenAI API, and store it in the `OPENAI_API_KEY` environment variable.
+When using the `OpenAIEncoder`, you need to provide an API key for the OpenAI API, and store it in the `OPENAI_API_KEY` environment variable before you import the encoder.
+By default the encoder will use `text-embedding-ada-002` as recommended by OpenAI. You can also specify a different model name using the `model_name` parameter.
 
 #### Usage
 ```python
 from pinecone_text.dense.openai_encoder import OpenAIEncoder
diff --git a/pinecone_text/dense/__init__.py b/pinecone_text/dense/__init__.py
index 25357fa..e69de29 100644
--- a/pinecone_text/dense/__init__.py
+++ b/pinecone_text/dense/__init__.py
@@ -1,6 +0,0 @@
-"""
-Sentance Transformers are a class of models that take a sentence as input and output a vector representation of the sentence.
-These models are useful for tasks such as semantic search, clustering, and classification. The sentence transformer models are
-the work of the research team led by Nils Reimers at the University of Stuttgart. For more information, see the [Sentence Transformers paper](https://arxiv.org/abs/1908.10084).
- -""" diff --git a/pinecone_text/dense/openai_encoder.py b/pinecone_text/dense/openai_encoder.py index 1253ef0..17e8a0b 100644 --- a/pinecone_text/dense/openai_encoder.py +++ b/pinecone_text/dense/openai_encoder.py @@ -1,7 +1,14 @@ -import openai from typing import Union, List from pinecone_text.dense.base_dense_ecoder import BaseDenseEncoder +try: + import openai +except (OSError, ImportError, ModuleNotFoundError) as e: + raise ImportError( + "Failed to import openai. Make sure you install openai extra dependencies by running: " + "`pip install pinecone-text[openai]" + ) from e + class OpenAIEncoder(BaseDenseEncoder): """ diff --git a/pyproject.toml b/pyproject.toml index 8e2b83c..7caa638 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pinecone-text" -version = "0.5.3" +version = "0.5.4" description = "Text utilities library by Pinecone.io" authors = ["Pinecone.io"] readme = "README.md" @@ -19,6 +19,7 @@ openai = { version = "^0.27.3", optional = true } [tool.poetry.extras] splade = ["torch", "transformers", "sentence-transformers"] dense = ["torch", "transformers", "sentence-transformers", "openai"] +openai = ["openai"] [tool.poetry.group.dev] optional = true