diff --git a/pinecone_text/dense/openai_encoder.py b/pinecone_text/dense/openai_encoder.py
index ee9ac3e..ce4a655 100644
--- a/pinecone_text/dense/openai_encoder.py
+++ b/pinecone_text/dense/openai_encoder.py
@@ -66,17 +66,27 @@ def _encode(
                 f"texts must be a string or list of strings, got: {type(texts)}"
             )
 
-        try:
-            response = self._client.embeddings.create(
-                input=texts_input, model=self._model_name
-            )
-        except OpenAIError as e:
-            # TODO: consider wrapping external provider errors
-            raise e
-
-        if isinstance(texts, str):
-            return response.data[0].embedding
-        return [result.embedding for result in response.data]
+        # Azure OpenAI limits the number of inputs per embeddings request
+        # (16 as of 2023-11-27), so the inputs are sent in batches.
+        batch_size = 16
+        embeddings = []
+        for i in range(0, len(texts_input), batch_size):
+            # Batch over `texts_input` (what the previous code sent as `input`),
+            # not `texts`: slicing a raw string would cut it into fragments.
+            batch = texts_input[i : i + batch_size]
+            try:
+                response = self._client.embeddings.create(
+                    input=batch, model=self._model_name
+                )
+            except OpenAIError as e:
+                # TODO: consider wrapping external provider errors
+                raise e
+            embeddings.extend(item.embedding for item in response.data)
+
+        # Preserve the pre-batching contract: one string in, one embedding out.
+        if isinstance(texts, str):
+            return embeddings[0]
+        return embeddings
 
 
 class AzureOpenAIEncoder(OpenAIEncoder):