Commit e5ec6a4

Merge branch 'run-llama:main' into main
2 parents 42d9cea + 2377848

File tree

37 files changed: +2705 −1 lines changed

docs/docs/examples/embeddings/textembed.ipynb

Lines changed: 132 additions & 0 deletions
Large diffs are not rendered by default.

docs/docs/examples/vector_stores/TiDBVector.ipynb

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
    "source": [
     "# TiDB Vector Store\n",
     "\n",
-    "> [TiDB Cloud](https://tidbcloud.com/), is a comprehensive Database-as-a-Service (DBaaS) solution, that provides dedicated and serverless options. TiDB Serverless is now integrating a built-in vector search into the MySQL landscape. With this enhancement, you can seamlessly develop AI applications using TiDB Serverless without the need for a new database or additional technical stacks. Be among the first to experience it by joining the waitlist for the private beta at https://tidb.cloud/ai.\n",
+    "> [TiDB Cloud](https://www.pingcap.com/tidb-serverless/), is a comprehensive Database-as-a-Service (DBaaS) solution, that provides dedicated and serverless options. TiDB Serverless is now integrating a built-in vector search into the MySQL landscape. With this enhancement, you can seamlessly develop AI applications using TiDB Serverless without the need for a new database or additional technical stacks. Create a free TiDB Serverless cluster and start using the vector search feature at https://pingcap.com/ai.\n",
     "\n",
     "This notebook provides a detailed guide on utilizing the tidb vector search in LlamaIndex."
    ]

llama-index-core/llama_index/core/query_pipeline/query.py

Lines changed: 9 additions & 0 deletions
@@ -33,6 +33,9 @@
 )
 from llama_index.core.utils import print_text
 from llama_index.core.query_pipeline.components.stateful import BaseStatefulComponent
+import llama_index.core.instrumentation as instrument
+
+dispatcher = instrument.get_dispatcher(__name__)


 # TODO: Make this (safely) pydantic?
@@ -386,6 +389,7 @@ def set_callback_manager(self, callback_manager: CallbackManager) -> None:
         for module in self.module_dict.values():
             module.set_callback_manager(callback_manager)

+    @dispatcher.span
     def run(
         self,
         *args: Any,
@@ -529,6 +533,7 @@ def run_multi_with_intermediates(
         ) as query_event:
             return self._run_multi(module_input_dict, show_intermediates=True)

+    @dispatcher.span
     async def arun(
         self,
         *args: Any,
@@ -725,6 +730,7 @@ def _get_single_result_output(
         else:
             return result_output

+    @dispatcher.span
     def _run(
         self,
         *args: Any,
@@ -780,6 +786,7 @@ def _run(
             intermediates,
         )

+    @dispatcher.span
     async def _arun(
         self,
         *args: Any,
@@ -904,6 +911,7 @@ def get_run_state(
         root_key, kwargs = self._get_root_key_and_kwargs(**pipeline_inputs)
         return RunState(self.module_dict, {root_key: kwargs})

+    @dispatcher.span
     def _run_multi(
         self, module_input_dict: Dict[str, Any], show_intermediates=False
     ) -> Tuple[Dict[str, Any], Dict[str, ComponentIntermediates]]:
@@ -947,6 +955,7 @@ def _run_multi(

         return run_state.result_outputs, run_state.intermediate_outputs

+    @dispatcher.span
     async def _arun_multi(
         self, module_input_dict: Dict[str, Any], show_intermediates: bool = False
     ) -> Tuple[Dict[str, Any], Dict[str, ComponentIntermediates]]:
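The new `@dispatcher.span` decorators hook `QueryPipeline`'s run methods into LlamaIndex's instrumentation system, so each sync/async run opens a span on the module-level dispatcher. Below is a minimal sketch of how those spans might be observed; it assumes `SimpleSpanHandler` and its `print_trace_trees()` helper from `llama_index.core.instrumentation.span_handlers` (names per the instrumentation docs), and the pipeline run itself is illustrative:

```python
import llama_index.core.instrumentation as instrument
from llama_index.core.instrumentation.span_handlers import SimpleSpanHandler

# Register a span handler on the root dispatcher; spans opened by the
# @dispatcher.span decorators above propagate up to it.
span_handler = SimpleSpanHandler()
instrument.get_dispatcher().add_span_handler(span_handler)

# ... build and run a QueryPipeline as usual, e.g. pipeline.run(input="...") ...

# Dump the recorded span trees; QueryPipeline.run / _run / _run_multi
# should now appear as nested spans.
span_handler.print_trace_trees()
```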
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+poetry_requirements(
+    name="poetry",
+)
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
+
+help:	## Show all Makefile targets.
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
+
+format:	## Run code autoformatters (black).
+	pre-commit install
+	git ls-files | xargs pre-commit run black --files
+
+lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
+	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
+
+test:	## Run tests via pytest.
+	pytest tests
+
+watch-docs:	## Build and watch documentation.
+	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+# TextEmbed - Embedding Inference Server
+
+Maintained by Keval Dekivadiya, TextEmbed is licensed under the [Apache-2.0 License](https://opensource.org/licenses/Apache-2.0).
+
+TextEmbed is a high-throughput, low-latency REST API designed for serving vector embeddings. It supports a wide range of sentence-transformer models and frameworks, making it suitable for various natural language processing applications.
+
+## Features
+
+- **High Throughput & Low Latency**: Designed to handle a large number of requests efficiently.
+- **Flexible Model Support**: Works with various sentence-transformer models.
+- **Scalable**: Easily integrates into larger systems and scales with demand.
+- **Batch Processing**: Supports batch processing for better and faster inference.
+- **OpenAI-Compatible REST API**: Exposes an OpenAI-compatible REST API endpoint.
+- **Single-Command Deployment**: Deploy multiple models with a single command.
+- **Multiple Embedding Formats**: Supports binary, float16, and float32 embedding formats for faster retrieval.
+
+## Getting Started
+
+### Prerequisites
+
+Ensure you have Python 3.10 or higher installed. You will also need to install the required dependencies.
+
+### Installation via PyPI
+
+Install the required dependencies:
+
+```bash
+pip install -U textembed
+```
+
+### Start the TextEmbed Server
+
+Start the TextEmbed server with your desired models:
+
+```bash
+python -m textembed.server --models sentence-transformers/all-MiniLM-L12-v2 --workers 4 --api-key TextEmbed
+```
+
+### Example Usage with llama-index
+
+Here is a simple example to get you started with llama-index:
+
+```python
+from llama_index.embeddings.textembed import TextEmbedEmbedding
+
+# Initialize the TextEmbedEmbedding class
+embed = TextEmbedEmbedding(
+    model_name="sentence-transformers/all-MiniLM-L12-v2",
+    base_url="http://0.0.0.0:8000/v1",
+    auth_token="TextEmbed",
+)
+
+# Get embeddings for a batch of texts
+embeddings = embed.get_text_embedding_batch(
+    [
+        "It is raining cats and dogs here!",
+        "India has a diverse cultural heritage.",
+    ]
+)
+
+print(embeddings)
+```
+
+For more information, please read the [documentation](https://github.com/kevaldekivadiya2415/textembed/blob/main/docs/setup.md).
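The same client also exposes the async interface inherited from `BaseEmbedding`. A minimal sketch, assuming the server started above is running and that `aget_text_embedding_batch` is available on the base class:

```python
import asyncio

from llama_index.embeddings.textembed import TextEmbedEmbedding

embed = TextEmbedEmbedding(
    model_name="sentence-transformers/all-MiniLM-L12-v2",
    base_url="http://0.0.0.0:8000/v1",
    auth_token="TextEmbed",
)


async def main() -> None:
    # Batches are posted to the server's /embedding route asynchronously.
    embeddings = await embed.aget_text_embedding_batch(
        ["It is raining cats and dogs here!"]
    )
    print(len(embeddings), len(embeddings[0]))  # batch size and dimensionality


asyncio.run(main())
```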
@@ -0,0 +1 @@
+python_sources()
@@ -0,0 +1,3 @@
+from llama_index.embeddings.textembed.base import TextEmbedEmbedding
+
+__all__ = ["TextEmbedEmbedding"]
@@ -0,0 +1,203 @@
+"""TextEmbed: Embedding Inference Server.
+
+TextEmbed offers a high-throughput, low-latency service for generating embeddings using various sentence-transformer models.
+It now also supports image embedding models, providing flexibility and scalability for diverse applications.
+
+Maintained by Keval Dekivadiya, TextEmbed is licensed under Apache-2.0.
+"""
+
+from typing import Callable, List, Optional, Union
+
+import aiohttp
+import requests
+
+from llama_index.core.base.embeddings.base import (
+    DEFAULT_EMBED_BATCH_SIZE,
+    BaseEmbedding,
+)
+from llama_index.core.bridge.pydantic import Field
+from llama_index.core.callbacks import CallbackManager
+
+DEFAULT_URL = "http://0.0.0.0:8000/v1"
+
+
+class TextEmbedEmbedding(BaseEmbedding):
+    """TextEmbedEmbedding is a class for interfacing with the TextEmbed embedding inference server."""
+
+    base_url: str = Field(
+        default=DEFAULT_URL,
+        description="Base URL for the text embeddings service.",
+    )
+    timeout: float = Field(
+        default=60.0,
+        description="Timeout in seconds for the request.",
+    )
+    auth_token: Optional[Union[str, Callable[[str], str]]] = Field(
+        default=None,
+        description="Authentication token or authentication token generating function for authenticated requests.",
+    )
+
+    def __init__(
+        self,
+        model_name: str,
+        base_url: str = DEFAULT_URL,
+        embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
+        timeout: float = 60.0,
+        callback_manager: Optional[CallbackManager] = None,
+        auth_token: Optional[Union[str, Callable[[str], str]]] = None,
+    ):
+        """
+        Initialize the TextEmbedEmbedding object with the specified parameters.
+
+        Args:
+            model_name (str): The name of the model to be used for embeddings.
+            base_url (str): The base URL of the embedding service.
+            embed_batch_size (int): The batch size for embedding requests.
+            timeout (float): Timeout for requests.
+            callback_manager (Optional[CallbackManager]): Manager for handling callbacks.
+            auth_token (Optional[Union[str, Callable[[str], str]]]): Authentication token or function for generating it.
+        """
+        super().__init__(
+            base_url=base_url,
+            model_name=model_name,
+            embed_batch_size=embed_batch_size,
+            timeout=timeout,
+            callback_manager=callback_manager,
+            auth_token=auth_token,
+        )
+
+    def _call_api(self, texts: List[str]) -> List[List[float]]:
+        """
+        Call the TextEmbed API to get embeddings for a list of texts.
+
+        Args:
+            texts (List[str]): A list of texts to get embeddings for.
+
+        Returns:
+            List[List[float]]: A list of embeddings for the input texts.
+
+        Raises:
+            Exception: If the API responds with a status code other than 200.
+        """
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.auth_token}" if self.auth_token else None,
+        }
+        json_data = {"input": texts, "model": self.model_name}
+        with requests.post(
+            f"{self.base_url}/embedding",
+            headers=headers,
+            json=json_data,
+            timeout=self.timeout,
+        ) as response:
+            if response.status_code != 200:
+                raise Exception(
+                    f"TextEmbed responded with an unexpected status message "
+                    f"{response.status_code}: {response.text}"
+                )
+            return [e["embedding"] for e in response.json()["data"]]
+
+    async def _acall_api(self, texts: List[str]) -> List[List[float]]:
+        """
+        Asynchronously call the TextEmbed API to get embeddings for a list of texts.
+
+        Args:
+            texts (List[str]): A list of texts to get embeddings for.
+
+        Returns:
+            List[List[float]]: A list of embeddings for the input texts.
+
+        Raises:
+            Exception: If the API responds with a status code other than 200.
+        """
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.auth_token}" if self.auth_token else None,
+        }
+        json_data = {"input": texts, "model": self.model_name}
+        async with aiohttp.ClientSession() as session:
+            async with session.post(
+                f"{self.base_url}/embedding",
+                headers=headers,
+                json=json_data,
+                timeout=self.timeout,
+            ) as response:
+                if response.status != 200:
+                    raise Exception(
+                        f"TextEmbed responded with an unexpected status message "
+                        f"{response.status}: {response.text}"
+                    )
+                data = await response.json()
+                return [e["embedding"] for e in data["data"]]
+
+    def _get_query_embedding(self, query: str) -> List[float]:
+        """
+        Get the embedding for a single query.
+
+        Args:
+            query (str): The query to get the embedding for.
+
+        Returns:
+            List[float]: The embedding for the query.
+        """
+        return self._call_api([query])[0]
+
+    def _get_text_embedding(self, text: str) -> List[float]:
+        """
+        Get the embedding for a single text.
+
+        Args:
+            text (str): The text to get the embedding for.
+
+        Returns:
+            List[float]: The embedding for the text.
+        """
+        return self._call_api([text])[0]
+
+    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Get the embeddings for a list of texts.
+
+        Args:
+            texts (List[str]): The texts to get the embeddings for.
+
+        Returns:
+            List[List[float]]: A list of embeddings for the input texts.
+        """
+        return self._call_api(texts)
+
+    async def _aget_query_embedding(self, query: str) -> List[float]:
+        """
+        Asynchronously get the embedding for a single query.
+
+        Args:
+            query (str): The query to get the embedding for.
+
+        Returns:
+            List[float]: The embedding for the query.
+        """
+        return (await self._acall_api([query]))[0]
+
+    async def _aget_text_embedding(self, text: str) -> List[float]:
+        """
+        Asynchronously get the embedding for a single text.
+
+        Args:
+            text (str): The text to get the embedding for.
+
+        Returns:
+            List[float]: The embedding for the text.
+        """
+        return (await self._acall_api([text]))[0]
+
+    async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
+        """
+        Asynchronously get the embeddings for a list of texts.
+
+        Args:
+            texts (List[str]): The texts to get the embeddings for.
+
+        Returns:
+            List[List[float]]: A list of embeddings for the input texts.
+        """
+        return await self._acall_api(texts)
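For debugging the server outside LlamaIndex, here is a minimal sketch of the raw request that `_call_api` above issues. The `/embedding` route, payload shape, and bearer header are read directly off the implementation; the URL, model name, and token are the values assumed in the README:

```python
import requests

# Mirror of _call_api: POST {"input": [...], "model": ...} to /embedding
# with an optional bearer token.
resp = requests.post(
    "http://0.0.0.0:8000/v1/embedding",
    headers={
        "Content-Type": "application/json",
        "Authorization": "Bearer TextEmbed",
    },
    json={
        "input": ["hello world"],
        "model": "sentence-transformers/all-MiniLM-L12-v2",
    },
    timeout=60.0,
)
resp.raise_for_status()
# Response body: {"data": [{"embedding": [...]}, ...]}
print([e["embedding"][:4] for e in resp.json()["data"]])
```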
