Skip to content

Commit

Permalink
Merge pull request #146 from aurelio-labs/james/index-fix
Browse files Browse the repository at this point in the history
feat: separate indexes and PineconeIndex
  • Loading branch information
jamescalam authored Feb 14, 2024
2 parents 8d7579f + 66a7c79 commit 966ac06
Show file tree
Hide file tree
Showing 8 changed files with 1,047 additions and 635 deletions.
1,104 changes: 560 additions & 544 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ readme = "README.md"
packages = [{include = "semantic_router"}]

[tool.poetry.dependencies]
python = "^3.9"
python = ">=3.9,<3.13"
pydantic = "^2.5.3"
openai = "^1.10.0"
cohere = "^4.32"
Expand All @@ -30,11 +30,12 @@ transformers = {version = "^4.36.2", optional = true}
llama-cpp-python = {version = "^0.2.28", optional = true}
black = "^23.12.1"
colorama = "^0.4.6"

pinecone-client = {version="^3.0.0", optional = true}
[tool.poetry.extras]
hybrid = ["pinecone-text"]
fastembed = ["fastembed"]
local = ["torch", "transformers", "llama-cpp-python"]
pinecone = ["pinecone-client"]

[tool.poetry.group.dev.dependencies]
ipykernel = "^6.25.0"
Expand All @@ -45,6 +46,7 @@ pytest-cov = "^4.1.0"
pytest-xdist = "^3.5.0"
mypy = "^1.7.1"
types-pyyaml = "^6.0.12.12"
types-requests = "^2.31.0"

[build-system]
requires = ["poetry-core"]
Expand Down
9 changes: 9 additions & 0 deletions semantic_router/index/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from semantic_router.index.base import BaseIndex
from semantic_router.index.local import LocalIndex
from semantic_router.index.pinecone import PineconeIndex

# Names re-exported as the public API of this package; each one is imported
# above from its own submodule.
__all__ = [
    "BaseIndex",
    "LocalIndex",
    "PineconeIndex",
]
59 changes: 59 additions & 0 deletions semantic_router/index/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from pydantic.v1 import BaseModel
from typing import Any, List, Tuple, Optional, Union
import numpy as np


class BaseIndex(BaseModel):
    """
    Common interface for index implementations, built on Pydantic's BaseModel.

    The class only declares the shared fields and the method signatures;
    every method here raises NotImplementedError, so a subclass must
    override each operation it supports.
    """

    # Shared state. The concrete representation of `index` is left to the
    # subclass, hence the loose `Any` annotation.
    index: Optional[Any] = None
    # Route names and utterances held by the index, when the subclass
    # keeps them as arrays.
    routes: Optional[np.ndarray] = None
    utterances: Optional[np.ndarray] = None
    # Embedding dimensionality, if known.
    dimensions: Union[int, None] = None
    # Short identifier of the index flavour.
    type: str = "base"

    class Config:
        # np.ndarray is not a type Pydantic can validate by itself.
        arbitrary_types_allowed = True

    def add(
        self, embeddings: List[List[float]], routes: List[str], utterances: List[str]
    ):
        """
        Store embeddings together with their route names and utterances.

        Not implemented on the base class; subclasses must override.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def delete(self, route_name: str):
        """
        Remove the records belonging to the route called `route_name`.

        Not implemented on the base class; subclasses must override.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def describe(self) -> dict:
        """
        Return a dictionary summarising the index (for example its type,
        dimensions and total vector count).

        Not implemented on the base class; subclasses must override.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def query(self, vector: np.ndarray, top_k: int = 5) -> Tuple[np.ndarray, List[str]]:
        """
        Look up `vector` in the index and return the `top_k` best results
        as a (scores, route names) pair.

        Not implemented on the base class; subclasses must override.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def delete_index(self):
        """
        Delete or reset the whole index.

        Not implemented on the base class; subclasses must override.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")
101 changes: 101 additions & 0 deletions semantic_router/index/local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import numpy as np
from typing import List, Tuple, Optional
from semantic_router.linear import similarity_matrix, top_scores
from semantic_router.index.base import BaseIndex


class LocalIndex(BaseIndex):
    """
    In-memory index backed by NumPy arrays.

    `index`, `routes` and `utterances` are kept as parallel arrays: `add`
    appends to all three together and `delete` removes the same positions
    from all three, so position i in each array describes the same record.
    """

    def __init__(
        self,
        index: Optional[np.ndarray] = None,
        routes: Optional[np.ndarray] = None,
        utterances: Optional[np.ndarray] = None,
    ):
        """
        Create a local index, optionally pre-populated.

        Args:
            index: Existing embedding array, or None for an empty index.
            routes: Route names matching `index`, or None.
            utterances: Utterances matching `index`, or None.
        """
        super().__init__(index=index, routes=routes, utterances=utterances)
        self.type = "local"

    class Config:  # Stop pydantic from complaining about Optional[np.ndarray] type hints.
        arbitrary_types_allowed = True

    def add(
        self, embeddings: List[List[float]], routes: List[str], utterances: List[str]
    ):
        """
        Append embeddings with their route names and utterances.

        On an empty index the three arrays are initialised from the
        arguments; otherwise the new records are concatenated onto the
        existing arrays.
        """
        embeds = np.array(embeddings)  # type: ignore
        routes_arr = np.array(routes)
        utterances_arr = np.array(utterances)
        if self.index is None:
            self.index = embeds  # type: ignore
            self.routes = routes_arr
            self.utterances = utterances_arr
        else:
            self.index = np.concatenate([self.index, embeds])
            self.routes = np.concatenate([self.routes, routes_arr])
            self.utterances = np.concatenate([self.utterances, utterances_arr])

    def get_routes(self) -> List[Tuple]:
        """
        Gets a list of route and utterance objects currently stored in the index.

        Returns:
            List[Tuple]: A list of (route_name, utterance) objects.

        Raises:
            ValueError: If no routes or utterances are stored.
        """
        if self.routes is None or self.utterances is None:
            raise ValueError("No routes have been added to the index.")
        return list(zip(self.routes, self.utterances))

    def describe(self) -> dict:
        """
        Return the index type, embedding dimensionality and vector count.

        Both numbers are reported as 0 while the index is empty.
        """
        if self.index is None:
            dimensions, vectors = 0, 0
        else:
            vectors, dimensions = self.index.shape[0], self.index.shape[1]
        return {
            "type": self.type,
            "dimensions": dimensions,
            "vectors": vectors,
        }

    def query(self, vector: np.ndarray, top_k: int = 5) -> Tuple[np.ndarray, List[str]]:
        """
        Search the index for the query and return top_k results.

        Returns:
            A (scores, route_names) pair as produced by `top_scores`.
            NOTE(review): `route_names` is a NumPy array sliced from
            `self.routes`, not a plain list as the annotation suggests.

        Raises:
            ValueError: If the index or the routes are not populated.
        """
        if self.index is None or self.routes is None:
            raise ValueError("Index or routes are not populated.")
        sim = similarity_matrix(vector, self.index)
        # extract the index values of top scoring vectors
        scores, idx = top_scores(sim, top_k)
        # get routes from index values; copy so callers cannot mutate self.routes
        route_names = self.routes[idx].copy()
        return scores, route_names

    def delete(self, route_name: str):
        """
        Delete all records of a specific route from the index.

        Raises:
            ValueError: If index, routes or utterances is None.
        """
        if self.index is None or self.routes is None or self.utterances is None:
            raise ValueError(
                "Attempted to delete route records but either index, routes or utterances is None."
            )
        delete_idx = self._get_indices_for_route(route_name=route_name)
        # Remove the same positions from every array to keep them aligned.
        self.index = np.delete(self.index, delete_idx, axis=0)
        self.routes = np.delete(self.routes, delete_idx, axis=0)
        self.utterances = np.delete(self.utterances, delete_idx, axis=0)

    def delete_index(self):
        """
        Deletes the index, effectively clearing it and setting it to None.

        The route and utterance arrays are cleared as well; otherwise
        `get_routes` would keep returning records whose embeddings no
        longer exist.
        """
        self.index = None
        self.routes = None
        self.utterances = None

    def _get_indices_for_route(self, route_name: str):
        """
        Gets a list of positions in `self.routes` equal to `route_name`.

        Raises:
            ValueError: If routes are not populated.
        """
        if self.routes is None:
            raise ValueError("Routes are not populated.")
        idx = [i for i, route in enumerate(self.routes) if route == route_name]
        return idx

    def __len__(self):
        """Return the number of stored vectors (0 for an empty index)."""
        return 0 if self.index is None else self.index.shape[0]
Loading

0 comments on commit 966ac06

Please sign in to comment.