-
Notifications
You must be signed in to change notification settings - Fork 238
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #146 from aurelio-labs/james/index-fix
feat: separate indexes and PineconeIndex
- Loading branch information
Showing
8 changed files
with
1,047 additions
and
635 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
from semantic_router.index.base import BaseIndex | ||
from semantic_router.index.local import LocalIndex | ||
from semantic_router.index.pinecone import PineconeIndex | ||
|
||
# Public names exported when this module is star-imported.
__all__ = [
    "BaseIndex",
    "LocalIndex",
    "PineconeIndex",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from pydantic.v1 import BaseModel | ||
from typing import Any, List, Tuple, Optional, Union | ||
import numpy as np | ||
|
||
|
||
class BaseIndex(BaseModel):
    """
    Common interface for index backends, expressed as a Pydantic model.

    The class declares the fields every index carries and the operations an
    index is expected to offer. None of the operations is implemented here:
    each one raises ``NotImplementedError`` and must be overridden by a
    concrete subclass.
    """

    class Config:
        # Lets Pydantic accept the ``np.ndarray`` annotations used below.
        arbitrary_types_allowed = True

    # Fields shared by all index implementations; every one defaults to an
    # "empty" value so a subclass can be constructed without arguments.
    index: Optional[Any] = None
    routes: Optional[np.ndarray] = None
    utterances: Optional[np.ndarray] = None
    dimensions: Union[int, None] = None
    type: str = "base"

    def add(
        self, embeddings: List[List[float]], routes: List[str], utterances: List[str]
    ):
        """
        Store embeddings together with their route names and utterances.

        Not implemented on the base class; subclasses must override it.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def delete(self, route_name: str):
        """
        Remove the records that belong to the route called ``route_name``.

        Not implemented on the base class; subclasses must override it.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def describe(self) -> dict:
        """
        Report index details (such as type, dimensions and vector count) as a dict.

        Not implemented on the base class; subclasses must override it.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def query(self, vector: np.ndarray, top_k: int = 5) -> Tuple[np.ndarray, List[str]]:
        """
        Look up ``vector`` in the index and return the ``top_k`` best results.

        Not implemented on the base class; subclasses must override it.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")

    def delete_index(self):
        """
        Delete or reset the whole index.

        Not implemented on the base class; subclasses must override it.
        """
        raise NotImplementedError("This method should be implemented by subclasses.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import numpy as np | ||
from typing import List, Tuple, Optional | ||
from semantic_router.linear import similarity_matrix, top_scores | ||
from semantic_router.index.base import BaseIndex | ||
|
||
|
||
class LocalIndex(BaseIndex):
    """
    In-memory index that keeps embeddings and their labels in NumPy arrays.

    ``index`` stores the embeddings (one row per vector). ``routes`` and
    ``utterances`` are kept in the same row order, so position ``i`` of each
    describes row ``i`` of ``index``.
    """

    class Config:  # Stop pydantic from complaining about Optional[np.ndarray] type hints.
        arbitrary_types_allowed = True

    def __init__(
        self,
        index: Optional[np.ndarray] = None,
        routes: Optional[np.ndarray] = None,
        utterances: Optional[np.ndarray] = None,
    ):
        """
        Create a local index, optionally pre-populated with existing arrays.

        Args:
            index: Embeddings to start from, or ``None`` for an empty index.
            routes: Route name for each row of ``index``.
            utterances: Utterance for each row of ``index``.
        """
        super().__init__(index=index, routes=routes, utterances=utterances)
        self.type = "local"

    def add(
        self, embeddings: List[List[float]], routes: List[str], utterances: List[str]
    ):
        """
        Append embeddings and their route/utterance labels to the index.

        An empty batch is ignored. Storing it would put a 1-D empty array in
        ``index``, which breaks ``describe`` (no second axis) and every later
        concatenation with real 2-D embeddings.

        Args:
            embeddings: One embedding per record.
            routes: Route name for each embedding.
            utterances: Utterance for each embedding.
        """
        embeds = np.array(embeddings)  # type: ignore
        if embeds.size == 0:
            return
        routes_arr = np.array(routes)
        utterances_arr = np.array(utterances)
        if self.index is None:
            self.index = embeds  # type: ignore
            self.routes = routes_arr
            self.utterances = utterances_arr
        else:
            self.index = np.concatenate([self.index, embeds])
            self.routes = np.concatenate([self.routes, routes_arr])
            self.utterances = np.concatenate([self.utterances, utterances_arr])

    def get_routes(self) -> List[Tuple]:
        """
        Gets a list of route and utterance objects currently stored in the index.

        Returns:
            List[Tuple]: A list of (route_name, utterance) objects.

        Raises:
            ValueError: If no routes or utterances are stored.
        """
        if self.routes is None or self.utterances is None:
            raise ValueError("No routes have been added to the index.")
        return list(zip(self.routes, self.utterances))

    def describe(self) -> dict:
        """
        Summarise the index.

        Returns:
            dict: ``type``, ``dimensions`` (columns of ``index``) and
            ``vectors`` (rows of ``index``); both counts are 0 when the index
            is empty.
        """
        if self.index is None:
            dimensions, vectors = 0, 0
        else:
            vectors, dimensions = self.index.shape[0], self.index.shape[1]
        return {
            "type": self.type,
            "dimensions": dimensions,
            "vectors": vectors,
        }

    def query(self, vector: np.ndarray, top_k: int = 5) -> Tuple[np.ndarray, List[str]]:
        """
        Search the index for the query and return top_k results.

        Args:
            vector: Query embedding.
            top_k: Number of results requested.

        Returns:
            The scores produced by ``top_scores`` and the route names of the
            selected rows (taken from the ``routes`` array).

        Raises:
            ValueError: If the index or the routes are not populated.
        """
        if self.index is None or self.routes is None:
            raise ValueError("Index or routes are not populated.")
        sim = similarity_matrix(vector, self.index)
        # extract the index values of top scoring vectors
        scores, idx = top_scores(sim, top_k)
        # get routes from index values
        route_names = self.routes[idx].copy()
        return scores, route_names

    def delete(self, route_name: str):
        """
        Delete all records of a specific route from the index.

        Args:
            route_name: Name of the route whose rows are removed.

        Raises:
            ValueError: If index, routes or utterances is not populated.
        """
        if self.index is None or self.routes is None or self.utterances is None:
            raise ValueError(
                "Attempted to delete route records but either index, routes or utterances is None."
            )
        delete_idx = self._get_indices_for_route(route_name=route_name)
        # The three arrays share row order, so the same rows go from each.
        self.index = np.delete(self.index, delete_idx, axis=0)
        self.routes = np.delete(self.routes, delete_idx, axis=0)
        self.utterances = np.delete(self.utterances, delete_idx, axis=0)

    def delete_index(self):
        """
        Deletes the index, effectively clearing it and setting it to None.

        The route and utterance labels are cleared as well; leaving them in
        place would let ``get_routes`` report records whose embeddings no
        longer exist.
        """
        self.index = None
        self.routes = None
        self.utterances = None

    def _get_indices_for_route(self, route_name: str):
        """
        Gets the row indices that belong to a specific route.

        Returns:
            A list of integer positions in ``routes`` equal to ``route_name``.

        Raises:
            ValueError: If routes are not populated.
        """
        if self.routes is None:
            raise ValueError("Routes are not populated.")
        # Element-wise comparison on the array instead of a Python-level loop.
        return np.flatnonzero(self.routes == route_name).tolist()

    def __len__(self):
        """Return the number of vectors stored, or 0 for an empty index."""
        if self.index is None:
            return 0
        return self.index.shape[0]
Oops, something went wrong.