From da1ff80ecaffde8a56b013d3326f059202a2d309 Mon Sep 17 00:00:00 2001
From: hananel
Date: Thu, 11 Jan 2024 11:43:10 +0200
Subject: [PATCH 1/4] make all type hints lowercase

---
 semantic_router/encoders/fastembed.py | 6 +++---
 semantic_router/linear.py             | 4 +---
 semantic_router/utils/splitters.py    | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/semantic_router/encoders/fastembed.py b/semantic_router/encoders/fastembed.py
index 98cfc6cc..33c81f39 100644
--- a/semantic_router/encoders/fastembed.py
+++ b/semantic_router/encoders/fastembed.py
@@ -1,4 +1,4 @@
-from typing import Any, List, Optional
+from typing import Any, Optional
 
 import numpy as np
 from pydantic import PrivateAttr
@@ -44,8 +44,8 @@ def _initialize_client(self):
 
     def __call__(self, docs: list[str]) -> list[list[float]]:
         try:
-            embeds: List[np.ndarray] = list(self._client.embed(docs))
-            embeddings: List[List[float]] = [e.tolist() for e in embeds]
+            embeds: list[np.ndarray] = list(self._client.embed(docs))
+            embeddings: list[list[float]] = [e.tolist() for e in embeds]
             return embeddings
         except Exception as e:
             raise ValueError(f"FastEmbed embed failed. Error: {e}")
diff --git a/semantic_router/linear.py b/semantic_router/linear.py
index 1c13262f..09b911fb 100644
--- a/semantic_router/linear.py
+++ b/semantic_router/linear.py
@@ -1,5 +1,3 @@
-from typing import Tuple
-
 import numpy as np
 from numpy.linalg import norm
 
@@ -21,7 +19,7 @@ def similarity_matrix(xq: np.ndarray, index: np.ndarray) -> np.ndarray:
     return sim
 
 
-def top_scores(sim: np.ndarray, top_k: int = 5) -> Tuple[np.ndarray, np.ndarray]:
+def top_scores(sim: np.ndarray, top_k: int = 5) -> tuple[np.ndarray, np.ndarray]:
     # get indices of top_k records
     top_k = min(top_k, sim.shape[0])
     idx = np.argpartition(sim, -top_k)[-top_k:]
diff --git a/semantic_router/utils/splitters.py b/semantic_router/utils/splitters.py
index 74601520..f469fbcc 100644
--- a/semantic_router/utils/splitters.py
+++ b/semantic_router/utils/splitters.py
@@ -26,7 +26,7 @@ def semantic_splitter(
         split_method (str): The method to use for splitting.
 
     Returns:
-        Dict[str, list[str]]: Splits with corresponding documents.
+        dict[str, list[str]]: Splits with corresponding documents.
     """
     total_docs = len(docs)
     splits = {}
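
A note on the hunks above: the lowercase spellings rely on PEP 585, so `list[str]` and `tuple[np.ndarray, np.ndarray]` are only subscriptable at runtime on Python 3.9+, whereas the `typing.List`/`typing.Tuple` aliases also work on 3.7 and 3.8. Patch 4 below restores the uppercase aliases, consistent with that constraint. A standalone sketch of the difference (illustration only, not part of the patches):

    # generics_compat.py - illustration only.
    # The typing aliases are valid at runtime on Python 3.7+.
    from typing import List, Tuple

    def top_two(scores: List[float]) -> Tuple[float, float]:
        ranked = sorted(scores, reverse=True)
        return ranked[0], ranked[1]

    print(top_two([0.2, 0.9, 0.5]))  # (0.9, 0.5)

    # The lowercase form `def top_two(scores: list[float]) -> tuple[float, float]`
    # is evaluated when the function is defined, so on Python 3.8 importing this
    # module raises "TypeError: 'type' object is not subscriptable" unless the
    # file starts with `from __future__ import annotations`.
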
""" total_docs = len(docs) splits = {} From a1fa7419c5847235e2ca9f6ad84dfc287a5d4d3d Mon Sep 17 00:00:00 2001 From: hananel Date: Thu, 11 Jan 2024 19:05:19 +0200 Subject: [PATCH 2/4] additional improvements --- semantic_router/layer.py | 7 ++++--- semantic_router/route.py | 2 +- semantic_router/schema.py | 18 ++++++++++-------- semantic_router/utils/logger.py | 2 +- semantic_router/utils/splitters.py | 5 ++++- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/semantic_router/layer.py b/semantic_router/layer.py index cf546bfc..7ff7a15b 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -1,6 +1,6 @@ import json import os -from typing import Optional +from typing import Optional, Any import numpy as np import yaml @@ -14,6 +14,7 @@ def is_valid(layer_config: str) -> bool: + """Make sure the given string is json format and contains the 3 keys: ["encoder_name", "encoder_type", "routes"]""" try: output_json = json.loads(layer_config) required_keys = ["encoder_name", "encoder_type", "routes"] @@ -73,7 +74,7 @@ def __init__( self.routes = routes @classmethod - def from_file(cls, path: str): + def from_file(cls, path: str) -> "LayerConfig": """Load the routes from a file in JSON or YAML format""" logger.info(f"Loading route config from {path}") _, ext = os.path.splitext(path) @@ -98,7 +99,7 @@ def from_file(cls, path: str): else: raise Exception("Invalid config JSON or YAML") - def to_dict(self): + def to_dict(self) -> dict[str, Any]: return { "encoder_type": self.encoder_type, "encoder_name": self.encoder_name, diff --git a/semantic_router/route.py b/semantic_router/route.py index 6cca7eaf..b492ae13 100644 --- a/semantic_router/route.py +++ b/semantic_router/route.py @@ -62,7 +62,7 @@ def __call__(self, query: str) -> RouteChoice: func_call = None return RouteChoice(name=self.name, function_call=func_call) - def to_dict(self): + def to_dict(self) -> dict[str, Any]: return self.dict() @classmethod diff --git a/semantic_router/schema.py b/semantic_router/schema.py index bb1a4c6a..8d479ec9 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Optional +from typing import Optional, Literal from pydantic import BaseModel from pydantic.dataclasses import dataclass @@ -55,12 +55,12 @@ class Message(BaseModel): role: str content: str - def to_openai(self): + def to_openai(self) -> dict[str, str]: if self.role.lower() not in ["user", "assistant", "system"]: raise ValueError("Role must be either 'user', 'assistant' or 'system'") return {"role": self.role, "content": self.content} - def to_cohere(self): + def to_cohere(self) -> dict[str, str]: return {"role": self.role, "message": self.content} @@ -68,11 +68,13 @@ class Conversation(BaseModel): messages: list[Message] def split_by_topic( - self, - encoder: BaseEncoder, - threshold: float = 0.5, - split_method: str = "consecutive_similarity_drop", - ): + self, + encoder: BaseEncoder, + threshold: float = 0.5, + split_method: Literal[ + "consecutive_similarity_drop", "cumulative_similarity_drop" + ] = "consecutive_similarity_drop", + ) -> dict[str, list[str]]: docs = [f"{m.role}: {m.content}" for m in self.messages] return semantic_splitter( encoder=encoder, docs=docs, threshold=threshold, split_method=split_method diff --git a/semantic_router/utils/logger.py b/semantic_router/utils/logger.py index 00c83693..607f09d5 100644 --- a/semantic_router/utils/logger.py +++ b/semantic_router/utils/logger.py @@ -40,4 +40,4 @@ def setup_custom_logger(name): 
From 8adc914b1f8419dcaadda6778fa0a8ee55f08bad Mon Sep 17 00:00:00 2001
From: hananel
Date: Thu, 11 Jan 2024 19:09:33 +0200
Subject: [PATCH 3/4] remove hints that failed mypy

---
 semantic_router/schema.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/semantic_router/schema.py b/semantic_router/schema.py
index 8d479ec9..c7912fa1 100644
--- a/semantic_router/schema.py
+++ b/semantic_router/schema.py
@@ -55,12 +55,12 @@ class Message(BaseModel):
     role: str
     content: str
 
-    def to_openai(self) -> dict[str, str]:
+    def to_openai(self):
         if self.role.lower() not in ["user", "assistant", "system"]:
             raise ValueError("Role must be either 'user', 'assistant' or 'system'")
         return {"role": self.role, "content": self.content}
 
-    def to_cohere(self) -> dict[str, str]:
+    def to_cohere(self):
         return {"role": self.role, "message": self.content}
 
 
@@ -68,12 +68,12 @@ class Conversation(BaseModel):
     messages: list[Message]
 
     def split_by_topic(
-            self,
-            encoder: BaseEncoder,
-            threshold: float = 0.5,
-            split_method: Literal[
-                "consecutive_similarity_drop", "cumulative_similarity_drop"
-            ] = "consecutive_similarity_drop",
+        self,
+        encoder: BaseEncoder,
+        threshold: float = 0.5,
+        split_method: Literal[
+            "consecutive_similarity_drop", "cumulative_similarity_drop"
+        ] = "consecutive_similarity_drop",
     ) -> dict[str, list[str]]:
         docs = [f"{m.role}: {m.content}" for m in self.messages]
         return semantic_splitter(
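
The commit message does not record the exact mypy failure behind this revert. One plausible reproduction, assuming the project type-checks for an interpreter older than 3.9, is mypy's rejection of built-in generics in that mode (the flag and error wording below follow mypy's `--python-version 3.8` behavior and may vary by mypy release):

    # repro.py - hypothetical reproduction, not part of the patches.
    class Message:
        role: str = "user"
        content: str = ""

        def to_openai(self) -> dict[str, str]:
            return {"role": self.role, "content": self.content}

    # $ mypy --python-version 3.8 repro.py
    # repro.py:5: error: "dict" is not subscriptable, use "typing.Dict" instead

Deferring evaluation with `from __future__ import annotations` is one way around this; patch 4 instead standardizes on the `typing` aliases.
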
From 4f88ad3e029ed79d837d3acf3dab2bd9d54fabdb Mon Sep 17 00:00:00 2001
From: hananel
Date: Fri, 12 Jan 2024 11:58:14 +0200
Subject: [PATCH 4/4] type annotations to uppercase

---
 semantic_router/encoders/base.py        |  3 ++-
 semantic_router/encoders/bm25.py        |  8 ++++----
 semantic_router/encoders/cohere.py      |  4 ++--
 semantic_router/encoders/fastembed.py   |  8 ++++----
 semantic_router/encoders/huggingface.py |  6 +++---
 semantic_router/encoders/openai.py      |  4 ++--
 semantic_router/hybrid_layer.py         | 12 ++++++------
 semantic_router/layer.py                | 18 +++++++++---------
 semantic_router/linear.py               |  4 +++-
 semantic_router/llms/base.py            |  4 ++--
 semantic_router/llms/cohere.py          |  4 ++--
 semantic_router/llms/openai.py          |  4 ++--
 semantic_router/llms/openrouter.py      |  4 ++--
 semantic_router/route.py                | 12 ++++++------
 semantic_router/schema.py               |  8 ++++----
 semantic_router/utils/function_call.py  | 12 ++++++------
 semantic_router/utils/splitters.py      | 10 +++++-----
 17 files changed, 64 insertions(+), 61 deletions(-)

diff --git a/semantic_router/encoders/base.py b/semantic_router/encoders/base.py
index f5968578..edc98641 100644
--- a/semantic_router/encoders/base.py
+++ b/semantic_router/encoders/base.py
@@ -1,3 +1,4 @@
+from typing import List
 from pydantic import BaseModel, Field
 
 
@@ -9,5 +10,5 @@ class BaseEncoder(BaseModel):
     class Config:
         arbitrary_types_allowed = True
 
-    def __call__(self, docs: list[str]) -> list[list[float]]:
+    def __call__(self, docs: List[str]) -> List[List[float]]:
         raise NotImplementedError("Subclasses must implement this method")
diff --git a/semantic_router/encoders/bm25.py b/semantic_router/encoders/bm25.py
index 451273cd..83cbccc0 100644
--- a/semantic_router/encoders/bm25.py
+++ b/semantic_router/encoders/bm25.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any, Optional, List, Dict
 
 from semantic_router.encoders import BaseEncoder
 from semantic_router.utils.logger import logger
@@ -6,7 +6,7 @@
 
 class BM25Encoder(BaseEncoder):
     model: Optional[Any] = None
-    idx_mapping: Optional[dict[int, int]] = None
+    idx_mapping: Optional[Dict[int, int]] = None
     type: str = "sparse"
 
     def __init__(
@@ -40,7 +40,7 @@ def _set_idx_mapping(self):
         else:
             raise TypeError("Expected a dictionary for 'doc_freq'")
 
-    def __call__(self, docs: list[str]) -> list[list[float]]:
+    def __call__(self, docs: List[str]) -> List[List[float]]:
         if self.model is None or self.idx_mapping is None:
             raise ValueError("Model or index mapping is not initialized.")
         if len(docs) == 1:
@@ -60,7 +60,7 @@ def __call__(self, docs: List[str]) -> List[List[float]]:
                 embeds[i][position] = val
         return embeds
 
-    def fit(self, docs: list[str]):
+    def fit(self, docs: List[str]):
         if self.model is None:
             raise ValueError("Model is not initialized.")
         self.model.fit(docs)
diff --git a/semantic_router/encoders/cohere.py b/semantic_router/encoders/cohere.py
index ec8ee0f8..803fe779 100644
--- a/semantic_router/encoders/cohere.py
+++ b/semantic_router/encoders/cohere.py
@@ -1,5 +1,5 @@
 import os
-from typing import Optional
+from typing import Optional, List
 
 import cohere
 
@@ -27,7 +27,7 @@ def __init__(
         except Exception as e:
             raise ValueError(f"Cohere API client failed to initialize. Error: {e}")
 
-    def __call__(self, docs: list[str]) -> list[list[float]]:
+    def __call__(self, docs: List[str]) -> List[List[float]]:
         if self.client is None:
             raise ValueError("Cohere client is not initialized.")
         try:
diff --git a/semantic_router/encoders/fastembed.py b/semantic_router/encoders/fastembed.py
index 33c81f39..ec356317 100644
--- a/semantic_router/encoders/fastembed.py
+++ b/semantic_router/encoders/fastembed.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any, Optional, List
 
 import numpy as np
 from pydantic import PrivateAttr
@@ -42,10 +42,10 @@ def _initialize_client(self):
         embedding = Embedding(**embedding_args)
         return embedding
 
-    def __call__(self, docs: list[str]) -> list[list[float]]:
+    def __call__(self, docs: List[str]) -> List[List[float]]:
         try:
-            embeds: list[np.ndarray] = list(self._client.embed(docs))
-            embeddings: list[list[float]] = [e.tolist() for e in embeds]
+            embeds: List[np.ndarray] = list(self._client.embed(docs))
+            embeddings: List[List[float]] = [e.tolist() for e in embeds]
             return embeddings
         except Exception as e:
             raise ValueError(f"FastEmbed embed failed. Error: {e}")
Error: {e}") diff --git a/semantic_router/encoders/huggingface.py b/semantic_router/encoders/huggingface.py index ace18921..2166ea13 100644 --- a/semantic_router/encoders/huggingface.py +++ b/semantic_router/encoders/huggingface.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Any, Optional, List from pydantic import PrivateAttr @@ -60,11 +60,11 @@ def _initialize_hf_model(self): def __call__( self, - docs: list[str], + docs: List[str], batch_size: int = 32, normalize_embeddings: bool = True, pooling_strategy: str = "mean", - ) -> list[list[float]]: + ) -> List[List[float]]: all_embeddings = [] for i in range(0, len(docs), batch_size): batch_docs = docs[i : i + batch_size] diff --git a/semantic_router/encoders/openai.py b/semantic_router/encoders/openai.py index 169761af..3b06d33d 100644 --- a/semantic_router/encoders/openai.py +++ b/semantic_router/encoders/openai.py @@ -1,6 +1,6 @@ import os from time import sleep -from typing import Optional +from typing import Optional, List import openai from openai import OpenAIError @@ -31,7 +31,7 @@ def __init__( except Exception as e: raise ValueError(f"OpenAI API client failed to initialize. Error: {e}") - def __call__(self, docs: list[str]) -> list[list[float]]: + def __call__(self, docs: List[str]) -> List[List[float]]: if self.client is None: raise ValueError("OpenAI client is not initialized.") embeds = None diff --git a/semantic_router/hybrid_layer.py b/semantic_router/hybrid_layer.py index d4c81b13..ad8d6ec2 100644 --- a/semantic_router/hybrid_layer.py +++ b/semantic_router/hybrid_layer.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, List, Dict, Tuple import numpy as np from numpy.linalg import norm @@ -21,7 +21,7 @@ def __init__( self, encoder: BaseEncoder, sparse_encoder: Optional[BM25Encoder] = None, - routes: list[Route] = [], + routes: List[Route] = [], alpha: float = 0.3, ): self.encoder = encoder @@ -81,7 +81,7 @@ def _add_route(self, route: Route): else: self.sparse_index = np.concatenate([self.sparse_index, sparse_embeds]) - def _add_routes(self, routes: list[Route]): + def _add_routes(self, routes: List[Route]): # create embeddings for all routes logger.info("Creating embeddings for all routes...") all_utterances = [ @@ -153,8 +153,8 @@ def _convex_scaling(self, dense: np.ndarray, sparse: np.ndarray): sparse = np.array(sparse) * (1 - self.alpha) return dense, sparse - def _semantic_classify(self, query_results: list[dict]) -> tuple[str, list[float]]: - scores_by_class: dict[str, list[float]] = {} + def _semantic_classify(self, query_results: List[Dict]) -> Tuple[str, List[float]]: + scores_by_class: Dict[str, List[float]] = {} for result in query_results: score = result["score"] route = result["route"] @@ -174,7 +174,7 @@ def _semantic_classify(self, query_results: list[dict]) -> tuple[str, list[float logger.warning("No classification found for semantic classifier.") return "", [] - def _pass_threshold(self, scores: list[float], threshold: float) -> bool: + def _pass_threshold(self, scores: List[float], threshold: float) -> bool: if scores: return max(scores) > threshold else: diff --git a/semantic_router/layer.py b/semantic_router/layer.py index 7ff7a15b..bce160ba 100644 --- a/semantic_router/layer.py +++ b/semantic_router/layer.py @@ -1,6 +1,6 @@ import json import os -from typing import Optional, Any +from typing import Optional, Any, List, Dict, Tuple import numpy as np import yaml @@ -48,11 +48,11 @@ class LayerConfig: RouteLayer. 
""" - routes: list[Route] = [] + routes: List[Route] = [] def __init__( self, - routes: list[Route] = [], + routes: List[Route] = [], encoder_type: str = "openai", encoder_name: Optional[str] = None, ): @@ -99,7 +99,7 @@ def from_file(cls, path: str) -> "LayerConfig": else: raise Exception("Invalid config JSON or YAML") - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: return { "encoder_type": self.encoder_type, "encoder_name": self.encoder_name, @@ -158,7 +158,7 @@ def __init__( self, encoder: Optional[BaseEncoder] = None, llm: Optional[BaseLLM] = None, - routes: Optional[list[Route]] = None, + routes: Optional[List[Route]] = None, top_k_routes: int = 3, ): logger.info("Initializing RouteLayer") @@ -247,7 +247,7 @@ def add(self, route: Route): # add route to routes list self.routes.append(route) - def _add_routes(self, routes: list[Route]): + def _add_routes(self, routes: List[Route]): # create embeddings for all routes all_utterances = [ utterance for route in routes for utterance in route.utterances @@ -290,8 +290,8 @@ def _query(self, text: str, top_k: int = 5): logger.warning("No index found for route layer.") return [] - def _semantic_classify(self, query_results: list[dict]) -> tuple[str, list[float]]: - scores_by_class: dict[str, list[float]] = {} + def _semantic_classify(self, query_results: List[dict]) -> Tuple[str, List[float]]: + scores_by_class: Dict[str, List[float]] = {} for result in query_results: score = result["score"] route = result["route"] @@ -311,7 +311,7 @@ def _semantic_classify(self, query_results: list[dict]) -> tuple[str, list[float logger.warning("No classification found for semantic classifier.") return "", [] - def _pass_threshold(self, scores: list[float], threshold: float) -> bool: + def _pass_threshold(self, scores: List[float], threshold: float) -> bool: if scores: return max(scores) > threshold else: diff --git a/semantic_router/linear.py b/semantic_router/linear.py index 09b911fb..1c13262f 100644 --- a/semantic_router/linear.py +++ b/semantic_router/linear.py @@ -1,3 +1,5 @@ +from typing import Tuple + import numpy as np from numpy.linalg import norm @@ -19,7 +21,7 @@ def similarity_matrix(xq: np.ndarray, index: np.ndarray) -> np.ndarray: return sim -def top_scores(sim: np.ndarray, top_k: int = 5) -> tuple[np.ndarray, np.ndarray]: +def top_scores(sim: np.ndarray, top_k: int = 5) -> Tuple[np.ndarray, np.ndarray]: # get indices of top_k records top_k = min(top_k, sim.shape[0]) idx = np.argpartition(sim, -top_k)[-top_k:] diff --git a/semantic_router/llms/base.py b/semantic_router/llms/base.py index bf5f29b6..12d89f2d 100644 --- a/semantic_router/llms/base.py +++ b/semantic_router/llms/base.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, List from pydantic import BaseModel @@ -11,5 +11,5 @@ class BaseLLM(BaseModel): class Config: arbitrary_types_allowed = True - def __call__(self, messages: list[Message]) -> Optional[str]: + def __call__(self, messages: List[Message]) -> Optional[str]: raise NotImplementedError("Subclasses must implement this method") diff --git a/semantic_router/llms/cohere.py b/semantic_router/llms/cohere.py index 0ec21f35..0eebbe6d 100644 --- a/semantic_router/llms/cohere.py +++ b/semantic_router/llms/cohere.py @@ -1,5 +1,5 @@ import os -from typing import Optional +from typing import Optional, List import cohere @@ -26,7 +26,7 @@ def __init__( except Exception as e: raise ValueError(f"Cohere API client failed to initialize. 
Error: {e}") - def __call__(self, messages: list[Message]) -> str: + def __call__(self, messages: List[Message]) -> str: if self.client is None: raise ValueError("Cohere client is not initialized.") try: diff --git a/semantic_router/llms/openai.py b/semantic_router/llms/openai.py index 8b3442c7..06d6865c 100644 --- a/semantic_router/llms/openai.py +++ b/semantic_router/llms/openai.py @@ -1,5 +1,5 @@ import os -from typing import Optional +from typing import Optional, List import openai @@ -33,7 +33,7 @@ def __init__( self.temperature = temperature self.max_tokens = max_tokens - def __call__(self, messages: list[Message]) -> str: + def __call__(self, messages: List[Message]) -> str: if self.client is None: raise ValueError("OpenAI client is not initialized.") try: diff --git a/semantic_router/llms/openrouter.py b/semantic_router/llms/openrouter.py index 4cc15d6b..8c3efb8d 100644 --- a/semantic_router/llms/openrouter.py +++ b/semantic_router/llms/openrouter.py @@ -1,5 +1,5 @@ import os -from typing import Optional +from typing import Optional, List import openai @@ -38,7 +38,7 @@ def __init__( self.temperature = temperature self.max_tokens = max_tokens - def __call__(self, messages: list[Message]) -> str: + def __call__(self, messages: List[Message]) -> str: if self.client is None: raise ValueError("OpenRouter client is not initialized.") try: diff --git a/semantic_router/route.py b/semantic_router/route.py index b492ae13..cc7dc17a 100644 --- a/semantic_router/route.py +++ b/semantic_router/route.py @@ -1,6 +1,6 @@ import json import re -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, Optional, Union, List, Dict from pydantic import BaseModel @@ -40,9 +40,9 @@ def is_valid(route_config: str) -> bool: class Route(BaseModel): name: str - utterances: list[str] + utterances: List[str] description: Optional[str] = None - function_schema: Optional[dict[str, Any]] = None + function_schema: Optional[Dict[str, Any]] = None llm: Optional[BaseLLM] = None def __call__(self, query: str) -> RouteChoice: @@ -62,11 +62,11 @@ def __call__(self, query: str) -> RouteChoice: func_call = None return RouteChoice(name=self.name, function_call=func_call) - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: return self.dict() @classmethod - def from_dict(cls, data: dict[str, Any]): + def from_dict(cls, data: Dict[str, Any]): return cls(**data) @classmethod @@ -92,7 +92,7 @@ def _parse_route_config(cls, config: str) -> str: raise ValueError("No tags found in the output.") @classmethod - def _generate_dynamic_route(cls, llm: BaseLLM, function_schema: dict[str, Any]): + def _generate_dynamic_route(cls, llm: BaseLLM, function_schema: Dict[str, Any]): logger.info("Generating dynamic route...") prompt = f""" diff --git a/semantic_router/schema.py b/semantic_router/schema.py index c7912fa1..7dcb7fde 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Optional, Literal +from typing import Optional, Literal, List, Dict from pydantic import BaseModel from pydantic.dataclasses import dataclass @@ -47,7 +47,7 @@ def __init__(self, type: str, name: Optional[str]): else: raise ValueError - def __call__(self, texts: list[str]) -> list[list[float]]: + def __call__(self, texts: List[str]) -> List[List[float]]: return self.model(texts) @@ -65,7 +65,7 @@ def to_cohere(self): class Conversation(BaseModel): - messages: list[Message] + messages: List[Message] def split_by_topic( self, @@ -74,7 +74,7 
@@ -74,7 +74,7 @@ def split_by_topic(
         split_method: Literal[
             "consecutive_similarity_drop", "cumulative_similarity_drop"
         ] = "consecutive_similarity_drop",
-    ) -> dict[str, list[str]]:
+    ) -> Dict[str, List[str]]:
         docs = [f"{m.role}: {m.content}" for m in self.messages]
         return semantic_splitter(
             encoder=encoder, docs=docs, threshold=threshold, split_method=split_method
diff --git a/semantic_router/utils/function_call.py b/semantic_router/utils/function_call.py
index cedd9b6e..ad09970f 100644
--- a/semantic_router/utils/function_call.py
+++ b/semantic_router/utils/function_call.py
@@ -1,6 +1,6 @@
 import inspect
 import json
-from typing import Any, Callable, Union
+from typing import Any, Callable, Union, Dict, List
 
 from pydantic import BaseModel
 
@@ -9,7 +9,7 @@
 from semantic_router.utils.logger import logger
 
 
-def get_schema(item: Union[BaseModel, Callable]) -> dict[str, Any]:
+def get_schema(item: Union[BaseModel, Callable]) -> Dict[str, Any]:
     if isinstance(item, BaseModel):
         signature_parts = []
         for field_name, field_model in item.__annotations__.items():
@@ -42,8 +42,8 @@ def get_schema(item: Union[BaseModel, Callable]) -> dict[str, Any]:
 
 
 def extract_function_inputs(
-    query: str, llm: BaseLLM, function_schema: dict[str, Any]
-) -> dict:
+    query: str, llm: BaseLLM, function_schema: Dict[str, Any]
+) -> Dict[str, Any]:
     logger.info("Extracting function input...")
 
     prompt = f"""
@@ -87,7 +87,7 @@ def extract_function_inputs(
     return function_inputs
 
 
-def is_valid_inputs(inputs: dict[str, Any], function_schema: dict[str, Any]) -> bool:
+def is_valid_inputs(inputs: Dict[str, Any], function_schema: Dict[str, Any]) -> bool:
     """Validate the extracted inputs against the function schema"""
     try:
         # Extract parameter names and types from the signature string
@@ -110,7 +110,7 @@ def is_valid_inputs(inputs: dict[str, Any], function_schema: dict[str, Any]) -> bool:
 
 # TODO: Add route layer object to the input, solve circular import issue
 async def route_and_execute(
-    query: str, llm: BaseLLM, functions: list[Callable], layer
+    query: str, llm: BaseLLM, functions: List[Callable], layer
 ) -> Any:
     route_choice: RouteChoice = layer(query)
diff --git a/semantic_router/utils/splitters.py b/semantic_router/utils/splitters.py
index 20160319..83a32839 100644
--- a/semantic_router/utils/splitters.py
+++ b/semantic_router/utils/splitters.py
@@ -1,17 +1,17 @@
 import numpy as np
-from typing import Literal
+from typing import List, Dict, Literal
 
 from semantic_router.encoders import BaseEncoder
 
 
 def semantic_splitter(
     encoder: BaseEncoder,
-    docs: list[str],
+    docs: List[str],
     threshold: float,
     split_method: Literal[
         "consecutive_similarity_drop", "cumulative_similarity_drop"
     ] = "consecutive_similarity_drop",
-) -> dict[str, list[str]]:
+) -> Dict[str, List[str]]:
     """
     Splits a list of documents base on semantic similarity changes.
 
@@ -23,13 +23,13 @@ def semantic_splitter(
 
     Args:
         encoder (BaseEncoder): Encoder for document embeddings.
-        docs (list[str]): Documents to split.
+        docs (List[str]): Documents to split.
         threshold (float): The similarity drop value that will trigger a new
         document split.
         split_method (str): The method to use for splitting.
 
     Returns:
-        dict[str, list[str]]: Splits with corresponding documents.
+        Dict[str, List[str]]: Splits with corresponding documents.
     """
     total_docs = len(docs)
     splits = {}
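
Taken together, the series lands on the `typing.List`/`Dict`/`Tuple` aliases everywhere, with `Literal` guarding `split_method`. A usage sketch of the resulting interface (illustrative only: it assumes semantic-router is installed with a Cohere key in `COHERE_API_KEY`, and constructor details may differ between releases):

    # usage_sketch.py - illustration only, not part of the patches.
    from semantic_router.encoders import CohereEncoder
    from semantic_router.schema import Conversation, Message

    encoder = CohereEncoder()  # any BaseEncoder subclass works here

    convo = Conversation(
        messages=[
            Message(role="user", content="What is semantic routing?"),
            Message(role="assistant", content="It maps queries to routes."),
            Message(role="user", content="Unrelated: best pizza nearby?"),
        ]
    )

    # split_method is checked against the Literal values from patches 2-4;
    # any other string is a type error under mypy.
    splits = convo.split_by_topic(
        encoder=encoder,
        threshold=0.5,
        split_method="consecutive_similarity_drop",
    )
    print(splits)  # Dict[str, List[str]] mapping split ids to documents
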