From 7f2fb87f83212d8eb7389a1f681fd5a6613716df Mon Sep 17 00:00:00 2001 From: James Briggs <35938317+jamescalam@users.noreply.github.com> Date: Sun, 19 Jan 2025 23:24:10 +0000 Subject: [PATCH 1/2] feat: shared torch base encoder and st encoder --- pyproject.toml | 3 +- semantic_router/encoders/__init__.py | 8 ++++ semantic_router/encoders/clip.py | 27 ++--------- semantic_router/encoders/local.py | 10 ++++ .../encoders/sentence_transformers.py | 47 +++++++++++++++++++ semantic_router/encoders/torch.py | 33 +++++++++++++ semantic_router/encoders/vit.py | 23 +++------ semantic_router/schema.py | 2 + 8 files changed, 112 insertions(+), 41 deletions(-) create mode 100644 semantic_router/encoders/local.py create mode 100644 semantic_router/encoders/sentence_transformers.py create mode 100644 semantic_router/encoders/torch.py diff --git a/pyproject.toml b/pyproject.toml index ea1aea5b..8f21737a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,8 +41,9 @@ sphinxawesome-theme = {version = "^5.2.0", optional = true} tornado = {version = "^6.4.2", optional = true} [tool.poetry.extras] -hybrid = ["pinecone-text"] +hybrid = ["pinecone-text"] # we need to drop this dependency local = ["torch", "transformers", "tokenizers", "huggingface-hub", "llama-cpp-python"] +# local-cpu = ["torch"] https://download.pytorch.org/whl/cpu https://stackoverflow.com/questions/77205123/how-do-i-slim-down-sberts-sentencer-transformer-library pinecone = ["pinecone"] vision = ["torch", "torchvision", "transformers", "pillow"] mistralai = ["mistralai"] diff --git a/semantic_router/encoders/__init__.py b/semantic_router/encoders/__init__.py index 07e468d8..a211e8c5 100644 --- a/semantic_router/encoders/__init__.py +++ b/semantic_router/encoders/__init__.py @@ -10,8 +10,10 @@ from semantic_router.encoders.google import GoogleEncoder from semantic_router.encoders.huggingface import HuggingFaceEncoder from semantic_router.encoders.huggingface import HFEndpointEncoder +from semantic_router.encoders.local import LocalEncoder from semantic_router.encoders.mistral import MistralEncoder from semantic_router.encoders.openai import OpenAIEncoder +from semantic_router.encoders.sentence_transformers import STEncoder from semantic_router.encoders.tfidf import TfidfEncoder from semantic_router.encoders.vit import VitEncoder from semantic_router.encoders.zure import AzureOpenAIEncoder @@ -23,8 +25,10 @@ "SparseEncoder", "AzureOpenAIEncoder", "CohereEncoder", + "LocalEncoder", "OpenAIEncoder", "BM25Encoder", + "STEncoder", "TfidfEncoder", "FastEmbedEncoder", "HuggingFaceEncoder", @@ -50,6 +54,10 @@ def __init__(self, type: str, name: Optional[str]): self.model = AzureOpenAIEncoder(model=name) elif self.type == EncoderType.COHERE: self.model = CohereEncoder(name=name) + elif self.type == EncoderType.SENTENCE_TRANSFORMERS: + self.model = STEncoder(name=name) + elif self.type == EncoderType.LOCAL: + self.model = LocalEncoder(name=name) elif self.type == EncoderType.OPENAI: self.model = OpenAIEncoder(name=name) elif self.type == EncoderType.AURELIO: diff --git a/semantic_router/encoders/clip.py b/semantic_router/encoders/clip.py index 065ff115..cde360db 100644 --- a/semantic_router/encoders/clip.py +++ b/semantic_router/encoders/clip.py @@ -3,20 +3,18 @@ import numpy as np from pydantic import PrivateAttr from typing import Dict -from semantic_router.encoders import DenseEncoder +from semantic_router.encoders.torch import TorchAbstractDenseEncoder -class CLIPEncoder(DenseEncoder): +class CLIPEncoder(TorchAbstractDenseEncoder): name: str = "openai/clip-vit-base-patch16" type: str = "huggingface" tokenizer_kwargs: Dict = {} processor_kwargs: Dict = {} model_kwargs: Dict = {} - device: Optional[str] = None _tokenizer: Any = PrivateAttr() _processor: Any = PrivateAttr() _model: Any = PrivateAttr() - _torch: Any = PrivateAttr() _Image: Any = PrivateAttr() def __init__(self, **data): @@ -59,14 +57,8 @@ def _initialize_hf_model(self): "`pip install semantic-router[vision]`" ) - try: - import torch - except ImportError: - raise ImportError( - "Please install Pytorch to use CLIPEncoder. " - "You can install it with: " - "`pip install semantic-router[vision]`" - ) + # use abstract torch init + torch = self._initialize_torch() try: from PIL import Image @@ -91,17 +83,6 @@ def _initialize_hf_model(self): model.to(self.device) return tokenizer, processor, model - def _get_device(self) -> str: - if self.device: - device = self.device - elif self._torch.cuda.is_available(): - device = "cuda" - elif self._torch.backends.mps.is_available(): - device = "mps" - else: - device = "cpu" - return device - def _encode_text(self, docs: List[str]) -> Any: inputs = self._tokenizer( docs, return_tensors="pt", padding=True, truncation=True diff --git a/semantic_router/encoders/local.py b/semantic_router/encoders/local.py new file mode 100644 index 00000000..1e6d834d --- /dev/null +++ b/semantic_router/encoders/local.py @@ -0,0 +1,10 @@ +from semantic_router.encoders.sentence_transformers import STEncoder + + +class LocalEncoder(STEncoder): + """The local encoder uses the underlying STEncoder (ie a sentence-transformers + bi-encoder). Designed as our recommended local encoder option for generating dense + embeddings. + """ + + pass diff --git a/semantic_router/encoders/sentence_transformers.py b/semantic_router/encoders/sentence_transformers.py new file mode 100644 index 00000000..9cd94f68 --- /dev/null +++ b/semantic_router/encoders/sentence_transformers.py @@ -0,0 +1,47 @@ +import numpy as np +from pydantic import PrivateAttr + +from semantic_router.encoders.torch import TorchAbstractDenseEncoder + + +class STEncoder(TorchAbstractDenseEncoder): + """Base class for sentence-transformers bi-encoders. Our recommended encoder for + generating dense embeddings locally. + """ + name: str = "all-MiniLM-L6-v2" + type: str = "sentence-transformers" + dimensions: int = 384 + device: str | None = None + _model: any = PrivateAttr() + + def __init__(self, **kwargs): + if kwargs.get("score_threshold") is None: + kwargs["score_threshold"] = 0.5 + super().__init__(**kwargs) + self._model = self._initialize_st_model() + + def _initialize_st_model(self): + try: + from sentence_transformers import SentenceTransformer + except ImportError: + raise ImportError( + "Please install sentence-transformers to use STEncoder. " + "You can install it with: " + "`pip install semantic-router[local]`" + ) + model = SentenceTransformer(self.name) + model.to(self.device) + return model + + def __call__( + self, + docs: list[any], + batch_size: int = 32, + normalize_embeddings: bool = True, + ) -> list[list[float]]: + # compute document embeddings `xd` + xd = self._model.encode(docs, batch_size=batch_size) + if normalize_embeddings: + # TODO not sure if required + xd = xd / np.linalg.norm(xd, axis=0) + return xd diff --git a/semantic_router/encoders/torch.py b/semantic_router/encoders/torch.py new file mode 100644 index 00000000..f9219eda --- /dev/null +++ b/semantic_router/encoders/torch.py @@ -0,0 +1,33 @@ +from pydantic import PrivateAttr + +from semantic_router.encoders import DenseEncoder + + +class TorchAbstractDenseEncoder(DenseEncoder): + _torch: any = PrivateAttr() + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._torch = self._initialize_torch() + + def _initialize_torch(self): + try: + import torch + except ImportError: + raise ImportError( + f"Please install PyTorch to use {self.__class__.__name__}. " + "You can install it with: `pip install semantic-router[local]`" + ) + + return torch + + def _get_device(self) -> str: + if self.device: + device = self.device + elif self._torch.cuda.is_available(): + device = "cuda" + elif self._torch.backends.mps.is_available(): + device = "mps" + else: + device = "cpu" + return device diff --git a/semantic_router/encoders/vit.py b/semantic_router/encoders/vit.py index dec768e4..ebb40102 100644 --- a/semantic_router/encoders/vit.py +++ b/semantic_router/encoders/vit.py @@ -2,18 +2,16 @@ from pydantic import PrivateAttr -from semantic_router.encoders import DenseEncoder +from semantic_router.encoders.torch import TorchAbstractDenseEncoder -class VitEncoder(DenseEncoder): +class VitEncoder(TorchAbstractDenseEncoder): name: str = "google/vit-base-patch16-224" type: str = "huggingface" processor_kwargs: Dict = {} model_kwargs: Dict = {} - device: Optional[str] = None _processor: Any = PrivateAttr() _model: Any = PrivateAttr() - _torch: Any = PrivateAttr() _T: Any = PrivateAttr() _Image: Any = PrivateAttr() @@ -32,13 +30,15 @@ def _initialize_hf_model(self): "You can install it with: " "`pip install semantic-router[vision]`" ) + + # use abstract torch init + torch = self._initialize_torch() try: - import torch import torchvision.transforms as T except ImportError: raise ImportError( - "Please install Pytorch to use VitEncoder. " + "Please install torchvision to use VitEncoder. " "You can install it with: " "`pip install semantic-router[vision]`" ) @@ -67,17 +67,6 @@ def _initialize_hf_model(self): return processor, model - def _get_device(self) -> str: - if self.device: - device = self.device - elif self._torch.cuda.is_available(): - device = "cuda" - elif self._torch.backends.mps.is_available(): - device = "mps" - else: - device = "cpu" - return device - def _process_images(self, images: List[Any]): rgb_images = [self._ensure_rgb(img) for img in images] processed_images = self._processor(images=rgb_images, return_tensors="pt") diff --git a/semantic_router/schema.py b/semantic_router/schema.py index 273043f5..8d9bf163 100644 --- a/semantic_router/schema.py +++ b/semantic_router/schema.py @@ -13,8 +13,10 @@ class EncoderType(Enum): AURELIO = "aurelio" AZURE = "azure" COHERE = "cohere" + LOCAL = "local" OPENAI = "openai" BM25 = "bm25" + SENTENCE_TRANSFORMERS = "sentence-transformers" TFIDF = "tfidf" FASTEMBED = "fastembed" HUGGINGFACE = "huggingface" From 2eb709c84cca604da2a2c58c435df84cb928092b Mon Sep 17 00:00:00 2001 From: James Briggs <35938317+jamescalam@users.noreply.github.com> Date: Tue, 21 Jan 2025 13:55:48 +0400 Subject: [PATCH 2/2] feat: local intro notebook --- docs/00a-introduction-local.ipynb | 313 ++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 docs/00a-introduction-local.ipynb diff --git a/docs/00a-introduction-local.ipynb b/docs/00a-introduction-local.ipynb new file mode 100644 index 00000000..94b15cc5 --- /dev/null +++ b/docs/00a-introduction-local.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "K7NsuSPNf3px" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/00-introduction.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/00-introduction.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Am2hmLzTf3py" + }, + "source": [ + "# Semantic Router Intro" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k1nRRAbYf3py" + }, + "source": [ + "The Semantic Router library can be used as a super fast decision making layer on top of LLMs. That means rather than waiting on a slow agent to decide what to do, we can use the magic of semantic vector space to make routes. Cutting decision making time down from seconds to milliseconds.\n", + "\n", + "In this notebook we will be introducing the library (as done in the `00-introduction.ipynb` notebook) but using the `LocalEncoder` class, allowing us to run the library locally without the need for any APIs or external services." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NggrMQP2f3py" + }, + "source": [ + "## Getting Started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9zP-l_T7f3py" + }, + "source": [ + "We start by installing the library:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4YI81tu0f3pz" + }, + "outputs": [], + "source": [ + "!pip install -qU \"semantic-router==0.1.0.dev6[local]\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HfB8252ff3pz" + }, + "source": [ + "We start by defining a dictionary mapping routes to example phrases that should trigger those routes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lslfqYOEf3pz", + "outputId": "c13e3e77-310c-4b86-e291-4b6005d698bd" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Siraj\\Documents\\Personal\\Work\\Aurelio\\Virtual Environments\\semantic_router_3\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from semantic_router import Route\n", + "\n", + "politics = Route(\n", + " name=\"politics\",\n", + " utterances=[\n", + " \"isn't politics the best thing ever\",\n", + " \"why don't you tell me about your political opinions\",\n", + " \"don't you just love the president\",\n", + " \"don't you just hate the president\",\n", + " \"they're going to destroy this country!\",\n", + " \"they will save the country!\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WYLHUDa1f3p0" + }, + "source": [ + "Let's define another for good measure:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LAdY1jdxf3p0" + }, + "outputs": [], + "source": [ + "chitchat = Route(\n", + " name=\"chitchat\",\n", + " utterances=[\n", + " \"how's the weather today?\",\n", + " \"how are things going?\",\n", + " \"lovely weather today\",\n", + " \"the weather is horrendous\",\n", + " \"let's go to the chippy\",\n", + " ],\n", + ")\n", + "\n", + "routes = [politics, chitchat]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ReN59ieGf3p0" + }, + "source": [ + "Now we initialize our encoder. Under-the-hood we're using the `sentence-transformers` library, which supports loading encoders from the HuggingFace Hub. We'll be using Nvidia's [nvidia/NV-Embed-v2](https://huggingface.co/nvidia/NV-Embed-v2) encoder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MF47W_Sof3p2" + }, + "outputs": [], + "source": [ + "from semantic_router.encoders import LocalEncoder\n", + "\n", + "encoder = LocalEncoder(name=\"nvidia/NV-Embed-v2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lYuLO0l9f3p3" + }, + "source": [ + "Now we define the `Router`. When called, the router will consume text (a query) and output the category (`Route`) it belongs to — to initialize a `Router` we need our `encoder` model and a list of `routes`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dh1U8IDOf3p3", + "outputId": "872810da-956a-47af-a91f-217ce351a88b" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-05-07 15:02:46 INFO semantic_router.utils.logger local\u001b[0m\n" + ] + } + ], + "source": [ + "from semantic_router.routers import SemanticRouter\n", + "\n", + "sr = SemanticRouter(encoder=encoder, routes=routes, auto_sync=\"local\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xj32uEF-f3p3" + }, + "source": [ + "Now we can test it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fIXOjRp9f3p3", + "outputId": "8b9b5746-ae7c-43bb-d84f-5fa7c30e423e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RouteChoice(name='politics', function_call=None, similarity_score=None)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sr(\"don't you love politics?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0UN2mKvjf3p4", + "outputId": "062f9499-7db3-49d2-81ef-e7d5dc9a88f6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RouteChoice(name='chitchat', function_call=None, similarity_score=None)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sr(\"how's the weather today?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NHZWZKoTf3p4" + }, + "source": [ + "Both are classified accurately, what if we send a query that is unrelated to our existing `Route` objects?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0WnvGJByf3p4", + "outputId": "4496e9b2-7cd8-4466-fe1a-3e6f5cf30b0d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "RouteChoice(name=None, function_call=None, similarity_score=None)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sr(\"I'm interested in learning about llama 2\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With this we see `None` is returned, ie no routes were matched." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "decision-layer", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}