From 7f2fb87f83212d8eb7389a1f681fd5a6613716df Mon Sep 17 00:00:00 2001
From: James Briggs <35938317+jamescalam@users.noreply.github.com>
Date: Sun, 19 Jan 2025 23:24:10 +0000
Subject: [PATCH 1/2] feat: shared torch base encoder and st encoder

---
 pyproject.toml                                |  3 +-
 semantic_router/encoders/__init__.py          |  8 ++++
 semantic_router/encoders/clip.py              | 27 ++---------
 semantic_router/encoders/local.py             | 10 ++++
 .../encoders/sentence_transformers.py         | 47 +++++++++++++++++++
 semantic_router/encoders/torch.py             | 33 +++++++++++++
 semantic_router/encoders/vit.py               | 23 +++------
 semantic_router/schema.py                     |  2 +
 8 files changed, 112 insertions(+), 41 deletions(-)
 create mode 100644 semantic_router/encoders/local.py
 create mode 100644 semantic_router/encoders/sentence_transformers.py
 create mode 100644 semantic_router/encoders/torch.py

diff --git a/pyproject.toml b/pyproject.toml
index ea1aea5b..8f21737a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,8 +41,9 @@ sphinxawesome-theme = {version = "^5.2.0", optional = true}
 tornado = {version = "^6.4.2", optional = true}
 
 [tool.poetry.extras]
-hybrid = ["pinecone-text"]
+hybrid = ["pinecone-text"]  # we need to drop this dependency
 local = ["torch", "transformers", "tokenizers", "huggingface-hub", "llama-cpp-python"]
+# TODO: consider a slimmer extra, e.g. local-cpu = ["torch"], installed from the CPU-only wheel index https://download.pytorch.org/whl/cpu (see https://stackoverflow.com/questions/77205123/how-do-i-slim-down-sberts-sentencer-transformer-library)
 pinecone = ["pinecone"]
 vision = ["torch", "torchvision", "transformers", "pillow"]
 mistralai = ["mistralai"]
diff --git a/semantic_router/encoders/__init__.py b/semantic_router/encoders/__init__.py
index 07e468d8..a211e8c5 100644
--- a/semantic_router/encoders/__init__.py
+++ b/semantic_router/encoders/__init__.py
@@ -10,8 +10,10 @@
 from semantic_router.encoders.google import GoogleEncoder
 from semantic_router.encoders.huggingface import HuggingFaceEncoder
 from semantic_router.encoders.huggingface import HFEndpointEncoder
+from semantic_router.encoders.local import LocalEncoder
 from semantic_router.encoders.mistral import MistralEncoder
 from semantic_router.encoders.openai import OpenAIEncoder
+from semantic_router.encoders.sentence_transformers import STEncoder
 from semantic_router.encoders.tfidf import TfidfEncoder
 from semantic_router.encoders.vit import VitEncoder
 from semantic_router.encoders.zure import AzureOpenAIEncoder
@@ -23,8 +25,10 @@
     "SparseEncoder",
     "AzureOpenAIEncoder",
     "CohereEncoder",
+    "LocalEncoder",
     "OpenAIEncoder",
     "BM25Encoder",
+    "STEncoder",
     "TfidfEncoder",
     "FastEmbedEncoder",
     "HuggingFaceEncoder",
@@ -50,6 +54,10 @@ def __init__(self, type: str, name: Optional[str]):
             self.model = AzureOpenAIEncoder(model=name)
         elif self.type == EncoderType.COHERE:
             self.model = CohereEncoder(name=name)
+        elif self.type == EncoderType.SENTENCE_TRANSFORMERS:
+            self.model = STEncoder(name=name)
+        elif self.type == EncoderType.LOCAL:
+            self.model = LocalEncoder(name=name)
         elif self.type == EncoderType.OPENAI:
             self.model = OpenAIEncoder(name=name)
         elif self.type == EncoderType.AURELIO:
diff --git a/semantic_router/encoders/clip.py b/semantic_router/encoders/clip.py
index 065ff115..cde360db 100644
--- a/semantic_router/encoders/clip.py
+++ b/semantic_router/encoders/clip.py
@@ -3,20 +3,18 @@
 import numpy as np
 from pydantic import PrivateAttr
 from typing import Dict
-from semantic_router.encoders import DenseEncoder
+from semantic_router.encoders.torch import TorchAbstractDenseEncoder
 
 
-class CLIPEncoder(DenseEncoder):
+class CLIPEncoder(TorchAbstractDenseEncoder):
     name: str = "openai/clip-vit-base-patch16"
     type: str = "huggingface"
     tokenizer_kwargs: Dict = {}
     processor_kwargs: Dict = {}
     model_kwargs: Dict = {}
-    device: Optional[str] = None
     _tokenizer: Any = PrivateAttr()
     _processor: Any = PrivateAttr()
     _model: Any = PrivateAttr()
-    _torch: Any = PrivateAttr()
     _Image: Any = PrivateAttr()
 
     def __init__(self, **data):
@@ -59,14 +57,8 @@ def _initialize_hf_model(self):
                 "`pip install semantic-router[vision]`"
             )
 
-        try:
-            import torch
-        except ImportError:
-            raise ImportError(
-                "Please install Pytorch to use CLIPEncoder. "
-                "You can install it with: "
-                "`pip install semantic-router[vision]`"
-            )
+        # use abstract torch init
+        torch = self._initialize_torch()
 
         try:
             from PIL import Image
@@ -91,17 +83,6 @@ def _initialize_hf_model(self):
         model.to(self.device)
         return tokenizer, processor, model
 
-    def _get_device(self) -> str:
-        if self.device:
-            device = self.device
-        elif self._torch.cuda.is_available():
-            device = "cuda"
-        elif self._torch.backends.mps.is_available():
-            device = "mps"
-        else:
-            device = "cpu"
-        return device
-
     def _encode_text(self, docs: List[str]) -> Any:
         inputs = self._tokenizer(
             docs, return_tensors="pt", padding=True, truncation=True
diff --git a/semantic_router/encoders/local.py b/semantic_router/encoders/local.py
new file mode 100644
index 00000000..1e6d834d
--- /dev/null
+++ b/semantic_router/encoders/local.py
@@ -0,0 +1,10 @@
+from semantic_router.encoders.sentence_transformers import STEncoder
+
+
+class LocalEncoder(STEncoder):
+    """Recommended local encoder option for generating dense embeddings.
+
+    A thin alias of STEncoder (i.e. a sentence-transformers bi-encoder): it adds
+    no fields or methods of its own, so all configuration and encoding behaviour
+    comes from the parent class.
+    """
diff --git a/semantic_router/encoders/sentence_transformers.py b/semantic_router/encoders/sentence_transformers.py
new file mode 100644
index 00000000..9cd94f68
--- /dev/null
+++ b/semantic_router/encoders/sentence_transformers.py
@@ -0,0 +1,47 @@
+import numpy as np
+from pydantic import PrivateAttr
+
+from semantic_router.encoders.torch import TorchAbstractDenseEncoder
+
+
+class STEncoder(TorchAbstractDenseEncoder):
+    """Base class for sentence-transformers bi-encoders; our recommended encoder for
+    generating dense embeddings locally. Embeddings are L2-normalized by default.
+    """
+    name: str = "all-MiniLM-L6-v2"
+    type: str = "sentence-transformers"
+    dimensions: int = 384
+    device: str | None = None
+    _model: any = PrivateAttr()
+
+    def __init__(self, **kwargs):
+        if kwargs.get("score_threshold") is None:
+            kwargs["score_threshold"] = 0.5
+        super().__init__(**kwargs)
+        self._model = self._initialize_st_model()
+
+    def _initialize_st_model(self):
+        try:
+            from sentence_transformers import SentenceTransformer
+        except ImportError:
+            raise ImportError(
+                "Please install sentence-transformers to use STEncoder. "
+                "You can install it with: "
+                "`pip install semantic-router[local]`"
+            )
+        # device=None leaves device selection to sentence-transformers itself
+        model = SentenceTransformer(self.name, device=self.device)
+        return model
+
+    def __call__(
+        self,
+        docs: list[any],
+        batch_size: int = 32,
+        normalize_embeddings: bool = True,
+    ) -> list[list[float]]:
+        # compute document embeddings `xd`; assumed shape (n_docs, dim) - TODO confirm
+        xd = self._model.encode(docs, batch_size=batch_size)
+        if normalize_embeddings:
+            # unit-normalize each embedding (last axis), not each dimension across docs
+            xd = xd / np.linalg.norm(xd, axis=-1, keepdims=True)
+        return xd
diff --git a/semantic_router/encoders/torch.py b/semantic_router/encoders/torch.py
new file mode 100644
index 00000000..f9219eda
--- /dev/null
+++ b/semantic_router/encoders/torch.py
@@ -0,0 +1,33 @@
+from pydantic import PrivateAttr
+
+from semantic_router.encoders import DenseEncoder
+
+
+class TorchAbstractDenseEncoder(DenseEncoder):
+    """Base for torch-backed dense encoders: lazy torch import and device selection."""
+    device: str | None = None  # None -> auto-detect via _get_device()
+    _torch: any = PrivateAttr()
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._torch = self._initialize_torch()
+
+    def _initialize_torch(self):
+        try:
+            import torch
+        except ImportError:
+            raise ImportError(
+                f"Please install PyTorch to use {self.__class__.__name__}. "
+                "You can install it with: `pip install semantic-router[local]`"
+            )
+        return torch
+
+    def _get_device(self) -> str:
+        """Return `device` if set, else the first available of cuda, mps, cpu."""
+        if self.device:
+            return self.device
+        if self._torch.cuda.is_available():
+            return "cuda"
+        if self._torch.backends.mps.is_available():
+            return "mps"
+        return "cpu"
diff --git a/semantic_router/encoders/vit.py b/semantic_router/encoders/vit.py
index dec768e4..ebb40102 100644
--- a/semantic_router/encoders/vit.py
+++ b/semantic_router/encoders/vit.py
@@ -2,18 +2,16 @@
 
 from pydantic import PrivateAttr
 
-from semantic_router.encoders import DenseEncoder
+from semantic_router.encoders.torch import TorchAbstractDenseEncoder
 
 
-class VitEncoder(DenseEncoder):
+class VitEncoder(TorchAbstractDenseEncoder):
     name: str = "google/vit-base-patch16-224"
     type: str = "huggingface"
     processor_kwargs: Dict = {}
     model_kwargs: Dict = {}
-    device: Optional[str] = None
     _processor: Any = PrivateAttr()
     _model: Any = PrivateAttr()
-    _torch: Any = PrivateAttr()
     _T: Any = PrivateAttr()
     _Image: Any = PrivateAttr()
 
@@ -32,13 +30,15 @@ def _initialize_hf_model(self):
                 "You can install it with: "
                 "`pip install semantic-router[vision]`"
             )
+        
+        # use abstract torch init
+        torch = self._initialize_torch()
 
         try:
-            import torch
             import torchvision.transforms as T
         except ImportError:
             raise ImportError(
-                "Please install Pytorch to use VitEncoder. "
+                "Please install torchvision to use VitEncoder. "
                 "You can install it with: "
                 "`pip install semantic-router[vision]`"
             )
@@ -67,17 +67,6 @@ def _initialize_hf_model(self):
 
         return processor, model
 
-    def _get_device(self) -> str:
-        if self.device:
-            device = self.device
-        elif self._torch.cuda.is_available():
-            device = "cuda"
-        elif self._torch.backends.mps.is_available():
-            device = "mps"
-        else:
-            device = "cpu"
-        return device
-
     def _process_images(self, images: List[Any]):
         rgb_images = [self._ensure_rgb(img) for img in images]
         processed_images = self._processor(images=rgb_images, return_tensors="pt")
diff --git a/semantic_router/schema.py b/semantic_router/schema.py
index 273043f5..8d9bf163 100644
--- a/semantic_router/schema.py
+++ b/semantic_router/schema.py
@@ -13,8 +13,10 @@ class EncoderType(Enum):
     AURELIO = "aurelio"
     AZURE = "azure"
     COHERE = "cohere"
+    LOCAL = "local"
     OPENAI = "openai"
     BM25 = "bm25"
+    SENTENCE_TRANSFORMERS = "sentence-transformers"
     TFIDF = "tfidf"
     FASTEMBED = "fastembed"
     HUGGINGFACE = "huggingface"

From 2eb709c84cca604da2a2c58c435df84cb928092b Mon Sep 17 00:00:00 2001
From: James Briggs <35938317+jamescalam@users.noreply.github.com>
Date: Tue, 21 Jan 2025 13:55:48 +0400
Subject: [PATCH 2/2] feat: local intro notebook

---
 docs/00a-introduction-local.ipynb | 313 ++++++++++++++++++++++++++++++
 1 file changed, 313 insertions(+)
 create mode 100644 docs/00a-introduction-local.ipynb

diff --git a/docs/00a-introduction-local.ipynb b/docs/00a-introduction-local.ipynb
new file mode 100644
index 00000000..94b15cc5
--- /dev/null
+++ b/docs/00a-introduction-local.ipynb
@@ -0,0 +1,313 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "K7NsuSPNf3px"
+      },
+      "source": [
+        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/aurelio-labs/semantic-router/blob/main/docs/00a-introduction-local.ipynb) [![Open nbviewer](https://raw.githubusercontent.com/pinecone-io/examples/master/assets/nbviewer-shield.svg)](https://nbviewer.org/github/aurelio-labs/semantic-router/blob/main/docs/00a-introduction-local.ipynb)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Am2hmLzTf3py"
+      },
+      "source": [
+        "# Semantic Router Intro"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "k1nRRAbYf3py"
+      },
+      "source": [
+        "The Semantic Router library can be used as a super fast decision-making layer on top of LLMs. That means that, rather than waiting on a slow agent to decide what to do, we can use the magic of semantic vector space to make routes, cutting decision-making time down from seconds to milliseconds.\n",
+        "\n",
+        "In this notebook we will be introducing the library (as done in the `00-introduction.ipynb` notebook) but using the `LocalEncoder` class, allowing us to run the library locally without the need for any APIs or external services."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NggrMQP2f3py"
+      },
+      "source": [
+        "## Getting Started"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "9zP-l_T7f3py"
+      },
+      "source": [
+        "We start by installing the library:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "4YI81tu0f3pz"
+      },
+      "outputs": [],
+      "source": [
+        "!pip install -qU \"semantic-router==0.1.0.dev6[local]\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HfB8252ff3pz"
+      },
+      "source": [
+        "Next, we define a `Route` object that maps a route name to example utterances that should trigger that route."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "lslfqYOEf3pz",
+        "outputId": "c13e3e77-310c-4b86-e291-4b6005d698bd"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "c:\\Users\\Siraj\\Documents\\Personal\\Work\\Aurelio\\Virtual Environments\\semantic_router_3\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+            "  from .autonotebook import tqdm as notebook_tqdm\n"
+          ]
+        }
+      ],
+      "source": [
+        "from semantic_router import Route\n",
+        "\n",
+        "politics = Route(\n",
+        "    name=\"politics\",\n",
+        "    utterances=[\n",
+        "        \"isn't politics the best thing ever\",\n",
+        "        \"why don't you tell me about your political opinions\",\n",
+        "        \"don't you just love the president\",\n",
+        "        \"don't you just hate the president\",\n",
+        "        \"they're going to destroy this country!\",\n",
+        "        \"they will save the country!\",\n",
+        "    ],\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WYLHUDa1f3p0"
+      },
+      "source": [
+        "Let's define another for good measure:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "LAdY1jdxf3p0"
+      },
+      "outputs": [],
+      "source": [
+        "chitchat = Route(\n",
+        "    name=\"chitchat\",\n",
+        "    utterances=[\n",
+        "        \"how's the weather today?\",\n",
+        "        \"how are things going?\",\n",
+        "        \"lovely weather today\",\n",
+        "        \"the weather is horrendous\",\n",
+        "        \"let's go to the chippy\",\n",
+        "    ],\n",
+        ")\n",
+        "\n",
+        "routes = [politics, chitchat]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ReN59ieGf3p0"
+      },
+      "source": [
+        "Now we initialize our encoder. Under the hood we're using the `sentence-transformers` library, which supports loading encoders from the HuggingFace Hub. We'll be using Nvidia's [nvidia/NV-Embed-v2](https://huggingface.co/nvidia/NV-Embed-v2) encoder."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "MF47W_Sof3p2"
+      },
+      "outputs": [],
+      "source": [
+        "from semantic_router.encoders import LocalEncoder\n",
+        "\n",
+        "encoder = LocalEncoder(name=\"nvidia/NV-Embed-v2\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "lYuLO0l9f3p3"
+      },
+      "source": [
+        "Now we define the `Router`. When called, the router will consume text (a query) and output the category (`Route`) it belongs to — to initialize a `Router` we need our `encoder` model and a list of `routes`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "dh1U8IDOf3p3",
+        "outputId": "872810da-956a-47af-a91f-217ce351a88b"
+      },
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "\u001b[32m2024-05-07 15:02:46 INFO semantic_router.utils.logger local\u001b[0m\n"
+          ]
+        }
+      ],
+      "source": [
+        "from semantic_router.routers import SemanticRouter\n",
+        "\n",
+        "sr = SemanticRouter(encoder=encoder, routes=routes, auto_sync=\"local\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Xj32uEF-f3p3"
+      },
+      "source": [
+        "Now we can test it:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "fIXOjRp9f3p3",
+        "outputId": "8b9b5746-ae7c-43bb-d84f-5fa7c30e423e"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "RouteChoice(name='politics', function_call=None, similarity_score=None)"
+            ]
+          },
+          "execution_count": 6,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "sr(\"don't you love politics?\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0UN2mKvjf3p4",
+        "outputId": "062f9499-7db3-49d2-81ef-e7d5dc9a88f6"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "RouteChoice(name='chitchat', function_call=None, similarity_score=None)"
+            ]
+          },
+          "execution_count": 7,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "sr(\"how's the weather today?\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NHZWZKoTf3p4"
+      },
+      "source": [
+        "Both are classified accurately. What if we send a query that is unrelated to our existing `Route` objects?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "0WnvGJByf3p4",
+        "outputId": "4496e9b2-7cd8-4466-fe1a-3e6f5cf30b0d"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "RouteChoice(name=None, function_call=None, similarity_score=None)"
+            ]
+          },
+          "execution_count": 8,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "sr(\"I'm interested in learning about llama 2\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "With this we see `None` is returned, i.e. no routes were matched."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "---"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "decision-layer",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.11.4"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}