feat(tracing): add LangChainInstrumentor for nested chains (#1472)

* fix: langchain retrieval qa
* add cleanup
* doc strings
* Update instrumentor.py
  Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com>
* remove base ABC
* pr comments
* remove dead code

---------

Co-authored-by: Roger Yang <80478925+RogerHYang@users.noreply.github.com>
Arize-ai · Sep 26, 2023 · e5bf60e · e5bf60e
1 parent 205d1fd
commit e5bf60e
Show file tree

Hide file tree

Showing 3 changed files with 230 additions and 1 deletion.
diff --git a/src/phoenix/trace/langchain/__init__.py b/src/phoenix/trace/langchain/__init__.py
@@ -1,3 +1,4 @@
+from .instrumentor import LangChainInstrumentor
 from .tracer import OpenInferenceTracer
 
-__all__ = ["OpenInferenceTracer"]
+__all__ = ["OpenInferenceTracer", "LangChainInstrumentor"]
diff --git a/src/phoenix/trace/langchain/instrumentor.py b/src/phoenix/trace/langchain/instrumentor.py
@@ -0,0 +1,37 @@
+from typing import Any, Optional
+
+from .tracer import OpenInferenceTracer
+
+
+class LangChainInstrumentor:
+    """
+    Instruments the OpenInferenceTracer for LangChain automatically by patching the
+    BaseCallbackManager in LangChain.
+    """
+
+    def __init__(self, tracer: Optional[OpenInferenceTracer] = None) -> None:
+        self._tracer = tracer if tracer is not None else OpenInferenceTracer()
+
+    def instrument(self) -> None:
+        try:
+            from langchain.callbacks.base import BaseCallbackManager
+        except ImportError:
+            # Raise a cleaner error if LangChain is not installed
+            raise ImportError(
+                "LangChain is not installed. Please install LangChain first to use the instrumentor"
+            )
+
+        source_init = BaseCallbackManager.__init__
+
+        # Keep track of the source init so we can tell if the patching occurred
+        self._source_callback_manager_init = source_init
+
+        tracer = self._tracer
+
+        # Patch the init method of the BaseCallbackManager to add the tracer
+        # to all callback managers
+        def patched_init(self: BaseCallbackManager, *args: Any, **kwargs: Any) -> None:
+            source_init(self, *args, **kwargs)
+            self.add_handler(tracer, True)
+
+        BaseCallbackManager.__init__ = patched_init  # type: ignore
diff --git a/tutorials/internal/langchain_retrieval_qa_with_sources_chan_tracing_tutorial.ipynb b/tutorials/internal/langchain_retrieval_qa_with_sources_chan_tracing_tutorial.ipynb
@@ -0,0 +1,191 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -qqq langchain chromadb openai tiktoken playwright asyncio nest_asyncio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "import openai\n",
+    "from langchain.indexes import VectorstoreIndexCreator\n",
+    "\n",
+    "if not (openai_api_key := os.getenv(\"OPENAI_API_KEY\")):\n",
+    "    openai_api_key = getpass(\"🔑 Enter your OpenAI API key: \")\n",
+    "openai.api_key = openai_api_key\n",
+    "os.environ[\"OPENAI_API_KEY\"] = openai_api_key"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import asyncio\n",
+    "import os\n",
+    "from getpass import getpass\n",
+    "\n",
+    "import openai\n",
+    "import phoenix as px\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from phoenix.trace.langchain import LangChainInstrumentor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "LangChainInstrumentor().instrument()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "px.launch_app()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import nest_asyncio\n",
+    "\n",
+    "nest_asyncio.apply()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!playwright install"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.document_loaders import AsyncChromiumLoader\n",
+    "from langchain.document_transformers import BeautifulSoupTransformer\n",
+    "\n",
+    "# Load HTML\n",
+    "loader = AsyncChromiumLoader([\"https://www.bogleheads.org/wiki/Asset_protection\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "html = loader.load()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Transform\n",
+    "bs_transformer = BeautifulSoupTransformer()\n",
+    "docs_transformed = bs_transformer.transform_documents(html, tags_to_extract=[\"p\", \"li\", \"div\", \"a\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "index = VectorstoreIndexCreator().from_loaders([loader])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "docs_transformed[0].page_content[0:500]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = ChatOpenAI(temperature=0, model=\"gpt-4\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chat_model_name = \"gpt-4\"\n",
+    "llm = ChatOpenAI(model_name=chat_model_name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query = \"\"\"\n",
+    "You are a financial planner.\n",
+    "What are the best ways for me to protect my assets?\n",
+    "Answer in detail for each strategy\n",
+    "\"\"\"\n",
+    "result = index.query_with_sources(query, llm=llm)\n",
+    "result"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}