
Commit b9ba61f: Merge branch 'main' into docs
mikeldking authored Sep 26, 2023
2 parents d901dbb + 9bc0414
Showing 39 changed files with 1,443 additions and 393 deletions.
86 changes: 44 additions & 42 deletions DEVELOPMENT.md
@@ -2,48 +2,34 @@

- [Developer's Guide](#developers-guide)
- [Setting Up Your macOS Development Environment](#setting-up-your-macos-development-environment)
- [Running Scripts with `hatch`](#running-scripts-with-hatch)
- [Testing and Linting](#testing-and-linting)
- [Installing Pre-Commit Hooks](#installing-pre-commit-hooks)
- [Building the `phoenix` Package](#building-the-phoenix-package)
- [Installing a `phoenix` Build](#installing-a-phoenix-build)
- [Building the Package](#building-the-package)
- [Installing a Phoenix Build](#installing-a-phoenix-build)
- [Installing a `git` Branch on Colab](#installing-a-git-branch-on-colab)
- [Setting Up Your Windows Test Environment](#setting-up-your-windows-test-environment)
- [Selecting a Virtualization Option](#selecting-a-virtualization-option)
- [Installing Python and `phoenix`](#installing-python-and-phoenix)
- [Installing Python and Phoenix](#installing-python-and-phoenix)
- [Configuring a Remote Interpreter](#configuring-a-remote-interpreter)
- [Troubleshooting](#troubleshooting)
- [Publishing a New Release](#publishing-a-new-release)

## Setting Up Your macOS Development Environment

This section shows you how to set up an isolated virtual environment using `pyenv` and `virtualenvwrapper`. If you are new to `pyenv`, you can install it via `brew` with
We recommend using a virtual environment to isolate your Python dependencies. This guide will use `conda`, but you can use a different virtual environment management tool if you want.

```bash
brew install pyenv
```

Next, install a `phoenix`-supported Python version, e.g., `3.10.8`, with
First, ensure that your virtual environment manager is installed. For macOS users, we recommend installing `conda` via `brew` with

```bash
export PHOENIX_PYTHON_VERSION=<your-supported-python-version>
pyenv install $PHOENIX_PYTHON_VERSION
```

Set the global `pyenv` version with

```bash
pyenv global $PHOENIX_PYTHON_VERSION
brew install --cask mambaforge
```

Install `virtualenvwrapper` with
For non-macOS users, follow the instructions [here](https://github.com/conda-forge/miniforge#miniforge) to install `conda` for your particular operating system.

```bash
pip install virtualenvwrapper
```

Create a new virtual environment with
Create a new virtual environment with a Phoenix-compatible Python version. For example,

```bash
mkvirtualenv phoenix-env
conda create --name phoenix python=3.8
```
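Once the environment exists, activate it so subsequent installs land inside it. A minimal sketch (the environment name matches the `conda create` command above):

```bash
# Activate the environment created above
conda activate phoenix

# Confirm the interpreter now comes from the new environment
python --version
which python
```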

Install web build dependencies.
@@ -53,14 +39,30 @@

Make sure you have npm (node package manager) available on your terminal as well.
Install `phoenix` in development mode (using the `-e` flag) and with development dependencies (using the `[dev]` extra) by running

```bash
pip install -e ".[dev]"
pip install -e ".[dev,experimental]"
```

from the repository root.

## Running Scripts with `hatch`
If you are working on our LLM orchestration framework integrations, you may also wish to install LlamaIndex or LangChain from source. To install LlamaIndex from source,

- Uninstall any pre-existing version of LlamaIndex with `pip uninstall llama-index`.
- Fork and clone LlamaIndex using one of the following two methods:
- If you are an Arize employee, clone [Arize's fork of LlamaIndex](https://github.com/Arize-ai/llama_index).
- If you are an external contributor, fork and clone [LlamaIndex's upstream repository](https://github.com/jerryjliu/llama_index).
- Run `pip install -e .` from the repository root.
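Put together, the LlamaIndex steps above look roughly like the following for an external contributor (`<your-username>` is a placeholder for your GitHub handle):

```bash
# Remove any previously installed release so the editable install takes effect
pip uninstall -y llama-index

# Clone your fork of the upstream repository
git clone https://github.com/<your-username>/llama_index.git
cd llama_index

# Install in editable mode; local changes are picked up without reinstalling
pip install -e .
```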

To install LangChain from source,

- Uninstall any pre-existing version of LangChain with `pip uninstall langchain`.
- Fork and clone LangChain using one of the following two methods:
- If you are an Arize employee, clone [Arize's fork of LangChain](https://github.com/Arize-ai/langchain).
- If you are an external contributor, fork and clone [LangChain's upstream repository](https://github.com/langchain-ai/langchain).
- Run `pip install -e .` from `libs/langchain`.
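The LangChain flow is the same except that the Python package lives in a subdirectory, so the editable install runs from `libs/langchain` (`<your-username>` is again a placeholder):

```bash
pip uninstall -y langchain

git clone https://github.com/<your-username>/langchain.git
cd langchain/libs/langchain

# The package lives in libs/langchain, not the repository root
pip install -e .
```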

## Testing and Linting

`hatch` is the project management tool used to build `phoenix`. After installing and activating the `phoenix-env` virtual environment, view the project environments, dependencies and scripts defined in `pyproject.toml` with
Phoenix uses `hatch` as the project management tool to lint and test source code and to build the package. After creating and activating your `phoenix` virtual environment, view your `hatch` environments, dependencies, and scripts defined in `pyproject.toml` with

```bash
hatch env show
```

@@ -72,19 +74,19 @@

Scripts belonging to the various environments can be run with

```bash
hatch run <env-name>:<script-name>
```

For example, you can check types with
To type-check your code, run

```bash
hatch run type:check
```

You can fix styles with
To format your code, run

```bash
hatch run style:fix
```

You can run tests with coverage with
To run tests with coverage, run

```bash
hatch run test:coverage
```

@@ -105,27 +107,27 @@

```bash
pre-commit install
```

Once installed, the pre-commit hooks configured in `.pre-commit-config.yaml` will automatically run prior to each `git commit`. Pre-commit hooks can be skipped by passing the `-n`/`--no-verify` flag to the `git commit` command.
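For example, to commit without running the hooks (the commit message here is illustrative):

```bash
git commit --no-verify -m "wip: skip hooks for this commit"
```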

## Building the `phoenix` Package
## Building the Package

To build `phoenix`, run
To build Phoenix, run

```bash
hatch build
```

If successful, a source distribution (a tarball) and a Python `wheel` will appear in the `dist` folder at the repo base directory.
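You can sanity-check the build output by listing the `dist` folder. The exact file names depend on the version in `pyproject.toml`, so the ones below are illustrative:

```bash
ls dist/
# Expect one sdist (.tar.gz) and one wheel (.whl), e.g.:
#   arize-phoenix-<version>.tar.gz
#   arize_phoenix-<version>-py3-none-any.whl
```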

## Installing a `phoenix` Build
## Installing a Phoenix Build

We recommend using a separate virtual environment (e.g., `phoenix-test-env`) for installing and testing the builds created above.
We recommend using a separate virtual environment (e.g., `phoenixtest`) for installing and testing the builds created above.

To install `phoenix` from the source distribution (i.e., tarball), run
To install Phoenix from the source distribution (i.e., tarball), run

```bash
pip install /path/to/source/distribution/tarball.tar.gz
```

To install `phoenix` from the Python `wheel`, you must first install `wheel` with
To install Phoenix from the Python `wheel`, you must first install `wheel` with

```bash
pip install wheel
```

@@ -145,7 +147,7 @@

To make sure everything works, install `jupyter` with

```bash
pip install jupyter
```

and run the notebooks in the `examples` directory.
and run the notebooks in the `tutorials` directory.

## Installing a `git` Branch on Colab

@@ -160,7 +162,7 @@

The code below installs the `main` branch in [Colab](https://colab.research.google.com).

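Installing a branch straight from GitHub with `pip` generally follows this pattern (shown for `main`; swap in any branch name — this is the general idiom, not necessarily the exact notebook snippet):

```bash
pip install git+https://github.com/Arize-ai/phoenix.git@main
```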
## Setting Up Your Windows Test Environment

It is occasionally necessary to manually test a `phoenix` build or to run `phoenix` from source on Windows. The following instructions enable macOS developers who do not have a PC to quickly set up a Windows Python environment in a cloud or local virtual machine.
It is occasionally necessary to manually test a Phoenix build or to run Phoenix from source on Windows. The following instructions enable macOS developers who do not have a PC to quickly set up a Windows Python environment in a cloud or local virtual machine.

### Selecting a Virtualization Option

@@ -179,7 +181,7 @@

Hence, if you are a macOS developer using an Apple Silicon machine and you wish

If you elect to use an Azure VM, we recommend that you select a non-headless OS (we use Windows Server 2019), configure an inbound port rule for RDP on port 3389 while creating the VM and screenshare with your VM using Microsoft Remote Desktop, which can be downloaded from the Apple App Store. This will enable you to [configure an SSH server](#configuring-a-remote-interpreter) on the VM for remote development.

### Installing Python and `phoenix`
### Installing Python and Phoenix

The following instructions assume you have created a Windows virtual machine either locally or in the cloud. These instructions have been tested on Windows Server 2019 and assume you are using Powershell.

@@ -229,13 +231,13 @@

Install `virtualenvwrapper-win` with

```powershell
pip install virtualenvwrapper-win
```

Create a virtual environment called `phoenix-env` with
Create a virtual environment called `phoenix` with

```powershell
mkvirtualenv phoenix
```

Activate your virtual environment. You can now [install a `phoenix` build](#installing-a-phoenix-build). Alternatively, if you wish to run `phoenix` from source, clone the repo and install `phoenix` in development mode with
Activate your virtual environment. You can now [install a Phoenix build](#installing-a-phoenix-build). Alternatively, if you wish to run Phoenix from source, clone the repo and install Phoenix in development mode with

```powershell
pip install -e ".[dev]"
34 changes: 17 additions & 17 deletions README.md
@@ -36,20 +36,20 @@

Phoenix provides MLOps and LLMOps insights at lightning speed with zero-config observability.

**Table of Contents**

- [Installation](#installation)
- [LLM App Tracing](#llm-app-tracing)
- [Tracing with LlamaIndex](#tracing-with-llamaindex)
- [Tracing with LangChain](#tracing-with-langchain)
- [LLM Evals](#llm-evals)
- [Embedding Analysis](#embedding-analysis)
- [UMAP-based Exploratory Data Analysis](#umap-based-exploratory-data-analysis)
- [Cluster-driven Drift and Performance Analysis](#cluster-driven-drift-and-performance-analysis)
- [Exportable Clusters](#exportable-clusters)
- [RAG Introspection](#rag-introspection)
- [Structured Data Analysis](#structured-data-analysis)
- [Community](#community)
- [Thanks](#thanks)
- [Copyright, Patent, and License](#copyright-patent-and-license)
- [Installation](#installation)
- [LLM App Tracing](#llm-app-tracing)
- [Tracing with LlamaIndex](#tracing-with-llamaindex)
- [Tracing with LangChain](#tracing-with-langchain)
- [LLM Evals](#llm-evals)
- [Embedding Analysis](#embedding-analysis)
- [UMAP-based Exploratory Data Analysis](#umap-based-exploratory-data-analysis)
- [Cluster-driven Drift and Performance Analysis](#cluster-driven-drift-and-performance-analysis)
- [Exportable Clusters](#exportable-clusters)
- [Retrieval-Augmented Generation Analysis](#retrieval-augmented-generation-analysis)
- [Structured Data Analysis](#structured-data-analysis)
- [Community](#community)
- [Thanks](#thanks)
- [Copyright, Patent, and License](#copyright-patent-and-license)

## Installation

@@ -313,13 +313,13 @@

Break apart your data into clusters of high drift or bad performance using HDBSCAN.

Export your clusters to `parquet` files or dataframes for further analysis and fine-tuning.

## RAG Introspection
## Retrieval-Augmented Generation Analysis

[![Open in Colab](https://img.shields.io/static/v1?message=Open%20in%20Colab&logo=googlecolab&labelColor=grey&color=blue&logoColor=orange&label=%20)](https://colab.research.google.com/github/Arize-ai/phoenix/blob/main/tutorials/llama_index_search_and_retrieval_tutorial.ipynb) [![Open in GitHub](https://img.shields.io/static/v1?message=Open%20in%20GitHub&logo=github&labelColor=grey&color=blue&logoColor=white&label=%20)](https://github.com/Arize-ai/phoenix/blob/main/tutorials/llama_index_search_and_retrieval_tutorial.ipynb)

![RAG Introspection](https://github.com/Arize-ai/phoenix-assets/blob/main/gifs/corpus_search_and_retrieval.gif?raw=true)
![RAG Analysis](https://github.com/Arize-ai/phoenix-assets/blob/main/gifs/corpus_search_and_retrieval.gif?raw=true)

Search and retrieval is a critical component of many LLM Applications as it allows you to extend the LLM's capabilities to encompass knowledge about private data. This process is known as RAG (retrieval augmented generation) and often times a vector store is leveraged to store chunks of documents encoded as embeddings so that they can be retrieved at inference time.
Search and retrieval is a critical component of many LLM applications, as it allows you to extend the LLM's capabilities to encompass knowledge about private data. This process is known as RAG (retrieval-augmented generation); oftentimes a vector store is leveraged to store chunks of documents encoded as embeddings so that they can be retrieved at inference time.

To help you better understand your RAG application, Phoenix allows you to upload a corpus of your knowledge base along with your LLM application's inferences to help you troubleshoot hard-to-find retrieval bugs.

27 changes: 15 additions & 12 deletions app/schema.graphql
@@ -68,17 +68,6 @@

type Dataset {
events(eventIds: [ID!]!, dimensions: [DimensionInput!]): [Event!]!
}

type DatasetInfo {
"""The start bookend of the data"""
startTime: DateTime!

"""The end bookend of the data"""
endTime: DateTime!

"""The record count of the data"""
recordCount: Int!
}

enum DatasetRole {
primary
reference
@@ -471,7 +460,7 @@

type Query {
clusterSelectionEpsilon: Float! = 0
): [Cluster!]!
spans(timeRange: TimeRange, traceIds: [ID!], first: Int = 50, last: Int, after: String, before: String, sort: SpanSort, rootSpansOnly: Boolean = false, filterCondition: String = null): SpanConnection!
traceDatasetInfo: DatasetInfo
traceDatasetInfo: TraceDatasetInfo
}

type Retrieval {
@@ -617,6 +606,20 @@

type TimeSeriesDataPoint {
value: Float
}

type TraceDatasetInfo {
"""The start bookend of the data"""
startTime: DateTime!

"""The end bookend of the data"""
endTime: DateTime!

"""The record count of the data"""
recordCount: Int!

"""Count of total (prompt + completion) tokens in the trace data"""
tokenCountTotal: Int!
}

type UMAPPoint {
id: GlobalID!

15 changes: 12 additions & 3 deletions app/src/openInference/tracing/semanticConventions.ts
@@ -9,7 +9,8 @@

export const SemanticAttributePrefixes = {
} as const;

export const LLMAttributePostfixes = {
messages: "messages",
input_messages: "input_messages",
output_messages: "output_messages",
invocation_parameters: "invocation_parameters",
prompts: "prompts",
} as const;
@@ -50,8 +51,16 @@

export const DocumentAttributePostfixes = {
* Typically seen in openAI chat completions
* @see https://beta.openai.com/docs/api-reference/completions/create
*/
export const LLM_MESSAGES =
`${SemanticAttributePrefixes.llm}.${LLMAttributePostfixes.messages}` as const;
export const LLM_INPUT_MESSAGES =
`${SemanticAttributePrefixes.llm}.${LLMAttributePostfixes.input_messages}` as const;

/**
* The messages received from the LLM for completions
* Typically seen in openAI chat completions
* @see https://platform.openai.com/docs/api-reference/chat/object#choices-message
*/
export const LLM_OUTPUT_MESSAGES =
`${SemanticAttributePrefixes.llm}.${LLMAttributePostfixes.output_messages}` as const;

/**
* The role that the LLM assumes the message is from
