From c55aaffc8d50301eace88a439d96c9bfc526b217 Mon Sep 17 00:00:00 2001 From: Alexander Kovrigin Date: Sun, 23 Jun 2024 00:15:16 +0200 Subject: [PATCH] upd wandb weave --- code_editing/agents/agent_codeeditor.py | 7 +- code_editing/agents/run.py | 8 +- code_editing/agents/tools/edit_tool.py | 8 +- code_editing/metrics/gpt4_eval.py | 5 - code_editing/utils/backbones/baseline.py | 29 +- code_editing/utils/backbones/hf_backbone.py | 24 +- .../utils/backbones/openai_backbone.py | 18 +- code_editing/utils/wandb_utils.py | 174 +----------- poetry.lock | 250 +++++++++++++++++- pyproject.toml | 1 + 10 files changed, 267 insertions(+), 257 deletions(-) diff --git a/code_editing/agents/agent_codeeditor.py b/code_editing/agents/agent_codeeditor.py index abb89a4..3a59995 100644 --- a/code_editing/agents/agent_codeeditor.py +++ b/code_editing/agents/agent_codeeditor.py @@ -1,6 +1,7 @@ import logging from typing import Dict +import weave from hydra.utils import instantiate from langchain_core.runnables import RunnableConfig, RunnableLambda @@ -11,7 +12,6 @@ from code_editing.code_editor import CEInput, CEOutput, CodeEditor from code_editing.configs.agents.context_providers.context_config import ContextConfig from code_editing.utils.git_utils import get_head_diff_unsafe -from code_editing.utils.wandb_utils import log_codeeditor_trace class AgentCodeEditor(CodeEditor): @@ -35,8 +35,8 @@ def __init__( self.context_providers_cfg = context_providers_cfg self.runnable_config = runnable_config - @log_codeeditor_trace() - def generate_diff(self, req: CEInput, root_span) -> CEOutput: + @weave.op() + def generate_diff(self, req: CEInput) -> CEOutput: # Get repository full path repo_path = req["code_base"].get(CheckoutExtractor.REPO_KEY, None) if repo_path is None: @@ -55,7 +55,6 @@ def generate_diff(self, req: CEInput, root_span) -> CEOutput: # Tools available to the agent tools = self.tool_factory.build( run_overview_manager=run_overview_manager, - root_span=root_span, # W&B root span ) # Build the graph runnable diff --git a/code_editing/agents/run.py b/code_editing/agents/run.py index 7e3b8fe..390c8f6 100644 --- a/code_editing/agents/run.py +++ b/code_editing/agents/run.py @@ -8,8 +8,9 @@ class ToolInfo(TypedDict): calls: int - errors: int + success: int failures: int + errors: int # enum class: calls, errors, failures @@ -35,6 +36,7 @@ def __init__( self.start_ms = wandb_utils.get_current_ms() def log_tool_use(self, tool_name, status: ToolUseStatus): + status = status.value self.tools_info.setdefault(tool_name, {}).setdefault(status, 0) self.tools_info[tool_name][status] += 1 @@ -42,9 +44,7 @@ def get_run_summary(self): end_ms = wandb_utils.get_current_ms() return { "tools": self.tools_info, - "start_ms": self.start_ms, - "end_ms": end_ms, - "duration_ms": end_ms - self.start_ms, + "duration_sec": (end_ms - self.start_ms) / 1000, } def get_ctx_provider(self, ctx_provider_name) -> ContextProvider: diff --git a/code_editing/agents/tools/edit_tool.py b/code_editing/agents/tools/edit_tool.py index 744c0e1..deb9a13 100644 --- a/code_editing/agents/tools/edit_tool.py +++ b/code_editing/agents/tools/edit_tool.py @@ -27,7 +27,7 @@ class EditToolInput(BaseModel): The instruction should be a prompt for the editing LLM.""" args_schema = EditToolInput - def __init__(self, backbone: CEBackbone = None, root_span=None, **kwargs): + def __init__(self, backbone: CEBackbone = None, **kwargs): super().__init__(**kwargs) self.args_schema = self.EditToolInput @@ -35,7 +35,6 @@ def __init__(self, backbone: CEBackbone = None, 
root_span=None, **kwargs): return self.backbone = backbone - self.root_span = root_span if self.backbone is None: raise ValueError("Backbone is required for the edit tool") @@ -48,9 +47,7 @@ def _run_tool(self, file_name: str, start_index: int, instruction: str, context: file = parse_file(file_name, self.repo_path) contents, lines, start, end = read_file(context, file, start_index) # Send to the editing LLM - resp = self.backbone.generate_diff( - {"instruction": instruction, "code_base": {file_name: contents}}, parent_span=self.root_span - ) + resp = self.backbone.generate_diff({"instruction": instruction, "code_base": {file_name: contents}}) new_contents = resp["prediction"] # Save with open(file, "w") as f: @@ -67,4 +64,3 @@ def short_name(self) -> str: backbone: CEBackbone = None retrieval_helper: RetrievalHelper = None - root_span: Any = None diff --git a/code_editing/metrics/gpt4_eval.py b/code_editing/metrics/gpt4_eval.py index 3c82d76..6372630 100644 --- a/code_editing/metrics/gpt4_eval.py +++ b/code_editing/metrics/gpt4_eval.py @@ -48,11 +48,6 @@ def _score_single(self, diff_true: str, diff_pred: str, full_row: Dict): if found: try: res = float(found[0]) - # Log to W&B - if wandb.run is not None: - wandb_utils.gpt4_eval_trace( - diff_true, patch, start_ms, end_ms, response, res, metadata={"model": self.model_name} - ) return res except: pass diff --git a/code_editing/utils/backbones/baseline.py b/code_editing/utils/backbones/baseline.py index c972164..95209bc 100644 --- a/code_editing/utils/backbones/baseline.py +++ b/code_editing/utils/backbones/baseline.py @@ -1,6 +1,3 @@ -import wandb -from wandb.sdk.data_types.trace_tree import StatusCode - from code_editing.code_editor import CEBackbone, CEInput, CEOutput, CodeEditor from code_editing.utils import wandb_utils from code_editing.utils.preprocessors.base_preprocessor import CEPreprocessor @@ -13,37 +10,13 @@ def __init__(self, backbone: CEBackbone, preprocessor: CEPreprocessor): self.run_name = backbone.name def generate_diff(self, req: CEInput) -> CEOutput: - # Initialize the root span for W&B - root_span = None - if wandb.run is not None: - root_span = wandb_utils.build_main_trace( - req, - wandb_utils.get_current_ms(), - "Code Editing", - metadata={ - "preprocessor_name": self.preprocessor.name, - "backbone_name": self.backbone.name, - }, - ) - # Preprocess the input start_ms = wandb_utils.get_current_ms() old_req = req req = self.preprocessor(req) after_preprocess_ms = wandb_utils.get_current_ms() - # Log the preprocessing trace to W&B - if wandb.run is not None: - wandb_utils.log_preprocessor_trace(old_req, req, start_ms, after_preprocess_ms, root_span) - # Generate the diff using the backbone - try: - resp = self.backbone.generate_diff(req, parent_span=root_span) - if wandb.run is not None: - wandb_utils.log_main_trace(root_span, old_req, resp, StatusCode.SUCCESS) - except Exception as e: - if wandb.run is not None: - wandb_utils.log_main_trace(root_span, old_req, None, StatusCode.ERROR, str(e)) - raise e + resp = self.backbone.generate_diff(req) return resp diff --git a/code_editing/utils/backbones/hf_backbone.py b/code_editing/utils/backbones/hf_backbone.py index 3e0b888..0f1608e 100644 --- a/code_editing/utils/backbones/hf_backbone.py +++ b/code_editing/utils/backbones/hf_backbone.py @@ -1,13 +1,12 @@ import logging -from typing import Dict, Optional +from typing import Dict import torch +import weave from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, set_seed -from 
wandb.sdk.data_types.trace_tree import Trace from code_editing.code_editor import CEBackbone, CEInput, CEOutput from code_editing.configs.backbones_configs import HFGenerationConfig, HFModelConfig -from code_editing.utils import wandb_utils from code_editing.utils.prompts.base_prompt import CEPrompt @@ -83,15 +82,7 @@ def generate_diff(self, req: CEInput, **kwargs) -> CEOutput: if not self._prompt: raise ValueError("Prompt is required for HuggingFace models.") - # Initialize the root span for W&B - parent_span: Optional[Trace] = kwargs.get("parent_span", None) - - @wandb_utils.log_prompt_trace( - parent_span, - metadata={ - "prompt_name": self._prompt.name, - }, - ) + @weave.op(name="prompt") def get_inp(r): return self._prompt.hf( r, @@ -102,14 +93,7 @@ def get_inp(r): preprocessed_inputs = get_inp(req) encoding = self._tokenizer(preprocessed_inputs, return_tensors="pt").to(self._device) - @wandb_utils.log_llm_trace( - parent_span=parent_span, - model_name=self._name_or_path, - metadata={ - "model_config": self._model.config.to_dict(), - "generation_config": self._generation_config.to_dict(), - }, - ) + @weave.op(name="generate") def get_resp(_): return self._model.generate( **encoding, diff --git a/code_editing/utils/backbones/openai_backbone.py b/code_editing/utils/backbones/openai_backbone.py index f63c4c5..e4cc0a7 100644 --- a/code_editing/utils/backbones/openai_backbone.py +++ b/code_editing/utils/backbones/openai_backbone.py @@ -1,11 +1,8 @@ import logging -from typing import Optional from openai import OpenAI -from wandb.sdk.data_types.trace_tree import Trace from code_editing.code_editor import CEBackbone, CEInput, CEOutput -from code_editing.utils import wandb_utils from code_editing.utils.prompts.base_prompt import CEPrompt @@ -24,19 +21,8 @@ def __init__(self, model_name: str, prompt: CEPrompt, **kwargs): logging.getLogger("httpx").setLevel(logging.WARNING) def generate_diff(self, req: CEInput, **kwargs) -> CEOutput: - # Initialize the root span for W&B - parent_span: Optional[Trace] = kwargs.get("parent_span", None) - - preprocessed_inputs = wandb_utils.log_prompt_trace( - parent_span, - metadata={ - "prompt_name": self._prompt.name, - }, - )( - self._prompt.chat - )(req) - - @wandb_utils.log_llm_trace(parent_span=parent_span, model_name=self._model_name) + preprocessed_inputs = self._prompt.chat(req) + def openai_request(inp): resp = self.api.chat.completions.create( messages=inp, diff --git a/code_editing/utils/wandb_utils.py b/code_editing/utils/wandb_utils.py index 5954945..561eab9 100644 --- a/code_editing/utils/wandb_utils.py +++ b/code_editing/utils/wandb_utils.py @@ -1,8 +1,7 @@ import time -from typing import Any, Callable, Dict, List, Optional +from typing import Dict, List import wandb -from wandb.sdk.data_types.trace_tree import StatusCode, Trace from code_editing.code_editor import CEInput @@ -40,174 +39,3 @@ def req_beautify(req: CEInput) -> dict: [f"* `{file_name}`\n```\n{file_contents}\n```" for file_name, file_contents in req["code_base"].items()] ), } - - -def log_prompt_trace( - parent_span: Trace, - metadata: Optional[Dict[str, str]] = None, -): - def wrapper(func: Callable[[CEInput], Any]): - def new_func(req: CEInput): - start_ms = get_current_ms() - res = func(req) - end_ms = get_current_ms() - if not is_run_active(): - return res - # Log to W&B - if isinstance(res, str): - chat = {"prompt": res} - else: - chat = chat_to_dict(res) - trace = Trace( - name="Prompt Generation", - kind="tool", - status_code="success", - start_time_ms=start_ms, - 
end_time_ms=end_ms, - inputs=req_beautify(req), - outputs=chat, - model_dict=metadata, - ) - parent_span.add_child(trace) - return res - - return new_func - - return wrapper - - -def log_llm_trace( - parent_span: Trace, - model_name: str, - metadata: Optional[Dict[str, str]] = None, -): - def wrapper(func): - - def new_func(input_obj): - start_ms = get_current_ms() - output_str = func(input_obj) - end_ms = get_current_ms() - if not is_run_active(): - return output_str - if isinstance(input_obj, str): - input_dict = {"input": input_obj} - else: - input_dict = chat_to_dict(input_obj) - trace = Trace( - name=f"{model_name} Inference", - kind="llm", - status_code="success", - start_time_ms=start_ms, - end_time_ms=end_ms, - inputs=input_dict, - outputs={"output": output_str}, - model_dict=metadata, - ) - parent_span.add_child(trace) - return output_str - - return new_func - - return wrapper - - -def build_main_trace( - req: CEInput, - start_ms: int, - name: str, - metadata: Optional[Dict[str, str]] = None, -): - return Trace( - name=f"Code Editing: {name}", - kind="agent", - status_code="success", - start_time_ms=start_ms, - inputs=req_beautify(req), - model_dict=metadata, - ) - - -def log_preprocessor_trace( - before: CEInput, - after: CEInput, - start_ms: int, - end_ms: int, - parent_span: Trace, -): - if not is_run_active(): - return - trace = Trace( - name="Preprocessor", - kind="tool", - status_code="success", - start_time_ms=start_ms, - end_time_ms=end_ms, - inputs=req_beautify(before), - outputs=req_beautify(after), - ) - parent_span.add_child(trace) - - -def log_main_trace(root_span, old_req, resp, status_code, status_message=None): - if not is_run_active(): - return - root_span.add_inputs_and_outputs(inputs=req_beautify(old_req), outputs=resp) - root_span._span.status_code = status_code - root_span._span.status_message = status_message - root_span._span.end_time_ms = get_current_ms() - root_span.log("Code Editing") - - -def gpt4_eval_trace( - diff_true: str, - diff_pred: str, - start_ms: int, - end_ms: int, - score_text: str, - score_value: Optional[float], - metadata: Optional[Dict[str, str]] = None, -): - if not is_run_active(): - return - trace = Trace( - name="GPT4 Evaluation", - kind="llm", - status_code="success", - start_time_ms=start_ms, - end_time_ms=end_ms, - inputs={"diff_true": diff_true, "diff_pred": diff_pred}, - outputs={"score_text": score_text, "score_value": score_value}, - model_dict=metadata, - ) - trace.log("GPT4 Evaluation") - - -def log_codeeditor_trace(): - def wrapper(func): - def new_func(*args): - req = args[-1] - metadata = {} - if len(args) > 1: - code_editor = args[0] # self - metadata.update(code_editor.metadata) - start_ms = get_current_ms() - trace = build_main_trace(req, start_ms, func.__name__, metadata) - try: - res = func(*args, root_span=trace) - err = None - except Exception as e: - res = None - err = e - if not is_run_active(): - if err: - raise err - return res - log_main_trace(trace, req, res, StatusCode.SUCCESS if res else StatusCode.ERROR, str(err) if err else None) - trace.log("Code Editor") - if err: - raise err - return res - - return new_func - - return wrapper diff --git a/poetry.lock b/poetry.lock index 31b8de5..36d2b83 100644 --- a/poetry.lock +++ b/poetry.lock @@ -31,6 +31,17 @@ test-prod = ["parameterized", "pytest (>=7.2.0,<=8.0.0)", "pytest-subtests", "py test-trackers = ["comet-ml", "dvclive", "tensorboard", "wandb"] testing = ["bitsandbytes", "datasets", "diffusers", "evaluate", "parameterized", "pytest (>=7.2.0,<=8.0.0)", 
"pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +[[package]] +name = "aiofiles" +version = "23.2.1" +description = "File support for asyncio." +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiofiles-23.2.1-py3-none-any.whl", hash = "sha256:19297512c647d4b27a2cf7c34caa7e405c0d60b5560618a29a9fe027b18b0107"}, + {file = "aiofiles-23.2.1.tar.gz", hash = "sha256:84ec2218d8419404abcb9f0c02df3f34c6e0a68ed41072acfb1cef5cbc29051a"}, +] + [[package]] name = "aiohttp" version = "3.9.5" @@ -127,6 +138,20 @@ yarl = ">=1.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns", "brotlicffi"] +[[package]] +name = "aioprocessing" +version = "2.0.1" +description = "A Python 3.5+ library that integrates the multiprocessing module with asyncio." +optional = false +python-versions = ">=3.5" +files = [ + {file = "aioprocessing-2.0.1-py3-none-any.whl", hash = "sha256:8fcac4b0108b72eb9df76e06a9d7e05720ee1e8330829d3fd53fa059879be586"}, + {file = "aioprocessing-2.0.1.tar.gz", hash = "sha256:fe01c7b1a38c78168611d3040e73d93036c3b7c8a649d636dc9ed7a3bc9b1ba2"}, +] + +[package.extras] +dill = ["multiprocess"] + [[package]] name = "aiosignal" version = "1.3.1" @@ -141,6 +166,22 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "analytics-python" +version = "1.2.9" +description = "The hassle-free way to integrate analytics into any python application." +optional = false +python-versions = "*" +files = [ + {file = "analytics-python-1.2.9.tar.gz", hash = "sha256:f3d1ca27cb277da67c10d71a5c9c593d2a9ec99109e31409ab771b44821a86bf"}, + {file = "analytics_python-1.2.9-py2.py3-none-any.whl", hash = "sha256:69d88b2d3e2c350e6803487a1a802e0fd111e86665d4c9b16c3c6f5fbc6c445f"}, +] + +[package.dependencies] +python-dateutil = ">2.1" +requests = ">=2.7,<3.0" +six = ">=1.5" + [[package]] name = "annotated-types" version = "0.7.0" @@ -347,6 +388,17 @@ files = [ [package.extras] dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -960,6 +1012,23 @@ files = [ [package.dependencies] six = ">=1.4.0" +[[package]] +name = "emoji" +version = "2.12.1" +description = "Emoji for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "emoji-2.12.1-py3-none-any.whl", hash = "sha256:a00d62173bdadc2510967a381810101624a2f0986145b8da0cffa42e29430235"}, + {file = "emoji-2.12.1.tar.gz", hash = "sha256:4aa0488817691aa58d83764b6c209f8a27c0b3ab3f89d1b8dceca1a62e4973eb"}, +] + +[package.dependencies] +typing-extensions = ">=4.7.0" + +[package.extras] +dev = ["coverage", "pytest (>=7.4.4)"] + [[package]] name = "evaluate" version = "0.4.2" @@ -1334,6 +1403,47 @@ gitdb = ">=4.0.1,<5" doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"] test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest 
(>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] +[[package]] +name = "gql" +version = "3.5.0" +description = "GraphQL client for Python" +optional = false +python-versions = "*" +files = [ + {file = "gql-3.5.0-py2.py3-none-any.whl", hash = "sha256:70dda5694a5b194a8441f077aa5fb70cc94e4ec08016117523f013680901ecb7"}, + {file = "gql-3.5.0.tar.gz", hash = "sha256:ccb9c5db543682b28f577069950488218ed65d4ac70bb03b6929aaadaf636de9"}, +] + +[package.dependencies] +anyio = ">=3.0,<5" +backoff = ">=1.11.1,<3.0" +graphql-core = ">=3.2,<3.3" +requests = {version = ">=2.26,<3", optional = true, markers = "extra == \"requests\""} +requests-toolbelt = {version = ">=1.0.0,<2", optional = true, markers = "extra == \"requests\""} +yarl = ">=1.6,<2.0" + +[package.extras] +aiohttp = ["aiohttp (>=3.8.0,<4)", "aiohttp (>=3.9.0b0,<4)"] +all = ["aiohttp (>=3.8.0,<4)", "aiohttp (>=3.9.0b0,<4)", "botocore (>=1.21,<2)", "httpx (>=0.23.1,<1)", "requests (>=2.26,<3)", "requests-toolbelt (>=1.0.0,<2)", "websockets (>=10,<12)"] +botocore = ["botocore (>=1.21,<2)"] +dev = ["aiofiles", "aiohttp (>=3.8.0,<4)", "aiohttp (>=3.9.0b0,<4)", "black (==22.3.0)", "botocore (>=1.21,<2)", "check-manifest (>=0.42,<1)", "flake8 (==3.8.1)", "httpx (>=0.23.1,<1)", "isort (==4.3.21)", "mock (==4.0.2)", "mypy (==0.910)", "parse (==1.15.0)", "pytest (==7.4.2)", "pytest-asyncio (==0.21.1)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "requests (>=2.26,<3)", "requests-toolbelt (>=1.0.0,<2)", "sphinx (>=5.3.0,<6)", "sphinx-argparse (==0.2.5)", "sphinx-rtd-theme (>=0.4,<1)", "types-aiofiles", "types-mock", "types-requests", "vcrpy (==4.4.0)", "websockets (>=10,<12)"] +httpx = ["httpx (>=0.23.1,<1)"] +requests = ["requests (>=2.26,<3)", "requests-toolbelt (>=1.0.0,<2)"] +test = ["aiofiles", "aiohttp (>=3.8.0,<4)", "aiohttp (>=3.9.0b0,<4)", "botocore (>=1.21,<2)", "httpx (>=0.23.1,<1)", "mock (==4.0.2)", "parse (==1.15.0)", "pytest (==7.4.2)", "pytest-asyncio (==0.21.1)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "requests (>=2.26,<3)", "requests-toolbelt (>=1.0.0,<2)", "vcrpy (==4.4.0)", "websockets (>=10,<12)"] +test-no-transport = ["aiofiles", "mock (==4.0.2)", "parse (==1.15.0)", "pytest (==7.4.2)", "pytest-asyncio (==0.21.1)", "pytest-console-scripts (==1.3.1)", "pytest-cov (==3.0.0)", "vcrpy (==4.4.0)"] +websockets = ["websockets (>=10,<12)"] + +[[package]] +name = "graphql-core" +version = "3.2.3" +description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL." 
+optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "graphql-core-3.2.3.tar.gz", hash = "sha256:06d2aad0ac723e35b1cb47885d3e5c45e956a53bc1b209a9fc5369007fe46676"}, + {file = "graphql_core-3.2.3-py3-none-any.whl", hash = "sha256:5766780452bd5ec8ba133f8bf287dc92713e3868ddd83aee4faab9fc3e303dc3"}, +] + [[package]] name = "greenlet" version = "3.0.3" @@ -1726,6 +1836,20 @@ files = [ [package.extras] colors = ["colorama (>=0.4.6)"] +[[package]] +name = "janus" +version = "1.0.0" +description = "Mixed sync-async queue to interoperate between asyncio tasks and classic threads" +optional = false +python-versions = ">=3.7" +files = [ + {file = "janus-1.0.0-py3-none-any.whl", hash = "sha256:2596ea5482711c1ee3ef2df6c290aaf370a13c55a007826e8f7c32d696d1d00a"}, + {file = "janus-1.0.0.tar.gz", hash = "sha256:df976f2cdcfb034b147a2d51edfc34ff6bfb12d4e2643d3ad0e10de058cb1612"}, +] + +[package.dependencies] +typing-extensions = ">=3.7.4.3" + [[package]] name = "jedi" version = "0.19.1" @@ -2569,6 +2693,30 @@ html5 = ["html5lib"] htmlsoup = ["BeautifulSoup4"] source = ["Cython (>=3.0.10)"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "markupsafe" version = "2.1.5" @@ -2723,6 +2871,17 @@ files = [ [package.dependencies] traitlets = "*" +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "mistune" version = "3.0.2" @@ -4517,6 +4676,20 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +description = "A utility belt for advanced users of python-requests" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + [[package]] name = "rfc3339-validator" version = "0.1.4" @@ -4542,6 +4715,24 @@ files = [ {file = "rfc3986_validator-0.1.1.tar.gz", hash = 
"sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055"}, ] +[[package]] +name = "rich" +version = "13.7.1" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, + {file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "rpds-py" version = "0.18.1" @@ -6247,6 +6438,46 @@ files = [ {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, ] +[[package]] +name = "weave" +version = "0.50.5" +description = "A toolkit for building composable interactive data driven applications." +optional = false +python-versions = ">=3.9" +files = [ + {file = "weave-0.50.5-py3-none-any.whl", hash = "sha256:bb38df0b3444c69e0dd878d4a315dc083c2736284364c6edc0abce95ff63ffb1"}, + {file = "weave-0.50.5.tar.gz", hash = "sha256:9124986260faf711639279f5ad186b3f6cf2b6f430f4108579ddf54c6e507371"}, +] + +[package.dependencies] +aiofiles = ">=22.1.0" +aiohttp = ">=3.8.3" +aioprocessing = ">=2.0.1" +analytics-python = ">=1.2.9" +emoji = ">=2.12.1" +gql = {version = ">=3.4.1", extras = ["requests"]} +graphql-core = ">3" +janus = ">=1.0.0" +numpy = ">=1.21,<2.0.0" +openai = ">=1.0.0" +packaging = ">=21.0" +pyarrow = ">=14.0.1" +pydantic = ">=2.0.0" +python-json-logger = ">=2.0.4" +rich = ">=13.7.0" +tenacity = ">=8.3.0,<8.4.0 || >8.4.0" +tiktoken = ">=0.4.0" +typing-extensions = ">=4.0.0" +wandb = ">=0.16.4" +Werkzeug = ">=3.0.3" + +[package.extras] +datadog = ["datadog (==0.44.0)", "ddtrace (==1.7.5)"] +ecosystem = ["bertviz (>=1.4.0)", "datasets (>=2.9.0)", "faiss-cpu (>=1.7.4)", "h5py (>=3.8.0)", "hdbscan (>=0.8.27)", "langchain (>=0.0.132)", "matplotlib (>3,!=3.5)", "openai (>=1.0.0)", "pillow (>=10.0.1)", "plotly (>=5.13.0)", "replicate (>=0.4.0)", "shap (>=0.41.0)", "spacy (>=3.0.0,<4.0.0)", "sqlalchemy (>=2.0.1)", "tiktoken (>=0.4.0)", "torch (>=1.13.1)", "torchvision (>=0.14.1)", "transformers (<4.21)", "xgboost (>=1.7.3)"] +engine = ["black (>=22.3.0)", "faiss-cpu", "flask (>=2.1)", "flask-cors (>=3.0.10)", "ipynbname (>=2021.3.2)", "ipython (>=7.34)", "notebook (>=6.4.8)", "objgraph (>=3.6.0)", "pandas (>=1.5.3)", "pillow (>=10.0.1)", "scikit-learn (>=1.2.1)", "sentry-sdk (<1.29.0)", "tabulate", "typeguard (>=4.1.3)", "umap-learn (>=0.5.3)"] +examples = ["bertviz (>=1.4.0)", "datasets (>=2.9.0)", "faiss-cpu (>=1.7.4)", "h5py (>=3.8.0)", "hdbscan (>=0.8.27)", "langchain (>=0.0.132)", "matplotlib (>3,!=3.5)", "openai (>=1.0.0)", "pillow (>=10.0.1)", "plotly (>=5.13.0)", "replicate (>=0.4.0)", "shap (>=0.41.0)", "spacy (>=3.0.0,<4.0.0)", "sqlalchemy (>=2.0.1)", "tiktoken (>=0.4.0)", "torch (>=1.13.1)", "torchvision (>=0.14.1)", "transformers (<4.21)", "xgboost (>=1.7.3)"] +modal = ["modal", "python-dotenv"] + [[package]] name = "webcolors" version = "24.6.0" @@ -6305,6 +6536,23 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"] optional = ["python-socks", "wsaccel"] test = ["websockets"] +[[package]] +name = "werkzeug" +version = "3.0.3" +description = "The comprehensive WSGI web application library." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, + {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + [[package]] name = "widgetsnbextension" version = "4.0.11" @@ -6564,4 +6812,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "~3.10" -content-hash = "8eebf58ce46671c4c65143b48f4d1c84ba6e7b848e3dd4bf3523d2202ad82399" +content-hash = "82e1508a5c8114bc0ba04844b3c3c958506ceda8fe0bda45ca776f288bb44df2" diff --git a/pyproject.toml b/pyproject.toml index 6e0b633..95cc78d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ tqdm = "^4.66.4" wandb = "^0.17.0" scikit-learn = "^1.5.0" scipy = "^1.13.1" +weave = "^0.50.5" [tool.poetry.group.aider.dependencies]
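Reviewer note on the new tracing pattern: the hand-rolled `Trace`/`root_span` plumbing deleted from `wandb_utils.py` is replaced by Weave's `@weave.op()` decorator, which the patched code applies to `generate_diff` and to the inner prompt/generation helpers. Once `weave.init()` has been called for the process, each decorated call is recorded with its inputs, outputs, and timing, and nesting is inferred from the call stack. A minimal sketch of the pattern this patch adopts (the project name and function bodies are illustrative, not part of the patch):

    import weave

    # Hypothetical project name; init must run before any op is called.
    weave.init("code-editing")

    @weave.op(name="prompt")
    def build_prompt(instruction: str) -> str:
        # Nested ops appear as child spans of their caller, which is what
        # replaces the manual parent_span/add_child bookkeeping.
        return f"Edit the code as follows: {instruction}"

    @weave.op()
    def generate_diff(instruction: str) -> str:
        # Illustrative stand-in for AgentCodeEditor.generate_diff; a real
        # implementation would call the backbone LLM with the prompt.
        prompt = build_prompt(instruction)
        return f"diff produced for: {prompt}"

    generate_diff("rename foo to bar")

Because the call tree is reconstructed automatically, callers no longer need to thread `root_span`/`parent_span` arguments through `generate_diff`, the tool factory, and the backbones, which is why those parameters are dropped throughout this patch.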