From e9e2131f0c0fe19e5de062f398e7a920ab7e67ca Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 12 Nov 2024 18:38:53 -0800
Subject: [PATCH 01/88] wip

---
 python/langsmith/client.py  | 89 +++++++++++++++++++++++++++++++++++++
 python/langsmith/schemas.py |  3 ++
 2 files changed, 92 insertions(+)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index eb397b4c4..99368aa85 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -82,6 +82,7 @@
     _SIZE_LIMIT_BYTES,
 )
 from langsmith._internal._multipart import (
+    MultipartPart,
     MultipartPartsAndContext,
     join_multipart_parts_and_context,
 )
@@ -3369,6 +3370,94 @@ def create_example_from_run(
             created_at=created_at,
         )
 
+    def upsert_example_multipart(
+        self,
+        *,
+        upserts: List[ls_schemas.ExampleCreateWithAttachments] = None,
+    ) -> None:
+        """Upsert examples"""
+        parts = list[MultipartPart]
+
+        for example in upserts:
+
+            if example.id is not None:
+                example_id = str(example.id) # is the conversion to string neccessary?
+            else:
+                example_id = str(uuid.uuid4())
+
+            remaining_values = {
+                "dataset_id": example.dataset_id,
+                "created_at": example.created_at,
+                "metadata": example.metadata,
+                "split": example.split
+            }
+            valb = _dumps_json(remaining_values)
+            
+            parts.append(
+                f"{example_id}",
+                (
+                    None,
+                    valb,
+                    "application/json",
+                    {"Content-Length": str(len(valb))},
+                ),
+            ),
+
+            inputsb = example.inputs
+            outputsb = example.outputs
+
+            parts.append(
+                f"{example_id}.inputs",
+                (
+                    None,
+                    inputsb,
+                    "application/json",
+                    {"Content-Length": str(len(inputsb))},
+                ),
+            ),
+        
+            parts.append(
+                f"{example_id}.outputs",
+                (
+                    None,
+                    outputsb,
+                    "application/json",
+                    {"Content-Length": str(len(outputsb))},
+                ),
+            ),
+        
+            if example.attachments:
+                for attachment in example.attachments:
+                    parts.append(
+                        f"{example_id}.attachment.{attachment.mime_type}",
+                        (
+                            None,
+                            attachment.data,
+                            "application/json", # I feel like this is wrong
+                            {"Content-Length": str(len(attachment.data))},
+                        ),
+                    ),
+                    
+        encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY)
+        if encoder.len <= 20_000_000:  # ~20 MB
+            data = encoder.to_string()
+        else:
+            data = encoder
+        
+
+        response = self.request_with_retries(
+            "POST",
+            "/v1/examples/multipart", # No clue what this is supposed to be
+            request_kwargs={
+                "data": data,
+                "headers": {
+                    **self._headers,
+                    "Content-Type": encoder.content_type,
+                },
+            },
+        )
+        ls_utils.raise_for_status_with_text(response)
+
     def create_examples(
         self,
         *,
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 80e112e46..ff8528d29 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -88,6 +88,9 @@ class ExampleCreate(ExampleBase):
     created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
     split: Optional[Union[str, List[str]]] = None
 
+class ExampleCreateWithAttachments(ExampleCreate):
+    """Example create with attachments."""
+    attachments: Optional[List[Attachment]] = None
 
 class Example(ExampleBase):
     """Example model."""

From ff3054182f91f0305e42718e6b8431d96e4adca8 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Wed, 13 Nov 2024 08:02:55 -0800
Subject: [PATCH 02/88] unit test

---
 python/langsmith/client.py             | 122 +++++++++++++++----------
 python/langsmith/schemas.py            |   5 +-
 python/tests/unit_tests/test_client.py |  87 ++++++++++++++++++
 3 files changed, 167 insertions(+), 47 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 99368aa85..dca29d9fc 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3375,13 +3375,12 @@ def upsert_example_multipart(
         *,
         upserts: List[ls_schemas.ExampleCreateWithAttachments] = None,
     ) -> None:
-        """Upsert examples"""
-        parts = list[MultipartPart]
+        """Upsert examples."""
+        parts: list[MultipartPart] = []
 
         for example in upserts:
-
             if example.id is not None:
-                example_id = str(example.id) # is the conversion to string neccessary?
+                example_id = str(example.id)  # is the conversion to string neccessary?
             else:
                 example_id = str(uuid.uuid4())
 
@@ -3389,65 +3388,96 @@ def upsert_example_multipart(
                 "dataset_id": example.dataset_id,
                 "created_at": example.created_at,
                 "metadata": example.metadata,
-                "split": example.split
+                "split": example.split,
             }
             valb = _dumps_json(remaining_values)
-            
-            parts.append(
-                f"{example_id}",
-                (
-                    None,
-                    valb,
-                    "application/json",
-                    {"Content-Length": str(len(valb))},
+
+            (
+                parts.append(
+                    (
+                        f"{example_id}",
+                        (
+                            None,
+                            valb,
+                            "application/json",
+                            {"Content-Length": str(len(valb))},
+                        ),
+                    )
                 ),
-            ),
+            )
 
-            inputsb = example.inputs
-            outputsb = example.outputs
+            inputsb = _dumps_json(example.inputs)
+            outputsb = _dumps_json(example.outputs)
 
-            parts.append(
-                f"{example_id}.inputs",
-                (
-                    None,
-                    inputsb,
-                    "application/json",
-                    {"Content-Length": str(len(inputsb))},
-                ),
-            ),
-        
-            parts.append(
-                f"{example_id}.outputs",
-                (
-                    None,
-                    outputsb,
-                    "application/json",
-                    {"Content-Length": str(len(outputsb))},
+            (
+                parts.append(
+                    (
+                        f"{example_id}.inputs",
+                        (
+                            None,
+                            inputsb,
+                            "application/json",
+                            {"Content-Length": str(len(inputsb))},
+                        ),
+                    )
                 ),
-            ),
-        
-            if example.attachments:
-                for attachment in example.attachments:
-                    parts.append(
-                        f"{example_id}.attachment.{attachment.mime_type}",
+            )
+
+            (
+                parts.append(
+                    (
+                        f"{example_id}.outputs",
                         (
                             None,
-                            attachment.data,
-                            "application/json", # I feel like this is wrong
-                            {"Content-Length": str(len(attachment.data))},
+                            outputsb,
+                            "application/json",
+                            {"Content-Length": str(len(outputsb))},
                         ),
-                    ),
-                    
+                    )
+                ),
+            )
+
+            if example.attachments:
+                for name, attachment in example.attachments.items():
+                    if isinstance(attachment, tuple):
+                        mime_type, data = attachment
+                        (
+                            parts.append(
+                                (
+                                    f"{example_id}.attachment.{name}",
+                                    (
+                                        None,
+                                        data,
+                                        mime_type,
+                                        {"Content-Length": str(len(data))},
+                                    ),
+                                )
+                            ),
+                        )
+                    else:
+                        (
+                            parts.append(
+                                (
+                                    f"{example_id}.attachment.{name}",
+                                    (
+                                        None,
+                                        attachment.data,
+                                        attachment.mime_type,
+                                        {"Content-Length": str(len(attachment.data))},
+                                    ),
+                                )
+                            ),
+                        )
+
         encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY)
         if encoder.len <= 20_000_000:  # ~20 MB
             data = encoder.to_string()
         else:
             data = encoder
-        
 
         response = self.request_with_retries(
             "POST",
-            "/v1/examples/multipart", # No clue what this is supposed to be
+            "/v1/examples/multipart",  # No clue what this is supposed to be
             request_kwargs={
                 "data": data,
                 "headers": {
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index ff8528d29..4e8711002 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -88,9 +88,12 @@ class ExampleCreate(ExampleBase):
     created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
     split: Optional[Union[str, List[str]]] = None
 
+
 class ExampleCreateWithAttachments(ExampleCreate):
     """Example create with attachments."""
-    attachments: Optional[List[Attachment]] = None
+
+    attachments: Optional[Attachments] = None
+
 
 class Example(ExampleBase):
     """Example model."""
diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index 5dc1bbe1e..edda5dd09 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -416,6 +416,93 @@ def test_create_run_mutate(
         assert outputs == {"messages": ["hi", "there"]}
 
 
+@mock.patch("langsmith.client.requests.Session")
+def test_upsert_example_multipart(mock_session_cls: mock.Mock) -> None:
+    """Test that upsert_example_multipart sends correct multipart data."""
+    mock_session = MagicMock()
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_session.request.return_value = mock_response
+    mock_session_cls.return_value = mock_session
+
+    client = Client(api_url="http://localhost:1984", api_key="123")
+
+    # Create test data
+    example_id = uuid.uuid4()
+    dataset_id = uuid.uuid4()
+    created_at = datetime(2015, 1, 1, 0, 0, 0)
+
+    example = ls_schemas.ExampleCreateWithAttachments(
+        id=example_id,
+        dataset_id=dataset_id,
+        created_at=created_at,
+        inputs={"input": "test input"},
+        outputs={"output": "test output"},
+        metadata={"meta": "data"},
+        split="train",
+        attachments={
+            "file1": ("text/plain", b"test data"),
+            "file2": ls_schemas.Attachment(
+                mime_type="application/json", data=b'{"key": "value"}'
+            ),
+        },
+    )
+    client.upsert_example_multipart(upserts=[example])
+
+    # Verify the request
+    assert mock_session.request.call_count == 2  # we always make a call to /info
+    call_args = mock_session.request.call_args
+
+    assert call_args[0][0] == "POST"
+    assert call_args[0][1].endswith("/v1/examples/multipart")
+
+    # Parse the multipart data
+    request_data = call_args[1]["data"]
+    content_type = call_args[1]["headers"]["Content-Type"]
+    boundary = parse_options_header(content_type)[1]["boundary"]
+
+    parser = MultipartParser(
+        io.BytesIO(
+            request_data
+            if isinstance(request_data, bytes)
+            else request_data.to_string()
+        ),
+        boundary,
+    )
+    parts = list(parser.parts())
+
+    # Verify all expected parts are present
+    expected_parts = {
+        str(example_id): {
+            "dataset_id": str(dataset_id),
+            "created_at": created_at.isoformat(),
+            "metadata": {"meta": "data"},
+            "split": "train",
+        },
+        f"{example_id}.inputs": {"input": "test input"},
+        f"{example_id}.outputs": {"output": "test output"},
+        f"{example_id}.attachment.file1": "test data",
+        f"{example_id}.attachment.file2": '{"key": "value"}',
+    }
+
+    assert len(parts) == len(expected_parts)
+
+    for part in parts:
+        name = part.name
+        assert name in expected_parts, f"Unexpected part: {name}"
+
+        if name.endswith(".attachment.file1"):
+            assert part.value == expected_parts[name]
+            assert part.headers["Content-Type"] == "text/plain"
+        elif name.endswith(".attachment.file2"):
+            assert part.value == expected_parts[name]
+            assert part.headers["Content-Type"] == "application/json"
+        else:
+            value = json.loads(part.value)
+            assert value == expected_parts[name]
+            assert part.headers["Content-Type"] == "application/json"
+
+
 class CallTracker:
     def __init__(self) -> None:
         self.counter = 0

From 152ec59849ec81622738c443765f6b5da91ce44e Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Wed, 13 Nov 2024 11:23:09 -0800
Subject: [PATCH 03/88] integration test skeleton

---
 python/langsmith/client.py                    | 11 ++-
 python/tests/integration_tests/test_client.py | 68 ++++++++++++++++++-
 python/tests/unit_tests/test_client.py        |  6 +-
 3 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index dca29d9fc..7e823573d 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3370,12 +3370,19 @@ def create_example_from_run(
             created_at=created_at,
         )
 
-    def upsert_example_multipart(
+    def upsert_examples_multipart(
         self,
         *,
-        upserts: List[ls_schemas.ExampleCreateWithAttachments] = None,
+        upserts: List[ls_schemas.ExampleCreateWithAttachments] = [],
     ) -> None:
         """Upsert examples."""
+        # not sure if the below checks are necessary
+        if not isinstance(upserts, list):
+            raise TypeError(f"upserts must be a list, got {type(upserts)}")
+        for item in upserts:
+            if not isinstance(item, ls_schemas.ExampleCreateWithAttachments):
+                raise TypeError(f"Each item must be ExampleCreateWithAttachments, got {type(item)}")
+            
         parts: list[MultipartPart] = []
 
         for example in upserts:
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 57a6e2171..bfe0d818a 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -20,7 +20,7 @@
 from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
 
 from langsmith.client import ID_TYPE, Client
-from langsmith.schemas import DataType
+from langsmith.schemas import DataType, ExampleCreateWithAttachments
 from langsmith.utils import (
     LangSmithConnectionError,
     LangSmithError,
@@ -369,6 +369,72 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
         client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm")
 
 
+@pytest.mark.parametrize("uri", ["http://dev.api.smith.langchain.com"])
+def test_upsert_examples_multipart(uri: str) -> None:
+    """Test upserting examples with attachments via multipart endpoint."""
+    dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
+    langchain_client = Client(api_url=uri, api_key="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")
+    if langchain_client.has_dataset(dataset_name=dataset_name):
+        langchain_client.delete_dataset(dataset_name=dataset_name)
+
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for multipart example upload",
+        data_type=DataType.kv,
+    )
+
+    # Test example with all fields
+    example_id = uuid4()
+    example_1 = ExampleCreateWithAttachments(
+        id=example_id,
+        dataset_id=dataset.id,
+        inputs={"text": "hello world"},
+        outputs={"response": "greeting"},
+        attachments={
+            "test_file": ("text/plain", b"test content"),
+        },
+    )
+    # Test example without id
+    example_2 = ExampleCreateWithAttachments(
+        dataset_id=dataset.id,
+        inputs={"text": "foo bar"},
+        outputs={"response": "baz"},
+        attachments={
+            "my_file": ("text/plain", b"more test content"),
+        },
+    )
+
+    langchain_client.upsert_examples_multipart([example_1, example_2])
+    
+    created_example = langchain_client.read_example(example_id)
+    assert created_example.inputs["text"] == "hello world"
+    assert created_example.outputs["response"] == "greeting"
+
+    all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
+    assert len(all_examples_in_dataset) == 2
+
+    # Test that adding invalid example fails - even if valid examples are added alongside
+    example_3 = ExampleCreateWithAttachments(
+        dataset_id=uuid4(), # not a real dataset
+        inputs={"text": "foo bar"},
+        outputs={"response": "baz"},
+        attachments={
+            "my_file": ("text/plain", b"more test content"),
+        },
+    )
+
+    # will this throw an error? idk need to test
+    langchain_client.upsert_examples_multipart([example_2, example_3]) # don't add example_1 because of explicit id
+
+    all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
+    assert len(all_examples_in_dataset) == 2
+
+    # Throw type errors when not passing ExampleCreateWithAttachments
+    with pytest.raises(TypeError):
+        langchain_client.upsert_examples_multipart([{"foo":"bar"}])
+
+    langchain_client.delete_dataset(dataset_name=dataset_name)
+
 def test_create_dataset(langchain_client: Client) -> None:
     dataset_name = "__test_create_dataset" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):
diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index edda5dd09..4b68ce368 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -417,8 +417,8 @@ def test_create_run_mutate(
 
 
 @mock.patch("langsmith.client.requests.Session")
-def test_upsert_example_multipart(mock_session_cls: mock.Mock) -> None:
-    """Test that upsert_example_multipart sends correct multipart data."""
+def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None:
+    """Test that upsert_examples_multipart sends correct multipart data."""
     mock_session = MagicMock()
     mock_response = MagicMock()
     mock_response.status_code = 200
@@ -447,7 +447,7 @@ def test_upsert_example_multipart(mock_session_cls: mock.Mock) -> None:
             ),
         },
     )
-    client.upsert_example_multipart(upserts=[example])
+    client.upsert_examples_multipart(upserts=[example])
 
     # Verify the request
     assert mock_session.request.call_count == 2  # we always make a call to /info

From 27b15462180219cef6878bf80c27d4436b3b21a6 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Wed, 13 Nov 2024 12:48:09 -0800
Subject: [PATCH 04/88] integration test passing

---
 python/langsmith/client.py                    | 16 +++++++++-------
 python/tests/integration_tests/test_client.py | 15 ++++++++-------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 7e823573d..a6ff8d4c8 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3394,9 +3394,11 @@ def upsert_examples_multipart(
             remaining_values = {
                 "dataset_id": example.dataset_id,
                 "created_at": example.created_at,
-                "metadata": example.metadata,
-                "split": example.split,
             }
+            if example.metadata is not None:
+                remaining_values["metadata"] = example.metadata
+            if example.split is not None:
+                remaining_values["split"] = example.split
             valb = _dumps_json(remaining_values)
 
             (
@@ -3455,8 +3457,8 @@ def upsert_examples_multipart(
                                     (
                                         None,
                                         data,
-                                        mime_type,
-                                        {"Content-Length": str(len(data))},
+                                        f"{mime_type}; length={len(data)}",
+                                        {},
                                     ),
                                 )
                             ),
@@ -3469,8 +3471,8 @@ def upsert_examples_multipart(
                                     (
                                         None,
                                         attachment.data,
-                                        attachment.mime_type,
-                                        {"Content-Length": str(len(attachment.data))},
+                                        f"{attachment.mime_type}; length={len(attachment.data)}",
+                                        {},
                                     ),
                                 )
                             ),
@@ -3484,7 +3486,7 @@ def upsert_examples_multipart(
 
         response = self.request_with_retries(
             "POST",
-            "/v1/examples/multipart",  # No clue what this is supposed to be
+            "/v1/platform/examples/multipart",  # No clue what this is supposed to be
             request_kwargs={
                 "data": data,
                 "headers": {
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index bfe0d818a..c86ce3dc9 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -22,6 +22,7 @@
 from langsmith.client import ID_TYPE, Client
 from langsmith.schemas import DataType, ExampleCreateWithAttachments
 from langsmith.utils import (
+    LangSmithNotFoundError,
     LangSmithConnectionError,
     LangSmithError,
     get_env_var,
@@ -368,12 +369,12 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
     with pytest.raises(LangSmithConnectionError):
         client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm")
 
-
-@pytest.mark.parametrize("uri", ["http://dev.api.smith.langchain.com"])
+# NEED TO FIX ONCE CHANGES PUSH TO PROD
+@pytest.mark.parametrize("uri", ["https://dev.api.smith.langchain.com"])
 def test_upsert_examples_multipart(uri: str) -> None:
     """Test upserting examples with attachments via multipart endpoint."""
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
-    langchain_client = Client(api_url=uri, api_key="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")
+    langchain_client = Client(api_url=uri, api_key="NEED TO HARDCODE FOR TESTING")
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
 
@@ -404,7 +405,7 @@ def test_upsert_examples_multipart(uri: str) -> None:
         },
     )
 
-    langchain_client.upsert_examples_multipart([example_1, example_2])
+    langchain_client.upsert_examples_multipart(upserts=[example_1, example_2])
     
     created_example = langchain_client.read_example(example_id)
     assert created_example.inputs["text"] == "hello world"
@@ -423,15 +424,15 @@ def test_upsert_examples_multipart(uri: str) -> None:
         },
     )
 
-    # will this throw an error? idk need to test
-    langchain_client.upsert_examples_multipart([example_2, example_3]) # don't add example_1 because of explicit id
+    with pytest.raises(LangSmithNotFoundError):
+        langchain_client.upsert_examples_multipart(upserts=[example_3])
 
     all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
     assert len(all_examples_in_dataset) == 2
 
     # Throw type errors when not passing ExampleCreateWithAttachments
     with pytest.raises(TypeError):
-        langchain_client.upsert_examples_multipart([{"foo":"bar"}])
+        langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}])
 
     langchain_client.delete_dataset(dataset_name=dataset_name)
 

From 53a0f1494548b297c5d19d13e93cbd409bdeaa80 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Wed, 13 Nov 2024 16:33:15 -0800
Subject: [PATCH 05/88] wip

---
 python/bench/upload_examples_bench.py         | 124 ++++++++++++++++++
 python/langsmith/client.py                    |  29 ++--
 python/tests/integration_tests/test_client.py |   4 +-
 3 files changed, 138 insertions(+), 19 deletions(-)
 create mode 100644 python/bench/upload_examples_bench.py

diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py
new file mode 100644
index 000000000..41e2faa4e
--- /dev/null
+++ b/python/bench/upload_examples_bench.py
@@ -0,0 +1,124 @@
+import statistics
+import time
+from typing import Dict
+from uuid import uuid4
+from langsmith.schemas import DataType, ExampleCreateWithAttachments
+import sys
+sys.path.append('./../langsmith')
+from client import Client
+
+def create_large_json(length: int) -> Dict:
+    """Create a large JSON object for benchmarking purposes."""
+    large_array = [
+        {
+            "index": i,
+            "data": f"This is element number {i}",
+            "nested": {"id": i, "value": f"Nested value for element {i}"},
+        }
+        for i in range(length)
+    ]
+
+    return {
+        "name": "Huge JSON" + str(uuid4()),
+        "description": "This is a very large JSON object for benchmarking purposes.",
+        "array": large_array,
+        "metadata": {
+            "created_at": "2024-10-22T19:00:00Z",
+            "author": "Python Program",
+            "version": 1.0,
+        },
+    }
+
+
+def create_example_data(dataset_id: str, json_size: int) -> Dict:
+    """Create a single example data object."""
+    return ExampleCreateWithAttachments(**{
+        "dataset_id": dataset_id,
+        "inputs": create_large_json(json_size),
+        "outputs": create_large_json(json_size),
+    })
+
+DATASET_NAME = "TEST DATASET"
+def benchmark_example_uploading(num_examples: int, json_size: int, samples: int = 1) -> Dict:
+    """
+    Benchmark run creation with specified parameters.
+    Returns timing statistics.
+    """
+    multipart_timings, old_timings = [], []
+
+
+    for _ in range(samples):
+        client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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")
+
+        if client.has_dataset(dataset_name=DATASET_NAME):
+            client.delete_dataset(dataset_name=DATASET_NAME)
+
+        dataset = client.create_dataset(
+            DATASET_NAME,
+            description="Test dataset for multipart example upload",
+            data_type=DataType.kv,
+        )
+        examples = [create_example_data(dataset.id, json_size) for i in range(num_examples)]
+
+        # Old method
+        old_start = time.perf_counter()
+        inputs=[e.inputs for e in examples]
+        outputs=[e.outputs for e in examples]
+        # the create_examples endpoint fails above 20mb
+        try:
+            client.create_examples(inputs=inputs,
+                                outputs=outputs,dataset_id=dataset.id)
+            old_elapsed = time.perf_counter() - old_start
+        except:
+            old_elapsed = 1000000
+
+        # New method
+        multipart_start = time.perf_counter()
+        client.upsert_examples_multipart(upserts=examples)
+        multipart_elapsed = time.perf_counter() - multipart_start
+
+        multipart_timings.append(multipart_elapsed)
+        old_timings.append(old_elapsed)
+
+    return {
+        "old": {
+            "mean": statistics.mean(old_timings),
+            "median": statistics.median(old_timings),
+            "stdev": statistics.stdev(old_timings) if len(old_timings) > 1 else 0,
+            "min": min(old_timings),
+            "max": max(old_timings),
+        },
+        "new": {
+            "mean": statistics.mean(multipart_timings),
+            "median": statistics.median(multipart_timings),
+            "stdev": statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0,
+            "min": min(multipart_timings),
+            "max": max(multipart_timings),
+        }
+    }
+
+json_size = 1000
+num_examples = 1000
+
+def main(json_size: int, num_examples: int):
+    """
+    Run benchmarks with different combinations of parameters and report results.
+    """
+    results = benchmark_example_uploading(num_examples=num_examples, json_size=json_size)
+    
+    print(f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:")
+    print("-" * 60)
+    print(f"{'Metric':<15} {'Old Method':>20} {'New Method':>20}")
+    print("-" * 60)
+    
+    metrics = ['mean', 'median', 'stdev', 'min', 'max']
+    for metric in metrics:
+        print(f"{metric:<15} {results['old'][metric]:>20.4f} {results['new'][metric]:>20.4f}")
+    
+    print("-" * 60)
+    print(f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} {num_examples / results['new']['mean']:>20.2f}")
+    print("(examples/second)")
+
+
+if __name__ == "__main__":
+    main(json_size, num_examples)
\ No newline at end of file
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index a6ff8d4c8..7d31a8652 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3373,33 +3373,28 @@ def create_example_from_run(
     def upsert_examples_multipart(
         self,
         *,
-        upserts: List[ls_schemas.ExampleCreateWithAttachments] = [],
+        upserts: List[ls_schemas.ExampleCreateWithAttachments] = None,
     ) -> None:
         """Upsert examples."""
-        # not sure if the below checks are necessary
-        if not isinstance(upserts, list):
-            raise TypeError(f"upserts must be a list, got {type(upserts)}")
-        for item in upserts:
-            if not isinstance(item, ls_schemas.ExampleCreateWithAttachments):
-                raise TypeError(f"Each item must be ExampleCreateWithAttachments, got {type(item)}")
-            
+        if upserts is None:
+            upserts = []
         parts: list[MultipartPart] = []
 
         for example in upserts:
             if example.id is not None:
-                example_id = str(example.id)  # is the conversion to string neccessary?
+                example_id = str(example.id)
             else:
                 example_id = str(uuid.uuid4())
 
-            remaining_values = {
+            example_body = {
                 "dataset_id": example.dataset_id,
                 "created_at": example.created_at,
             }
             if example.metadata is not None:
-                remaining_values["metadata"] = example.metadata
+                example_body["metadata"] = example.metadata
             if example.split is not None:
-                remaining_values["split"] = example.split
-            valb = _dumps_json(remaining_values)
+                example_body["split"] = example.split
+            valb = _dumps_json(example_body)
 
             (
                 parts.append(
@@ -3409,7 +3404,7 @@ def upsert_examples_multipart(
                             None,
                             valb,
                             "application/json",
-                            {"Content-Length": str(len(valb))},
+                            {},
                         ),
                     )
                 ),
@@ -3426,7 +3421,7 @@ def upsert_examples_multipart(
                             None,
                             inputsb,
                             "application/json",
-                            {"Content-Length": str(len(inputsb))},
+                            {},
                         ),
                     )
                 ),
@@ -3440,7 +3435,7 @@ def upsert_examples_multipart(
                             None,
                             outputsb,
                             "application/json",
-                            {"Content-Length": str(len(outputsb))},
+                            {},
                         ),
                     )
                 ),
@@ -3486,7 +3481,7 @@ def upsert_examples_multipart(
 
         response = self.request_with_retries(
             "POST",
-            "/v1/platform/examples/multipart",  # No clue what this is supposed to be
+            "/v1/platform/examples/multipart",
             request_kwargs={
                 "data": data,
                 "headers": {
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index c86ce3dc9..56112975a 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -374,7 +374,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
 def test_upsert_examples_multipart(uri: str) -> None:
     """Test upserting examples with attachments via multipart endpoint."""
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
-    langchain_client = Client(api_url=uri, api_key="NEED TO HARDCODE FOR TESTING")
+    langchain_client = Client(api_url=uri, api_key="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")
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
 
@@ -431,7 +431,7 @@ def test_upsert_examples_multipart(uri: str) -> None:
     assert len(all_examples_in_dataset) == 2
 
     # Throw type errors when not passing ExampleCreateWithAttachments
-    with pytest.raises(TypeError):
+    with pytest.raises(AttributeError):
         langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}])
 
     langchain_client.delete_dataset(dataset_name=dataset_name)

From 025aa6d65fa8b469d2969d63ca3750b29826eef6 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Wed, 13 Nov 2024 17:11:30 -0800
Subject: [PATCH 06/88] wip

---
 python/bench/upload_examples_bench.py         |  2 +-
 python/langsmith/client.py                    |  8 +++++++-
 python/langsmith/schemas.py                   |  2 ++
 python/tests/integration_tests/test_client.py | 19 ++++++++++++-------
 4 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py
index 41e2faa4e..dc7efeae4 100644
--- a/python/bench/upload_examples_bench.py
+++ b/python/bench/upload_examples_bench.py
@@ -48,7 +48,7 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int
 
 
     for _ in range(samples):
-        client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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")
+        client = Client(api_url="https://dev.api.smith.langchain.com")
 
         if client.has_dataset(dataset_name=DATASET_NAME):
             client.delete_dataset(dataset_name=DATASET_NAME)
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 7d31a8652..94359446d 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3374,8 +3374,13 @@ def upsert_examples_multipart(
         self,
         *,
         upserts: List[ls_schemas.ExampleCreateWithAttachments] = None,
-    ) -> None:
+    ) -> dict: # Should we create an object for the return type - like UpsertExamplesResponse?
         """Upsert examples."""
+        if not (self.info.instance_flags or {}).get(
+                "examples_multipart_enabled", False
+            ):
+            raise ValueError("Your LangChain version does not allow using the multipart examples endpoint, please update to the latest version.")
+        
         if upserts is None:
             upserts = []
         parts: list[MultipartPart] = []
@@ -3491,6 +3496,7 @@ def upsert_examples_multipart(
             },
         )
         ls_utils.raise_for_status_with_text(response)
+        return response.json()
 
     def create_examples(
         self,
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 4e8711002..d309cb5fd 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -701,6 +701,8 @@ class LangSmithInfo(BaseModel):
     license_expiration_time: Optional[datetime] = None
     """The time the license will expire."""
     batch_ingest_config: Optional[BatchIngestConfig] = None
+    """The instance flags."""
+    instance_flags: dict[str, Any] = None
 
 
 Example.update_forward_refs()
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 56112975a..e202d6b39 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -370,11 +370,10 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
         client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm")
 
 # NEED TO FIX ONCE CHANGES PUSH TO PROD
-@pytest.mark.parametrize("uri", ["https://dev.api.smith.langchain.com"])
-def test_upsert_examples_multipart(uri: str) -> None:
+def test_upsert_examples_multipart() -> None:
     """Test upserting examples with attachments via multipart endpoint."""
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
-    langchain_client = Client(api_url=uri, api_key="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")
+    langchain_client = Client(api_url="https://dev.api.smith.langchain.com")
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
 
@@ -405,12 +404,18 @@ def test_upsert_examples_multipart(uri: str) -> None:
         },
     )
 
-    langchain_client.upsert_examples_multipart(upserts=[example_1, example_2])
+    created_examples = langchain_client.upsert_examples_multipart(upserts=[example_1, example_2])
+    assert created_examples['count'] == 2
     
-    created_example = langchain_client.read_example(example_id)
-    assert created_example.inputs["text"] == "hello world"
-    assert created_example.outputs["response"] == "greeting"
+    created_example_1 = langchain_client.read_example(created_examples['example_ids'][0])
+    assert created_example_1.inputs["text"] == "hello world"
+    assert created_example_1.outputs["response"] == "greeting"
 
+    created_example_2 = langchain_client.read_example(created_examples['example_ids'][1])
+    assert created_example_2.inputs["text"] == "foo bar"
+    assert created_example_2.outputs["response"] == "baz"
+
+    # make sure examples were sent to the correct dataset
     all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
     assert len(all_examples_in_dataset) == 2
 

From 4208b6e491a415d05514f2b22d85977746f04e8f Mon Sep 17 00:00:00 2001
From: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Date: Thu, 14 Nov 2024 07:50:14 -0800
Subject: [PATCH 07/88] Update python/langsmith/client.py

Co-authored-by: Ankush Gola <9536492+agola11@users.noreply.github.com>
---
 python/langsmith/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 94359446d..aa5f009bb 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3379,7 +3379,7 @@ def upsert_examples_multipart(
         if not (self.info.instance_flags or {}).get(
                 "examples_multipart_enabled", False
             ):
-            raise ValueError("Your LangChain version does not allow using the multipart examples endpoint, please update to the latest version.")
+            raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.")
         
         if upserts is None:
             upserts = []

From fd16baa5e721dac218f0e9b282b9f8024f8f4281 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Thu, 14 Nov 2024 09:06:47 -0800
Subject: [PATCH 08/88] more edits

---
 python/bench/upload_examples_bench.py         | 18 +++++-------
 python/langsmith/client.py                    |  8 +++---
 python/langsmith/schemas.py                   | 20 ++++++++-----
 python/tests/integration_tests/test_client.py | 28 +++++++++++++++----
 4 files changed, 46 insertions(+), 28 deletions(-)

diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py
index dc7efeae4..0dd979313 100644
--- a/python/bench/upload_examples_bench.py
+++ b/python/bench/upload_examples_bench.py
@@ -4,8 +4,7 @@
 from uuid import uuid4
 from langsmith.schemas import DataType, ExampleCreateWithAttachments
 import sys
-sys.path.append('./../langsmith')
-from client import Client
+from langsmith import Client
 
 def create_large_json(length: int) -> Dict:
     """Create a large JSON object for benchmarking purposes."""
@@ -38,7 +37,7 @@ def create_example_data(dataset_id: str, json_size: int) -> Dict:
         "outputs": create_large_json(json_size),
     })
 
-DATASET_NAME = "TEST DATASET"
+DATASET_NAME = "upsert_llm_evaluator_benchmark_dataset"
 def benchmark_example_uploading(num_examples: int, json_size: int, samples: int = 1) -> Dict:
     """
     Benchmark run creation with specified parameters.
@@ -64,13 +63,10 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int
         old_start = time.perf_counter()
         inputs=[e.inputs for e in examples]
         outputs=[e.outputs for e in examples]
-        # the create_examples endpoint fails above 20mb
-        try:
-            client.create_examples(inputs=inputs,
-                                outputs=outputs,dataset_id=dataset.id)
-            old_elapsed = time.perf_counter() - old_start
-        except:
-            old_elapsed = 1000000
+        # the create_examples endpoint fails above 20mb - so this will crash with json_size > ~100
+        client.create_examples(inputs=inputs,
+                            outputs=outputs,dataset_id=dataset.id)
+        old_elapsed = time.perf_counter() - old_start
 
         # New method
         multipart_start = time.perf_counter()
@@ -121,4 +117,4 @@ def main(json_size: int, num_examples: int):
 
 
 if __name__ == "__main__":
-    main(json_size, num_examples)
\ No newline at end of file
+    main(json_size, num_examples)
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index aa5f009bb..0f9455ff1 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3373,14 +3373,14 @@ def create_example_from_run(
     def upsert_examples_multipart(
         self,
         *,
-        upserts: List[ls_schemas.ExampleCreateWithAttachments] = None,
-    ) -> dict: # Should we create an object for the return type - like UpsertExamplesResponse?
+        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None,
+    ) -> ls_schemas.UpsertExamplesResponse:
         """Upsert examples."""
-        if not (self.info.instance_flags or {}).get(
+        """ if not (self.info.instance_flags or {}).get(
                 "examples_multipart_enabled", False
             ):
             raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.")
-        
+         """
         if upserts is None:
             upserts = []
         parts: list[MultipartPart] = []
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index d309cb5fd..7b94c2017 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -89,7 +89,7 @@ class ExampleCreate(ExampleBase):
     split: Optional[Union[str, List[str]]] = None
 
 
-class ExampleCreateWithAttachments(ExampleCreate):
+class ExampleUpsertWithAttachments(ExampleCreate):
     """Example create with attachments."""
 
     attachments: Optional[Attachments] = None
@@ -131,12 +131,6 @@ def url(self) -> Optional[str]:
         return None
 
 
-class ExampleSearch(ExampleBase):
-    """Example returned via search."""
-
-    id: UUID
-
-
 class ExampleUpdate(BaseModel):
     """Update class for Example."""
 
@@ -151,6 +145,10 @@ class Config:
 
         frozen = True
 
+class ExampleUpdateWithAttachments(ExampleUpdate):
+    """Example update with attachments."""
+    id: UUID
+    attachments: Optional[Attachments] = None
 
 class DataType(str, Enum):
     """Enum for dataset data types."""
@@ -988,3 +986,11 @@ class UsageMetadata(TypedDict):
 
     Does *not* need to sum to full output token count. Does *not* need to have all keys.
     """
+
+class UpsertExamplesResponse(TypedDict):
+    """Response object returned from the upsert_examples_multipart method."""
+
+    count: int
+    """The number of examples that were upserted."""
+    example_ids: List[str]
+    """The ids of the examples that were upserted."""
\ No newline at end of file
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index e202d6b39..bf99b9d62 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -20,7 +20,7 @@
 from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
 
 from langsmith.client import ID_TYPE, Client
-from langsmith.schemas import DataType, ExampleCreateWithAttachments
+from langsmith.schemas import DataType, ExampleUpsertWithAttachments
 from langsmith.utils import (
     LangSmithNotFoundError,
     LangSmithConnectionError,
@@ -373,7 +373,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
 def test_upsert_examples_multipart() -> None:
     """Test upserting examples with attachments via multipart endpoint."""
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
-    langchain_client = Client(api_url="https://dev.api.smith.langchain.com")
+    langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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")
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
 
@@ -385,7 +385,7 @@ def test_upsert_examples_multipart() -> None:
 
     # Test example with all fields
     example_id = uuid4()
-    example_1 = ExampleCreateWithAttachments(
+    example_1 = ExampleUpsertWithAttachments(
         id=example_id,
         dataset_id=dataset.id,
         inputs={"text": "hello world"},
@@ -395,7 +395,7 @@ def test_upsert_examples_multipart() -> None:
         },
     )
     # Test example without id
-    example_2 = ExampleCreateWithAttachments(
+    example_2 = ExampleUpsertWithAttachments(
         dataset_id=dataset.id,
         inputs={"text": "foo bar"},
         outputs={"response": "baz"},
@@ -419,8 +419,24 @@ def test_upsert_examples_multipart() -> None:
     all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
     assert len(all_examples_in_dataset) == 2
 
+    example_1_update = ExampleUpsertWithAttachments(
+        id=example_id,
+        dataset_id=dataset.id,
+        inputs={"text": "bar baz"},
+        outputs={"response": "foo"},
+        attachments={
+            "my_file": ("text/plain", b"more test content"),
+        },
+    )
+    updated_examples = langchain_client.upsert_examples_multipart(upserts=[example_1_update])
+    assert updated_examples['count'] == 1
+    assert updated_examples['example_ids'][0] == str(example_id)
+    updated_example = langchain_client.read_example(updated_examples['example_ids'][0])
+    assert updated_example.inputs['text'] == "bar baz"
+    assert updated_example.outputs['response'] == "foo"
+
     # Test that adding invalid example fails - even if valid examples are added alongside
-    example_3 = ExampleCreateWithAttachments(
+    example_3 = ExampleUpsertWithAttachments(
         dataset_id=uuid4(), # not a real dataset
         inputs={"text": "foo bar"},
         outputs={"response": "baz"},
@@ -435,7 +451,7 @@ def test_upsert_examples_multipart() -> None:
     all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
     assert len(all_examples_in_dataset) == 2
 
-    # Throw type errors when not passing ExampleCreateWithAttachments
+    # Throw type errors when not passing ExampleUpsertWithAttachments
     with pytest.raises(AttributeError):
         langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}])
 

From 28a46771ecce2b4d5da950436fa5ce5b9a3bec25 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Thu, 14 Nov 2024 12:04:02 -0800
Subject: [PATCH 09/88] nit

---
 python/langsmith/client.py                    | 29 ++++++++++---------
 python/tests/integration_tests/test_client.py |  4 +--
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 0f9455ff1..9d6283c77 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3416,7 +3416,6 @@ def upsert_examples_multipart(
             )
 
             inputsb = _dumps_json(example.inputs)
-            outputsb = _dumps_json(example.outputs)
 
             (
                 parts.append(
@@ -3431,20 +3430,22 @@ def upsert_examples_multipart(
                     )
                 ),
             )
-
-            (
-                parts.append(
-                    (
-                        f"{example_id}.outputs",
+            
+            if example.outputs:
+                outputsb = _dumps_json(example.outputs)
+                (
+                    parts.append(
                         (
-                            None,
-                            outputsb,
-                            "application/json",
-                            {},
-                        ),
-                    )
-                ),
-            )
+                            f"{example_id}.outputs",
+                            (
+                                None,
+                                outputsb,
+                                "application/json",
+                                {},
+                            ),
+                        )
+                    ),
+                )
 
             if example.attachments:
                 for name, attachment in example.attachments.items():
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index bf99b9d62..068f75e73 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -389,7 +389,7 @@ def test_upsert_examples_multipart() -> None:
         id=example_id,
         dataset_id=dataset.id,
         inputs={"text": "hello world"},
-        outputs={"response": "greeting"},
+        # test without outputs
         attachments={
             "test_file": ("text/plain", b"test content"),
         },
@@ -409,7 +409,7 @@ def test_upsert_examples_multipart() -> None:
     
     created_example_1 = langchain_client.read_example(created_examples['example_ids'][0])
     assert created_example_1.inputs["text"] == "hello world"
-    assert created_example_1.outputs["response"] == "greeting"
+    assert created_example_1.outputs == None
 
     created_example_2 = langchain_client.read_example(created_examples['example_ids'][1])
     assert created_example_2.inputs["text"] == "foo bar"

From 816302d1ec888d8f42d34476e10982fe07422220 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Thu, 14 Nov 2024 13:14:53 -0800
Subject: [PATCH 10/88] nit

---
 python/tests/integration_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 068f75e73..170220678 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -373,7 +373,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
 def test_upsert_examples_multipart() -> None:
     """Test upserting examples with attachments via multipart endpoint."""
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
-    langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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")
+    langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="HARDCODE FOR TESTING")
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
 

From aa947a641e3859559624edbebb365cda90f221de Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 14:29:33 -0800
Subject: [PATCH 11/88] remove dev endpoint in test

---
 python/tests/integration_tests/test_client.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 170220678..b752800b1 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -369,11 +369,10 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
     with pytest.raises(LangSmithConnectionError):
         client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm")
 
-# NEED TO FIX ONCE CHANGES PUSH TO PROD
-def test_upsert_examples_multipart() -> None:
+
+def test_upsert_examples_multipart(langchain_client: Client) -> None:
     """Test upserting examples with attachments via multipart endpoint."""
-    dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
-    langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="HARDCODE FOR TESTING")
+    dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]"
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
 

From a82063b159929c2f708ecedacb4aad155f99e035 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 14:33:06 -0800
Subject: [PATCH 12/88] typo

---
 python/tests/integration_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index b752800b1..74c9d5168 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -372,7 +372,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
 
 def test_upsert_examples_multipart(langchain_client: Client) -> None:
     """Test upserting examples with attachments via multipart endpoint."""
-    dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]"
+    dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
 

From ad19dafd46d4a72c323ce55dcdd6c68af060daf6 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:07:15 -0800
Subject: [PATCH 13/88] fmt

---
 python/bench/upload_examples_bench.py         | 73 ++++++++++++-------
 python/langsmith/client.py                    |  2 +-
 python/langsmith/schemas.py                   |  6 +-
 python/tests/integration_tests/test_client.py | 52 ++++++++-----
 4 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py
index 0dd979313..f6669b293 100644
--- a/python/bench/upload_examples_bench.py
+++ b/python/bench/upload_examples_bench.py
@@ -2,9 +2,10 @@
 import time
 from typing import Dict
 from uuid import uuid4
-from langsmith.schemas import DataType, ExampleCreateWithAttachments
-import sys
+
 from langsmith import Client
+from langsmith.schemas import DataType, ExampleCreateWithAttachments
+
 
 def create_large_json(length: int) -> Dict:
     """Create a large JSON object for benchmarking purposes."""
@@ -31,21 +32,27 @@ def create_large_json(length: int) -> Dict:
 
 def create_example_data(dataset_id: str, json_size: int) -> Dict:
     """Create a single example data object."""
-    return ExampleCreateWithAttachments(**{
-        "dataset_id": dataset_id,
-        "inputs": create_large_json(json_size),
-        "outputs": create_large_json(json_size),
-    })
+    return ExampleCreateWithAttachments(
+        **{
+            "dataset_id": dataset_id,
+            "inputs": create_large_json(json_size),
+            "outputs": create_large_json(json_size),
+        }
+    )
+
 
 DATASET_NAME = "upsert_llm_evaluator_benchmark_dataset"
-def benchmark_example_uploading(num_examples: int, json_size: int, samples: int = 1) -> Dict:
+
+
+def benchmark_example_uploading(
+    num_examples: int, json_size: int, samples: int = 1
+) -> Dict:
     """
     Benchmark run creation with specified parameters.
     Returns timing statistics.
     """
     multipart_timings, old_timings = [], []
 
-
     for _ in range(samples):
         client = Client(api_url="https://dev.api.smith.langchain.com")
 
@@ -57,15 +64,17 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int
             description="Test dataset for multipart example upload",
             data_type=DataType.kv,
         )
-        examples = [create_example_data(dataset.id, json_size) for i in range(num_examples)]
+        examples = [
+            create_example_data(dataset.id, json_size) for i in range(num_examples)
+        ]
 
         # Old method
         old_start = time.perf_counter()
-        inputs=[e.inputs for e in examples]
-        outputs=[e.outputs for e in examples]
-        # the create_examples endpoint fails above 20mb - so this will crash with json_size > ~100
-        client.create_examples(inputs=inputs,
-                            outputs=outputs,dataset_id=dataset.id)
+        inputs = [e.inputs for e in examples]
+        outputs = [e.outputs for e in examples]
+        # the create_examples endpoint fails above 20mb
+        # so this will crash with json_size > ~100
+        client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
         old_elapsed = time.perf_counter() - old_start
 
         # New method
@@ -87,32 +96,46 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int
         "new": {
             "mean": statistics.mean(multipart_timings),
             "median": statistics.median(multipart_timings),
-            "stdev": statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0,
+            "stdev": statistics.stdev(multipart_timings)
+            if len(multipart_timings) > 1
+            else 0,
             "min": min(multipart_timings),
             "max": max(multipart_timings),
-        }
+        },
     }
 
+
 json_size = 1000
 num_examples = 1000
 
+
 def main(json_size: int, num_examples: int):
     """
     Run benchmarks with different combinations of parameters and report results.
     """
-    results = benchmark_example_uploading(num_examples=num_examples, json_size=json_size)
-    
-    print(f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:")
+    results = benchmark_example_uploading(
+        num_examples=num_examples, json_size=json_size
+    )
+
+    print(
+        f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:"
+    )
     print("-" * 60)
     print(f"{'Metric':<15} {'Old Method':>20} {'New Method':>20}")
     print("-" * 60)
-    
-    metrics = ['mean', 'median', 'stdev', 'min', 'max']
+
+    metrics = ["mean", "median", "stdev", "min", "max"]
     for metric in metrics:
-        print(f"{metric:<15} {results['old'][metric]:>20.4f} {results['new'][metric]:>20.4f}")
-    
+        print(
+            f"{metric:<15} {results['old'][metric]:>20.4f} "
+            f"{results['new'][metric]:>20.4f}"
+        )
+
     print("-" * 60)
-    print(f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} {num_examples / results['new']['mean']:>20.2f}")
+    print(
+        f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} "
+        f"{num_examples / results['new']['mean']:>20.2f}"
+    )
     print("(examples/second)")
 
 
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 9d6283c77..ff77b81d8 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3430,7 +3430,7 @@ def upsert_examples_multipart(
                     )
                 ),
             )
-            
+
             if example.outputs:
                 outputsb = _dumps_json(example.outputs)
                 (
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 9409314c0..b5a74d0fb 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -149,11 +149,14 @@ class Config:
 
         frozen = True
 
+
 class ExampleUpdateWithAttachments(ExampleUpdate):
     """Example update with attachments."""
+
     id: UUID
     attachments: Optional[Attachments] = None
 
+
 class DataType(str, Enum):
     """Enum for dataset data types."""
 
@@ -991,10 +994,11 @@ class UsageMetadata(TypedDict):
     Does *not* need to sum to full output token count. Does *not* need to have all keys.
     """
 
+
 class UpsertExamplesResponse(TypedDict):
     """Response object returned from the upsert_examples_multipart method."""
 
     count: int
     """The number of examples that were upserted."""
     example_ids: List[str]
-    """The ids of the examples that were upserted."""
\ No newline at end of file
+    """The ids of the examples that were upserted."""
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 74c9d5168..d1d16ce0f 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -22,9 +22,9 @@
 from langsmith.client import ID_TYPE, Client
 from langsmith.schemas import DataType, ExampleUpsertWithAttachments
 from langsmith.utils import (
-    LangSmithNotFoundError,
     LangSmithConnectionError,
     LangSmithError,
+    LangSmithNotFoundError,
     get_env_var,
 )
 
@@ -403,19 +403,27 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
         },
     )
 
-    created_examples = langchain_client.upsert_examples_multipart(upserts=[example_1, example_2])
-    assert created_examples['count'] == 2
-    
-    created_example_1 = langchain_client.read_example(created_examples['example_ids'][0])
+    created_examples = langchain_client.upsert_examples_multipart(
+        upserts=[example_1, example_2]
+    )
+    assert created_examples["count"] == 2
+
+    created_example_1 = langchain_client.read_example(
+        created_examples["example_ids"][0]
+    )
     assert created_example_1.inputs["text"] == "hello world"
-    assert created_example_1.outputs == None
+    assert created_example_1.outputs is None
 
-    created_example_2 = langchain_client.read_example(created_examples['example_ids'][1])
+    created_example_2 = langchain_client.read_example(
+        created_examples["example_ids"][1]
+    )
     assert created_example_2.inputs["text"] == "foo bar"
     assert created_example_2.outputs["response"] == "baz"
 
     # make sure examples were sent to the correct dataset
-    all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
+    all_examples_in_dataset = [
+        example for example in langchain_client.list_examples(dataset_id=dataset.id)
+    ]
     assert len(all_examples_in_dataset) == 2
 
     example_1_update = ExampleUpsertWithAttachments(
@@ -427,16 +435,19 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
             "my_file": ("text/plain", b"more test content"),
         },
     )
-    updated_examples = langchain_client.upsert_examples_multipart(upserts=[example_1_update])
-    assert updated_examples['count'] == 1
-    assert updated_examples['example_ids'][0] == str(example_id)
-    updated_example = langchain_client.read_example(updated_examples['example_ids'][0])
-    assert updated_example.inputs['text'] == "bar baz"
-    assert updated_example.outputs['response'] == "foo"
-
-    # Test that adding invalid example fails - even if valid examples are added alongside
+    updated_examples = langchain_client.upsert_examples_multipart(
+        upserts=[example_1_update]
+    )
+    assert updated_examples["count"] == 1
+    assert updated_examples["example_ids"][0] == str(example_id)
+    updated_example = langchain_client.read_example(updated_examples["example_ids"][0])
+    assert updated_example.inputs["text"] == "bar baz"
+    assert updated_example.outputs["response"] == "foo"
+
+    # Test that adding invalid example fails
+    # even if valid examples are added alongside
     example_3 = ExampleUpsertWithAttachments(
-        dataset_id=uuid4(), # not a real dataset
+        dataset_id=uuid4(),  # not a real dataset
         inputs={"text": "foo bar"},
         outputs={"response": "baz"},
         attachments={
@@ -447,15 +458,18 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
     with pytest.raises(LangSmithNotFoundError):
         langchain_client.upsert_examples_multipart(upserts=[example_3])
 
-    all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)]
+    all_examples_in_dataset = [
+        example for example in langchain_client.list_examples(dataset_id=dataset.id)
+    ]
     assert len(all_examples_in_dataset) == 2
 
     # Throw type errors when not passing ExampleUpsertWithAttachments
     with pytest.raises(AttributeError):
-        langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}])
+        langchain_client.upsert_examples_multipart(upserts=[{"foo": "bar"}])
 
     langchain_client.delete_dataset(dataset_name=dataset_name)
 
+
 def test_create_dataset(langchain_client: Client) -> None:
     dataset_name = "__test_create_dataset" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):

From 390ac66d62237d58759cf2aa7b65898b90187893 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:07:49 -0800
Subject: [PATCH 14/88] bump actions/setup-python from v4 to v5 in CI workflows

---
 .github/actions/python-integration-tests/action.yml | 2 +-
 .github/workflows/integration_tests.yml             | 2 +-
 .github/workflows/release.yml                       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/actions/python-integration-tests/action.yml b/.github/actions/python-integration-tests/action.yml
index d62a15f0a..74553585d 100644
--- a/.github/actions/python-integration-tests/action.yml
+++ b/.github/actions/python-integration-tests/action.yml
@@ -23,7 +23,7 @@ runs:
       uses: actions/checkout@v3
 
     - name: Setup Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: ${{ inputs.python-version }}
         cache: "pip"
diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index d9f6ddc27..1a2928d1a 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -33,7 +33,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python 3.11
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.11
           cache: "pip"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 0f933626b..7e10b9d67 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -25,7 +25,7 @@ jobs:
       - name: Install poetry
         run: pipx install poetry==$POETRY_VERSION
       - name: Set up Python 3.11
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.11"
           cache: "poetry"

From 523e5d11fec9fe2db88cec88d3c50a1f3996c957 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:12:46 -0800
Subject: [PATCH 15/88] fmt: unwrap parts.append calls; default upserts to []; Optional instance_flags

---
 python/langsmith/client.py  | 102 ++++++++++++++++--------------------
 python/langsmith/schemas.py |   2 +-
 2 files changed, 47 insertions(+), 57 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index ff77b81d8..e71c85be8 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3373,7 +3373,7 @@ def create_example_from_run(
     def upsert_examples_multipart(
         self,
         *,
-        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None,
+        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [],
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upsert examples."""
         """ if not (self.info.instance_flags or {}).get(
@@ -3381,8 +3381,6 @@ def upsert_examples_multipart(
             ):
             raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.")
          """
-        if upserts is None:
-            upserts = []
         parts: list[MultipartPart] = []
 
         for example in upserts:
@@ -3401,82 +3399,74 @@ def upsert_examples_multipart(
                 example_body["split"] = example.split
             valb = _dumps_json(example_body)
 
-            (
-                parts.append(
+            parts.append(
+                (
+                    f"{example_id}",
                     (
-                        f"{example_id}",
-                        (
-                            None,
-                            valb,
-                            "application/json",
-                            {},
-                        ),
-                    )
-                ),
+                        None,
+                        valb,
+                        "application/json",
+                        {},
+                    ),
+                )
             )
 
             inputsb = _dumps_json(example.inputs)
 
-            (
+            
+            parts.append(
+                (
+                    f"{example_id}.inputs",
+                    (
+                        None,
+                        inputsb,
+                        "application/json",
+                        {},
+                    ),
+                )
+            )
+            
+
+            if example.outputs:
+                outputsb = _dumps_json(example.outputs)
                 parts.append(
                     (
-                        f"{example_id}.inputs",
+                        f"{example_id}.outputs",
                         (
                             None,
-                            inputsb,
+                            outputsb,
                             "application/json",
                             {},
                         ),
                     )
-                ),
-            )
-
-            if example.outputs:
-                outputsb = _dumps_json(example.outputs)
-                (
-                    parts.append(
-                        (
-                            f"{example_id}.outputs",
-                            (
-                                None,
-                                outputsb,
-                                "application/json",
-                                {},
-                            ),
-                        )
-                    ),
                 )
 
             if example.attachments:
                 for name, attachment in example.attachments.items():
                     if isinstance(attachment, tuple):
                         mime_type, data = attachment
-                        (
-                            parts.append(
+                        parts.append(
+                            (
+                                f"{example_id}.attachment.{name}",
                                 (
-                                    f"{example_id}.attachment.{name}",
-                                    (
-                                        None,
-                                        data,
-                                        f"{mime_type}; length={len(data)}",
-                                        {},
-                                    ),
-                                )
-                            ),
+                                    None,
+                                    data,
+                                    f"{mime_type}; length={len(data)}",
+                                    {},
+                                ),
+                            )
                         )
                     else:
-                        (
-                            parts.append(
+                        parts.append(
+                            (
+                                f"{example_id}.attachment.{name}",
                                 (
-                                    f"{example_id}.attachment.{name}",
-                                    (
-                                        None,
-                                        attachment.data,
-                                        f"{attachment.mime_type}; length={len(attachment.data)}",
-                                        {},
-                                    ),
-                                )
-                            ),
+                                    None,
+                                    attachment.data,
+                                    f"{attachment.mime_type}; length={len(attachment.data)}",
+                                    {},
+                                ),
+                            )
                         )
 
         encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY)
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index b5a74d0fb..9949f9998 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -707,7 +707,7 @@ class LangSmithInfo(BaseModel):
     """The time the license will expire."""
     batch_ingest_config: Optional[BatchIngestConfig] = None
     """The instance flags."""
-    instance_flags: dict[str, Any] = None
+    instance_flags: Optional[dict[str, Any]] = None
 
 
 Example.update_forward_refs()

From ed3aa1cda85518a503c5999b9afe8ded46ef78a7 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:15:35 -0800
Subject: [PATCH 16/88] restore ExampleSearch schema class

---
 python/langsmith/schemas.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 9949f9998..6f84ccea6 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -135,6 +135,12 @@ def __repr__(self):
         return f"{self.__class__}(id={self.id}, dataset_id={self.dataset_id}, link='{self.url}')"
 
 
+class ExampleSearch(ExampleBase):
+     """Example returned via search."""
+
+     id: UUID
+
+
 class ExampleUpdate(BaseModel):
     """Update class for Example."""
 

From ce73afcfb6b04e5a887a3fdcc8691fde466bebb8 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:18:20 -0800
Subject: [PATCH 17/88] fmt

---
 python/bench/upload_examples_bench.py | 6 +++---
 python/langsmith/client.py            | 2 --
 python/langsmith/schemas.py           | 4 ++--
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py
index f6669b293..3fc79beb4 100644
--- a/python/bench/upload_examples_bench.py
+++ b/python/bench/upload_examples_bench.py
@@ -96,9 +96,9 @@ def benchmark_example_uploading(
         "new": {
             "mean": statistics.mean(multipart_timings),
             "median": statistics.median(multipart_timings),
-            "stdev": statistics.stdev(multipart_timings)
-            if len(multipart_timings) > 1
-            else 0,
+            "stdev": (
+                statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0
+            ),
             "min": min(multipart_timings),
             "max": max(multipart_timings),
         },
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index e71c85be8..cc3987649 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3413,7 +3413,6 @@ def upsert_examples_multipart(
 
             inputsb = _dumps_json(example.inputs)
 
-            
             parts.append(
                 (
                     f"{example_id}.inputs",
@@ -3425,7 +3424,6 @@ def upsert_examples_multipart(
                     ),
                 )
             )
-            
 
             if example.outputs:
                 outputsb = _dumps_json(example.outputs)
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 6f84ccea6..58b311b83 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -136,9 +136,9 @@ def __repr__(self):
 
 
 class ExampleSearch(ExampleBase):
-     """Example returned via search."""
+    """Example returned via search."""
 
-     id: UUID
+    id: UUID
 
 
 class ExampleUpdate(BaseModel):

From 460b16b15720a0f291c31814896f4100c7a4336b Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:22:08 -0800
Subject: [PATCH 18/88] list -> List

---
 python/langsmith/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index cc3987649..8f4351994 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3381,7 +3381,7 @@ def upsert_examples_multipart(
             ):
             raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.")
          """
-        parts: list[MultipartPart] = []
+        parts: List[MultipartPart] = []
 
         for example in upserts:
             if example.id is not None:

From 4e9edf4da4de1a8a3421f589345c929ae71ae3c4 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:24:11 -0800
Subject: [PATCH 19/88] dict -> Dict

---
 python/langsmith/schemas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 58b311b83..f134e1432 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -713,7 +713,7 @@ class LangSmithInfo(BaseModel):
     """The time the license will expire."""
     batch_ingest_config: Optional[BatchIngestConfig] = None
     """The instance flags."""
-    instance_flags: Optional[dict[str, Any]] = None
+    instance_flags: Optional[Dict[str, Any]] = None
 
 
 Example.update_forward_refs()

From b6b9d79d43709a1da95fbf85df7c59bbb5c91da5 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:28:08 -0800
Subject: [PATCH 20/88] unit test: use ExampleUpsertWithAttachments instead of ExampleCreateWithAttachments

---
 python/tests/unit_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index 4b68ce368..a97e8329d 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -432,7 +432,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None:
     dataset_id = uuid.uuid4()
     created_at = datetime(2015, 1, 1, 0, 0, 0)
 
-    example = ls_schemas.ExampleCreateWithAttachments(
+    example = ls_schemas.ExampleUpsertWithAttachments(
         id=example_id,
         dataset_id=dataset_id,
         created_at=created_at,

From bc9ec6f5bf562b75a9081f1435ddc3aa799fa3ee Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:28:55 -0800
Subject: [PATCH 21/88] revert actions/setup-python bump back to v4 in CI workflows

---
 .github/actions/python-integration-tests/action.yml | 2 +-
 .github/workflows/integration_tests.yml             | 2 +-
 .github/workflows/release.yml                       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/actions/python-integration-tests/action.yml b/.github/actions/python-integration-tests/action.yml
index 74553585d..d62a15f0a 100644
--- a/.github/actions/python-integration-tests/action.yml
+++ b/.github/actions/python-integration-tests/action.yml
@@ -23,7 +23,7 @@ runs:
       uses: actions/checkout@v3
 
     - name: Setup Python
-      uses: actions/setup-python@v5
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ inputs.python-version }}
         cache: "pip"
diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index 1a2928d1a..d9f6ddc27 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -33,7 +33,7 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python 3.11
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v4
         with:
           python-version: 3.11
           cache: "pip"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 7e10b9d67..0f933626b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -25,7 +25,7 @@ jobs:
       - name: Install poetry
         run: pipx install poetry==$POETRY_VERSION
       - name: Set up Python 3.11
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v4
         with:
           python-version: "3.11"
           cache: "poetry"

From 15708dc485f1ede8d2e8e245ffa5d9d7cb3c68ce Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:31:23 -0800
Subject: [PATCH 22/88] unit test fix: expect /v1/platform/examples/multipart endpoint

---
 python/tests/unit_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index a97e8329d..9019e44f5 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -454,7 +454,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None:
     call_args = mock_session.request.call_args
 
     assert call_args[0][0] == "POST"
-    assert call_args[0][1].endswith("/v1/examples/multipart")
+    assert call_args[0][1].endswith("/v1/platform/examples/multipart")
 
     # Parse the multipart data
     request_data = call_args[1]["data"]

From 527174ab4330a763b1d4bd8adfdc314a81ed2980 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:36:18 -0800
Subject: [PATCH 23/88] unit test fix: expect "; length=9" in file1 attachment Content-Type

---
 python/tests/unit_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index 9019e44f5..46a946285 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -493,7 +493,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None:
 
         if name.endswith(".attachment.file1"):
             assert part.value == expected_parts[name]
-            assert part.headers["Content-Type"] == "text/plain"
+            assert part.headers["Content-Type"] == "text/plain; length=9"
         elif name.endswith(".attachment.file2"):
             assert part.value == expected_parts[name]
             assert part.headers["Content-Type"] == "application/json"

From 81f52492dfe50cac96f5e2106732b9740a6fa52d Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 18 Nov 2024 15:38:40 -0800
Subject: [PATCH 24/88] unit test fix: expect "; length=16" in file2 attachment Content-Type

---
 python/tests/unit_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index 46a946285..adb59e37e 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -496,7 +496,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None:
             assert part.headers["Content-Type"] == "text/plain; length=9"
         elif name.endswith(".attachment.file2"):
             assert part.value == expected_parts[name]
-            assert part.headers["Content-Type"] == "application/json"
+            assert part.headers["Content-Type"] == "application/json; length=16"
         else:
             value = json.loads(part.value)
             assert value == expected_parts[name]

From f36a0cb20389f75a8fc6ff31db8db874831e13c7 Mon Sep 17 00:00:00 2001
From: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Date: Tue, 19 Nov 2024 13:23:44 -0800
Subject: [PATCH 25/88] make evaluate function compatible with attachments
 (#1218)

Co-authored-by: Bagatur <baskaryan@gmail.com>
---
 python/langsmith/client.py                    | 32 +++++++
 python/langsmith/evaluation/_runner.py        | 90 +++++++++++++++--
 python/langsmith/schemas.py                   | 22 ++++-
 python/tests/integration_tests/test_client.py | 96 ++++++++++++++++++-
 4 files changed, 227 insertions(+), 13 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 8f4351994..aad7057fc 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3454,6 +3454,21 @@ def upsert_examples_multipart(
                                 ),
                             )
                         )
+                    elif isinstance(attachment, str):
+                        file_path = attachment
+                        mime_type = "application/octet-stream"
+                        file_size = os.path.getsize(file_path)
+                        parts.append(
+                            (
+                                f"{example_id}.attachment.{name}",
+                                (
+                                    None,
+                                    open(file_path, "rb"),
+                                    f"{mime_type}; length={file_size}",
+                                    {},
+                                ),
+                            )
+                        )
                     else:
                         parts.append(
                             (
@@ -3645,6 +3660,7 @@ def read_example(
                 "as_of": as_of.isoformat() if as_of else None,
             },
         )
+
         return ls_schemas.Example(
             **response.json(),
             _host_url=self._host_url,
@@ -3664,6 +3680,7 @@ def list_examples(
         limit: Optional[int] = None,
         metadata: Optional[dict] = None,
         filter: Optional[str] = None,
+        include_attachments: bool = False,
         **kwargs: Any,
     ) -> Iterator[ls_schemas.Example]:
         """Retrieve the example rows of the specified dataset.
@@ -3713,11 +3730,26 @@ def list_examples(
             params["dataset"] = dataset_id
         else:
             pass
+        if include_attachments:
+            params["select"] = ["attachment_urls", "outputs", "metadata"]
         for i, example in enumerate(
             self._get_paginated_list("/examples", params=params)
         ):
+            attachment_urls = {}
+            if example["attachment_urls"]:
+                for key, value in example["attachment_urls"].items():
+                    response = requests.get(value["presigned_url"], stream=True)
+                    response.raise_for_status()
+                    reader = io.BytesIO(response.content)
+                    attachment_urls[key.split(".")[1]] = (
+                        value["presigned_url"],
+                        reader,
+                    )
+            del example["attachment_urls"]
+
             yield ls_schemas.Example(
                 **example,
+                attachment_urls=attachment_urls,
                 _host_url=self._host_url,
                 _tenant_id=self._get_optional_tenant_id(),
             )
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 8ed55f6bf..d197de0d3 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -65,7 +65,7 @@
     DataFrame = Any
 logger = logging.getLogger(__name__)
 
-TARGET_T = Callable[[dict], dict]
+TARGET_T = Union[Callable[[dict], dict], Callable[[dict, dict], dict]]
 # Data format: dataset-name, dataset_id, or examples
 DATA_T = Union[str, uuid.UUID, Iterable[schemas.Example], schemas.Dataset]
 # Summary evaluator runs over the whole dataset
@@ -907,7 +907,6 @@ def _evaluate(
         runs,
         client,
     )
-
     manager = _ExperimentManager(
         data,
         client=client,
@@ -918,6 +917,7 @@ def _evaluate(
         # If provided, we don't need to create a new experiment.
         runs=runs,
         # Create or resolve the experiment.
+        include_attachments=_include_attachments(target),
     ).start()
     cache_dir = ls_utils.get_cache_dir(None)
     cache_path = (
@@ -1162,6 +1162,7 @@ def __init__(
         summary_results: Optional[Iterable[EvaluationResults]] = None,
         description: Optional[str] = None,
         num_repetitions: int = 1,
+        include_attachments: bool = False,
     ):
         super().__init__(
             experiment=experiment,
@@ -1175,11 +1176,16 @@ def __init__(
         self._evaluation_results = evaluation_results
         self._summary_results = summary_results
         self._num_repetitions = num_repetitions
+        self._include_attachments = include_attachments
 
     @property
     def examples(self) -> Iterable[schemas.Example]:
         if self._examples is None:
-            self._examples = _resolve_data(self._data, client=self.client)
+            self._examples = _resolve_data(
+                self._data,
+                client=self.client,
+                include_attachments=self._include_attachments,
+            )
             if self._num_repetitions > 1:
                 self._examples = itertools.chain.from_iterable(
                     itertools.tee(self._examples, self._num_repetitions)
@@ -1225,6 +1231,7 @@ def start(self) -> _ExperimentManager:
             client=self.client,
             runs=self._runs,
             evaluation_results=self._evaluation_results,
+            include_attachments=self._include_attachments,
         )
 
     def with_predictions(
@@ -1246,6 +1253,7 @@ def with_predictions(
             client=self.client,
             runs=(pred["run"] for pred in r2),
             # TODO: Can't do multiple prediction rounds rn.
+            include_attachments=self._include_attachments,
         )
 
     def with_evaluators(
@@ -1276,6 +1284,7 @@ def with_evaluators(
             runs=(result["run"] for result in r2),
             evaluation_results=(result["evaluation_results"] for result in r3),
             summary_results=self._summary_results,
+            include_attachments=self._include_attachments,
         )
 
     def with_summary_evaluators(
@@ -1296,6 +1305,7 @@ def with_summary_evaluators(
             runs=self.runs,
             evaluation_results=self._evaluation_results,
             summary_results=aggregate_feedback_gen,
+            include_attachments=self._include_attachments,
         )
 
     def get_results(self) -> Iterable[ExperimentResultRow]:
@@ -1325,14 +1335,23 @@ def get_summary_scores(self) -> Dict[str, List[dict]]:
     # Private methods
 
     def _predict(
-        self, target: TARGET_T, /, max_concurrency: Optional[int] = None
+        self,
+        target: TARGET_T,
+        /,
+        max_concurrency: Optional[int] = None,
     ) -> Generator[_ForwardResults, None, None]:
         """Run the target function on the examples."""
         fn = _ensure_traceable(target)
+
         if max_concurrency == 0:
             for example in self.examples:
                 yield _forward(
-                    fn, example, self.experiment_name, self._metadata, self.client
+                    fn,
+                    example,
+                    self.experiment_name,
+                    self._metadata,
+                    self.client,
+                    include_attachments=self._include_attachments,
                 )
 
         else:
@@ -1345,6 +1364,7 @@ def _predict(
                         self.experiment_name,
                         self._metadata,
                         self.client,
+                        include_attachments=self._include_attachments,
                     )
                     for example in self.examples
                 ]
@@ -1618,6 +1638,7 @@ def _forward(
     experiment_name: str,
     metadata: dict,
     client: langsmith.Client,
+    include_attachments: Optional[bool] = None,
 ) -> _ForwardResults:
     run: Optional[schemas.RunBase] = None
 
@@ -1627,8 +1648,13 @@ def _get_run(r: rt.RunTree) -> None:
 
     with rh.tracing_context(enabled=True):
         try:
+            args = (
+                (example.inputs, example.attachment_urls)
+                if include_attachments
+                else (example.inputs,)
+            )
             fn(
-                example.inputs,
+                *args,
                 langsmith_extra=rh.LangSmithExtra(
                     reference_example_id=example.id,
                     on_end=_get_run,
@@ -1655,15 +1681,26 @@ def _get_run(r: rt.RunTree) -> None:
 
 
 def _resolve_data(
-    data: DATA_T, *, client: langsmith.Client
+    data: DATA_T,
+    *,
+    client: langsmith.Client,
+    include_attachments: bool = False,
 ) -> Iterable[schemas.Example]:
     """Return the examples for the given dataset."""
+    # TODO: Find a smarter way of determining whether
+    # to get attachments (don't just default to true)
     if isinstance(data, str):
-        return client.list_examples(dataset_name=data)
+        return client.list_examples(
+            dataset_name=data, include_attachments=include_attachments
+        )
     elif isinstance(data, uuid.UUID):
-        return client.list_examples(dataset_id=data)
+        return client.list_examples(
+            dataset_id=data, include_attachments=include_attachments
+        )
     elif isinstance(data, schemas.Dataset):
-        return client.list_examples(dataset_id=data.id)
+        return client.list_examples(
+            dataset_id=data.id, include_attachments=include_attachments
+        )
     return data
 
 
@@ -1683,6 +1720,7 @@ def _ensure_traceable(
             "    ...\n"
             ")"
         )
+
     if rh.is_traceable_function(target):
         fn: rh.SupportsLangsmithExtra[[dict], dict] = target
     else:
@@ -1692,6 +1730,38 @@ def _ensure_traceable(
     return fn
 
 
+def _include_attachments(
+    target: Union[TARGET_T, Iterable[schemas.Run], Runnable],
+) -> bool:
+    """Whether the target function accepts attachments."""
+    if _is_langchain_runnable(target) or not callable(target):
+        return False
+    # Check function signature
+    sig = inspect.signature(target)
+    params = list(sig.parameters.values())
+    positional_params = [
+        p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+    ]
+
+    if len(positional_params) == 0:
+        raise ValueError(
+            "Target function must accept at least one positional argument (inputs)"
+        )
+    elif len(positional_params) > 2:
+        raise ValueError(
+            "Target function must accept at most two positional "
+            "arguments (inputs, attachments)"
+        )
+    elif len(positional_params) == 2:
+        if tuple(p.name for p in positional_params) != ("inputs", "attachments"):
+            raise ValueError(
+                "When target function has two positional arguments, they must be named "
+                "'inputs' and 'attachments', respectively."
+            )
+
+    return len(positional_params) == 2
+
+
 def _resolve_experiment(
     experiment: Optional[Union[schemas.TracerSession, str, uuid.UUID]],
     runs: Optional[Iterable[schemas.Run]],
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index f134e1432..1ea0e6b32 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -63,8 +63,22 @@ def my_function(bar: int, my_val: Attachment):
     data: bytes
 
 
-Attachments = Dict[str, Union[Tuple[str, bytes], Attachment]]
-"""Attachments associated with the run. Each entry is a tuple of (mime_type, bytes)."""
+Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, str]]
+"""Attachments associated with the run. 
+Each entry is a tuple of (mime_type, bytes), or a fliepath"""
+
+
+@runtime_checkable
+class BinaryIOLike(Protocol):
+    """Protocol for binary IO-like objects."""
+
+    def read(self, size: int = -1) -> bytes:
+        """Read function."""
+        ...
+
+    def write(self, b: bytes) -> int:
+        """Write function."""
+        ...
 
 
 class ExampleBase(BaseModel):
@@ -74,11 +88,15 @@ class ExampleBase(BaseModel):
     inputs: Dict[str, Any] = Field(default_factory=dict)
     outputs: Optional[Dict[str, Any]] = Field(default=None)
     metadata: Optional[Dict[str, Any]] = Field(default=None)
+    attachment_urls: Optional[Dict[str, Tuple[str, BinaryIOLike]]] = Field(default=None)
+    """Dictionary with attachment names as keys and a tuple of the S3 url
+    and a reader of the data for the file."""
 
     class Config:
         """Configuration class for the schema."""
 
         frozen = True
+        arbitrary_types_allowed = True
 
 
 class ExampleCreate(ExampleBase):
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index d1d16ce0f..cb89b1629 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -20,7 +20,8 @@
 from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
 
 from langsmith.client import ID_TYPE, Client
-from langsmith.schemas import DataType, ExampleUpsertWithAttachments
+from langsmith.evaluation import evaluate
+from langsmith.schemas import DataType, Example, ExampleUpsertWithAttachments, Run
 from langsmith.utils import (
     LangSmithConnectionError,
     LangSmithError,
@@ -1119,3 +1120,96 @@ def create_encoder(*args, **kwargs):
                 myobj["key_1"]
 
         assert not caplog.records
+
+
+@pytest.mark.skip(
+    reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first"
+)
+def test_list_examples_attachments_keys(langchain_client: Client) -> None:
+    """Test list_examples returns same keys with and without attachments."""
+    dataset_name = "__test_list_examples_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(dataset_name=dataset_name)
+
+    langchain_client.create_example(
+        inputs={"text": "hello world"},
+        outputs={"response": "hi there"},
+        dataset_id=dataset.id,
+        attachments={
+            "test_file": ("text/plain", b"test content"),
+        },
+    )
+
+    # Get examples with attachments
+    with_attachments = next(
+        langchain_client.list_examples(dataset_id=dataset.id, include_attachments=True)
+    )
+
+    # Get examples without attachments
+    without_attachments = next(
+        langchain_client.list_examples(dataset_id=dataset.id, include_attachments=False)
+    )
+
+    with_keys = set(with_attachments.dict().keys())
+    without_keys = set(without_attachments.dict().keys())
+    assert with_keys == without_keys, (
+        f"Keys differ when include_attachments=True vs False.\n"
+        f"Only in with_attachments: {with_keys - without_keys}\n"
+        f"Only in without_attachments: {without_keys - with_keys}"
+    )
+
+    langchain_client.delete_dataset(dataset_id=dataset.id)
+
+
+@pytest.mark.skip(
+    reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first"
+)
+def test_evaluate_with_attachments(langchain_client: Client) -> None:
+    """Test evaluating examples with attachments."""
+    dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
+    # 1. Create dataset
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for evals with attachments",
+        data_type=DataType.kv,
+    )
+
+    # 2. Create example with attachments
+    example = ExampleUpsertWithAttachments(
+        dataset_id=dataset.id,
+        inputs={"question": "What is shown in the image?"},
+        outputs={"answer": "test image"},
+        attachments={
+            "image": ("image/png", b"fake image data for testing"),
+        },
+    )
+
+    langchain_client.upsert_examples_multipart(upserts=[example])
+
+    # 3. Define target function that uses attachments
+    def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
+        # Verify we receive the attachment data
+        assert "image" in attachments
+        image_url, image_data = attachments["image"]
+        assert image_data.read() == b"fake image data for testing"
+        return {"answer": "test image"}
+
+    # 4. Define simple evaluator
+    def evaluator(run: Run, example: Example) -> Dict[str, Any]:
+        return {
+            "score": float(
+                run.outputs.get("answer") == example.outputs.get("answer")  # type: ignore
+            )
+        }
+
+    # 5. Run evaluation
+    results = evaluate(
+        target, data=dataset_name, evaluators=[evaluator], client=langchain_client
+    )
+
+    # 6. Verify results
+    assert len(results) == 1
+    for result in results:
+        assert result["evaluation_results"]["results"][0].score == 1.0
+
+    # Cleanup
+    langchain_client.delete_dataset(dataset_name=dataset_name)

From ddbe2f5e54d8c3b888dccce9ae931a1973e00c8f Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 13:44:45 -0800
Subject: [PATCH 26/88] file path update

---
 python/langsmith/_internal/_operations.py | 26 ++++++++-----
 python/langsmith/client.py                | 47 ++++++++++++-----------
 python/langsmith/schemas.py               |  5 ++-
 3 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
index e1e99d6e2..80f07d8dd 100644
--- a/python/langsmith/_internal/_operations.py
+++ b/python/langsmith/_internal/_operations.py
@@ -2,6 +2,7 @@
 
 import itertools
 import logging
+from pathlib import Path
 import uuid
 from typing import Literal, Optional, Union, cast
 
@@ -256,18 +257,23 @@ def serialized_run_operation_to_multipart_parts_and_context(
                     " periods ('.'). Please rename the attachment and try again."
                 )
                 continue
-
-            acc_parts.append(
-                (
-                    f"attachment.{op.id}.{n}",
+            
+            if isinstance(valb, Path):
+                #TODO: actually deal with this case
+                # This is just for speed of getting something out
+                continue
+            else:
+                acc_parts.append(
                     (
-                        None,
-                        valb,
-                        content_type,
-                        {"Content-Length": str(len(valb))},
-                    ),
+                        f"attachment.{op.id}.{n}",
+                        (
+                            None,
+                            valb,
+                            content_type,
+                            {"Content-Length": str(len(valb))},
+                        ),
+                    )
                 )
-            )
     return MultipartPartsAndContext(
         acc_parts,
         f"trace={op.trace_id},id={op.id}",
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index aad7057fc..e9832e276 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -25,6 +25,7 @@
 import json
 import logging
 import os
+from pathlib import Path
 import random
 import threading
 import time
@@ -3442,33 +3443,33 @@ def upsert_examples_multipart(
             if example.attachments:
                 for name, attachment in example.attachments.items():
                     if isinstance(attachment, tuple):
-                        mime_type, data = attachment
-                        parts.append(
-                            (
-                                f"{example_id}.attachment.{name}",
+                        if isinstance(attachment[1], Path):
+                            mime_type, file_path = attachment
+                            file_size = os.path.getsize(file_path)
+                            parts.append(
                                 (
-                                    None,
-                                    data,
-                                    f"{mime_type}; length={len(data)}",
-                                    {},
-                                ),
+                                    f"{example_id}.attachment.{name}",
+                                    (
+                                        None,
+                                        open(file_path, "rb"),
+                                        f"{mime_type}; length={file_size}",
+                                        {},
+                                    ),
+                                )
                             )
-                        )
-                    elif isinstance(attachment, str):
-                        file_path = attachment
-                        mime_type = "application/octet-stream"
-                        file_size = os.path.getsize(file_path)
-                        parts.append(
-                            (
-                                f"{example_id}.attachment.{name}",
+                        else:
+                            mime_type, data = attachment
+                            parts.append(
                                 (
-                                    None,
-                                    open(file_path, "rb"),
-                                    f"{mime_type}; length={file_size}",
-                                    {},
-                                ),
+                                    f"{example_id}.attachment.{name}",
+                                    (
+                                        None,
+                                        data,
+                                        f"{mime_type}; length={len(data)}",
+                                        {},
+                                    ),
+                                )
                             )
-                        )
                     else:
                         parts.append(
                             (
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 1ea0e6b32..f4c3df0e9 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -5,6 +5,7 @@
 from datetime import datetime, timedelta, timezone
 from decimal import Decimal
 from enum import Enum
+from pathlib import Path
 from typing import (
     Any,
     Dict,
@@ -63,9 +64,9 @@ def my_function(bar: int, my_val: Attachment):
     data: bytes
 
 
-Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, str]]
+Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, Tuple[str, Path]]]
 """Attachments associated with the run. 
-Each entry is a tuple of (mime_type, bytes), or a fliepath"""
+Each entry is a tuple of (mime_type, bytes), or (mime_type, file_path)"""
 
 
 @runtime_checkable

From c1ba615bd9fb0754bd2da6d64d7810f4472be736 Mon Sep 17 00:00:00 2001
From: Jake Rachleff <jake@langchain.dev>
Date: Tue, 19 Nov 2024 14:17:21 -0800
Subject: [PATCH 27/88] add benchmarks

---
 ...load_example_with_large_file_attachment.py | 113 ++++++++++++++++++
 python/bench/upload_examples_bench.py         |  16 +--
 2 files changed, 121 insertions(+), 8 deletions(-)
 create mode 100644 python/bench/upload_example_with_large_file_attachment.py

diff --git a/python/bench/upload_example_with_large_file_attachment.py b/python/bench/upload_example_with_large_file_attachment.py
new file mode 100644
index 000000000..31b36b2e5
--- /dev/null
+++ b/python/bench/upload_example_with_large_file_attachment.py
@@ -0,0 +1,113 @@
+import statistics
+import time
+from pathlib import Path
+from typing import Dict
+from uuid import uuid4
+
+
+from langsmith import Client
+from langsmith.schemas import DataType, ExampleUpsertWithAttachments
+
+import os
+
+WRITE_BATCH = 10000
+
+def create_large_file(size: int, dir: str) -> str:
+    """Create a large file for benchmarking purposes."""
+    filename = f"large_file_{size}.txt"
+    filepath = os.path.join(dir, filename)
+
+    # delete the file if it exists
+    print("Deleting existing file...")
+    if os.path.exists(filepath):
+        os.remove(filepath)
+
+    print("Creating big file...")
+    with open(filepath, "w") as f:
+        curr_size = 0
+        while curr_size < size:
+            f.write("a" * (size - curr_size))
+            curr_size += size - curr_size
+
+    print("Done creating big file...")
+    return filepath
+
+DATASET_NAME = "upsert_big_file_to_dataset"
+
+def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int = 1) -> Dict:
+    """
+    Benchmark multipart upload of examples with a large file attachment.
+    Returns timing statistics.
+    """
+    multipart_timings = []
+
+    for _ in range(samples):
+        client = Client()
+
+        if client.has_dataset(dataset_name=DATASET_NAME):
+            client.delete_dataset(dataset_name=DATASET_NAME)
+
+        dataset = client.create_dataset(
+            DATASET_NAME,
+            description="Test dataset for big file upload",
+        )
+        large_file = create_large_file(size_bytes, "/tmp")
+        examples = [
+            ExampleUpsertWithAttachments(
+                dataset_id=dataset.id,
+                inputs={"a": 1},
+                outputs={"b": 2},
+                attachments={
+                    "bigfile": ("text/plain", Path(large_file)),
+                },
+            ) for _ in range(num_examples)
+        ]
+
+        multipart_start = time.perf_counter()
+        client.upsert_examples_multipart(upserts=examples)
+        multipart_elapsed = time.perf_counter() - multipart_start
+
+        multipart_timings.append(multipart_elapsed)
+
+    return {
+        "mean": statistics.mean(multipart_timings),
+        "median": statistics.median(multipart_timings),
+        "stdev": (
+            statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0
+        ),
+        "min": min(multipart_timings),
+        "max": max(multipart_timings),
+    }
+
+
+size_bytes = 50000000
+num_examples = 10
+
+def main(size_bytes: int, num_examples: int = 1):
+    """
+    Run the large-file upload benchmark once and print the timing results.
+    """
+    results = benchmark_big_file_upload(size_bytes, num_examples)
+
+    print(
+        f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:"
+    )
+    print("-" * 30)
+    print(f"{'Metric':<15} {'Result':>20}")
+    print("-" * 30)
+
+    metrics = ["mean", "median", "stdev", "min", "max"]
+    for metric in metrics:
+        print(
+            f"{results[metric]:>20.4f}"
+        )
+
+    print("-" * 30)
+    print(
+        f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} "
+    )
+    print("(examples/second)")
+
+
+if __name__ == "__main__":
+    main(size_bytes, num_examples)
diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py
index 3fc79beb4..5a22a731b 100644
--- a/python/bench/upload_examples_bench.py
+++ b/python/bench/upload_examples_bench.py
@@ -4,7 +4,7 @@
 from uuid import uuid4
 
 from langsmith import Client
-from langsmith.schemas import DataType, ExampleCreateWithAttachments
+from langsmith.schemas import DataType, ExampleUpsertWithAttachments
 
 
 def create_large_json(length: int) -> Dict:
@@ -32,7 +32,7 @@ def create_large_json(length: int) -> Dict:
 
 def create_example_data(dataset_id: str, json_size: int) -> Dict:
     """Create a single example data object."""
-    return ExampleCreateWithAttachments(
+    return ExampleUpsertWithAttachments(
         **{
             "dataset_id": dataset_id,
             "inputs": create_large_json(json_size),
@@ -54,7 +54,7 @@ def benchmark_example_uploading(
     multipart_timings, old_timings = [], []
 
     for _ in range(samples):
-        client = Client(api_url="https://dev.api.smith.langchain.com")
+        client = Client()
 
         if client.has_dataset(dataset_name=DATASET_NAME):
             client.delete_dataset(dataset_name=DATASET_NAME)
@@ -70,11 +70,11 @@ def benchmark_example_uploading(
 
         # Old method
         old_start = time.perf_counter()
-        inputs = [e.inputs for e in examples]
-        outputs = [e.outputs for e in examples]
-        # the create_examples endpoint fails above 20mb
-        # so this will crash with json_size > ~100
-        client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
+        # inputs = [e.inputs for e in examples]
+        # outputs = [e.outputs for e in examples]
+        # # the create_examples endpoint fails above 20mb
+        # # so this will crash with json_size > ~100
+        # client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
         old_elapsed = time.perf_counter() - old_start
 
         # New method

From 354417182ae86c1c88762173916b333f8cac0473 Mon Sep 17 00:00:00 2001
From: Jake Rachleff <jake@langchain.dev>
Date: Tue, 19 Nov 2024 14:52:54 -0800
Subject: [PATCH 28/88] better error message

---
 python/langsmith/evaluation/_runner.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index d197de0d3..bb038f51a 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1753,10 +1753,17 @@ def _include_attachments(
             "arguments (inputs, attachments)"
         )
     elif len(positional_params) == 2:
-        if tuple(p.name for p in positional_params) != ("inputs", "attachments"):
+        mismatches = []
+        for i, (p, expected) in enumerate(zip(positional_params, ("inputs", "attachments"))):
+            if p.name != expected:
+                mismatches.append((i, p.name))
+
+        if mismatches:
             raise ValueError(
                 "When target function has two positional arguments, they must be named "
-                "'inputs' and 'attachments', respectively."
+                "'inputs' and 'attachments', respectively. Received: " + ",".join(
+                    f"'{p}' at index {i}" for i, p in mismatches
+                )
             )
 
     return len(positional_params) == 2

From 3cc32c57695c2f7a2d13c8c1bba971af7863b522 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 14:55:20 -0800
Subject: [PATCH 29/88] aevaluate

---
 python/langsmith/evaluation/_arunner.py | 42 +++++++++++++++++++++----
 python/langsmith/evaluation/_runner.py  |  2 +-
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index a2c3b2705..2ff023e3e 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -40,6 +40,7 @@
     _ExperimentManagerMixin,
     _extract_feedback_keys,
     _ForwardResults,
+    _include_attachments,
     _is_langchain_runnable,
     _load_examples_map,
     _load_experiment,
@@ -66,7 +67,9 @@
 
 logger = logging.getLogger(__name__)
 
-ATARGET_T = Callable[[dict], Awaitable[dict]]
+ATARGET_T = Union[
+    Callable[[dict], Awaitable[dict]], Callable[[dict, dict], Awaitable[dict]]
+]
 
 
 async def aevaluate(
@@ -401,6 +404,7 @@ async def _aevaluate(
         description=description,
         num_repetitions=num_repetitions,
         runs=runs,
+        include_attachments=_include_attachments(target),
     ).astart()
     cache_dir = ls_utils.get_cache_dir(None)
     if cache_dir is not None:
@@ -461,6 +465,7 @@ def __init__(
         summary_results: Optional[AsyncIterable[EvaluationResults]] = None,
         description: Optional[str] = None,
         num_repetitions: int = 1,
+        include_attachments: bool = False,
     ):
         super().__init__(
             experiment=experiment,
@@ -476,10 +481,15 @@ def __init__(
         self._evaluation_results = evaluation_results
         self._summary_results = summary_results
         self._num_repetitions = num_repetitions
+        self._include_attachments = include_attachments
 
     async def aget_examples(self) -> AsyncIterator[schemas.Example]:
         if self._examples is None:
-            self._examples = _aresolve_data(self._data, client=self.client)
+            self._examples = _aresolve_data(
+                self._data,
+                client=self.client,
+                include_attachments=self._include_attachments,
+            )
             if self._num_repetitions > 1:
                 self._examples = async_chain_from_iterable(
                     aitertools.atee(self._examples, self._num_repetitions)
@@ -545,6 +555,7 @@ async def astart(self) -> _AsyncExperimentManager:
             client=self.client,
             runs=self._runs,
             evaluation_results=self._evaluation_results,
+            include_attachments=self._include_attachments,
         )
 
     async def awith_predictions(
@@ -561,6 +572,7 @@ async def awith_predictions(
             metadata=self._metadata,
             client=self.client,
             runs=(pred["run"] async for pred in r2),
+            include_attachments=self._include_attachments,
         )
 
     async def awith_evaluators(
@@ -580,6 +592,7 @@ async def awith_evaluators(
             runs=(result["run"] async for result in r2),
             evaluation_results=(result["evaluation_results"] async for result in r3),
             summary_results=self._summary_results,
+            include_attachments=self._include_attachments,
         )
 
     async def awith_summary_evaluators(
@@ -596,6 +609,7 @@ async def awith_summary_evaluators(
             runs=self.aget_runs(),
             evaluation_results=self._evaluation_results,
             summary_results=aggregate_feedback_gen,
+            include_attachments=self._include_attachments,
         )
 
     async def aget_results(self) -> AsyncIterator[ExperimentResultRow]:
@@ -630,7 +644,12 @@ async def predict_all():
             async for example in await self.aget_examples():
                 # Yield the coroutine to be awaited later
                 yield _aforward(
-                    fn, example, self.experiment_name, self._metadata, self.client
+                    fn,
+                    example,
+                    self.experiment_name,
+                    self._metadata,
+                    self.client,
+                    include_attachments=self._include_attachments,
                 )
 
         async for result in aitertools.aiter_with_concurrency(
@@ -904,6 +923,7 @@ async def _aforward(
     experiment_name: str,
     metadata: dict,
     client: langsmith.Client,
+    include_attachments: bool = False,
 ) -> _ForwardResults:
     run: Optional[schemas.RunBase] = None
 
@@ -913,8 +933,13 @@ def _get_run(r: run_trees.RunTree) -> None:
 
     with rh.tracing_context(enabled=True):
         try:
+            args = (
+                (example.inputs, example.attachment_urls)
+                if include_attachments
+                else (example.inputs,)
+            )
             await fn(
-                example.inputs,
+                *args,
                 langsmith_extra=rh.LangSmithExtra(
                     reference_example_id=example.id,
                     on_end=_get_run,
@@ -971,12 +996,17 @@ def _ensure_async_traceable(
 
 
 def _aresolve_data(
-    data: Union[DATA_T, AsyncIterable[schemas.Example]], *, client: langsmith.Client
+    data: Union[DATA_T, AsyncIterable[schemas.Example]],
+    *,
+    client: langsmith.Client,
+    include_attachments: bool = False,
 ) -> AsyncIterator[schemas.Example]:
     """Return the examples for the given dataset."""
     if isinstance(data, AsyncIterable):
         return aitertools.ensure_async_iterator(data)
-    return aitertools.ensure_async_iterator(_resolve_data(data, client=client))
+    return aitertools.ensure_async_iterator(
+        _resolve_data(data, client=client, include_attachments=include_attachments)
+    )
 
 
 T = TypeVar("T")
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index d197de0d3..c2603b440 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1638,7 +1638,7 @@ def _forward(
     experiment_name: str,
     metadata: dict,
     client: langsmith.Client,
-    include_attachments: Optional[bool] = None,
+    include_attachments: bool = False,
 ) -> _ForwardResults:
     run: Optional[schemas.RunBase] = None
 

From 08a6f34aca8ff78745ea925a0699e20569d4f96c Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 17:15:41 -0800
Subject: [PATCH 30/88] unit test for _include_attachments

---
 .../unit_tests/evaluation/test_runner.py      | 69 ++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 408d4508d..5c06292ea 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -5,6 +5,7 @@
 import itertools
 import json
 import random
+import re
 import sys
 import time
 import uuid
@@ -20,7 +21,7 @@
 from langsmith import schemas as ls_schemas
 from langsmith.client import Client
 from langsmith.evaluation._arunner import aevaluate, aevaluate_existing
-from langsmith.evaluation._runner import evaluate_existing
+from langsmith.evaluation._runner import evaluate_existing, _include_attachments
 from langsmith.evaluation.evaluator import _normalize_evaluator_func
 
 
@@ -566,3 +567,69 @@ async def atarget(x):
             await aevaluate(
                 atarget, data=ds_examples, evaluators=[eval_], client=client
             )
+
+
+@pytest.mark.parametrize(
+    "target,expected,error_msg",
+    [
+        # Valid cases
+        (lambda inputs: None, False, None),
+        (lambda inputs, attachments: None, True, None),
+        
+        # Invalid parameter names
+        (
+            lambda x, y: None, 
+            None, 
+            "When target function has two positional arguments, they must be named "
+            "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' at index 1"
+        ),
+        (
+            lambda input, attachment: None, 
+            None,
+            "When target function has two positional arguments, they must be named "
+            "'inputs' and 'attachments', respectively. Received: 'input' at index 0,"
+            "'attachment' at index 1"
+        ),
+        
+        # Too many parameters
+        (
+            lambda inputs, attachments, extra: None,
+            None,
+            re.escape("Target function must accept at most two positional arguments (inputs, attachments)")
+        ),
+        
+        # No positional parameters
+        (
+            lambda *, foo="bar": None,
+            None,
+            re.escape("Target function must accept at least one positional argument (inputs)")
+        ),
+        
+        # Mixed positional and keyword
+        (lambda inputs, *, optional=None: None, False, None),
+        (lambda inputs, attachments, *, optional=None: None, True, None),
+        
+        # Non-callable
+        ("not_a_function", False, None),
+    ],
+)
+def test_include_attachments(target, expected, error_msg):
+    """Test the _include_attachments function with various input cases."""
+    try:
+        from langchain_core.runnables import RunnableLambda
+    except ImportError:
+        if target == "runnable":
+            pytest.skip("langchain-core not installed")
+            return
+    
+    if target == "runnable":
+        target = RunnableLambda(lambda x: x)
+        expected = False
+        error_msg = None
+
+    if error_msg is not None:
+        with pytest.raises(ValueError, match=error_msg):
+            _include_attachments(target)
+    else:
+        result = _include_attachments(target)
+        assert result == expected
\ No newline at end of file

From 8e2e7042e8ad320ba80bf6df8705bca4380fe2e3 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 18:29:30 -0800
Subject: [PATCH 31/88] test that adding examples without attachments still
 lets you run evals

---
 python/tests/integration_tests/test_client.py | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index cb89b1629..bb5651997 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1213,3 +1213,51 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
 
     # Cleanup
     langchain_client.delete_dataset(dataset_name=dataset_name)
+
+
+def test_evaluate_with_no_attachments(langchain_client: Client) -> None:
+    """Test evaluating examples without attachments using a target that accepts attachments."""
+    dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for evals without attachments",
+        data_type=DataType.kv,
+    )
+
+    # Create example using old way, attachments should be set to {}
+    langchain_client.create_example(
+        dataset_id=dataset.id,
+        inputs={"question": "What is 2+2?"},
+        outputs={"answer": "4"},
+    )
+
+    # Verify we can create example the new way without attachments
+    example = ExampleUpsertWithAttachments(
+        dataset_id=dataset.id,
+        inputs={"question": "What is 3+1?"},
+        outputs={"answer": "4"},
+    )
+    langchain_client.upsert_examples_multipart(upserts=[example])
+
+    def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
+        # Verify we receive an empty attachments dict
+        assert isinstance(attachments, dict)
+        assert len(attachments) == 0
+        return {"answer": "4"}
+
+    def evaluator(run: Run, example: Example) -> Dict[str, Any]:
+        return {
+            "score": float(
+                run.outputs.get("answer") == example.outputs.get("answer")  # type: ignore
+            )
+        }
+
+    results = evaluate(
+        target, data=dataset_name, evaluators=[evaluator], client=langchain_client
+    )
+
+    assert len(results) == 2
+    for result in results:
+        assert result["evaluation_results"]["results"][0].score == 1.0
+
+    langchain_client.delete_dataset(dataset_name=dataset_name)
\ No newline at end of file

From cfa0e4c734738646e4f052d7e55e52323bd86ec5 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 18:47:03 -0800
Subject: [PATCH 32/88] fmt

---
 ...load_example_with_large_file_attachment.py | 30 +++++++++---------
 python/langsmith/_internal/_operations.py     |  6 ++--
 python/langsmith/client.py                    |  2 +-
 python/langsmith/evaluation/_runner.py        |  9 +++---
 python/tests/integration_tests/test_client.py |  4 +--
 .../unit_tests/evaluation/test_runner.py      | 31 ++++++++++---------
 6 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/python/bench/upload_example_with_large_file_attachment.py b/python/bench/upload_example_with_large_file_attachment.py
index 31b36b2e5..8aaedd696 100644
--- a/python/bench/upload_example_with_large_file_attachment.py
+++ b/python/bench/upload_example_with_large_file_attachment.py
@@ -1,17 +1,15 @@
+import os
 import statistics
 import time
 from pathlib import Path
 from typing import Dict
-from uuid import uuid4
-
 
 from langsmith import Client
-from langsmith.schemas import DataType, ExampleUpsertWithAttachments
-
-import os
+from langsmith.schemas import ExampleUpsertWithAttachments
 
 WRITE_BATCH = 10000
 
+
 def create_large_file(size: int, dir: str) -> str:
     """Create a large file for benchmarking purposes."""
     filename = f"large_file_{size}.txt"
@@ -32,9 +30,13 @@ def create_large_file(size: int, dir: str) -> str:
     print("Done creating big file...")
     return filepath
 
+
 DATASET_NAME = "upsert_big_file_to_dataset"
 
-def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int = 1) -> Dict:
+
+def benchmark_big_file_upload(
+    size_bytes: int, num_examples: int, samples: int = 1
+) -> Dict:
     """
     Benchmark run creation with specified parameters.
     Returns timing statistics.
@@ -60,7 +62,8 @@ def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int =
                 attachments={
                     "bigfile": ("text/plain", Path(large_file)),
                 },
-            ) for _ in range(num_examples)
+            )
+            for _ in range(num_examples)
         ]
 
         multipart_start = time.perf_counter()
@@ -83,29 +86,24 @@ def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int =
 size_bytes = 50000000
 num_examples = 10
 
+
 def main(size_bytes: int, num_examples: int = 1):
     """
     Run benchmarks with different combinations of parameters and report results.
     """
     results = benchmark_big_file_upload(size_bytes, num_examples)
 
-    print(
-        f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:"
-    )
+    print(f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:")
     print("-" * 30)
     print(f"{'Metric':<15} {'Result':>20}")
     print("-" * 30)
 
     metrics = ["mean", "median", "stdev", "min", "max"]
     for metric in metrics:
-        print(
-            f"{results[metric]:>20.4f}"
-        )
+        print(f"{results[metric]:>20.4f}")
 
     print("-" * 30)
-    print(
-        f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} "
-    )
+    print(f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} ")
     print("(examples/second)")
 
 
diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
index 80f07d8dd..430c690b0 100644
--- a/python/langsmith/_internal/_operations.py
+++ b/python/langsmith/_internal/_operations.py
@@ -2,8 +2,8 @@
 
 import itertools
 import logging
-from pathlib import Path
 import uuid
+from pathlib import Path
 from typing import Literal, Optional, Union, cast
 
 import orjson
@@ -257,9 +257,9 @@ def serialized_run_operation_to_multipart_parts_and_context(
                     " periods ('.'). Please rename the attachment and try again."
                 )
                 continue
-            
+
             if isinstance(valb, Path):
-                #TODO: actually deal with this case
+                # TODO: actually deal with this case
                 # This is just for speed of getting something out
                 continue
             else:
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index e9832e276..e649849ba 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -25,7 +25,6 @@
 import json
 import logging
 import os
-from pathlib import Path
 import random
 import threading
 import time
@@ -35,6 +34,7 @@
 import warnings
 import weakref
 from inspect import signature
+from pathlib import Path
 from queue import PriorityQueue
 from typing import (
     TYPE_CHECKING,
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 6393dfbfe..a860c137b 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1754,16 +1754,17 @@ def _include_attachments(
         )
     elif len(positional_params) == 2:
         mismatches = []
-        for i, (p, expected) in enumerate(zip(positional_params, ("inputs", "attachments"))):
+        for i, (p, expected) in enumerate(
+            zip(positional_params, ("inputs", "attachments"))
+        ):
             if p.name != expected:
                 mismatches.append((i, p.name))
 
         if mismatches:
             raise ValueError(
                 "When target function has two positional arguments, they must be named "
-                "'inputs' and 'attachments', respectively. Received: " + ",".join(
-                    f"'{p}' at index {i}" for i, p in mismatches
-                )
+                "'inputs' and 'attachments', respectively. Received: "
+                + ",".join(f"'{p}' at index {i}" for i, p in mismatches)
             )
 
     return len(positional_params) == 2
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index bb5651997..cfd848e01 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1216,7 +1216,7 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
 
 
 def test_evaluate_with_no_attachments(langchain_client: Client) -> None:
-    """Test evaluating examples without attachments using a target that accepts attachments."""
+    """Test evaluating examples without attachments using a target with attachments."""
     dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4]
     dataset = langchain_client.create_dataset(
         dataset_name,
@@ -1260,4 +1260,4 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
     for result in results:
         assert result["evaluation_results"]["results"][0].score == 1.0
 
-    langchain_client.delete_dataset(dataset_name=dataset_name)
\ No newline at end of file
+    langchain_client.delete_dataset(dataset_name=dataset_name)
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 5c06292ea..ffbcf2901 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -21,7 +21,7 @@
 from langsmith import schemas as ls_schemas
 from langsmith.client import Client
 from langsmith.evaluation._arunner import aevaluate, aevaluate_existing
-from langsmith.evaluation._runner import evaluate_existing, _include_attachments
+from langsmith.evaluation._runner import _include_attachments, evaluate_existing
 from langsmith.evaluation.evaluator import _normalize_evaluator_func
 
 
@@ -575,40 +575,41 @@ async def atarget(x):
         # Valid cases
         (lambda inputs: None, False, None),
         (lambda inputs, attachments: None, True, None),
-        
         # Invalid parameter names
         (
-            lambda x, y: None, 
-            None, 
+            lambda x, y: None,
+            None,
             "When target function has two positional arguments, they must be named "
-            "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' at index 1"
+            "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' "
+            "at index 1",
         ),
         (
-            lambda input, attachment: None, 
+            lambda input, attachment: None,
             None,
             "When target function has two positional arguments, they must be named "
             "'inputs' and 'attachments', respectively. Received: 'input' at index 0,"
-            "'attachment' at index 1"
+            "'attachment' at index 1",
         ),
-        
         # Too many parameters
         (
             lambda inputs, attachments, extra: None,
             None,
-            re.escape("Target function must accept at most two positional arguments (inputs, attachments)")
+            re.escape(
+                "Target function must accept at most two positional arguments "
+                "(inputs, attachments)"
+            ),
         ),
-        
         # No positional parameters
         (
             lambda *, foo="bar": None,
             None,
-            re.escape("Target function must accept at least one positional argument (inputs)")
+            re.escape(
+                "Target function must accept at least one positional argument (inputs)"
+            ),
         ),
-        
         # Mixed positional and keyword
         (lambda inputs, *, optional=None: None, False, None),
         (lambda inputs, attachments, *, optional=None: None, True, None),
-        
         # Non-callable
         ("not_a_function", False, None),
     ],
@@ -621,7 +622,7 @@ def test_include_attachments(target, expected, error_msg):
         if target == "runnable":
             pytest.skip("langchain-core not installed")
             return
-    
+
     if target == "runnable":
         target = RunnableLambda(lambda x: x)
         expected = False
@@ -632,4 +633,4 @@ def test_include_attachments(target, expected, error_msg):
             _include_attachments(target)
     else:
         result = _include_attachments(target)
-        assert result == expected
\ No newline at end of file
+        assert result == expected

From de38a37ffc244bce357b8d1f4d9e726970db7814 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 18:57:42 -0800
Subject: [PATCH 33/88] fmt

---
 python/langsmith/_internal/_operations.py | 2 +-
 python/langsmith/client.py                | 2 +-
 python/langsmith/evaluation/_arunner.py   | 4 ++--
 python/langsmith/evaluation/_runner.py    | 6 +++++-
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
index 430c690b0..5ba4ff90e 100644
--- a/python/langsmith/_internal/_operations.py
+++ b/python/langsmith/_internal/_operations.py
@@ -215,7 +215,7 @@ def serialized_run_operation_to_multipart_parts_and_context(
     op: SerializedRunOperation,
 ) -> MultipartPartsAndContext:
     acc_parts: list[MultipartPart] = []
-
+    valb: Union[bytes, Path]
     # this is main object, minus inputs/outputs/events/attachments
     acc_parts.append(
         (
diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index e649849ba..ce5731cdb 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3451,7 +3451,7 @@ def upsert_examples_multipart(
                                     f"{example_id}.attachment.{name}",
                                     (
                                         None,
-                                        open(file_path, "rb"),
+                                        open(file_path, "rb"),  # type: ignore[arg-type]
                                         f"{mime_type}; length={file_size}",
                                         {},
                                     ),
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 2ff023e3e..2d77edbd4 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -991,8 +991,8 @@ def _ensure_async_traceable(
         return target  # type: ignore
     else:
         if _is_langchain_runnable(target):
-            target = target.ainvoke  # type: ignore[attr-defined]
-        return rh.traceable(name="AsyncTarget")(target)
+            target = target.ainvoke  # type: ignore[union-attr]
+        return rh.traceable(name="AsyncTarget")(target)  # type: ignore[arg-type]
 
 
 def _aresolve_data(
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index a860c137b..fefec7aa2 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -20,6 +20,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    AsyncIterable,
     Awaitable,
     Callable,
     DefaultDict,
@@ -44,6 +45,7 @@
 from langsmith import run_trees as rt
 from langsmith import schemas
 from langsmith import utils as ls_utils
+from langsmith.evaluation._arunner import ATARGET_T
 from langsmith.evaluation.evaluator import (
     ComparisonEvaluationResult,
     DynamicComparisonRunEvaluator,
@@ -1731,7 +1733,9 @@ def _ensure_traceable(
 
 
 def _include_attachments(
-    target: Union[TARGET_T, Iterable[schemas.Run], Runnable],
+    target: Union[
+        ATARGET_T, TARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable
+    ],
 ) -> bool:
     """Whether the target function accepts attachments."""
     if _is_langchain_runnable(target) or not callable(target):

From 2e747356eac1c309cf89dcc0705f0f6fe58846ff Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 19:03:54 -0800
Subject: [PATCH 34/88] fmt

---
 python/langsmith/evaluation/_arunner.py | 42 ++++++++++++++++++++++++-
 python/langsmith/evaluation/_runner.py  |  6 +---
 2 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 2d77edbd4..a7cc2033c 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -5,6 +5,7 @@
 import asyncio
 import concurrent.futures as cf
 import datetime
+import inspect
 import logging
 import pathlib
 import uuid
@@ -40,7 +41,6 @@
     _ExperimentManagerMixin,
     _extract_feedback_keys,
     _ForwardResults,
-    _include_attachments,
     _is_langchain_runnable,
     _load_examples_map,
     _load_experiment,
@@ -965,6 +965,46 @@ def _get_run(r: run_trees.RunTree) -> None:
         )
 
 
+def _include_attachments(
+    target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict]],
+) -> bool:
+    """Whether the target function accepts attachments."""
+    if _is_langchain_runnable(target) or not callable(target):
+        return False
+    # Check function signature
+    sig = inspect.signature(target)
+    params = list(sig.parameters.values())
+    positional_params = [
+        p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+    ]
+
+    if len(positional_params) == 0:
+        raise ValueError(
+            "Target function must accept at least one positional argument (inputs)"
+        )
+    elif len(positional_params) > 2:
+        raise ValueError(
+            "Target function must accept at most two positional "
+            "arguments (inputs, attachments)"
+        )
+    elif len(positional_params) == 2:
+        mismatches = []
+        for i, (p, expected) in enumerate(
+            zip(positional_params, ("inputs", "attachments"))
+        ):
+            if p.name != expected:
+                mismatches.append((i, p.name))
+
+        if mismatches:
+            raise ValueError(
+                "When target function has two positional arguments, they must be named "
+                "'inputs' and 'attachments', respectively. Received: "
+                + ",".join(f"'{p}' at index {i}" for i, p in mismatches)
+            )
+
+    return len(positional_params) == 2
+
+
 def _ensure_async_traceable(
     target: ATARGET_T,
 ) -> rh.SupportsLangsmithExtra[[dict], Awaitable]:
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index fefec7aa2..a860c137b 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -20,7 +20,6 @@
 from typing import (
     TYPE_CHECKING,
     Any,
-    AsyncIterable,
     Awaitable,
     Callable,
     DefaultDict,
@@ -45,7 +44,6 @@
 from langsmith import run_trees as rt
 from langsmith import schemas
 from langsmith import utils as ls_utils
-from langsmith.evaluation._arunner import ATARGET_T
 from langsmith.evaluation.evaluator import (
     ComparisonEvaluationResult,
     DynamicComparisonRunEvaluator,
@@ -1733,9 +1731,7 @@ def _ensure_traceable(
 
 
 def _include_attachments(
-    target: Union[
-        ATARGET_T, TARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable
-    ],
+    target: Union[TARGET_T, Iterable[schemas.Run], Runnable],
 ) -> bool:
     """Whether the target function accepts attachments."""
     if _is_langchain_runnable(target) or not callable(target):

From f26c996ca33c6e4e9f623728cf45219130fa2ba2 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 19:06:08 -0800
Subject: [PATCH 35/88] attempt fix

---
 python/langsmith/client.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index ce5731cdb..ce51a09da 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3662,8 +3662,22 @@ def read_example(
             },
         )
 
+        example = response.json()
+        attachment_urls = {}
+        if example["attachment_urls"]:
+            for key, value in example["attachment_urls"].items():
+                response = requests.get(value["presigned_url"], stream=True)
+                response.raise_for_status()
+                reader = io.BytesIO(response.content)
+                attachment_urls[key.split(".")[1]] = (
+                    value["presigned_url"],
+                    reader,
+                )
+        del example["attachment_urls"]
+
         return ls_schemas.Example(
-            **response.json(),
+            **example,
+            attachment_urls=attachment_urls,
             _host_url=self._host_url,
             _tenant_id=self._get_optional_tenant_id(),
         )

From 095aae936aaa35387e2701a9ef2f9119dfdbb08e Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 19:24:54 -0800
Subject: [PATCH 36/88] fix test

---
 python/langsmith/evaluation/_runner.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index a860c137b..6c916f47a 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1740,7 +1740,10 @@ def _include_attachments(
     sig = inspect.signature(target)
     params = list(sig.parameters.values())
     positional_params = [
-        p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+        p
+        for p in params
+        if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+        and p.default is p.empty
     ]
 
     if len(positional_params) == 0:

From a99da233cd79e8112c51a8f28853f30badc76e35 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 19 Nov 2024 19:29:12 -0800
Subject: [PATCH 37/88] add unit test

---
 python/tests/unit_tests/evaluation/test_runner.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index ffbcf2901..6fb5a1739 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -16,6 +16,7 @@
 from unittest.mock import MagicMock
 
 import pytest
+from langchain_core.runnables import chain as as_runnable
 
 from langsmith import evaluate
 from langsmith import schemas as ls_schemas
@@ -569,6 +570,16 @@ async def atarget(x):
             )
 
 
+@as_runnable
+def nested_predict(inputs):
+    return {"output": "Yes"}
+
+
+@as_runnable
+def lc_predict(inputs):
+    return nested_predict.invoke(inputs)
+
+
 @pytest.mark.parametrize(
     "target,expected,error_msg",
     [
@@ -612,6 +623,8 @@ async def atarget(x):
         (lambda inputs, attachments, *, optional=None: None, True, None),
         # Non-callable
         ("not_a_function", False, None),
+        # Runnable
+        (lc_predict.invoke, False, None),
     ],
 )
 def test_include_attachments(target, expected, error_msg):

From b9dd0f28eb71724a840823d121d4fd01a07fe8d5 Mon Sep 17 00:00:00 2001
From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com>
Date: Wed, 20 Nov 2024 06:57:16 -0800
Subject: [PATCH 38/88] Bump version (rc)

---
 python/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 191d61b22..fa5fed80b 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.1.144rc1"
+version = "0.1.144rc3"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = ["LangChain <support@langchain.dev>"]
 license = "MIT"

From 01ef4d01a775a24107062835bd98101da9f3fab5 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Wed, 27 Nov 2024 15:22:02 -0800
Subject: [PATCH 39/88] repetitions

---
 python/langsmith/client.py                    |  4 ++--
 python/langsmith/evaluation/_runner.py        |  4 ++++
 python/tests/integration_tests/test_client.py | 12 +++++++-----
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index f1213d939..d77a79d8e 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3381,11 +3381,11 @@ def upsert_examples_multipart(
         upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [],
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upsert examples."""
-        """ if not (self.info.instance_flags or {}).get(
+        if not (self.info.instance_flags or {}).get(
                 "examples_multipart_enabled", False
             ):
             raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.")
-         """
+        
         parts: List[MultipartPart] = []
 
         for example in upserts:
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 6c916f47a..756375d9b 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1670,6 +1670,10 @@ def _get_run(r: rt.RunTree) -> None:
                     client=client,
                 ),
             )
+            if include_attachments:
+                for attachment in example.attachment_urls:
+                    _, reader = example.attachment_urls[attachment]
+                    reader.seek(0)
         except Exception as e:
             logger.error(
                 f"Error running target function: {e}", exc_info=True, stacklevel=1
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index cfd848e01..02510fccf 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1160,12 +1160,14 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
     langchain_client.delete_dataset(dataset_id=dataset.id)
 
 
-@pytest.mark.skip(
-    reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first"
-)
+
 def test_evaluate_with_attachments(langchain_client: Client) -> None:
     """Test evaluating examples with attachments."""
     dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
+    langchain_client = Client(
+        api_key="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",
+        api_url="https://dev.api.smith.langchain.com"
+    )
     # 1. Create dataset
     dataset = langchain_client.create_dataset(
         dataset_name,
@@ -1203,11 +1205,11 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
 
     # 5. Run evaluation
     results = evaluate(
-        target, data=dataset_name, evaluators=[evaluator], client=langchain_client
+        target, data=dataset_name, evaluators=[evaluator], client=langchain_client, num_repetitions=2
     )
 
     # 6. Verify results
-    assert len(results) == 1
+    assert len(results) == 2
     for result in results:
         assert result["evaluation_results"]["results"][0].score == 1.0
 

From 3715c3070743e59caff2d38269f254c9516e40df Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Wed, 27 Nov 2024 15:22:35 -0800
Subject: [PATCH 40/88] nit

---
 python/tests/integration_tests/test_client.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 02510fccf..4ba418e1f 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1160,7 +1160,9 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
     langchain_client.delete_dataset(dataset_id=dataset.id)
 
 
-
+@pytest.mark.skip(
+    reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first"
+)
 def test_evaluate_with_attachments(langchain_client: Client) -> None:
     """Test evaluating examples with attachments."""
     dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]

From 49442d7aa4957d100e584894601aeb8f04f9bf50 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 2 Dec 2024 10:24:37 -0800
Subject: [PATCH 41/88] added upload endpoint

---
 python/langsmith/client.py                    |  71 ++++++++--
 python/langsmith/schemas.py                   |   6 +
 python/tests/integration_tests/test_client.py | 121 +++++++++++++++++-
 3 files changed, 183 insertions(+), 15 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index d77a79d8e..20bab0933 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3375,27 +3375,24 @@ def create_example_from_run(
             created_at=created_at,
         )
 
-    def upsert_examples_multipart(
+    def _prepate_multipart_data(
         self,
-        *,
-        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [],
-    ) -> ls_schemas.UpsertExamplesResponse:
-        """Upsert examples."""
-        if not (self.info.instance_flags or {}).get(
-                "examples_multipart_enabled", False
-            ):
-            raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.")
-        
+        examples: List[
+            ls_schemas.ExampleUploadWithAttachments
+            | ls_schemas.ExampleUpsertWithAttachments
+        ],
+        include_dataset_id: bool = False,
+    ) -> List[MultipartPart]:
         parts: List[MultipartPart] = []
 
-        for example in upserts:
+        for example in examples:
             if example.id is not None:
                 example_id = str(example.id)
             else:
                 example_id = str(uuid.uuid4())
 
             example_body = {
-                "dataset_id": example.dataset_id,
+                **({"dataset_id": example.dataset_id} if include_dataset_id else {}),
                 "created_at": example.created_at,
             }
             if example.metadata is not None:
@@ -3493,6 +3490,56 @@ def upsert_examples_multipart(
         else:
             data = encoder
 
+        return encoder, data
+
+    def upload_examples_multipart(
+        self,
+        *,
+        uploads: List[ls_schemas.ExampleUploadWithAttachments] = [],
+    ) -> ls_schemas.UpsertExamplesResponse:
+        """Upload examples."""
+        if not (self.info.instance_flags or {}).get(
+            "examples_multipart_enabled", False
+        ):
+            raise ValueError(
+                "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
+            )
+
+        encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False)
+        dataset_ids = set([example.dataset_id for example in uploads])
+        if len(dataset_ids) > 1:
+            raise ValueError("All examples must be in the same dataset.")
+        dataset_id = list(dataset_ids)[0]
+
+        response = self.request_with_retries(
+            "POST",
+            f"/v1/platform/datasets/{dataset_id}/examples",
+            request_kwargs={
+                "data": data,
+                "headers": {
+                    **self._headers,
+                    "Content-Type": encoder.content_type,
+                },
+            },
+        )
+        ls_utils.raise_for_status_with_text(response)
+        return response.json()
+
+    def upsert_examples_multipart(
+        self,
+        *,
+        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [],
+    ) -> ls_schemas.UpsertExamplesResponse:
+        """Upsert examples."""
+        if not (self.info.instance_flags or {}).get(
+            "examples_multipart_enabled", False
+        ):
+            raise ValueError(
+                "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
+            )
+
+        encoder, data = self._prepate_multipart_data(upserts, include_dataset_id=True)
+
         response = self.request_with_retries(
             "POST",
             "/v1/platform/examples/multipart",
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index f4c3df0e9..533cc8e67 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -114,6 +114,12 @@ class ExampleUpsertWithAttachments(ExampleCreate):
     attachments: Optional[Attachments] = None
 
 
+class ExampleUploadWithAttachments(ExampleUpsertWithAttachments):
+    """Example upload with attachments."""
+
+    pass
+
+
 class Example(ExampleBase):
     """Example model."""
 
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 4ba418e1f..277fb6faf 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -21,7 +21,13 @@
 
 from langsmith.client import ID_TYPE, Client
 from langsmith.evaluation import evaluate
-from langsmith.schemas import DataType, Example, ExampleUpsertWithAttachments, Run
+from langsmith.schemas import (
+    DataType,
+    Example,
+    ExampleUploadWithAttachments,
+    ExampleUpsertWithAttachments,
+    Run,
+)
 from langsmith.utils import (
     LangSmithConnectionError,
     LangSmithError,
@@ -371,6 +377,111 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
         client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm")
 
 
+def test_upload_examples_multipart(langchain_client: Client):
+    """Test uploading examples with attachments via multipart endpoint."""
+    dataset_name = "__test_upload_examples_multipart" + uuid4().hex[:4]
+    if langchain_client.has_dataset(dataset_name=dataset_name):
+        langchain_client.delete_dataset(dataset_name=dataset_name)
+
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for multipart example upload",
+        data_type=DataType.kv,
+    )
+
+    # Test example with all fields
+    example_id = uuid4()
+    example_1 = ExampleUploadWithAttachments(
+        id=example_id,
+        dataset_id=dataset.id,
+        inputs={"text": "hello world"},
+        attachments={
+            "test_file": ("text/plain", b"test content"),
+        },
+    )
+
+    # Test example with minimum required fields
+    example_2 = ExampleUploadWithAttachments(
+        dataset_id=dataset.id,
+        inputs={"text": "minimal example"},
+    )
+
+    # Test example with outputs and multiple attachments
+    example_3 = ExampleUploadWithAttachments(
+        dataset_id=dataset.id,
+        inputs={"text": "example with outputs"},
+        outputs={"response": "test response"},
+        attachments={
+            "file1": ("text/plain", b"content 1"),
+            "file2": ("text/plain", b"content 2"),
+        },
+    )
+
+    # Test uploading multiple examples at once
+    created_examples = langchain_client.upload_examples_multipart(
+        uploads=[example_1, example_2, example_3]
+    )
+    assert created_examples["count"] == 3
+
+    created_example_1 = langchain_client.read_example(example_id)
+    assert created_example_1.inputs["text"] == "hello world"
+
+    # Verify the examples were created correctly
+    examples = [
+        ex
+        for ex in langchain_client.list_examples(
+            dataset_id=dataset.id,
+            include_attachments=True,
+        )
+    ]
+    assert len(examples) == 3
+
+    # Verify example with ID was created with correct ID
+    example_with_id = [ex for ex in examples if ex.id == example_id][0]
+    assert example_with_id.inputs["text"] == "hello world"
+    assert "test_file" in example_with_id.attachment_urls
+
+    # Verify example with outputs and multiple attachments
+    example_with_outputs = next(
+        ex
+        for ex in examples
+        if ex.outputs and ex.outputs.get("response") == "test response"
+    )
+    assert len(example_with_outputs.attachment_urls) == 2
+    assert "file1" in example_with_outputs.attachment_urls
+    assert "file2" in example_with_outputs.attachment_urls
+
+    # Test uploading to non-existent dataset fails
+    fake_id = uuid4()
+    with pytest.raises(LangSmithNotFoundError):
+        langchain_client.upload_examples_multipart(
+            uploads=[
+                ExampleUploadWithAttachments(
+                    dataset_id=fake_id,
+                    inputs={"text": "should fail"},
+                )
+            ]
+        )
+
+    # Test uploading examples to different datasets fails
+    with pytest.raises(ValueError, match="All examples must be in the same dataset"):
+        langchain_client.upload_examples_multipart(
+            uploads=[
+                ExampleUploadWithAttachments(
+                    dataset_id=dataset.id,
+                    inputs={"text": "example 1"},
+                ),
+                ExampleUploadWithAttachments(
+                    dataset_id=uuid4(),
+                    inputs={"text": "example 2"},
+                ),
+            ]
+        )
+
+    # Clean up
+    langchain_client.delete_dataset(dataset_name=dataset_name)
+
+
 def test_upsert_examples_multipart(langchain_client: Client) -> None:
     """Test upserting examples with attachments via multipart endpoint."""
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
@@ -1168,7 +1279,7 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None:
     dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
     langchain_client = Client(
         api_key="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",
-        api_url="https://dev.api.smith.langchain.com"
+        api_url="https://dev.api.smith.langchain.com",
     )
     # 1. Create dataset
     dataset = langchain_client.create_dataset(
@@ -1207,7 +1318,11 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
 
     # 5. Run evaluation
     results = evaluate(
-        target, data=dataset_name, evaluators=[evaluator], client=langchain_client, num_repetitions=2
+        target,
+        data=dataset_name,
+        evaluators=[evaluator],
+        client=langchain_client,
+        num_repetitions=2,
     )
 
     # 6. Verify results

From 484f2a5c1800497c251dc1a39a29a73e1c89712c Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Fri, 6 Dec 2024 13:01:58 -0800
Subject: [PATCH 42/88] comments

---
 python/langsmith/client.py                    |  7 ++-
 python/langsmith/evaluation/_arunner.py       |  5 +-
 python/langsmith/evaluation/_runner.py        |  2 +-
 .../unit_tests/evaluation/test_runner.py      | 51 +++++++++++++++----
 4 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 450427f4e..77b6aeff2 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3603,7 +3603,12 @@ def upsert_examples_multipart(
         *,
         upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [],
     ) -> ls_schemas.UpsertExamplesResponse:
-        """Upsert examples."""
+        """Upsert examples.
+
+        .. deprecated:: 0.1.0
+           This method is deprecated. Use :func:`langsmith.upload_examples_multipart` instead.
+
+        """  # noqa: E501
         if not (self.info.instance_flags or {}).get(
             "examples_multipart_enabled", False
         ):
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index ca04a57e0..1a77c75ff 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -1056,7 +1056,10 @@ def _include_attachments(
     sig = inspect.signature(target)
     params = list(sig.parameters.values())
     positional_params = [
-        p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+        p
+        for p in params
+        if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+        and p.default is p.empty
     ]
 
     if len(positional_params) == 0:
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index fa8b289a3..d62a8935d 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1801,8 +1801,8 @@ def _forward(
     experiment_name: str,
     metadata: dict,
     client: langsmith.Client,
-    include_attachments: bool = False,
     upload_results: bool,
+    include_attachments: bool = False,
 ) -> _ForwardResults:
     run: Optional[schemas.RunBase] = None
 
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index d52614f91..e376bfd39 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -21,7 +21,13 @@
 from langsmith import evaluate
 from langsmith import schemas as ls_schemas
 from langsmith.client import Client
-from langsmith.evaluation._arunner import aevaluate, aevaluate_existing
+from langsmith.evaluation._arunner import (
+    _include_attachments as a_include_attachments,
+)
+from langsmith.evaluation._arunner import (
+    aevaluate,
+    aevaluate_existing,
+)
 from langsmith.evaluation._runner import _include_attachments, evaluate_existing
 from langsmith.evaluation.evaluator import (
     _normalize_comparison_evaluator_func,
@@ -689,12 +695,26 @@ def lc_predict(inputs):
     return nested_predict.invoke(inputs)
 
 
+async def async_just_inputs(inputs):
+    return None
+
+
+async def async_just_inputs_with_attachments(inputs, attachments):
+    return None
+
+
+async def async_extra_args(inputs, attachments, foo="bar"):
+    return None
+
+
 @pytest.mark.parametrize(
-    "target,expected,error_msg",
+    "target,expected,error_msg,is_async",
     [
         # Valid cases
-        (lambda inputs: None, False, None),
-        (lambda inputs, attachments: None, True, None),
+        (lambda inputs: None, False, None, False),
+        (lambda inputs, attachments: None, True, None, False),
+        (async_just_inputs, False, None, True),
+        (async_just_inputs_with_attachments, True, None, True),
         # Invalid parameter names
         (
             lambda x, y: None,
@@ -702,6 +722,7 @@ def lc_predict(inputs):
             "When target function has two positional arguments, they must be named "
             "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' "
             "at index 1",
+            False,
         ),
         (
             lambda input, attachment: None,
@@ -709,6 +730,7 @@ def lc_predict(inputs):
             "When target function has two positional arguments, they must be named "
             "'inputs' and 'attachments', respectively. Received: 'input' at index 0,"
             "'attachment' at index 1",
+            False,
         ),
         # Too many parameters
         (
@@ -718,6 +740,7 @@ def lc_predict(inputs):
                 "Target function must accept at most two positional arguments "
                 "(inputs, attachments)"
             ),
+            False,
         ),
         # No positional parameters
         (
@@ -726,17 +749,21 @@ def lc_predict(inputs):
             re.escape(
                 "Target function must accept at least one positional argument (inputs)"
             ),
+            False,
         ),
         # Mixed positional and keyword
-        (lambda inputs, *, optional=None: None, False, None),
-        (lambda inputs, attachments, *, optional=None: None, True, None),
+        (lambda inputs, *, optional=None: None, False, None, False),
+        (lambda inputs, attachments, *, optional=None: None, True, None, False),
         # Non-callable
-        ("not_a_function", False, None),
+        ("not_a_function", False, None, False),
         # Runnable
-        (lc_predict.invoke, False, None),
+        (lc_predict.invoke, False, None, False),
+        # Positional args with defaults
+        (lambda inputs, attachments, foo="bar": None, True, None, False),
+        (async_extra_args, True, None, True),
     ],
 )
-def test_include_attachments(target, expected, error_msg):
+def test_include_attachments(target, expected, error_msg, is_async):
     """Test the _include_attachments function with various input cases."""
     try:
         from langchain_core.runnables import RunnableLambda
@@ -750,13 +777,15 @@ def test_include_attachments(target, expected, error_msg):
         expected = False
         error_msg = None
 
+    func = _include_attachments if not is_async else a_include_attachments
     if error_msg is not None:
         with pytest.raises(ValueError, match=error_msg):
-            _include_attachments(target)
+            func(target)
     else:
-        result = _include_attachments(target)
+        result = func(target)
         assert result == expected
 
+
 def summary_eval_runs_examples(runs_, examples_):
     return {"score": len(runs_[0].dotted_order)}
 

From 28fe5d1d2aa285af9de231ab1b6de2c03f5517de Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Fri, 6 Dec 2024 13:15:06 -0800
Subject: [PATCH 43/88] fmt

---
 python/tests/integration_tests/test_client.py     | 2 +-
 python/tests/unit_tests/evaluation/test_runner.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 8cf8a6158..b717dbc58 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1381,7 +1381,7 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
 
     langchain_client.delete_dataset(dataset_name=dataset_name)
 
-    
+
 def test_examples_length_validation(langchain_client: Client) -> None:
     """Test that mismatched lengths raise ValueError for create and update examples."""
     dataset_name = "__test_examples_length_validation" + uuid4().hex[:4]
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 30021d5c6..a1e2d79de 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -18,7 +18,7 @@
 import pytest
 from langchain_core.runnables import chain as as_runnable
 
-from langsmith import Client, aevaluate, evaluate, evaluate_existing, aevaluate_existing
+from langsmith import Client, aevaluate, evaluate
 from langsmith import schemas as ls_schemas
 from langsmith.evaluation._arunner import (
     _include_attachments as a_include_attachments,

From 1e5eebfa0ba3f16d0fa6de36c2990c28cda6826b Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Fri, 6 Dec 2024 13:39:56 -0800
Subject: [PATCH 44/88] fix type annotations; skip attachment rewind when attachment_urls is None

---
 python/langsmith/client.py              | 12 +++++-------
 python/langsmith/evaluation/_arunner.py |  2 +-
 python/langsmith/evaluation/_runner.py  |  2 +-
 python/langsmith/schemas.py             |  4 ++++
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 9e63e73a5..4e32188e6 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -1684,9 +1684,7 @@ def update_run(
         events: Optional[Sequence[dict]] = None,
         extra: Optional[Dict] = None,
         tags: Optional[List[str]] = None,
-        attachments: Optional[
-            Dict[str, tuple[str, bytes] | ls_schemas.Attachment]
-        ] = None,
+        attachments: Optional[ls_schemas.Attachments] = None,
         **kwargs: Any,
     ) -> None:
         """Update a run in the LangSmith API.
@@ -3464,12 +3462,12 @@ def create_example_from_run(
 
     def _prepate_multipart_data(
         self,
-        examples: List[
-            ls_schemas.ExampleUploadWithAttachments
-            | ls_schemas.ExampleUpsertWithAttachments
+        examples: Union[
+            List[ls_schemas.ExampleUploadWithAttachments]
+            | List[ls_schemas.ExampleUpsertWithAttachments]
         ],
         include_dataset_id: bool = False,
-    ) -> List[MultipartPart]:
+    ) -> Tuple[Any, bytes]:
         parts: List[MultipartPart] = []
 
         for example in examples:
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index af9714e6c..9412bf5f3 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -1055,7 +1055,7 @@ def _get_run(r: run_trees.RunTree) -> None:
 
 
 def _include_attachments(
-    target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict]],
+    target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable],
 ) -> bool:
     """Whether the target function accepts attachments."""
     if _is_langchain_runnable(target) or not callable(target):
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 9ed6b954a..764225596 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1841,7 +1841,7 @@ def _get_run(r: rt.RunTree) -> None:
                 *args,
                 langsmith_extra=langsmith_extra,
             )
-            if include_attachments:
+            if include_attachments and example.attachment_urls is not None:
                 for attachment in example.attachment_urls:
                     _, reader = example.attachment_urls[attachment]
                     reader.seek(0)
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index cc2accec9..34de09aaa 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -81,6 +81,10 @@ def write(self, b: bytes) -> int:
         """Write function."""
         ...
 
+    def seek(self, offset: int, whence: int = 0) -> int:
+        """Seek function."""
+        ...
+
 
 class ExampleBase(BaseModel):
     """Example base model."""

From e013d72da72c6f99feb024e288cdcd96f6e24929 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Fri, 6 Dec 2024 13:59:56 -0800
Subject: [PATCH 45/88] set examples_multipart_enabled instance flag in upsert tests

---
 python/tests/integration_tests/test_client.py | 1 +
 python/tests/unit_tests/test_client.py        | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index b717dbc58..1c1a5a3eb 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -484,6 +484,7 @@ def test_upload_examples_multipart(langchain_client: Client):
 
 def test_upsert_examples_multipart(langchain_client: Client) -> None:
     """Test upserting examples with attachments via multipart endpoint."""
+    langchain_client._info = {"instance_flags": {"examples_multipart_enabled": True}}
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py
index 98e52d7b4..939aa9ad2 100644
--- a/python/tests/unit_tests/test_client.py
+++ b/python/tests/unit_tests/test_client.py
@@ -426,7 +426,11 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None:
     mock_session.request.return_value = mock_response
     mock_session_cls.return_value = mock_session
 
-    client = Client(api_url="http://localhost:1984", api_key="123")
+    client = Client(
+        api_url="http://localhost:1984",
+        api_key="123",
+        info={"instance_flags": {"examples_multipart_enabled": True}},
+    )
 
     # Create test data
     example_id = uuid.uuid4()
@@ -451,7 +455,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None:
     client.upsert_examples_multipart(upserts=[example])
 
     # Verify the request
-    assert mock_session.request.call_count == 2  # we always make a call to /info
+    assert mock_session.request.call_count == 1
     call_args = mock_session.request.call_args
 
     assert call_args[0][0] == "POST"

From bc2d4b647cd85503ab4d6819ec3f9e59dae05a39 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Fri, 6 Dec 2024 14:22:59 -0800
Subject: [PATCH 46/88] pass upload_results before include_attachments to _forward

---
 python/langsmith/evaluation/_runner.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 764225596..ebd259e14 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1053,6 +1053,7 @@ def _evaluate(
         runs,
         client,
     )
+
     manager = _ExperimentManager(
         data,
         client=client,
@@ -1507,8 +1508,8 @@ def _predict(
                     self.experiment_name,
                     self._metadata,
                     self.client,
-                    self._include_attachments,
                     self._upload_results,
+                    self._include_attachments,
                 )
 
         else:
@@ -1521,8 +1522,8 @@ def _predict(
                         self.experiment_name,
                         self._metadata,
                         self.client,
-                        self._include_attachments,
                         self._upload_results,
+                        self._include_attachments,
                     )
                     for example in self.examples
                 ]

From 96f4246db88ebd73d4697e18b876e5478d720eab Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Fri, 6 Dec 2024 14:35:49 -0800
Subject: [PATCH 47/88] fix test

---
 python/tests/integration_tests/test_client.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 1c1a5a3eb..4d2478680 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -515,7 +515,6 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
             "my_file": ("text/plain", b"more test content"),
         },
     )
-
     created_examples = langchain_client.upsert_examples_multipart(
         upserts=[example_1, example_2]
     )
@@ -551,12 +550,7 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
     updated_examples = langchain_client.upsert_examples_multipart(
         upserts=[example_1_update]
     )
-    assert updated_examples["count"] == 1
-    assert updated_examples["example_ids"][0] == str(example_id)
-    updated_example = langchain_client.read_example(updated_examples["example_ids"][0])
-    assert updated_example.inputs["text"] == "bar baz"
-    assert updated_example.outputs["response"] == "foo"
-
+    assert updated_examples["count"] == 0
     # Test that adding invalid example fails
     # even if valid examples are added alongside
     example_3 = ExampleUpsertWithAttachments(
@@ -579,7 +573,6 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
     # Throw type errors when not passing ExampleUpsertWithAttachments
     with pytest.raises(AttributeError):
         langchain_client.upsert_examples_multipart(upserts=[{"foo": "bar"}])
-
     langchain_client.delete_dataset(dataset_name=dataset_name)
 
 

From 887782e931eaaabff96918970d0ab4b94ff3501f Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Fri, 6 Dec 2024 14:38:31 -0800
Subject: [PATCH 48/88] reword upsert multipart test docstring

---
 python/tests/integration_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 4d2478680..bc3f6f33f 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -483,7 +483,7 @@ def test_upload_examples_multipart(langchain_client: Client):
 
 
 def test_upsert_examples_multipart(langchain_client: Client) -> None:
-    """Test upserting examples with attachments via multipart endpoint."""
+    """Test upserting examples with attachments via the multipart endpoint."""
     langchain_client._info = {"instance_flags": {"examples_multipart_enabled": True}}
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):

From 66228e8af4047d27a878edd7c51c98245c2cc629 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 08:03:44 -0800
Subject: [PATCH 49/88] defaults

---
 python/langsmith/client.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 4e32188e6..9fe393125 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3580,7 +3580,7 @@ def _prepate_multipart_data(
     def upload_examples_multipart(
         self,
         *,
-        uploads: List[ls_schemas.ExampleUploadWithAttachments] = [],
+        uploads: List[ls_schemas.ExampleUploadWithAttachments] = None,
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upload examples."""
         if not (self.info.instance_flags or {}).get(
@@ -3589,7 +3589,8 @@ def upload_examples_multipart(
             raise ValueError(
                 "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
             )
-
+        if uploads is None:
+            uploads = []
         encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False)
         dataset_ids = set([example.dataset_id for example in uploads])
         if len(dataset_ids) > 1:
@@ -3613,7 +3614,7 @@ def upload_examples_multipart(
     def upsert_examples_multipart(
         self,
         *,
-        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [],
+        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None,
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upsert examples.
 
@@ -3627,6 +3628,8 @@ def upsert_examples_multipart(
             raise ValueError(
                 "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
             )
+        if upserts is None:
+            upserts = []
 
         encoder, data = self._prepate_multipart_data(upserts, include_dataset_id=True)
 

From a5ee5990f47903a81d252f996d923758c02252f4 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 08:46:18 -0800
Subject: [PATCH 50/88] refactor

---
 python/langsmith/client.py                    | 29 ++++++------
 python/langsmith/evaluation/_arunner.py       |  6 ++-
 python/langsmith/evaluation/_runner.py        |  8 ++--
 python/langsmith/schemas.py                   | 30 +++++++++----
 python/tests/integration_tests/test_client.py | 44 +++++--------------
 5 files changed, 56 insertions(+), 61 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 9fe393125..778911808 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3580,6 +3580,7 @@ def _prepate_multipart_data(
     def upload_examples_multipart(
         self,
         *,
+        dataset_id: ID_TYPE,
         uploads: List[ls_schemas.ExampleUploadWithAttachments] = None,
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upload examples."""
@@ -3592,10 +3593,6 @@ def upload_examples_multipart(
         if uploads is None:
             uploads = []
         encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False)
-        dataset_ids = set([example.dataset_id for example in uploads])
-        if len(dataset_ids) > 1:
-            raise ValueError("All examples must be in the same dataset.")
-        dataset_id = list(dataset_ids)[0]
 
         response = self.request_with_retries(
             "POST",
@@ -3823,21 +3820,21 @@ def read_example(
         )
 
         example = response.json()
-        attachment_urls = {}
+        attachments_info = {}
         if example["attachment_urls"]:
             for key, value in example["attachment_urls"].items():
                 response = requests.get(value["presigned_url"], stream=True)
                 response.raise_for_status()
                 reader = io.BytesIO(response.content)
-                attachment_urls[key.split(".")[1]] = (
-                    value["presigned_url"],
-                    reader,
-                )
+                attachments_info[key.split(".")[1]] = {
+                    "presigned_url": value["presigned_url"],
+                    "reader": reader,
+                }
         del example["attachment_urls"]
 
         return ls_schemas.Example(
             **example,
-            attachment_urls=attachment_urls,
+            attachments_info=attachments_info,
             _host_url=self._host_url,
             _tenant_id=self._get_optional_tenant_id(),
         )
@@ -3910,21 +3907,21 @@ def list_examples(
         for i, example in enumerate(
             self._get_paginated_list("/examples", params=params)
         ):
-            attachment_urls = {}
+            attachments_info = {}
             if example["attachment_urls"]:
                 for key, value in example["attachment_urls"].items():
                     response = requests.get(value["presigned_url"], stream=True)
                     response.raise_for_status()
                     reader = io.BytesIO(response.content)
-                    attachment_urls[key.split(".")[1]] = (
-                        value["presigned_url"],
-                        reader,
-                    )
+                    attachments_info[key.split(".")[1]] = {
+                        "presigned_url": value["presigned_url"],
+                        "reader": reader,
+                    }
             del example["attachment_urls"]
 
             yield ls_schemas.Example(
                 **example,
-                attachment_urls=attachment_urls,
+                attachments_info=attachments_info,
                 _host_url=self._host_url,
                 _tenant_id=self._get_optional_tenant_id(),
             )
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 9412bf5f3..5a22ba305 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -1023,7 +1023,7 @@ def _get_run(r: run_trees.RunTree) -> None:
     with rh.tracing_context(enabled=True):
         try:
             args = (
-                (example.inputs, example.attachment_urls)
+                (example.inputs, example.attachments_info)
                 if include_attachments
                 else (example.inputs,)
             )
@@ -1044,6 +1044,10 @@ def _get_run(r: run_trees.RunTree) -> None:
                     client=client,
                 ),
             )
+            if include_attachments and example.attachments_info is not None:
+                for attachment in example.attachments_info:
+                    reader = example.attachments_info[attachment]["reader"]
+                    reader.seek(0)
         except Exception as e:
             logger.error(
                 f"Error running target function: {e}", exc_info=True, stacklevel=1
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index ebd259e14..fbb096484 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1834,7 +1834,7 @@ def _get_run(r: rt.RunTree) -> None:
         )
         try:
             args = (
-                (example.inputs, example.attachment_urls)
+                (example.inputs, example.attachments_info)
                 if include_attachments
                 else (example.inputs,)
             )
@@ -1842,9 +1842,9 @@ def _get_run(r: rt.RunTree) -> None:
                 *args,
                 langsmith_extra=langsmith_extra,
             )
-            if include_attachments and example.attachment_urls is not None:
-                for attachment in example.attachment_urls:
-                    _, reader = example.attachment_urls[attachment]
+            if include_attachments and example.attachments_info is not None:
+                for attachment in example.attachments_info:
+                    reader = example.attachments_info[attachment]["reader"]
                     reader.seek(0)
         except Exception as e:
             logger.error(
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 34de09aaa..b06552dcd 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -93,9 +93,6 @@ class ExampleBase(BaseModel):
     inputs: Dict[str, Any] = Field(default_factory=dict)
     outputs: Optional[Dict[str, Any]] = Field(default=None)
     metadata: Optional[Dict[str, Any]] = Field(default=None)
-    attachment_urls: Optional[Dict[str, Tuple[str, BinaryIOLike]]] = Field(default=None)
-    """Dictionary with attachment names as keys and a tuple of the S3 url
-    and a reader of the data for the file."""
 
     class Config:
         """Configuration class for the schema."""
@@ -112,16 +109,30 @@ class ExampleCreate(ExampleBase):
     split: Optional[Union[str, List[str]]] = None
 
 
-class ExampleUpsertWithAttachments(ExampleCreate):
-    """Example create with attachments."""
+class ExampleUploadWithAttachments(BaseModel):
+    """Example upload with attachments."""
 
+    id: Optional[UUID]
+    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
+    inputs: Dict[str, Any] = Field(default_factory=dict)
+    outputs: Optional[Dict[str, Any]] = Field(default=None)
+    metadata: Optional[Dict[str, Any]] = Field(default=None)
+    split: Optional[Union[str, List[str]]] = None
     attachments: Optional[Attachments] = None
 
 
-class ExampleUploadWithAttachments(ExampleUpsertWithAttachments):
-    """Example upload with attachments."""
+class ExampleUpsertWithAttachments(ExampleUploadWithAttachments):
+    """Example create with attachments."""
+
+    dataset_id: UUID
 
-    pass
+
+class AttachmentInfo(TypedDict):
+    """Info for an attachment."""
+
+    presigned_url: str
+    reader: BinaryIOLike
+    # TODO: add mime type
 
 
 class Example(ExampleBase):
@@ -135,6 +146,9 @@ class Example(ExampleBase):
     modified_at: Optional[datetime] = Field(default=None)
     runs: List[Run] = Field(default_factory=list)
     source_run_id: Optional[UUID] = None
+    attachments_info: Optional[Dict[str, AttachmentInfo]] = Field(default=None)
+    """Dictionary with attachment names as keys and a tuple of the S3 url
+    and a reader of the data for the file."""
     _host_url: Optional[str] = PrivateAttr(default=None)
     _tenant_id: Optional[UUID] = PrivateAttr(default=None)
 
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index bc3f6f33f..e175efc9e 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -393,7 +393,6 @@ def test_upload_examples_multipart(langchain_client: Client):
     example_id = uuid4()
     example_1 = ExampleUploadWithAttachments(
         id=example_id,
-        dataset_id=dataset.id,
         inputs={"text": "hello world"},
         attachments={
             "test_file": ("text/plain", b"test content"),
@@ -402,13 +401,11 @@ def test_upload_examples_multipart(langchain_client: Client):
 
     # Test example with minimum required fields
     example_2 = ExampleUploadWithAttachments(
-        dataset_id=dataset.id,
         inputs={"text": "minimal example"},
     )
 
     # Test example with outputs and multiple attachments
     example_3 = ExampleUploadWithAttachments(
-        dataset_id=dataset.id,
         inputs={"text": "example with outputs"},
         outputs={"response": "test response"},
         attachments={
@@ -419,7 +416,7 @@ def test_upload_examples_multipart(langchain_client: Client):
 
     # Test uploading multiple examples at once
     created_examples = langchain_client.upload_examples_multipart(
-        uploads=[example_1, example_2, example_3]
+        dataset_id=dataset.id, uploads=[example_1, example_2, example_3]
     )
     assert created_examples["count"] == 3
 
@@ -439,7 +436,7 @@ def test_upload_examples_multipart(langchain_client: Client):
     # Verify example with ID was created with correct ID
     example_with_id = [ex for ex in examples if ex.id == example_id][0]
     assert example_with_id.inputs["text"] == "hello world"
-    assert "test_file" in example_with_id.attachment_urls
+    assert "test_file" in example_with_id.attachments_info
 
     # Verify example with outputs and multiple attachments
     example_with_outputs = next(
@@ -447,35 +444,20 @@ def test_upload_examples_multipart(langchain_client: Client):
         for ex in examples
         if ex.outputs and ex.outputs.get("response") == "test response"
     )
-    assert len(example_with_outputs.attachment_urls) == 2
-    assert "file1" in example_with_outputs.attachment_urls
-    assert "file2" in example_with_outputs.attachment_urls
+    assert len(example_with_outputs.attachments_info) == 2
+    assert "file1" in example_with_outputs.attachments_info
+    assert "file2" in example_with_outputs.attachments_info
 
     # Test uploading to non-existent dataset fails
     fake_id = uuid4()
     with pytest.raises(LangSmithNotFoundError):
         langchain_client.upload_examples_multipart(
+            dataset_id=fake_id,
             uploads=[
                 ExampleUploadWithAttachments(
-                    dataset_id=fake_id,
                     inputs={"text": "should fail"},
                 )
-            ]
-        )
-
-    # Test uploading examples to different datasets fails
-    with pytest.raises(ValueError, match="All examples must be in the same dataset"):
-        langchain_client.upload_examples_multipart(
-            uploads=[
-                ExampleUploadWithAttachments(
-                    dataset_id=dataset.id,
-                    inputs={"text": "example 1"},
-                ),
-                ExampleUploadWithAttachments(
-                    dataset_id=uuid4(),
-                    inputs={"text": "example 2"},
-                ),
-            ]
+            ],
         )
 
     # Clean up
@@ -1283,8 +1265,7 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None:
     )
 
     # 2. Create example with attachments
-    example = ExampleUpsertWithAttachments(
-        dataset_id=dataset.id,
+    example = ExampleUploadWithAttachments(
         inputs={"question": "What is shown in the image?"},
         outputs={"answer": "test image"},
         attachments={
@@ -1292,13 +1273,13 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None:
         },
     )
 
-    langchain_client.upsert_examples_multipart(upserts=[example])
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
 
     # 3. Define target function that uses attachments
     def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
         # Verify we receive the attachment data
         assert "image" in attachments
-        image_url, image_data = attachments["image"]
+        image_data = attachments["image"]["reader"]
         assert image_data.read() == b"fake image data for testing"
         return {"answer": "test image"}
 
@@ -1345,12 +1326,11 @@ def test_evaluate_with_no_attachments(langchain_client: Client) -> None:
     )
 
     # Verify we can create example the new way without attachments
-    example = ExampleUpsertWithAttachments(
-        dataset_id=dataset.id,
+    example = ExampleUploadWithAttachments(
         inputs={"question": "What is 3+1?"},
         outputs={"answer": "4"},
     )
-    langchain_client.upsert_examples_multipart(upserts=[example])
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
 
     def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
         # Verify we receive an empty attachments dict

From 2f1e6be60774db3e69ae29b91130005a88011214 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 08:54:04 -0800
Subject: [PATCH 51/88] validate upsert example type and use Optional for list params

---
 python/langsmith/client.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 778911808..c51f31d4a 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3469,6 +3469,13 @@ def _prepate_multipart_data(
         include_dataset_id: bool = False,
     ) -> Tuple[Any, bytes]:
         parts: List[MultipartPart] = []
+        if include_dataset_id:
+            if not isinstance(examples[0], ls_schemas.ExampleUpsertWithAttachments):
+                raise ValueError(
+                    "The examples must be of type ExampleUpsertWithAttachments"
+                    " if include_dataset_id is True"
+                )
+            dataset_id = examples[0].dataset_id
 
         for example in examples:
             if example.id is not None:
@@ -3477,7 +3484,7 @@ def _prepate_multipart_data(
                 example_id = str(uuid.uuid4())
 
             example_body = {
-                **({"dataset_id": example.dataset_id} if include_dataset_id else {}),
+                **({"dataset_id": dataset_id} if include_dataset_id else {}),
                 "created_at": example.created_at,
             }
             if example.metadata is not None:
@@ -3581,7 +3588,7 @@ def upload_examples_multipart(
         self,
         *,
         dataset_id: ID_TYPE,
-        uploads: List[ls_schemas.ExampleUploadWithAttachments] = None,
+        uploads: Optional[List[ls_schemas.ExampleUploadWithAttachments]] = None,
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upload examples."""
         if not (self.info.instance_flags or {}).get(
@@ -3611,7 +3618,7 @@ def upload_examples_multipart(
     def upsert_examples_multipart(
         self,
         *,
-        upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None,
+        upserts: Optional[List[ls_schemas.ExampleUpsertWithAttachments]] = None,
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upsert examples.
 

From c9ade2e10689f0754e5b59634cafb5426ef838f9 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 09:25:56 -0800
Subject: [PATCH 52/88] guard against missing attachment_urls key in example responses

---
 python/langsmith/client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index c51f31d4a..73190c4b6 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3828,7 +3828,7 @@ def read_example(
 
         example = response.json()
         attachments_info = {}
-        if example["attachment_urls"]:
+        if "attachment_urls" in example and example["attachment_urls"]:
             for key, value in example["attachment_urls"].items():
                 response = requests.get(value["presigned_url"], stream=True)
                 response.raise_for_status()
@@ -3915,7 +3915,7 @@ def list_examples(
             self._get_paginated_list("/examples", params=params)
         ):
             attachments_info = {}
-            if example["attachment_urls"]:
+            if "attachment_urls" in example and example["attachment_urls"]:
                 for key, value in example["attachment_urls"].items():
                     response = requests.get(value["presigned_url"], stream=True)
                     response.raise_for_status()

From 4576779a9b71097896a1e336380907aa3ef1d31e Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 10:08:06 -0800
Subject: [PATCH 53/88] remove hard-coded dev client from test_evaluate_with_attachments

---
 python/langsmith/client.py                    | 1 -
 python/tests/integration_tests/test_client.py | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 73190c4b6..49831e526 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -948,7 +948,6 @@ def _get_paginated_list(
                 params=params_,
             )
             items = response.json()
-
             if not items:
                 break
             yield from items
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index e175efc9e..371387c87 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1253,10 +1253,6 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
 def test_evaluate_with_attachments(langchain_client: Client) -> None:
     """Test evaluating examples with attachments."""
     dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
-    langchain_client = Client(
-        api_key="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",
-        api_url="https://dev.api.smith.langchain.com",
-    )
     # 1. Create dataset
     dataset = langchain_client.create_dataset(
         dataset_name,

From 578a715af42b64940a4f301a5b5f39708006b44c Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 10:39:57 -0800
Subject: [PATCH 54/88] rename attachments_info back to attachment_urls

---
 python/langsmith/client.py                    | 16 ++++++++--------
 python/langsmith/evaluation/_arunner.py       |  8 ++++----
 python/langsmith/evaluation/_runner.py        |  8 ++++----
 python/langsmith/schemas.py                   |  2 +-
 python/tests/integration_tests/test_client.py |  8 ++++----
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 49831e526..eca9c1614 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3826,13 +3826,13 @@ def read_example(
         )
 
         example = response.json()
-        attachments_info = {}
-        if "attachment_urls" in example and example["attachment_urls"]:
+        attachment_urls = {}
+        if example["attachment_urls"]:
             for key, value in example["attachment_urls"].items():
                 response = requests.get(value["presigned_url"], stream=True)
                 response.raise_for_status()
                 reader = io.BytesIO(response.content)
-                attachments_info[key.split(".")[1]] = {
+                attachment_urls[key.split(".")[1]] = {
                     "presigned_url": value["presigned_url"],
                     "reader": reader,
                 }
@@ -3840,7 +3840,7 @@ def read_example(
 
         return ls_schemas.Example(
             **example,
-            attachments_info=attachments_info,
+            attachment_urls=attachment_urls,
             _host_url=self._host_url,
             _tenant_id=self._get_optional_tenant_id(),
         )
@@ -3913,13 +3913,13 @@ def list_examples(
         for i, example in enumerate(
             self._get_paginated_list("/examples", params=params)
         ):
-            attachments_info = {}
-            if "attachment_urls" in example and example["attachment_urls"]:
+            attachment_urls = {}
+            if example["attachment_urls"]:
                 for key, value in example["attachment_urls"].items():
                     response = requests.get(value["presigned_url"], stream=True)
                     response.raise_for_status()
                     reader = io.BytesIO(response.content)
-                    attachments_info[key.split(".")[1]] = {
+                    attachment_urls[key.split(".")[1]] = {
                         "presigned_url": value["presigned_url"],
                         "reader": reader,
                     }
@@ -3927,7 +3927,7 @@ def list_examples(
 
             yield ls_schemas.Example(
                 **example,
-                attachments_info=attachments_info,
+                attachment_urls=attachment_urls,
                 _host_url=self._host_url,
                 _tenant_id=self._get_optional_tenant_id(),
             )
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 5a22ba305..729166add 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -1023,7 +1023,7 @@ def _get_run(r: run_trees.RunTree) -> None:
     with rh.tracing_context(enabled=True):
         try:
             args = (
-                (example.inputs, example.attachments_info)
+                (example.inputs, example.attachment_urls)
                 if include_attachments
                 else (example.inputs,)
             )
@@ -1044,9 +1044,9 @@ def _get_run(r: run_trees.RunTree) -> None:
                     client=client,
                 ),
             )
-            if include_attachments and example.attachments_info is not None:
-                for attachment in example.attachments_info:
-                    reader = example.attachments_info[attachment]["reader"]
+            if include_attachments and example.attachment_urls is not None:
+                for attachment in example.attachment_urls:
+                    reader = example.attachment_urls[attachment]["reader"]
                     reader.seek(0)
         except Exception as e:
             logger.error(
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index fbb096484..199d8fa22 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1834,7 +1834,7 @@ def _get_run(r: rt.RunTree) -> None:
         )
         try:
             args = (
-                (example.inputs, example.attachments_info)
+                (example.inputs, example.attachment_urls)
                 if include_attachments
                 else (example.inputs,)
             )
@@ -1842,9 +1842,9 @@ def _get_run(r: rt.RunTree) -> None:
                 *args,
                 langsmith_extra=langsmith_extra,
             )
-            if include_attachments and example.attachments_info is not None:
-                for attachment in example.attachments_info:
-                    reader = example.attachments_info[attachment]["reader"]
+            if include_attachments and example.attachment_urls is not None:
+                for attachment in example.attachment_urls:
+                    reader = example.attachment_urls[attachment]["reader"]
                     reader.seek(0)
         except Exception as e:
             logger.error(
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index b06552dcd..41fa76cb0 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -146,7 +146,7 @@ class Example(ExampleBase):
     modified_at: Optional[datetime] = Field(default=None)
     runs: List[Run] = Field(default_factory=list)
     source_run_id: Optional[UUID] = None
-    attachments_info: Optional[Dict[str, AttachmentInfo]] = Field(default=None)
+    attachment_urls: Optional[Dict[str, AttachmentInfo]] = Field(default=None)
     """Dictionary with attachment names as keys and a tuple of the S3 url
     and a reader of the data for the file."""
     _host_url: Optional[str] = PrivateAttr(default=None)
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 371387c87..0281b1df6 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -436,7 +436,7 @@ def test_upload_examples_multipart(langchain_client: Client):
     # Verify example with ID was created with correct ID
     example_with_id = [ex for ex in examples if ex.id == example_id][0]
     assert example_with_id.inputs["text"] == "hello world"
-    assert "test_file" in example_with_id.attachments_info
+    assert "test_file" in example_with_id.attachment_urls
 
     # Verify example with outputs and multiple attachments
     example_with_outputs = next(
@@ -444,9 +444,9 @@ def test_upload_examples_multipart(langchain_client: Client):
         for ex in examples
         if ex.outputs and ex.outputs.get("response") == "test response"
     )
-    assert len(example_with_outputs.attachments_info) == 2
-    assert "file1" in example_with_outputs.attachments_info
-    assert "file2" in example_with_outputs.attachments_info
+    assert len(example_with_outputs.attachment_urls) == 2
+    assert "file1" in example_with_outputs.attachment_urls
+    assert "file2" in example_with_outputs.attachment_urls
 
     # Test uploading to non-existent dataset fails
     fake_id = uuid4()

From e4e3068accbbb4a9793d36d4c2fcd4fcc5daac92 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 10:54:12 -0800
Subject: [PATCH 55/88] read upserted examples by id instead of response order in test

---
 python/tests/integration_tests/test_client.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 0281b1df6..338ab482d 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -502,14 +502,12 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
     )
     assert created_examples["count"] == 2
 
-    created_example_1 = langchain_client.read_example(
-        created_examples["example_ids"][0]
-    )
+    created_example_1 = langchain_client.read_example(example_id)
     assert created_example_1.inputs["text"] == "hello world"
     assert created_example_1.outputs is None
 
     created_example_2 = langchain_client.read_example(
-        created_examples["example_ids"][1]
+        [id_ for id_ in created_examples["example_ids"] if id_ != example_id][0]
     )
     assert created_example_2.inputs["text"] == "foo bar"
     assert created_example_2.outputs["response"] == "baz"

From 6e91e05324ace1788b3d2b5457743169becdd74a Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 11:08:20 -0800
Subject: [PATCH 56/88] compare example ids as strings in upsert test

---
 python/tests/integration_tests/test_client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 338ab482d..d808adbfe 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -507,7 +507,7 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
     assert created_example_1.outputs is None
 
     created_example_2 = langchain_client.read_example(
-        [id_ for id_ in created_examples["example_ids"] if id_ != example_id][0]
+        [id_ for id_ in created_examples["example_ids"] if id_ != str(example_id)][0]
     )
     assert created_example_2.inputs["text"] == "foo bar"
     assert created_example_2.outputs["response"] == "baz"

From 020d07436de6eeef12214699397c5f0883026392 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 11:16:03 -0800
Subject: [PATCH 57/88] raise ValueError for unsupported example types in multipart upload

---
 python/langsmith/client.py                    | 7 +++++++
 python/tests/integration_tests/test_client.py | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index eca9c1614..e9196a4c9 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3477,6 +3477,13 @@ def _prepate_multipart_data(
             dataset_id = examples[0].dataset_id
 
         for example in examples:
+            if not isinstance(
+                example, ls_schemas.ExampleUploadWithAttachments
+            ) and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments):
+                raise ValueError(
+                    "The examples must be of type ExampleUploadWithAttachments"
+                    " or ExampleUpsertWithAttachments"
+                )
             if example.id is not None:
                 example_id = str(example.id)
             else:
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index d808adbfe..53e940c06 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -551,7 +551,7 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None:
     assert len(all_examples_in_dataset) == 2
 
     # Throw type errors when not passing ExampleUpsertWithAttachments
-    with pytest.raises(AttributeError):
+    with pytest.raises(ValueError):
         langchain_client.upsert_examples_multipart(upserts=[{"foo": "bar"}])
     langchain_client.delete_dataset(dataset_name=dataset_name)
 

From 1abe4f9a6ffd805fc902540df5e60cb7f6449c58 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 13:02:26 -0800
Subject: [PATCH 58/88] rename feature flag to dataset_examples_multipart_enabled

---
 python/langsmith/client.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index e9196a4c9..a0c98cdcd 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3598,7 +3598,7 @@ def upload_examples_multipart(
     ) -> ls_schemas.UpsertExamplesResponse:
         """Upload examples."""
         if not (self.info.instance_flags or {}).get(
-            "examples_multipart_enabled", False
+            "dataset_examples_multipart_enabled", False
         ):
             raise ValueError(
                 "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."

From 39be3c772b7b32ead519bd586c8dabd8143f18aa Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 13:14:29 -0800
Subject: [PATCH 59/88] flags in tests

---
 python/tests/integration_tests/test_client.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 53e940c06..ae866521c 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -379,6 +379,9 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
 
 def test_upload_examples_multipart(langchain_client: Client):
     """Test uploading examples with attachments via multipart endpoint."""
+    langchain_client._info = {
+        "instance_flags": {"dataset_examples_multipart_enabled": True}
+    }
     dataset_name = "__test_upload_examples_multipart" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
@@ -1245,11 +1248,11 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
     langchain_client.delete_dataset(dataset_id=dataset.id)
 
 
-@pytest.mark.skip(
-    reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first"
-)
 def test_evaluate_with_attachments(langchain_client: Client) -> None:
     """Test evaluating examples with attachments."""
+    langchain_client._info = {
+        "instance_flags": {"dataset_examples_multipart_enabled": True}
+    }
     dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
     # 1. Create dataset
     dataset = langchain_client.create_dataset(
@@ -1305,6 +1308,9 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
 
 def test_evaluate_with_no_attachments(langchain_client: Client) -> None:
     """Test evaluating examples without attachments using a target with attachments."""
+    langchain_client._info = {
+        "instance_flags": {"dataset_examples_multipart_enabled": True}
+    }
     dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4]
     dataset = langchain_client.create_dataset(
         dataset_name,

From 5c2c74dbd2bf7abe661e89d8249dd67bc31f5640 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 14:16:55 -0800
Subject: [PATCH 60/88] attachment_urls -> attachments

---
 python/langsmith/client.py              | 12 ++++++------
 python/langsmith/evaluation/_arunner.py |  8 ++++----
 python/langsmith/evaluation/_runner.py  |  8 ++++----
 python/langsmith/schemas.py             |  2 +-
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index a0c98cdcd..82c25e8da 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3833,13 +3833,13 @@ def read_example(
         )
 
         example = response.json()
-        attachment_urls = {}
+        attachments = {}
         if example["attachment_urls"]:
             for key, value in example["attachment_urls"].items():
                 response = requests.get(value["presigned_url"], stream=True)
                 response.raise_for_status()
                 reader = io.BytesIO(response.content)
-                attachment_urls[key.split(".")[1]] = {
+                attachments[key.split(".")[1]] = {
                     "presigned_url": value["presigned_url"],
                     "reader": reader,
                 }
@@ -3847,7 +3847,7 @@ def read_example(
 
         return ls_schemas.Example(
             **example,
-            attachment_urls=attachment_urls,
+            attachments=attachments,
             _host_url=self._host_url,
             _tenant_id=self._get_optional_tenant_id(),
         )
@@ -3920,13 +3920,13 @@ def list_examples(
         for i, example in enumerate(
             self._get_paginated_list("/examples", params=params)
         ):
-            attachment_urls = {}
+            attachments = {}
             if example["attachment_urls"]:
                 for key, value in example["attachment_urls"].items():
                     response = requests.get(value["presigned_url"], stream=True)
                     response.raise_for_status()
                     reader = io.BytesIO(response.content)
-                    attachment_urls[key.split(".")[1]] = {
+                    attachments[key.split(".")[1]] = {
                         "presigned_url": value["presigned_url"],
                         "reader": reader,
                     }
@@ -3934,7 +3934,7 @@ def list_examples(
 
             yield ls_schemas.Example(
                 **example,
-                attachment_urls=attachment_urls,
+                attachments=attachments,
                 _host_url=self._host_url,
                 _tenant_id=self._get_optional_tenant_id(),
             )
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 729166add..ccbad302b 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -1023,7 +1023,7 @@ def _get_run(r: run_trees.RunTree) -> None:
     with rh.tracing_context(enabled=True):
         try:
             args = (
-                (example.inputs, example.attachment_urls)
+                (example.inputs, example.attachments)
                 if include_attachments
                 else (example.inputs,)
             )
@@ -1044,9 +1044,9 @@ def _get_run(r: run_trees.RunTree) -> None:
                     client=client,
                 ),
             )
-            if include_attachments and example.attachment_urls is not None:
-                for attachment in example.attachment_urls:
-                    reader = example.attachment_urls[attachment]["reader"]
+            if include_attachments and example.attachments is not None:
+                for attachment in example.attachments:
+                    reader = example.attachments[attachment]["reader"]
                     reader.seek(0)
         except Exception as e:
             logger.error(
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 199d8fa22..5a00585d5 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1834,7 +1834,7 @@ def _get_run(r: rt.RunTree) -> None:
         )
         try:
             args = (
-                (example.inputs, example.attachment_urls)
+                (example.inputs, example.attachments)
                 if include_attachments
                 else (example.inputs,)
             )
@@ -1842,9 +1842,9 @@ def _get_run(r: rt.RunTree) -> None:
                 *args,
                 langsmith_extra=langsmith_extra,
             )
-            if include_attachments and example.attachment_urls is not None:
-                for attachment in example.attachment_urls:
-                    reader = example.attachment_urls[attachment]["reader"]
+            if include_attachments and example.attachments is not None:
+                for attachment in example.attachments:
+                    reader = example.attachments[attachment]["reader"]
                     reader.seek(0)
         except Exception as e:
             logger.error(
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 41fa76cb0..5b226a830 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -146,7 +146,7 @@ class Example(ExampleBase):
     modified_at: Optional[datetime] = Field(default=None)
     runs: List[Run] = Field(default_factory=list)
     source_run_id: Optional[UUID] = None
-    attachment_urls: Optional[Dict[str, AttachmentInfo]] = Field(default=None)
+    attachments: Optional[Dict[str, AttachmentInfo]] = Field(default=None)
     """Dictionary with attachment names as keys and a tuple of the S3 url
     and a reader of the data for the file."""
     _host_url: Optional[str] = PrivateAttr(default=None)

From 2b385b631ab32348fd00123993bc38a6438f49ee Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 14:20:06 -0800
Subject: [PATCH 61/88] x

---
 python/tests/integration_tests/test_client.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index ae866521c..83efc397e 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -439,7 +439,7 @@ def test_upload_examples_multipart(langchain_client: Client):
     # Verify example with ID was created with correct ID
     example_with_id = [ex for ex in examples if ex.id == example_id][0]
     assert example_with_id.inputs["text"] == "hello world"
-    assert "test_file" in example_with_id.attachment_urls
+    assert "test_file" in example_with_id.attachments
 
     # Verify example with outputs and multiple attachments
     example_with_outputs = next(
@@ -447,9 +447,9 @@ def test_upload_examples_multipart(langchain_client: Client):
         for ex in examples
         if ex.outputs and ex.outputs.get("response") == "test response"
     )
-    assert len(example_with_outputs.attachment_urls) == 2
-    assert "file1" in example_with_outputs.attachment_urls
-    assert "file2" in example_with_outputs.attachment_urls
+    assert len(example_with_outputs.attachments) == 2
+    assert "file1" in example_with_outputs.attachments
+    assert "file2" in example_with_outputs.attachments
 
     # Test uploading to non-existent dataset fails
     fake_id = uuid4()

From 0daf2459285529b6d151cdf4a48a817d2cc02018 Mon Sep 17 00:00:00 2001
From: Bagatur <baskaryan@gmail.com>
Date: Mon, 9 Dec 2024 14:23:49 -0800
Subject: [PATCH 62/88] fmt

---
 python/langsmith/evaluation/_arunner.py | 45 +------------------------
 python/langsmith/evaluation/_runner.py  | 34 +++++++++----------
 2 files changed, 18 insertions(+), 61 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 729166add..6ba6095f8 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -5,7 +5,6 @@
 import asyncio
 import concurrent.futures as cf
 import datetime
-import inspect
 import logging
 import pathlib
 import uuid
@@ -41,6 +40,7 @@
     _ExperimentManagerMixin,
     _extract_feedback_keys,
     _ForwardResults,
+    _include_attachments,
     _is_langchain_runnable,
     _load_examples_map,
     _load_experiment,
@@ -1058,49 +1058,6 @@ def _get_run(r: run_trees.RunTree) -> None:
         )
 
 
-def _include_attachments(
-    target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable],
-) -> bool:
-    """Whether the target function accepts attachments."""
-    if _is_langchain_runnable(target) or not callable(target):
-        return False
-    # Check function signature
-    sig = inspect.signature(target)
-    params = list(sig.parameters.values())
-    positional_params = [
-        p
-        for p in params
-        if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
-        and p.default is p.empty
-    ]
-
-    if len(positional_params) == 0:
-        raise ValueError(
-            "Target function must accept at least one positional argument (inputs)"
-        )
-    elif len(positional_params) > 2:
-        raise ValueError(
-            "Target function must accept at most two positional "
-            "arguments (inputs, attachments)"
-        )
-    elif len(positional_params) == 2:
-        mismatches = []
-        for i, (p, expected) in enumerate(
-            zip(positional_params, ("inputs", "attachments"))
-        ):
-            if p.name != expected:
-                mismatches.append((i, p.name))
-
-        if mismatches:
-            raise ValueError(
-                "When target function has two positional arguments, they must be named "
-                "'inputs' and 'attachments', respectively. Received: "
-                + ",".join(f"'{p}' at index {i}" for i, p in mismatches)
-            )
-
-    return len(positional_params) == 2
-
-
 def _ensure_async_traceable(
     target: ATARGET_T,
 ) -> rh.SupportsLangsmithExtra[[dict], Awaitable]:
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 199d8fa22..bc24585d0 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1913,9 +1913,7 @@ def _ensure_traceable(
     return fn
 
 
-def _include_attachments(
-    target: Union[TARGET_T, Iterable[schemas.Run], Runnable],
-) -> bool:
+def _include_attachments(target: Any) -> bool:
     """Whether the target function accepts attachments."""
     if _is_langchain_runnable(target) or not callable(target):
         return False
@@ -1923,37 +1921,39 @@ def _include_attachments(
     sig = inspect.signature(target)
     params = list(sig.parameters.values())
     positional_params = [
-        p
-        for p in params
-        if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
-        and p.default is p.empty
+        p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
     ]
+    positional_no_default = [p for p in positional_params if p.default is p.empty]
 
     if len(positional_params) == 0:
         raise ValueError(
-            "Target function must accept at least one positional argument (inputs)"
+            "Target function must accept at least one positional argument (inputs)."
         )
-    elif len(positional_params) > 2:
+    elif len(positional_no_default) > 2:
         raise ValueError(
-            "Target function must accept at most two positional "
-            "arguments (inputs, attachments)"
+            "Target function must accept at most two "
+            "arguments without default values: (inputs, attachments)."
         )
-    elif len(positional_params) == 2:
+    else:
         mismatches = []
+        num_args = 0
         for i, (p, expected) in enumerate(
             zip(positional_params, ("inputs", "attachments"))
         ):
             if p.name != expected:
                 mismatches.append((i, p.name))
+            else:
+                num_args += 1
 
         if mismatches:
-            raise ValueError(
-                "When target function has two positional arguments, they must be named "
-                "'inputs' and 'attachments', respectively. Received: "
-                + ",".join(f"'{p}' at index {i}" for i, p in mismatches)
+            msg = (
+                "Target function is expected to have a first positional argument "
+                "'inputs' and optionally a second positional argument 'attachments'. "
+                "Received: " + ", ".join(f"'{p}' at index {i}" for i, p in mismatches)
             )
+            raise ValueError(msg)
 
-    return len(positional_params) == 2
+    return num_args == 2
 
 
 def _resolve_experiment(

From c8a2b01e0625312884233c879316d1a3a4037b9a Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 14:48:48 -0800
Subject: [PATCH 63/88] undo

---
 python/langsmith/_internal/_operations.py | 24 +++++++++--------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
index 3f82f5f9a..cc615d60d 100644
--- a/python/langsmith/_internal/_operations.py
+++ b/python/langsmith/_internal/_operations.py
@@ -214,7 +214,6 @@ def serialized_run_operation_to_multipart_parts_and_context(
     op: SerializedRunOperation,
 ) -> MultipartPartsAndContext:
     acc_parts: list[MultipartPart] = []
-    valb: Union[bytes, Path]
     # this is main object, minus inputs/outputs/events/attachments
     acc_parts.append(
         (
@@ -257,22 +256,17 @@ def serialized_run_operation_to_multipart_parts_and_context(
                 )
                 continue
 
-            if isinstance(valb, Path):
-                # TODO: actually deal with this case
-                # This is just for speed of getting something out
-                continue
-            else:
-                acc_parts.append(
+            acc_parts.append(
+                (
+                    f"attachment.{op.id}.{n}",
                     (
-                        f"attachment.{op.id}.{n}",
-                        (
-                            None,
-                            valb,
-                            content_type,
-                            {"Content-Length": str(len(valb))},
-                        ),
-                    )
+                        None,
+                        valb,
+                        content_type,
+                        {"Content-Length": str(len(valb))},
+                    ),
                 )
+            )
     return MultipartPartsAndContext(
         acc_parts,
         f"trace={op.trace_id},id={op.id}",

From 8033b7e1dc85d5e31cb48e970ae5591730bc484e Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 14:49:06 -0800
Subject: [PATCH 64/88] undo

---
 python/langsmith/_internal/_operations.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
index cc615d60d..c68c17499 100644
--- a/python/langsmith/_internal/_operations.py
+++ b/python/langsmith/_internal/_operations.py
@@ -3,7 +3,6 @@
 import itertools
 import logging
 import uuid
-from pathlib import Path
 from typing import Literal, Optional, Union, cast
 
 from langsmith import schemas as ls_schemas

From 114a79d8cf121cba285bcd41c5c58adee0b96398 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 14:49:28 -0800
Subject: [PATCH 65/88] fix

---
 python/langsmith/_internal/_operations.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
index c68c17499..24c40efa0 100644
--- a/python/langsmith/_internal/_operations.py
+++ b/python/langsmith/_internal/_operations.py
@@ -213,6 +213,7 @@ def serialized_run_operation_to_multipart_parts_and_context(
     op: SerializedRunOperation,
 ) -> MultipartPartsAndContext:
     acc_parts: list[MultipartPart] = []
+    
     # this is main object, minus inputs/outputs/events/attachments
     acc_parts.append(
         (

From b524f7235320974d833c0b3b63ef68cd3248bc8b Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 14:49:52 -0800
Subject: [PATCH 66/88] fix

---
 python/langsmith/_internal/_operations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
index 24c40efa0..66decff0f 100644
--- a/python/langsmith/_internal/_operations.py
+++ b/python/langsmith/_internal/_operations.py
@@ -213,7 +213,7 @@ def serialized_run_operation_to_multipart_parts_and_context(
     op: SerializedRunOperation,
 ) -> MultipartPartsAndContext:
     acc_parts: list[MultipartPart] = []
-    
+
     # this is main object, minus inputs/outputs/events/attachments
     acc_parts.append(
         (

From 23187f172276b1b18c5b44a42abc4f5ca1019cc8 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 15:13:49 -0800
Subject: [PATCH 67/88] test fix

---
 python/langsmith/client.py                    |  6 +--
 .../unit_tests/evaluation/test_runner.py      | 48 ++++++++++++++-----
 2 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 82c25e8da..0f39aa9c0 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3843,10 +3843,9 @@ def read_example(
                     "presigned_url": value["presigned_url"],
                     "reader": reader,
                 }
-        del example["attachment_urls"]
 
         return ls_schemas.Example(
-            **example,
+            **{k: v for k, v in example.items() if k != "attachment_urls"},
             attachments=attachments,
             _host_url=self._host_url,
             _tenant_id=self._get_optional_tenant_id(),
@@ -3930,10 +3929,9 @@ def list_examples(
                         "presigned_url": value["presigned_url"],
                         "reader": reader,
                     }
-            del example["attachment_urls"]
 
             yield ls_schemas.Example(
-                **example,
+                **{k: v for k, v in example.items() if k != "attachment_urls"},
                 attachments=attachments,
                 _host_url=self._host_url,
                 _tenant_id=self._get_optional_tenant_id(),
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index a1e2d79de..87ebe6042 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -11,7 +11,7 @@
 import uuid
 from datetime import datetime, timezone
 from threading import Lock
-from typing import Callable, List
+from typing import Any, Callable, Dict, List, Tuple
 from unittest import mock
 from unittest.mock import MagicMock
 
@@ -53,7 +53,9 @@ def request(self, verb: str, endpoint: str, *args, **kwargs):
                 return res
             elif endpoint == "http://localhost:1984/examples":
                 res = MagicMock()
-                res.json.return_value = [e.dict() for e in self.ds_examples]
+                res.json.return_value = [
+                    e.dict() if not isinstance(e, dict) else e for e in self.ds_examples
+                ]
                 return res
             elif endpoint == "http://localhost:1984/sessions":
                 res = {}  # type: ignore
@@ -143,14 +145,23 @@ def _wait_until(condition: Callable, timeout: int = 8):
     raise TimeoutError("Condition not met")
 
 
-def _create_example(idx: int) -> ls_schemas.Example:
+def _create_example(idx: int) -> Tuple[ls_schemas.Example, Dict[str, Any]]:
+    _id = uuid.uuid4()
+    _created_at = datetime.now(timezone.utc)
     return ls_schemas.Example(
-        id=uuid.uuid4(),
+        id=_id,
         inputs={"in": idx},
         outputs={"answer": idx + 1},
         dataset_id="00886375-eb2a-4038-9032-efff60309896",
-        created_at=datetime.now(timezone.utc),
-    )
+        created_at=_created_at,
+    ), {
+        "id": _id,
+        "dataset_id": "00886375-eb2a-4038-9032-efff60309896",
+        "created_at": _created_at,
+        "inputs": {"in": idx},
+        "outputs": {"answer": idx + 1},
+        "attachment_urls": None,
+    }
 
 
 @pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
@@ -166,10 +177,13 @@ def test_evaluate_results(
 
     SPLIT_SIZE = 3
     NUM_REPETITIONS = 4
-    ds_examples = [_create_example(i) for i in range(10)]
+    ds_example_responses = [_create_example(i) for i in range(10)]
+    ds_examples = [e[0] for e in ds_example_responses]
     dev_split = random.sample(ds_examples, SPLIT_SIZE)
     tenant_id = str(uuid.uuid4())
-    fake_request = FakeRequest(ds_id, ds_name, ds_examples, tenant_id)
+    fake_request = FakeRequest(
+        ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id
+    )
     session.request = fake_request.request
     client = Client(
         api_url="http://localhost:1984",
@@ -393,7 +407,12 @@ def eval2(x, y, inputs):
             _normalize_evaluator_func(eval_)
 
         with pytest.raises(ValueError, match="Invalid evaluator function."):
-            evaluate((lambda x: x), data=ds_examples, evaluators=[eval_], client=client)
+            evaluate(
+                (lambda inputs: inputs),
+                data=ds_examples,
+                evaluators=[eval_],
+                client=client,
+            )
 
 
 def test_evaluate_raises_for_async():
@@ -437,10 +456,13 @@ async def test_aevaluate_results(
 
     SPLIT_SIZE = 3
     NUM_REPETITIONS = 4
-    ds_examples = [_create_example(i) for i in range(10)]
+    ds_example_responses = [_create_example(i) for i in range(10)]
+    ds_examples = [e[0] for e in ds_example_responses]
     dev_split = random.sample(ds_examples, SPLIT_SIZE)
     tenant_id = str(uuid.uuid4())
-    fake_request = FakeRequest(ds_id, ds_name, ds_examples, tenant_id)
+    fake_request = FakeRequest(
+        ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id
+    )
     session.request = fake_request.request
     client = Client(
         api_url="http://localhost:1984",
@@ -664,8 +686,8 @@ async def eval2(x, y, inputs):
 
     evaluators = [eval1, eval2]
 
-    async def atarget(x):
-        return x
+    async def atarget(inputs):
+        return inputs
 
     for eval_ in evaluators:
         with pytest.raises(ValueError, match="Invalid evaluator function."):

From 5471e88b566fac494f056caf11dc42a92ebbdd9f Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 15:17:37 -0800
Subject: [PATCH 68/88] fmt

---
 python/langsmith/schemas.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 5b226a830..30a65a018 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -5,7 +5,6 @@
 from datetime import datetime, timedelta, timezone
 from decimal import Decimal
 from enum import Enum
-from pathlib import Path
 from typing import (
     Any,
     Dict,
@@ -64,7 +63,7 @@ def my_function(bar: int, my_val: Attachment):
     data: bytes
 
 
-Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, Tuple[str, Path]]]
+Attachments = Dict[str, Union[Tuple[str, bytes], Attachment]]
 """Attachments associated with the run. 
 Each entry is a tuple of (mime_type, bytes), or (mime_type, file_path)"""
 

From 49246d06872857babfc607701abbea9ed996c5ec Mon Sep 17 00:00:00 2001
From: Bagatur <baskaryan@gmail.com>
Date: Mon, 9 Dec 2024 15:51:35 -0800
Subject: [PATCH 69/88] fmt

---
 python/langsmith/evaluation/_runner.py | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 805cfad03..51b464e68 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1934,26 +1934,13 @@ def _include_attachments(target: Any) -> bool:
             "Target function must accept at most two "
             "arguments without default values: (inputs, attachments)."
         )
-    else:
-        mismatches = []
-        num_args = 0
-        for i, (p, expected) in enumerate(
-            zip(positional_params, ("inputs", "attachments"))
-        ):
-            if p.name != expected:
-                mismatches.append((i, p.name))
-            else:
-                num_args += 1
-
-        if mismatches:
-            msg = (
-                "Target function is expected to have a first positional argument "
-                "'inputs' and optionally a second positional argument 'attachments'. "
-                "Received: " + ", ".join(f"'{p}' at index {i}" for i, p in mismatches)
-            )
+    elif len(positional_no_default) == 2:
+        if [p.name for p in positional_no_default] != ["inputs", "attachments"]:
+            msg = ""
             raise ValueError(msg)
-
-    return num_args == 2
+        return True
+    else:
+        return [p.name for p in positional_params[:2]] == ["inputs", "attachments"]
 
 
 def _resolve_experiment(

From b0921e06cd16ba0dbb78ad0dc7d3669273614900 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 15:57:54 -0800
Subject: [PATCH 70/88] tests

---
 python/langsmith/evaluation/_runner.py        |  7 +++--
 .../unit_tests/evaluation/test_runner.py      | 26 +++++++++----------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 51b464e68..836aaabe4 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1936,8 +1936,11 @@ def _include_attachments(target: Any) -> bool:
         )
     elif len(positional_no_default) == 2:
         if [p.name for p in positional_no_default] != ["inputs", "attachments"]:
-            msg = ""
-            raise ValueError(msg)
+            raise ValueError(
+                "When passing 2 positional arguments, they must be named "
+                "'inputs' and 'attachments', respectively. Received: "
+                f"{[p.name for p in positional_no_default]}"
+            )
         return True
     else:
         return [p.name for p in positional_params[:2]] == ["inputs", "attachments"]
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 87ebe6042..04b269100 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -20,9 +20,6 @@
 
 from langsmith import Client, aevaluate, evaluate
 from langsmith import schemas as ls_schemas
-from langsmith.evaluation._arunner import (
-    _include_attachments as a_include_attachments,
-)
 from langsmith.evaluation._runner import _include_attachments
 from langsmith.evaluation.evaluator import (
     _normalize_comparison_evaluator_func,
@@ -738,17 +735,19 @@ async def async_extra_args(inputs, attachments, foo="bar"):
         (
             lambda x, y: None,
             None,
-            "When target function has two positional arguments, they must be named "
-            "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' "
-            "at index 1",
+            re.escape(
+                "When passing 2 positional arguments, they must be named 'inputs' and "
+                "'attachments', respectively. Received: ['x', 'y']"
+            ),
             False,
         ),
         (
             lambda input, attachment: None,
             None,
-            "When target function has two positional arguments, they must be named "
-            "'inputs' and 'attachments', respectively. Received: 'input' at index 0,"
-            "'attachment' at index 1",
+            re.escape(
+                "When passing 2 positional arguments, they must be named 'inputs' and "
+                "'attachments', respectively. Received: ['input', 'attachment']"
+            ),
             False,
         ),
         # Too many parameters
@@ -756,8 +755,8 @@ async def async_extra_args(inputs, attachments, foo="bar"):
             lambda inputs, attachments, extra: None,
             None,
             re.escape(
-                "Target function must accept at most two positional arguments "
-                "(inputs, attachments)"
+                "Target function must accept at most two arguments without "
+                "default values: (inputs, attachments)."
             ),
             False,
         ),
@@ -796,12 +795,11 @@ def test_include_attachments(target, expected, error_msg, is_async):
         expected = False
         error_msg = None
 
-    func = _include_attachments if not is_async else a_include_attachments
     if error_msg is not None:
         with pytest.raises(ValueError, match=error_msg):
-            func(target)
+            _include_attachments(target)
     else:
-        result = func(target)
+        result = _include_attachments(target)
         assert result == expected
 
 

From 70c3f3c1a7c084ed545781228cc07e89677cd46e Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Mon, 9 Dec 2024 16:08:14 -0800
Subject: [PATCH 71/88] tests

---
 python/tests/integration_tests/test_client.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 83efc397e..bef79a594 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -56,7 +56,14 @@ def wait_for(
 @pytest.fixture
 def langchain_client() -> Client:
     get_env_var.cache_clear()
-    return Client()
+    return Client(
+        info={
+            "instance_flags": {
+                "dataset_examples_multipart_enabled": True,
+                "examples_multipart_enabled": True,
+            }
+        }
+    )
 
 
 def test_datasets(langchain_client: Client) -> None:
@@ -379,9 +386,6 @@ def test_error_surfaced_invalid_uri(uri: str) -> None:
 
 def test_upload_examples_multipart(langchain_client: Client):
     """Test uploading examples with attachments via multipart endpoint."""
-    langchain_client._info = {
-        "instance_flags": {"dataset_examples_multipart_enabled": True}
-    }
     dataset_name = "__test_upload_examples_multipart" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
@@ -469,7 +473,6 @@ def test_upload_examples_multipart(langchain_client: Client):
 
 def test_upsert_examples_multipart(langchain_client: Client) -> None:
     """Test upserting examples with attachments via the multipart endpoint."""
-    langchain_client._info = {"instance_flags": {"examples_multipart_enabled": True}}
     dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]
     if langchain_client.has_dataset(dataset_name=dataset_name):
         langchain_client.delete_dataset(dataset_name=dataset_name)
@@ -1250,9 +1253,6 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
 
 def test_evaluate_with_attachments(langchain_client: Client) -> None:
     """Test evaluating examples with attachments."""
-    langchain_client._info = {
-        "instance_flags": {"dataset_examples_multipart_enabled": True}
-    }
     dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
     # 1. Create dataset
     dataset = langchain_client.create_dataset(
@@ -1308,9 +1308,6 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
 
 def test_evaluate_with_no_attachments(langchain_client: Client) -> None:
     """Test evaluating examples without attachments using a target with attachments."""
-    langchain_client._info = {
-        "instance_flags": {"dataset_examples_multipart_enabled": True}
-    }
     dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4]
     dataset = langchain_client.create_dataset(
         dataset_name,

From 8bb0826dfe589b6c418b1b0c1733da93b549b4ee Mon Sep 17 00:00:00 2001
From: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Date: Tue, 10 Dec 2024 11:54:53 -0800
Subject: [PATCH 72/88] update examples multipart (#1310)

---
 python/langsmith/client.py                    |  89 ++++-
 python/langsmith/schemas.py                   |  17 +
 python/tests/integration_tests/test_client.py | 306 ++++++++++++++++++
 3 files changed, 407 insertions(+), 5 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index 0f39aa9c0..c173cc7cb 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3464,6 +3464,7 @@ def _prepate_multipart_data(
         examples: Union[
             List[ls_schemas.ExampleUploadWithAttachments]
             | List[ls_schemas.ExampleUpsertWithAttachments]
+            | List[ls_schemas.ExampleUpdateWithAttachments],
         ],
         include_dataset_id: bool = False,
     ) -> Tuple[Any, bytes]:
@@ -3477,21 +3478,29 @@ def _prepate_multipart_data(
             dataset_id = examples[0].dataset_id
 
         for example in examples:
-            if not isinstance(
-                example, ls_schemas.ExampleUploadWithAttachments
-            ) and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments):
+            if (
+                not isinstance(example, ls_schemas.ExampleUploadWithAttachments)
+                and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments)
+                and not isinstance(example, ls_schemas.ExampleUpdateWithAttachments)
+            ):
                 raise ValueError(
                     "The examples must be of type ExampleUploadWithAttachments"
                     " or ExampleUpsertWithAttachments"
+                    " or ExampleUpdateWithAttachments"
                 )
             if example.id is not None:
                 example_id = str(example.id)
             else:
                 example_id = str(uuid.uuid4())
 
+            if isinstance(example, ls_schemas.ExampleUpdateWithAttachments):
+                created_at = None
+            else:
+                created_at = example.created_at
+
             example_body = {
                 **({"dataset_id": dataset_id} if include_dataset_id else {}),
-                "created_at": example.created_at,
+                **({"created_at": created_at} if created_at is not None else {}),
             }
             if example.metadata is not None:
                 example_body["metadata"] = example.metadata
@@ -3582,6 +3591,23 @@ def _prepate_multipart_data(
                             )
                         )
 
+            if (
+                isinstance(example, ls_schemas.ExampleUpdateWithAttachments)
+                and example.attachments_operations
+            ):
+                attachments_operationsb = _dumps_json(example.attachments_operations)
+                parts.append(
+                    (
+                        f"{example_id}.attachments_operations",
+                        (
+                            None,
+                            attachments_operationsb,
+                            "application/json",
+                            {},
+                        ),
+                    )
+                )
+
         encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY)
         if encoder.len <= 20_000_000:  # ~20 MB
             data = encoder.to_string()
@@ -3590,6 +3616,38 @@ def _prepate_multipart_data(
 
         return encoder, data
 
+    def update_examples_multipart(
+        self,
+        *,
+        dataset_id: ID_TYPE,
+        updates: Optional[List[ls_schemas.ExampleUpdateWithAttachments]] = None,
+    ) -> ls_schemas.UpsertExamplesResponse:
+        """Update examples."""
+        if not (self.info.instance_flags or {}).get(
+            "dataset_examples_multipart_enabled", False
+        ):
+            raise ValueError(
+                "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version."
+            )
+        if updates is None:
+            updates = []
+
+        encoder, data = self._prepate_multipart_data(updates, include_dataset_id=False)
+
+        response = self.request_with_retries(
+            "PATCH",
+            f"/v1/platform/datasets/{dataset_id}/examples",
+            request_kwargs={
+                "data": data,
+                "headers": {
+                    **self._headers,
+                    "Content-Type": encoder.content_type,
+                },
+            },
+        )
+        ls_utils.raise_for_status_with_text(response)
+        return response.json()
+
     def upload_examples_multipart(
         self,
         *,
@@ -4072,6 +4130,7 @@ def update_example(
         metadata: Optional[Dict] = None,
         split: Optional[str | List[str]] = None,
         dataset_id: Optional[ID_TYPE] = None,
+        attachments_operations: Optional[ls_schemas.AttachmentsOperations] = None,
     ) -> Dict[str, Any]:
         """Update a specific example.
 
@@ -4096,12 +4155,20 @@ def update_example(
         Dict[str, Any]
             The updated example.
         """
+        if attachments_operations is not None:
+            if not (self.info.instance_flags or {}).get(
+                "dataset_examples_multipart_enabled", False
+            ):
+                raise ValueError(
+                    "Your LangSmith version does not allow using the attachment operations, please update to the latest version."
+                )
         example = dict(
             inputs=inputs,
             outputs=outputs,
             dataset_id=dataset_id,
             metadata=metadata,
             split=split,
+            attachments_operations=attachments_operations,
         )
         response = self.request_with_retries(
             "PATCH",
@@ -4121,6 +4188,9 @@ def update_examples(
         metadata: Optional[Sequence[Optional[Dict]]] = None,
         splits: Optional[Sequence[Optional[str | List[str]]]] = None,
         dataset_ids: Optional[Sequence[Optional[ID_TYPE]]] = None,
+        attachments_operations: Optional[
+            Sequence[Optional[ls_schemas.AttachmentsOperations]]
+        ] = None,
     ) -> Dict[str, Any]:
         """Update multiple examples.
 
@@ -4145,12 +4215,20 @@ def update_examples(
         Dict[str, Any]
             The response from the server (specifies the number of examples updated).
         """
+        if attachments_operations is not None:
+            if not (self.info.instance_flags or {}).get(
+                "dataset_examples_multipart_enabled", False
+            ):
+                raise ValueError(
+                    "Your LangSmith version does not allow using the attachment operations, please update to the latest version."
+                )
         sequence_args = {
             "inputs": inputs,
             "outputs": outputs,
             "metadata": metadata,
             "splits": splits,
             "dataset_ids": dataset_ids,
+            "attachments_operations": attachments_operations,
         }
         # Since inputs are required, we will check against them
         examples_len = len(example_ids)
@@ -4168,8 +4246,9 @@ def update_examples(
                 "dataset_id": dataset_id_,
                 "metadata": metadata_,
                 "split": split_,
+                "attachments_operations": attachments_operations_,
             }
-            for id_, in_, out_, metadata_, split_, dataset_id_ in zip(
+            for id_, in_, out_, metadata_, split_, dataset_id_, attachments_operations_ in zip(
                 example_ids,
                 inputs or [None] * len(example_ids),
                 outputs or [None] * len(example_ids),
diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py
index 30a65a018..acedaf177 100644
--- a/python/langsmith/schemas.py
+++ b/python/langsmith/schemas.py
@@ -183,12 +183,24 @@ class ExampleSearch(ExampleBase):
     id: UUID
 
 
+class AttachmentsOperations(BaseModel):
+    """Operations to perform on attachments."""
+
+    rename: Dict[str, str] = Field(
+        default_factory=dict, description="Mapping of old attachment names to new names"
+    )
+    retain: List[str] = Field(
+        default_factory=list, description="List of attachment names to keep"
+    )
+
+
 class ExampleUpdate(BaseModel):
     """Update class for Example."""
 
     dataset_id: Optional[UUID] = None
     inputs: Optional[Dict[str, Any]] = None
     outputs: Optional[Dict[str, Any]] = None
+    attachments_operations: Optional[AttachmentsOperations] = None
     metadata: Optional[Dict[str, Any]] = None
     split: Optional[Union[str, List[str]]] = None
 
@@ -202,7 +214,12 @@ class ExampleUpdateWithAttachments(ExampleUpdate):
     """Example update with attachments."""
 
     id: UUID
+    inputs: Dict[str, Any] = Field(default_factory=dict)
+    outputs: Optional[Dict[str, Any]] = Field(default=None)
+    metadata: Optional[Dict[str, Any]] = Field(default=None)
+    split: Optional[Union[str, List[str]]] = None
     attachments: Optional[Attachments] = None
+    attachments_operations: Optional[AttachmentsOperations] = None
 
 
 class DataType(str, Enum):
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index bef79a594..33eec0f46 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -22,8 +22,10 @@
 from langsmith.client import ID_TYPE, Client
 from langsmith.evaluation import evaluate
 from langsmith.schemas import (
+    AttachmentsOperations,
     DataType,
     Example,
+    ExampleUpdateWithAttachments,
     ExampleUploadWithAttachments,
     ExampleUpsertWithAttachments,
     Run,
@@ -1392,3 +1394,307 @@ def test_examples_length_validation(langchain_client: Client) -> None:
 
     # Clean up
     langchain_client.delete_dataset(dataset_id=dataset.id)
+
+
+def test_update_example_with_attachments_operations(langchain_client: Client) -> None:
+    """Test updating an example with attachment operations."""
+    dataset_name = "__test_update_example_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name=dataset_name,
+        description="Test dataset for updating example attachments",
+    )
+
+    # Create example with attachments
+    example = ExampleUploadWithAttachments(
+        inputs={"query": "What's in this image?"},
+        outputs={"answer": "A test image"},
+        attachments={
+            "image1": ("image/png", b"fake image data 1"),
+            "image2": ("image/png", b"fake image data 2"),
+        },
+    )
+    created_example = langchain_client.upload_examples_multipart(
+        dataset_id=dataset.id, uploads=[example]
+    )
+
+    # Update example with attachment operations to rename and retain attachments
+    attachments_operations = AttachmentsOperations(
+        rename={"image1": "renamed_image"},
+        retain=["image2"],  # Keep image2 as-is alongside the renamed image1
+    )
+
+    langchain_client.update_example(
+        example_id=created_example.id,
+        attachments_operations=attachments_operations,
+    )
+
+    # Verify the update
+    retrieved_example = langchain_client.read_example(
+        example_id=created_example.id,
+    )
+
+    # Check that both the renamed and the retained attachments exist
+    assert len(retrieved_example.attachments_info) == 2
+    assert "renamed_image" in retrieved_example.attachments_info
+    assert "image2" in retrieved_example.attachments_info
+    assert "image1" not in retrieved_example.attachments_info
+    assert (
+        retrieved_example.attachments_info["image2"]["reader"].read()
+        == b"fake image data 2"
+    )
+    assert (
+        retrieved_example.attachments_info["renamed_image"]["reader"].read()
+        == b"fake image data 1"
+    )
+
+    # Clean up
+    langchain_client.delete_dataset(dataset_id=dataset.id)
+
+
+def test_bulk_update_examples_with_attachments_operations(
+    langchain_client: Client,
+) -> None:
+    """Test bulk updating examples with attachment operations."""
+    dataset_name = "__test_bulk_update_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name=dataset_name,
+        description="Test dataset for bulk updating example attachments",
+    )
+
+    # Create two examples with attachments
+    example1 = ExampleUploadWithAttachments(
+        inputs={"query": "What's in this image?"},
+        outputs={"answer": "A test image 1"},
+        attachments={
+            "image1": ("image/png", b"fake image data 1"),
+            "extra": ("text/plain", b"extra data"),
+        },
+    )
+    example2 = ExampleUploadWithAttachments(
+        inputs={"query": "What's in this image?"},
+        outputs={"answer": "A test image 2"},
+        attachments={
+            "image2": ("image/png", b"fake image data 2"),
+            "extra": ("text/plain", b"extra data"),
+        },
+    )
+
+    created_examples = langchain_client.upload_examples_multipart(
+        dataset_id=dataset.id,
+        uploads=[example1, example2],
+    )
+    example_ids = [ex.id for ex in created_examples]
+
+    # Update both examples with different attachment operations
+    attachments_operations = [
+        AttachmentsOperations(
+            rename={"image1": "renamed_image1"},
+        ),
+        AttachmentsOperations(retain=["extra"]),
+    ]
+
+    langchain_client.update_examples(
+        example_ids=example_ids,
+        attachments_operations=attachments_operations,
+    )
+
+    # Verify the updates
+    updated_examples = list(
+        langchain_client.list_examples(
+            dataset_id=dataset.id,
+            example_ids=example_ids,
+            include_attachments=True,
+        )
+    )
+
+    # Check first example
+    assert len(updated_examples[0].attachments) == 1
+    assert "renamed_image1" in updated_examples[0].attachments
+    assert "extra" not in updated_examples[0].attachments
+
+    # Check second example
+    assert len(updated_examples[1].attachments) == 1
+    assert "extra" in updated_examples[1].attachments
+    assert "image2" not in updated_examples[1].attachments
+
+    # Check attachment data
+    assert (
+        updated_examples[0].attachments["renamed_image1"][1].read()
+        == b"fake image data 1"
+    )
+    assert updated_examples[1].attachments["extra"][1].read() == b"extra data"
+
+    # Clean up
+    langchain_client.delete_dataset(dataset_id=dataset.id)
+
+
+def test_update_examples_multipart(langchain_client: Client) -> None:
+    """Test updating examples with attachments via multipart endpoint."""
+    dataset_name = "__test_update_examples_multipart" + uuid4().hex[:4]
+    if langchain_client.has_dataset(dataset_name=dataset_name):
+        langchain_client.delete_dataset(dataset_name=dataset_name)
+
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for multipart example updates",
+        data_type=DataType.kv,
+    )
+
+    # First create some examples with attachments
+    example_1 = ExampleUploadWithAttachments(
+        inputs={"text": "hello world"},
+        attachments={
+            "file1": ("text/plain", b"original content 1"),
+            "file2": ("text/plain", b"original content 2"),
+        },
+    )
+
+    example_2 = ExampleUploadWithAttachments(
+        inputs={"text": "second example"},
+        attachments={
+            "file3": ("text/plain", b"original content 3"),
+            "file4": ("text/plain", b"original content 4"),
+        },
+    )
+
+    created_examples = langchain_client.upload_examples_multipart(
+        dataset_id=dataset.id, uploads=[example_1, example_2]
+    )
+    assert created_examples["count"] == 2
+
+    examples = list(langchain_client.list_examples(dataset_id=dataset.id))
+    example_ids = [ex.id for ex in examples]
+
+    # Now create update operations
+    update_1 = ExampleUpdateWithAttachments(
+        id=example_ids[0],
+        inputs={"text": "updated hello world"},
+        attachments={
+            "new_file1": ("text/plain", b"new content 1"),
+        },
+        attachments_operations=AttachmentsOperations(
+            rename={"file1": "renamed_file1"},
+        ),
+    )
+
+    update_2 = ExampleUpdateWithAttachments(
+        id=example_ids[1],
+        inputs={"text": "updated second example"},
+        attachments={
+            "new_file2": ("text/plain", b"new content 2"),
+        },
+        attachments_operations=AttachmentsOperations(retain=["file3"]),
+    )
+
+    # Test updating multiple examples at once
+    updated_examples = langchain_client.update_examples_multipart(
+        dataset_id=dataset.id, updates=[update_1, update_2]
+    )
+    assert updated_examples["count"] == 2
+
+    # Verify the updates
+    updated = list(
+        langchain_client.list_examples(
+            dataset_id=dataset.id,
+            include_attachments=True,
+        )
+    )
+
+    # Verify first example updates
+    example_1_updated = next(ex for ex in updated if ex.id == example_ids[0])
+    assert example_1_updated.inputs["text"] == "updated hello world"
+    assert "renamed_file1" in example_1_updated.attachments_info
+    assert "new_file1" in example_1_updated.attachments_info
+    assert "file2" not in example_1_updated.attachments_info
+    assert (
+        example_1_updated.attachments_info["renamed_file1"]["reader"].read()
+        == b"original content 1"
+    )
+    assert (
+        example_1_updated.attachments_info["new_file1"]["reader"].read()
+        == b"new content 1"
+    )
+
+    # Verify second example updates
+    example_2_updated = next(ex for ex in updated if ex.id == example_ids[1])
+    assert example_2_updated.inputs["text"] == "updated second example"
+    assert "file3" in example_2_updated.attachments_info
+    assert "new_file2" in example_2_updated.attachments_info
+    assert "file4" not in example_2_updated.attachments_info
+    assert (
+        example_2_updated.attachments_info["file3"]["reader"].read()
+        == b"original content 3"
+    )
+    assert (
+        example_2_updated.attachments_info["new_file2"]["reader"].read()
+        == b"new content 2"
+    )
+
+    # Test updating examples in different datasets fails
+    other_dataset = langchain_client.create_dataset(
+        dataset_name=dataset_name + "_other",
+        description="Other test dataset",
+    )
+    with pytest.raises(ValueError, match="All examples must be in the same dataset"):
+        langchain_client.update_examples_multipart(
+            dataset_id=dataset.id,
+            updates=[
+                ExampleUpsertWithAttachments(
+                    id=example_ids[0],
+                    inputs={"text": "update 1"},
+                ),
+                ExampleUpsertWithAttachments(
+                    id=uuid4(),
+                    inputs={"text": "update 2"},
+                ),
+            ],
+        )
+
+    # Test updating non-existent example fails
+    with pytest.raises(LangSmithNotFoundError):
+        langchain_client.update_examples_multipart(
+            dataset_id=dataset.id,
+            updates=[
+                ExampleUpsertWithAttachments(
+                    id=uuid4(),
+                    inputs={"text": "should fail"},
+                )
+            ],
+        )
+
+    # Test updating with mismatched attachment names fails
+    with pytest.raises(ValueError):
+        langchain_client.update_examples_multipart(
+            dataset_id=dataset.id,
+            updates=[
+                ExampleUpdateWithAttachments(
+                    id=example_ids[0],
+                    attachments={
+                        "renamed_file1": ("text/plain", b"new content 1"),
+                    },
+                    attachments_operations=AttachmentsOperations(
+                        retain=["renamed_file1"],
+                    ),
+                )
+            ],
+        )
+
+    with pytest.raises(ValueError):
+        langchain_client.update_examples_multipart(
+            dataset_id=dataset.id,
+            updates=[
+                ExampleUpdateWithAttachments(
+                    id=example_ids[0],
+                    attachments={
+                        "foo": ("text/plain", b"new content 1"),
+                    },
+                    attachments_operations=AttachmentsOperations(
+                        rename={"renamed_file1": "foo"},
+                    ),
+                )
+            ],
+        )
+
+    # Clean up
+    langchain_client.delete_dataset(dataset_id=dataset.id)
+    langchain_client.delete_dataset(dataset_id=other_dataset.id)

From c841ec6528513a3bf124c2061940a05975abcb53 Mon Sep 17 00:00:00 2001
From: Isaac Francisco <78627776+isahers1@users.noreply.github.com>
Date: Tue, 10 Dec 2024 12:29:22 -0800
Subject: [PATCH 73/88] add attachments to evaluate (#1237)

---
 python/langsmith/evaluation/_arunner.py       |   5 +-
 python/langsmith/evaluation/_runner.py        |  28 ++-
 python/langsmith/evaluation/evaluator.py      |  11 +-
 python/tests/integration_tests/test_client.py | 227 +++++++++++++++++-
 .../unit_tests/evaluation/test_runner.py      | 154 ++++++++++++
 5 files changed, 408 insertions(+), 17 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 59fe06caf..7cee6bcf5 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -37,6 +37,7 @@
     DATA_T,
     EVALUATOR_T,
     ExperimentResultRow,
+    _evaluators_include_attachments,
     _ExperimentManagerMixin,
     _extract_feedback_keys,
     _ForwardResults,
@@ -259,6 +260,7 @@ async def aevaluate(
         ... )  # doctest: +ELLIPSIS
         View the evaluation results for experiment:...
 
+
     .. versionchanged:: 0.2.0
 
         'max_concurrency' default updated from None (no limit on concurrency)
@@ -476,7 +478,8 @@ async def _aevaluate(
         description=description,
         num_repetitions=num_repetitions,
         runs=runs,
-        include_attachments=_include_attachments(target),
+        include_attachments=_include_attachments(target)
+        or _evaluators_include_attachments(evaluators),
         upload_results=upload_results,
     ).astart()
     cache_dir = ls_utils.get_cache_dir(None)
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 836aaabe4..ddbd9bf18 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1064,7 +1064,8 @@ def _evaluate(
         # If provided, we don't need to create a new experiment.
         runs=runs,
         # Create or resolve the experiment.
-        include_attachments=_include_attachments(target),
+        include_attachments=_include_attachments(target)
+        or _evaluators_include_attachments(evaluators),
         upload_results=upload_results,
     ).start()
     cache_dir = ls_utils.get_cache_dir(None)
@@ -1913,7 +1914,30 @@ def _ensure_traceable(
     return fn
 
 
-def _include_attachments(target: Any) -> bool:
+def _evaluators_include_attachments(
+    evaluators: Optional[Sequence[Union[EVALUATOR_T, AEVALUATOR_T]]],
+) -> bool:
+    if evaluators is None:
+        return False
+    return any(
+        any(
+            p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+            and p.name == "attachments"
+            for p in (
+                inspect.signature(
+                    e.__call__ if hasattr(e, "__call__") else e
+                ).parameters.values()
+                if callable(e) or hasattr(e, "__call__")
+                else []
+            )
+        )
+        for e in evaluators
+    )
+
+
+def _include_attachments(
+    target: Any,
+) -> bool:
     """Whether the target function accepts attachments."""
     if _is_langchain_runnable(target) or not callable(target):
         return False
diff --git a/python/langsmith/evaluation/evaluator.py b/python/langsmith/evaluation/evaluator.py
index 02fab3b71..a1505699a 100644
--- a/python/langsmith/evaluation/evaluator.py
+++ b/python/langsmith/evaluation/evaluator.py
@@ -624,7 +624,14 @@ def _normalize_evaluator_func(
     Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT],
     Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]],
 ]:
-    supported_args = ("run", "example", "inputs", "outputs", "reference_outputs")
+    supported_args = (
+        "run",
+        "example",
+        "inputs",
+        "outputs",
+        "reference_outputs",
+        "attachments",
+    )
     sig = inspect.signature(func)
     positional_args = [
         pname
@@ -659,6 +666,7 @@ async def awrapper(
                     "example": example,
                     "inputs": example.inputs if example else {},
                     "outputs": run.outputs or {},
+                    "attachments": example.attachments or {} if example else {},
                     "reference_outputs": example.outputs or {} if example else {},
                 }
                 args = (arg_map[arg] for arg in positional_args)
@@ -679,6 +687,7 @@ def wrapper(run: Run, example: Example) -> _RUNNABLE_OUTPUT:
                     "example": example,
                     "inputs": example.inputs if example else {},
                     "outputs": run.outputs or {},
+                    "attachments": example.attachments or {},
                     "reference_outputs": example.outputs or {} if example else {},
                 }
                 args = (arg_map[arg] for arg in positional_args)
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 33eec0f46..f72a2ebdf 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -20,7 +20,7 @@
 from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
 
 from langsmith.client import ID_TYPE, Client
-from langsmith.evaluation import evaluate
+from langsmith.evaluation import aevaluate, evaluate
 from langsmith.schemas import (
     AttachmentsOperations,
     DataType,
@@ -1215,9 +1215,6 @@ def create_encoder(*args, **kwargs):
         assert not caplog.records
 
 
-@pytest.mark.skip(
-    reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first"
-)
 def test_list_examples_attachments_keys(langchain_client: Client) -> None:
     """Test list_examples returns same keys with and without attachments."""
     dataset_name = "__test_list_examples_attachments" + uuid4().hex[:4]
@@ -1256,6 +1253,7 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
 def test_evaluate_with_attachments(langchain_client: Client) -> None:
     """Test evaluating examples with attachments."""
     dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
+
     # 1. Create dataset
     dataset = langchain_client.create_dataset(
         dataset_name,
@@ -1274,37 +1272,89 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None:
 
     langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
 
-    # 3. Define target function that uses attachments
     def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
         # Verify we receive the attachment data
         assert "image" in attachments
+        assert "presigned_url" in attachments["image"]
         image_data = attachments["image"]["reader"]
         assert image_data.read() == b"fake image data for testing"
         return {"answer": "test image"}
 
-    # 4. Define simple evaluator
-    def evaluator(run: Run, example: Example) -> Dict[str, Any]:
+    def evaluator(
+        outputs: dict, reference_outputs: dict, attachments: dict
+    ) -> Dict[str, Any]:
+        assert "image" in attachments
+        assert "presigned_url" in attachments["image"]
+        image_data = attachments["image"]["reader"]
+        assert image_data.read() == b"fake image data for testing"
         return {
             "score": float(
-                run.outputs.get("answer") == example.outputs.get("answer")  # type: ignore
+                reference_outputs.get("answer") == outputs.get("answer")  # type: ignore
             )
         }
 
-    # 5. Run evaluation
-    results = evaluate(
+    results = langchain_client.evaluate(
+        target,
+        data=dataset_name,
+        evaluators=[evaluator],
+        num_repetitions=2,
+    )
+
+    assert len(results) == 2
+    for result in results:
+        assert result["evaluation_results"]["results"][0].score == 1.0
+
+    langchain_client.delete_dataset(dataset_name=dataset_name)
+
+
+def test_evaluate_with_attachments_not_in_target(langchain_client: Client) -> None:
+    """Test evaluating examples with attachments."""
+    dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for evals with attachments",
+        data_type=DataType.kv,
+    )
+
+    example = ExampleUploadWithAttachments(
+        dataset_id=dataset.id,
+        inputs={"question": "What is shown in the image?"},
+        outputs={"answer": "test image"},
+        attachments={
+            "image": ("image/png", b"fake image data for testing"),
+        },
+    )
+
+    langchain_client.upload_examples_multipart(uploads=[example])
+
+    def target(inputs: Dict[str, Any]) -> Dict[str, Any]:
+        # This target takes no attachments; only the evaluator reads them
+        return {"answer": "test image"}
+
+    def evaluator(
+        outputs: dict, reference_outputs: dict, attachments: dict
+    ) -> Dict[str, Any]:
+        assert "image" in attachments
+        assert "presigned_url" in attachments["image"]
+        image_data = attachments["image"]["reader"]
+        assert image_data.read() == b"fake image data for testing"
+        return {
+            "score": float(
+                reference_outputs.get("answer") == outputs.get("answer")  # type: ignore
+            )
+        }
+
+    results = langchain_client.evaluate(
         target,
         data=dataset_name,
         evaluators=[evaluator],
-        client=langchain_client,
         num_repetitions=2,
     )
 
-    # 6. Verify results
     assert len(results) == 2
     for result in results:
         assert result["evaluation_results"]["results"][0].score == 1.0
 
-    # Cleanup
     langchain_client.delete_dataset(dataset_name=dataset_name)
 
 
@@ -1355,6 +1405,157 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]:
     langchain_client.delete_dataset(dataset_name=dataset_name)
 
 
+async def test_aevaluate_with_attachments(langchain_client: Client) -> None:
+    """Test evaluating examples with attachments."""
+    dataset_name = "__test_aevaluate_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for evals with attachments",
+        data_type=DataType.kv,
+    )
+
+    example = ExampleUploadWithAttachments(
+        inputs={"question": "What is shown in the image?"},
+        outputs={"answer": "test image"},
+        attachments={
+            "image": ("image/png", b"fake image data for testing"),
+        },
+    )
+
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
+
+    async def target(
+        inputs: Dict[str, Any], attachments: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        # Verify we receive the attachment data
+        assert "image" in attachments
+        assert "presigned_url" in attachments["image"]
+        image_data = attachments["image"]["reader"]
+        assert image_data.read() == b"fake image data for testing"
+        return {"answer": "test image"}
+
+    async def evaluator(
+        outputs: dict, reference_outputs: dict, attachments: dict
+    ) -> Dict[str, Any]:
+        assert "image" in attachments
+        assert "presigned_url" in attachments["image"]
+        image_data = attachments["image"]["reader"]
+        assert image_data.read() == b"fake image data for testing"
+        return {
+            "score": float(
+                reference_outputs.get("answer") == outputs.get("answer")  # type: ignore
+            )
+        }
+
+    results = await langchain_client.aevaluate(
+        target, data=dataset_name, evaluators=[evaluator], num_repetitions=2
+    )
+
+    assert len(results) == 2
+    async for result in results:
+        assert result["evaluation_results"]["results"][0].score == 1.0
+
+    langchain_client.delete_dataset(dataset_name=dataset_name)
+
+
+async def test_aevaluate_with_attachments_not_in_target(
+    langchain_client: Client,
+) -> None:
+    """Test evaluating examples with attachments."""
+    dataset_name = "__test_aevaluate_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for evals with attachments",
+        data_type=DataType.kv,
+    )
+
+    example = ExampleUploadWithAttachments(
+        inputs={"question": "What is shown in the image?"},
+        outputs={"answer": "test image"},
+        attachments={
+            "image": ("image/png", b"fake image data for testing"),
+        },
+    )
+
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
+
+    async def target(inputs: Dict[str, Any]) -> Dict[str, Any]:
+        # This target takes no attachments; only the evaluator reads them
+        return {"answer": "test image"}
+
+    async def evaluator(
+        outputs: dict, reference_outputs: dict, attachments: dict
+    ) -> Dict[str, Any]:
+        assert "image" in attachments
+        assert "presigned_url" in attachments["image"]
+        image_data = attachments["image"]["reader"]
+        assert image_data.read() == b"fake image data for testing"
+        return {
+            "score": float(
+                reference_outputs.get("answer") == outputs.get("answer")  # type: ignore
+            )
+        }
+
+    results = await langchain_client.aevaluate(
+        target, data=dataset_name, evaluators=[evaluator], num_repetitions=2
+    )
+
+    assert len(results) == 2
+    async for result in results:
+        assert result["evaluation_results"]["results"][0].score == 1.0
+
+    langchain_client.delete_dataset(dataset_name=dataset_name)
+
+
+async def test_aevaluate_with_no_attachments(langchain_client: Client) -> None:
+    """Test evaluating examples without attachments using a target with attachments."""
+    dataset_name = "__test_aevaluate_no_attachments" + uuid4().hex[:4]
+    dataset = langchain_client.create_dataset(
+        dataset_name,
+        description="Test dataset for evals without attachments",
+        data_type=DataType.kv,
+    )
+
+    # Create example using old way, attachments should be set to {}
+    langchain_client.create_example(
+        dataset_id=dataset.id,
+        inputs={"question": "What is 2+2?"},
+        outputs={"answer": "4"},
+    )
+
+    # Verify we can create example the new way without attachments
+    example = ExampleUploadWithAttachments(
+        inputs={"question": "What is 3+1?"},
+        outputs={"answer": "4"},
+    )
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
+
+    async def target(
+        inputs: Dict[str, Any], attachments: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        # Verify we receive an empty attachments dict
+        assert isinstance(attachments, dict)
+        assert len(attachments) == 0
+        return {"answer": "4"}
+
+    async def evaluator(run: Run, example: Example) -> Dict[str, Any]:
+        return {
+            "score": float(
+                run.outputs.get("answer") == example.outputs.get("answer")  # type: ignore
+            )
+        }
+
+    results = await aevaluate(
+        target, data=dataset_name, evaluators=[evaluator], client=langchain_client
+    )
+
+    assert len(results) == 2
+    async for result in results:
+        assert result["evaluation_results"]["results"][0].score == 1.0
+
+    langchain_client.delete_dataset(dataset_name=dataset_name)
+
+
 def test_examples_length_validation(langchain_client: Client) -> None:
     """Test that mismatched lengths raise ValueError for create and update examples."""
     dataset_name = "__test_examples_length_validation" + uuid4().hex[:4]
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py
index 04b269100..e33d07fd5 100644
--- a/python/tests/unit_tests/evaluation/test_runner.py
+++ b/python/tests/unit_tests/evaluation/test_runner.py
@@ -242,6 +242,14 @@ def score_unpacked_inputs_outputs_reference(inputs, outputs, reference_outputs):
         ordering_of_stuff.append("evaluate")
         return {"score": reference_outputs["answer"]}
 
+    def score_unpacked_inputs_outputs_attachments(inputs, outputs, attachments):
+        ordering_of_stuff.append("evaluate")
+        return {"score": outputs["output"]}
+
+    def score_unpacked_outputs(outputs):
+        ordering_of_stuff.append("evaluate")
+        return {"score": outputs["output"]}
+
     def eval_float(run, example):
         ordering_of_stuff.append("evaluate")
         return 0.2
@@ -270,6 +278,8 @@ def summary_eval_outputs_reference(outputs, reference_outputs):
         score_value_first,
         score_unpacked_inputs_outputs,
         score_unpacked_inputs_outputs_reference,
+        score_unpacked_inputs_outputs_attachments,
+        score_unpacked_outputs,
         eval_float,
         eval_str,
         eval_list,
@@ -524,6 +534,14 @@ async def score_unpacked_inputs_outputs_reference(
         ordering_of_stuff.append("evaluate")
         return {"score": reference_outputs["answer"]}
 
+    async def score_unpacked_inputs_outputs_attachments(inputs, outputs, attachments):
+        ordering_of_stuff.append("evaluate")
+        return {"score": outputs["output"]}
+
+    async def score_unpacked_outputs(outputs):
+        ordering_of_stuff.append("evaluate")
+        return {"score": outputs["output"]}
+
     async def eval_float(run, example):
         ordering_of_stuff.append("evaluate")
         return 0.2
@@ -552,6 +570,8 @@ def summary_eval_outputs_reference(outputs, reference_outputs):
         score_value_first,
         score_unpacked_inputs_outputs,
         score_unpacked_inputs_outputs_reference,
+        score_unpacked_inputs_outputs_attachments,
+        score_unpacked_outputs,
         eval_float,
         eval_str,
         eval_list,
@@ -803,6 +823,140 @@ def test_include_attachments(target, expected, error_msg, is_async):
         assert result == expected
 
 
+def valid_single_supported(inputs, *, optional=None):
+    return {"score": 1}
+
+
+async def valid_single_supported_async(inputs, *, optional=None):
+    return {"score": 1}
+
+
+def valid_two_arbitrary(foo, bar, *, optional=None):
+    return {"score": 1}
+
+
+async def valid_two_arbitrary_async(foo, bar, *, optional=None):
+    return {"score": 1}
+
+
+def valid_multiple_supported(inputs, outputs, reference_outputs, *, optional=None):
+    return {"score": 1}
+
+
+async def valid_multiple_supported_async(
+    inputs, outputs, reference_outputs, *, optional=None
+):
+    return {"score": 1}
+
+
+def invalid_single_unsupported(foo, *, optional=None):
+    return {"score": 1}
+
+
+async def invalid_single_unsupported_async(foo, *, optional=None):
+    return {"score": 1}
+
+
+def invalid_three_args(inputs, outputs, foo, *, optional=None):
+    return {"score": 1}
+
+
+async def invalid_three_args_async(inputs, outputs, foo, *, optional=None):
+    return {"score": 1}
+
+
+def invalid_no_positional(*, inputs, outputs, optional=None):
+    return {"score": 1}
+
+
+async def invalid_no_positional_async(*, inputs, outputs, optional=None):
+    return {"score": 1}
+
+
+# Test cases that should succeed
+VALID_EVALUATOR_CASES = [
+    (valid_single_supported, False),
+    (valid_single_supported_async, True),
+    (valid_two_arbitrary, False),
+    (valid_two_arbitrary_async, True),
+    (valid_multiple_supported, False),
+    (valid_multiple_supported_async, True),
+]
+
+# Test cases that should raise ValueError
+INVALID_EVALUATOR_CASES = [
+    (invalid_single_unsupported, False),
+    (invalid_single_unsupported_async, True),
+    (invalid_three_args, False),
+    (invalid_three_args_async, True),
+    (invalid_no_positional, False),
+    (invalid_no_positional_async, True),
+]
+
+
+def target(inputs, attachments):
+    return {"foo": "bar"}
+
+
+async def atarget(inputs, attachments):
+    return {"foo": "bar"}
+
+
+@pytest.mark.parametrize("func,is_async", VALID_EVALUATOR_CASES)
+def test_normalize_evaluator_func_valid(func, is_async):
+    """Test _normalize_evaluator_func succeeds."""
+    func = _normalize_evaluator_func(func)
+    session = mock.Mock()
+    ds_name = "my-dataset"
+    ds_id = "00886375-eb2a-4038-9032-efff60309896"
+
+    ds_example_responses = [_create_example(i) for i in range(10)]
+    ds_examples = [e[0] for e in ds_example_responses]
+    tenant_id = str(uuid.uuid4())
+    fake_request = FakeRequest(
+        ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id
+    )
+    session.request = fake_request.request
+    client = Client(api_url="http://localhost:1984", api_key="123", session=session)
+    client._tenant_id = tenant_id  # type: ignore
+
+    if is_async:
+        asyncio.run(
+            aevaluate(atarget, data=ds_examples, evaluators=[func], client=client)
+        )
+    else:
+        evaluate(target, data=ds_examples, evaluators=[func], client=client)
+
+
+@pytest.mark.parametrize("func,is_async", INVALID_EVALUATOR_CASES)
+def test_normalize_evaluator_func_invalid(func, is_async):
+    """Test _normalize_evaluator_func fails correctly."""
+    with pytest.raises(ValueError, match="Invalid evaluator function"):
+        _normalize_evaluator_func(func)
+
+    session = mock.Mock()
+    ds_name = "my-dataset"
+    ds_id = "00886375-eb2a-4038-9032-efff60309896"
+
+    ds_example_responses = [_create_example(i) for i in range(10)]
+    ds_examples = [e[0] for e in ds_example_responses]
+    tenant_id = str(uuid.uuid4())
+    fake_request = FakeRequest(
+        ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id
+    )
+    session.request = fake_request.request
+    client = Client(api_url="http://localhost:1984", api_key="123", session=session)
+    client._tenant_id = tenant_id  # type: ignore
+
+    with pytest.raises(ValueError, match="Invalid evaluator function"):
+        if is_async:
+            asyncio.run(
+                aevaluate(atarget, data=ds_examples, evaluators=[func], client=client)
+            )
+        else:
+            evaluate(target, data=ds_examples, evaluators=[func], client=client)
+
+
 def summary_eval_runs_examples(runs_, examples_):
     return {"score": len(runs_[0].dotted_order)}
 

From f3cc56f88f50fb907a0a83b271a88f93bd33065f Mon Sep 17 00:00:00 2001
From: Ankush Gola <ankush.gola@gmail.com>
Date: Tue, 10 Dec 2024 12:33:24 -0800
Subject: [PATCH 74/88] update to 0.2.2

---
 python/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index a831ff0df..5b008c34d 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.2.1"
+version = "0.2.2"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = ["LangChain <support@langchain.dev>"]
 license = "MIT"

From bf00aa6063900306baa774d2417c378b48d8076e Mon Sep 17 00:00:00 2001
From: Ankush Gola <ankush.gola@gmail.com>
Date: Tue, 10 Dec 2024 12:50:47 -0800
Subject: [PATCH 75/88] fix spelling

---
 python/langsmith/client.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index dcb74ec7c..b6e366f45 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -3462,7 +3462,7 @@ def create_example_from_run(
             created_at=created_at,
         )
 
-    def _prepate_multipart_data(
+    def _prepare_multipart_data(
         self,
         examples: Union[
             List[ls_schemas.ExampleUploadWithAttachments]
@@ -3635,7 +3635,7 @@ def update_examples_multipart(
         if updates is None:
             updates = []
 
-        encoder, data = self._prepate_multipart_data(updates, include_dataset_id=False)
+        encoder, data = self._prepare_multipart_data(updates, include_dataset_id=False)
 
         response = self.request_with_retries(
             "PATCH",
@@ -3666,7 +3666,7 @@ def upload_examples_multipart(
             )
         if uploads is None:
             uploads = []
-        encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False)
+        encoder, data = self._prepare_multipart_data(uploads, include_dataset_id=False)
 
         response = self.request_with_retries(
             "POST",
@@ -3702,7 +3702,7 @@ def upsert_examples_multipart(
         if upserts is None:
             upserts = []
 
-        encoder, data = self._prepate_multipart_data(upserts, include_dataset_id=True)
+        encoder, data = self._prepare_multipart_data(upserts, include_dataset_id=True)
 
         response = self.request_with_retries(
             "POST",

From c63b92c43c2718840e44933d2ba69864513794d4 Mon Sep 17 00:00:00 2001
From: Ankush Gola <ankush.gola@gmail.com>
Date: Tue, 10 Dec 2024 12:53:22 -0800
Subject: [PATCH 76/88] fix update_examples issue

---
 python/langsmith/client.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/langsmith/client.py b/python/langsmith/client.py
index b6e366f45..a92a89659 100644
--- a/python/langsmith/client.py
+++ b/python/langsmith/client.py
@@ -4258,6 +4258,7 @@ def update_examples(
                 metadata or [None] * len(example_ids),
                 splits or [None] * len(example_ids),
                 dataset_ids or [None] * len(example_ids),
+                attachments_operations or [None] * len(example_ids),
             )
         ]
         response = self.request_with_retries(

From 76e003e17c7eedac3e8f6026bc8ddbeb4abb1d49 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 12:57:14 -0800
Subject: [PATCH 77/88] fix test: upload attachment example via multipart

---
 python/tests/integration_tests/test_client.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index f72a2ebdf..c9c6f6587 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1220,13 +1220,17 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
     dataset_name = "__test_list_examples_attachments" + uuid4().hex[:4]
     dataset = langchain_client.create_dataset(dataset_name=dataset_name)
 
-    langchain_client.create_example(
-        inputs={"text": "hello world"},
-        outputs={"response": "hi there"},
+    langchain_client.upload_examples_multipart(
         dataset_id=dataset.id,
-        attachments={
-            "test_file": ("text/plain", b"test content"),
-        },
+        uploads=[
+            ExampleUploadWithAttachments(
+                inputs={"text": "hello world"},
+                outputs={"response": "hi there"},
+                attachments={
+                    "test_file": ("text/plain", b"test content"),
+                },
+            )
+        ]
     )
 
     # Get examples with attachments

From ed73f1a54783e425314ef557f687fffc4ce4e058 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 13:18:38 -0800
Subject: [PATCH 78/88] fix: rebuild attachment readers for each repetition

---
 python/langsmith/evaluation/_arunner.py | 15 ++++++++-
 python/langsmith/evaluation/_runner.py  | 45 ++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 7cee6bcf5..311178576 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -47,6 +47,7 @@
     _load_experiment,
     _load_tqdm,
     _load_traces,
+    _make_fresh_examples,
     _resolve_data,
     _resolve_evaluators,
     _resolve_experiment,
@@ -569,8 +570,12 @@ async def aget_examples(self) -> AsyncIterator[schemas.Example]:
                 include_attachments=self._include_attachments,
             )
             if self._num_repetitions > 1:
+                examples_list = [example async for example in self._examples]
                 self._examples = async_chain_from_iterable(
-                    aitertools.atee(self._examples, self._num_repetitions)
+                    [
+                        async_iter_from_list(_make_fresh_examples(examples_list))
+                        for _ in range(self._num_repetitions)
+                    ]
                 )
 
         self._examples, examples_iter = aitertools.atee(
@@ -1115,3 +1120,11 @@ async def async_chain_from_iterable(
     for sub_iterable in iterable:
         async for item in sub_iterable:
             yield item
+
+
+async def async_iter_from_list(
+    examples: List[schemas.Example],
+) -> AsyncIterable[schemas.Example]:
+    """Convert a list of examples to an async iterable."""
+    for example in examples:
+        yield example
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index ddbd9bf18..f78523718 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -8,6 +8,7 @@
 import datetime
 import functools
 import inspect
+import io
 import itertools
 import logging
 import pathlib
@@ -36,6 +37,7 @@
     cast,
 )
 
+import requests
 from typing_extensions import TypedDict, overload
 
 import langsmith
@@ -1341,8 +1343,10 @@ def examples(self) -> Iterable[schemas.Example]:
                 include_attachments=self._include_attachments,
             )
             if self._num_repetitions > 1:
+                examples_list = list(self._examples)
                 self._examples = itertools.chain.from_iterable(
-                    itertools.tee(self._examples, self._num_repetitions)
+                    _make_fresh_examples(examples_list)
+                    for _ in range(self._num_repetitions)
                 )
         self._examples, examples_iter = itertools.tee(self._examples)
         return examples_iter
@@ -2221,3 +2225,42 @@ def _import_langchain_runnable() -> Optional[type]:
 
 def _is_langchain_runnable(o: Any) -> bool:
     return bool((Runnable := _import_langchain_runnable()) and isinstance(o, Runnable))
+
+
+def _reset_example_attachments(example: schemas.Example) -> schemas.Example:
+    """Reset attachment readers for an example."""
+    if not hasattr(example, "attachments") or not example.attachments:
+        return example
+
+    new_attachments = {}
+    for key, attachment in example.attachments.items():
+        response = requests.get(attachment["presigned_url"], stream=True)
+        response.raise_for_status()
+        reader = io.BytesIO(response.content)
+        new_attachments[key] = {
+            "presigned_url": attachment["presigned_url"],
+            "reader": reader,
+        }
+
+    # Create a new Example instance with the updated attachments
+    return schemas.Example(
+        id=example.id,
+        created_at=example.created_at,
+        dataset_id=example.dataset_id,
+        inputs=example.inputs,
+        outputs=example.outputs,
+        metadata=example.metadata,
+        modified_at=example.modified_at,
+        runs=example.runs,
+        source_run_id=example.source_run_id,
+        attachments=new_attachments,
+        _host_url=example._host_url,
+        _tenant_id=example._tenant_id,
+    )
+
+
+def _make_fresh_examples(
+    _original_examples: List[schemas.Example],
+) -> List[schemas.Example]:
+    """Create fresh copies of examples with reset readers."""
+    return [_reset_example_attachments(example) for example in _original_examples]

From 4887a99de954f601df326a26fc05c9100ae876e1 Mon Sep 17 00:00:00 2001
From: Ankush Gola <ankush.gola@gmail.com>
Date: Tue, 10 Dec 2024 13:24:11 -0800
Subject: [PATCH 79/88] attempt to fix test_update_examples_multipart

---
 python/tests/integration_tests/test_client.py | 58 ++++++-------------
 1 file changed, 19 insertions(+), 39 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index c9c6f6587..904a3c028 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1778,7 +1778,7 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
             "new_file1": ("text/plain", b"new content 1"),
         },
         attachments_operations=AttachmentsOperations(
-            rename={"file1": "renamed_file1"},
+            retain=["file1"],
         ),
     )
 
@@ -1807,60 +1807,41 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
 
     # Verify first example updates
     example_1_updated = next(ex for ex in updated if ex.id == example_ids[0])
+    print(example_1_updated.attachments)
     assert example_1_updated.inputs["text"] == "updated hello world"
-    assert "renamed_file1" in example_1_updated.attachments_info
-    assert "new_file1" in example_1_updated.attachments_info
-    assert "file2" not in example_1_updated.attachments_info
-    assert (
-        example_1_updated.attachments_info["renamed_file1"]["reader"].read()
-        == b"original content 1"
-    )
-    assert (
-        example_1_updated.attachments_info["new_file1"]["reader"].read()
-        == b"new content 1"
-    )
+    assert "new_file1" in example_1_updated.attachments
+    assert "file1" in example_1_updated.attachments
+    assert "file2" not in example_1_updated.attachments
+    # assert (
+    #     example_1_updated.attachments["renamed_file1"]["reader"].read()
+    #     == b"original content 1"
+    # )
+    # assert (
+    #     example_1_updated.attachments["new_file1"]["reader"].read()
+    #     == b"new content 1"
+    # )
 
     # Verify second example updates
     example_2_updated = next(ex for ex in updated if ex.id == example_ids[1])
     assert example_2_updated.inputs["text"] == "updated second example"
-    assert "file3" in example_2_updated.attachments_info
-    assert "new_file2" in example_2_updated.attachments_info
-    assert "file4" not in example_2_updated.attachments_info
+    assert "file3" in example_2_updated.attachments
+    assert "new_file2" in example_2_updated.attachments
+    assert "file4" not in example_2_updated.attachments
     assert (
-        example_2_updated.attachments_info["file3"]["reader"].read()
+        example_2_updated.attachments["file3"]["reader"].read()
         == b"original content 3"
     )
     assert (
-        example_2_updated.attachments_info["new_file2"]["reader"].read()
+        example_2_updated.attachments["new_file2"]["reader"].read()
         == b"new content 2"
     )
 
-    # Test updating examples in different datasets fails
-    other_dataset = langchain_client.create_dataset(
-        dataset_name=dataset_name + "_other",
-        description="Other test dataset",
-    )
-    with pytest.raises(ValueError, match="All examples must be in the same dataset"):
-        langchain_client.update_examples_multipart(
-            dataset_id=dataset.id,
-            updates=[
-                ExampleUpsertWithAttachments(
-                    id=example_ids[0],
-                    inputs={"text": "update 1"},
-                ),
-                ExampleUpsertWithAttachments(
-                    id=uuid4(),
-                    inputs={"text": "update 2"},
-                ),
-            ],
-        )
-
     # Test updating non-existent example fails
     with pytest.raises(LangSmithNotFoundError):
         langchain_client.update_examples_multipart(
             dataset_id=dataset.id,
             updates=[
-                ExampleUpsertWithAttachments(
+                ExampleUpdateWithAttachments(
                     id=uuid4(),
                     inputs={"text": "should fail"},
                 )
@@ -1902,4 +1883,3 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
 
     # Clean up
     langchain_client.delete_dataset(dataset_id=dataset.id)
-    langchain_client.delete_dataset(dataset_id=other_dataset.id)

From 6b9a0268be1ec6d754e88266a9a9cbb87c2db3ae Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 13:32:25 -0800
Subject: [PATCH 80/88] fix test_update_examples_multipart assertions

---
 python/tests/integration_tests/test_client.py | 140 ++++++++----------
 1 file changed, 65 insertions(+), 75 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index c9c6f6587..696d6ebaf 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -64,7 +64,7 @@ def langchain_client() -> Client:
                 "dataset_examples_multipart_enabled": True,
                 "examples_multipart_enabled": True,
             }
-        }
+        },
     )
 
 
@@ -1230,7 +1230,7 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:
                     "test_file": ("text/plain", b"test content"),
                 },
             )
-        ]
+        ],
     )
 
     # Get examples with attachments
@@ -1744,9 +1744,11 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
         description="Test dataset for multipart example updates",
         data_type=DataType.kv,
     )
+    example_ids = [uuid4() for _ in range(2)]
 
     # First create some examples with attachments
     example_1 = ExampleUploadWithAttachments(
+        id=example_ids[0],
         inputs={"text": "hello world"},
         attachments={
             "file1": ("text/plain", b"original content 1"),
@@ -1755,6 +1757,7 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     )
 
     example_2 = ExampleUploadWithAttachments(
+        id=example_ids[1],
         inputs={"text": "second example"},
         attachments={
             "file3": ("text/plain", b"original content 3"),
@@ -1768,7 +1771,6 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     assert created_examples["count"] == 2
 
     examples = list(langchain_client.list_examples(dataset_id=dataset.id))
-    example_ids = [ex.id for ex in examples]
 
     # Now create update operations
     update_1 = ExampleUpdateWithAttachments(
@@ -1808,98 +1810,86 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     # Verify first example updates
     example_1_updated = next(ex for ex in updated if ex.id == example_ids[0])
     assert example_1_updated.inputs["text"] == "updated hello world"
-    assert "renamed_file1" in example_1_updated.attachments_info
-    assert "new_file1" in example_1_updated.attachments_info
-    assert "file2" not in example_1_updated.attachments_info
+    assert "renamed_file1" in example_1_updated.attachments
+    assert "new_file1" in example_1_updated.attachments
+    assert "file2" not in example_1_updated.attachments
     assert (
-        example_1_updated.attachments_info["renamed_file1"]["reader"].read()
+        example_1_updated.attachments["renamed_file1"]["reader"].read()
         == b"original content 1"
     )
     assert (
-        example_1_updated.attachments_info["new_file1"]["reader"].read()
+        example_1_updated.attachments["new_file1"]["reader"].read()
         == b"new content 1"
     )
 
     # Verify second example updates
     example_2_updated = next(ex for ex in updated if ex.id == example_ids[1])
     assert example_2_updated.inputs["text"] == "updated second example"
-    assert "file3" in example_2_updated.attachments_info
-    assert "new_file2" in example_2_updated.attachments_info
-    assert "file4" not in example_2_updated.attachments_info
+    assert "file3" in example_2_updated.attachments
+    assert "new_file2" in example_2_updated.attachments
+    assert "file4" not in example_2_updated.attachments
     assert (
-        example_2_updated.attachments_info["file3"]["reader"].read()
+        example_2_updated.attachments["file3"]["reader"].read()
         == b"original content 3"
     )
     assert (
-        example_2_updated.attachments_info["new_file2"]["reader"].read()
+        example_2_updated.attachments["new_file2"]["reader"].read()
         == b"new content 2"
     )
 
-    # Test updating examples in different datasets fails
-    other_dataset = langchain_client.create_dataset(
-        dataset_name=dataset_name + "_other",
-        description="Other test dataset",
+    # Test updating non-existent example doesn't do anything
+    response = langchain_client.update_examples_multipart(
+        dataset_id=dataset.id,
+        updates=[
+            ExampleUpdateWithAttachments(
+                id=uuid4(),
+                inputs={"text": "should fail"},
+            )
+        ],
     )
-    with pytest.raises(ValueError, match="All examples must be in the same dataset"):
-        langchain_client.update_examples_multipart(
-            dataset_id=dataset.id,
-            updates=[
-                ExampleUpsertWithAttachments(
-                    id=example_ids[0],
-                    inputs={"text": "update 1"},
-                ),
-                ExampleUpsertWithAttachments(
-                    id=uuid4(),
-                    inputs={"text": "update 2"},
-                ),
-            ],
-        )
-
-    # Test updating non-existent example fails
-    with pytest.raises(LangSmithNotFoundError):
-        langchain_client.update_examples_multipart(
-            dataset_id=dataset.id,
-            updates=[
-                ExampleUpsertWithAttachments(
-                    id=uuid4(),
-                    inputs={"text": "should fail"},
-                )
-            ],
-        )
+    assert response["count"] == 0
 
-    # Test updating with mismatch named attachments fails
-    with pytest.raises(ValueError):
-        langchain_client.update_examples_multipart(
-            dataset_id=dataset.id,
-            updates=[
-                ExampleUpdateWithAttachments(
-                    id=example_ids[0],
-                    attachments={
-                        "renamed_file1": ("text/plain", b"new content 1"),
-                    },
-                    attachments_operations=AttachmentsOperations(
-                        retain=["renamed_file1"],
-                    ),
-                )
-            ],
-        )
+    # Test new attachments have priority
+    response = langchain_client.update_examples_multipart(
+        dataset_id=dataset.id,
+        updates=[
+            ExampleUpdateWithAttachments(
+                id=example_ids[0],
+                attachments={
+                    "renamed_file1": ("text/plain", b"new content 1"),
+                },
+                attachments_operations=AttachmentsOperations(
+                    retain=["renamed_file1"],
+                ),
+            )
+        ],
+    )
+    assert response["count"] == 1
+    example_1_updated = langchain_client.read_example(example_ids[0])
+    assert list(example_1_updated.attachments.keys()) == ["renamed_file1"]
+    assert (
+        example_1_updated.attachments["renamed_file1"]["reader"].read()
+        == b"new content 1"
+    )
 
-    with pytest.raises(ValueError):
-        langchain_client.update_examples_multipart(
-            dataset_id=dataset.id,
-            updates=[
-                ExampleUpdateWithAttachments(
-                    id=example_ids[0],
-                    attachments={
-                        "foo": ("text/plain", b"new content 1"),
-                    },
-                    attachments_operations=AttachmentsOperations(
-                        rename={"renamed_file1": "foo"},
-                    ),
-                )
-            ],
-        )
+    # Test new attachments have priority
+    response = langchain_client.update_examples_multipart(
+        dataset_id=dataset.id,
+        updates=[
+            ExampleUpdateWithAttachments(
+                id=example_ids[0],
+                attachments={
+                    "foo": ("text/plain", b"new content 1"),
+                },
+                attachments_operations=AttachmentsOperations(
+                    rename={"renamed_file1": "foo"},
+                ),
+            )
+        ],
+    )
+    assert response["count"] == 1
+    example_1_updated = langchain_client.read_example(example_ids[0])
+    assert list(example_1_updated.attachments.keys()) == ["foo"]
 
     # Clean up
     langchain_client.delete_dataset(dataset_id=dataset.id)
-    langchain_client.delete_dataset(dataset_id=other_dataset.id)

From 75736919347ee77fe883c497107af68e77862c15 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 13:37:24 -0800
Subject: [PATCH 81/88] remove hard-coded API key from test client fixture

---
 python/tests/integration_tests/test_client.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 7e65a8d34..1c145b600 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -59,7 +59,6 @@ def wait_for(
 def langchain_client() -> Client:
     get_env_var.cache_clear()
     return Client(
-        api_key="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",
         info={
             "instance_flags": {
                 "dataset_examples_multipart_enabled": True,

From cf85e5659c78854835fa30ac283b21c8e96b4ebd Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 13:39:07 -0800
Subject: [PATCH 82/88] fix test_update_example_with_attachments_operations

---
 python/tests/integration_tests/test_client.py | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 1c145b600..41a206f1c 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1608,9 +1608,10 @@ def test_update_example_with_attachments_operations(langchain_client: Client) ->
         dataset_name=dataset_name,
         description="Test dataset for updating example attachments",
     )
-
+    example_id = uuid4()
     # Create example with attachments
     example = ExampleUploadWithAttachments(
+        id=example_id,
         inputs={"query": "What's in this image?"},
         outputs={"answer": "A test image"},
         attachments={
@@ -1618,7 +1619,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) ->
             "image2": ("image/png", b"fake image data 2"),
         },
     )
-    created_example = langchain_client.upload_examples_multipart(
+    langchain_client.upload_examples_multipart(
         dataset_id=dataset.id, uploads=[example]
     )
 
@@ -1629,26 +1630,26 @@ def test_update_example_with_attachments_operations(langchain_client: Client) ->
     )
 
     langchain_client.update_example(
-        example_id=created_example.id,
+        example_id=example_id,
         attachments_operations=attachments_operations,
     )
 
     # Verify the update
     retrieved_example = langchain_client.read_example(
-        example_id=created_example.id,
+        example_id=example_id,
     )
 
     # Check that only the renamed attachment exists
-    assert len(retrieved_example.attachments_info) == 2
-    assert "renamed_image" in retrieved_example.attachments_info
-    assert "image2" in retrieved_example.attachments_info
-    assert "image1" not in retrieved_example.attachments_info
+    assert len(retrieved_example.attachments) == 2
+    assert "renamed_image" in retrieved_example.attachments
+    assert "image2" in retrieved_example.attachments
+    assert "image1" not in retrieved_example.attachments
     assert (
-        retrieved_example.attachments_info["image2"]["reader"].read()
+        retrieved_example.attachments["image2"]["reader"].read()
         == b"fake image data 2"
     )
     assert (
-        retrieved_example.attachments_info["renamed_image"]["reader"].read()
+        retrieved_example.attachments["renamed_image"]["reader"].read()
         == b"fake image data 1"
     )
 

From 5c7482948cea3fa60caa8930577baa0d50bb0c19 Mon Sep 17 00:00:00 2001
From: Ankush Gola <ankush.gola@gmail.com>
Date: Tue, 10 Dec 2024 13:40:32 -0800
Subject: [PATCH 83/88] fix
 test_bulk_update_examples_with_attachments_operations

---
 python/tests/integration_tests/test_client.py | 29 ++++++++++++-------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 904a3c028..be5409782 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1666,8 +1666,10 @@ def test_bulk_update_examples_with_attachments_operations(
         description="Test dataset for bulk updating example attachments",
     )
 
+    example_id1, example_id2 = uuid4(), uuid4()
     # Create two examples with attachments
     example1 = ExampleUploadWithAttachments(
+        id=example_id1,
         inputs={"query": "What's in this image?"},
         outputs={"answer": "A test image 1"},
         attachments={
@@ -1676,6 +1678,7 @@ def test_bulk_update_examples_with_attachments_operations(
         },
     )
     example2 = ExampleUploadWithAttachments(
+        id=example_id2,
         inputs={"query": "What's in this image?"},
         outputs={"answer": "A test image 2"},
         attachments={
@@ -1688,7 +1691,9 @@ def test_bulk_update_examples_with_attachments_operations(
         dataset_id=dataset.id,
         uploads=[example1, example2],
     )
-    example_ids = [ex.id for ex in created_examples]
+    assert len(created_examples["example_ids"]) == 2
+    assert str(example_id1) in created_examples["example_ids"]
+    assert str(example_id2) in created_examples["example_ids"]
 
     # Update both examples with different attachment operations
     attachments_operations = [
@@ -1699,7 +1704,7 @@ def test_bulk_update_examples_with_attachments_operations(
     ]
 
     langchain_client.update_examples(
-        example_ids=example_ids,
+        example_ids=[example_id1, example_id2],
         attachments_operations=attachments_operations,
     )
 
@@ -1707,27 +1712,29 @@ def test_bulk_update_examples_with_attachments_operations(
     updated_examples = list(
         langchain_client.list_examples(
             dataset_id=dataset.id,
-            example_ids=example_ids,
+            example_ids=[example_id1, example_id2],
             include_attachments=True,
         )
     )
 
+    updated_example_1 = next(ex for ex in updated_examples if ex.id == example_id1)
+    updated_example_2 = next(ex for ex in updated_examples if ex.id == example_id2)
     # Check first example
-    assert len(updated_examples[0].attachments) == 1
-    assert "renamed_image1" in updated_examples[0].attachments
-    assert "extra" not in updated_examples[0].attachments
+    assert len(updated_example_1.attachments) == 1
+    assert "renamed_image1" in updated_example_1.attachments
+    assert "extra" not in updated_example_1.attachments
 
     # Check second example
-    assert len(updated_examples[1].attachments) == 1
-    assert "extra" in updated_examples[1].attachments
-    assert "image2" not in updated_examples[1].attachments
+    assert len(updated_example_2.attachments) == 1
+    assert "extra" in updated_example_2.attachments
+    assert "image2" not in updated_example_2.attachments
 
     # Check attachment data
     assert (
-        updated_examples[0].attachments["renamed_image1"][1].read()
+        updated_example_1.attachments["renamed_image1"]["reader"].read()
         == b"fake image data 1"
     )
-    assert updated_examples[1].attachments["extra"][1].read() == b"extra data"
+    assert updated_example_2.attachments["extra"]["reader"].read() == b"extra data"
 
     # Clean up
     langchain_client.delete_dataset(dataset_id=dataset.id)

From 266272d734c3258a11f6628b07a517fa09ecfc29 Mon Sep 17 00:00:00 2001
From: Ankush Gola <ankush.gola@gmail.com>
Date: Tue, 10 Dec 2024 13:44:16 -0800
Subject: [PATCH 84/88] lint and fmt

---
 python/tests/integration_tests/test_client.py | 21 ++++++-------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 983c072ca..803b88454 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -1619,9 +1619,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) ->
             "image2": ("image/png", b"fake image data 2"),
         },
     )
-    langchain_client.upload_examples_multipart(
-        dataset_id=dataset.id, uploads=[example]
-    )
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
 
     # Update example with attachment operations to rename and retain attachments
     attachments_operations = AttachmentsOperations(
@@ -1645,8 +1643,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) ->
     assert "image2" in retrieved_example.attachments
     assert "image1" not in retrieved_example.attachments
     assert (
-        retrieved_example.attachments["image2"]["reader"].read()
-        == b"fake image data 2"
+        retrieved_example.attachments["image2"]["reader"].read() == b"fake image data 2"
     )
     assert (
         retrieved_example.attachments["renamed_image"]["reader"].read()
@@ -1778,8 +1775,6 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     )
     assert created_examples["count"] == 2
 
-    examples = list(langchain_client.list_examples(dataset_id=dataset.id))
-
     # Now create update operations
     update_1 = ExampleUpdateWithAttachments(
         id=example_ids[0],
@@ -1822,12 +1817,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     assert "new_file1" in example_1_updated.attachments
     assert "file2" not in example_1_updated.attachments
     assert (
-        example_1_updated.attachments["new_file1"]["reader"].read()
-        == b"new content 1"
+        example_1_updated.attachments["new_file1"]["reader"].read() == b"new content 1"
     )
     assert (
-        example_1_updated.attachments["file1"]["reader"].read()
-        == b"original content 1"
+        example_1_updated.attachments["file1"]["reader"].read() == b"original content 1"
     )
 
     # Verify second example updates
@@ -1840,12 +1833,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     assert "new_file2" in example_2_updated.attachments
     assert "file4" not in example_2_updated.attachments
     assert (
-        example_2_updated.attachments["file3"]["reader"].read()
-        == b"original content 3"
+        example_2_updated.attachments["file3"]["reader"].read() == b"original content 3"
     )
     assert (
-        example_2_updated.attachments["new_file2"]["reader"].read()
-        == b"new content 2"
+        example_2_updated.attachments["new_file2"]["reader"].read() == b"new content 2"
     )
 
     # Test updating non-existent example doesn't do anything

From 61b28f5624753e19e41ef480420c7236741db92b Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 13:51:01 -0800
Subject: [PATCH 85/88] fix tests

---
 python/langsmith/evaluation/_arunner.py       | 14 ++++++--
 python/langsmith/evaluation/_runner.py        | 34 +++++++++----------
 python/tests/integration_tests/test_client.py | 26 +++++---------
 3 files changed, 36 insertions(+), 38 deletions(-)

diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
index 311178576..25ea0d62a 100644
--- a/python/langsmith/evaluation/_arunner.py
+++ b/python/langsmith/evaluation/_arunner.py
@@ -648,7 +648,11 @@ async def awith_predictions(
         /,
         max_concurrency: Optional[int] = None,
     ) -> _AsyncExperimentManager:
-        _experiment_results = self._apredict(target, max_concurrency=max_concurrency)
+        _experiment_results = self._apredict(
+            target,
+            max_concurrency=max_concurrency,
+            include_attachments=_include_attachments(target),
+        )
         r1, r2 = aitertools.atee(_experiment_results, 2, lock=asyncio.Lock())
         return _AsyncExperimentManager(
             (pred["example"] async for pred in r1),
@@ -723,7 +727,11 @@ async def aget_summary_scores(self) -> Dict[str, List[dict]]:
     ## Private methods
 
     async def _apredict(
-        self, target: ATARGET_T, /, max_concurrency: Optional[int] = None
+        self,
+        target: ATARGET_T,
+        /,
+        max_concurrency: Optional[int] = None,
+        include_attachments: bool = False,
     ) -> AsyncIterator[_ForwardResults]:
         fn = _ensure_async_traceable(target)
 
@@ -736,7 +744,7 @@ async def predict_all():
                     self.experiment_name,
                     self._metadata,
                     self.client,
-                    include_attachments=self._include_attachments,
+                    include_attachments,
                 )
 
         async for result in aitertools.aiter_with_concurrency(
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index f78523718..2232feeff 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1402,7 +1402,10 @@ def with_predictions(
         """Lazily apply the target function to the experiment."""
         context = copy_context()
         _experiment_results = context.run(
-            self._predict, target, max_concurrency=max_concurrency
+            self._predict,
+            target,
+            max_concurrency=max_concurrency,
+            include_attachments=_include_attachments(target),
         )
         r1, r2 = itertools.tee(_experiment_results, 2)
         return _ExperimentManager(
@@ -1501,6 +1504,7 @@ def _predict(
         target: TARGET_T,
         /,
         max_concurrency: Optional[int] = None,
+        include_attachments: bool = False,
     ) -> Generator[_ForwardResults, None, None]:
         """Run the target function on the examples."""
         fn = _ensure_traceable(target)
@@ -1514,7 +1518,7 @@ def _predict(
                     self._metadata,
                     self.client,
                     self._upload_results,
-                    self._include_attachments,
+                    include_attachments,
                 )
 
         else:
@@ -1528,7 +1532,7 @@ def _predict(
                         self._metadata,
                         self.client,
                         self._upload_results,
-                        self._include_attachments,
+                        include_attachments,
                     )
                     for example in self.examples
                 ]
@@ -1923,20 +1927,16 @@ def _evaluators_include_attachments(
 ) -> bool:
     if evaluators is None:
         return False
-    return any(
-        any(
-            p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
-            and p.name == "attachments"
-            for p in (
-                inspect.signature(
-                    e.__call__ if hasattr(e, "__call__") else e
-                ).parameters.values()
-                if callable(e) or hasattr(e, "__call__")
-                else []
-            )
-        )
-        for e in evaluators
-    )
+
+    def evaluator_has_attachments(evaluator: Union[EVALUATOR_T, AEVALUATOR_T]) -> bool:
+        sig = inspect.signature(evaluator)
+        params = list(sig.parameters.values())
+        positional_params = [
+            p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+        ]
+        return any(p.name == "attachments" for p in positional_params)
+
+    return any(evaluator_has_attachments(e) for e in evaluators)
 
 
 def _include_attachments(
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 41a206f1c..8ec45be52 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -59,6 +59,7 @@ def wait_for(
 def langchain_client() -> Client:
     get_env_var.cache_clear()
     return Client(
+        api_key="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",
         info={
             "instance_flags": {
                 "dataset_examples_multipart_enabled": True,
@@ -1321,7 +1322,6 @@ def test_evaluate_with_attachments_not_in_target(langchain_client: Client) -> No
     )
 
     example = ExampleUploadWithAttachments(
-        dataset_id=dataset.id,
         inputs={"question": "What is shown in the image?"},
         outputs={"answer": "test image"},
         attachments={
@@ -1329,10 +1329,9 @@ def test_evaluate_with_attachments_not_in_target(langchain_client: Client) -> No
         },
     )
 
-    langchain_client.upload_examples_multipart(uploads=[example])
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
 
     def target(inputs: Dict[str, Any]) -> Dict[str, Any]:
-        # Verify we receive the attachment data
         return {"answer": "test image"}
 
     def evaluator(
@@ -1619,9 +1618,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) ->
             "image2": ("image/png", b"fake image data 2"),
         },
     )
-    langchain_client.upload_examples_multipart(
-        dataset_id=dataset.id, uploads=[example]
-    )
+    langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])
 
     # Update example with attachment operations to rename and retain attachments
     attachments_operations = AttachmentsOperations(
@@ -1645,8 +1642,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) ->
     assert "image2" in retrieved_example.attachments
     assert "image1" not in retrieved_example.attachments
     assert (
-        retrieved_example.attachments["image2"]["reader"].read()
-        == b"fake image data 2"
+        retrieved_example.attachments["image2"]["reader"].read() == b"fake image data 2"
     )
     assert (
         retrieved_example.attachments["renamed_image"]["reader"].read()
@@ -1771,8 +1767,6 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     )
     assert created_examples["count"] == 2
 
-    examples = list(langchain_client.list_examples(dataset_id=dataset.id))
-
     # Now create update operations
     update_1 = ExampleUpdateWithAttachments(
         id=example_ids[0],
@@ -1815,12 +1809,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     assert "new_file1" in example_1_updated.attachments
     assert "file2" not in example_1_updated.attachments
     assert (
-        example_1_updated.attachments["new_file1"]["reader"].read()
-        == b"new content 1"
+        example_1_updated.attachments["new_file1"]["reader"].read() == b"new content 1"
     )
     assert (
-        example_1_updated.attachments["file1"]["reader"].read()
-        == b"original content 1"
+        example_1_updated.attachments["file1"]["reader"].read() == b"original content 1"
     )
 
     # Verify second example updates
@@ -1833,12 +1825,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None:
     assert "new_file2" in example_2_updated.attachments
     assert "file4" not in example_2_updated.attachments
     assert (
-        example_2_updated.attachments["file3"]["reader"].read()
-        == b"original content 3"
+        example_2_updated.attachments["file3"]["reader"].read() == b"original content 3"
     )
     assert (
-        example_2_updated.attachments["new_file2"]["reader"].read()
-        == b"new content 2"
+        example_2_updated.attachments["new_file2"]["reader"].read() == b"new content 2"
     )
 
     # Test updating non-existent example doesn't do anything

From 34e8bb92c0ccbe9dec3ffb55dfb7a732952a7b9f Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 14:04:24 -0800
Subject: [PATCH 86/88] fmt: relax evaluator type hint, drop test api_key

---
 python/langsmith/evaluation/_runner.py        | 2 +-
 python/tests/integration_tests/test_client.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 2232feeff..55f8865cb 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1928,7 +1928,7 @@ def _evaluators_include_attachments(
     if evaluators is None:
         return False
 
-    def evaluator_has_attachments(evaluator: Union[EVALUATOR_T, AEVALUATOR_T]) -> bool:
+    def evaluator_has_attachments(evaluator: Any) -> bool:
         sig = inspect.signature(evaluator)
         params = list(sig.parameters.values())
         positional_params = [
diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py
index 8adf47182..f5f7ba878 100644
--- a/python/tests/integration_tests/test_client.py
+++ b/python/tests/integration_tests/test_client.py
@@ -59,7 +59,6 @@ def wait_for(
 def langchain_client() -> Client:
     get_env_var.cache_clear()
     return Client(
-        api_key="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",
         info={
             "instance_flags": {
                 "dataset_examples_multipart_enabled": True,

From e043a7d4ec837445561a08f1eb938e5232863c08 Mon Sep 17 00:00:00 2001
From: isaac hershenson <ihershenson@hmc.edu>
Date: Tue, 10 Dec 2024 14:15:32 -0800
Subject: [PATCH 87/88] fmt: wrap evaluator signature inspection in try/except

---
 python/langsmith/evaluation/_runner.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index 55f8865cb..aea7a86cf 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1929,12 +1929,17 @@ def _evaluators_include_attachments(
         return False
 
     def evaluator_has_attachments(evaluator: Any) -> bool:
-        sig = inspect.signature(evaluator)
-        params = list(sig.parameters.values())
-        positional_params = [
-            p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
-        ]
-        return any(p.name == "attachments" for p in positional_params)
+        try:
+            sig = inspect.signature(evaluator)
+            params = list(sig.parameters.values())
+            positional_params = [
+                p
+                for p in params
+                if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+            ]
+            return any(p.name == "attachments" for p in positional_params)
+        except Exception:
+            return False
 
     return any(evaluator_has_attachments(e) for e in evaluators)
 

From d77bd0e4855e0476350780889f74db2ff9e20eac Mon Sep 17 00:00:00 2001
From: Ankush Gola <ankush.gola@gmail.com>
Date: Tue, 10 Dec 2024 14:37:45 -0800
Subject: [PATCH 88/88] remove blanket try/except

---
 python/langsmith/evaluation/_runner.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
index aea7a86cf..ea206b098 100644
--- a/python/langsmith/evaluation/_runner.py
+++ b/python/langsmith/evaluation/_runner.py
@@ -1929,17 +1929,14 @@ def _evaluators_include_attachments(
         return False
 
     def evaluator_has_attachments(evaluator: Any) -> bool:
-        try:
-            sig = inspect.signature(evaluator)
-            params = list(sig.parameters.values())
-            positional_params = [
-                p
-                for p in params
-                if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
-            ]
-            return any(p.name == "attachments" for p in positional_params)
-        except Exception:
+        if not callable(evaluator):
             return False
+        sig = inspect.signature(evaluator)
+        params = list(sig.parameters.values())
+        positional_params = [
+            p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+        ]
+        return any(p.name == "attachments" for p in positional_params)
 
     return any(evaluator_has_attachments(e) for e in evaluators)