From e9e2131f0c0fe19e5de062f398e7a920ab7e67ca Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 12 Nov 2024 18:38:53 -0800 Subject: [PATCH 01/88] wip --- python/langsmith/client.py | 89 +++++++++++++++++++++++++++++++++++++ python/langsmith/schemas.py | 3 ++ 2 files changed, 92 insertions(+) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index eb397b4c4..99368aa85 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -82,6 +82,7 @@ _SIZE_LIMIT_BYTES, ) from langsmith._internal._multipart import ( + MultipartPart, MultipartPartsAndContext, join_multipart_parts_and_context, ) @@ -3369,6 +3370,94 @@ def create_example_from_run( created_at=created_at, ) + def upsert_example_multipart( + self, + *, + upserts: List[ls_schemas.ExampleCreateWithAttachments] = None, + ) -> None: + """Upsert examples""" + parts = list[MultipartPart] + + for example in upserts: + + if example.id is not None: + example_id = str(example.id) # is the conversion to string neccessary? + else: + example_id = str(uuid.uuid4()) + + remaining_values = { + "dataset_id": example.dataset_id, + "created_at": example.created_at, + "metadata": example.metadata, + "split": example.split + } + valb = _dumps_json(remaining_values) + + parts.append( + f"{example_id}", + ( + None, + valb, + "application/json", + {"Content-Length": str(len(valb))}, + ), + ), + + inputsb = example.inputs + outputsb = example.outputs + + parts.append( + f"{example_id}.inputs", + ( + None, + inputsb, + "application/json", + {"Content-Length": str(len(inputsb))}, + ), + ), + + parts.append( + f"{example_id}.outputs", + ( + None, + outputsb, + "application/json", + {"Content-Length": str(len(outputsb))}, + ), + ), + + if example.attachments: + for attachment in example.attachments: + parts.append( + f"{example_id}.attachment.{attachment.mime_type}", + ( + None, + attachment.data, + "application/json", # I feel like this is wrong + {"Content-Length": str(len(attachment.data))}, + ), + ), + + encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY) + if encoder.len <= 20_000_000: # ~20 MB + data = encoder.to_string() + else: + data = encoder + + + response = self.request_with_retries( + "POST", + "/v1/examples/multipart", # No clue what this is supposed to be + request_kwargs={ + "data": data, + "headers": { + **self._headers, + "Content-Type": encoder.content_type, + }, + }, + ) + ls_utils.raise_for_status_with_text(response) + def create_examples( self, *, diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 80e112e46..ff8528d29 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -88,6 +88,9 @@ class ExampleCreate(ExampleBase): created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) split: Optional[Union[str, List[str]]] = None +class ExampleCreateWithAttachments(ExampleCreate): + """Example create with attachments.""" + attachments: Optional[List[Attachment]] = None class Example(ExampleBase): """Example model.""" From ff3054182f91f0305e42718e6b8431d96e4adca8 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 13 Nov 2024 08:02:55 -0800 Subject: [PATCH 02/88] unit test --- python/langsmith/client.py | 122 +++++++++++++++---------- python/langsmith/schemas.py | 5 +- python/tests/unit_tests/test_client.py | 87 ++++++++++++++++++ 3 files changed, 167 insertions(+), 47 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 99368aa85..dca29d9fc 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3375,13 +3375,12 @@ def upsert_example_multipart( *, upserts: List[ls_schemas.ExampleCreateWithAttachments] = None, ) -> None: - """Upsert examples""" - parts = list[MultipartPart] + """Upsert examples.""" + parts: list[MultipartPart] = [] for example in upserts: - if example.id is not None: - example_id = str(example.id) # is the conversion to string neccessary? + example_id = str(example.id) # is the conversion to string neccessary? else: example_id = str(uuid.uuid4()) @@ -3389,65 +3388,96 @@ def upsert_example_multipart( "dataset_id": example.dataset_id, "created_at": example.created_at, "metadata": example.metadata, - "split": example.split + "split": example.split, } valb = _dumps_json(remaining_values) - - parts.append( - f"{example_id}", - ( - None, - valb, - "application/json", - {"Content-Length": str(len(valb))}, + + ( + parts.append( + ( + f"{example_id}", + ( + None, + valb, + "application/json", + {"Content-Length": str(len(valb))}, + ), + ) ), - ), + ) - inputsb = example.inputs - outputsb = example.outputs + inputsb = _dumps_json(example.inputs) + outputsb = _dumps_json(example.outputs) - parts.append( - f"{example_id}.inputs", - ( - None, - inputsb, - "application/json", - {"Content-Length": str(len(inputsb))}, - ), - ), - - parts.append( - f"{example_id}.outputs", - ( - None, - outputsb, - "application/json", - {"Content-Length": str(len(outputsb))}, + ( + parts.append( + ( + f"{example_id}.inputs", + ( + None, + inputsb, + "application/json", + {"Content-Length": str(len(inputsb))}, + ), + ) ), - ), - - if example.attachments: - for attachment in example.attachments: - parts.append( - f"{example_id}.attachment.{attachment.mime_type}", + ) + + ( + parts.append( + ( + f"{example_id}.outputs", ( None, - attachment.data, - "application/json", # I feel like this is wrong - {"Content-Length": str(len(attachment.data))}, + outputsb, + "application/json", + {"Content-Length": str(len(outputsb))}, ), - ), - + ) + ), + ) + + if example.attachments: + for name, attachment in example.attachments.items(): + if isinstance(attachment, tuple): + mime_type, data = attachment + ( + parts.append( + ( + f"{example_id}.attachment.{name}", + ( + None, + data, + mime_type, + {"Content-Length": str(len(data))}, + ), + ) + ), + ) + else: + ( + parts.append( + ( + f"{example_id}.attachment.{name}", + ( + None, + attachment.data, + attachment.mime_type, + {"Content-Length": str(len(attachment.data))}, + ), + ) + ), + ) + encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY) if encoder.len <= 20_000_000: # ~20 MB data = encoder.to_string() else: data = encoder - response = self.request_with_retries( "POST", - "/v1/examples/multipart", # No clue what this is supposed to be + "/v1/examples/multipart", # No clue what this is supposed to be request_kwargs={ "data": data, "headers": { diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index ff8528d29..4e8711002 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -88,9 +88,12 @@ class ExampleCreate(ExampleBase): created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) split: Optional[Union[str, List[str]]] = None + class ExampleCreateWithAttachments(ExampleCreate): """Example create with attachments.""" - attachments: Optional[List[Attachment]] = None + + attachments: Optional[Attachments] = None + class Example(ExampleBase): """Example model.""" diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 5dc1bbe1e..edda5dd09 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -416,6 +416,93 @@ def test_create_run_mutate( assert outputs == {"messages": ["hi", "there"]} +@mock.patch("langsmith.client.requests.Session") +def test_upsert_example_multipart(mock_session_cls: mock.Mock) -> None: + """Test that upsert_example_multipart sends correct multipart data.""" + mock_session = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_session.request.return_value = mock_response + mock_session_cls.return_value = mock_session + + client = Client(api_url="http://localhost:1984", api_key="123") + + # Create test data + example_id = uuid.uuid4() + dataset_id = uuid.uuid4() + created_at = datetime(2015, 1, 1, 0, 0, 0) + + example = ls_schemas.ExampleCreateWithAttachments( + id=example_id, + dataset_id=dataset_id, + created_at=created_at, + inputs={"input": "test input"}, + outputs={"output": "test output"}, + metadata={"meta": "data"}, + split="train", + attachments={ + "file1": ("text/plain", b"test data"), + "file2": ls_schemas.Attachment( + mime_type="application/json", data=b'{"key": "value"}' + ), + }, + ) + client.upsert_example_multipart(upserts=[example]) + + # Verify the request + assert mock_session.request.call_count == 2 # we always make a call to /info + call_args = mock_session.request.call_args + + assert call_args[0][0] == "POST" + assert call_args[0][1].endswith("/v1/examples/multipart") + + # Parse the multipart data + request_data = call_args[1]["data"] + content_type = call_args[1]["headers"]["Content-Type"] + boundary = parse_options_header(content_type)[1]["boundary"] + + parser = MultipartParser( + io.BytesIO( + request_data + if isinstance(request_data, bytes) + else request_data.to_string() + ), + boundary, + ) + parts = list(parser.parts()) + + # Verify all expected parts are present + expected_parts = { + str(example_id): { + "dataset_id": str(dataset_id), + "created_at": created_at.isoformat(), + "metadata": {"meta": "data"}, + "split": "train", + }, + f"{example_id}.inputs": {"input": "test input"}, + f"{example_id}.outputs": {"output": "test output"}, + f"{example_id}.attachment.file1": "test data", + f"{example_id}.attachment.file2": '{"key": "value"}', + } + + assert len(parts) == len(expected_parts) + + for part in parts: + name = part.name + assert name in expected_parts, f"Unexpected part: {name}" + + if name.endswith(".attachment.file1"): + assert part.value == expected_parts[name] + assert part.headers["Content-Type"] == "text/plain" + elif name.endswith(".attachment.file2"): + assert part.value == expected_parts[name] + assert part.headers["Content-Type"] == "application/json" + else: + value = json.loads(part.value) + assert value == expected_parts[name] + assert part.headers["Content-Type"] == "application/json" + + class CallTracker: def __init__(self) -> None: self.counter = 0 From 152ec59849ec81622738c443765f6b5da91ce44e Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 13 Nov 2024 11:23:09 -0800 Subject: [PATCH 03/88] integration test skeleton --- python/langsmith/client.py | 11 ++- python/tests/integration_tests/test_client.py | 68 ++++++++++++++++++- python/tests/unit_tests/test_client.py | 6 +- 3 files changed, 79 insertions(+), 6 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index dca29d9fc..7e823573d 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3370,12 +3370,19 @@ def create_example_from_run( created_at=created_at, ) - def upsert_example_multipart( + def upsert_examples_multipart( self, *, - upserts: List[ls_schemas.ExampleCreateWithAttachments] = None, + upserts: List[ls_schemas.ExampleCreateWithAttachments] = [], ) -> None: """Upsert examples.""" + # not sure if the below checks are necessary + if not isinstance(upserts, list): + raise TypeError(f"upserts must be a list, got {type(upserts)}") + for item in upserts: + if not isinstance(item, ls_schemas.ExampleCreateWithAttachments): + raise TypeError(f"Each item must be ExampleCreateWithAttachments, got {type(item)}") + parts: list[MultipartPart] = [] for example in upserts: diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 57a6e2171..bfe0d818a 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -20,7 +20,7 @@ from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor from langsmith.client import ID_TYPE, Client -from langsmith.schemas import DataType +from langsmith.schemas import DataType, ExampleCreateWithAttachments from langsmith.utils import ( LangSmithConnectionError, LangSmithError, @@ -369,6 +369,72 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm") +@pytest.mark.parametrize("uri", ["http://dev.api.smith.langchain.com"]) +def test_upsert_examples_multipart(uri: str) -> None: + """Test upserting examples with attachments via multipart endpoint.""" + dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] + langchain_client = Client(api_url=uri, api_key="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") + if langchain_client.has_dataset(dataset_name=dataset_name): + langchain_client.delete_dataset(dataset_name=dataset_name) + + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for multipart example upload", + data_type=DataType.kv, + ) + + # Test example with all fields + example_id = uuid4() + example_1 = ExampleCreateWithAttachments( + id=example_id, + dataset_id=dataset.id, + inputs={"text": "hello world"}, + outputs={"response": "greeting"}, + attachments={ + "test_file": ("text/plain", b"test content"), + }, + ) + # Test example without id + example_2 = ExampleCreateWithAttachments( + dataset_id=dataset.id, + inputs={"text": "foo bar"}, + outputs={"response": "baz"}, + attachments={ + "my_file": ("text/plain", b"more test content"), + }, + ) + + langchain_client.upsert_examples_multipart([example_1, example_2]) + + created_example = langchain_client.read_example(example_id) + assert created_example.inputs["text"] == "hello world" + assert created_example.outputs["response"] == "greeting" + + all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] + assert len(all_examples_in_dataset) == 2 + + # Test that adding invalid example fails - even if valid examples are added alongside + example_3 = ExampleCreateWithAttachments( + dataset_id=uuid4(), # not a real dataset + inputs={"text": "foo bar"}, + outputs={"response": "baz"}, + attachments={ + "my_file": ("text/plain", b"more test content"), + }, + ) + + # will this throw an error? idk need to test + langchain_client.upsert_examples_multipart([example_2, example_3]) # don't add example_1 because of explicit id + + all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] + assert len(all_examples_in_dataset) == 2 + + # Throw type errors when not passing ExampleCreateWithAttachments + with pytest.raises(TypeError): + langchain_client.upsert_examples_multipart([{"foo":"bar"}]) + + langchain_client.delete_dataset(dataset_name=dataset_name) + def test_create_dataset(langchain_client: Client) -> None: dataset_name = "__test_create_dataset" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index edda5dd09..4b68ce368 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -417,8 +417,8 @@ def test_create_run_mutate( @mock.patch("langsmith.client.requests.Session") -def test_upsert_example_multipart(mock_session_cls: mock.Mock) -> None: - """Test that upsert_example_multipart sends correct multipart data.""" +def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None: + """Test that upsert_examples_multipart sends correct multipart data.""" mock_session = MagicMock() mock_response = MagicMock() mock_response.status_code = 200 @@ -447,7 +447,7 @@ def test_upsert_example_multipart(mock_session_cls: mock.Mock) -> None: ), }, ) - client.upsert_example_multipart(upserts=[example]) + client.upsert_examples_multipart(upserts=[example]) # Verify the request assert mock_session.request.call_count == 2 # we always make a call to /info From 27b15462180219cef6878bf80c27d4436b3b21a6 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 13 Nov 2024 12:48:09 -0800 Subject: [PATCH 04/88] integration test passing --- python/langsmith/client.py | 16 +++++++++------- python/tests/integration_tests/test_client.py | 15 ++++++++------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 7e823573d..a6ff8d4c8 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3394,9 +3394,11 @@ def upsert_examples_multipart( remaining_values = { "dataset_id": example.dataset_id, "created_at": example.created_at, - "metadata": example.metadata, - "split": example.split, } + if example.metadata is not None: + remaining_values["metadata"] = example.metadata + if example.split is not None: + remaining_values["split"] = example.split valb = _dumps_json(remaining_values) ( @@ -3455,8 +3457,8 @@ def upsert_examples_multipart( ( None, data, - mime_type, - {"Content-Length": str(len(data))}, + f"{mime_type}; length={len(data)}", + {}, ), ) ), @@ -3469,8 +3471,8 @@ def upsert_examples_multipart( ( None, attachment.data, - attachment.mime_type, - {"Content-Length": str(len(attachment.data))}, + f"{attachment.mime_type}; length={len(attachment.data)}", + {}, ), ) ), @@ -3484,7 +3486,7 @@ def upsert_examples_multipart( response = self.request_with_retries( "POST", - "/v1/examples/multipart", # No clue what this is supposed to be + "/v1/platform/examples/multipart", # No clue what this is supposed to be request_kwargs={ "data": data, "headers": { diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index bfe0d818a..c86ce3dc9 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -22,6 +22,7 @@ from langsmith.client import ID_TYPE, Client from langsmith.schemas import DataType, ExampleCreateWithAttachments from langsmith.utils import ( + LangSmithNotFoundError, LangSmithConnectionError, LangSmithError, get_env_var, @@ -368,12 +369,12 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: with pytest.raises(LangSmithConnectionError): client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm") - -@pytest.mark.parametrize("uri", ["http://dev.api.smith.langchain.com"]) +# NEED TO FIX ONCE CHANGES PUSH TO PROD +@pytest.mark.parametrize("uri", ["https://dev.api.smith.langchain.com"]) def test_upsert_examples_multipart(uri: str) -> None: """Test upserting examples with attachments via multipart endpoint.""" dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] - langchain_client = Client(api_url=uri, api_key="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") + langchain_client = Client(api_url=uri, api_key="NEED TO HARDCODE FOR TESTING") if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) @@ -404,7 +405,7 @@ def test_upsert_examples_multipart(uri: str) -> None: }, ) - langchain_client.upsert_examples_multipart([example_1, example_2]) + langchain_client.upsert_examples_multipart(upserts=[example_1, example_2]) created_example = langchain_client.read_example(example_id) assert created_example.inputs["text"] == "hello world" @@ -423,15 +424,15 @@ def test_upsert_examples_multipart(uri: str) -> None: }, ) - # will this throw an error? idk need to test - langchain_client.upsert_examples_multipart([example_2, example_3]) # don't add example_1 because of explicit id + with pytest.raises(LangSmithNotFoundError): + langchain_client.upsert_examples_multipart(upserts=[example_3]) all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] assert len(all_examples_in_dataset) == 2 # Throw type errors when not passing ExampleCreateWithAttachments with pytest.raises(TypeError): - langchain_client.upsert_examples_multipart([{"foo":"bar"}]) + langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}]) langchain_client.delete_dataset(dataset_name=dataset_name) From 53a0f1494548b297c5d19d13e93cbd409bdeaa80 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 13 Nov 2024 16:33:15 -0800 Subject: [PATCH 05/88] wip --- python/bench/upload_examples_bench.py | 124 ++++++++++++++++++ python/langsmith/client.py | 29 ++-- python/tests/integration_tests/test_client.py | 4 +- 3 files changed, 138 insertions(+), 19 deletions(-) create mode 100644 python/bench/upload_examples_bench.py diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py new file mode 100644 index 000000000..41e2faa4e --- /dev/null +++ b/python/bench/upload_examples_bench.py @@ -0,0 +1,124 @@ +import statistics +import time +from typing import Dict +from uuid import uuid4 +from langsmith.schemas import DataType, ExampleCreateWithAttachments +import sys +sys.path.append('./../langsmith') +from client import Client + +def create_large_json(length: int) -> Dict: + """Create a large JSON object for benchmarking purposes.""" + large_array = [ + { + "index": i, + "data": f"This is element number {i}", + "nested": {"id": i, "value": f"Nested value for element {i}"}, + } + for i in range(length) + ] + + return { + "name": "Huge JSON" + str(uuid4()), + "description": "This is a very large JSON object for benchmarking purposes.", + "array": large_array, + "metadata": { + "created_at": "2024-10-22T19:00:00Z", + "author": "Python Program", + "version": 1.0, + }, + } + + +def create_example_data(dataset_id: str, json_size: int) -> Dict: + """Create a single example data object.""" + return ExampleCreateWithAttachments(**{ + "dataset_id": dataset_id, + "inputs": create_large_json(json_size), + "outputs": create_large_json(json_size), + }) + +DATASET_NAME = "TEST DATASET" +def benchmark_example_uploading(num_examples: int, json_size: int, samples: int = 1) -> Dict: + """ + Benchmark run creation with specified parameters. + Returns timing statistics. + """ + multipart_timings, old_timings = [], [] + + + for _ in range(samples): + client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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") + + if client.has_dataset(dataset_name=DATASET_NAME): + client.delete_dataset(dataset_name=DATASET_NAME) + + dataset = client.create_dataset( + DATASET_NAME, + description="Test dataset for multipart example upload", + data_type=DataType.kv, + ) + examples = [create_example_data(dataset.id, json_size) for i in range(num_examples)] + + # Old method + old_start = time.perf_counter() + inputs=[e.inputs for e in examples] + outputs=[e.outputs for e in examples] + # the create_examples endpoint fails above 20mb + try: + client.create_examples(inputs=inputs, + outputs=outputs,dataset_id=dataset.id) + old_elapsed = time.perf_counter() - old_start + except: + old_elapsed = 1000000 + + # New method + multipart_start = time.perf_counter() + client.upsert_examples_multipart(upserts=examples) + multipart_elapsed = time.perf_counter() - multipart_start + + multipart_timings.append(multipart_elapsed) + old_timings.append(old_elapsed) + + return { + "old": { + "mean": statistics.mean(old_timings), + "median": statistics.median(old_timings), + "stdev": statistics.stdev(old_timings) if len(old_timings) > 1 else 0, + "min": min(old_timings), + "max": max(old_timings), + }, + "new": { + "mean": statistics.mean(multipart_timings), + "median": statistics.median(multipart_timings), + "stdev": statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0, + "min": min(multipart_timings), + "max": max(multipart_timings), + } + } + +json_size = 1000 +num_examples = 1000 + +def main(json_size: int, num_examples: int): + """ + Run benchmarks with different combinations of parameters and report results. + """ + results = benchmark_example_uploading(num_examples=num_examples, json_size=json_size) + + print(f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:") + print("-" * 60) + print(f"{'Metric':<15} {'Old Method':>20} {'New Method':>20}") + print("-" * 60) + + metrics = ['mean', 'median', 'stdev', 'min', 'max'] + for metric in metrics: + print(f"{metric:<15} {results['old'][metric]:>20.4f} {results['new'][metric]:>20.4f}") + + print("-" * 60) + print(f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} {num_examples / results['new']['mean']:>20.2f}") + print("(examples/second)") + + +if __name__ == "__main__": + main(json_size, num_examples) \ No newline at end of file diff --git a/python/langsmith/client.py b/python/langsmith/client.py index a6ff8d4c8..7d31a8652 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3373,33 +3373,28 @@ def create_example_from_run( def upsert_examples_multipart( self, *, - upserts: List[ls_schemas.ExampleCreateWithAttachments] = [], + upserts: List[ls_schemas.ExampleCreateWithAttachments] = None, ) -> None: """Upsert examples.""" - # not sure if the below checks are necessary - if not isinstance(upserts, list): - raise TypeError(f"upserts must be a list, got {type(upserts)}") - for item in upserts: - if not isinstance(item, ls_schemas.ExampleCreateWithAttachments): - raise TypeError(f"Each item must be ExampleCreateWithAttachments, got {type(item)}") - + if upserts is None: + upserts = [] parts: list[MultipartPart] = [] for example in upserts: if example.id is not None: - example_id = str(example.id) # is the conversion to string neccessary? + example_id = str(example.id) else: example_id = str(uuid.uuid4()) - remaining_values = { + example_body = { "dataset_id": example.dataset_id, "created_at": example.created_at, } if example.metadata is not None: - remaining_values["metadata"] = example.metadata + example_body["metadata"] = example.metadata if example.split is not None: - remaining_values["split"] = example.split - valb = _dumps_json(remaining_values) + example_body["split"] = example.split + valb = _dumps_json(example_body) ( parts.append( @@ -3409,7 +3404,7 @@ def upsert_examples_multipart( None, valb, "application/json", - {"Content-Length": str(len(valb))}, + {}, ), ) ), @@ -3426,7 +3421,7 @@ def upsert_examples_multipart( None, inputsb, "application/json", - {"Content-Length": str(len(inputsb))}, + {}, ), ) ), @@ -3440,7 +3435,7 @@ def upsert_examples_multipart( None, outputsb, "application/json", - {"Content-Length": str(len(outputsb))}, + {}, ), ) ), @@ -3486,7 +3481,7 @@ def upsert_examples_multipart( response = self.request_with_retries( "POST", - "/v1/platform/examples/multipart", # No clue what this is supposed to be + "/v1/platform/examples/multipart", request_kwargs={ "data": data, "headers": { diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index c86ce3dc9..56112975a 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -374,7 +374,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: def test_upsert_examples_multipart(uri: str) -> None: """Test upserting examples with attachments via multipart endpoint.""" dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] - langchain_client = Client(api_url=uri, api_key="NEED TO HARDCODE FOR TESTING") + langchain_client = Client(api_url=uri, api_key="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") if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) @@ -431,7 +431,7 @@ def test_upsert_examples_multipart(uri: str) -> None: assert len(all_examples_in_dataset) == 2 # Throw type errors when not passing ExampleCreateWithAttachments - with pytest.raises(TypeError): + with pytest.raises(AttributeError): langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}]) langchain_client.delete_dataset(dataset_name=dataset_name) From 025aa6d65fa8b469d2969d63ca3750b29826eef6 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 13 Nov 2024 17:11:30 -0800 Subject: [PATCH 06/88] wip --- python/bench/upload_examples_bench.py | 2 +- python/langsmith/client.py | 8 +++++++- python/langsmith/schemas.py | 2 ++ python/tests/integration_tests/test_client.py | 19 ++++++++++++------- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py index 41e2faa4e..dc7efeae4 100644 --- a/python/bench/upload_examples_bench.py +++ b/python/bench/upload_examples_bench.py @@ -48,7 +48,7 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int for _ in range(samples): - client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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") + client = Client(api_url="https://dev.api.smith.langchain.com") if client.has_dataset(dataset_name=DATASET_NAME): client.delete_dataset(dataset_name=DATASET_NAME) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 7d31a8652..94359446d 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3374,8 +3374,13 @@ def upsert_examples_multipart( self, *, upserts: List[ls_schemas.ExampleCreateWithAttachments] = None, - ) -> None: + ) -> dict: # Should we create an object for the return type - like UpsertExamplesResponse? """Upsert examples.""" + if not (self.info.instance_flags or {}).get( + "examples_multipart_enabled", False + ): + raise ValueError("Your LangChain version does not allow using the multipart examples endpoint, please update to the latest version.") + if upserts is None: upserts = [] parts: list[MultipartPart] = [] @@ -3491,6 +3496,7 @@ def upsert_examples_multipart( }, ) ls_utils.raise_for_status_with_text(response) + return response.json() def create_examples( self, diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 4e8711002..d309cb5fd 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -701,6 +701,8 @@ class LangSmithInfo(BaseModel): license_expiration_time: Optional[datetime] = None """The time the license will expire.""" batch_ingest_config: Optional[BatchIngestConfig] = None + """The instance flags.""" + instance_flags: dict[str, Any] = None Example.update_forward_refs() diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 56112975a..e202d6b39 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -370,11 +370,10 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm") # NEED TO FIX ONCE CHANGES PUSH TO PROD -@pytest.mark.parametrize("uri", ["https://dev.api.smith.langchain.com"]) -def test_upsert_examples_multipart(uri: str) -> None: +def test_upsert_examples_multipart() -> None: """Test upserting examples with attachments via multipart endpoint.""" dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] - langchain_client = Client(api_url=uri, api_key="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") + langchain_client = Client(api_url="https://dev.api.smith.langchain.com") if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) @@ -405,12 +404,18 @@ def test_upsert_examples_multipart(uri: str) -> None: }, ) - langchain_client.upsert_examples_multipart(upserts=[example_1, example_2]) + created_examples = langchain_client.upsert_examples_multipart(upserts=[example_1, example_2]) + assert created_examples['count'] == 2 - created_example = langchain_client.read_example(example_id) - assert created_example.inputs["text"] == "hello world" - assert created_example.outputs["response"] == "greeting" + created_example_1 = langchain_client.read_example(created_examples['example_ids'][0]) + assert created_example_1.inputs["text"] == "hello world" + assert created_example_1.outputs["response"] == "greeting" + created_example_2 = langchain_client.read_example(created_examples['example_ids'][1]) + assert created_example_2.inputs["text"] == "foo bar" + assert created_example_2.outputs["response"] == "baz" + + # make sure examples were sent to the correct dataset all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] assert len(all_examples_in_dataset) == 2 From 4208b6e491a415d05514f2b22d85977746f04e8f Mon Sep 17 00:00:00 2001 From: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Date: Thu, 14 Nov 2024 07:50:14 -0800 Subject: [PATCH 07/88] Update python/langsmith/client.py Co-authored-by: Ankush Gola <9536492+agola11@users.noreply.github.com> --- python/langsmith/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 94359446d..aa5f009bb 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3379,7 +3379,7 @@ def upsert_examples_multipart( if not (self.info.instance_flags or {}).get( "examples_multipart_enabled", False ): - raise ValueError("Your LangChain version does not allow using the multipart examples endpoint, please update to the latest version.") + raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.") if upserts is None: upserts = [] From fd16baa5e721dac218f0e9b282b9f8024f8f4281 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Thu, 14 Nov 2024 09:06:47 -0800 Subject: [PATCH 08/88] more edits --- python/bench/upload_examples_bench.py | 18 +++++------- python/langsmith/client.py | 8 +++--- python/langsmith/schemas.py | 20 ++++++++----- python/tests/integration_tests/test_client.py | 28 +++++++++++++++---- 4 files changed, 46 insertions(+), 28 deletions(-) diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py index dc7efeae4..0dd979313 100644 --- a/python/bench/upload_examples_bench.py +++ b/python/bench/upload_examples_bench.py @@ -4,8 +4,7 @@ from uuid import uuid4 from langsmith.schemas import DataType, ExampleCreateWithAttachments import sys -sys.path.append('./../langsmith') -from client import Client +from langsmith import Client def create_large_json(length: int) -> Dict: """Create a large JSON object for benchmarking purposes.""" @@ -38,7 +37,7 @@ def create_example_data(dataset_id: str, json_size: int) -> Dict: "outputs": create_large_json(json_size), }) -DATASET_NAME = "TEST DATASET" +DATASET_NAME = "upsert_llm_evaluator_benchmark_dataset" def benchmark_example_uploading(num_examples: int, json_size: int, samples: int = 1) -> Dict: """ Benchmark run creation with specified parameters. @@ -64,13 +63,10 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int old_start = time.perf_counter() inputs=[e.inputs for e in examples] outputs=[e.outputs for e in examples] - # the create_examples endpoint fails above 20mb - try: - client.create_examples(inputs=inputs, - outputs=outputs,dataset_id=dataset.id) - old_elapsed = time.perf_counter() - old_start - except: - old_elapsed = 1000000 + # the create_examples endpoint fails above 20mb - so this will crash with json_size > ~100 + client.create_examples(inputs=inputs, + outputs=outputs,dataset_id=dataset.id) + old_elapsed = time.perf_counter() - old_start # New method multipart_start = time.perf_counter() @@ -121,4 +117,4 @@ def main(json_size: int, num_examples: int): if __name__ == "__main__": - main(json_size, num_examples) \ No newline at end of file + main(json_size, num_examples) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index aa5f009bb..0f9455ff1 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3373,14 +3373,14 @@ def create_example_from_run( def upsert_examples_multipart( self, *, - upserts: List[ls_schemas.ExampleCreateWithAttachments] = None, - ) -> dict: # Should we create an object for the return type - like UpsertExamplesResponse? + upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None, + ) -> ls_schemas.UpsertExamplesResponse: """Upsert examples.""" - if not (self.info.instance_flags or {}).get( + """ if not (self.info.instance_flags or {}).get( "examples_multipart_enabled", False ): raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.") - + """ if upserts is None: upserts = [] parts: list[MultipartPart] = [] diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index d309cb5fd..7b94c2017 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -89,7 +89,7 @@ class ExampleCreate(ExampleBase): split: Optional[Union[str, List[str]]] = None -class ExampleCreateWithAttachments(ExampleCreate): +class ExampleUpsertWithAttachments(ExampleCreate): """Example create with attachments.""" attachments: Optional[Attachments] = None @@ -131,12 +131,6 @@ def url(self) -> Optional[str]: return None -class ExampleSearch(ExampleBase): - """Example returned via search.""" - - id: UUID - - class ExampleUpdate(BaseModel): """Update class for Example.""" @@ -151,6 +145,10 @@ class Config: frozen = True +class ExampleUpdateWithAttachments(ExampleUpdate): + """Example update with attachments.""" + id: UUID + attachments: Optional[Attachments] = None class DataType(str, Enum): """Enum for dataset data types.""" @@ -988,3 +986,11 @@ class UsageMetadata(TypedDict): Does *not* need to sum to full output token count. Does *not* need to have all keys. """ + +class UpsertExamplesResponse(TypedDict): + """Response object returned from the upsert_examples_multipart method.""" + + count: int + """The number of examples that were upserted.""" + example_ids: List[str] + """The ids of the examples that were upserted.""" \ No newline at end of file diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index e202d6b39..bf99b9d62 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -20,7 +20,7 @@ from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor from langsmith.client import ID_TYPE, Client -from langsmith.schemas import DataType, ExampleCreateWithAttachments +from langsmith.schemas import DataType, ExampleUpsertWithAttachments from langsmith.utils import ( LangSmithNotFoundError, LangSmithConnectionError, @@ -373,7 +373,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: def test_upsert_examples_multipart() -> None: """Test upserting examples with attachments via multipart endpoint.""" dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] - langchain_client = Client(api_url="https://dev.api.smith.langchain.com") + langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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") if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) @@ -385,7 +385,7 @@ def test_upsert_examples_multipart() -> None: # Test example with all fields example_id = uuid4() - example_1 = ExampleCreateWithAttachments( + example_1 = ExampleUpsertWithAttachments( id=example_id, dataset_id=dataset.id, inputs={"text": "hello world"}, @@ -395,7 +395,7 @@ def test_upsert_examples_multipart() -> None: }, ) # Test example without id - example_2 = ExampleCreateWithAttachments( + example_2 = ExampleUpsertWithAttachments( dataset_id=dataset.id, inputs={"text": "foo bar"}, outputs={"response": "baz"}, @@ -419,8 +419,24 @@ def test_upsert_examples_multipart() -> None: all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] assert len(all_examples_in_dataset) == 2 + example_1_update = ExampleUpsertWithAttachments( + id=example_id, + dataset_id=dataset.id, + inputs={"text": "bar baz"}, + outputs={"response": "foo"}, + attachments={ + "my_file": ("text/plain", b"more test content"), + }, + ) + updated_examples = langchain_client.upsert_examples_multipart(upserts=[example_1_update]) + assert updated_examples['count'] == 1 + assert updated_examples['example_ids'][0] == str(example_id) + updated_example = langchain_client.read_example(updated_examples['example_ids'][0]) + assert updated_example.inputs['text'] == "bar baz" + assert updated_example.outputs['response'] == "foo" + # Test that adding invalid example fails - even if valid examples are added alongside - example_3 = ExampleCreateWithAttachments( + example_3 = ExampleUpsertWithAttachments( dataset_id=uuid4(), # not a real dataset inputs={"text": "foo bar"}, outputs={"response": "baz"}, @@ -435,7 +451,7 @@ def test_upsert_examples_multipart() -> None: all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] assert len(all_examples_in_dataset) == 2 - # Throw type errors when not passing ExampleCreateWithAttachments + # Throw type errors when not passing ExampleUpsertWithAttachments with pytest.raises(AttributeError): langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}]) From 28a46771ecce2b4d5da950436fa5ce5b9a3bec25 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Thu, 14 Nov 2024 12:04:02 -0800 Subject: [PATCH 09/88] nit --- python/langsmith/client.py | 29 ++++++++++--------- python/tests/integration_tests/test_client.py | 4 +-- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 0f9455ff1..9d6283c77 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3416,7 +3416,6 @@ def upsert_examples_multipart( ) inputsb = _dumps_json(example.inputs) - outputsb = _dumps_json(example.outputs) ( parts.append( @@ -3431,20 +3430,22 @@ def upsert_examples_multipart( ) ), ) - - ( - parts.append( - ( - f"{example_id}.outputs", + + if example.outputs: + outputsb = _dumps_json(example.outputs) + ( + parts.append( ( - None, - outputsb, - "application/json", - {}, - ), - ) - ), - ) + f"{example_id}.outputs", + ( + None, + outputsb, + "application/json", + {}, + ), + ) + ), + ) if example.attachments: for name, attachment in example.attachments.items(): diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index bf99b9d62..068f75e73 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -389,7 +389,7 @@ def test_upsert_examples_multipart() -> None: id=example_id, dataset_id=dataset.id, inputs={"text": "hello world"}, - outputs={"response": "greeting"}, + # test without outputs attachments={ "test_file": ("text/plain", b"test content"), }, @@ -409,7 +409,7 @@ def test_upsert_examples_multipart() -> None: created_example_1 = langchain_client.read_example(created_examples['example_ids'][0]) assert created_example_1.inputs["text"] == "hello world" - assert created_example_1.outputs["response"] == "greeting" + assert created_example_1.outputs == None created_example_2 = langchain_client.read_example(created_examples['example_ids'][1]) assert created_example_2.inputs["text"] == "foo bar" From 816302d1ec888d8f42d34476e10982fe07422220 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Thu, 14 Nov 2024 13:14:53 -0800 Subject: [PATCH 10/88] nit --- python/tests/integration_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 068f75e73..170220678 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -373,7 +373,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: def test_upsert_examples_multipart() -> None: """Test upserting examples with attachments via multipart endpoint.""" dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] - langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="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") + langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="HARDCODE FOR TESTING") if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) From aa947a641e3859559624edbebb365cda90f221de Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 14:29:33 -0800 Subject: [PATCH 11/88] remove dev endpoint in test --- python/tests/integration_tests/test_client.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 170220678..b752800b1 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -369,11 +369,10 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: with pytest.raises(LangSmithConnectionError): client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm") -# NEED TO FIX ONCE CHANGES PUSH TO PROD -def test_upsert_examples_multipart() -> None: + +def test_upsert_examples_multipart(langchain_client: Client) -> None: """Test upserting examples with attachments via multipart endpoint.""" - dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] - langchain_client = Client(api_url="https://dev.api.smith.langchain.com", api_key="HARDCODE FOR TESTING") + dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]" if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) From a82063b159929c2f708ecedacb4aad155f99e035 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 14:33:06 -0800 Subject: [PATCH 12/88] typo --- python/tests/integration_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index b752800b1..74c9d5168 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -372,7 +372,7 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: def test_upsert_examples_multipart(langchain_client: Client) -> None: """Test upserting examples with attachments via multipart endpoint.""" - dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4]" + dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) From ad19dafd46d4a72c323ce55dcdd6c68af060daf6 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:07:15 -0800 Subject: [PATCH 13/88] fmt --- python/bench/upload_examples_bench.py | 73 ++++++++++++------- python/langsmith/client.py | 2 +- python/langsmith/schemas.py | 6 +- python/tests/integration_tests/test_client.py | 52 ++++++++----- 4 files changed, 87 insertions(+), 46 deletions(-) diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py index 0dd979313..f6669b293 100644 --- a/python/bench/upload_examples_bench.py +++ b/python/bench/upload_examples_bench.py @@ -2,9 +2,10 @@ import time from typing import Dict from uuid import uuid4 -from langsmith.schemas import DataType, ExampleCreateWithAttachments -import sys + from langsmith import Client +from langsmith.schemas import DataType, ExampleCreateWithAttachments + def create_large_json(length: int) -> Dict: """Create a large JSON object for benchmarking purposes.""" @@ -31,21 +32,27 @@ def create_large_json(length: int) -> Dict: def create_example_data(dataset_id: str, json_size: int) -> Dict: """Create a single example data object.""" - return ExampleCreateWithAttachments(**{ - "dataset_id": dataset_id, - "inputs": create_large_json(json_size), - "outputs": create_large_json(json_size), - }) + return ExampleCreateWithAttachments( + **{ + "dataset_id": dataset_id, + "inputs": create_large_json(json_size), + "outputs": create_large_json(json_size), + } + ) + DATASET_NAME = "upsert_llm_evaluator_benchmark_dataset" -def benchmark_example_uploading(num_examples: int, json_size: int, samples: int = 1) -> Dict: + + +def benchmark_example_uploading( + num_examples: int, json_size: int, samples: int = 1 +) -> Dict: """ Benchmark run creation with specified parameters. Returns timing statistics. """ multipart_timings, old_timings = [], [] - for _ in range(samples): client = Client(api_url="https://dev.api.smith.langchain.com") @@ -57,15 +64,17 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int description="Test dataset for multipart example upload", data_type=DataType.kv, ) - examples = [create_example_data(dataset.id, json_size) for i in range(num_examples)] + examples = [ + create_example_data(dataset.id, json_size) for i in range(num_examples) + ] # Old method old_start = time.perf_counter() - inputs=[e.inputs for e in examples] - outputs=[e.outputs for e in examples] - # the create_examples endpoint fails above 20mb - so this will crash with json_size > ~100 - client.create_examples(inputs=inputs, - outputs=outputs,dataset_id=dataset.id) + inputs = [e.inputs for e in examples] + outputs = [e.outputs for e in examples] + # the create_examples endpoint fails above 20mb + # so this will crash with json_size > ~100 + client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id) old_elapsed = time.perf_counter() - old_start # New method @@ -87,32 +96,46 @@ def benchmark_example_uploading(num_examples: int, json_size: int, samples: int "new": { "mean": statistics.mean(multipart_timings), "median": statistics.median(multipart_timings), - "stdev": statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0, + "stdev": statistics.stdev(multipart_timings) + if len(multipart_timings) > 1 + else 0, "min": min(multipart_timings), "max": max(multipart_timings), - } + }, } + json_size = 1000 num_examples = 1000 + def main(json_size: int, num_examples: int): """ Run benchmarks with different combinations of parameters and report results. """ - results = benchmark_example_uploading(num_examples=num_examples, json_size=json_size) - - print(f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:") + results = benchmark_example_uploading( + num_examples=num_examples, json_size=json_size + ) + + print( + f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:" + ) print("-" * 60) print(f"{'Metric':<15} {'Old Method':>20} {'New Method':>20}") print("-" * 60) - - metrics = ['mean', 'median', 'stdev', 'min', 'max'] + + metrics = ["mean", "median", "stdev", "min", "max"] for metric in metrics: - print(f"{metric:<15} {results['old'][metric]:>20.4f} {results['new'][metric]:>20.4f}") - + print( + f"{metric:<15} {results['old'][metric]:>20.4f} " + f"{results['new'][metric]:>20.4f}" + ) + print("-" * 60) - print(f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} {num_examples / results['new']['mean']:>20.2f}") + print( + f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} " + f"{num_examples / results['new']['mean']:>20.2f}" + ) print("(examples/second)") diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 9d6283c77..ff77b81d8 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3430,7 +3430,7 @@ def upsert_examples_multipart( ) ), ) - + if example.outputs: outputsb = _dumps_json(example.outputs) ( diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 9409314c0..b5a74d0fb 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -149,11 +149,14 @@ class Config: frozen = True + class ExampleUpdateWithAttachments(ExampleUpdate): """Example update with attachments.""" + id: UUID attachments: Optional[Attachments] = None + class DataType(str, Enum): """Enum for dataset data types.""" @@ -991,10 +994,11 @@ class UsageMetadata(TypedDict): Does *not* need to sum to full output token count. Does *not* need to have all keys. """ + class UpsertExamplesResponse(TypedDict): """Response object returned from the upsert_examples_multipart method.""" count: int """The number of examples that were upserted.""" example_ids: List[str] - """The ids of the examples that were upserted.""" \ No newline at end of file + """The ids of the examples that were upserted.""" diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 74c9d5168..d1d16ce0f 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -22,9 +22,9 @@ from langsmith.client import ID_TYPE, Client from langsmith.schemas import DataType, ExampleUpsertWithAttachments from langsmith.utils import ( - LangSmithNotFoundError, LangSmithConnectionError, LangSmithError, + LangSmithNotFoundError, get_env_var, ) @@ -403,19 +403,27 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: }, ) - created_examples = langchain_client.upsert_examples_multipart(upserts=[example_1, example_2]) - assert created_examples['count'] == 2 - - created_example_1 = langchain_client.read_example(created_examples['example_ids'][0]) + created_examples = langchain_client.upsert_examples_multipart( + upserts=[example_1, example_2] + ) + assert created_examples["count"] == 2 + + created_example_1 = langchain_client.read_example( + created_examples["example_ids"][0] + ) assert created_example_1.inputs["text"] == "hello world" - assert created_example_1.outputs == None + assert created_example_1.outputs is None - created_example_2 = langchain_client.read_example(created_examples['example_ids'][1]) + created_example_2 = langchain_client.read_example( + created_examples["example_ids"][1] + ) assert created_example_2.inputs["text"] == "foo bar" assert created_example_2.outputs["response"] == "baz" # make sure examples were sent to the correct dataset - all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] + all_examples_in_dataset = [ + example for example in langchain_client.list_examples(dataset_id=dataset.id) + ] assert len(all_examples_in_dataset) == 2 example_1_update = ExampleUpsertWithAttachments( @@ -427,16 +435,19 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: "my_file": ("text/plain", b"more test content"), }, ) - updated_examples = langchain_client.upsert_examples_multipart(upserts=[example_1_update]) - assert updated_examples['count'] == 1 - assert updated_examples['example_ids'][0] == str(example_id) - updated_example = langchain_client.read_example(updated_examples['example_ids'][0]) - assert updated_example.inputs['text'] == "bar baz" - assert updated_example.outputs['response'] == "foo" - - # Test that adding invalid example fails - even if valid examples are added alongside + updated_examples = langchain_client.upsert_examples_multipart( + upserts=[example_1_update] + ) + assert updated_examples["count"] == 1 + assert updated_examples["example_ids"][0] == str(example_id) + updated_example = langchain_client.read_example(updated_examples["example_ids"][0]) + assert updated_example.inputs["text"] == "bar baz" + assert updated_example.outputs["response"] == "foo" + + # Test that adding invalid example fails + # even if valid examples are added alongside example_3 = ExampleUpsertWithAttachments( - dataset_id=uuid4(), # not a real dataset + dataset_id=uuid4(), # not a real dataset inputs={"text": "foo bar"}, outputs={"response": "baz"}, attachments={ @@ -447,15 +458,18 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: with pytest.raises(LangSmithNotFoundError): langchain_client.upsert_examples_multipart(upserts=[example_3]) - all_examples_in_dataset = [example for example in langchain_client.list_examples(dataset_id=dataset.id)] + all_examples_in_dataset = [ + example for example in langchain_client.list_examples(dataset_id=dataset.id) + ] assert len(all_examples_in_dataset) == 2 # Throw type errors when not passing ExampleUpsertWithAttachments with pytest.raises(AttributeError): - langchain_client.upsert_examples_multipart(upserts=[{"foo":"bar"}]) + langchain_client.upsert_examples_multipart(upserts=[{"foo": "bar"}]) langchain_client.delete_dataset(dataset_name=dataset_name) + def test_create_dataset(langchain_client: Client) -> None: dataset_name = "__test_create_dataset" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): From 390ac66d62237d58759cf2aa7b65898b90187893 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:07:49 -0800 Subject: [PATCH 14/88] yml changes --- .github/actions/python-integration-tests/action.yml | 2 +- .github/workflows/integration_tests.yml | 2 +- .github/workflows/release.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/python-integration-tests/action.yml b/.github/actions/python-integration-tests/action.yml index d62a15f0a..74553585d 100644 --- a/.github/actions/python-integration-tests/action.yml +++ b/.github/actions/python-integration-tests/action.yml @@ -23,7 +23,7 @@ runs: uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ inputs.python-version }} cache: "pip" diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index d9f6ddc27..1a2928d1a 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -33,7 +33,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python 3.11 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: 3.11 cache: "pip" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0f933626b..7e10b9d67 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,7 +25,7 @@ jobs: - name: Install poetry run: pipx install poetry==$POETRY_VERSION - name: Set up Python 3.11 - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" cache: "poetry" From 523e5d11fec9fe2db88cec88d3c50a1f3996c957 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:12:46 -0800 Subject: [PATCH 15/88] fmt --- python/langsmith/client.py | 102 ++++++++++++++++-------------------- python/langsmith/schemas.py | 2 +- 2 files changed, 47 insertions(+), 57 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index ff77b81d8..e71c85be8 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3373,7 +3373,7 @@ def create_example_from_run( def upsert_examples_multipart( self, *, - upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None, + upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [], ) -> ls_schemas.UpsertExamplesResponse: """Upsert examples.""" """ if not (self.info.instance_flags or {}).get( @@ -3381,8 +3381,6 @@ def upsert_examples_multipart( ): raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.") """ - if upserts is None: - upserts = [] parts: list[MultipartPart] = [] for example in upserts: @@ -3401,82 +3399,74 @@ def upsert_examples_multipart( example_body["split"] = example.split valb = _dumps_json(example_body) - ( - parts.append( + parts.append( + ( + f"{example_id}", ( - f"{example_id}", - ( - None, - valb, - "application/json", - {}, - ), - ) - ), + None, + valb, + "application/json", + {}, + ), + ) ) inputsb = _dumps_json(example.inputs) - ( + + parts.append( + ( + f"{example_id}.inputs", + ( + None, + inputsb, + "application/json", + {}, + ), + ) + ) + + + if example.outputs: + outputsb = _dumps_json(example.outputs) parts.append( ( - f"{example_id}.inputs", + f"{example_id}.outputs", ( None, - inputsb, + outputsb, "application/json", {}, ), ) - ), - ) - - if example.outputs: - outputsb = _dumps_json(example.outputs) - ( - parts.append( - ( - f"{example_id}.outputs", - ( - None, - outputsb, - "application/json", - {}, - ), - ) - ), ) if example.attachments: for name, attachment in example.attachments.items(): if isinstance(attachment, tuple): mime_type, data = attachment - ( - parts.append( + parts.append( + ( + f"{example_id}.attachment.{name}", ( - f"{example_id}.attachment.{name}", - ( - None, - data, - f"{mime_type}; length={len(data)}", - {}, - ), - ) - ), + None, + data, + f"{mime_type}; length={len(data)}", + {}, + ), + ) ) else: - ( - parts.append( + parts.append( + ( + f"{example_id}.attachment.{name}", ( - f"{example_id}.attachment.{name}", - ( - None, - attachment.data, - f"{attachment.mime_type}; length={len(attachment.data)}", - {}, - ), - ) - ), + None, + attachment.data, + f"{attachment.mime_type}; length={len(attachment.data)}", + {}, + ), + ) ) encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY) diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index b5a74d0fb..9949f9998 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -707,7 +707,7 @@ class LangSmithInfo(BaseModel): """The time the license will expire.""" batch_ingest_config: Optional[BatchIngestConfig] = None """The instance flags.""" - instance_flags: dict[str, Any] = None + instance_flags: Optional[dict[str, Any]] = None Example.update_forward_refs() From ed3aa1cda85518a503c5999b9afe8ded46ef78a7 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:15:35 -0800 Subject: [PATCH 16/88] example search restoration --- python/langsmith/schemas.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 9949f9998..6f84ccea6 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -135,6 +135,12 @@ def __repr__(self): return f"{self.__class__}(id={self.id}, dataset_id={self.dataset_id}, link='{self.url}')" +class ExampleSearch(ExampleBase): + """Example returned via search.""" + + id: UUID + + class ExampleUpdate(BaseModel): """Update class for Example.""" From ce73afcfb6b04e5a887a3fdcc8691fde466bebb8 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:18:20 -0800 Subject: [PATCH 17/88] fmt --- python/bench/upload_examples_bench.py | 6 +++--- python/langsmith/client.py | 2 -- python/langsmith/schemas.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py index f6669b293..3fc79beb4 100644 --- a/python/bench/upload_examples_bench.py +++ b/python/bench/upload_examples_bench.py @@ -96,9 +96,9 @@ def benchmark_example_uploading( "new": { "mean": statistics.mean(multipart_timings), "median": statistics.median(multipart_timings), - "stdev": statistics.stdev(multipart_timings) - if len(multipart_timings) > 1 - else 0, + "stdev": ( + statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0 + ), "min": min(multipart_timings), "max": max(multipart_timings), }, diff --git a/python/langsmith/client.py b/python/langsmith/client.py index e71c85be8..cc3987649 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3413,7 +3413,6 @@ def upsert_examples_multipart( inputsb = _dumps_json(example.inputs) - parts.append( ( f"{example_id}.inputs", @@ -3425,7 +3424,6 @@ def upsert_examples_multipart( ), ) ) - if example.outputs: outputsb = _dumps_json(example.outputs) diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 6f84ccea6..58b311b83 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -136,9 +136,9 @@ def __repr__(self): class ExampleSearch(ExampleBase): - """Example returned via search.""" + """Example returned via search.""" - id: UUID + id: UUID class ExampleUpdate(BaseModel): From 460b16b15720a0f291c31814896f4100c7a4336b Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:22:08 -0800 Subject: [PATCH 18/88] list -> List --- python/langsmith/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index cc3987649..8f4351994 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3381,7 +3381,7 @@ def upsert_examples_multipart( ): raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.") """ - parts: list[MultipartPart] = [] + parts: List[MultipartPart] = [] for example in upserts: if example.id is not None: From 4e9edf4da4de1a8a3421f589345c929ae71ae3c4 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:24:11 -0800 Subject: [PATCH 19/88] dict -> Dict --- python/langsmith/schemas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 58b311b83..f134e1432 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -713,7 +713,7 @@ class LangSmithInfo(BaseModel): """The time the license will expire.""" batch_ingest_config: Optional[BatchIngestConfig] = None """The instance flags.""" - instance_flags: Optional[dict[str, Any]] = None + instance_flags: Optional[Dict[str, Any]] = None Example.update_forward_refs() From b6b9d79d43709a1da95fbf85df7c59bbb5c91da5 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:28:08 -0800 Subject: [PATCH 20/88] fmt --- python/tests/unit_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 4b68ce368..a97e8329d 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -432,7 +432,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None: dataset_id = uuid.uuid4() created_at = datetime(2015, 1, 1, 0, 0, 0) - example = ls_schemas.ExampleCreateWithAttachments( + example = ls_schemas.ExampleUpsertWithAttachments( id=example_id, dataset_id=dataset_id, created_at=created_at, From bc9ec6f5bf562b75a9081f1435ddc3aa799fa3ee Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:28:55 -0800 Subject: [PATCH 21/88] undo yml changes --- .github/actions/python-integration-tests/action.yml | 2 +- .github/workflows/integration_tests.yml | 2 +- .github/workflows/release.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/actions/python-integration-tests/action.yml b/.github/actions/python-integration-tests/action.yml index 74553585d..d62a15f0a 100644 --- a/.github/actions/python-integration-tests/action.yml +++ b/.github/actions/python-integration-tests/action.yml @@ -23,7 +23,7 @@ runs: uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v4 with: python-version: ${{ inputs.python-version }} cache: "pip" diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index 1a2928d1a..d9f6ddc27 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -33,7 +33,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Set up Python 3.11 - uses: actions/setup-python@v5 + uses: actions/setup-python@v4 with: python-version: 3.11 cache: "pip" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7e10b9d67..0f933626b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,7 +25,7 @@ jobs: - name: Install poetry run: pipx install poetry==$POETRY_VERSION - name: Set up Python 3.11 - uses: actions/setup-python@v5 + uses: actions/setup-python@v4 with: python-version: "3.11" cache: "poetry" From 15708dc485f1ede8d2e8e245ffa5d9d7cb3c68ce Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:31:23 -0800 Subject: [PATCH 22/88] unit test fix --- python/tests/unit_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index a97e8329d..9019e44f5 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -454,7 +454,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None: call_args = mock_session.request.call_args assert call_args[0][0] == "POST" - assert call_args[0][1].endswith("/v1/examples/multipart") + assert call_args[0][1].endswith("/v1/platform/examples/multipart") # Parse the multipart data request_data = call_args[1]["data"] From 527174ab4330a763b1d4bd8adfdc314a81ed2980 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:36:18 -0800 Subject: [PATCH 23/88] unit test fix --- python/tests/unit_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 9019e44f5..46a946285 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -493,7 +493,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None: if name.endswith(".attachment.file1"): assert part.value == expected_parts[name] - assert part.headers["Content-Type"] == "text/plain" + assert part.headers["Content-Type"] == "text/plain; length=9" elif name.endswith(".attachment.file2"): assert part.value == expected_parts[name] assert part.headers["Content-Type"] == "application/json" From 81f52492dfe50cac96f5e2106732b9740a6fa52d Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 18 Nov 2024 15:38:40 -0800 Subject: [PATCH 24/88] unit test fix --- python/tests/unit_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 46a946285..adb59e37e 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -496,7 +496,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None: assert part.headers["Content-Type"] == "text/plain; length=9" elif name.endswith(".attachment.file2"): assert part.value == expected_parts[name] - assert part.headers["Content-Type"] == "application/json" + assert part.headers["Content-Type"] == "application/json; length=16" else: value = json.loads(part.value) assert value == expected_parts[name] From f36a0cb20389f75a8fc6ff31db8db874831e13c7 Mon Sep 17 00:00:00 2001 From: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:23:44 -0800 Subject: [PATCH 25/88] make evaluate function compatible with attachments (#1218) Co-authored-by: Bagatur --- python/langsmith/client.py | 32 +++++++ python/langsmith/evaluation/_runner.py | 90 +++++++++++++++-- python/langsmith/schemas.py | 22 ++++- python/tests/integration_tests/test_client.py | 96 ++++++++++++++++++- 4 files changed, 227 insertions(+), 13 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 8f4351994..aad7057fc 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3454,6 +3454,21 @@ def upsert_examples_multipart( ), ) ) + elif isinstance(attachment, str): + file_path = attachment + mime_type = "application/octet-stream" + file_size = os.path.getsize(file_path) + parts.append( + ( + f"{example_id}.attachment.{name}", + ( + None, + open(file_path, "rb"), + f"{mime_type}; length={file_size}", + {}, + ), + ) + ) else: parts.append( ( @@ -3645,6 +3660,7 @@ def read_example( "as_of": as_of.isoformat() if as_of else None, }, ) + return ls_schemas.Example( **response.json(), _host_url=self._host_url, @@ -3664,6 +3680,7 @@ def list_examples( limit: Optional[int] = None, metadata: Optional[dict] = None, filter: Optional[str] = None, + include_attachments: bool = False, **kwargs: Any, ) -> Iterator[ls_schemas.Example]: """Retrieve the example rows of the specified dataset. @@ -3713,11 +3730,26 @@ def list_examples( params["dataset"] = dataset_id else: pass + if include_attachments: + params["select"] = ["attachment_urls", "outputs", "metadata"] for i, example in enumerate( self._get_paginated_list("/examples", params=params) ): + attachment_urls = {} + if example["attachment_urls"]: + for key, value in example["attachment_urls"].items(): + response = requests.get(value["presigned_url"], stream=True) + response.raise_for_status() + reader = io.BytesIO(response.content) + attachment_urls[key.split(".")[1]] = ( + value["presigned_url"], + reader, + ) + del example["attachment_urls"] + yield ls_schemas.Example( **example, + attachment_urls=attachment_urls, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 8ed55f6bf..d197de0d3 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -65,7 +65,7 @@ DataFrame = Any logger = logging.getLogger(__name__) -TARGET_T = Callable[[dict], dict] +TARGET_T = Union[Callable[[dict], dict], Callable[[dict, dict], dict]] # Data format: dataset-name, dataset_id, or examples DATA_T = Union[str, uuid.UUID, Iterable[schemas.Example], schemas.Dataset] # Summary evaluator runs over the whole dataset @@ -907,7 +907,6 @@ def _evaluate( runs, client, ) - manager = _ExperimentManager( data, client=client, @@ -918,6 +917,7 @@ def _evaluate( # If provided, we don't need to create a new experiment. runs=runs, # Create or resolve the experiment. + include_attachments=_include_attachments(target), ).start() cache_dir = ls_utils.get_cache_dir(None) cache_path = ( @@ -1162,6 +1162,7 @@ def __init__( summary_results: Optional[Iterable[EvaluationResults]] = None, description: Optional[str] = None, num_repetitions: int = 1, + include_attachments: bool = False, ): super().__init__( experiment=experiment, @@ -1175,11 +1176,16 @@ def __init__( self._evaluation_results = evaluation_results self._summary_results = summary_results self._num_repetitions = num_repetitions + self._include_attachments = include_attachments @property def examples(self) -> Iterable[schemas.Example]: if self._examples is None: - self._examples = _resolve_data(self._data, client=self.client) + self._examples = _resolve_data( + self._data, + client=self.client, + include_attachments=self._include_attachments, + ) if self._num_repetitions > 1: self._examples = itertools.chain.from_iterable( itertools.tee(self._examples, self._num_repetitions) @@ -1225,6 +1231,7 @@ def start(self) -> _ExperimentManager: client=self.client, runs=self._runs, evaluation_results=self._evaluation_results, + include_attachments=self._include_attachments, ) def with_predictions( @@ -1246,6 +1253,7 @@ def with_predictions( client=self.client, runs=(pred["run"] for pred in r2), # TODO: Can't do multiple prediction rounds rn. + include_attachments=self._include_attachments, ) def with_evaluators( @@ -1276,6 +1284,7 @@ def with_evaluators( runs=(result["run"] for result in r2), evaluation_results=(result["evaluation_results"] for result in r3), summary_results=self._summary_results, + include_attachments=self._include_attachments, ) def with_summary_evaluators( @@ -1296,6 +1305,7 @@ def with_summary_evaluators( runs=self.runs, evaluation_results=self._evaluation_results, summary_results=aggregate_feedback_gen, + include_attachments=self._include_attachments, ) def get_results(self) -> Iterable[ExperimentResultRow]: @@ -1325,14 +1335,23 @@ def get_summary_scores(self) -> Dict[str, List[dict]]: # Private methods def _predict( - self, target: TARGET_T, /, max_concurrency: Optional[int] = None + self, + target: TARGET_T, + /, + max_concurrency: Optional[int] = None, ) -> Generator[_ForwardResults, None, None]: """Run the target function on the examples.""" fn = _ensure_traceable(target) + if max_concurrency == 0: for example in self.examples: yield _forward( - fn, example, self.experiment_name, self._metadata, self.client + fn, + example, + self.experiment_name, + self._metadata, + self.client, + include_attachments=self._include_attachments, ) else: @@ -1345,6 +1364,7 @@ def _predict( self.experiment_name, self._metadata, self.client, + include_attachments=self._include_attachments, ) for example in self.examples ] @@ -1618,6 +1638,7 @@ def _forward( experiment_name: str, metadata: dict, client: langsmith.Client, + include_attachments: Optional[bool] = None, ) -> _ForwardResults: run: Optional[schemas.RunBase] = None @@ -1627,8 +1648,13 @@ def _get_run(r: rt.RunTree) -> None: with rh.tracing_context(enabled=True): try: + args = ( + (example.inputs, example.attachment_urls) + if include_attachments + else (example.inputs,) + ) fn( - example.inputs, + *args, langsmith_extra=rh.LangSmithExtra( reference_example_id=example.id, on_end=_get_run, @@ -1655,15 +1681,26 @@ def _get_run(r: rt.RunTree) -> None: def _resolve_data( - data: DATA_T, *, client: langsmith.Client + data: DATA_T, + *, + client: langsmith.Client, + include_attachments: bool = False, ) -> Iterable[schemas.Example]: """Return the examples for the given dataset.""" + # TODO: Find a smarter way of determining whether + # to get attachments (don't just default to true) if isinstance(data, str): - return client.list_examples(dataset_name=data) + return client.list_examples( + dataset_name=data, include_attachments=include_attachments + ) elif isinstance(data, uuid.UUID): - return client.list_examples(dataset_id=data) + return client.list_examples( + dataset_id=data, include_attachments=include_attachments + ) elif isinstance(data, schemas.Dataset): - return client.list_examples(dataset_id=data.id) + return client.list_examples( + dataset_id=data.id, include_attachments=include_attachments + ) return data @@ -1683,6 +1720,7 @@ def _ensure_traceable( " ...\n" ")" ) + if rh.is_traceable_function(target): fn: rh.SupportsLangsmithExtra[[dict], dict] = target else: @@ -1692,6 +1730,38 @@ def _ensure_traceable( return fn +def _include_attachments( + target: Union[TARGET_T, Iterable[schemas.Run], Runnable], +) -> bool: + """Whether the target function accepts attachments.""" + if _is_langchain_runnable(target) or not callable(target): + return False + # Check function signature + sig = inspect.signature(target) + params = list(sig.parameters.values()) + positional_params = [ + p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + ] + + if len(positional_params) == 0: + raise ValueError( + "Target function must accept at least one positional argument (inputs)" + ) + elif len(positional_params) > 2: + raise ValueError( + "Target function must accept at most two positional " + "arguments (inputs, attachments)" + ) + elif len(positional_params) == 2: + if tuple(p.name for p in positional_params) != ("inputs", "attachments"): + raise ValueError( + "When target function has two positional arguments, they must be named " + "'inputs' and 'attachments', respectively." + ) + + return len(positional_params) == 2 + + def _resolve_experiment( experiment: Optional[Union[schemas.TracerSession, str, uuid.UUID]], runs: Optional[Iterable[schemas.Run]], diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index f134e1432..1ea0e6b32 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -63,8 +63,22 @@ def my_function(bar: int, my_val: Attachment): data: bytes -Attachments = Dict[str, Union[Tuple[str, bytes], Attachment]] -"""Attachments associated with the run. Each entry is a tuple of (mime_type, bytes).""" +Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, str]] +"""Attachments associated with the run. +Each entry is a tuple of (mime_type, bytes), or a fliepath""" + + +@runtime_checkable +class BinaryIOLike(Protocol): + """Protocol for binary IO-like objects.""" + + def read(self, size: int = -1) -> bytes: + """Read function.""" + ... + + def write(self, b: bytes) -> int: + """Write function.""" + ... class ExampleBase(BaseModel): @@ -74,11 +88,15 @@ class ExampleBase(BaseModel): inputs: Dict[str, Any] = Field(default_factory=dict) outputs: Optional[Dict[str, Any]] = Field(default=None) metadata: Optional[Dict[str, Any]] = Field(default=None) + attachment_urls: Optional[Dict[str, Tuple[str, BinaryIOLike]]] = Field(default=None) + """Dictionary with attachment names as keys and a tuple of the S3 url + and a reader of the data for the file.""" class Config: """Configuration class for the schema.""" frozen = True + arbitrary_types_allowed = True class ExampleCreate(ExampleBase): diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index d1d16ce0f..cb89b1629 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -20,7 +20,8 @@ from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor from langsmith.client import ID_TYPE, Client -from langsmith.schemas import DataType, ExampleUpsertWithAttachments +from langsmith.evaluation import evaluate +from langsmith.schemas import DataType, Example, ExampleUpsertWithAttachments, Run from langsmith.utils import ( LangSmithConnectionError, LangSmithError, @@ -1119,3 +1120,96 @@ def create_encoder(*args, **kwargs): myobj["key_1"] assert not caplog.records + + +@pytest.mark.skip( + reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first" +) +def test_list_examples_attachments_keys(langchain_client: Client) -> None: + """Test list_examples returns same keys with and without attachments.""" + dataset_name = "__test_list_examples_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset(dataset_name=dataset_name) + + langchain_client.create_example( + inputs={"text": "hello world"}, + outputs={"response": "hi there"}, + dataset_id=dataset.id, + attachments={ + "test_file": ("text/plain", b"test content"), + }, + ) + + # Get examples with attachments + with_attachments = next( + langchain_client.list_examples(dataset_id=dataset.id, include_attachments=True) + ) + + # Get examples without attachments + without_attachments = next( + langchain_client.list_examples(dataset_id=dataset.id, include_attachments=False) + ) + + with_keys = set(with_attachments.dict().keys()) + without_keys = set(without_attachments.dict().keys()) + assert with_keys == without_keys, ( + f"Keys differ when include_attachments=True vs False.\n" + f"Only in with_attachments: {with_keys - without_keys}\n" + f"Only in without_attachments: {without_keys - with_keys}" + ) + + langchain_client.delete_dataset(dataset_id=dataset.id) + + +@pytest.mark.skip( + reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first" +) +def test_evaluate_with_attachments(langchain_client: Client) -> None: + """Test evaluating examples with attachments.""" + dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] + # 1. Create dataset + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for evals with attachments", + data_type=DataType.kv, + ) + + # 2. Create example with attachments + example = ExampleUpsertWithAttachments( + dataset_id=dataset.id, + inputs={"question": "What is shown in the image?"}, + outputs={"answer": "test image"}, + attachments={ + "image": ("image/png", b"fake image data for testing"), + }, + ) + + langchain_client.upsert_examples_multipart(upserts=[example]) + + # 3. Define target function that uses attachments + def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]: + # Verify we receive the attachment data + assert "image" in attachments + image_url, image_data = attachments["image"] + assert image_data.read() == b"fake image data for testing" + return {"answer": "test image"} + + # 4. Define simple evaluator + def evaluator(run: Run, example: Example) -> Dict[str, Any]: + return { + "score": float( + run.outputs.get("answer") == example.outputs.get("answer") # type: ignore + ) + } + + # 5. Run evaluation + results = evaluate( + target, data=dataset_name, evaluators=[evaluator], client=langchain_client + ) + + # 6. Verify results + assert len(results) == 1 + for result in results: + assert result["evaluation_results"]["results"][0].score == 1.0 + + # Cleanup + langchain_client.delete_dataset(dataset_name=dataset_name) From ddbe2f5e54d8c3b888dccce9ae931a1973e00c8f Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 13:44:45 -0800 Subject: [PATCH 26/88] file path update --- python/langsmith/_internal/_operations.py | 26 ++++++++----- python/langsmith/client.py | 47 ++++++++++++----------- python/langsmith/schemas.py | 5 ++- 3 files changed, 43 insertions(+), 35 deletions(-) diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index e1e99d6e2..80f07d8dd 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -2,6 +2,7 @@ import itertools import logging +from pathlib import Path import uuid from typing import Literal, Optional, Union, cast @@ -256,18 +257,23 @@ def serialized_run_operation_to_multipart_parts_and_context( " periods ('.'). Please rename the attachment and try again." ) continue - - acc_parts.append( - ( - f"attachment.{op.id}.{n}", + + if isinstance(valb, Path): + #TODO: actually deal with this case + # This is just for speed of getting something out + continue + else: + acc_parts.append( ( - None, - valb, - content_type, - {"Content-Length": str(len(valb))}, - ), + f"attachment.{op.id}.{n}", + ( + None, + valb, + content_type, + {"Content-Length": str(len(valb))}, + ), + ) ) - ) return MultipartPartsAndContext( acc_parts, f"trace={op.trace_id},id={op.id}", diff --git a/python/langsmith/client.py b/python/langsmith/client.py index aad7057fc..e9832e276 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -25,6 +25,7 @@ import json import logging import os +from pathlib import Path import random import threading import time @@ -3442,33 +3443,33 @@ def upsert_examples_multipart( if example.attachments: for name, attachment in example.attachments.items(): if isinstance(attachment, tuple): - mime_type, data = attachment - parts.append( - ( - f"{example_id}.attachment.{name}", + if isinstance(attachment[1], Path): + mime_type, file_path = attachment + file_size = os.path.getsize(file_path) + parts.append( ( - None, - data, - f"{mime_type}; length={len(data)}", - {}, - ), + f"{example_id}.attachment.{name}", + ( + None, + open(file_path, "rb"), + f"{mime_type}; length={file_size}", + {}, + ), + ) ) - ) - elif isinstance(attachment, str): - file_path = attachment - mime_type = "application/octet-stream" - file_size = os.path.getsize(file_path) - parts.append( - ( - f"{example_id}.attachment.{name}", + else: + mime_type, data = attachment + parts.append( ( - None, - open(file_path, "rb"), - f"{mime_type}; length={file_size}", - {}, - ), + f"{example_id}.attachment.{name}", + ( + None, + data, + f"{mime_type}; length={len(data)}", + {}, + ), + ) ) - ) else: parts.append( ( diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 1ea0e6b32..f4c3df0e9 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -5,6 +5,7 @@ from datetime import datetime, timedelta, timezone from decimal import Decimal from enum import Enum +from pathlib import Path from typing import ( Any, Dict, @@ -63,9 +64,9 @@ def my_function(bar: int, my_val: Attachment): data: bytes -Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, str]] +Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, Tuple[str, Path]]] """Attachments associated with the run. -Each entry is a tuple of (mime_type, bytes), or a fliepath""" +Each entry is a tuple of (mime_type, bytes), or (mime_type, file_path)""" @runtime_checkable From c1ba615bd9fb0754bd2da6d64d7810f4472be736 Mon Sep 17 00:00:00 2001 From: Jake Rachleff Date: Tue, 19 Nov 2024 14:17:21 -0800 Subject: [PATCH 27/88] add benchmarks --- ...load_example_with_large_file_attachment.py | 113 ++++++++++++++++++ python/bench/upload_examples_bench.py | 16 +-- 2 files changed, 121 insertions(+), 8 deletions(-) create mode 100644 python/bench/upload_example_with_large_file_attachment.py diff --git a/python/bench/upload_example_with_large_file_attachment.py b/python/bench/upload_example_with_large_file_attachment.py new file mode 100644 index 000000000..31b36b2e5 --- /dev/null +++ b/python/bench/upload_example_with_large_file_attachment.py @@ -0,0 +1,113 @@ +import statistics +import time +from pathlib import Path +from typing import Dict +from uuid import uuid4 + + +from langsmith import Client +from langsmith.schemas import DataType, ExampleUpsertWithAttachments + +import os + +WRITE_BATCH = 10000 + +def create_large_file(size: int, dir: str) -> str: + """Create a large file for benchmarking purposes.""" + filename = f"large_file_{size}.txt" + filepath = os.path.join(dir, filename) + + # delete the file if it exists + print("Deleting existing file...") + if os.path.exists(filepath): + os.remove(filepath) + + print("Creating big file...") + with open(filepath, "w") as f: + curr_size = 0 + while curr_size < size: + f.write("a" * (size - curr_size)) + curr_size += size - curr_size + + print("Done creating big file...") + return filepath + +DATASET_NAME = "upsert_big_file_to_dataset" + +def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int = 1) -> Dict: + """ + Benchmark run creation with specified parameters. + Returns timing statistics. + """ + multipart_timings = [] + + for _ in range(samples): + client = Client() + + if client.has_dataset(dataset_name=DATASET_NAME): + client.delete_dataset(dataset_name=DATASET_NAME) + + dataset = client.create_dataset( + DATASET_NAME, + description="Test dataset for big file upload", + ) + large_file = create_large_file(size_bytes, "/tmp") + examples = [ + ExampleUpsertWithAttachments( + dataset_id=dataset.id, + inputs={"a": 1}, + outputs={"b": 2}, + attachments={ + "bigfile": ("text/plain", Path(large_file)), + }, + ) for _ in range(num_examples) + ] + + multipart_start = time.perf_counter() + client.upsert_examples_multipart(upserts=examples) + multipart_elapsed = time.perf_counter() - multipart_start + + multipart_timings.append(multipart_elapsed) + + return { + "mean": statistics.mean(multipart_timings), + "median": statistics.median(multipart_timings), + "stdev": ( + statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0 + ), + "min": min(multipart_timings), + "max": max(multipart_timings), + } + + +size_bytes = 50000000 +num_examples = 10 + +def main(size_bytes: int, num_examples: int = 1): + """ + Run benchmarks with different combinations of parameters and report results. + """ + results = benchmark_big_file_upload(size_bytes, num_examples) + + print( + f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:" + ) + print("-" * 30) + print(f"{'Metric':<15} {'Result':>20}") + print("-" * 30) + + metrics = ["mean", "median", "stdev", "min", "max"] + for metric in metrics: + print( + f"{results[metric]:>20.4f}" + ) + + print("-" * 30) + print( + f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} " + ) + print("(examples/second)") + + +if __name__ == "__main__": + main(size_bytes, num_examples) diff --git a/python/bench/upload_examples_bench.py b/python/bench/upload_examples_bench.py index 3fc79beb4..5a22a731b 100644 --- a/python/bench/upload_examples_bench.py +++ b/python/bench/upload_examples_bench.py @@ -4,7 +4,7 @@ from uuid import uuid4 from langsmith import Client -from langsmith.schemas import DataType, ExampleCreateWithAttachments +from langsmith.schemas import DataType, ExampleUpsertWithAttachments def create_large_json(length: int) -> Dict: @@ -32,7 +32,7 @@ def create_large_json(length: int) -> Dict: def create_example_data(dataset_id: str, json_size: int) -> Dict: """Create a single example data object.""" - return ExampleCreateWithAttachments( + return ExampleUpsertWithAttachments( **{ "dataset_id": dataset_id, "inputs": create_large_json(json_size), @@ -54,7 +54,7 @@ def benchmark_example_uploading( multipart_timings, old_timings = [], [] for _ in range(samples): - client = Client(api_url="https://dev.api.smith.langchain.com") + client = Client() if client.has_dataset(dataset_name=DATASET_NAME): client.delete_dataset(dataset_name=DATASET_NAME) @@ -70,11 +70,11 @@ def benchmark_example_uploading( # Old method old_start = time.perf_counter() - inputs = [e.inputs for e in examples] - outputs = [e.outputs for e in examples] - # the create_examples endpoint fails above 20mb - # so this will crash with json_size > ~100 - client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id) + # inputs = [e.inputs for e in examples] + # outputs = [e.outputs for e in examples] + # # the create_examples endpoint fails above 20mb + # # so this will crash with json_size > ~100 + # client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id) old_elapsed = time.perf_counter() - old_start # New method From 354417182ae86c1c88762173916b333f8cac0473 Mon Sep 17 00:00:00 2001 From: Jake Rachleff Date: Tue, 19 Nov 2024 14:52:54 -0800 Subject: [PATCH 28/88] better error message --- python/langsmith/evaluation/_runner.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index d197de0d3..bb038f51a 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1753,10 +1753,17 @@ def _include_attachments( "arguments (inputs, attachments)" ) elif len(positional_params) == 2: - if tuple(p.name for p in positional_params) != ("inputs", "attachments"): + mismatches = [] + for i, (p, expected) in enumerate(zip(positional_params, ("inputs", "attachments"))): + if p.name != expected: + mismatches.append((i, p.name)) + + if mismatches: raise ValueError( "When target function has two positional arguments, they must be named " - "'inputs' and 'attachments', respectively." + "'inputs' and 'attachments', respectively. Received: " + ",".join( + f"'{p}' at index {i}" for i, p in mismatches + ) ) return len(positional_params) == 2 From 3cc32c57695c2f7a2d13c8c1bba971af7863b522 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 14:55:20 -0800 Subject: [PATCH 29/88] aevaluate --- python/langsmith/evaluation/_arunner.py | 42 +++++++++++++++++++++---- python/langsmith/evaluation/_runner.py | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index a2c3b2705..2ff023e3e 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -40,6 +40,7 @@ _ExperimentManagerMixin, _extract_feedback_keys, _ForwardResults, + _include_attachments, _is_langchain_runnable, _load_examples_map, _load_experiment, @@ -66,7 +67,9 @@ logger = logging.getLogger(__name__) -ATARGET_T = Callable[[dict], Awaitable[dict]] +ATARGET_T = Union[ + Callable[[dict], Awaitable[dict]], Callable[[dict, dict], Awaitable[dict]] +] async def aevaluate( @@ -401,6 +404,7 @@ async def _aevaluate( description=description, num_repetitions=num_repetitions, runs=runs, + include_attachments=_include_attachments(target), ).astart() cache_dir = ls_utils.get_cache_dir(None) if cache_dir is not None: @@ -461,6 +465,7 @@ def __init__( summary_results: Optional[AsyncIterable[EvaluationResults]] = None, description: Optional[str] = None, num_repetitions: int = 1, + include_attachments: bool = False, ): super().__init__( experiment=experiment, @@ -476,10 +481,15 @@ def __init__( self._evaluation_results = evaluation_results self._summary_results = summary_results self._num_repetitions = num_repetitions + self._include_attachments = include_attachments async def aget_examples(self) -> AsyncIterator[schemas.Example]: if self._examples is None: - self._examples = _aresolve_data(self._data, client=self.client) + self._examples = _aresolve_data( + self._data, + client=self.client, + include_attachments=self._include_attachments, + ) if self._num_repetitions > 1: self._examples = async_chain_from_iterable( aitertools.atee(self._examples, self._num_repetitions) @@ -545,6 +555,7 @@ async def astart(self) -> _AsyncExperimentManager: client=self.client, runs=self._runs, evaluation_results=self._evaluation_results, + include_attachments=self._include_attachments, ) async def awith_predictions( @@ -561,6 +572,7 @@ async def awith_predictions( metadata=self._metadata, client=self.client, runs=(pred["run"] async for pred in r2), + include_attachments=self._include_attachments, ) async def awith_evaluators( @@ -580,6 +592,7 @@ async def awith_evaluators( runs=(result["run"] async for result in r2), evaluation_results=(result["evaluation_results"] async for result in r3), summary_results=self._summary_results, + include_attachments=self._include_attachments, ) async def awith_summary_evaluators( @@ -596,6 +609,7 @@ async def awith_summary_evaluators( runs=self.aget_runs(), evaluation_results=self._evaluation_results, summary_results=aggregate_feedback_gen, + include_attachments=self._include_attachments, ) async def aget_results(self) -> AsyncIterator[ExperimentResultRow]: @@ -630,7 +644,12 @@ async def predict_all(): async for example in await self.aget_examples(): # Yield the coroutine to be awaited later yield _aforward( - fn, example, self.experiment_name, self._metadata, self.client + fn, + example, + self.experiment_name, + self._metadata, + self.client, + include_attachments=self._include_attachments, ) async for result in aitertools.aiter_with_concurrency( @@ -904,6 +923,7 @@ async def _aforward( experiment_name: str, metadata: dict, client: langsmith.Client, + include_attachments: bool = False, ) -> _ForwardResults: run: Optional[schemas.RunBase] = None @@ -913,8 +933,13 @@ def _get_run(r: run_trees.RunTree) -> None: with rh.tracing_context(enabled=True): try: + args = ( + (example.inputs, example.attachment_urls) + if include_attachments + else (example.inputs,) + ) await fn( - example.inputs, + *args, langsmith_extra=rh.LangSmithExtra( reference_example_id=example.id, on_end=_get_run, @@ -971,12 +996,17 @@ def _ensure_async_traceable( def _aresolve_data( - data: Union[DATA_T, AsyncIterable[schemas.Example]], *, client: langsmith.Client + data: Union[DATA_T, AsyncIterable[schemas.Example]], + *, + client: langsmith.Client, + include_attachments: bool = False, ) -> AsyncIterator[schemas.Example]: """Return the examples for the given dataset.""" if isinstance(data, AsyncIterable): return aitertools.ensure_async_iterator(data) - return aitertools.ensure_async_iterator(_resolve_data(data, client=client)) + return aitertools.ensure_async_iterator( + _resolve_data(data, client=client, include_attachments=include_attachments) + ) T = TypeVar("T") diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index d197de0d3..c2603b440 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1638,7 +1638,7 @@ def _forward( experiment_name: str, metadata: dict, client: langsmith.Client, - include_attachments: Optional[bool] = None, + include_attachments: bool = False, ) -> _ForwardResults: run: Optional[schemas.RunBase] = None From 08a6f34aca8ff78745ea925a0699e20569d4f96c Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 17:15:41 -0800 Subject: [PATCH 30/88] unit test for _include_attachments --- .../unit_tests/evaluation/test_runner.py | 69 ++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index 408d4508d..5c06292ea 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -5,6 +5,7 @@ import itertools import json import random +import re import sys import time import uuid @@ -20,7 +21,7 @@ from langsmith import schemas as ls_schemas from langsmith.client import Client from langsmith.evaluation._arunner import aevaluate, aevaluate_existing -from langsmith.evaluation._runner import evaluate_existing +from langsmith.evaluation._runner import evaluate_existing, _include_attachments from langsmith.evaluation.evaluator import _normalize_evaluator_func @@ -566,3 +567,69 @@ async def atarget(x): await aevaluate( atarget, data=ds_examples, evaluators=[eval_], client=client ) + + +@pytest.mark.parametrize( + "target,expected,error_msg", + [ + # Valid cases + (lambda inputs: None, False, None), + (lambda inputs, attachments: None, True, None), + + # Invalid parameter names + ( + lambda x, y: None, + None, + "When target function has two positional arguments, they must be named " + "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' at index 1" + ), + ( + lambda input, attachment: None, + None, + "When target function has two positional arguments, they must be named " + "'inputs' and 'attachments', respectively. Received: 'input' at index 0," + "'attachment' at index 1" + ), + + # Too many parameters + ( + lambda inputs, attachments, extra: None, + None, + re.escape("Target function must accept at most two positional arguments (inputs, attachments)") + ), + + # No positional parameters + ( + lambda *, foo="bar": None, + None, + re.escape("Target function must accept at least one positional argument (inputs)") + ), + + # Mixed positional and keyword + (lambda inputs, *, optional=None: None, False, None), + (lambda inputs, attachments, *, optional=None: None, True, None), + + # Non-callable + ("not_a_function", False, None), + ], +) +def test_include_attachments(target, expected, error_msg): + """Test the _include_attachments function with various input cases.""" + try: + from langchain_core.runnables import RunnableLambda + except ImportError: + if target == "runnable": + pytest.skip("langchain-core not installed") + return + + if target == "runnable": + target = RunnableLambda(lambda x: x) + expected = False + error_msg = None + + if error_msg is not None: + with pytest.raises(ValueError, match=error_msg): + _include_attachments(target) + else: + result = _include_attachments(target) + assert result == expected \ No newline at end of file From 8e2e7042e8ad320ba80bf6df8705bca4380fe2e3 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 18:29:30 -0800 Subject: [PATCH 31/88] test that adding examples without attachments still lets you run evals --- python/tests/integration_tests/test_client.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index cb89b1629..bb5651997 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1213,3 +1213,51 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: # Cleanup langchain_client.delete_dataset(dataset_name=dataset_name) + + +def test_evaluate_with_no_attachments(langchain_client: Client) -> None: + """Test evaluating examples without attachments using a target that accepts attachments.""" + dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for evals without attachments", + data_type=DataType.kv, + ) + + # Create example using old way, attachments should be set to {} + langchain_client.create_example( + dataset_id=dataset.id, + inputs={"question": "What is 2+2?"}, + outputs={"answer": "4"}, + ) + + # Verify we can create example the new way without attachments + example = ExampleUpsertWithAttachments( + dataset_id=dataset.id, + inputs={"question": "What is 3+1?"}, + outputs={"answer": "4"}, + ) + langchain_client.upsert_examples_multipart(upserts=[example]) + + def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]: + # Verify we receive an empty attachments dict + assert isinstance(attachments, dict) + assert len(attachments) == 0 + return {"answer": "4"} + + def evaluator(run: Run, example: Example) -> Dict[str, Any]: + return { + "score": float( + run.outputs.get("answer") == example.outputs.get("answer") # type: ignore + ) + } + + results = evaluate( + target, data=dataset_name, evaluators=[evaluator], client=langchain_client + ) + + assert len(results) == 2 + for result in results: + assert result["evaluation_results"]["results"][0].score == 1.0 + + langchain_client.delete_dataset(dataset_name=dataset_name) \ No newline at end of file From cfa0e4c734738646e4f052d7e55e52323bd86ec5 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 18:47:03 -0800 Subject: [PATCH 32/88] fmt --- ...load_example_with_large_file_attachment.py | 30 +++++++++--------- python/langsmith/_internal/_operations.py | 6 ++-- python/langsmith/client.py | 2 +- python/langsmith/evaluation/_runner.py | 9 +++--- python/tests/integration_tests/test_client.py | 4 +-- .../unit_tests/evaluation/test_runner.py | 31 ++++++++++--------- 6 files changed, 41 insertions(+), 41 deletions(-) diff --git a/python/bench/upload_example_with_large_file_attachment.py b/python/bench/upload_example_with_large_file_attachment.py index 31b36b2e5..8aaedd696 100644 --- a/python/bench/upload_example_with_large_file_attachment.py +++ b/python/bench/upload_example_with_large_file_attachment.py @@ -1,17 +1,15 @@ +import os import statistics import time from pathlib import Path from typing import Dict -from uuid import uuid4 - from langsmith import Client -from langsmith.schemas import DataType, ExampleUpsertWithAttachments - -import os +from langsmith.schemas import ExampleUpsertWithAttachments WRITE_BATCH = 10000 + def create_large_file(size: int, dir: str) -> str: """Create a large file for benchmarking purposes.""" filename = f"large_file_{size}.txt" @@ -32,9 +30,13 @@ def create_large_file(size: int, dir: str) -> str: print("Done creating big file...") return filepath + DATASET_NAME = "upsert_big_file_to_dataset" -def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int = 1) -> Dict: + +def benchmark_big_file_upload( + size_bytes: int, num_examples: int, samples: int = 1 +) -> Dict: """ Benchmark run creation with specified parameters. Returns timing statistics. @@ -60,7 +62,8 @@ def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int = attachments={ "bigfile": ("text/plain", Path(large_file)), }, - ) for _ in range(num_examples) + ) + for _ in range(num_examples) ] multipart_start = time.perf_counter() @@ -83,29 +86,24 @@ def benchmark_big_file_upload(size_bytes: int, num_examples: int, samples: int = size_bytes = 50000000 num_examples = 10 + def main(size_bytes: int, num_examples: int = 1): """ Run benchmarks with different combinations of parameters and report results. """ results = benchmark_big_file_upload(size_bytes, num_examples) - print( - f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:" - ) + print(f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:") print("-" * 30) print(f"{'Metric':<15} {'Result':>20}") print("-" * 30) metrics = ["mean", "median", "stdev", "min", "max"] for metric in metrics: - print( - f"{results[metric]:>20.4f}" - ) + print(f"{results[metric]:>20.4f}") print("-" * 30) - print( - f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} " - ) + print(f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} ") print("(examples/second)") diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index 80f07d8dd..430c690b0 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -2,8 +2,8 @@ import itertools import logging -from pathlib import Path import uuid +from pathlib import Path from typing import Literal, Optional, Union, cast import orjson @@ -257,9 +257,9 @@ def serialized_run_operation_to_multipart_parts_and_context( " periods ('.'). Please rename the attachment and try again." ) continue - + if isinstance(valb, Path): - #TODO: actually deal with this case + # TODO: actually deal with this case # This is just for speed of getting something out continue else: diff --git a/python/langsmith/client.py b/python/langsmith/client.py index e9832e276..e649849ba 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -25,7 +25,6 @@ import json import logging import os -from pathlib import Path import random import threading import time @@ -35,6 +34,7 @@ import warnings import weakref from inspect import signature +from pathlib import Path from queue import PriorityQueue from typing import ( TYPE_CHECKING, diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 6393dfbfe..a860c137b 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1754,16 +1754,17 @@ def _include_attachments( ) elif len(positional_params) == 2: mismatches = [] - for i, (p, expected) in enumerate(zip(positional_params, ("inputs", "attachments"))): + for i, (p, expected) in enumerate( + zip(positional_params, ("inputs", "attachments")) + ): if p.name != expected: mismatches.append((i, p.name)) if mismatches: raise ValueError( "When target function has two positional arguments, they must be named " - "'inputs' and 'attachments', respectively. Received: " + ",".join( - f"'{p}' at index {i}" for i, p in mismatches - ) + "'inputs' and 'attachments', respectively. Received: " + + ",".join(f"'{p}' at index {i}" for i, p in mismatches) ) return len(positional_params) == 2 diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index bb5651997..cfd848e01 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1216,7 +1216,7 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: def test_evaluate_with_no_attachments(langchain_client: Client) -> None: - """Test evaluating examples without attachments using a target that accepts attachments.""" + """Test evaluating examples without attachments using a target with attachments.""" dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4] dataset = langchain_client.create_dataset( dataset_name, @@ -1260,4 +1260,4 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: for result in results: assert result["evaluation_results"]["results"][0].score == 1.0 - langchain_client.delete_dataset(dataset_name=dataset_name) \ No newline at end of file + langchain_client.delete_dataset(dataset_name=dataset_name) diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index 5c06292ea..ffbcf2901 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -21,7 +21,7 @@ from langsmith import schemas as ls_schemas from langsmith.client import Client from langsmith.evaluation._arunner import aevaluate, aevaluate_existing -from langsmith.evaluation._runner import evaluate_existing, _include_attachments +from langsmith.evaluation._runner import _include_attachments, evaluate_existing from langsmith.evaluation.evaluator import _normalize_evaluator_func @@ -575,40 +575,41 @@ async def atarget(x): # Valid cases (lambda inputs: None, False, None), (lambda inputs, attachments: None, True, None), - # Invalid parameter names ( - lambda x, y: None, - None, + lambda x, y: None, + None, "When target function has two positional arguments, they must be named " - "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' at index 1" + "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' " + "at index 1", ), ( - lambda input, attachment: None, + lambda input, attachment: None, None, "When target function has two positional arguments, they must be named " "'inputs' and 'attachments', respectively. Received: 'input' at index 0," - "'attachment' at index 1" + "'attachment' at index 1", ), - # Too many parameters ( lambda inputs, attachments, extra: None, None, - re.escape("Target function must accept at most two positional arguments (inputs, attachments)") + re.escape( + "Target function must accept at most two positional arguments " + "(inputs, attachments)" + ), ), - # No positional parameters ( lambda *, foo="bar": None, None, - re.escape("Target function must accept at least one positional argument (inputs)") + re.escape( + "Target function must accept at least one positional argument (inputs)" + ), ), - # Mixed positional and keyword (lambda inputs, *, optional=None: None, False, None), (lambda inputs, attachments, *, optional=None: None, True, None), - # Non-callable ("not_a_function", False, None), ], @@ -621,7 +622,7 @@ def test_include_attachments(target, expected, error_msg): if target == "runnable": pytest.skip("langchain-core not installed") return - + if target == "runnable": target = RunnableLambda(lambda x: x) expected = False @@ -632,4 +633,4 @@ def test_include_attachments(target, expected, error_msg): _include_attachments(target) else: result = _include_attachments(target) - assert result == expected \ No newline at end of file + assert result == expected From de38a37ffc244bce357b8d1f4d9e726970db7814 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 18:57:42 -0800 Subject: [PATCH 33/88] fmt --- python/langsmith/_internal/_operations.py | 2 +- python/langsmith/client.py | 2 +- python/langsmith/evaluation/_arunner.py | 4 ++-- python/langsmith/evaluation/_runner.py | 6 +++++- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index 430c690b0..5ba4ff90e 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -215,7 +215,7 @@ def serialized_run_operation_to_multipart_parts_and_context( op: SerializedRunOperation, ) -> MultipartPartsAndContext: acc_parts: list[MultipartPart] = [] - + valb: Union[bytes, Path] # this is main object, minus inputs/outputs/events/attachments acc_parts.append( ( diff --git a/python/langsmith/client.py b/python/langsmith/client.py index e649849ba..ce5731cdb 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3451,7 +3451,7 @@ def upsert_examples_multipart( f"{example_id}.attachment.{name}", ( None, - open(file_path, "rb"), + open(file_path, "rb"), # type: ignore[arg-type] f"{mime_type}; length={file_size}", {}, ), diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 2ff023e3e..2d77edbd4 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -991,8 +991,8 @@ def _ensure_async_traceable( return target # type: ignore else: if _is_langchain_runnable(target): - target = target.ainvoke # type: ignore[attr-defined] - return rh.traceable(name="AsyncTarget")(target) + target = target.ainvoke # type: ignore[union-attr] + return rh.traceable(name="AsyncTarget")(target) # type: ignore[arg-type] def _aresolve_data( diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index a860c137b..fefec7aa2 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -20,6 +20,7 @@ from typing import ( TYPE_CHECKING, Any, + AsyncIterable, Awaitable, Callable, DefaultDict, @@ -44,6 +45,7 @@ from langsmith import run_trees as rt from langsmith import schemas from langsmith import utils as ls_utils +from langsmith.evaluation._arunner import ATARGET_T from langsmith.evaluation.evaluator import ( ComparisonEvaluationResult, DynamicComparisonRunEvaluator, @@ -1731,7 +1733,9 @@ def _ensure_traceable( def _include_attachments( - target: Union[TARGET_T, Iterable[schemas.Run], Runnable], + target: Union[ + ATARGET_T, TARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable + ], ) -> bool: """Whether the target function accepts attachments.""" if _is_langchain_runnable(target) or not callable(target): From 2e747356eac1c309cf89dcc0705f0f6fe58846ff Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 19:03:54 -0800 Subject: [PATCH 34/88] fmt --- python/langsmith/evaluation/_arunner.py | 42 ++++++++++++++++++++++++- python/langsmith/evaluation/_runner.py | 6 +--- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 2d77edbd4..a7cc2033c 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -5,6 +5,7 @@ import asyncio import concurrent.futures as cf import datetime +import inspect import logging import pathlib import uuid @@ -40,7 +41,6 @@ _ExperimentManagerMixin, _extract_feedback_keys, _ForwardResults, - _include_attachments, _is_langchain_runnable, _load_examples_map, _load_experiment, @@ -965,6 +965,46 @@ def _get_run(r: run_trees.RunTree) -> None: ) +def _include_attachments( + target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict]], +) -> bool: + """Whether the target function accepts attachments.""" + if _is_langchain_runnable(target) or not callable(target): + return False + # Check function signature + sig = inspect.signature(target) + params = list(sig.parameters.values()) + positional_params = [ + p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + ] + + if len(positional_params) == 0: + raise ValueError( + "Target function must accept at least one positional argument (inputs)" + ) + elif len(positional_params) > 2: + raise ValueError( + "Target function must accept at most two positional " + "arguments (inputs, attachments)" + ) + elif len(positional_params) == 2: + mismatches = [] + for i, (p, expected) in enumerate( + zip(positional_params, ("inputs", "attachments")) + ): + if p.name != expected: + mismatches.append((i, p.name)) + + if mismatches: + raise ValueError( + "When target function has two positional arguments, they must be named " + "'inputs' and 'attachments', respectively. Received: " + + ",".join(f"'{p}' at index {i}" for i, p in mismatches) + ) + + return len(positional_params) == 2 + + def _ensure_async_traceable( target: ATARGET_T, ) -> rh.SupportsLangsmithExtra[[dict], Awaitable]: diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index fefec7aa2..a860c137b 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -20,7 +20,6 @@ from typing import ( TYPE_CHECKING, Any, - AsyncIterable, Awaitable, Callable, DefaultDict, @@ -45,7 +44,6 @@ from langsmith import run_trees as rt from langsmith import schemas from langsmith import utils as ls_utils -from langsmith.evaluation._arunner import ATARGET_T from langsmith.evaluation.evaluator import ( ComparisonEvaluationResult, DynamicComparisonRunEvaluator, @@ -1733,9 +1731,7 @@ def _ensure_traceable( def _include_attachments( - target: Union[ - ATARGET_T, TARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable - ], + target: Union[TARGET_T, Iterable[schemas.Run], Runnable], ) -> bool: """Whether the target function accepts attachments.""" if _is_langchain_runnable(target) or not callable(target): From f26c996ca33c6e4e9f623728cf45219130fa2ba2 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 19:06:08 -0800 Subject: [PATCH 35/88] attempt fix --- python/langsmith/client.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index ce5731cdb..ce51a09da 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3662,8 +3662,22 @@ def read_example( }, ) + example = response.json() + attachment_urls = {} + if example["attachment_urls"]: + for key, value in example["attachment_urls"].items(): + response = requests.get(value["presigned_url"], stream=True) + response.raise_for_status() + reader = io.BytesIO(response.content) + attachment_urls[key.split(".")[1]] = ( + value["presigned_url"], + reader, + ) + del example["attachment_urls"] + return ls_schemas.Example( - **response.json(), + **example, + attachment_urls=attachment_urls, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) From 095aae936aaa35387e2701a9ef2f9119dfdbb08e Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 19:24:54 -0800 Subject: [PATCH 36/88] fix test --- python/langsmith/evaluation/_runner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index a860c137b..6c916f47a 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1740,7 +1740,10 @@ def _include_attachments( sig = inspect.signature(target) params = list(sig.parameters.values()) positional_params = [ - p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + p + for p in params + if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + and p.default is p.empty ] if len(positional_params) == 0: From a99da233cd79e8112c51a8f28853f30badc76e35 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 19 Nov 2024 19:29:12 -0800 Subject: [PATCH 37/88] add unit test --- python/tests/unit_tests/evaluation/test_runner.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index ffbcf2901..6fb5a1739 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -16,6 +16,7 @@ from unittest.mock import MagicMock import pytest +from langchain_core.runnables import chain as as_runnable from langsmith import evaluate from langsmith import schemas as ls_schemas @@ -569,6 +570,16 @@ async def atarget(x): ) +@as_runnable +def nested_predict(inputs): + return {"output": "Yes"} + + +@as_runnable +def lc_predict(inputs): + return nested_predict.invoke(inputs) + + @pytest.mark.parametrize( "target,expected,error_msg", [ @@ -612,6 +623,8 @@ async def atarget(x): (lambda inputs, attachments, *, optional=None: None, True, None), # Non-callable ("not_a_function", False, None), + # Runnable + (lc_predict.invoke, False, None), ], ) def test_include_attachments(target, expected, error_msg): From b9dd0f28eb71724a840823d121d4fd01a07fe8d5 Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Wed, 20 Nov 2024 06:57:16 -0800 Subject: [PATCH 38/88] Bump version (rc) --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 191d61b22..fa5fed80b 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langsmith" -version = "0.1.144rc1" +version = "0.1.144rc3" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." authors = ["LangChain "] license = "MIT" From 01ef4d01a775a24107062835bd98101da9f3fab5 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 27 Nov 2024 15:22:02 -0800 Subject: [PATCH 39/88] repetitions --- python/langsmith/client.py | 4 ++-- python/langsmith/evaluation/_runner.py | 4 ++++ python/tests/integration_tests/test_client.py | 12 +++++++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index f1213d939..d77a79d8e 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3381,11 +3381,11 @@ def upsert_examples_multipart( upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [], ) -> ls_schemas.UpsertExamplesResponse: """Upsert examples.""" - """ if not (self.info.instance_flags or {}).get( + if not (self.info.instance_flags or {}).get( "examples_multipart_enabled", False ): raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.") - """ + parts: List[MultipartPart] = [] for example in upserts: diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 6c916f47a..756375d9b 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1670,6 +1670,10 @@ def _get_run(r: rt.RunTree) -> None: client=client, ), ) + if include_attachments: + for attachment in example.attachment_urls: + _, reader = example.attachment_urls[attachment] + reader.seek(0) except Exception as e: logger.error( f"Error running target function: {e}", exc_info=True, stacklevel=1 diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index cfd848e01..02510fccf 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1160,12 +1160,14 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: langchain_client.delete_dataset(dataset_id=dataset.id) -@pytest.mark.skip( - reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first" -) + def test_evaluate_with_attachments(langchain_client: Client) -> None: """Test evaluating examples with attachments.""" dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] + langchain_client = Client( + api_key="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", + api_url="https://dev.api.smith.langchain.com" + ) # 1. Create dataset dataset = langchain_client.create_dataset( dataset_name, @@ -1203,11 +1205,11 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: # 5. Run evaluation results = evaluate( - target, data=dataset_name, evaluators=[evaluator], client=langchain_client + target, data=dataset_name, evaluators=[evaluator], client=langchain_client, num_repetitions=2 ) # 6. Verify results - assert len(results) == 1 + assert len(results) == 2 for result in results: assert result["evaluation_results"]["results"][0].score == 1.0 From 3715c3070743e59caff2d38269f254c9516e40df Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Wed, 27 Nov 2024 15:22:35 -0800 Subject: [PATCH 40/88] nit --- python/tests/integration_tests/test_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 02510fccf..4ba418e1f 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1160,7 +1160,9 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: langchain_client.delete_dataset(dataset_id=dataset.id) - +@pytest.mark.skip( + reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first" +) def test_evaluate_with_attachments(langchain_client: Client) -> None: """Test evaluating examples with attachments.""" dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] From 49442d7aa4957d100e584894601aeb8f04f9bf50 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 2 Dec 2024 10:24:37 -0800 Subject: [PATCH 41/88] added upload endpoint --- python/langsmith/client.py | 71 ++++++++-- python/langsmith/schemas.py | 6 + python/tests/integration_tests/test_client.py | 121 +++++++++++++++++- 3 files changed, 183 insertions(+), 15 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index d77a79d8e..20bab0933 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3375,27 +3375,24 @@ def create_example_from_run( created_at=created_at, ) - def upsert_examples_multipart( + def _prepate_multipart_data( self, - *, - upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [], - ) -> ls_schemas.UpsertExamplesResponse: - """Upsert examples.""" - if not (self.info.instance_flags or {}).get( - "examples_multipart_enabled", False - ): - raise ValueError("Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version.") - + examples: List[ + ls_schemas.ExampleUploadWithAttachments + | ls_schemas.ExampleUpsertWithAttachments + ], + include_dataset_id: bool = False, + ) -> List[MultipartPart]: parts: List[MultipartPart] = [] - for example in upserts: + for example in examples: if example.id is not None: example_id = str(example.id) else: example_id = str(uuid.uuid4()) example_body = { - "dataset_id": example.dataset_id, + **({"dataset_id": example.dataset_id} if include_dataset_id else {}), "created_at": example.created_at, } if example.metadata is not None: @@ -3493,6 +3490,56 @@ def upsert_examples_multipart( else: data = encoder + return encoder, data + + def upload_examples_multipart( + self, + *, + uploads: List[ls_schemas.ExampleUploadWithAttachments] = [], + ) -> ls_schemas.UpsertExamplesResponse: + """Upload examples.""" + if not (self.info.instance_flags or {}).get( + "examples_multipart_enabled", False + ): + raise ValueError( + "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." + ) + + encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False) + dataset_ids = set([example.dataset_id for example in uploads]) + if len(dataset_ids) > 1: + raise ValueError("All examples must be in the same dataset.") + dataset_id = list(dataset_ids)[0] + + response = self.request_with_retries( + "POST", + f"/v1/platform/datasets/{dataset_id}/examples", + request_kwargs={ + "data": data, + "headers": { + **self._headers, + "Content-Type": encoder.content_type, + }, + }, + ) + ls_utils.raise_for_status_with_text(response) + return response.json() + + def upsert_examples_multipart( + self, + *, + upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [], + ) -> ls_schemas.UpsertExamplesResponse: + """Upsert examples.""" + if not (self.info.instance_flags or {}).get( + "examples_multipart_enabled", False + ): + raise ValueError( + "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." + ) + + encoder, data = self._prepate_multipart_data(upserts, include_dataset_id=True) + response = self.request_with_retries( "POST", "/v1/platform/examples/multipart", diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index f4c3df0e9..533cc8e67 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -114,6 +114,12 @@ class ExampleUpsertWithAttachments(ExampleCreate): attachments: Optional[Attachments] = None +class ExampleUploadWithAttachments(ExampleUpsertWithAttachments): + """Example upload with attachments.""" + + pass + + class Example(ExampleBase): """Example model.""" diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 4ba418e1f..277fb6faf 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -21,7 +21,13 @@ from langsmith.client import ID_TYPE, Client from langsmith.evaluation import evaluate -from langsmith.schemas import DataType, Example, ExampleUpsertWithAttachments, Run +from langsmith.schemas import ( + DataType, + Example, + ExampleUploadWithAttachments, + ExampleUpsertWithAttachments, + Run, +) from langsmith.utils import ( LangSmithConnectionError, LangSmithError, @@ -371,6 +377,111 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: client.create_run("My Run", inputs={"text": "hello world"}, run_type="llm") +def test_upload_examples_multipart(langchain_client: Client): + """Test uploading examples with attachments via multipart endpoint.""" + dataset_name = "__test_upload_examples_multipart" + uuid4().hex[:4] + if langchain_client.has_dataset(dataset_name=dataset_name): + langchain_client.delete_dataset(dataset_name=dataset_name) + + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for multipart example upload", + data_type=DataType.kv, + ) + + # Test example with all fields + example_id = uuid4() + example_1 = ExampleUploadWithAttachments( + id=example_id, + dataset_id=dataset.id, + inputs={"text": "hello world"}, + attachments={ + "test_file": ("text/plain", b"test content"), + }, + ) + + # Test example with minimum required fields + example_2 = ExampleUploadWithAttachments( + dataset_id=dataset.id, + inputs={"text": "minimal example"}, + ) + + # Test example with outputs and multiple attachments + example_3 = ExampleUploadWithAttachments( + dataset_id=dataset.id, + inputs={"text": "example with outputs"}, + outputs={"response": "test response"}, + attachments={ + "file1": ("text/plain", b"content 1"), + "file2": ("text/plain", b"content 2"), + }, + ) + + # Test uploading multiple examples at once + created_examples = langchain_client.upload_examples_multipart( + uploads=[example_1, example_2, example_3] + ) + assert created_examples["count"] == 3 + + created_example_1 = langchain_client.read_example(example_id) + assert created_example_1.inputs["text"] == "hello world" + + # Verify the examples were created correctly + examples = [ + ex + for ex in langchain_client.list_examples( + dataset_id=dataset.id, + include_attachments=True, + ) + ] + assert len(examples) == 3 + + # Verify example with ID was created with correct ID + example_with_id = [ex for ex in examples if ex.id == example_id][0] + assert example_with_id.inputs["text"] == "hello world" + assert "test_file" in example_with_id.attachment_urls + + # Verify example with outputs and multiple attachments + example_with_outputs = next( + ex + for ex in examples + if ex.outputs and ex.outputs.get("response") == "test response" + ) + assert len(example_with_outputs.attachment_urls) == 2 + assert "file1" in example_with_outputs.attachment_urls + assert "file2" in example_with_outputs.attachment_urls + + # Test uploading to non-existent dataset fails + fake_id = uuid4() + with pytest.raises(LangSmithNotFoundError): + langchain_client.upload_examples_multipart( + uploads=[ + ExampleUploadWithAttachments( + dataset_id=fake_id, + inputs={"text": "should fail"}, + ) + ] + ) + + # Test uploading examples to different datasets fails + with pytest.raises(ValueError, match="All examples must be in the same dataset"): + langchain_client.upload_examples_multipart( + uploads=[ + ExampleUploadWithAttachments( + dataset_id=dataset.id, + inputs={"text": "example 1"}, + ), + ExampleUploadWithAttachments( + dataset_id=uuid4(), + inputs={"text": "example 2"}, + ), + ] + ) + + # Clean up + langchain_client.delete_dataset(dataset_name=dataset_name) + + def test_upsert_examples_multipart(langchain_client: Client) -> None: """Test upserting examples with attachments via multipart endpoint.""" dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] @@ -1168,7 +1279,7 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None: dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] langchain_client = Client( api_key="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", - api_url="https://dev.api.smith.langchain.com" + api_url="https://dev.api.smith.langchain.com", ) # 1. Create dataset dataset = langchain_client.create_dataset( @@ -1207,7 +1318,11 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: # 5. Run evaluation results = evaluate( - target, data=dataset_name, evaluators=[evaluator], client=langchain_client, num_repetitions=2 + target, + data=dataset_name, + evaluators=[evaluator], + client=langchain_client, + num_repetitions=2, ) # 6. Verify results From 484f2a5c1800497c251dc1a39a29a73e1c89712c Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Fri, 6 Dec 2024 13:01:58 -0800 Subject: [PATCH 42/88] comments --- python/langsmith/client.py | 7 ++- python/langsmith/evaluation/_arunner.py | 5 +- python/langsmith/evaluation/_runner.py | 2 +- .../unit_tests/evaluation/test_runner.py | 51 +++++++++++++++---- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 450427f4e..77b6aeff2 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3603,7 +3603,12 @@ def upsert_examples_multipart( *, upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [], ) -> ls_schemas.UpsertExamplesResponse: - """Upsert examples.""" + """Upsert examples. + + .. deprecated:: 0.1.0 + This method is deprecated. Use :func:`langsmith.upload_examples_multipart` instead. + + """ # noqa: E501 if not (self.info.instance_flags or {}).get( "examples_multipart_enabled", False ): diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index ca04a57e0..1a77c75ff 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -1056,7 +1056,10 @@ def _include_attachments( sig = inspect.signature(target) params = list(sig.parameters.values()) positional_params = [ - p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + p + for p in params + if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + and p.default is p.empty ] if len(positional_params) == 0: diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index fa8b289a3..d62a8935d 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1801,8 +1801,8 @@ def _forward( experiment_name: str, metadata: dict, client: langsmith.Client, - include_attachments: bool = False, upload_results: bool, + include_attachments: bool = False, ) -> _ForwardResults: run: Optional[schemas.RunBase] = None diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index d52614f91..e376bfd39 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -21,7 +21,13 @@ from langsmith import evaluate from langsmith import schemas as ls_schemas from langsmith.client import Client -from langsmith.evaluation._arunner import aevaluate, aevaluate_existing +from langsmith.evaluation._arunner import ( + _include_attachments as a_include_attachments, +) +from langsmith.evaluation._arunner import ( + aevaluate, + aevaluate_existing, +) from langsmith.evaluation._runner import _include_attachments, evaluate_existing from langsmith.evaluation.evaluator import ( _normalize_comparison_evaluator_func, @@ -689,12 +695,26 @@ def lc_predict(inputs): return nested_predict.invoke(inputs) +async def async_just_inputs(inputs): + return None + + +async def async_just_inputs_with_attachments(inputs, attachments): + return None + + +async def async_extra_args(inputs, attachments, foo="bar"): + return None + + @pytest.mark.parametrize( - "target,expected,error_msg", + "target,expected,error_msg,is_async", [ # Valid cases - (lambda inputs: None, False, None), - (lambda inputs, attachments: None, True, None), + (lambda inputs: None, False, None, False), + (lambda inputs, attachments: None, True, None, False), + (async_just_inputs, False, None, True), + (async_just_inputs_with_attachments, True, None, True), # Invalid parameter names ( lambda x, y: None, @@ -702,6 +722,7 @@ def lc_predict(inputs): "When target function has two positional arguments, they must be named " "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' " "at index 1", + False, ), ( lambda input, attachment: None, @@ -709,6 +730,7 @@ def lc_predict(inputs): "When target function has two positional arguments, they must be named " "'inputs' and 'attachments', respectively. Received: 'input' at index 0," "'attachment' at index 1", + False, ), # Too many parameters ( @@ -718,6 +740,7 @@ def lc_predict(inputs): "Target function must accept at most two positional arguments " "(inputs, attachments)" ), + False, ), # No positional parameters ( @@ -726,17 +749,21 @@ def lc_predict(inputs): re.escape( "Target function must accept at least one positional argument (inputs)" ), + False, ), # Mixed positional and keyword - (lambda inputs, *, optional=None: None, False, None), - (lambda inputs, attachments, *, optional=None: None, True, None), + (lambda inputs, *, optional=None: None, False, None, False), + (lambda inputs, attachments, *, optional=None: None, True, None, False), # Non-callable - ("not_a_function", False, None), + ("not_a_function", False, None, False), # Runnable - (lc_predict.invoke, False, None), + (lc_predict.invoke, False, None, False), + # Positional args with defaults + (lambda inputs, attachments, foo="bar": None, True, None, False), + (async_extra_args, True, None, True), ], ) -def test_include_attachments(target, expected, error_msg): +def test_include_attachments(target, expected, error_msg, is_async): """Test the _include_attachments function with various input cases.""" try: from langchain_core.runnables import RunnableLambda @@ -750,13 +777,15 @@ def test_include_attachments(target, expected, error_msg): expected = False error_msg = None + func = _include_attachments if not is_async else a_include_attachments if error_msg is not None: with pytest.raises(ValueError, match=error_msg): - _include_attachments(target) + func(target) else: - result = _include_attachments(target) + result = func(target) assert result == expected + def summary_eval_runs_examples(runs_, examples_): return {"score": len(runs_[0].dotted_order)} From 28fe5d1d2aa285af9de231ab1b6de2c03f5517de Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Fri, 6 Dec 2024 13:15:06 -0800 Subject: [PATCH 43/88] fmt --- python/tests/integration_tests/test_client.py | 2 +- python/tests/unit_tests/evaluation/test_runner.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 8cf8a6158..b717dbc58 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1381,7 +1381,7 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: langchain_client.delete_dataset(dataset_name=dataset_name) - + def test_examples_length_validation(langchain_client: Client) -> None: """Test that mismatched lengths raise ValueError for create and update examples.""" dataset_name = "__test_examples_length_validation" + uuid4().hex[:4] diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index 30021d5c6..a1e2d79de 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -18,7 +18,7 @@ import pytest from langchain_core.runnables import chain as as_runnable -from langsmith import Client, aevaluate, evaluate, evaluate_existing, aevaluate_existing +from langsmith import Client, aevaluate, evaluate from langsmith import schemas as ls_schemas from langsmith.evaluation._arunner import ( _include_attachments as a_include_attachments, From 1e5eebfa0ba3f16d0fa6de36c2990c28cda6826b Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Fri, 6 Dec 2024 13:39:56 -0800 Subject: [PATCH 44/88] fmt --- python/langsmith/client.py | 12 +++++------- python/langsmith/evaluation/_arunner.py | 2 +- python/langsmith/evaluation/_runner.py | 2 +- python/langsmith/schemas.py | 4 ++++ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 9e63e73a5..4e32188e6 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -1684,9 +1684,7 @@ def update_run( events: Optional[Sequence[dict]] = None, extra: Optional[Dict] = None, tags: Optional[List[str]] = None, - attachments: Optional[ - Dict[str, tuple[str, bytes] | ls_schemas.Attachment] - ] = None, + attachments: Optional[ls_schemas.Attachments] = None, **kwargs: Any, ) -> None: """Update a run in the LangSmith API. @@ -3464,12 +3462,12 @@ def create_example_from_run( def _prepate_multipart_data( self, - examples: List[ - ls_schemas.ExampleUploadWithAttachments - | ls_schemas.ExampleUpsertWithAttachments + examples: Union[ + List[ls_schemas.ExampleUploadWithAttachments] + | List[ls_schemas.ExampleUpsertWithAttachments] ], include_dataset_id: bool = False, - ) -> List[MultipartPart]: + ) -> Tuple[Any, bytes]: parts: List[MultipartPart] = [] for example in examples: diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index af9714e6c..9412bf5f3 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -1055,7 +1055,7 @@ def _get_run(r: run_trees.RunTree) -> None: def _include_attachments( - target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict]], + target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable], ) -> bool: """Whether the target function accepts attachments.""" if _is_langchain_runnable(target) or not callable(target): diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 9ed6b954a..764225596 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1841,7 +1841,7 @@ def _get_run(r: rt.RunTree) -> None: *args, langsmith_extra=langsmith_extra, ) - if include_attachments: + if include_attachments and example.attachment_urls is not None: for attachment in example.attachment_urls: _, reader = example.attachment_urls[attachment] reader.seek(0) diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index cc2accec9..34de09aaa 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -81,6 +81,10 @@ def write(self, b: bytes) -> int: """Write function.""" ... + def seek(self, offset: int, whence: int = 0) -> int: + """Seek function.""" + ... + class ExampleBase(BaseModel): """Example base model.""" From e013d72da72c6f99feb024e288cdcd96f6e24929 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Fri, 6 Dec 2024 13:59:56 -0800 Subject: [PATCH 45/88] fmt --- python/tests/integration_tests/test_client.py | 1 + python/tests/unit_tests/test_client.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index b717dbc58..1c1a5a3eb 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -484,6 +484,7 @@ def test_upload_examples_multipart(langchain_client: Client): def test_upsert_examples_multipart(langchain_client: Client) -> None: """Test upserting examples with attachments via multipart endpoint.""" + langchain_client._info = {"instance_flags": {"examples_multipart_enabled": True}} dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 98e52d7b4..939aa9ad2 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -426,7 +426,11 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None: mock_session.request.return_value = mock_response mock_session_cls.return_value = mock_session - client = Client(api_url="http://localhost:1984", api_key="123") + client = Client( + api_url="http://localhost:1984", + api_key="123", + info={"instance_flags": {"examples_multipart_enabled": True}}, + ) # Create test data example_id = uuid.uuid4() @@ -451,7 +455,7 @@ def test_upsert_examples_multipart(mock_session_cls: mock.Mock) -> None: client.upsert_examples_multipart(upserts=[example]) # Verify the request - assert mock_session.request.call_count == 2 # we always make a call to /info + assert mock_session.request.call_count == 1 call_args = mock_session.request.call_args assert call_args[0][0] == "POST" From bc2d4b647cd85503ab4d6819ec3f9e59dae05a39 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Fri, 6 Dec 2024 14:22:59 -0800 Subject: [PATCH 46/88] fmt --- python/langsmith/evaluation/_runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 764225596..ebd259e14 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1053,6 +1053,7 @@ def _evaluate( runs, client, ) + manager = _ExperimentManager( data, client=client, @@ -1507,8 +1508,8 @@ def _predict( self.experiment_name, self._metadata, self.client, - self._include_attachments, self._upload_results, + self._include_attachments, ) else: @@ -1521,8 +1522,8 @@ def _predict( self.experiment_name, self._metadata, self.client, - self._include_attachments, self._upload_results, + self._include_attachments, ) for example in self.examples ] From 96f4246db88ebd73d4697e18b876e5478d720eab Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Fri, 6 Dec 2024 14:35:49 -0800 Subject: [PATCH 47/88] fix test --- python/tests/integration_tests/test_client.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 1c1a5a3eb..4d2478680 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -515,7 +515,6 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: "my_file": ("text/plain", b"more test content"), }, ) - created_examples = langchain_client.upsert_examples_multipart( upserts=[example_1, example_2] ) @@ -551,12 +550,7 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: updated_examples = langchain_client.upsert_examples_multipart( upserts=[example_1_update] ) - assert updated_examples["count"] == 1 - assert updated_examples["example_ids"][0] == str(example_id) - updated_example = langchain_client.read_example(updated_examples["example_ids"][0]) - assert updated_example.inputs["text"] == "bar baz" - assert updated_example.outputs["response"] == "foo" - + assert updated_examples["count"] == 0 # Test that adding invalid example fails # even if valid examples are added alongside example_3 = ExampleUpsertWithAttachments( @@ -579,7 +573,6 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: # Throw type errors when not passing ExampleUpsertWithAttachments with pytest.raises(AttributeError): langchain_client.upsert_examples_multipart(upserts=[{"foo": "bar"}]) - langchain_client.delete_dataset(dataset_name=dataset_name) From 887782e931eaaabff96918970d0ab4b94ff3501f Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Fri, 6 Dec 2024 14:38:31 -0800 Subject: [PATCH 48/88] x --- python/tests/integration_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 4d2478680..bc3f6f33f 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -483,7 +483,7 @@ def test_upload_examples_multipart(langchain_client: Client): def test_upsert_examples_multipart(langchain_client: Client) -> None: - """Test upserting examples with attachments via multipart endpoint.""" + """Test upserting examples with attachments via the multipart endpoint.""" langchain_client._info = {"instance_flags": {"examples_multipart_enabled": True}} dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): From 66228e8af4047d27a878edd7c51c98245c2cc629 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 08:03:44 -0800 Subject: [PATCH 49/88] defaults --- python/langsmith/client.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 4e32188e6..9fe393125 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3580,7 +3580,7 @@ def _prepate_multipart_data( def upload_examples_multipart( self, *, - uploads: List[ls_schemas.ExampleUploadWithAttachments] = [], + uploads: List[ls_schemas.ExampleUploadWithAttachments] = None, ) -> ls_schemas.UpsertExamplesResponse: """Upload examples.""" if not (self.info.instance_flags or {}).get( @@ -3589,7 +3589,8 @@ def upload_examples_multipart( raise ValueError( "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." ) - + if uploads is None: + uploads = [] encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False) dataset_ids = set([example.dataset_id for example in uploads]) if len(dataset_ids) > 1: @@ -3613,7 +3614,7 @@ def upload_examples_multipart( def upsert_examples_multipart( self, *, - upserts: List[ls_schemas.ExampleUpsertWithAttachments] = [], + upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None, ) -> ls_schemas.UpsertExamplesResponse: """Upsert examples. @@ -3627,6 +3628,8 @@ def upsert_examples_multipart( raise ValueError( "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." ) + if upserts is None: + upserts = [] encoder, data = self._prepate_multipart_data(upserts, include_dataset_id=True) From a5ee5990f47903a81d252f996d923758c02252f4 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 08:46:18 -0800 Subject: [PATCH 50/88] refactor --- python/langsmith/client.py | 29 ++++++------ python/langsmith/evaluation/_arunner.py | 6 ++- python/langsmith/evaluation/_runner.py | 8 ++-- python/langsmith/schemas.py | 30 +++++++++---- python/tests/integration_tests/test_client.py | 44 +++++-------------- 5 files changed, 56 insertions(+), 61 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 9fe393125..778911808 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3580,6 +3580,7 @@ def _prepate_multipart_data( def upload_examples_multipart( self, *, + dataset_id: ID_TYPE, uploads: List[ls_schemas.ExampleUploadWithAttachments] = None, ) -> ls_schemas.UpsertExamplesResponse: """Upload examples.""" @@ -3592,10 +3593,6 @@ def upload_examples_multipart( if uploads is None: uploads = [] encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False) - dataset_ids = set([example.dataset_id for example in uploads]) - if len(dataset_ids) > 1: - raise ValueError("All examples must be in the same dataset.") - dataset_id = list(dataset_ids)[0] response = self.request_with_retries( "POST", @@ -3823,21 +3820,21 @@ def read_example( ) example = response.json() - attachment_urls = {} + attachments_info = {} if example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() reader = io.BytesIO(response.content) - attachment_urls[key.split(".")[1]] = ( - value["presigned_url"], - reader, - ) + attachments_info[key.split(".")[1]] = { + "presigned_url": value["presigned_url"], + "reader": reader, + } del example["attachment_urls"] return ls_schemas.Example( **example, - attachment_urls=attachment_urls, + attachments_info=attachments_info, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) @@ -3910,21 +3907,21 @@ def list_examples( for i, example in enumerate( self._get_paginated_list("/examples", params=params) ): - attachment_urls = {} + attachments_info = {} if example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() reader = io.BytesIO(response.content) - attachment_urls[key.split(".")[1]] = ( - value["presigned_url"], - reader, - ) + attachments_info[key.split(".")[1]] = { + "presigned_url": value["presigned_url"], + "reader": reader, + } del example["attachment_urls"] yield ls_schemas.Example( **example, - attachment_urls=attachment_urls, + attachments_info=attachments_info, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 9412bf5f3..5a22ba305 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -1023,7 +1023,7 @@ def _get_run(r: run_trees.RunTree) -> None: with rh.tracing_context(enabled=True): try: args = ( - (example.inputs, example.attachment_urls) + (example.inputs, example.attachments_info) if include_attachments else (example.inputs,) ) @@ -1044,6 +1044,10 @@ def _get_run(r: run_trees.RunTree) -> None: client=client, ), ) + if include_attachments and example.attachments_info is not None: + for attachment in example.attachments_info: + reader = example.attachments_info[attachment]["reader"] + reader.seek(0) except Exception as e: logger.error( f"Error running target function: {e}", exc_info=True, stacklevel=1 diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index ebd259e14..fbb096484 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1834,7 +1834,7 @@ def _get_run(r: rt.RunTree) -> None: ) try: args = ( - (example.inputs, example.attachment_urls) + (example.inputs, example.attachments_info) if include_attachments else (example.inputs,) ) @@ -1842,9 +1842,9 @@ def _get_run(r: rt.RunTree) -> None: *args, langsmith_extra=langsmith_extra, ) - if include_attachments and example.attachment_urls is not None: - for attachment in example.attachment_urls: - _, reader = example.attachment_urls[attachment] + if include_attachments and example.attachments_info is not None: + for attachment in example.attachments_info: + reader = example.attachments_info[attachment]["reader"] reader.seek(0) except Exception as e: logger.error( diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 34de09aaa..b06552dcd 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -93,9 +93,6 @@ class ExampleBase(BaseModel): inputs: Dict[str, Any] = Field(default_factory=dict) outputs: Optional[Dict[str, Any]] = Field(default=None) metadata: Optional[Dict[str, Any]] = Field(default=None) - attachment_urls: Optional[Dict[str, Tuple[str, BinaryIOLike]]] = Field(default=None) - """Dictionary with attachment names as keys and a tuple of the S3 url - and a reader of the data for the file.""" class Config: """Configuration class for the schema.""" @@ -112,16 +109,30 @@ class ExampleCreate(ExampleBase): split: Optional[Union[str, List[str]]] = None -class ExampleUpsertWithAttachments(ExampleCreate): - """Example create with attachments.""" +class ExampleUploadWithAttachments(BaseModel): + """Example upload with attachments.""" + id: Optional[UUID] + created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) + inputs: Dict[str, Any] = Field(default_factory=dict) + outputs: Optional[Dict[str, Any]] = Field(default=None) + metadata: Optional[Dict[str, Any]] = Field(default=None) + split: Optional[Union[str, List[str]]] = None attachments: Optional[Attachments] = None -class ExampleUploadWithAttachments(ExampleUpsertWithAttachments): - """Example upload with attachments.""" +class ExampleUpsertWithAttachments(ExampleUploadWithAttachments): + """Example create with attachments.""" + + dataset_id: UUID - pass + +class AttachmentInfo(TypedDict): + """Info for an attachment.""" + + presigned_url: str + reader: BinaryIOLike + # TODO: add mime type class Example(ExampleBase): @@ -135,6 +146,9 @@ class Example(ExampleBase): modified_at: Optional[datetime] = Field(default=None) runs: List[Run] = Field(default_factory=list) source_run_id: Optional[UUID] = None + attachments_info: Optional[Dict[str, AttachmentInfo]] = Field(default=None) + """Dictionary with attachment names as keys and a tuple of the S3 url + and a reader of the data for the file.""" _host_url: Optional[str] = PrivateAttr(default=None) _tenant_id: Optional[UUID] = PrivateAttr(default=None) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index bc3f6f33f..e175efc9e 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -393,7 +393,6 @@ def test_upload_examples_multipart(langchain_client: Client): example_id = uuid4() example_1 = ExampleUploadWithAttachments( id=example_id, - dataset_id=dataset.id, inputs={"text": "hello world"}, attachments={ "test_file": ("text/plain", b"test content"), @@ -402,13 +401,11 @@ def test_upload_examples_multipart(langchain_client: Client): # Test example with minimum required fields example_2 = ExampleUploadWithAttachments( - dataset_id=dataset.id, inputs={"text": "minimal example"}, ) # Test example with outputs and multiple attachments example_3 = ExampleUploadWithAttachments( - dataset_id=dataset.id, inputs={"text": "example with outputs"}, outputs={"response": "test response"}, attachments={ @@ -419,7 +416,7 @@ def test_upload_examples_multipart(langchain_client: Client): # Test uploading multiple examples at once created_examples = langchain_client.upload_examples_multipart( - uploads=[example_1, example_2, example_3] + dataset_id=dataset.id, uploads=[example_1, example_2, example_3] ) assert created_examples["count"] == 3 @@ -439,7 +436,7 @@ def test_upload_examples_multipart(langchain_client: Client): # Verify example with ID was created with correct ID example_with_id = [ex for ex in examples if ex.id == example_id][0] assert example_with_id.inputs["text"] == "hello world" - assert "test_file" in example_with_id.attachment_urls + assert "test_file" in example_with_id.attachments_info # Verify example with outputs and multiple attachments example_with_outputs = next( @@ -447,35 +444,20 @@ def test_upload_examples_multipart(langchain_client: Client): for ex in examples if ex.outputs and ex.outputs.get("response") == "test response" ) - assert len(example_with_outputs.attachment_urls) == 2 - assert "file1" in example_with_outputs.attachment_urls - assert "file2" in example_with_outputs.attachment_urls + assert len(example_with_outputs.attachments_info) == 2 + assert "file1" in example_with_outputs.attachments_info + assert "file2" in example_with_outputs.attachments_info # Test uploading to non-existent dataset fails fake_id = uuid4() with pytest.raises(LangSmithNotFoundError): langchain_client.upload_examples_multipart( + dataset_id=fake_id, uploads=[ ExampleUploadWithAttachments( - dataset_id=fake_id, inputs={"text": "should fail"}, ) - ] - ) - - # Test uploading examples to different datasets fails - with pytest.raises(ValueError, match="All examples must be in the same dataset"): - langchain_client.upload_examples_multipart( - uploads=[ - ExampleUploadWithAttachments( - dataset_id=dataset.id, - inputs={"text": "example 1"}, - ), - ExampleUploadWithAttachments( - dataset_id=uuid4(), - inputs={"text": "example 2"}, - ), - ] + ], ) # Clean up @@ -1283,8 +1265,7 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None: ) # 2. Create example with attachments - example = ExampleUpsertWithAttachments( - dataset_id=dataset.id, + example = ExampleUploadWithAttachments( inputs={"question": "What is shown in the image?"}, outputs={"answer": "test image"}, attachments={ @@ -1292,13 +1273,13 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None: }, ) - langchain_client.upsert_examples_multipart(upserts=[example]) + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) # 3. Define target function that uses attachments def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]: # Verify we receive the attachment data assert "image" in attachments - image_url, image_data = attachments["image"] + image_data = attachments["image"]["reader"] assert image_data.read() == b"fake image data for testing" return {"answer": "test image"} @@ -1345,12 +1326,11 @@ def test_evaluate_with_no_attachments(langchain_client: Client) -> None: ) # Verify we can create example the new way without attachments - example = ExampleUpsertWithAttachments( - dataset_id=dataset.id, + example = ExampleUploadWithAttachments( inputs={"question": "What is 3+1?"}, outputs={"answer": "4"}, ) - langchain_client.upsert_examples_multipart(upserts=[example]) + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]: # Verify we receive an empty attachments dict From 2f1e6be60774db3e69ae29b91130005a88011214 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 08:54:04 -0800 Subject: [PATCH 51/88] fmt --- python/langsmith/client.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 778911808..c51f31d4a 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3469,6 +3469,13 @@ def _prepate_multipart_data( include_dataset_id: bool = False, ) -> Tuple[Any, bytes]: parts: List[MultipartPart] = [] + if include_dataset_id: + if not isinstance(examples[0], ls_schemas.ExampleUpsertWithAttachments): + raise ValueError( + "The examples must be of type ExampleUpsertWithAttachments" + " if include_dataset_id is True" + ) + dataset_id = examples[0].dataset_id for example in examples: if example.id is not None: @@ -3477,7 +3484,7 @@ def _prepate_multipart_data( example_id = str(uuid.uuid4()) example_body = { - **({"dataset_id": example.dataset_id} if include_dataset_id else {}), + **({"dataset_id": dataset_id} if include_dataset_id else {}), "created_at": example.created_at, } if example.metadata is not None: @@ -3581,7 +3588,7 @@ def upload_examples_multipart( self, *, dataset_id: ID_TYPE, - uploads: List[ls_schemas.ExampleUploadWithAttachments] = None, + uploads: Optional[List[ls_schemas.ExampleUploadWithAttachments]] = None, ) -> ls_schemas.UpsertExamplesResponse: """Upload examples.""" if not (self.info.instance_flags or {}).get( @@ -3611,7 +3618,7 @@ def upload_examples_multipart( def upsert_examples_multipart( self, *, - upserts: List[ls_schemas.ExampleUpsertWithAttachments] = None, + upserts: Optional[List[ls_schemas.ExampleUpsertWithAttachments]] = None, ) -> ls_schemas.UpsertExamplesResponse: """Upsert examples. From c9ade2e10689f0754e5b59634cafb5426ef838f9 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 09:25:56 -0800 Subject: [PATCH 52/88] fmt --- python/langsmith/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index c51f31d4a..73190c4b6 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3828,7 +3828,7 @@ def read_example( example = response.json() attachments_info = {} - if example["attachment_urls"]: + if "attachment_urls" in example and example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() @@ -3915,7 +3915,7 @@ def list_examples( self._get_paginated_list("/examples", params=params) ): attachments_info = {} - if example["attachment_urls"]: + if "attachment_urls" in example and example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() From 4576779a9b71097896a1e336380907aa3ef1d31e Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 10:08:06 -0800 Subject: [PATCH 53/88] fmt --- python/langsmith/client.py | 1 - python/tests/integration_tests/test_client.py | 4 ---- 2 files changed, 5 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 73190c4b6..49831e526 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -948,7 +948,6 @@ def _get_paginated_list( params=params_, ) items = response.json() - if not items: break yield from items diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index e175efc9e..371387c87 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1253,10 +1253,6 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: def test_evaluate_with_attachments(langchain_client: Client) -> None: """Test evaluating examples with attachments.""" dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] - langchain_client = Client( - api_key="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", - api_url="https://dev.api.smith.langchain.com", - ) # 1. Create dataset dataset = langchain_client.create_dataset( dataset_name, From 578a715af42b64940a4f301a5b5f39708006b44c Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 10:39:57 -0800 Subject: [PATCH 54/88] changes --- python/langsmith/client.py | 16 ++++++++-------- python/langsmith/evaluation/_arunner.py | 8 ++++---- python/langsmith/evaluation/_runner.py | 8 ++++---- python/langsmith/schemas.py | 2 +- python/tests/integration_tests/test_client.py | 8 ++++---- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 49831e526..eca9c1614 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3826,13 +3826,13 @@ def read_example( ) example = response.json() - attachments_info = {} - if "attachment_urls" in example and example["attachment_urls"]: + attachment_urls = {} + if example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() reader = io.BytesIO(response.content) - attachments_info[key.split(".")[1]] = { + attachment_urls[key.split(".")[1]] = { "presigned_url": value["presigned_url"], "reader": reader, } @@ -3840,7 +3840,7 @@ def read_example( return ls_schemas.Example( **example, - attachments_info=attachments_info, + attachment_urls=attachment_urls, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) @@ -3913,13 +3913,13 @@ def list_examples( for i, example in enumerate( self._get_paginated_list("/examples", params=params) ): - attachments_info = {} - if "attachment_urls" in example and example["attachment_urls"]: + attachment_urls = {} + if example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() reader = io.BytesIO(response.content) - attachments_info[key.split(".")[1]] = { + attachment_urls[key.split(".")[1]] = { "presigned_url": value["presigned_url"], "reader": reader, } @@ -3927,7 +3927,7 @@ def list_examples( yield ls_schemas.Example( **example, - attachments_info=attachments_info, + attachment_urls=attachment_urls, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 5a22ba305..729166add 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -1023,7 +1023,7 @@ def _get_run(r: run_trees.RunTree) -> None: with rh.tracing_context(enabled=True): try: args = ( - (example.inputs, example.attachments_info) + (example.inputs, example.attachment_urls) if include_attachments else (example.inputs,) ) @@ -1044,9 +1044,9 @@ def _get_run(r: run_trees.RunTree) -> None: client=client, ), ) - if include_attachments and example.attachments_info is not None: - for attachment in example.attachments_info: - reader = example.attachments_info[attachment]["reader"] + if include_attachments and example.attachment_urls is not None: + for attachment in example.attachment_urls: + reader = example.attachment_urls[attachment]["reader"] reader.seek(0) except Exception as e: logger.error( diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index fbb096484..199d8fa22 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1834,7 +1834,7 @@ def _get_run(r: rt.RunTree) -> None: ) try: args = ( - (example.inputs, example.attachments_info) + (example.inputs, example.attachment_urls) if include_attachments else (example.inputs,) ) @@ -1842,9 +1842,9 @@ def _get_run(r: rt.RunTree) -> None: *args, langsmith_extra=langsmith_extra, ) - if include_attachments and example.attachments_info is not None: - for attachment in example.attachments_info: - reader = example.attachments_info[attachment]["reader"] + if include_attachments and example.attachment_urls is not None: + for attachment in example.attachment_urls: + reader = example.attachment_urls[attachment]["reader"] reader.seek(0) except Exception as e: logger.error( diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index b06552dcd..41fa76cb0 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -146,7 +146,7 @@ class Example(ExampleBase): modified_at: Optional[datetime] = Field(default=None) runs: List[Run] = Field(default_factory=list) source_run_id: Optional[UUID] = None - attachments_info: Optional[Dict[str, AttachmentInfo]] = Field(default=None) + attachment_urls: Optional[Dict[str, AttachmentInfo]] = Field(default=None) """Dictionary with attachment names as keys and a tuple of the S3 url and a reader of the data for the file.""" _host_url: Optional[str] = PrivateAttr(default=None) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 371387c87..0281b1df6 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -436,7 +436,7 @@ def test_upload_examples_multipart(langchain_client: Client): # Verify example with ID was created with correct ID example_with_id = [ex for ex in examples if ex.id == example_id][0] assert example_with_id.inputs["text"] == "hello world" - assert "test_file" in example_with_id.attachments_info + assert "test_file" in example_with_id.attachment_urls # Verify example with outputs and multiple attachments example_with_outputs = next( @@ -444,9 +444,9 @@ def test_upload_examples_multipart(langchain_client: Client): for ex in examples if ex.outputs and ex.outputs.get("response") == "test response" ) - assert len(example_with_outputs.attachments_info) == 2 - assert "file1" in example_with_outputs.attachments_info - assert "file2" in example_with_outputs.attachments_info + assert len(example_with_outputs.attachment_urls) == 2 + assert "file1" in example_with_outputs.attachment_urls + assert "file2" in example_with_outputs.attachment_urls # Test uploading to non-existent dataset fails fake_id = uuid4() From e4e3068accbbb4a9793d36d4c2fcd4fcc5daac92 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 10:54:12 -0800 Subject: [PATCH 55/88] fmt --- python/tests/integration_tests/test_client.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 0281b1df6..338ab482d 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -502,14 +502,12 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: ) assert created_examples["count"] == 2 - created_example_1 = langchain_client.read_example( - created_examples["example_ids"][0] - ) + created_example_1 = langchain_client.read_example(example_id) assert created_example_1.inputs["text"] == "hello world" assert created_example_1.outputs is None created_example_2 = langchain_client.read_example( - created_examples["example_ids"][1] + [id_ for id_ in created_examples["example_ids"] if id_ != example_id][0] ) assert created_example_2.inputs["text"] == "foo bar" assert created_example_2.outputs["response"] == "baz" From 6e91e05324ace1788b3d2b5457743169becdd74a Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 11:08:20 -0800 Subject: [PATCH 56/88] x --- python/tests/integration_tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 338ab482d..d808adbfe 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -507,7 +507,7 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: assert created_example_1.outputs is None created_example_2 = langchain_client.read_example( - [id_ for id_ in created_examples["example_ids"] if id_ != example_id][0] + [id_ for id_ in created_examples["example_ids"] if id_ != str(example_id)][0] ) assert created_example_2.inputs["text"] == "foo bar" assert created_example_2.outputs["response"] == "baz" From 020d07436de6eeef12214699397c5f0883026392 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 11:16:03 -0800 Subject: [PATCH 57/88] fmt --- python/langsmith/client.py | 7 +++++++ python/tests/integration_tests/test_client.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index eca9c1614..e9196a4c9 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3477,6 +3477,13 @@ def _prepate_multipart_data( dataset_id = examples[0].dataset_id for example in examples: + if not isinstance( + example, ls_schemas.ExampleUploadWithAttachments + ) and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments): + raise ValueError( + "The examples must be of type ExampleUploadWithAttachments" + " or ExampleUpsertWithAttachments" + ) if example.id is not None: example_id = str(example.id) else: diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index d808adbfe..53e940c06 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -551,7 +551,7 @@ def test_upsert_examples_multipart(langchain_client: Client) -> None: assert len(all_examples_in_dataset) == 2 # Throw type errors when not passing ExampleUpsertWithAttachments - with pytest.raises(AttributeError): + with pytest.raises(ValueError): langchain_client.upsert_examples_multipart(upserts=[{"foo": "bar"}]) langchain_client.delete_dataset(dataset_name=dataset_name) From 1abe4f9a6ffd805fc902540df5e60cb7f6449c58 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 13:02:26 -0800 Subject: [PATCH 58/88] flag --- python/langsmith/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index e9196a4c9..a0c98cdcd 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3598,7 +3598,7 @@ def upload_examples_multipart( ) -> ls_schemas.UpsertExamplesResponse: """Upload examples.""" if not (self.info.instance_flags or {}).get( - "examples_multipart_enabled", False + "dataset_examples_multipart_enabled", False ): raise ValueError( "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." From 39be3c772b7b32ead519bd586c8dabd8143f18aa Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 13:14:29 -0800 Subject: [PATCH 59/88] flags in tests --- python/tests/integration_tests/test_client.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 53e940c06..ae866521c 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -379,6 +379,9 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: def test_upload_examples_multipart(langchain_client: Client): """Test uploading examples with attachments via multipart endpoint.""" + langchain_client._info = { + "instance_flags": {"dataset_examples_multipart_enabled": True} + } dataset_name = "__test_upload_examples_multipart" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) @@ -1245,11 +1248,11 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: langchain_client.delete_dataset(dataset_id=dataset.id) -@pytest.mark.skip( - reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first" -) def test_evaluate_with_attachments(langchain_client: Client) -> None: """Test evaluating examples with attachments.""" + langchain_client._info = { + "instance_flags": {"dataset_examples_multipart_enabled": True} + } dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] # 1. Create dataset dataset = langchain_client.create_dataset( @@ -1305,6 +1308,9 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: def test_evaluate_with_no_attachments(langchain_client: Client) -> None: """Test evaluating examples without attachments using a target with attachments.""" + langchain_client._info = { + "instance_flags": {"dataset_examples_multipart_enabled": True} + } dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4] dataset = langchain_client.create_dataset( dataset_name, From 5c2c74dbd2bf7abe661e89d8249dd67bc31f5640 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 14:16:55 -0800 Subject: [PATCH 60/88] attachment_urls -> attachments --- python/langsmith/client.py | 12 ++++++------ python/langsmith/evaluation/_arunner.py | 8 ++++---- python/langsmith/evaluation/_runner.py | 8 ++++---- python/langsmith/schemas.py | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index a0c98cdcd..82c25e8da 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3833,13 +3833,13 @@ def read_example( ) example = response.json() - attachment_urls = {} + attachments = {} if example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() reader = io.BytesIO(response.content) - attachment_urls[key.split(".")[1]] = { + attachments[key.split(".")[1]] = { "presigned_url": value["presigned_url"], "reader": reader, } @@ -3847,7 +3847,7 @@ def read_example( return ls_schemas.Example( **example, - attachment_urls=attachment_urls, + attachments=attachments, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) @@ -3920,13 +3920,13 @@ def list_examples( for i, example in enumerate( self._get_paginated_list("/examples", params=params) ): - attachment_urls = {} + attachments = {} if example["attachment_urls"]: for key, value in example["attachment_urls"].items(): response = requests.get(value["presigned_url"], stream=True) response.raise_for_status() reader = io.BytesIO(response.content) - attachment_urls[key.split(".")[1]] = { + attachments[key.split(".")[1]] = { "presigned_url": value["presigned_url"], "reader": reader, } @@ -3934,7 +3934,7 @@ def list_examples( yield ls_schemas.Example( **example, - attachment_urls=attachment_urls, + attachments=attachments, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), ) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 729166add..ccbad302b 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -1023,7 +1023,7 @@ def _get_run(r: run_trees.RunTree) -> None: with rh.tracing_context(enabled=True): try: args = ( - (example.inputs, example.attachment_urls) + (example.inputs, example.attachments) if include_attachments else (example.inputs,) ) @@ -1044,9 +1044,9 @@ def _get_run(r: run_trees.RunTree) -> None: client=client, ), ) - if include_attachments and example.attachment_urls is not None: - for attachment in example.attachment_urls: - reader = example.attachment_urls[attachment]["reader"] + if include_attachments and example.attachments is not None: + for attachment in example.attachments: + reader = example.attachments[attachment]["reader"] reader.seek(0) except Exception as e: logger.error( diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 199d8fa22..5a00585d5 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1834,7 +1834,7 @@ def _get_run(r: rt.RunTree) -> None: ) try: args = ( - (example.inputs, example.attachment_urls) + (example.inputs, example.attachments) if include_attachments else (example.inputs,) ) @@ -1842,9 +1842,9 @@ def _get_run(r: rt.RunTree) -> None: *args, langsmith_extra=langsmith_extra, ) - if include_attachments and example.attachment_urls is not None: - for attachment in example.attachment_urls: - reader = example.attachment_urls[attachment]["reader"] + if include_attachments and example.attachments is not None: + for attachment in example.attachments: + reader = example.attachments[attachment]["reader"] reader.seek(0) except Exception as e: logger.error( diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 41fa76cb0..5b226a830 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -146,7 +146,7 @@ class Example(ExampleBase): modified_at: Optional[datetime] = Field(default=None) runs: List[Run] = Field(default_factory=list) source_run_id: Optional[UUID] = None - attachment_urls: Optional[Dict[str, AttachmentInfo]] = Field(default=None) + attachments: Optional[Dict[str, AttachmentInfo]] = Field(default=None) """Dictionary with attachment names as keys and a tuple of the S3 url and a reader of the data for the file.""" _host_url: Optional[str] = PrivateAttr(default=None) From 2b385b631ab32348fd00123993bc38a6438f49ee Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 14:20:06 -0800 Subject: [PATCH 61/88] x --- python/tests/integration_tests/test_client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index ae866521c..83efc397e 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -439,7 +439,7 @@ def test_upload_examples_multipart(langchain_client: Client): # Verify example with ID was created with correct ID example_with_id = [ex for ex in examples if ex.id == example_id][0] assert example_with_id.inputs["text"] == "hello world" - assert "test_file" in example_with_id.attachment_urls + assert "test_file" in example_with_id.attachments # Verify example with outputs and multiple attachments example_with_outputs = next( @@ -447,9 +447,9 @@ def test_upload_examples_multipart(langchain_client: Client): for ex in examples if ex.outputs and ex.outputs.get("response") == "test response" ) - assert len(example_with_outputs.attachment_urls) == 2 - assert "file1" in example_with_outputs.attachment_urls - assert "file2" in example_with_outputs.attachment_urls + assert len(example_with_outputs.attachments) == 2 + assert "file1" in example_with_outputs.attachments + assert "file2" in example_with_outputs.attachments # Test uploading to non-existent dataset fails fake_id = uuid4() From 0daf2459285529b6d151cdf4a48a817d2cc02018 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Mon, 9 Dec 2024 14:23:49 -0800 Subject: [PATCH 62/88] fmt --- python/langsmith/evaluation/_arunner.py | 45 +------------------------ python/langsmith/evaluation/_runner.py | 34 +++++++++---------- 2 files changed, 18 insertions(+), 61 deletions(-) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 729166add..6ba6095f8 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -5,7 +5,6 @@ import asyncio import concurrent.futures as cf import datetime -import inspect import logging import pathlib import uuid @@ -41,6 +40,7 @@ _ExperimentManagerMixin, _extract_feedback_keys, _ForwardResults, + _include_attachments, _is_langchain_runnable, _load_examples_map, _load_experiment, @@ -1058,49 +1058,6 @@ def _get_run(r: run_trees.RunTree) -> None: ) -def _include_attachments( - target: Union[ATARGET_T, Iterable[schemas.Run], AsyncIterable[dict], Runnable], -) -> bool: - """Whether the target function accepts attachments.""" - if _is_langchain_runnable(target) or not callable(target): - return False - # Check function signature - sig = inspect.signature(target) - params = list(sig.parameters.values()) - positional_params = [ - p - for p in params - if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) - and p.default is p.empty - ] - - if len(positional_params) == 0: - raise ValueError( - "Target function must accept at least one positional argument (inputs)" - ) - elif len(positional_params) > 2: - raise ValueError( - "Target function must accept at most two positional " - "arguments (inputs, attachments)" - ) - elif len(positional_params) == 2: - mismatches = [] - for i, (p, expected) in enumerate( - zip(positional_params, ("inputs", "attachments")) - ): - if p.name != expected: - mismatches.append((i, p.name)) - - if mismatches: - raise ValueError( - "When target function has two positional arguments, they must be named " - "'inputs' and 'attachments', respectively. Received: " - + ",".join(f"'{p}' at index {i}" for i, p in mismatches) - ) - - return len(positional_params) == 2 - - def _ensure_async_traceable( target: ATARGET_T, ) -> rh.SupportsLangsmithExtra[[dict], Awaitable]: diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 199d8fa22..bc24585d0 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1913,9 +1913,7 @@ def _ensure_traceable( return fn -def _include_attachments( - target: Union[TARGET_T, Iterable[schemas.Run], Runnable], -) -> bool: +def _include_attachments(target: Any) -> bool: """Whether the target function accepts attachments.""" if _is_langchain_runnable(target) or not callable(target): return False @@ -1923,37 +1921,39 @@ def _include_attachments( sig = inspect.signature(target) params = list(sig.parameters.values()) positional_params = [ - p - for p in params - if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) - and p.default is p.empty + p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) ] + positional_no_default = [p for p in positional_params if p.default is p.empty] if len(positional_params) == 0: raise ValueError( - "Target function must accept at least one positional argument (inputs)" + "Target function must accept at least one positional argument (inputs)." ) - elif len(positional_params) > 2: + elif len(positional_no_default) > 2: raise ValueError( - "Target function must accept at most two positional " - "arguments (inputs, attachments)" + "Target function must accept at most two " + "arguments without default values: (inputs, attachments)." ) - elif len(positional_params) == 2: + else: mismatches = [] + num_args = 0 for i, (p, expected) in enumerate( zip(positional_params, ("inputs", "attachments")) ): if p.name != expected: mismatches.append((i, p.name)) + else: + num_args += 1 if mismatches: - raise ValueError( - "When target function has two positional arguments, they must be named " - "'inputs' and 'attachments', respectively. Received: " - + ",".join(f"'{p}' at index {i}" for i, p in mismatches) + msg = ( + "Target function is expected to have a first positional argument " + "'inputs' and optionally a second positional argument 'attachments'. " + "Received: " + ", ".join(f"'{p}' at index {i}" for i, p in mismatches) ) + raise ValueError(msg) - return len(positional_params) == 2 + return num_args == 2 def _resolve_experiment( From c8a2b01e0625312884233c879316d1a3a4037b9a Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 14:48:48 -0800 Subject: [PATCH 63/88] undo --- python/langsmith/_internal/_operations.py | 24 +++++++++-------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index 3f82f5f9a..cc615d60d 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -214,7 +214,6 @@ def serialized_run_operation_to_multipart_parts_and_context( op: SerializedRunOperation, ) -> MultipartPartsAndContext: acc_parts: list[MultipartPart] = [] - valb: Union[bytes, Path] # this is main object, minus inputs/outputs/events/attachments acc_parts.append( ( @@ -257,22 +256,17 @@ def serialized_run_operation_to_multipart_parts_and_context( ) continue - if isinstance(valb, Path): - # TODO: actually deal with this case - # This is just for speed of getting something out - continue - else: - acc_parts.append( + acc_parts.append( + ( + f"attachment.{op.id}.{n}", ( - f"attachment.{op.id}.{n}", - ( - None, - valb, - content_type, - {"Content-Length": str(len(valb))}, - ), - ) + None, + valb, + content_type, + {"Content-Length": str(len(valb))}, + ), ) + ) return MultipartPartsAndContext( acc_parts, f"trace={op.trace_id},id={op.id}", From 8033b7e1dc85d5e31cb48e970ae5591730bc484e Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 14:49:06 -0800 Subject: [PATCH 64/88] undo --- python/langsmith/_internal/_operations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index cc615d60d..c68c17499 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -3,7 +3,6 @@ import itertools import logging import uuid -from pathlib import Path from typing import Literal, Optional, Union, cast from langsmith import schemas as ls_schemas From 114a79d8cf121cba285bcd41c5c58adee0b96398 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 14:49:28 -0800 Subject: [PATCH 65/88] fix --- python/langsmith/_internal/_operations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index c68c17499..24c40efa0 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -213,6 +213,7 @@ def serialized_run_operation_to_multipart_parts_and_context( op: SerializedRunOperation, ) -> MultipartPartsAndContext: acc_parts: list[MultipartPart] = [] + # this is main object, minus inputs/outputs/events/attachments acc_parts.append( ( From b524f7235320974d833c0b3b63ef68cd3248bc8b Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 14:49:52 -0800 Subject: [PATCH 66/88] fix --- python/langsmith/_internal/_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py index 24c40efa0..66decff0f 100644 --- a/python/langsmith/_internal/_operations.py +++ b/python/langsmith/_internal/_operations.py @@ -213,7 +213,7 @@ def serialized_run_operation_to_multipart_parts_and_context( op: SerializedRunOperation, ) -> MultipartPartsAndContext: acc_parts: list[MultipartPart] = [] - + # this is main object, minus inputs/outputs/events/attachments acc_parts.append( ( From 23187f172276b1b18c5b44a42abc4f5ca1019cc8 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 15:13:49 -0800 Subject: [PATCH 67/88] test fix --- python/langsmith/client.py | 6 +-- .../unit_tests/evaluation/test_runner.py | 48 ++++++++++++++----- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 82c25e8da..0f39aa9c0 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3843,10 +3843,9 @@ def read_example( "presigned_url": value["presigned_url"], "reader": reader, } - del example["attachment_urls"] return ls_schemas.Example( - **example, + **{k: v for k, v in example.items() if k != "attachment_urls"}, attachments=attachments, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), @@ -3930,10 +3929,9 @@ def list_examples( "presigned_url": value["presigned_url"], "reader": reader, } - del example["attachment_urls"] yield ls_schemas.Example( - **example, + **{k: v for k, v in example.items() if k != "attachment_urls"}, attachments=attachments, _host_url=self._host_url, _tenant_id=self._get_optional_tenant_id(), diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index a1e2d79de..87ebe6042 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -11,7 +11,7 @@ import uuid from datetime import datetime, timezone from threading import Lock -from typing import Callable, List +from typing import Any, Callable, Dict, List, Tuple from unittest import mock from unittest.mock import MagicMock @@ -53,7 +53,9 @@ def request(self, verb: str, endpoint: str, *args, **kwargs): return res elif endpoint == "http://localhost:1984/examples": res = MagicMock() - res.json.return_value = [e.dict() for e in self.ds_examples] + res.json.return_value = [ + e.dict() if not isinstance(e, dict) else e for e in self.ds_examples + ] return res elif endpoint == "http://localhost:1984/sessions": res = {} # type: ignore @@ -143,14 +145,23 @@ def _wait_until(condition: Callable, timeout: int = 8): raise TimeoutError("Condition not met") -def _create_example(idx: int) -> ls_schemas.Example: +def _create_example(idx: int) -> Tuple[ls_schemas.Example, Dict[str, Any]]: + _id = uuid.uuid4() + _created_at = datetime.now(timezone.utc) return ls_schemas.Example( - id=uuid.uuid4(), + id=_id, inputs={"in": idx}, outputs={"answer": idx + 1}, dataset_id="00886375-eb2a-4038-9032-efff60309896", - created_at=datetime.now(timezone.utc), - ) + created_at=_created_at, + ), { + "id": _id, + "dataset_id": "00886375-eb2a-4038-9032-efff60309896", + "created_at": _created_at, + "inputs": {"in": idx}, + "outputs": {"answer": idx + 1}, + "attachment_urls": None, + } @pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher") @@ -166,10 +177,13 @@ def test_evaluate_results( SPLIT_SIZE = 3 NUM_REPETITIONS = 4 - ds_examples = [_create_example(i) for i in range(10)] + ds_example_responses = [_create_example(i) for i in range(10)] + ds_examples = [e[0] for e in ds_example_responses] dev_split = random.sample(ds_examples, SPLIT_SIZE) tenant_id = str(uuid.uuid4()) - fake_request = FakeRequest(ds_id, ds_name, ds_examples, tenant_id) + fake_request = FakeRequest( + ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id + ) session.request = fake_request.request client = Client( api_url="http://localhost:1984", @@ -393,7 +407,12 @@ def eval2(x, y, inputs): _normalize_evaluator_func(eval_) with pytest.raises(ValueError, match="Invalid evaluator function."): - evaluate((lambda x: x), data=ds_examples, evaluators=[eval_], client=client) + evaluate( + (lambda inputs: inputs), + data=ds_examples, + evaluators=[eval_], + client=client, + ) def test_evaluate_raises_for_async(): @@ -437,10 +456,13 @@ async def test_aevaluate_results( SPLIT_SIZE = 3 NUM_REPETITIONS = 4 - ds_examples = [_create_example(i) for i in range(10)] + ds_example_responses = [_create_example(i) for i in range(10)] + ds_examples = [e[0] for e in ds_example_responses] dev_split = random.sample(ds_examples, SPLIT_SIZE) tenant_id = str(uuid.uuid4()) - fake_request = FakeRequest(ds_id, ds_name, ds_examples, tenant_id) + fake_request = FakeRequest( + ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id + ) session.request = fake_request.request client = Client( api_url="http://localhost:1984", @@ -664,8 +686,8 @@ async def eval2(x, y, inputs): evaluators = [eval1, eval2] - async def atarget(x): - return x + async def atarget(inputs): + return inputs for eval_ in evaluators: with pytest.raises(ValueError, match="Invalid evaluator function."): From 5471e88b566fac494f056caf11dc42a92ebbdd9f Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 15:17:37 -0800 Subject: [PATCH 68/88] fmt --- python/langsmith/schemas.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 5b226a830..30a65a018 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -5,7 +5,6 @@ from datetime import datetime, timedelta, timezone from decimal import Decimal from enum import Enum -from pathlib import Path from typing import ( Any, Dict, @@ -64,7 +63,7 @@ def my_function(bar: int, my_val: Attachment): data: bytes -Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, Tuple[str, Path]]] +Attachments = Dict[str, Union[Tuple[str, bytes], Attachment]] """Attachments associated with the run. Each entry is a tuple of (mime_type, bytes), or (mime_type, file_path)""" From 49246d06872857babfc607701abbea9ed996c5ec Mon Sep 17 00:00:00 2001 From: Bagatur Date: Mon, 9 Dec 2024 15:51:35 -0800 Subject: [PATCH 69/88] fmt --- python/langsmith/evaluation/_runner.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 805cfad03..51b464e68 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1934,26 +1934,13 @@ def _include_attachments(target: Any) -> bool: "Target function must accept at most two " "arguments without default values: (inputs, attachments)." ) - else: - mismatches = [] - num_args = 0 - for i, (p, expected) in enumerate( - zip(positional_params, ("inputs", "attachments")) - ): - if p.name != expected: - mismatches.append((i, p.name)) - else: - num_args += 1 - - if mismatches: - msg = ( - "Target function is expected to have a first positional argument " - "'inputs' and optionally a second positional argument 'attachments'. " - "Received: " + ", ".join(f"'{p}' at index {i}" for i, p in mismatches) - ) + elif len(positional_no_default) == 2: + if [p.name for p in positional_no_default] != ["inputs", "attachments"]: + msg = "" raise ValueError(msg) - - return num_args == 2 + return True + else: + return [p.name for p in positional_params[:2]] == ["inputs", "attachments"] def _resolve_experiment( From b0921e06cd16ba0dbb78ad0dc7d3669273614900 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 15:57:54 -0800 Subject: [PATCH 70/88] tests --- python/langsmith/evaluation/_runner.py | 7 +++-- .../unit_tests/evaluation/test_runner.py | 26 +++++++++---------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 51b464e68..836aaabe4 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1936,8 +1936,11 @@ def _include_attachments(target: Any) -> bool: ) elif len(positional_no_default) == 2: if [p.name for p in positional_no_default] != ["inputs", "attachments"]: - msg = "" - raise ValueError(msg) + raise ValueError( + "When passing 2 positional arguments, they must be named " + "'inputs' and 'attachments', respectively. Received: " + f"{[p.name for p in positional_no_default]}" + ) return True else: return [p.name for p in positional_params[:2]] == ["inputs", "attachments"] diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index 87ebe6042..04b269100 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -20,9 +20,6 @@ from langsmith import Client, aevaluate, evaluate from langsmith import schemas as ls_schemas -from langsmith.evaluation._arunner import ( - _include_attachments as a_include_attachments, -) from langsmith.evaluation._runner import _include_attachments from langsmith.evaluation.evaluator import ( _normalize_comparison_evaluator_func, @@ -738,17 +735,19 @@ async def async_extra_args(inputs, attachments, foo="bar"): ( lambda x, y: None, None, - "When target function has two positional arguments, they must be named " - "'inputs' and 'attachments', respectively. Received: 'x' at index 0,'y' " - "at index 1", + re.escape( + "When passing 2 positional arguments, they must be named 'inputs' and " + "'attachments', respectively. Received: ['x', 'y']" + ), False, ), ( lambda input, attachment: None, None, - "When target function has two positional arguments, they must be named " - "'inputs' and 'attachments', respectively. Received: 'input' at index 0," - "'attachment' at index 1", + re.escape( + "When passing 2 positional arguments, they must be named 'inputs' and " + "'attachments', respectively. Received: ['input', 'attachment']" + ), False, ), # Too many parameters @@ -756,8 +755,8 @@ async def async_extra_args(inputs, attachments, foo="bar"): lambda inputs, attachments, extra: None, None, re.escape( - "Target function must accept at most two positional arguments " - "(inputs, attachments)" + "Target function must accept at most two arguments without " + "default values: (inputs, attachments)." ), False, ), @@ -796,12 +795,11 @@ def test_include_attachments(target, expected, error_msg, is_async): expected = False error_msg = None - func = _include_attachments if not is_async else a_include_attachments if error_msg is not None: with pytest.raises(ValueError, match=error_msg): - func(target) + _include_attachments(target) else: - result = func(target) + result = _include_attachments(target) assert result == expected From 70c3f3c1a7c084ed545781228cc07e89677cd46e Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Mon, 9 Dec 2024 16:08:14 -0800 Subject: [PATCH 71/88] tests --- python/tests/integration_tests/test_client.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 83efc397e..bef79a594 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -56,7 +56,14 @@ def wait_for( @pytest.fixture def langchain_client() -> Client: get_env_var.cache_clear() - return Client() + return Client( + info={ + "instance_flags": { + "dataset_examples_multipart_enabled": True, + "examples_multipart_enabled": True, + } + } + ) def test_datasets(langchain_client: Client) -> None: @@ -379,9 +386,6 @@ def test_error_surfaced_invalid_uri(uri: str) -> None: def test_upload_examples_multipart(langchain_client: Client): """Test uploading examples with attachments via multipart endpoint.""" - langchain_client._info = { - "instance_flags": {"dataset_examples_multipart_enabled": True} - } dataset_name = "__test_upload_examples_multipart" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) @@ -469,7 +473,6 @@ def test_upload_examples_multipart(langchain_client: Client): def test_upsert_examples_multipart(langchain_client: Client) -> None: """Test upserting examples with attachments via the multipart endpoint.""" - langchain_client._info = {"instance_flags": {"examples_multipart_enabled": True}} dataset_name = "__test_upsert_examples_multipart" + uuid4().hex[:4] if langchain_client.has_dataset(dataset_name=dataset_name): langchain_client.delete_dataset(dataset_name=dataset_name) @@ -1250,9 +1253,6 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: def test_evaluate_with_attachments(langchain_client: Client) -> None: """Test evaluating examples with attachments.""" - langchain_client._info = { - "instance_flags": {"dataset_examples_multipart_enabled": True} - } dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] # 1. Create dataset dataset = langchain_client.create_dataset( @@ -1308,9 +1308,6 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: def test_evaluate_with_no_attachments(langchain_client: Client) -> None: """Test evaluating examples without attachments using a target with attachments.""" - langchain_client._info = { - "instance_flags": {"dataset_examples_multipart_enabled": True} - } dataset_name = "__test_evaluate_no_attachments" + uuid4().hex[:4] dataset = langchain_client.create_dataset( dataset_name, From 8bb0826dfe589b6c418b1b0c1733da93b549b4ee Mon Sep 17 00:00:00 2001 From: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:54:53 -0800 Subject: [PATCH 72/88] update examples multipart (#1310) --- python/langsmith/client.py | 89 ++++- python/langsmith/schemas.py | 17 + python/tests/integration_tests/test_client.py | 306 ++++++++++++++++++ 3 files changed, 407 insertions(+), 5 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 0f39aa9c0..c173cc7cb 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3464,6 +3464,7 @@ def _prepate_multipart_data( examples: Union[ List[ls_schemas.ExampleUploadWithAttachments] | List[ls_schemas.ExampleUpsertWithAttachments] + | List[ls_schemas.ExampleUpdateWithAttachments], ], include_dataset_id: bool = False, ) -> Tuple[Any, bytes]: @@ -3477,21 +3478,29 @@ def _prepate_multipart_data( dataset_id = examples[0].dataset_id for example in examples: - if not isinstance( - example, ls_schemas.ExampleUploadWithAttachments - ) and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments): + if ( + not isinstance(example, ls_schemas.ExampleUploadWithAttachments) + and not isinstance(example, ls_schemas.ExampleUpsertWithAttachments) + and not isinstance(example, ls_schemas.ExampleUpdateWithAttachments) + ): raise ValueError( "The examples must be of type ExampleUploadWithAttachments" " or ExampleUpsertWithAttachments" + " or ExampleUpdateWithAttachments" ) if example.id is not None: example_id = str(example.id) else: example_id = str(uuid.uuid4()) + if isinstance(example, ls_schemas.ExampleUpdateWithAttachments): + created_at = None + else: + created_at = example.created_at + example_body = { **({"dataset_id": dataset_id} if include_dataset_id else {}), - "created_at": example.created_at, + **({"created_at": created_at} if created_at is not None else {}), } if example.metadata is not None: example_body["metadata"] = example.metadata @@ -3582,6 +3591,23 @@ def _prepate_multipart_data( ) ) + if ( + isinstance(example, ls_schemas.ExampleUpdateWithAttachments) + and example.attachments_operations + ): + attachments_operationsb = _dumps_json(example.attachments_operations) + parts.append( + ( + f"{example_id}.attachments_operations", + ( + None, + attachments_operationsb, + "application/json", + {}, + ), + ) + ) + encoder = rqtb_multipart.MultipartEncoder(parts, boundary=BOUNDARY) if encoder.len <= 20_000_000: # ~20 MB data = encoder.to_string() @@ -3590,6 +3616,38 @@ def _prepate_multipart_data( return encoder, data + def update_examples_multipart( + self, + *, + dataset_id: ID_TYPE, + updates: Optional[List[ls_schemas.ExampleUpdateWithAttachments]] = None, + ) -> ls_schemas.UpsertExamplesResponse: + """Upload examples.""" + if not (self.info.instance_flags or {}).get( + "dataset_examples_multipart_enabled", False + ): + raise ValueError( + "Your LangSmith version does not allow using the multipart examples endpoint, please update to the latest version." + ) + if updates is None: + updates = [] + + encoder, data = self._prepate_multipart_data(updates, include_dataset_id=False) + + response = self.request_with_retries( + "PATCH", + f"/v1/platform/datasets/{dataset_id}/examples", + request_kwargs={ + "data": data, + "headers": { + **self._headers, + "Content-Type": encoder.content_type, + }, + }, + ) + ls_utils.raise_for_status_with_text(response) + return response.json() + def upload_examples_multipart( self, *, @@ -4072,6 +4130,7 @@ def update_example( metadata: Optional[Dict] = None, split: Optional[str | List[str]] = None, dataset_id: Optional[ID_TYPE] = None, + attachments_operations: Optional[ls_schemas.AttachmentsOperations] = None, ) -> Dict[str, Any]: """Update a specific example. @@ -4096,12 +4155,20 @@ def update_example( Dict[str, Any] The updated example. """ + if attachments_operations is not None: + if not (self.info.instance_flags or {}).get( + "dataset_examples_multipart_enabled", False + ): + raise ValueError( + "Your LangSmith version does not allow using the attachment operations, please update to the latest version." + ) example = dict( inputs=inputs, outputs=outputs, dataset_id=dataset_id, metadata=metadata, split=split, + attachments_operations=attachments_operations, ) response = self.request_with_retries( "PATCH", @@ -4121,6 +4188,9 @@ def update_examples( metadata: Optional[Sequence[Optional[Dict]]] = None, splits: Optional[Sequence[Optional[str | List[str]]]] = None, dataset_ids: Optional[Sequence[Optional[ID_TYPE]]] = None, + attachments_operations: Optional[ + Sequence[Optional[ls_schemas.AttachmentsOperations]] + ] = None, ) -> Dict[str, Any]: """Update multiple examples. @@ -4145,12 +4215,20 @@ def update_examples( Dict[str, Any] The response from the server (specifies the number of examples updated). """ + if attachments_operations is not None: + if not (self.info.instance_flags or {}).get( + "dataset_examples_multipart_enabled", False + ): + raise ValueError( + "Your LangSmith version does not allow using the attachment operations, please update to the latest version." + ) sequence_args = { "inputs": inputs, "outputs": outputs, "metadata": metadata, "splits": splits, "dataset_ids": dataset_ids, + "attachments_operations": attachments_operations, } # Since inputs are required, we will check against them examples_len = len(example_ids) @@ -4168,8 +4246,9 @@ def update_examples( "dataset_id": dataset_id_, "metadata": metadata_, "split": split_, + "attachments_operations": attachments_operations_, } - for id_, in_, out_, metadata_, split_, dataset_id_ in zip( + for id_, in_, out_, metadata_, split_, dataset_id_, attachments_operations_ in zip( example_ids, inputs or [None] * len(example_ids), outputs or [None] * len(example_ids), diff --git a/python/langsmith/schemas.py b/python/langsmith/schemas.py index 30a65a018..acedaf177 100644 --- a/python/langsmith/schemas.py +++ b/python/langsmith/schemas.py @@ -183,12 +183,24 @@ class ExampleSearch(ExampleBase): id: UUID +class AttachmentsOperations(BaseModel): + """Operations to perform on attachments.""" + + rename: Dict[str, str] = Field( + default_factory=dict, description="Mapping of old attachment names to new names" + ) + retain: List[str] = Field( + default_factory=list, description="List of attachment names to keep" + ) + + class ExampleUpdate(BaseModel): """Update class for Example.""" dataset_id: Optional[UUID] = None inputs: Optional[Dict[str, Any]] = None outputs: Optional[Dict[str, Any]] = None + attachments_operations: Optional[AttachmentsOperations] = None metadata: Optional[Dict[str, Any]] = None split: Optional[Union[str, List[str]]] = None @@ -202,7 +214,12 @@ class ExampleUpdateWithAttachments(ExampleUpdate): """Example update with attachments.""" id: UUID + inputs: Dict[str, Any] = Field(default_factory=dict) + outputs: Optional[Dict[str, Any]] = Field(default=None) + metadata: Optional[Dict[str, Any]] = Field(default=None) + split: Optional[Union[str, List[str]]] = None attachments: Optional[Attachments] = None + attachments_operations: Optional[AttachmentsOperations] = None class DataType(str, Enum): diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index bef79a594..33eec0f46 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -22,8 +22,10 @@ from langsmith.client import ID_TYPE, Client from langsmith.evaluation import evaluate from langsmith.schemas import ( + AttachmentsOperations, DataType, Example, + ExampleUpdateWithAttachments, ExampleUploadWithAttachments, ExampleUpsertWithAttachments, Run, @@ -1392,3 +1394,307 @@ def test_examples_length_validation(langchain_client: Client) -> None: # Clean up langchain_client.delete_dataset(dataset_id=dataset.id) + + +def test_update_example_with_attachments_operations(langchain_client: Client) -> None: + """Test updating an example with attachment operations.""" + dataset_name = "__test_update_example_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name=dataset_name, + description="Test dataset for updating example attachments", + ) + + # Create example with attachments + example = ExampleUploadWithAttachments( + inputs={"query": "What's in this image?"}, + outputs={"answer": "A test image"}, + attachments={ + "image1": ("image/png", b"fake image data 1"), + "image2": ("image/png", b"fake image data 2"), + }, + ) + created_example = langchain_client.upload_examples_multipart( + dataset_id=dataset.id, uploads=[example] + ) + + # Update example with attachment operations to rename and retain attachments + attachments_operations = AttachmentsOperations( + rename={"image1": "renamed_image"}, + retain=["image2"], # Only keep the renamed image1, drop image2 + ) + + langchain_client.update_example( + example_id=created_example.id, + attachments_operations=attachments_operations, + ) + + # Verify the update + retrieved_example = langchain_client.read_example( + example_id=created_example.id, + ) + + # Check that only the renamed attachment exists + assert len(retrieved_example.attachments_info) == 2 + assert "renamed_image" in retrieved_example.attachments_info + assert "image2" in retrieved_example.attachments_info + assert "image1" not in retrieved_example.attachments_info + assert ( + retrieved_example.attachments_info["image2"]["reader"].read() + == b"fake image data 2" + ) + assert ( + retrieved_example.attachments_info["renamed_image"]["reader"].read() + == b"fake image data 1" + ) + + # Clean up + langchain_client.delete_dataset(dataset_id=dataset.id) + + +def test_bulk_update_examples_with_attachments_operations( + langchain_client: Client, +) -> None: + """Test bulk updating examples with attachment operations.""" + dataset_name = "__test_bulk_update_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name=dataset_name, + description="Test dataset for bulk updating example attachments", + ) + + # Create two examples with attachments + example1 = ExampleUploadWithAttachments( + inputs={"query": "What's in this image?"}, + outputs={"answer": "A test image 1"}, + attachments={ + "image1": ("image/png", b"fake image data 1"), + "extra": ("text/plain", b"extra data"), + }, + ) + example2 = ExampleUploadWithAttachments( + inputs={"query": "What's in this image?"}, + outputs={"answer": "A test image 2"}, + attachments={ + "image2": ("image/png", b"fake image data 2"), + "extra": ("text/plain", b"extra data"), + }, + ) + + created_examples = langchain_client.upload_examples_multipart( + dataset_id=dataset.id, + uploads=[example1, example2], + ) + example_ids = [ex.id for ex in created_examples] + + # Update both examples with different attachment operations + attachments_operations = [ + AttachmentsOperations( + rename={"image1": "renamed_image1"}, + ), + AttachmentsOperations(retain=["extra"]), + ] + + langchain_client.update_examples( + example_ids=example_ids, + attachments_operations=attachments_operations, + ) + + # Verify the updates + updated_examples = list( + langchain_client.list_examples( + dataset_id=dataset.id, + example_ids=example_ids, + include_attachments=True, + ) + ) + + # Check first example + assert len(updated_examples[0].attachments) == 1 + assert "renamed_image1" in updated_examples[0].attachments + assert "extra" not in updated_examples[0].attachments + + # Check second example + assert len(updated_examples[1].attachments) == 1 + assert "extra" in updated_examples[1].attachments + assert "image2" not in updated_examples[1].attachments + + # Check attachment data + assert ( + updated_examples[0].attachments["renamed_image1"][1].read() + == b"fake image data 1" + ) + assert updated_examples[1].attachments["extra"][1].read() == b"extra data" + + # Clean up + langchain_client.delete_dataset(dataset_id=dataset.id) + + +def test_update_examples_multipart(langchain_client: Client) -> None: + """Test updating examples with attachments via multipart endpoint.""" + dataset_name = "__test_update_examples_multipart" + uuid4().hex[:4] + if langchain_client.has_dataset(dataset_name=dataset_name): + langchain_client.delete_dataset(dataset_name=dataset_name) + + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for multipart example updates", + data_type=DataType.kv, + ) + + # First create some examples with attachments + example_1 = ExampleUploadWithAttachments( + inputs={"text": "hello world"}, + attachments={ + "file1": ("text/plain", b"original content 1"), + "file2": ("text/plain", b"original content 2"), + }, + ) + + example_2 = ExampleUploadWithAttachments( + inputs={"text": "second example"}, + attachments={ + "file3": ("text/plain", b"original content 3"), + "file4": ("text/plain", b"original content 4"), + }, + ) + + created_examples = langchain_client.upload_examples_multipart( + dataset_id=dataset.id, uploads=[example_1, example_2] + ) + assert created_examples["count"] == 2 + + examples = list(langchain_client.list_examples(dataset_id=dataset.id)) + example_ids = [ex.id for ex in examples] + + # Now create update operations + update_1 = ExampleUpdateWithAttachments( + id=example_ids[0], + inputs={"text": "updated hello world"}, + attachments={ + "new_file1": ("text/plain", b"new content 1"), + }, + attachments_operations=AttachmentsOperations( + rename={"file1": "renamed_file1"}, + ), + ) + + update_2 = ExampleUpdateWithAttachments( + id=example_ids[1], + inputs={"text": "updated second example"}, + attachments={ + "new_file2": ("text/plain", b"new content 2"), + }, + attachments_operations=AttachmentsOperations(retain=["file3"]), + ) + + # Test updating multiple examples at once + updated_examples = langchain_client.update_examples_multipart( + dataset_id=dataset.id, updates=[update_1, update_2] + ) + assert updated_examples["count"] == 2 + + # Verify the updates + updated = list( + langchain_client.list_examples( + dataset_id=dataset.id, + include_attachments=True, + ) + ) + + # Verify first example updates + example_1_updated = next(ex for ex in updated if ex.id == example_ids[0]) + assert example_1_updated.inputs["text"] == "updated hello world" + assert "renamed_file1" in example_1_updated.attachments_info + assert "new_file1" in example_1_updated.attachments_info + assert "file2" not in example_1_updated.attachments_info + assert ( + example_1_updated.attachments_info["renamed_file1"]["reader"].read() + == b"original content 1" + ) + assert ( + example_1_updated.attachments_info["new_file1"]["reader"].read() + == b"new content 1" + ) + + # Verify second example updates + example_2_updated = next(ex for ex in updated if ex.id == example_ids[1]) + assert example_2_updated.inputs["text"] == "updated second example" + assert "file3" in example_2_updated.attachments_info + assert "new_file2" in example_2_updated.attachments_info + assert "file4" not in example_2_updated.attachments_info + assert ( + example_2_updated.attachments_info["file3"]["reader"].read() + == b"original content 3" + ) + assert ( + example_2_updated.attachments_info["new_file2"]["reader"].read() + == b"new content 2" + ) + + # Test updating examples in different datasets fails + other_dataset = langchain_client.create_dataset( + dataset_name=dataset_name + "_other", + description="Other test dataset", + ) + with pytest.raises(ValueError, match="All examples must be in the same dataset"): + langchain_client.update_examples_multipart( + dataset_id=dataset.id, + updates=[ + ExampleUpsertWithAttachments( + id=example_ids[0], + inputs={"text": "update 1"}, + ), + ExampleUpsertWithAttachments( + id=uuid4(), + inputs={"text": "update 2"}, + ), + ], + ) + + # Test updating non-existent example fails + with pytest.raises(LangSmithNotFoundError): + langchain_client.update_examples_multipart( + dataset_id=dataset.id, + updates=[ + ExampleUpsertWithAttachments( + id=uuid4(), + inputs={"text": "should fail"}, + ) + ], + ) + + # Test updating with mismatch named attachments fails + with pytest.raises(ValueError): + langchain_client.update_examples_multipart( + dataset_id=dataset.id, + updates=[ + ExampleUpdateWithAttachments( + id=example_ids[0], + attachments={ + "renamed_file1": ("text/plain", b"new content 1"), + }, + attachments_operations=AttachmentsOperations( + retain=["renamed_file1"], + ), + ) + ], + ) + + with pytest.raises(ValueError): + langchain_client.update_examples_multipart( + dataset_id=dataset.id, + updates=[ + ExampleUpdateWithAttachments( + id=example_ids[0], + attachments={ + "foo": ("text/plain", b"new content 1"), + }, + attachments_operations=AttachmentsOperations( + rename={"renamed_file1": "foo"}, + ), + ) + ], + ) + + # Clean up + langchain_client.delete_dataset(dataset_id=dataset.id) + langchain_client.delete_dataset(dataset_id=other_dataset.id) From c841ec6528513a3bf124c2061940a05975abcb53 Mon Sep 17 00:00:00 2001 From: Isaac Francisco <78627776+isahers1@users.noreply.github.com> Date: Tue, 10 Dec 2024 12:29:22 -0800 Subject: [PATCH 73/88] add attachments to evaluate (#1237) --- python/langsmith/evaluation/_arunner.py | 5 +- python/langsmith/evaluation/_runner.py | 28 ++- python/langsmith/evaluation/evaluator.py | 11 +- python/tests/integration_tests/test_client.py | 227 +++++++++++++++++- .../unit_tests/evaluation/test_runner.py | 154 ++++++++++++ 5 files changed, 408 insertions(+), 17 deletions(-) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 59fe06caf..7cee6bcf5 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -37,6 +37,7 @@ DATA_T, EVALUATOR_T, ExperimentResultRow, + _evaluators_include_attachments, _ExperimentManagerMixin, _extract_feedback_keys, _ForwardResults, @@ -259,6 +260,7 @@ async def aevaluate( ... ) # doctest: +ELLIPSIS View the evaluation results for experiment:... + .. versionchanged:: 0.2.0 'max_concurrency' default updated from None (no limit on concurrency) @@ -476,7 +478,8 @@ async def _aevaluate( description=description, num_repetitions=num_repetitions, runs=runs, - include_attachments=_include_attachments(target), + include_attachments=_include_attachments(target) + or _evaluators_include_attachments(evaluators), upload_results=upload_results, ).astart() cache_dir = ls_utils.get_cache_dir(None) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 836aaabe4..ddbd9bf18 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1064,7 +1064,8 @@ def _evaluate( # If provided, we don't need to create a new experiment. runs=runs, # Create or resolve the experiment. - include_attachments=_include_attachments(target), + include_attachments=_include_attachments(target) + or _evaluators_include_attachments(evaluators), upload_results=upload_results, ).start() cache_dir = ls_utils.get_cache_dir(None) @@ -1913,7 +1914,30 @@ def _ensure_traceable( return fn -def _include_attachments(target: Any) -> bool: +def _evaluators_include_attachments( + evaluators: Optional[Sequence[Union[EVALUATOR_T, AEVALUATOR_T]]], +) -> bool: + if evaluators is None: + return False + return any( + any( + p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + and p.name == "attachments" + for p in ( + inspect.signature( + e.__call__ if hasattr(e, "__call__") else e + ).parameters.values() + if callable(e) or hasattr(e, "__call__") + else [] + ) + ) + for e in evaluators + ) + + +def _include_attachments( + target: Any, +) -> bool: """Whether the target function accepts attachments.""" if _is_langchain_runnable(target) or not callable(target): return False diff --git a/python/langsmith/evaluation/evaluator.py b/python/langsmith/evaluation/evaluator.py index 02fab3b71..a1505699a 100644 --- a/python/langsmith/evaluation/evaluator.py +++ b/python/langsmith/evaluation/evaluator.py @@ -624,7 +624,14 @@ def _normalize_evaluator_func( Callable[[Run, Optional[Example]], _RUNNABLE_OUTPUT], Callable[[Run, Optional[Example]], Awaitable[_RUNNABLE_OUTPUT]], ]: - supported_args = ("run", "example", "inputs", "outputs", "reference_outputs") + supported_args = ( + "run", + "example", + "inputs", + "outputs", + "reference_outputs", + "attachments", + ) sig = inspect.signature(func) positional_args = [ pname @@ -659,6 +666,7 @@ async def awrapper( "example": example, "inputs": example.inputs if example else {}, "outputs": run.outputs or {}, + "attachments": example.attachments or {} if example else {}, "reference_outputs": example.outputs or {} if example else {}, } args = (arg_map[arg] for arg in positional_args) @@ -679,6 +687,7 @@ def wrapper(run: Run, example: Example) -> _RUNNABLE_OUTPUT: "example": example, "inputs": example.inputs if example else {}, "outputs": run.outputs or {}, + "attachments": example.attachments or {}, "reference_outputs": example.outputs or {} if example else {}, } args = (arg_map[arg] for arg in positional_args) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 33eec0f46..f72a2ebdf 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -20,7 +20,7 @@ from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor from langsmith.client import ID_TYPE, Client -from langsmith.evaluation import evaluate +from langsmith.evaluation import aevaluate, evaluate from langsmith.schemas import ( AttachmentsOperations, DataType, @@ -1215,9 +1215,6 @@ def create_encoder(*args, **kwargs): assert not caplog.records -@pytest.mark.skip( - reason="Need to land https://github.com/langchain-ai/langsmith-sdk/pull/1209 first" -) def test_list_examples_attachments_keys(langchain_client: Client) -> None: """Test list_examples returns same keys with and without attachments.""" dataset_name = "__test_list_examples_attachments" + uuid4().hex[:4] @@ -1256,6 +1253,7 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: def test_evaluate_with_attachments(langchain_client: Client) -> None: """Test evaluating examples with attachments.""" dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] + # 1. Create dataset dataset = langchain_client.create_dataset( dataset_name, @@ -1274,37 +1272,89 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None: langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) - # 3. Define target function that uses attachments def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]: # Verify we receive the attachment data assert "image" in attachments + assert "presigned_url" in attachments["image"] image_data = attachments["image"]["reader"] assert image_data.read() == b"fake image data for testing" return {"answer": "test image"} - # 4. Define simple evaluator - def evaluator(run: Run, example: Example) -> Dict[str, Any]: + def evaluator( + outputs: dict, reference_outputs: dict, attachments: dict + ) -> Dict[str, Any]: + assert "image" in attachments + assert "presigned_url" in attachments["image"] + image_data = attachments["image"]["reader"] + assert image_data.read() == b"fake image data for testing" return { "score": float( - run.outputs.get("answer") == example.outputs.get("answer") # type: ignore + reference_outputs.get("answer") == outputs.get("answer") # type: ignore ) } - # 5. Run evaluation - results = evaluate( + results = langchain_client.evaluate( + target, + data=dataset_name, + evaluators=[evaluator], + num_repetitions=2, + ) + + assert len(results) == 2 + for result in results: + assert result["evaluation_results"]["results"][0].score == 1.0 + + langchain_client.delete_dataset(dataset_name=dataset_name) + + +def test_evaluate_with_attachments_not_in_target(langchain_client: Client) -> None: + """Test evaluating examples with attachments.""" + dataset_name = "__test_evaluate_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for evals with attachments", + data_type=DataType.kv, + ) + + example = ExampleUploadWithAttachments( + dataset_id=dataset.id, + inputs={"question": "What is shown in the image?"}, + outputs={"answer": "test image"}, + attachments={ + "image": ("image/png", b"fake image data for testing"), + }, + ) + + langchain_client.upload_examples_multipart(uploads=[example]) + + def target(inputs: Dict[str, Any]) -> Dict[str, Any]: + # Verify we receive the attachment data + return {"answer": "test image"} + + def evaluator( + outputs: dict, reference_outputs: dict, attachments: dict + ) -> Dict[str, Any]: + assert "image" in attachments + assert "presigned_url" in attachments["image"] + image_data = attachments["image"]["reader"] + assert image_data.read() == b"fake image data for testing" + return { + "score": float( + reference_outputs.get("answer") == outputs.get("answer") # type: ignore + ) + } + + results = langchain_client.evaluate( target, data=dataset_name, evaluators=[evaluator], - client=langchain_client, num_repetitions=2, ) - # 6. Verify results assert len(results) == 2 for result in results: assert result["evaluation_results"]["results"][0].score == 1.0 - # Cleanup langchain_client.delete_dataset(dataset_name=dataset_name) @@ -1355,6 +1405,157 @@ def evaluator(run: Run, example: Example) -> Dict[str, Any]: langchain_client.delete_dataset(dataset_name=dataset_name) +async def test_aevaluate_with_attachments(langchain_client: Client) -> None: + """Test evaluating examples with attachments.""" + dataset_name = "__test_aevaluate_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for evals with attachments", + data_type=DataType.kv, + ) + + example = ExampleUploadWithAttachments( + inputs={"question": "What is shown in the image?"}, + outputs={"answer": "test image"}, + attachments={ + "image": ("image/png", b"fake image data for testing"), + }, + ) + + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) + + async def target( + inputs: Dict[str, Any], attachments: Dict[str, Any] + ) -> Dict[str, Any]: + # Verify we receive the attachment data + assert "image" in attachments + assert "presigned_url" in attachments["image"] + image_data = attachments["image"]["reader"] + assert image_data.read() == b"fake image data for testing" + return {"answer": "test image"} + + async def evaluator( + outputs: dict, reference_outputs: dict, attachments: dict + ) -> Dict[str, Any]: + assert "image" in attachments + assert "presigned_url" in attachments["image"] + image_data = attachments["image"]["reader"] + assert image_data.read() == b"fake image data for testing" + return { + "score": float( + reference_outputs.get("answer") == outputs.get("answer") # type: ignore + ) + } + + results = await langchain_client.aevaluate( + target, data=dataset_name, evaluators=[evaluator], num_repetitions=2 + ) + + assert len(results) == 2 + async for result in results: + assert result["evaluation_results"]["results"][0].score == 1.0 + + langchain_client.delete_dataset(dataset_name=dataset_name) + + +async def test_aevaluate_with_attachments_not_in_target( + langchain_client: Client, +) -> None: + """Test evaluating examples with attachments.""" + dataset_name = "__test_aevaluate_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for evals with attachments", + data_type=DataType.kv, + ) + + example = ExampleUploadWithAttachments( + inputs={"question": "What is shown in the image?"}, + outputs={"answer": "test image"}, + attachments={ + "image": ("image/png", b"fake image data for testing"), + }, + ) + + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) + + async def target(inputs: Dict[str, Any]) -> Dict[str, Any]: + # Verify we receive the attachment data + return {"answer": "test image"} + + async def evaluator( + outputs: dict, reference_outputs: dict, attachments: dict + ) -> Dict[str, Any]: + assert "image" in attachments + assert "presigned_url" in attachments["image"] + image_data = attachments["image"]["reader"] + assert image_data.read() == b"fake image data for testing" + return { + "score": float( + reference_outputs.get("answer") == outputs.get("answer") # type: ignore + ) + } + + results = await langchain_client.aevaluate( + target, data=dataset_name, evaluators=[evaluator], num_repetitions=2 + ) + + assert len(results) == 2 + async for result in results: + assert result["evaluation_results"]["results"][0].score == 1.0 + + langchain_client.delete_dataset(dataset_name=dataset_name) + + +async def test_aevaluate_with_no_attachments(langchain_client: Client) -> None: + """Test evaluating examples without attachments using a target with attachments.""" + dataset_name = "__test_aevaluate_no_attachments" + uuid4().hex[:4] + dataset = langchain_client.create_dataset( + dataset_name, + description="Test dataset for evals without attachments", + data_type=DataType.kv, + ) + + # Create example using old way, attachments should be set to {} + langchain_client.create_example( + dataset_id=dataset.id, + inputs={"question": "What is 2+2?"}, + outputs={"answer": "4"}, + ) + + # Verify we can create example the new way without attachments + example = ExampleUploadWithAttachments( + inputs={"question": "What is 3+1?"}, + outputs={"answer": "4"}, + ) + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) + + async def target( + inputs: Dict[str, Any], attachments: Dict[str, Any] + ) -> Dict[str, Any]: + # Verify we receive an empty attachments dict + assert isinstance(attachments, dict) + assert len(attachments) == 0 + return {"answer": "4"} + + async def evaluator(run: Run, example: Example) -> Dict[str, Any]: + return { + "score": float( + run.outputs.get("answer") == example.outputs.get("answer") # type: ignore + ) + } + + results = await aevaluate( + target, data=dataset_name, evaluators=[evaluator], client=langchain_client + ) + + assert len(results) == 2 + async for result in results: + assert result["evaluation_results"]["results"][0].score == 1.0 + + langchain_client.delete_dataset(dataset_name=dataset_name) + + def test_examples_length_validation(langchain_client: Client) -> None: """Test that mismatched lengths raise ValueError for create and update examples.""" dataset_name = "__test_examples_length_validation" + uuid4().hex[:4] diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py index 04b269100..e33d07fd5 100644 --- a/python/tests/unit_tests/evaluation/test_runner.py +++ b/python/tests/unit_tests/evaluation/test_runner.py @@ -242,6 +242,14 @@ def score_unpacked_inputs_outputs_reference(inputs, outputs, reference_outputs): ordering_of_stuff.append("evaluate") return {"score": reference_outputs["answer"]} + def score_unpacked_inputs_outputs_attachments(inputs, outputs, attachments): + ordering_of_stuff.append("evaluate") + return {"score": outputs["output"]} + + def score_unpacked_outputs(outputs): + ordering_of_stuff.append("evaluate") + return {"score": outputs["output"]} + def eval_float(run, example): ordering_of_stuff.append("evaluate") return 0.2 @@ -270,6 +278,8 @@ def summary_eval_outputs_reference(outputs, reference_outputs): score_value_first, score_unpacked_inputs_outputs, score_unpacked_inputs_outputs_reference, + score_unpacked_inputs_outputs_attachments, + score_unpacked_outputs, eval_float, eval_str, eval_list, @@ -524,6 +534,14 @@ async def score_unpacked_inputs_outputs_reference( ordering_of_stuff.append("evaluate") return {"score": reference_outputs["answer"]} + async def score_unpacked_inputs_outputs_attachments(inputs, outputs, attachments): + ordering_of_stuff.append("evaluate") + return {"score": outputs["output"]} + + async def score_unpacked_outputs(outputs): + ordering_of_stuff.append("evaluate") + return {"score": outputs["output"]} + async def eval_float(run, example): ordering_of_stuff.append("evaluate") return 0.2 @@ -552,6 +570,8 @@ def summary_eval_outputs_reference(outputs, reference_outputs): score_value_first, score_unpacked_inputs_outputs, score_unpacked_inputs_outputs_reference, + score_unpacked_inputs_outputs_attachments, + score_unpacked_outputs, eval_float, eval_str, eval_list, @@ -803,6 +823,140 @@ def test_include_attachments(target, expected, error_msg, is_async): assert result == expected +def valid_single_supported(inputs, *, optional=None): + return {"score": 1} + + +async def valid_single_supported_async(inputs, *, optional=None): + return {"score": 1} + + +def valid_two_arbitrary(foo, bar, *, optional=None): + return {"score": 1} + + +async def valid_two_arbitrary_async(foo, bar, *, optional=None): + return {"score": 1} + + +def valid_multiple_supported(inputs, outputs, reference_outputs, *, optional=None): + return {"score": 1} + + +async def valid_multiple_supported_async( + inputs, outputs, reference_outputs, *, optional=None +): + return {"score": 1} + + +def invalid_single_unsupported(foo, *, optional=None): + return {"score": 1} + + +async def invalid_single_unsupported_async(foo, *, optional=None): + return {"score": 1} + + +def invalid_three_args(inputs, outputs, foo, *, optional=None): + return {"score": 1} + + +async def invalid_three_args_async(inputs, outputs, foo, *, optional=None): + return {"score": 1} + + +def invalid_no_positional(*, inputs, outputs, optional=None): + return {"score": 1} + + +async def invalid_no_positional_async(*, inputs, outputs, optional=None): + return {"score": 1} + + +# Test cases that should succeed +VALID_EVALUATOR_CASES = [ + (valid_single_supported, False), + (valid_single_supported_async, True), + (valid_two_arbitrary, False), + (valid_two_arbitrary_async, True), + (valid_multiple_supported, False), + (valid_multiple_supported_async, True), +] + +# Test cases that should raise ValueError +INVALID_EVALUATOR_CASES = [ + (invalid_single_unsupported, False), + (invalid_single_unsupported_async, True), + (invalid_three_args, False), + (invalid_three_args_async, True), + (invalid_no_positional, False), + (invalid_no_positional_async, True), +] + + +def target(inputs, attachments): + return {"foo": "bar"} + + +async def atarget(inputs, attachments): + return {"foo": "bar"} + + +@pytest.mark.parametrize("func,is_async", VALID_EVALUATOR_CASES) +def test_normalize_evaluator_func_valid(func, is_async): + """Test _normalize_evaluator_func succeeds.""" + func = _normalize_evaluator_func(func) + session = mock.Mock() + ds_name = "my-dataset" + ds_id = "00886375-eb2a-4038-9032-efff60309896" + + ds_example_responses = [_create_example(i) for i in range(10)] + ds_examples = [e[0] for e in ds_example_responses] + tenant_id = str(uuid.uuid4()) + fake_request = FakeRequest( + ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id + ) + session.request = fake_request.request + client = Client(api_url="http://localhost:1984", api_key="123", session=session) + client._tenant_id = tenant_id # type: ignore + + if is_async: + asyncio.run( + aevaluate(atarget, data=ds_examples, evaluators=[func], client=client) + ) + else: + evaluate(target, data=ds_examples, evaluators=[func], client=client) + + +@pytest.mark.parametrize("func,is_async", INVALID_EVALUATOR_CASES) +def test_normalize_evaluator_func_invalid(func, is_async): + """Test _normalize_evaluator_func fails correctly.""" + with pytest.raises(ValueError, match="Invalid evaluator function"): + _normalize_evaluator_func(func) + + session = mock.Mock() + ds_name = "my-dataset" + ds_id = "00886375-eb2a-4038-9032-efff60309896" + + ds_example_responses = [_create_example(i) for i in range(10)] + ds_examples = [e[0] for e in ds_example_responses] + tenant_id = str(uuid.uuid4()) + fake_request = FakeRequest( + ds_id, ds_name, [e[1] for e in ds_example_responses], tenant_id + ) + session.request = fake_request.request + client = Client(api_url="http://localhost:1984", api_key="123", session=session) + client._tenant_id = tenant_id # type: ignore + + with pytest.raises(ValueError, match="Invalid evaluator function"): + if is_async: + asyncio.run( + aevaluate(atarget, data=ds_examples, evaluators=[func], client=client) + ) + else: + evaluate(target, data=ds_examples, evaluators=[func], client=client) + + def summary_eval_runs_examples(runs_, examples_): return {"score": len(runs_[0].dotted_order)} From f3cc56f88f50fb907a0a83b271a88f93bd33065f Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Tue, 10 Dec 2024 12:33:24 -0800 Subject: [PATCH 74/88] update to 0.2.2 --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index a831ff0df..5b008c34d 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langsmith" -version = "0.2.1" +version = "0.2.2" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." authors = ["LangChain "] license = "MIT" From bf00aa6063900306baa774d2417c378b48d8076e Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Tue, 10 Dec 2024 12:50:47 -0800 Subject: [PATCH 75/88] fix spelling --- python/langsmith/client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index dcb74ec7c..b6e366f45 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -3462,7 +3462,7 @@ def create_example_from_run( created_at=created_at, ) - def _prepate_multipart_data( + def _prepare_multipart_data( self, examples: Union[ List[ls_schemas.ExampleUploadWithAttachments] @@ -3635,7 +3635,7 @@ def update_examples_multipart( if updates is None: updates = [] - encoder, data = self._prepate_multipart_data(updates, include_dataset_id=False) + encoder, data = self._prepare_multipart_data(updates, include_dataset_id=False) response = self.request_with_retries( "PATCH", @@ -3666,7 +3666,7 @@ def upload_examples_multipart( ) if uploads is None: uploads = [] - encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False) + encoder, data = self._prepare_multipart_data(uploads, include_dataset_id=False) response = self.request_with_retries( "POST", @@ -3702,7 +3702,7 @@ def upsert_examples_multipart( if upserts is None: upserts = [] - encoder, data = self._prepate_multipart_data(upserts, include_dataset_id=True) + encoder, data = self._prepare_multipart_data(upserts, include_dataset_id=True) response = self.request_with_retries( "POST", From c63b92c43c2718840e44933d2ba69864513794d4 Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Tue, 10 Dec 2024 12:53:22 -0800 Subject: [PATCH 76/88] fix update_examples issue --- python/langsmith/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index b6e366f45..a92a89659 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -4258,6 +4258,7 @@ def update_examples( metadata or [None] * len(example_ids), splits or [None] * len(example_ids), dataset_ids or [None] * len(example_ids), + attachments_operations or [None] * len(example_ids), ) ] response = self.request_with_retries( From 76e003e17c7eedac3e8f6026bc8ddbeb4abb1d49 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 12:57:14 -0800 Subject: [PATCH 77/88] fix test --- python/tests/integration_tests/test_client.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index f72a2ebdf..c9c6f6587 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1220,13 +1220,17 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: dataset_name = "__test_list_examples_attachments" + uuid4().hex[:4] dataset = langchain_client.create_dataset(dataset_name=dataset_name) - langchain_client.create_example( - inputs={"text": "hello world"}, - outputs={"response": "hi there"}, + langchain_client.upload_examples_multipart( dataset_id=dataset.id, - attachments={ - "test_file": ("text/plain", b"test content"), - }, + uploads=[ + ExampleUploadWithAttachments( + inputs={"text": "hello world"}, + outputs={"response": "hi there"}, + attachments={ + "test_file": ("text/plain", b"test content"), + }, + ) + ] ) # Get examples with attachments From ed73f1a54783e425314ef557f687fffc4ce4e058 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 13:18:38 -0800 Subject: [PATCH 78/88] test fix --- python/langsmith/evaluation/_arunner.py | 15 ++++++++- python/langsmith/evaluation/_runner.py | 45 ++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 7cee6bcf5..311178576 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -47,6 +47,7 @@ _load_experiment, _load_tqdm, _load_traces, + _make_fresh_examples, _resolve_data, _resolve_evaluators, _resolve_experiment, @@ -569,8 +570,12 @@ async def aget_examples(self) -> AsyncIterator[schemas.Example]: include_attachments=self._include_attachments, ) if self._num_repetitions > 1: + examples_list = [example async for example in self._examples] self._examples = async_chain_from_iterable( - aitertools.atee(self._examples, self._num_repetitions) + [ + async_iter_from_list(_make_fresh_examples(examples_list)) + for _ in range(self._num_repetitions) + ] ) self._examples, examples_iter = aitertools.atee( @@ -1115,3 +1120,11 @@ async def async_chain_from_iterable( for sub_iterable in iterable: async for item in sub_iterable: yield item + + +async def async_iter_from_list( + examples: List[schemas.Example], +) -> AsyncIterable[schemas.Example]: + """Convert a list of examples to an async iterable.""" + for example in examples: + yield example diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index ddbd9bf18..f78523718 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -8,6 +8,7 @@ import datetime import functools import inspect +import io import itertools import logging import pathlib @@ -36,6 +37,7 @@ cast, ) +import requests from typing_extensions import TypedDict, overload import langsmith @@ -1341,8 +1343,10 @@ def examples(self) -> Iterable[schemas.Example]: include_attachments=self._include_attachments, ) if self._num_repetitions > 1: + examples_list = list(self._examples) self._examples = itertools.chain.from_iterable( - itertools.tee(self._examples, self._num_repetitions) + _make_fresh_examples(examples_list) + for _ in range(self._num_repetitions) ) self._examples, examples_iter = itertools.tee(self._examples) return examples_iter @@ -2221,3 +2225,42 @@ def _import_langchain_runnable() -> Optional[type]: def _is_langchain_runnable(o: Any) -> bool: return bool((Runnable := _import_langchain_runnable()) and isinstance(o, Runnable)) + + +def _reset_example_attachments(example: schemas.Example) -> schemas.Example: + """Reset attachment readers for an example.""" + if not hasattr(example, "attachments") or not example.attachments: + return example + + new_attachments = {} + for key, attachment in example.attachments.items(): + response = requests.get(attachment["presigned_url"], stream=True) + response.raise_for_status() + reader = io.BytesIO(response.content) + new_attachments[key] = { + "presigned_url": attachment["presigned_url"], + "reader": reader, + } + + # Create a new Example instance with the updated attachments + return schemas.Example( + id=example.id, + created_at=example.created_at, + dataset_id=example.dataset_id, + inputs=example.inputs, + outputs=example.outputs, + metadata=example.metadata, + modified_at=example.modified_at, + runs=example.runs, + source_run_id=example.source_run_id, + attachments=new_attachments, + _host_url=example._host_url, + _tenant_id=example._tenant_id, + ) + + +def _make_fresh_examples( + _original_examples: List[schemas.Example], +) -> List[schemas.Example]: + """Create fresh copies of examples with reset readers.""" + return [_reset_example_attachments(example) for example in _original_examples] From 4887a99de954f601df326a26fc05c9100ae876e1 Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Tue, 10 Dec 2024 13:24:11 -0800 Subject: [PATCH 79/88] attempt to fix test_update_examples_multipart --- python/tests/integration_tests/test_client.py | 58 ++++++------------- 1 file changed, 19 insertions(+), 39 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index c9c6f6587..904a3c028 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1778,7 +1778,7 @@ def test_update_examples_multipart(langchain_client: Client) -> None: "new_file1": ("text/plain", b"new content 1"), }, attachments_operations=AttachmentsOperations( - rename={"file1": "renamed_file1"}, + retain=["file1"], ), ) @@ -1807,60 +1807,41 @@ def test_update_examples_multipart(langchain_client: Client) -> None: # Verify first example updates example_1_updated = next(ex for ex in updated if ex.id == example_ids[0]) + print(example_1_updated.attachments) assert example_1_updated.inputs["text"] == "updated hello world" - assert "renamed_file1" in example_1_updated.attachments_info - assert "new_file1" in example_1_updated.attachments_info - assert "file2" not in example_1_updated.attachments_info - assert ( - example_1_updated.attachments_info["renamed_file1"]["reader"].read() - == b"original content 1" - ) - assert ( - example_1_updated.attachments_info["new_file1"]["reader"].read() - == b"new content 1" - ) + assert "new_file1" in example_1_updated.attachments + assert "file1" in example_1_updated.attachments + assert "file2" not in example_1_updated.attachments + # assert ( + # example_1_updated.attachments["renamed_file1"]["reader"].read() + # == b"original content 1" + # ) + # assert ( + # example_1_updated.attachments["new_file1"]["reader"].read() + # == b"new content 1" + # ) # Verify second example updates example_2_updated = next(ex for ex in updated if ex.id == example_ids[1]) assert example_2_updated.inputs["text"] == "updated second example" - assert "file3" in example_2_updated.attachments_info - assert "new_file2" in example_2_updated.attachments_info - assert "file4" not in example_2_updated.attachments_info + assert "file3" in example_2_updated.attachments + assert "new_file2" in example_2_updated.attachments + assert "file4" not in example_2_updated.attachments assert ( - example_2_updated.attachments_info["file3"]["reader"].read() + example_2_updated.attachments["file3"]["reader"].read() == b"original content 3" ) assert ( - example_2_updated.attachments_info["new_file2"]["reader"].read() + example_2_updated.attachments["new_file2"]["reader"].read() == b"new content 2" ) - # Test updating examples in different datasets fails - other_dataset = langchain_client.create_dataset( - dataset_name=dataset_name + "_other", - description="Other test dataset", - ) - with pytest.raises(ValueError, match="All examples must be in the same dataset"): - langchain_client.update_examples_multipart( - dataset_id=dataset.id, - updates=[ - ExampleUpsertWithAttachments( - id=example_ids[0], - inputs={"text": "update 1"}, - ), - ExampleUpsertWithAttachments( - id=uuid4(), - inputs={"text": "update 2"}, - ), - ], - ) - # Test updating non-existent example fails with pytest.raises(LangSmithNotFoundError): langchain_client.update_examples_multipart( dataset_id=dataset.id, updates=[ - ExampleUpsertWithAttachments( + ExampleUpdateWithAttachments( id=uuid4(), inputs={"text": "should fail"}, ) @@ -1902,4 +1883,3 @@ def test_update_examples_multipart(langchain_client: Client) -> None: # Clean up langchain_client.delete_dataset(dataset_id=dataset.id) - langchain_client.delete_dataset(dataset_id=other_dataset.id) From 6b9a0268be1ec6d754e88266a9a9cbb87c2db3ae Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 13:32:25 -0800 Subject: [PATCH 80/88] fix tests --- python/tests/integration_tests/test_client.py | 140 ++++++++---------- 1 file changed, 65 insertions(+), 75 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index c9c6f6587..696d6ebaf 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -64,7 +64,7 @@ def langchain_client() -> Client: "dataset_examples_multipart_enabled": True, "examples_multipart_enabled": True, } - } + }, ) @@ -1230,7 +1230,7 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None: "test_file": ("text/plain", b"test content"), }, ) - ] + ], ) # Get examples with attachments @@ -1744,9 +1744,11 @@ def test_update_examples_multipart(langchain_client: Client) -> None: description="Test dataset for multipart example updates", data_type=DataType.kv, ) + example_ids = [uuid4() for _ in range(2)] # First create some examples with attachments example_1 = ExampleUploadWithAttachments( + id=example_ids[0], inputs={"text": "hello world"}, attachments={ "file1": ("text/plain", b"original content 1"), @@ -1755,6 +1757,7 @@ def test_update_examples_multipart(langchain_client: Client) -> None: ) example_2 = ExampleUploadWithAttachments( + id=example_ids[1], inputs={"text": "second example"}, attachments={ "file3": ("text/plain", b"original content 3"), @@ -1768,7 +1771,6 @@ def test_update_examples_multipart(langchain_client: Client) -> None: assert created_examples["count"] == 2 examples = list(langchain_client.list_examples(dataset_id=dataset.id)) - example_ids = [ex.id for ex in examples] # Now create update operations update_1 = ExampleUpdateWithAttachments( @@ -1808,98 +1810,86 @@ def test_update_examples_multipart(langchain_client: Client) -> None: # Verify first example updates example_1_updated = next(ex for ex in updated if ex.id == example_ids[0]) assert example_1_updated.inputs["text"] == "updated hello world" - assert "renamed_file1" in example_1_updated.attachments_info - assert "new_file1" in example_1_updated.attachments_info - assert "file2" not in example_1_updated.attachments_info + assert "renamed_file1" in example_1_updated.attachments + assert "new_file1" in example_1_updated.attachments + assert "file2" not in example_1_updated.attachments assert ( - example_1_updated.attachments_info["renamed_file1"]["reader"].read() + example_1_updated.attachments["renamed_file1"]["reader"].read() == b"original content 1" ) assert ( - example_1_updated.attachments_info["new_file1"]["reader"].read() + example_1_updated.attachments["new_file1"]["reader"].read() == b"new content 1" ) # Verify second example updates example_2_updated = next(ex for ex in updated if ex.id == example_ids[1]) assert example_2_updated.inputs["text"] == "updated second example" - assert "file3" in example_2_updated.attachments_info - assert "new_file2" in example_2_updated.attachments_info - assert "file4" not in example_2_updated.attachments_info + assert "file3" in example_2_updated.attachments + assert "new_file2" in example_2_updated.attachments + assert "file4" not in example_2_updated.attachments assert ( - example_2_updated.attachments_info["file3"]["reader"].read() + example_2_updated.attachments["file3"]["reader"].read() == b"original content 3" ) assert ( - example_2_updated.attachments_info["new_file2"]["reader"].read() + example_2_updated.attachments["new_file2"]["reader"].read() == b"new content 2" ) - # Test updating examples in different datasets fails - other_dataset = langchain_client.create_dataset( - dataset_name=dataset_name + "_other", - description="Other test dataset", + # Test updating non-existent example doesn't do anything + response = langchain_client.update_examples_multipart( + dataset_id=dataset.id, + updates=[ + ExampleUpdateWithAttachments( + id=uuid4(), + inputs={"text": "should fail"}, + ) + ], ) - with pytest.raises(ValueError, match="All examples must be in the same dataset"): - langchain_client.update_examples_multipart( - dataset_id=dataset.id, - updates=[ - ExampleUpsertWithAttachments( - id=example_ids[0], - inputs={"text": "update 1"}, - ), - ExampleUpsertWithAttachments( - id=uuid4(), - inputs={"text": "update 2"}, - ), - ], - ) - - # Test updating non-existent example fails - with pytest.raises(LangSmithNotFoundError): - langchain_client.update_examples_multipart( - dataset_id=dataset.id, - updates=[ - ExampleUpsertWithAttachments( - id=uuid4(), - inputs={"text": "should fail"}, - ) - ], - ) + assert response["count"] == 0 - # Test updating with mismatch named attachments fails - with pytest.raises(ValueError): - langchain_client.update_examples_multipart( - dataset_id=dataset.id, - updates=[ - ExampleUpdateWithAttachments( - id=example_ids[0], - attachments={ - "renamed_file1": ("text/plain", b"new content 1"), - }, - attachments_operations=AttachmentsOperations( - retain=["renamed_file1"], - ), - ) - ], - ) + # Test new attachments have priority + response = langchain_client.update_examples_multipart( + dataset_id=dataset.id, + updates=[ + ExampleUpdateWithAttachments( + id=example_ids[0], + attachments={ + "renamed_file1": ("text/plain", b"new content 1"), + }, + attachments_operations=AttachmentsOperations( + retain=["renamed_file1"], + ), + ) + ], + ) + assert response["count"] == 1 + example_1_updated = langchain_client.read_example(example_ids[0]) + assert list(example_1_updated.attachments.keys()) == ["renamed_file1"] + assert ( + example_1_updated.attachments["renamed_file1"]["reader"].read() + == b"new content 1" + ) - with pytest.raises(ValueError): - langchain_client.update_examples_multipart( - dataset_id=dataset.id, - updates=[ - ExampleUpdateWithAttachments( - id=example_ids[0], - attachments={ - "foo": ("text/plain", b"new content 1"), - }, - attachments_operations=AttachmentsOperations( - rename={"renamed_file1": "foo"}, - ), - ) - ], - ) + # Test new attachments have priority + response = langchain_client.update_examples_multipart( + dataset_id=dataset.id, + updates=[ + ExampleUpdateWithAttachments( + id=example_ids[0], + attachments={ + "foo": ("text/plain", b"new content 1"), + }, + attachments_operations=AttachmentsOperations( + rename={"renamed_file1": "foo"}, + ), + ) + ], + ) + assert response["count"] == 1 + example_1_updated = langchain_client.read_example(example_ids[0]) + assert list(example_1_updated.attachments.keys()) == ["foo"] # Clean up langchain_client.delete_dataset(dataset_id=dataset.id) - langchain_client.delete_dataset(dataset_id=other_dataset.id) From 75736919347ee77fe883c497107af68e77862c15 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 13:37:24 -0800 Subject: [PATCH 81/88] x --- python/tests/integration_tests/test_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 7e65a8d34..1c145b600 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -59,7 +59,6 @@ def wait_for( def langchain_client() -> Client: get_env_var.cache_clear() return Client( - api_key="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", info={ "instance_flags": { "dataset_examples_multipart_enabled": True, From cf85e5659c78854835fa30ac283b21c8e96b4ebd Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 13:39:07 -0800 Subject: [PATCH 82/88] fix test --- python/tests/integration_tests/test_client.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 1c145b600..41a206f1c 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1608,9 +1608,10 @@ def test_update_example_with_attachments_operations(langchain_client: Client) -> dataset_name=dataset_name, description="Test dataset for updating example attachments", ) - + example_id = uuid4() # Create example with attachments example = ExampleUploadWithAttachments( + id=example_id, inputs={"query": "What's in this image?"}, outputs={"answer": "A test image"}, attachments={ @@ -1618,7 +1619,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) -> "image2": ("image/png", b"fake image data 2"), }, ) - created_example = langchain_client.upload_examples_multipart( + langchain_client.upload_examples_multipart( dataset_id=dataset.id, uploads=[example] ) @@ -1629,26 +1630,26 @@ def test_update_example_with_attachments_operations(langchain_client: Client) -> ) langchain_client.update_example( - example_id=created_example.id, + example_id=example_id, attachments_operations=attachments_operations, ) # Verify the update retrieved_example = langchain_client.read_example( - example_id=created_example.id, + example_id=example_id, ) # Check that only the renamed attachment exists - assert len(retrieved_example.attachments_info) == 2 - assert "renamed_image" in retrieved_example.attachments_info - assert "image2" in retrieved_example.attachments_info - assert "image1" not in retrieved_example.attachments_info + assert len(retrieved_example.attachments) == 2 + assert "renamed_image" in retrieved_example.attachments + assert "image2" in retrieved_example.attachments + assert "image1" not in retrieved_example.attachments assert ( - retrieved_example.attachments_info["image2"]["reader"].read() + retrieved_example.attachments["image2"]["reader"].read() == b"fake image data 2" ) assert ( - retrieved_example.attachments_info["renamed_image"]["reader"].read() + retrieved_example.attachments["renamed_image"]["reader"].read() == b"fake image data 1" ) From 5c7482948cea3fa60caa8930577baa0d50bb0c19 Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Tue, 10 Dec 2024 13:40:32 -0800 Subject: [PATCH 83/88] fix test_bulk_update_examples_with_attachments_operations --- python/tests/integration_tests/test_client.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 904a3c028..be5409782 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1666,8 +1666,10 @@ def test_bulk_update_examples_with_attachments_operations( description="Test dataset for bulk updating example attachments", ) + example_id1, example_id2 = uuid4(), uuid4() # Create two examples with attachments example1 = ExampleUploadWithAttachments( + id=example_id1, inputs={"query": "What's in this image?"}, outputs={"answer": "A test image 1"}, attachments={ @@ -1676,6 +1678,7 @@ def test_bulk_update_examples_with_attachments_operations( }, ) example2 = ExampleUploadWithAttachments( + id=example_id2, inputs={"query": "What's in this image?"}, outputs={"answer": "A test image 2"}, attachments={ @@ -1688,7 +1691,9 @@ def test_bulk_update_examples_with_attachments_operations( dataset_id=dataset.id, uploads=[example1, example2], ) - example_ids = [ex.id for ex in created_examples] + assert len(created_examples["example_ids"]) == 2 + assert str(example_id1) in created_examples["example_ids"] + assert str(example_id2) in created_examples["example_ids"] # Update both examples with different attachment operations attachments_operations = [ @@ -1699,7 +1704,7 @@ def test_bulk_update_examples_with_attachments_operations( ] langchain_client.update_examples( - example_ids=example_ids, + example_ids=[example_id1, example_id2], attachments_operations=attachments_operations, ) @@ -1707,27 +1712,29 @@ def test_bulk_update_examples_with_attachments_operations( updated_examples = list( langchain_client.list_examples( dataset_id=dataset.id, - example_ids=example_ids, + example_ids=[example_id1, example_id2], include_attachments=True, ) ) + updated_example_1 = next(ex for ex in updated_examples if ex.id == example_id1) + updated_example_2 = next(ex for ex in updated_examples if ex.id == example_id2) # Check first example - assert len(updated_examples[0].attachments) == 1 - assert "renamed_image1" in updated_examples[0].attachments - assert "extra" not in updated_examples[0].attachments + assert len(updated_example_1.attachments) == 1 + assert "renamed_image1" in updated_example_1.attachments + assert "extra" not in updated_example_1.attachments # Check second example - assert len(updated_examples[1].attachments) == 1 - assert "extra" in updated_examples[1].attachments - assert "image2" not in updated_examples[1].attachments + assert len(updated_example_2.attachments) == 1 + assert "extra" in updated_example_2.attachments + assert "image2" not in updated_example_2.attachments # Check attachment data assert ( - updated_examples[0].attachments["renamed_image1"][1].read() + updated_example_1.attachments["renamed_image1"]["reader"].read() == b"fake image data 1" ) - assert updated_examples[1].attachments["extra"][1].read() == b"extra data" + assert updated_example_2.attachments["extra"]["reader"].read() == b"extra data" # Clean up langchain_client.delete_dataset(dataset_id=dataset.id) From 266272d734c3258a11f6628b07a517fa09ecfc29 Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Tue, 10 Dec 2024 13:44:16 -0800 Subject: [PATCH 84/88] lint and fmt --- python/tests/integration_tests/test_client.py | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 983c072ca..803b88454 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -1619,9 +1619,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) -> "image2": ("image/png", b"fake image data 2"), }, ) - langchain_client.upload_examples_multipart( - dataset_id=dataset.id, uploads=[example] - ) + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) # Update example with attachment operations to rename and retain attachments attachments_operations = AttachmentsOperations( @@ -1645,8 +1643,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) -> assert "image2" in retrieved_example.attachments assert "image1" not in retrieved_example.attachments assert ( - retrieved_example.attachments["image2"]["reader"].read() - == b"fake image data 2" + retrieved_example.attachments["image2"]["reader"].read() == b"fake image data 2" ) assert ( retrieved_example.attachments["renamed_image"]["reader"].read() @@ -1778,8 +1775,6 @@ def test_update_examples_multipart(langchain_client: Client) -> None: ) assert created_examples["count"] == 2 - examples = list(langchain_client.list_examples(dataset_id=dataset.id)) - # Now create update operations update_1 = ExampleUpdateWithAttachments( id=example_ids[0], @@ -1822,12 +1817,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None: assert "new_file1" in example_1_updated.attachments assert "file2" not in example_1_updated.attachments assert ( - example_1_updated.attachments["new_file1"]["reader"].read() - == b"new content 1" + example_1_updated.attachments["new_file1"]["reader"].read() == b"new content 1" ) assert ( - example_1_updated.attachments["file1"]["reader"].read() - == b"original content 1" + example_1_updated.attachments["file1"]["reader"].read() == b"original content 1" ) # Verify second example updates @@ -1840,12 +1833,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None: assert "new_file2" in example_2_updated.attachments assert "file4" not in example_2_updated.attachments assert ( - example_2_updated.attachments["file3"]["reader"].read() - == b"original content 3" + example_2_updated.attachments["file3"]["reader"].read() == b"original content 3" ) assert ( - example_2_updated.attachments["new_file2"]["reader"].read() - == b"new content 2" + example_2_updated.attachments["new_file2"]["reader"].read() == b"new content 2" ) # Test updating non-existent example doesn't do anything From 61b28f5624753e19e41ef480420c7236741db92b Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 13:51:01 -0800 Subject: [PATCH 85/88] fix tests --- python/langsmith/evaluation/_arunner.py | 14 ++++++-- python/langsmith/evaluation/_runner.py | 34 +++++++++---------- python/tests/integration_tests/test_client.py | 26 +++++--------- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py index 311178576..25ea0d62a 100644 --- a/python/langsmith/evaluation/_arunner.py +++ b/python/langsmith/evaluation/_arunner.py @@ -648,7 +648,11 @@ async def awith_predictions( /, max_concurrency: Optional[int] = None, ) -> _AsyncExperimentManager: - _experiment_results = self._apredict(target, max_concurrency=max_concurrency) + _experiment_results = self._apredict( + target, + max_concurrency=max_concurrency, + include_attachments=_include_attachments(target), + ) r1, r2 = aitertools.atee(_experiment_results, 2, lock=asyncio.Lock()) return _AsyncExperimentManager( (pred["example"] async for pred in r1), @@ -723,7 +727,11 @@ async def aget_summary_scores(self) -> Dict[str, List[dict]]: ## Private methods async def _apredict( - self, target: ATARGET_T, /, max_concurrency: Optional[int] = None + self, + target: ATARGET_T, + /, + max_concurrency: Optional[int] = None, + include_attachments: bool = False, ) -> AsyncIterator[_ForwardResults]: fn = _ensure_async_traceable(target) @@ -736,7 +744,7 @@ async def predict_all(): self.experiment_name, self._metadata, self.client, - include_attachments=self._include_attachments, + include_attachments, ) async for result in aitertools.aiter_with_concurrency( diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index f78523718..2232feeff 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1402,7 +1402,10 @@ def with_predictions( """Lazily apply the target function to the experiment.""" context = copy_context() _experiment_results = context.run( - self._predict, target, max_concurrency=max_concurrency + self._predict, + target, + max_concurrency=max_concurrency, + include_attachments=_include_attachments(target), ) r1, r2 = itertools.tee(_experiment_results, 2) return _ExperimentManager( @@ -1501,6 +1504,7 @@ def _predict( target: TARGET_T, /, max_concurrency: Optional[int] = None, + include_attachments: bool = False, ) -> Generator[_ForwardResults, None, None]: """Run the target function on the examples.""" fn = _ensure_traceable(target) @@ -1514,7 +1518,7 @@ def _predict( self._metadata, self.client, self._upload_results, - self._include_attachments, + include_attachments, ) else: @@ -1528,7 +1532,7 @@ def _predict( self._metadata, self.client, self._upload_results, - self._include_attachments, + include_attachments, ) for example in self.examples ] @@ -1923,20 +1927,16 @@ def _evaluators_include_attachments( ) -> bool: if evaluators is None: return False - return any( - any( - p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) - and p.name == "attachments" - for p in ( - inspect.signature( - e.__call__ if hasattr(e, "__call__") else e - ).parameters.values() - if callable(e) or hasattr(e, "__call__") - else [] - ) - ) - for e in evaluators - ) + + def evaluator_has_attachments(evaluator: Union[EVALUATOR_T, AEVALUATOR_T]) -> bool: + sig = inspect.signature(evaluator) + params = list(sig.parameters.values()) + positional_params = [ + p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + ] + return any(p.name == "attachments" for p in positional_params) + + return any(evaluator_has_attachments(e) for e in evaluators) def _include_attachments( diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 41a206f1c..8ec45be52 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -59,6 +59,7 @@ def wait_for( def langchain_client() -> Client: get_env_var.cache_clear() return Client( + api_key="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", info={ "instance_flags": { "dataset_examples_multipart_enabled": True, @@ -1321,7 +1322,6 @@ def test_evaluate_with_attachments_not_in_target(langchain_client: Client) -> No ) example = ExampleUploadWithAttachments( - dataset_id=dataset.id, inputs={"question": "What is shown in the image?"}, outputs={"answer": "test image"}, attachments={ @@ -1329,10 +1329,9 @@ def test_evaluate_with_attachments_not_in_target(langchain_client: Client) -> No }, ) - langchain_client.upload_examples_multipart(uploads=[example]) + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) def target(inputs: Dict[str, Any]) -> Dict[str, Any]: - # Verify we receive the attachment data return {"answer": "test image"} def evaluator( @@ -1619,9 +1618,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) -> "image2": ("image/png", b"fake image data 2"), }, ) - langchain_client.upload_examples_multipart( - dataset_id=dataset.id, uploads=[example] - ) + langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example]) # Update example with attachment operations to rename and retain attachments attachments_operations = AttachmentsOperations( @@ -1645,8 +1642,7 @@ def test_update_example_with_attachments_operations(langchain_client: Client) -> assert "image2" in retrieved_example.attachments assert "image1" not in retrieved_example.attachments assert ( - retrieved_example.attachments["image2"]["reader"].read() - == b"fake image data 2" + retrieved_example.attachments["image2"]["reader"].read() == b"fake image data 2" ) assert ( retrieved_example.attachments["renamed_image"]["reader"].read() @@ -1771,8 +1767,6 @@ def test_update_examples_multipart(langchain_client: Client) -> None: ) assert created_examples["count"] == 2 - examples = list(langchain_client.list_examples(dataset_id=dataset.id)) - # Now create update operations update_1 = ExampleUpdateWithAttachments( id=example_ids[0], @@ -1815,12 +1809,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None: assert "new_file1" in example_1_updated.attachments assert "file2" not in example_1_updated.attachments assert ( - example_1_updated.attachments["new_file1"]["reader"].read() - == b"new content 1" + example_1_updated.attachments["new_file1"]["reader"].read() == b"new content 1" ) assert ( - example_1_updated.attachments["file1"]["reader"].read() - == b"original content 1" + example_1_updated.attachments["file1"]["reader"].read() == b"original content 1" ) # Verify second example updates @@ -1833,12 +1825,10 @@ def test_update_examples_multipart(langchain_client: Client) -> None: assert "new_file2" in example_2_updated.attachments assert "file4" not in example_2_updated.attachments assert ( - example_2_updated.attachments["file3"]["reader"].read() - == b"original content 3" + example_2_updated.attachments["file3"]["reader"].read() == b"original content 3" ) assert ( - example_2_updated.attachments["new_file2"]["reader"].read() - == b"new content 2" + example_2_updated.attachments["new_file2"]["reader"].read() == b"new content 2" ) # Test updating non-existent example doesn't do anything From 34e8bb92c0ccbe9dec3ffb55dfb7a732952a7b9f Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 14:04:24 -0800 Subject: [PATCH 86/88] fmt --- python/langsmith/evaluation/_runner.py | 2 +- python/tests/integration_tests/test_client.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 2232feeff..55f8865cb 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1928,7 +1928,7 @@ def _evaluators_include_attachments( if evaluators is None: return False - def evaluator_has_attachments(evaluator: Union[EVALUATOR_T, AEVALUATOR_T]) -> bool: + def evaluator_has_attachments(evaluator: Any) -> bool: sig = inspect.signature(evaluator) params = list(sig.parameters.values()) positional_params = [ diff --git a/python/tests/integration_tests/test_client.py b/python/tests/integration_tests/test_client.py index 8adf47182..f5f7ba878 100644 --- a/python/tests/integration_tests/test_client.py +++ b/python/tests/integration_tests/test_client.py @@ -59,7 +59,6 @@ def wait_for( def langchain_client() -> Client: get_env_var.cache_clear() return Client( - api_key="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", info={ "instance_flags": { "dataset_examples_multipart_enabled": True, From e043a7d4ec837445561a08f1eb938e5232863c08 Mon Sep 17 00:00:00 2001 From: isaac hershenson Date: Tue, 10 Dec 2024 14:15:32 -0800 Subject: [PATCH 87/88] fmt --- python/langsmith/evaluation/_runner.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index 55f8865cb..aea7a86cf 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1929,12 +1929,17 @@ def _evaluators_include_attachments( return False def evaluator_has_attachments(evaluator: Any) -> bool: - sig = inspect.signature(evaluator) - params = list(sig.parameters.values()) - positional_params = [ - p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) - ] - return any(p.name == "attachments" for p in positional_params) + try: + sig = inspect.signature(evaluator) + params = list(sig.parameters.values()) + positional_params = [ + p + for p in params + if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + ] + return any(p.name == "attachments" for p in positional_params) + except Exception: + return False return any(evaluator_has_attachments(e) for e in evaluators) From d77bd0e4855e0476350780889f74db2ff9e20eac Mon Sep 17 00:00:00 2001 From: Ankush Gola Date: Tue, 10 Dec 2024 14:37:45 -0800 Subject: [PATCH 88/88] remove blanket try/except --- python/langsmith/evaluation/_runner.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py index aea7a86cf..ea206b098 100644 --- a/python/langsmith/evaluation/_runner.py +++ b/python/langsmith/evaluation/_runner.py @@ -1929,17 +1929,14 @@ def _evaluators_include_attachments( return False def evaluator_has_attachments(evaluator: Any) -> bool: - try: - sig = inspect.signature(evaluator) - params = list(sig.parameters.values()) - positional_params = [ - p - for p in params - if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) - ] - return any(p.name == "attachments" for p in positional_params) - except Exception: + if not callable(evaluator): return False + sig = inspect.signature(evaluator) + params = list(sig.parameters.values()) + positional_params = [ + p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD) + ] + return any(p.name == "attachments" for p in positional_params) return any(evaluator_has_attachments(e) for e in evaluators)