diff --git a/python/README.md b/python/README.md
index cbf999404..3076e168f 100644
--- a/python/README.md
+++ b/python/README.md
@@ -345,7 +345,7 @@ def my_function(text: str):
 my_function("hello world")
 ```
 
-# Instructor
+## Instructor
 
 We provide a convenient integration with [Instructor](https://jxnl.github.io/instructor/), largely by virtue of it essentially just using the OpenAI SDK.
 
@@ -417,6 +417,14 @@ def my_function(text: str) -> UserDetail:
 my_function("Jason is 25 years old")
 ```
 
+## Pytest Plugin
+
+The LangSmith pytest plugin lets Python developers define their datasets and evaluations as pytest test cases.
+See the [online docs](https://docs.smith.langchain.com/evaluation/how_to_guides/pytest) for more information.
+
+This plugin is installed as part of the LangSmith SDK, and is enabled by default.
+See also the official pytest docs: [How to install and use plugins](https://docs.pytest.org/en/stable/how-to/plugins.html).
+
 ## Additional Documentation
 
 To learn more about the LangSmith platform, check out the [docs](https://docs.smith.langchain.com/docs/).
diff --git a/python/langsmith/pytest_plugin.py b/python/langsmith/pytest_plugin.py
index fdd339a05..9687c7357 100644
--- a/python/langsmith/pytest_plugin.py
+++ b/python/langsmith/pytest_plugin.py
@@ -2,6 +2,7 @@
 
 import importlib.util
 import json
+import logging
 import os
 import time
 from collections import defaultdict
@@ -12,26 +13,33 @@
 from langsmith import utils as ls_utils
 from langsmith.testing._internal import test as ls_test
 
+logger = logging.getLogger(__name__)
+
 
 def pytest_addoption(parser):
-    """Set CLI options for choosing output format."""
-    group = parser.getgroup("langsmith", "LangSmith")
-    group.addoption(
-        "--output",
-        action="store",
-        default="pytest",
-        choices=["langsmith", "ls", "pytest"],
-        help=(
-            "Choose output format: 'langsmith' | 'ls' "
-            "(rich custom LangSmith output) or 'pytest' "
-            "(standard pytest). Defaults to 'pytest'."
-        ),
-    )
+    """Set a boolean flag for LangSmith output.
+
+    Skip if --langsmith-output is already defined.
+    """
+    try:
+        # Try to add the option, will raise if it already exists
+        group = parser.getgroup("langsmith", "LangSmith")
+        group.addoption(
+            "--langsmith-output",
+            action="store_true",
+            default=False,
+            help="Use LangSmith output (requires 'rich').",
+        )
+    except ValueError:
+        # Option already exists
+        logger.warning(
+            "LangSmith output flag cannot be added because it's already defined."
+        )
 
 
 def _handle_output_args(args):
     """Handle output arguments."""
-    if any(opt in args for opt in ["--output=langsmith", "--output=ls"]):
+    if "--langsmith-output" in args:
         # Only add --quiet if it's not already there
         if not any(a in args for a in ["-q", "--quiet"]):
             args.insert(0, "--quiet")
@@ -82,7 +90,7 @@ def pytest_report_teststatus(report, config):
     """Remove the short test-status character outputs ("./F")."""
     # The hook normally returns a 3-tuple: (short_letter, verbose_word, color)
     # By returning empty strings, the progress characters won't show.
-    if config.getoption("--output") in ("langsmith", "ls"):
+    if config.getoption("--langsmith-output"):
         return "", "", ""
 
 
@@ -301,23 +309,24 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "langsmith: mark test to be tracked in LangSmith"
     )
-    if config.getoption("--output") in ("langsmith", "ls"):
+    if config.getoption("--langsmith-output"):
         if not importlib.util.find_spec("rich"):
             msg = (
-                "Must have 'rich' installed to use --output='langsmith' | 'ls'. "
+                "Must have 'rich' installed to use --langsmith-output. "
                 "Please install with: `pip install -U 'langsmith[pytest]'`"
             )
             raise ValueError(msg)
         if os.environ.get("PYTEST_XDIST_TESTRUNUID"):
             msg = (
-                "--output='langsmith' | 'ls' not supported with pytest-xdist. "
-                "Please remove the '--output' option or '-n' option."
+                "--langsmith-output not supported with pytest-xdist. "
+                "Please remove the '--langsmith-output' option or '-n' option."
             )
             raise ValueError(msg)
         if ls_utils.test_tracking_is_disabled():
             msg = (
-                "--output='langsmith' | 'ls' not supported when env var"
-                "LANGSMITH_TEST_TRACKING='false'. Please remove the '--output' option "
+                "--langsmith-output not supported when env var "
+                "LANGSMITH_TEST_TRACKING='false'. Please remove the "
+                "'--langsmith-output' option "
                 "or enable test tracking."
             )
             raise ValueError(msg)
diff --git a/python/tests/evaluation/test_decorator.py b/python/tests/evaluation/test_decorator.py
new file mode 100644
index 000000000..a43320ba6
--- /dev/null
+++ b/python/tests/evaluation/test_decorator.py
@@ -0,0 +1,83 @@
+import os
+
+import pytest
+
+from langsmith import testing as t
+
+
+@pytest.mark.skipif(
+    not os.getenv("LANGSMITH_TRACING"),
+    reason="LANGSMITH_TRACING environment variable not set",
+)
+@pytest.mark.langsmith
+@pytest.mark.parametrize("c", list(range(10)))
+async def test_addition_single(c):
+    x = 3
+    y = 4
+    t.log_inputs({"x": x, "y": y, "c": c})
+
+    expected = 7 + c
+    t.log_reference_outputs({"sum": expected})
+
+    actual = x + y + c
+    t.log_outputs({"sum": actual})
+
+    t.log_feedback(key="foo", score=1)
+
+    assert actual == expected
+
+
+async def my_app():
+    return "hello"
+
+
+@pytest.mark.skipif(
+    not os.getenv("LANGSMITH_TRACING"),
+    reason="LANGSMITH_TRACING environment variable not set",
+)
+@pytest.mark.langsmith
+async def test_openai_says_hello():
+    # Traced code will be included in the test case
+    text = "Say hello!"
+    response = await my_app()
+    t.log_inputs({"text": text})
+    t.log_outputs({"response": response})
+    t.log_reference_outputs({"response": "hello!"})
+
+    # Use this context manager to trace any steps used for generating evaluation
+    # feedback separately from the main application logic
+    with t.trace_feedback():
+        grade = 1 if "hello" in response else 0
+        t.log_feedback(key="llm_judge", score=grade)
+
+    assert "hello" in response.lower()
+
+
+@pytest.mark.skipif(
+    not os.getenv("LANGSMITH_TRACING"),
+    reason="LANGSMITH_TRACING environment variable not set",
+)
+@pytest.mark.xfail(reason="Test failure output case")
+@pytest.mark.langsmith(output_keys=["expected"])
+@pytest.mark.parametrize(
+    "a, b, expected",
+    [
+        (1, 2, 3),
+        (3, 4, 7),
+    ],
+)
+async def test_addition_parametrized(a: int, b: int, expected: int):
+    t.log_outputs({"sum": a + b})
+    assert a + b != expected
+
+
+@pytest.mark.skipif(
+    not os.getenv("LANGSMITH_TRACING"),
+    reason="LANGSMITH_TRACING environment variable not set",
+)
+@pytest.mark.langsmith
+@pytest.mark.parametrize("a,b", [[i, i] for i in range(20)])
+def test_param(a, b):
+    t.log_outputs({"sum": a + b})
+    t.log_reference_outputs({"sum": a + b})
+    assert a + b == a + b