python[minor]: pytest integration #1362

Merged
merged 65 commits into main from bagatur/rfc_set_test_vals on Jan 21, 2025
Changes from 1 commit
Commits (65)
ed71173
rfc: manually set test case inputs/outputs
baskaryan Dec 30, 2024
80cbf66
fmt
baskaryan Dec 31, 2024
b33f75a
fmt
baskaryan Jan 2, 2025
37f43cb
fmt
baskaryan Jan 2, 2025
cf82fd6
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 2, 2025
80d4205
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 3, 2025
fa8882f
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 3, 2025
6b999b4
fmt
baskaryan Jan 3, 2025
cbbf3a3
fmt
baskaryan Jan 3, 2025
a61d7d0
fmt
baskaryan Jan 3, 2025
cf37a91
fmt
baskaryan Jan 3, 2025
c9addf0
fmt
baskaryan Jan 3, 2025
d814ec5
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 6, 2025
8700d4b
fmt
baskaryan Jan 6, 2025
376a645
fmt
baskaryan Jan 6, 2025
81e41f4
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 8, 2025
d5d4ebb
rc release
baskaryan Jan 8, 2025
6940ace
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 8, 2025
d3ed9c6
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 8, 2025
bd72391
add better error messaging
isahers1 Jan 8, 2025
542d76e
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 8, 2025
aaf41c9
fmt
baskaryan Jan 8, 2025
d7beea8
wait example updates
baskaryan Jan 8, 2025
c4de666
Merge branch 'bagatur/rfc_set_test_vals' of github.com:langchain-ai/l…
baskaryan Jan 8, 2025
44897c7
fmt
baskaryan Jan 9, 2025
2192c09
fmt
baskaryan Jan 9, 2025
7b433a2
fmt
baskaryan Jan 9, 2025
48670e3
rc2
baskaryan Jan 9, 2025
78eeabf
fmt
baskaryan Jan 9, 2025
e1c9d7c
fmt
baskaryan Jan 9, 2025
400e38b
pytest plugin
baskaryan Jan 10, 2025
9250bbf
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 10, 2025
414ef69
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 10, 2025
1446452
fmt
baskaryan Jan 11, 2025
790617d
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 11, 2025
37ceca0
fmt
baskaryan Jan 13, 2025
1fca587
rc6
baskaryan Jan 13, 2025
88e6f63
fmt
baskaryan Jan 13, 2025
43509c2
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 14, 2025
233ae72
update table
baskaryan Jan 14, 2025
996e21f
fmt
baskaryan Jan 15, 2025
d79bec9
Merge branch 'main' into bagatur/rfc_set_test_vals
baskaryan Jan 15, 2025
99069fb
group by test suite
baskaryan Jan 15, 2025
6076c29
fmt
baskaryan Jan 15, 2025
d4d1695
fmt
baskaryan Jan 15, 2025
441a8b1
fmt
baskaryan Jan 15, 2025
05dbf16
fmt
baskaryan Jan 15, 2025
82cd582
rc7
baskaryan Jan 15, 2025
e56a68f
fmt
baskaryan Jan 15, 2025
d049218
update api ref
baskaryan Jan 16, 2025
e400c93
fix LANGSMITH_TEST_TRACKING=false
baskaryan Jan 16, 2025
5752078
fmt
baskaryan Jan 16, 2025
8f336f4
fix entrypoint
baskaryan Jan 16, 2025
a5ad0ef
rm script
baskaryan Jan 17, 2025
6aa0062
rename plugin
baskaryan Jan 17, 2025
65dbe26
rename output plugin
baskaryan Jan 17, 2025
3773cb8
rc12
baskaryan Jan 17, 2025
48ff3c9
fix val logging
baskaryan Jan 18, 2025
e7675c1
rc13
baskaryan Jan 19, 2025
2bc8ad5
rc14
baskaryan Jan 20, 2025
66c7799
rc15
baskaryan Jan 20, 2025
ce08bfa
cr
baskaryan Jan 20, 2025
b2f6f87
merge
baskaryan Jan 20, 2025
4084685
nit
baskaryan Jan 20, 2025
341798c
merge
baskaryan Jan 21, 2025
group by test suite
baskaryan committed Jan 15, 2025
commit 99069fbc50ce74e44891249b245eca7c0f5380d4
48 changes: 35 additions & 13 deletions python/langsmith/pytest_plugin.py
@@ -70,13 +70,20 @@ def __init__(self):
from rich.console import Console # type: ignore[import-not-found]
from rich.live import Live # type: ignore[import-not-found]

self.test_suites: dict[str, list[str]] = defaultdict(list)
self.test_suite_urls: dict[str, str] = {}

self.process_status = {} # Track process status
self.status_lock = Lock() # Thread-safe updates
self.console = Console()

self.live = Live(self.generate_table(), refresh_per_second=4)
self.live = Live(self.generate_tables(), refresh_per_second=4)
self.live.start()

def add_process_to_test_suite(self, test_suite, process_id):
"""Group a test case with its test suite."""
self.test_suites[test_suite].append(process_id)

def update_process_status(self, process_id, status):
"""Update test results."""
with self.status_lock:
@@ -102,17 +109,34 @@ def update_process_status(self, process_id, status):
**status.pop("outputs"),
}
self.process_status[process_id] = {**current_status, **status}
self.live.update(self.generate_table())
self.live.update(self.generate_tables())

def pytest_runtest_logstart(self, nodeid):
"""Initialize live display when first test starts."""
self.update_process_status(nodeid, {"status": "running"})

def generate_table(self):
def generate_tables(self):
"""Generate a collection of tables—one per suite.

Returns a 'Group' object so it can be rendered simultaneously by Rich Live.
"""
from rich.console import Group

tables = []
for suite_name in self.test_suites:
table = self._generate_table(suite_name)
tables.append(table)
return Group(*tables)

def _generate_table(self, suite_name: str):
"""Generate results table."""
from rich.table import Table # type: ignore[import-not-found]

table = Table()
process_ids = self.test_suites[suite_name]

table = Table(
title=f"[link={self.test_suite_urls[suite_name]}]{suite_name}[/link]"
)
table.add_column("Test")
table.add_column("Inputs")
table.add_column("Ref outputs")
@@ -128,7 +152,9 @@ def generate_table(self):
now = time.time()
durations = []
numeric_feedbacks = defaultdict(list)
for pid, status in self.process_status.items():
# Gather data only for this suite
suite_statuses = {pid: self.process_status[pid] for pid in process_ids}
for pid, status in suite_statuses.items():
duration = status.get("end_time", now) - status.get("start_time", now)
durations.append(duration)
feedback = "\n".join(
@@ -141,12 +167,8 @@ def generate_table(self):
max_status = max(len(status.get("status", "queued")), max_status)
max_feedback = max(len(feedback), max_feedback)

passed_count = sum(
s.get("status") == "passed" for s in self.process_status.values()
)
failed_count = sum(
s.get("status") == "failed" for s in self.process_status.values()
)
passed_count = sum(s.get("status") == "passed" for s in suite_statuses.values())
failed_count = sum(s.get("status") == "failed" for s in suite_statuses.values())

# You could arrange a row to show the aggregated data—here, in the last column:
if passed_count + failed_count:
@@ -172,7 +194,7 @@ def generate_table(self):
self.console.width - (max_status + max_feedback + max_duration)
) // 4

for pid, status in self.process_status.items():
for pid, status in suite_statuses.items():
status_color = {
"running": "yellow",
"passed": "green",
@@ -201,7 +223,7 @@ def generate_table(self):
table.add_row("", "", "", "", "", "", "")
# Finally, our “footer” row:
table.add_row(
"[bold]Results[/bold]",
"[bold]Summary[/bold]",
"",
"",
"",
11 changes: 11 additions & 0 deletions python/langsmith/testing/_internal.py
@@ -645,6 +645,11 @@ def __init__(
self.pytest_plugin = pytest_plugin
self.pytest_nodeid = pytest_nodeid

if pytest_plugin and pytest_nodeid:
pytest_plugin.add_process_to_test_suite(
test_suite._dataset.name, pytest_nodeid
)

def sync_example(
self, *, inputs: Optional[dict] = None, outputs: Optional[dict] = None
) -> None:
@@ -752,6 +757,12 @@ def _create_test_case(
else None
)
pytest_nodeid = pytest_request.node.nodeid if pytest_request else None
if pytest_plugin:
pytest_plugin.test_suite_urls[test_suite._dataset.name] = (
test_suite._dataset.url
+ "/compare?selectedSessions="
+ str(test_suite.experiment_id)
)
return _TestCase(
test_suite,
example_id,
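
Taken together, the two _internal.py hunks register each test case with the plugin's per-suite bookkeeping and record the experiment-comparison URL that the suite's table title links to. A minimal sketch of what ends up stored, with hypothetical values standing in for test_suite._dataset, test_suite.experiment_id, and the pytest node ID (the URL shape is the concatenation shown in _create_test_case above):

from collections import defaultdict

# Hypothetical values; the real code reads them from the test suite and pytest request.
dataset_name = "My Test Suite"                                # test_suite._dataset.name
dataset_url = "https://smith.langchain.com/datasets/abc123"   # test_suite._dataset.url
experiment_id = "1111-2222"                                    # test_suite.experiment_id
nodeid = "tests/test_agent.py::test_plan"                      # pytest_request.node.nodeid

# Plugin-side structures touched by this commit.
test_suites: dict[str, list[str]] = defaultdict(list)
test_suite_urls: dict[str, str] = {}

# What add_process_to_test_suite does: group the test case under its suite...
test_suites[dataset_name].append(nodeid)

# ...and what _create_test_case records: a link to the experiment comparison view.
test_suite_urls[dataset_name] = (
    dataset_url + "/compare?selectedSessions=" + str(experiment_id)
)

print(test_suite_urls[dataset_name])
# -> https://smith.langchain.com/datasets/abc123/compare?selectedSessions=1111-2222
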