-
Notifications
You must be signed in to change notification settings - Fork 504
feat(model-specific-tools): Add model-specific tool selection from TOML config #932
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
gspeter-max
wants to merge
4
commits into
PrimeIntellect-ai:main
Choose a base branch
from
gspeter-max:contribution/issue_#580
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
88ad41c
feat(model-specific-tools): Add model-specific tool selection from TO…
peter-luminova 136627b
fix(model-specific-tools): Fix critical timing and config issues
peter-luminova 8ac4f5c
fix(cursor-bot-issues): Fix all 5 remaining issues from Cursor bot re…
peter-luminova f1d9215
fix(cursor-bot-final): Fix last 2 Cursor bot issues
peter-luminova File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,183 @@ | ||
| """ | ||
| Integration tests for env_utils tool resolution functionality | ||
|
|
||
| Tests cover: | ||
| - Loading environment with string tools (resolved via registry) | ||
| - Loading environment with callable tools (passed through) | ||
| - Loading environment with no tools (backward compatibility) | ||
| - Error handling for mixed tool types | ||
| """ | ||
|
|
||
| import pytest | ||
|
|
||
| from verifiers.utils.env_utils import load_environment | ||
| from verifiers.utils.tool_registry import register_tool | ||
|
|
||
|
|
||
@pytest.fixture(autouse=True)
def clear_registry():
    """Empty the tool registry around every test in this module.

    The registry is cleared once before the test body runs and once more
    after it finishes, so tools registered by one test are not visible to
    the next one.
    """
    import verifiers.utils.tool_registry as registry_module

    def _wipe():
        # Look the attribute up on every call instead of caching the object.
        registry_module._tool_registry.clear()

    _wipe()  # setup: start from an empty registry
    yield
    _wipe()  # teardown: drop whatever the test registered
|
|
||
|
|
||
def test_load_environment_with_string_tools(clear_registry):
    """Tool names given as strings are looked up in the registry and attached."""

    # Two tools registered for the "tool-test" environment.
    @register_tool("tool-test", "test_tool_a")
    async def test_tool_a(x: int) -> int:
        return x + 1

    @register_tool("tool-test", "test_tool_b")
    async def test_tool_b(x: str) -> str:
        return x + "suffix"

    # Request the tools by name only.
    requested_names = ["test_tool_a", "test_tool_b"]
    loaded_env = load_environment("tool-test", tools=requested_names)

    # Exactly the two registered callables must end up on the environment.
    assert hasattr(loaded_env, "tools")
    assert len(loaded_env.tools) == 2
    for registered in (test_tool_a, test_tool_b):
        assert registered in loaded_env.tools
|
|
||
|
|
||
def test_load_environment_with_callable_tools(clear_registry):
    """Callables handed to ``load_environment`` are attached without a registry lookup."""

    # Plain coroutine functions; deliberately NOT registered.
    async def direct_tool_a(x: int) -> int:
        return x + 1

    async def direct_tool_b(x: str) -> str:
        return x + "suffix"

    supplied_tools = [direct_tool_a, direct_tool_b]
    loaded_env = load_environment("tool-test", tools=supplied_tools)

    # The very same function objects must be present on the environment.
    assert hasattr(loaded_env, "tools")
    assert len(loaded_env.tools) == 2
    for supplied in (direct_tool_a, direct_tool_b):
        assert supplied in loaded_env.tools
|
|
||
|
|
||
def test_load_environment_no_tools(clear_registry):
    """Omitting ``tools`` keeps the environment's default tools (backward compatibility)."""
    # tool-test environment has 4 default tools
    expected_default_count = 4

    # No ``tools`` keyword at all: the pre-existing call style.
    loaded_env = load_environment("tool-test")

    assert hasattr(loaded_env, "tools")
    assert len(loaded_env.tools) == expected_default_count
|
|
||
|
|
||
def test_load_environment_empty_tool_list(clear_registry):
    """An explicit empty ``tools`` list yields an environment with no tools."""
    no_tools = []
    loaded_env = load_environment("tool-test", tools=no_tools)

    # The attribute must still exist, just with nothing in it.
    assert hasattr(loaded_env, "tools")
    assert len(loaded_env.tools) == 0
|
|
||
|
|
||
def test_mixed_tool_types_error(clear_registry):
    """A ``tools`` list mixing callables and names is rejected with ``TypeError``."""

    # An unregistered callable ...
    async def my_tool(x: int) -> int:
        return x + 1

    # ... and a tool that is only reachable by its registered name.
    @register_tool("tool-test", "registered_tool")
    async def registered_tool() -> str:
        return "registered"

    # Both orderings must fail: callable first, then name first.
    mixed_orderings = (
        [my_tool, "registered_tool"],
        ["registered_tool", my_tool],
    )
    for mixed_tools in mixed_orderings:
        with pytest.raises(TypeError, match="tools must be all Callable or all str"):
            load_environment("tool-test", tools=mixed_tools)
|
|
||
|
|
||
def test_invalid_tool_name_in_registry(clear_registry):
    """Requesting a name that was never registered raises ``KeyError``."""

    # Register one tool so the "tool-test" environment has a registry entry.
    @register_tool("tool-test", "valid_tool")
    async def valid_tool(x: int) -> int:
        return x + 1

    # One known name plus one unknown name; the error must name the unknown one.
    requested_names = ["valid_tool", "nonexistent_tool"]
    expected_message = r"Tools \['nonexistent_tool'\] not found"
    with pytest.raises(KeyError, match=expected_message):
        load_environment("tool-test", tools=requested_names)
|
|
||
|
|
||
def test_invalid_tool_type_error(clear_registry):
    """Entries that are neither callables nor strings are rejected with ``TypeError``."""
    # Integers are neither Callable nor str.
    bogus_tools = [123, 456]
    expected_message = "tools must be list of Callable or list of str"

    with pytest.raises(TypeError, match=expected_message):
        load_environment("tool-test", tools=bogus_tools)
|
|
||
|
|
||
def test_environment_with_other_args(clear_registry):
    """Test that the tools parameter works alongside other environment arguments.

    Only the tool selection is asserted here. The extra keyword arguments are
    passed to prove that they can be combined with ``tools`` without raising;
    their effect on the dataset is not checked by this test.
    """

    # Register a tool
    @register_tool("tool-test", "custom_tool")
    async def custom_tool() -> str:
        return "custom"

    # Load environment with tools and other args
    env = load_environment(
        "tool-test",
        tools=["custom_tool"],
        num_train_examples=50,
        num_eval_examples=10,
    )

    # Exactly the one requested tool must be attached. An exact count (rather
    # than ``>= 1``) matches the other tests in this module and catches
    # default tools leaking in next to the requested one.
    assert hasattr(env, "tools")
    assert len(env.tools) == 1
    assert custom_tool in env.tools
    # NOTE(review): num_train_examples / num_eval_examples are not asserted;
    # the resulting dataset sizes depend on the tool-test env implementation.
|
|
||
|
|
||
def test_single_string_tool(clear_registry):
    """Test loading environment with a single string tool name."""

    @register_tool("tool-test", "single_tool")
    async def single_tool(x: int) -> int:
        return x * 2

    env = load_environment("tool-test", tools=["single_tool"])

    assert hasattr(env, "tools")
    # Membership alone would also pass if default tools were still attached;
    # pin the count as the multi-tool test in this module does.
    assert len(env.tools) == 1
    assert single_tool in env.tools
|
|
||
|
|
||
def test_single_callable_tool(clear_registry):
    """Test loading environment with a single callable tool."""

    async def my_tool() -> str:
        return "result"

    env = load_environment("tool-test", tools=[my_tool])

    assert hasattr(env, "tools")
    # Membership alone would also pass if default tools were still attached;
    # pin the count as the multi-tool test in this module does.
    assert len(env.tools) == 1
    assert my_tool in env.tools
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.