-
Notifications
You must be signed in to change notification settings - Fork 505
feat(retry): connect retry configuration to maybe_retry calls #936
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
gspeter-max
wants to merge
3
commits into
PrimeIntellect-ai:main
Choose a base branch
from
gspeter-max:verifiersContirbuting/issue_#577
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
a84e015
feat(retry): add configurable retry mechanism with exponential backof…
peter-luminova 0df8778
test(retry): fix test_multiple_error_types_in_retry to match maybe_re…
peter-luminova 4177c39
feat(retry): connect retry configuration to maybe_retry calls
peter-luminova File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,161 @@ | ||
| """Tests for rate limit error handling and retry mechanism.""" | ||
|
|
||
| import httpx | ||
| import pytest | ||
| from openai import RateLimitError as OpenAIRateLimitError | ||
|
|
||
| from verifiers.errors import RateLimitError as VFRateLimitError | ||
| from verifiers.types import EvalConfig | ||
| from verifiers.utils.async_utils import maybe_retry | ||
|
|
||
|
|
||
def _make_rate_limit_error() -> OpenAIRateLimitError:
    """Build an OpenAI ``RateLimitError`` backed by a synthetic HTTP 429 response."""
    request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions")
    payload = {"error": {"message": "Too many requests", "type": "rate_limit_error"}}
    response = httpx.Response(status_code=429, request=request, json=payload)
    return OpenAIRateLimitError("Rate limit exceeded", response=response, body=None)
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_rate_limit_error_retries_with_config():
    """Test that RateLimitError triggers retry when configured."""
    attempts = 0

    async def failing_func():
        # Fail twice with a rate-limit error, then succeed on the third call.
        nonlocal attempts
        attempts += 1
        if attempts < 3:
            return {"error": VFRateLimitError("Rate limited")}
        return {"result": "success"}

    result = await maybe_retry(failing_func, max_retries=3, initial=0.01)()

    # Two retries consumed before success: three calls total.
    assert attempts == 3
    assert result["result"] == "success"
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_rate_limit_error_exhaustion_returns_error():
    """Test that exhausted retries return error in state."""

    async def always_failing_func():
        return {"error": VFRateLimitError("Always rate limited")}

    retrying = maybe_retry(always_failing_func, max_retries=2, initial=0.01)
    outcome = await retrying()

    # After retries are exhausted the error is surfaced, not raised.
    assert "error" in outcome
    assert isinstance(outcome["error"], VFRateLimitError)
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_no_retry_when_max_retries_zero():
    """Test that max_retries=0 disables retry."""
    invocations = 0

    async def failing_func():
        nonlocal invocations
        invocations += 1
        return {"error": VFRateLimitError("Rate limited")}

    outcome = await maybe_retry(failing_func, max_retries=0)()

    # Exactly one invocation proves no retry was attempted.
    assert invocations == 1
    assert "error" in outcome
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_jitter_configuration():
    """Test that jitter can be disabled."""

    async def failing_func():
        return {"error": VFRateLimitError("Rate limited")}

    # Both jitter settings must complete without raising and surface the error.
    for jitter in (True, False):
        wrapped = maybe_retry(failing_func, max_retries=1, initial=0.01, jitter=jitter)
        result = await wrapped()
        assert "error" in result
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_multiple_error_types_in_retry():
    """Test that multiple error types can be retried."""
    from verifiers.errors import InfraError

    attempts = 0

    async def multi_error_func():
        # First call: rate-limit error; second: infra error; then success.
        nonlocal attempts
        attempts += 1
        if attempts == 1:
            return {"error": VFRateLimitError("Rate limited")}
        if attempts == 2:
            return {"error": InfraError("Infra error")}
        return {"result": "success"}

    wrapped = maybe_retry(
        multi_error_func,
        max_retries=3,
        initial=0.01,
        error_types=(VFRateLimitError, InfraError),
    )
    outcome = await wrapped()

    assert outcome["result"] == "success"
    assert attempts == 3
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_retry_configuration_values_are_used():
    """Test that EvalConfig accepts and stores retry timing parameters."""
    from verifiers.types import ClientConfig

    # Non-default retry settings, passed alongside the required eval fields.
    config = EvalConfig(
        env_id="test_env",
        env_args={},
        env_dir_path="/tmp/test",
        model="gpt-4",
        client_config=ClientConfig(api_key_var="TEST_KEY"),
        sampling_args={},
        num_examples=1,
        rollouts_per_example=1,
        max_concurrent=1,
        retry_base_delay=2.0,
        retry_max_backoff=30.0,
        retry_jitter=False,
    )

    # The overrides must round-trip through the config unchanged.
    assert config.retry_base_delay == 2.0
    assert config.retry_max_backoff == 30.0
    assert config.retry_jitter is False
|
|
||
|
|
||
@pytest.mark.asyncio
async def test_retry_configuration_defaults():
    """Test that EvalConfig has correct default values for retry timing."""
    from verifiers.types import ClientConfig

    base_kwargs = dict(
        env_id="test_env",
        env_args={},
        env_dir_path="/tmp/test",
        model="gpt-4",
        client_config=ClientConfig(api_key_var="TEST_KEY"),
        sampling_args={},
        num_examples=1,
        rollouts_per_example=1,
        max_concurrent=1,
    )
    config = EvalConfig(**base_kwargs)

    # Verify defaults match maybe_retry defaults
    assert config.retry_base_delay == 1.0
    assert config.retry_max_backoff == 60.0
    assert config.retry_jitter is True
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Anthropic rate limit check is unreachable dead code
High Severity
The Anthropic SDK raises
`anthropic.RateLimitError` (not `BadRequestError`) for HTTP 429 responses. The new check for `e.response.status_code == 429` inside `except BadRequestError` is unreachable — a `BadRequestError` will never have a 429 status code. Anthropic rate limit errors are not imported or caught, so they fall through to the generic `except Exception` in `client.py` and become a `ModelError`, bypassing the retry mechanism entirely. The OpenAI client correctly imports and catches a separate `RateLimitError` exception.