diff --git a/docs/configuration.md b/docs/configuration.md index 2886e32..5cdff26 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -115,15 +115,19 @@ is longer: the computed backoff or the server hint). from pollux import Options options = Options( + system_instruction="You are a concise analyst.", # Optional global behavior guide response_schema=MyPydanticModel, # Structured output extraction reasoning_effort="medium", # Reserved for future provider support - delivery_mode="realtime", # "deferred" reserved for v1.1+ + delivery_mode="realtime", # "deferred" reserved for future provider batch APIs ) ``` See [Sources and Patterns](sources-and-patterns.md#structured-output) for a complete structured output example. +Conversation options are provider-dependent in v1.1: OpenAI supports +`history`/`continue_from`; Gemini remains unsupported. + ## Safety Notes - `Config` is immutable (`frozen=True`). Create a new instance to change values. diff --git a/docs/overrides/home.html b/docs/overrides/home.html index f8f0880..077221d 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -20,6 +20,7 @@ .md-footer { margin-left: 0 !important; margin-right: 0 !important; + max-width: none !important; padding-inline: 0; } diff --git a/docs/reference/provider-capabilities.md b/docs/reference/provider-capabilities.md index b71e7c0..cfe31e7 100644 --- a/docs/reference/provider-capabilities.md +++ b/docs/reference/provider-capabilities.md @@ -1,16 +1,16 @@ # Provider Capabilities -This page defines the v1.0 capability contract by provider. +This page defines the v1.1 capability contract by provider. Pollux is **capability-transparent**, not capability-equalizing: providers are allowed to differ, and those differences are surfaced clearly. -## v1.0 Policy +## v1.1 Policy - Provider feature parity is **not** required for release. - Unsupported features must fail fast with clear errors. - New provider features do not require immediate cross-provider implementation. -## Capability Matrix (v1.0) +## Capability Matrix (v1.1) | Capability | Gemini | OpenAI | Notes | |---|---|---|---| @@ -19,20 +19,20 @@ Pollux is **capability-transparent**, not capability-equalizing: providers are a | Local file inputs | ✅ | ✅ | OpenAI uses Files API upload | | PDF URL inputs | ✅ (via URI part) | ✅ (native `input_file.file_url`) | | | Image URL inputs | ✅ (via URI part) | ✅ (native `input_image.image_url`) | | -| YouTube URL inputs | ✅ | ⚠️ limited | OpenAI parity layer (download/re-upload) is out of scope for v1.0 | +| YouTube URL inputs | ✅ | ⚠️ limited | OpenAI parity layer (download/re-upload) is out of scope for v1.1 | | Provider-side context caching | ✅ | ❌ | OpenAI provider returns unsupported for caching | | Structured outputs (`response_schema`) | ✅ | ✅ | JSON-schema path in both providers | | Reasoning controls (`reasoning_effort`) | ❌ | ❌ | Reserved for future provider enablement | -| Deferred delivery (`delivery_mode="deferred"`) | ❌ | ❌ | Explicitly disabled in v1.0 | -| Conversation continuity (`history`, `continue_from`) | ❌ | ❌ | Reserved/disabled in v1.0 | +| Deferred delivery (`delivery_mode="deferred"`) | ❌ | ❌ | Explicitly disabled in v1.1 | +| Conversation continuity (`history`, `continue_from`) | ❌ | ✅ | OpenAI-native continuation; single prompt per call | ## Important OpenAI Notes - Pollux uploads local files with: - `purpose="user_data"` - finite `expires_after` metadata -- Automatic file deletion is not part of v1.0 yet. -- Remote URL support in v1.0 is intentionally narrow and explicit: +- Automatic file deletion is not part of v1.1 yet. +- Remote URL support in v1.1 is intentionally narrow and explicit: - PDFs - images @@ -48,7 +48,7 @@ from pollux import Config config = Config( provider="openai", model="gpt-5-nano", - enable_caching=True, # not supported for OpenAI in v1.0 + enable_caching=True, # not supported for OpenAI in v1.1 ) # At execution time, Pollux raises: # ConfigurationError: Provider does not support caching diff --git a/docs/sources-and-patterns.md b/docs/sources-and-patterns.md index d762b92..bea9fda 100644 --- a/docs/sources-and-patterns.md +++ b/docs/sources-and-patterns.md @@ -165,10 +165,10 @@ Example of a complete envelope: } ``` -## v1.0 Notes +## v1.1 Notes -- Conversation continuity (`history`, `continue_from`) is reserved and - disabled in v1.0. -- `delivery_mode="deferred"` is reserved and disabled in v1.0. +- Conversation continuity (`history`, `continue_from`) is currently + OpenAI-only and supports one prompt per call. +- `delivery_mode="deferred"` remains reserved and disabled. - Provider feature support varies. See [Provider Capabilities](reference/provider-capabilities.md). diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index 9d83161..fcb42c2 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -11,7 +11,7 @@ --pollux-sidebar-bottom-space: 1.25rem; --pollux-nav-gutter: 0.25rem; --pollux-nav-indent: 0.45rem; - --pollux-nav-link-margin-top: 0.5em; + --pollux-nav-link-margin-top: 0.35em; --pollux-toc-link-size: 0.72rem; --pollux-toc-link-line-height: 1.45; --pollux-toc-link-padding-y: 0.18rem; @@ -122,6 +122,7 @@ body { /* ── Navigation links ────────────────────────────────────────── */ .md-nav__item--section > .md-nav__link { + font-size: 0.7rem; letter-spacing: 0.04em; text-transform: uppercase; } @@ -288,6 +289,17 @@ body { .md-nav--primary .md-nav__link { margin-top: var(--pollux-nav-link-margin-top); padding: 0; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + + .md-nav__item--section { + margin-top: 0.75em; + } + + .md-nav__item--section:first-child { + margin-top: 0; } /* Boosted-specificity active link: .md-nav--primary prefix matches @@ -341,8 +353,11 @@ body { font-size: var(--pollux-toc-link-size); line-height: var(--pollux-toc-link-line-height); margin-top: 0; + overflow: hidden; padding-block: var(--pollux-toc-link-padding-y); position: relative; + text-overflow: ellipsis; + white-space: nowrap; } .md-nav--secondary .md-nav__link--active::before { diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 4f08ef4..db80f82 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -34,8 +34,9 @@ Use this order — most failures resolve by step 2. provider. Re-run a minimal prompt after fixing any mismatch. 3. **Unsupported feature** — Compare your options against - [Provider Capabilities](reference/provider-capabilities.md). In v1.0, - `delivery_mode="deferred"`, `history`, and `continue_from` are reserved. + [Provider Capabilities](reference/provider-capabilities.md). + `delivery_mode="deferred"` is reserved; conversation continuity is + provider-dependent (OpenAI-only in v1.1). 4. **Source and payload** — Reduce to one source + one prompt and retry. For OpenAI remote URLs in v1.0, only PDF and image URLs are supported. @@ -61,12 +62,13 @@ Or pass `api_key` directly in `Config(...)`. **Fix:** verify the model belongs to the selected provider. -## Option Not Implemented in v1.0 +## Option Not Implemented Yet **Symptom:** `ConfigurationError` mentioning `delivery_mode="deferred"`, `history`, or `continue_from`. -These are intentionally reserved and disabled in v1.0. +`delivery_mode="deferred"` is intentionally reserved. +`history`/`continue_from` require a provider with conversation support. ## `status == "partial"` diff --git a/src/pollux/execute.py b/src/pollux/execute.py index efb35f2..94b98c6 100644 --- a/src/pollux/execute.py +++ b/src/pollux/execute.py @@ -5,7 +5,6 @@ import asyncio from dataclasses import dataclass, field import logging -import os from pathlib import Path import time from typing import TYPE_CHECKING, Any @@ -59,6 +58,7 @@ class ExecutionTrace: cache_name: str | None = None duration_s: float = 0.0 usage: dict[str, int] = field(default_factory=dict) + conversation_state: dict[str, Any] | None = None async def execute_plan( @@ -81,24 +81,12 @@ async def execute_plan( wants_conversation = ( options.history is not None or options.continue_from is not None ) - if wants_conversation: - enabled = os.environ.get( - "POLLUX_EXPERIMENTAL_CONVERSATION", "" - ).strip().lower() in {"1", "true", "yes", "on"} - if not enabled: - raise ConfigurationError( - "Conversation options are reserved for a future release", - hint=( - "Remove history/continue_from for now, or set " - "POLLUX_EXPERIMENTAL_CONVERSATION=1 to opt in during development." - ), - ) if options.delivery_mode == "deferred": provider_name = type(provider).__name__ raise ConfigurationError( f"delivery_mode='deferred' is not implemented yet for provider {provider_name}", - hint="Use delivery_mode='realtime' for v1.0.", + hint="Use delivery_mode='realtime' for now.", ) if options.response_schema is not None and not caps.structured_outputs: raise ConfigurationError( @@ -115,6 +103,11 @@ async def execute_plan( "Provider does not support conversation continuity", hint="Remove history/continue_from or choose a provider with conversation support.", ) + if wants_conversation and len(prompts) != 1: + raise ConfigurationError( + "Conversation continuity currently supports exactly one prompt per call", + hint="Use run() or run_many() with a single prompt when passing history/continue_from.", + ) if (not provider.supports_uploads) and any( isinstance(p, dict) @@ -130,6 +123,10 @@ async def execute_plan( schema = options.response_schema_json() history = options.history + conversation_history: list[dict[str, str]] = [] + if history is not None: + conversation_history = [dict(item) for item in history] + previous_response_id: str | None = None if options.continue_from is not None: state = options.continue_from.get("_conversation_state") @@ -142,16 +139,18 @@ async def execute_plan( ), ) + state_history = state.get("history") + if history is None and isinstance(state_history, list): + conversation_history = [ + item + for item in state_history + if isinstance(item, dict) + and isinstance(item.get("role"), str) + and isinstance(item.get("content"), str) + ] + if history is None: - state_history = state.get("history") - if isinstance(state_history, list): - history = [ - item - for item in state_history - if isinstance(item, dict) - and isinstance(item.get("role"), str) - and isinstance(item.get("content"), str) - ] + history = conversation_history prev = state.get("response_id") previous_response_id = prev if isinstance(prev, str) else None @@ -185,7 +184,7 @@ async def execute_plan( key=plan.cache_key, model=config.model, parts=shared_parts, # Use resolved parts with URIs - system_instruction=None, + system_instruction=options.system_instruction, ttl_seconds=config.ttl_seconds, retry_policy=retry_policy, ) @@ -229,7 +228,7 @@ async def _execute_call(call_idx: int) -> dict[str, Any]: return await provider.generate( model=model, parts=parts, - system_instruction=None, + system_instruction=options.system_instruction, cache_name=cache_name, response_schema=schema, reasoning_effort=options.reasoning_effort, @@ -242,7 +241,7 @@ async def _execute_call(call_idx: int) -> dict[str, Any]: lambda: provider.generate( model=model, parts=parts, - system_instruction=None, + system_instruction=options.system_instruction, cache_name=cache_name, response_schema=schema, reasoning_effort=options.reasoning_effort, @@ -298,11 +297,29 @@ async def _execute_call(call_idx: int) -> dict[str, Any]: duration_s = time.perf_counter() - start_time + conversation_state: dict[str, Any] | None = None + if wants_conversation and responses: + prompt = prompts[0] if isinstance(prompts[0], str) else str(prompts[0]) + answer = responses[0].get("text") + reply = answer if isinstance(answer, str) else "" + updated_history = [ + *conversation_history, + {"role": "user", "content": prompt}, + {"role": "assistant", "content": reply}, + ] + conversation_state = {"history": updated_history} + response_id = responses[0].get("response_id") + if isinstance(response_id, str): + conversation_state["response_id"] = response_id + elif previous_response_id is not None: + conversation_state["response_id"] = previous_response_id + return ExecutionTrace( responses=responses, cache_name=cache_name, duration_s=duration_s, usage=total_usage, + conversation_state=conversation_state, ) diff --git a/src/pollux/options.py b/src/pollux/options.py index ed6ae0f..cbcaee1 100644 --- a/src/pollux/options.py +++ b/src/pollux/options.py @@ -21,6 +21,8 @@ class Options: """Optional execution features for `run()` and `run_many()`.""" + #: Optional system-level instruction for model behavior. + system_instruction: str | None = None #: Pydantic ``BaseModel`` subclass or JSON Schema dict for structured output. response_schema: ResponseSchemaInput | None = None #: Reserved — not yet wired in v1.0. @@ -34,6 +36,14 @@ class Options: def __post_init__(self) -> None: """Validate option shapes early for clear errors.""" + if self.system_instruction is not None and not isinstance( + self.system_instruction, str + ): + raise ConfigurationError( + "system_instruction must be a string", + hint="Pass system_instruction='You are a concise assistant.'", + ) + if self.response_schema is not None and not ( isinstance(self.response_schema, dict) or ( diff --git a/src/pollux/plan.py b/src/pollux/plan.py index dc7b0e9..0a07f86 100644 --- a/src/pollux/plan.py +++ b/src/pollux/plan.py @@ -43,7 +43,11 @@ def build_plan(request: Request) -> Plan: if use_cache: from pollux.cache import compute_cache_key - cache_key = compute_cache_key(config.model, sources) + cache_key = compute_cache_key( + config.model, + sources, + system_instruction=request.options.system_instruction, + ) return Plan( request=request, diff --git a/src/pollux/providers/openai.py b/src/pollux/providers/openai.py index d904dd7..2db4f80 100644 --- a/src/pollux/providers/openai.py +++ b/src/pollux/providers/openai.py @@ -57,7 +57,7 @@ def capabilities(self) -> ProviderCapabilities: structured_outputs=True, reasoning=False, deferred_delivery=False, - conversation=False, + conversation=True, ) async def generate( @@ -74,7 +74,7 @@ async def generate( previous_response_id: str | None = None, ) -> dict[str, Any]: """Generate a response using OpenAI's responses endpoint.""" - _ = cache_name, reasoning_effort, history, delivery_mode, previous_response_id + _ = cache_name, reasoning_effort, delivery_mode client = self._get_client() user_content: list[dict[str, str]] = [] @@ -86,25 +86,30 @@ async def generate( if not user_content: user_content.append({"type": "input_text", "text": ""}) - input_messages: list[dict[str, Any]] = [ - { - "role": "user", - "content": user_content, - } - ] - if system_instruction: - input_messages.insert( - 0, - { - "role": "system", - "content": [{"type": "input_text", "text": system_instruction}], - }, - ) + input_messages: list[dict[str, Any]] = [] + history_items = None if previous_response_id else history + if history_items is not None: + for item in history_items: + role = item.get("role") + content = item.get("content") + if not isinstance(role, str) or not isinstance(content, str): + continue + input_messages.append( + { + "role": role, + "content": [{"type": "input_text", "text": content}], + } + ) + input_messages.append({"role": "user", "content": user_content}) create_kwargs: dict[str, Any] = { "model": model, "input": input_messages, } + if system_instruction: + create_kwargs["instructions"] = system_instruction + if previous_response_id: + create_kwargs["previous_response_id"] = previous_response_id if response_schema is not None: strict_schema = _to_openai_strict_schema(response_schema) create_kwargs["text"] = { @@ -119,6 +124,7 @@ async def generate( try: response = await client.responses.create(**create_kwargs) text = getattr(response, "output_text", "") or "" + response_id = getattr(response, "id", None) structured: Any = None if response_schema is not None and text: try: @@ -136,6 +142,8 @@ async def generate( payload: dict[str, Any] = {"text": text, "usage": usage} if structured is not None: payload["structured"] = structured + if isinstance(response_id, str): + payload["response_id"] = response_id return payload except asyncio.CancelledError: raise diff --git a/src/pollux/result.py b/src/pollux/result.py index 5cf68cf..c85cfce 100644 --- a/src/pollux/result.py +++ b/src/pollux/result.py @@ -87,6 +87,8 @@ def build_result(plan: Plan, trace: ExecutionTrace) -> ResultEnvelope: ) if wants_structured: envelope["structured"] = structured_values + if trace.conversation_state is not None: + envelope["_conversation_state"] = trace.conversation_state return envelope diff --git a/tests/conftest.py b/tests/conftest.py index 7e16e79..0033b37 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -63,9 +63,10 @@ async def generate( delivery_mode: str = "realtime", previous_response_id: str | None = None, ) -> dict[str, Any]: - del model, system_instruction, cache_name + del model, cache_name self.last_parts = parts self.last_generate_kwargs = { + "system_instruction": system_instruction, "response_schema": response_schema, "reasoning_effort": reasoning_effort, "history": history, diff --git a/tests/test_api.py b/tests/test_api.py index 7db92db..e53d175 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -15,6 +15,7 @@ import pollux from pollux.config import Config +from pollux.options import Options from pollux.source import Source pytestmark = [pytest.mark.api, pytest.mark.slow] @@ -88,6 +89,33 @@ async def test_gemini_run_many_returns_multiple_answers( assert result["metrics"]["n_calls"] == 2 +@pytest.mark.asyncio +async def test_gemini_system_instruction_shapes_output( + gemini_api_key: str, gemini_test_model: str +) -> None: + """E2E: system_instruction should steer output style on a real model.""" + config = Config( + provider="gemini", + model=gemini_test_model, + api_key=gemini_api_key, + ) + + result = await pollux.run( + "Write about rain.", + config=config, + options=Options( + system_instruction=( + "Respond as a haiku with exactly three lines separated by newline." + ) + ), + ) + + answer = result["answers"][0] + lines = [line for line in answer.splitlines() if line.strip()] + assert result["status"] == "ok" + assert len(lines) == 3 + + # ============================================================================= # OpenAI Provider # ============================================================================= @@ -154,3 +182,59 @@ async def test_openai_run_many_returns_multiple_answers( assert result["status"] == "ok" assert len(result["answers"]) == 2 assert result["metrics"]["n_calls"] == 2 + + +@pytest.mark.asyncio +async def test_openai_system_instruction_shapes_output( + openai_api_key: str, openai_test_model: str +) -> None: + """E2E: system_instruction should steer output style on a real model.""" + config = Config( + provider="openai", + model=openai_test_model, + api_key=openai_api_key, + ) + + result = await pollux.run( + "Write about rain.", + config=config, + options=Options( + system_instruction=( + "Respond as a haiku with exactly three lines separated by newline." + ) + ), + ) + + answer = result["answers"][0] + lines = [line for line in answer.splitlines() if line.strip()] + assert result["status"] == "ok" + assert len(lines) == 3 + + +@pytest.mark.asyncio +async def test_openai_continue_from_roundtrip( + openai_api_key: str, openai_test_model: str +) -> None: + """E2E: OpenAI continuation should preserve state across calls.""" + config = Config( + provider="openai", + model=openai_test_model, + api_key=openai_api_key, + ) + + first = await pollux.run( + "Remember this secret word: ORBIT. Reply only with 'stored'.", + config=config, + options=Options(history=[]), + ) + assert first["status"] == "ok" + assert "_conversation_state" in first + + second = await pollux.run( + "What secret word did I ask you to remember? Reply with only the word.", + config=config, + options=Options(continue_from=first), + ) + + assert second["status"] == "ok" + assert "orbit" in second["answers"][0].lower() diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 4b2cbc8..912f393 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -556,6 +556,25 @@ def test_cache_identity_uses_content_digest_not_identifier_only() -> None: assert len(set(keys)) == len(keys) +def test_cache_identity_includes_system_instruction() -> None: + """Distinct system instructions should produce distinct cache identities.""" + model = GEMINI_MODEL + source = Source.from_text("shared context") + + concise = compute_cache_key( + model, + (source,), + system_instruction="Be concise.", + ) + verbose = compute_cache_key( + model, + (source,), + system_instruction="Be verbose.", + ) + + assert concise != verbose + + @given( arxiv_id=st.one_of( st.from_regex(r"\d{4}\.\d{4,5}(?:v\d+)?", fullmatch=True), @@ -600,6 +619,12 @@ async def test_options_response_schema_requires_provider_capability() -> None: ) +def test_options_system_instruction_requires_string() -> None: + """Invalid system_instruction types should fail fast at option construction.""" + with pytest.raises(ConfigurationError, match="system_instruction must be a string"): + Options(system_instruction=123) # type: ignore[arg-type] + + @pytest.mark.asyncio async def test_options_are_forwarded_when_provider_supports_features( monkeypatch: pytest.MonkeyPatch, @@ -627,6 +652,7 @@ class ExampleSchema(BaseModel): sources=(Source.from_text("context"),), config=cfg, options=Options( + system_instruction="Reply in one sentence.", response_schema=ExampleSchema, reasoning_effort="high", delivery_mode="realtime", @@ -637,6 +663,7 @@ class ExampleSchema(BaseModel): assert fake.last_generate_kwargs["reasoning_effort"] == "high" assert fake.last_generate_kwargs["delivery_mode"] == "realtime" assert fake.last_generate_kwargs["history"] is None + assert fake.last_generate_kwargs["system_instruction"] == "Reply in one sentence." response_schema = fake.last_generate_kwargs["response_schema"] assert isinstance(response_schema, dict) assert response_schema["type"] == "object" @@ -741,10 +768,10 @@ async def generate( @pytest.mark.asyncio -async def test_conversation_options_are_lifecycle_gated_by_default( +async def test_conversation_options_are_forwarded_when_provider_supports_them( monkeypatch: pytest.MonkeyPatch, ) -> None: - """Lifecycle gate should reject by default, and allow when explicitly enabled.""" + """Conversation options should pass through when provider supports the feature.""" fake = FakeProvider( _capabilities=ProviderCapabilities( caching=True, @@ -758,14 +785,6 @@ async def test_conversation_options_are_lifecycle_gated_by_default( monkeypatch.setattr(pollux, "_get_provider", lambda _config: fake) cfg = Config(provider="gemini", model=GEMINI_MODEL, use_mock=True) - with pytest.raises(ConfigurationError, match="reserved for a future release"): - await pollux.run_many( - ("Q1?",), - config=cfg, - options=Options(history=[{"role": "user", "content": "hello"}]), - ) - - monkeypatch.setenv("POLLUX_EXPERIMENTAL_CONVERSATION", "1") await pollux.run_many( ("Q1?",), config=cfg, @@ -777,6 +796,32 @@ async def test_conversation_options_are_lifecycle_gated_by_default( ] +@pytest.mark.asyncio +async def test_conversation_requires_single_prompt_per_call( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Conversation continuity is single-turn per API call in v1.1.""" + fake = FakeProvider( + _capabilities=ProviderCapabilities( + caching=True, + uploads=True, + structured_outputs=True, + reasoning=False, + deferred_delivery=False, + conversation=True, + ) + ) + monkeypatch.setattr(pollux, "_get_provider", lambda _config: fake) + cfg = Config(provider="gemini", model=GEMINI_MODEL, use_mock=True) + + with pytest.raises(ConfigurationError, match="exactly one prompt"): + await pollux.run_many( + ("Q1?", "Q2?"), + config=cfg, + options=Options(history=[{"role": "user", "content": "hello"}]), + ) + + @pytest.mark.asyncio async def test_continue_from_requires_conversation_state( monkeypatch: pytest.MonkeyPatch, @@ -793,7 +838,6 @@ async def test_continue_from_requires_conversation_state( ) ) monkeypatch.setattr(pollux, "_get_provider", lambda _config: fake) - monkeypatch.setenv("POLLUX_EXPERIMENTAL_CONVERSATION", "1") cfg = Config(provider="gemini", model=GEMINI_MODEL, use_mock=True) with pytest.raises(ConfigurationError, match="missing _conversation_state"): @@ -822,6 +866,53 @@ async def test_continue_from_requires_conversation_state( assert fake.generate_kwargs[0]["previous_response_id"] == "resp_123" +@pytest.mark.asyncio +async def test_conversation_result_includes_conversation_state( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Conversation runs should emit continuation state in the result envelope.""" + + @dataclass + class _ConversationProvider(FakeProvider): + _capabilities: ProviderCapabilities = field( + default_factory=lambda: ProviderCapabilities( + caching=True, + uploads=True, + structured_outputs=False, + reasoning=False, + deferred_delivery=False, + conversation=True, + ) + ) + + async def generate(self, **kwargs: Any) -> dict[str, Any]: + _ = kwargs + return { + "text": "Assistant reply.", + "usage": {"total_tokens": 1}, + "response_id": "resp_next", + } + + fake = _ConversationProvider() + monkeypatch.setattr(pollux, "_get_provider", lambda _config: fake) + cfg = Config(provider="gemini", model=GEMINI_MODEL, use_mock=True) + + result = await pollux.run( + "Next question?", + config=cfg, + options=Options(history=[{"role": "user", "content": "hello"}]), + ) + + state = result.get("_conversation_state") + assert isinstance(state, dict) + assert state["response_id"] == "resp_next" + assert state["history"] == [ + {"role": "user", "content": "hello"}, + {"role": "user", "content": "Next question?"}, + {"role": "assistant", "content": "Assistant reply."}, + ] + + @pytest.mark.asyncio async def test_planning_error_wraps_source_loader_failure() -> None: """Source loader failures should surface as PlanningError with context.""" diff --git a/tests/test_providers.py b/tests/test_providers.py index 08eece5..28276c2 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -535,6 +535,43 @@ async def test_openai_generate_characterizes_multimodal_request_shape( assert golden.out["request"] == responses.last_kwargs +@pytest.mark.asyncio +async def test_openai_generate_forwards_conversation_and_instructions() -> None: + """Conversation params should map to Responses API fields.""" + responses = _FakeResponses() + fake_client = type("Client", (), {"responses": responses})() + + provider = OpenAIProvider("test-key") + provider._client = fake_client + + await provider.generate( + model=OPENAI_MODEL, + parts=["What did I just ask?"], + system_instruction="Be concise.", + history=[{"role": "user", "content": "Say hello."}], + ) + + assert responses.last_kwargs is not None + assert responses.last_kwargs["instructions"] == "Be concise." + assert responses.last_kwargs["input"][0]["role"] == "user" + assert responses.last_kwargs["input"][0]["content"][0] == { + "type": "input_text", + "text": "Say hello.", + } + assert responses.last_kwargs["input"][1]["role"] == "user" + + await provider.generate( + model=OPENAI_MODEL, + parts=["And now?"], + history=[{"role": "user", "content": "This should be skipped."}], + previous_response_id="resp_123", + ) + + assert responses.last_kwargs["previous_response_id"] == "resp_123" + assert len(responses.last_kwargs["input"]) == 1 + assert responses.last_kwargs["input"][0]["role"] == "user" + + @pytest.mark.asyncio async def test_openai_rejects_unsupported_remote_mime_type() -> None: """Remote URIs with unsupported mime types should fail clearly."""