2 changes: 1 addition & 1 deletion src/delibera/cli.py
@@ -246,7 +246,7 @@ def run(
         sys.exit(1)
     except Exception as e:
         click.echo(f"Error: Unexpected failure during run: {e}", err=True)
-        click.echo("Check that ANTHROPIC_API_KEY is set and valid.", err=True)
+        click.echo("Check your configuration and API keys.", err=True)
         sys.exit(1)
 
 
9 changes: 7 additions & 2 deletions src/delibera/engine/orchestrator.py
@@ -272,7 +272,10 @@ def run(self, question: str) -> Path:
         # PROPOSE: Generate proposals for each branch
         proposer: ProposerStub | Any  # Allow LLM proposer
         if self._use_llm_proposer and self._llm_client is not None:
-            from delibera.agents.llm_proposer import ProposerLLM
+            from delibera.agents.llm_proposer import ProposerLLM, check_llm_allowed_in_step
+
+            # Runtime guard: LLM only allowed in work steps
+            check_llm_allowed_in_step("work")
 
             proposer = ProposerLLM(
                 llm_client=self._llm_client,
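Note: `check_llm_allowed_in_step` lives in `delibera.agents.llm_proposer` and its actual rules are not visible in this diff. A minimal sketch of what such a guard might look like, assuming a simple allow-list of step names:

```python
# Hypothetical sketch; the real guard's allow-list and error type are assumptions.
_LLM_ALLOWED_STEPS = {"work"}  # assumed: only work steps may call an LLM


def check_llm_allowed_in_step(step: str) -> None:
    """Raise if an LLM call is attempted outside an allowed step."""
    if step not in _LLM_ALLOWED_STEPS:
        raise RuntimeError(f"LLM calls are not permitted in step {step!r}")
```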
@@ -334,6 +337,8 @@ def run(self, question: str) -> Path:
             )
         except Exception as e:
             # Emit llm_call_failed and fall back to stub
+            from delibera.llm.redaction import redact_text
+
             writer.emit(
                 TraceEvent(
                     event_type="llm_call_failed",
@@ -343,7 +348,7 @@ def run(self, question: str) -> Path:
"role": "proposer",
"step": "PROPOSE",
"error_type": type(e).__name__,
"error_message": str(e)[:200],
"error_message": redact_text(str(e))[:200],
},
)
)
Expand Down
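The fallback path now scrubs the error text with `redact_text` before it is truncated and written to the trace, so provider errors that echo credentials do not end up in run artifacts. The helper itself is not shown in this diff; a plausible sketch, assuming it masks API-key-shaped substrings (patterns here are illustrative, not the project's actual list):

```python
import re

# Illustrative credential patterns; the real module's rules may differ.
_KEY_PATTERNS = [
    re.compile(r"sk-[A-Za-z0-9_-]{16,}"),         # sk-... style keys
    re.compile(r"AIza[A-Za-z0-9_-]{30,}"),        # Google API keys
    re.compile(r"(?i)api[_-]?key\s*[=:]\s*\S+"),  # key=value fragments
]


def redact_text(text: str) -> str:
    """Replace anything that looks like a credential with a placeholder."""
    for pattern in _KEY_PATTERNS:
        text = pattern.sub("[REDACTED]", text)
    return text
```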
2 changes: 1 addition & 1 deletion src/delibera/inspect/summarize.py
@@ -199,7 +199,7 @@ def _extract_protocol_info(summary: RunSummary, events: list[dict[str, Any]]) ->
     for event in events:
         if event.get("event_type") == "run_start":
             payload = event.get("payload", {})
-            summary.protocol.name = payload.get("protocol", "")
+            summary.protocol.name = payload.get("protocol_name", "")
             summary.protocol.version = payload.get("protocol_version", "")
             summary.protocol.source = payload.get("protocol_source", "")
             break
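This aligns the summarizer with the key the emitter actually writes: `run_start` payloads carry `protocol_name`, not `protocol`. For reference, the relevant payload shape (values are invented for illustration; only the key names are taken from the code above):

```python
# Illustrative run_start event; field values are placeholders.
run_start_event = {
    "event_type": "run_start",
    "payload": {
        "protocol_name": "debate-v1",  # -> summary.protocol.name
        "protocol_version": "1.0.0",   # -> summary.protocol.version
        "protocol_source": "builtin",  # -> summary.protocol.source
    },
}
```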
84 changes: 35 additions & 49 deletions src/delibera/llm/gemini.py
@@ -147,11 +147,15 @@ def _generate_with_sdk(self, request: LLMRequest, model: str) -> LLMResponse:
         if request.response_format == "json":
             generation_config["response_mime_type"] = "application/json"
 
+        # Build content from messages (system prompt separated for SDK)
+        system_instruction, contents = self._build_contents(request)
+
         # Create the model
         try:
             genai_model = genai.GenerativeModel(
                 model_name=model,
                 generation_config=generation_config if generation_config else None,
+                system_instruction=system_instruction if system_instruction else None,
             )
         except Exception as e:
             raise LLMError(
@@ -160,9 +164,6 @@ def _generate_with_sdk(self, request: LLMRequest, model: str) -> LLMResponse:
                 model=model,
             ) from e
 
-        # Build content from messages
-        contents = self._build_sdk_contents(request)
-
         # Generate
         try:
             response = genai_model.generate_content(contents)
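For context, the SDK path now passes the system prompt natively instead of folding it into the first user turn. A standalone sketch of that pattern with the `google-generativeai` SDK (model name and prompts are placeholders, not delibera's configuration):

```python
import google.generativeai as genai

genai.configure(api_key="...")  # delibera sources the key elsewhere

model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",  # placeholder model
    system_instruction="You are a terse assistant.",
)
response = model.generate_content(
    [{"role": "user", "parts": [{"text": "Say hi."}]}]
)
print(response.text)
```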
@@ -217,13 +218,17 @@ def _generate_with_http(self, request: LLMRequest, model: str) -> LLMResponse:
         import urllib.error
         import urllib.request
 
-        # Build API URL
-        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={self._api_key}"
+        # Build API URL (key passed via header, not query param, to avoid leaking in logs)
+        url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
 
         # Build request body
-        contents = self._build_http_contents(request)
+        system_instruction, contents = self._build_contents(request)
         body: dict[str, Any] = {"contents": contents}
 
+        # Add system instruction if present
+        if system_instruction:
+            body["systemInstruction"] = {"parts": [{"text": system_instruction}]}
+
         # Add generation config
         generation_config: dict[str, Any] = {}
         if request.temperature is not None:
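With the `systemInstruction` branch in place, the JSON body posted to `generateContent` takes roughly this shape (prompt text and config values are illustrative; the generation config presumably lands under `generationConfig` further down in the method):

```python
# Approximate wire format after this change; values are illustrative.
body = {
    "contents": [
        {"role": "user", "parts": [{"text": "Summarize the proposal."}]},
    ],
    "systemInstruction": {"parts": [{"text": "You are a careful reviewer."}]},
    "generationConfig": {"temperature": 0.2},
}
```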
@@ -240,7 +245,10 @@ def _generate_with_http(self, request: LLMRequest, model: str) -> LLMResponse:
         req = urllib.request.Request(
             url,
             data=json.dumps(body).encode("utf-8"),
-            headers={"Content-Type": "application/json"},
+            headers={
+                "Content-Type": "application/json",
+                "x-goog-api-key": self._api_key,
+            },
             method="POST",
         )
 
@@ -315,64 +323,42 @@ def _generate_with_http(self, request: LLMRequest, model: str) -> LLMResponse:
             usage=usage,
         )
 
-    def _build_sdk_contents(self, request: LLMRequest) -> list[Any]:
-        """Build SDK-compatible content list from messages.
-
-        Args:
-            request: The LLM request.
-
-        Returns:
-            Content list for SDK.
-        """
-        contents: list[Any] = []
-
-        # Combine system and user messages
-        combined_prompt = ""
-        for msg in request.messages:
-            if msg.role == "system":
-                combined_prompt += msg.content + "\n\n"
-            elif msg.role == "user":
-                combined_prompt += msg.content
-            elif msg.role == "assistant":
-                # For multi-turn, include assistant responses
-                if combined_prompt:
-                    contents.append({"role": "user", "parts": [{"text": combined_prompt}]})
-                    combined_prompt = ""
-                contents.append({"role": "model", "parts": [{"text": msg.content}]})
-
-        if combined_prompt:
-            contents.append({"role": "user", "parts": [{"text": combined_prompt}]})
-
-        return contents
+    def _build_contents(self, request: LLMRequest) -> tuple[str, list[dict[str, Any]]]:
+        """Build API-compatible content list from messages.
 
-    def _build_http_contents(self, request: LLMRequest) -> list[dict[str, Any]]:
-        """Build HTTP API-compatible content list from messages.
+        Separates system messages into a dedicated system instruction string
+        (used natively by both the SDK and HTTP API) and builds the
+        conversation content list from user/assistant messages.
 
         Args:
             request: The LLM request.
 
         Returns:
-            Content list for HTTP API.
+            Tuple of (system_instruction, contents) where system_instruction
+            is the concatenated system messages and contents is the
+            conversation turns.
         """
+        system_parts: list[str] = []
         contents: list[dict[str, Any]] = []
 
-        # Combine system and user messages
-        combined_prompt = ""
+        user_prompt = ""
         for msg in request.messages:
             if msg.role == "system":
-                combined_prompt += msg.content + "\n\n"
+                system_parts.append(msg.content)
             elif msg.role == "user":
-                combined_prompt += msg.content
+                user_prompt += msg.content
             elif msg.role == "assistant":
-                if combined_prompt:
-                    contents.append({"role": "user", "parts": [{"text": combined_prompt}]})
-                    combined_prompt = ""
+                # For multi-turn, flush user prompt before assistant response
+                if user_prompt:
+                    contents.append({"role": "user", "parts": [{"text": user_prompt}]})
+                    user_prompt = ""
                 contents.append({"role": "model", "parts": [{"text": msg.content}]})
 
-        if combined_prompt:
-            contents.append({"role": "user", "parts": [{"text": combined_prompt}]})
+        if user_prompt:
+            contents.append({"role": "user", "parts": [{"text": user_prompt}]})
 
-        return contents
+        system_instruction = "\n\n".join(system_parts)
+        return system_instruction, contents
 
     def _parse_json_response(self, text: str) -> dict[str, Any]:
         """Parse JSON from response text.
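To make the new contract concrete, here is what `_build_contents` would return for a short multi-turn exchange. The `LLMRequest` and message constructors are assumed from context; only the output shape follows from the code above:

```python
# Assumed constructors; real field names may differ from LLMRequest/Message.
request = LLMRequest(
    messages=[
        Message(role="system", content="Answer in one sentence."),
        Message(role="user", content="What is delibera?"),
        Message(role="assistant", content="A deliberation engine."),
        Message(role="user", content="Thanks!"),
    ]
)
system_instruction, contents = client._build_contents(request)

# system_instruction == "Answer in one sentence."
# contents == [
#     {"role": "user", "parts": [{"text": "What is delibera?"}]},
#     {"role": "model", "parts": [{"text": "A deliberation engine."}]},
#     {"role": "user", "parts": [{"text": "Thanks!"}]},
# ]
```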