From e547a403510c84139760d4d588cccc331885f09a Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 07:40:38 -0800 Subject: [PATCH 1/8] Introduce LiteLLM dependency and initial Gemini configuration --- .env.example | 9 ++-- .gitignore | 3 ++ README.md | 10 ++--- pyproject.toml | 1 + src/applypilot/llm.py | 61 +++++++++++++++++++++++--- src/applypilot/scoring/cover_letter.py | 2 +- src/applypilot/wizard/init.py | 58 +++++++++++++++--------- 7 files changed, 109 insertions(+), 35 deletions(-) diff --git a/.env.example b/.env.example index df7cc38..fbf9543 100644 --- a/.env.example +++ b/.env.example @@ -2,10 +2,11 @@ # Copy to ~/.applypilot/.env and fill in your values. # LLM Provider (pick one) -GEMINI_API_KEY= # Gemini 2.0 Flash (recommended, cheapest) -# OPENAI_API_KEY= # OpenAI (GPT-4o-mini) -# LLM_URL=http://127.0.0.1:8080/v1 # Local LLM (llama.cpp, Ollama) -# LLM_MODEL= # Override model name +GEMINI_API_KEY= # Gemini (recommended, cheapest) +# OPENAI_API_KEY= # OpenAI +# ANTHROPIC_API_KEY= # Anthropic Claude +# LLM_URL=http://127.0.0.1:8080/v1 # Local LLM (OpenAI-compatible: llama.cpp, Ollama, vLLM) +# LLM_MODEL= # Override model name (provider-specific) # Auto-Apply (optional) CAPSOLVER_API_KEY= # For CAPTCHA solving during auto-apply diff --git a/.gitignore b/.gitignore index 835589f..35c6a55 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ resume.pdf *.env .env.* !.env.example +.venv/* + # Runtime artifacts *.db @@ -39,3 +41,4 @@ Thumbs.db # Claude Code .claude/ +tm_dev/nb.ipynb diff --git a/README.md b/README.md index e7fe08e..140e43c 100644 --- a/README.md +++ b/README.md @@ -43,12 +43,12 @@ applypilot apply --dry-run # fill forms without submitting ## Two Paths ### Full Pipeline (recommended) -**Requires:** Python 3.11+, Node.js (for npx), Gemini API key (free), Claude Code CLI, Chrome +**Requires:** Python 3.11+, Node.js (for npx), an LLM key (Gemini/OpenAI/Claude) or `LLM_URL`, Claude Code CLI, Chrome Runs all 6 stages, from job 
discovery to autonomous application submission. This is the full power of ApplyPilot. ### Discovery + Tailoring Only -**Requires:** Python 3.11+, Gemini API key (free) +**Requires:** Python 3.11+, an LLM key (Gemini/OpenAI/Claude) or `LLM_URL` Runs stages 1-5: discovers jobs, scores them, tailors your resume, generates cover letters. You submit applications manually with the AI-prepared materials. @@ -88,11 +88,11 @@ Each stage is independent. Run them all or pick what you need. |-----------|-------------|---------| | Python 3.11+ | Everything | Core runtime | | Node.js 18+ | Auto-apply | Needed for `npx` to run Playwright MCP server | -| Gemini API key | Scoring, tailoring, cover letters | Free tier (15 RPM / 1M tokens/day) is enough | +| LLM API key or local endpoint | Scoring, tailoring, cover letters | Set one of `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `LLM_URL` | | Chrome/Chromium | Auto-apply | Auto-detected on most systems | | Claude Code CLI | Auto-apply | Install from [claude.ai/code](https://claude.ai/code) | -**Gemini API key is free.** Get one at [aistudio.google.com](https://aistudio.google.com). OpenAI and local models (Ollama/llama.cpp) are also supported. +**Gemini API key is free.** Get one at [aistudio.google.com](https://aistudio.google.com). OpenAI, Claude, and local models (Ollama/llama.cpp/vLLM) are also supported. ### Optional @@ -115,7 +115,7 @@ Your personal data in one structured file: contact info, work authorization, com Job search queries, target titles, locations, boards. Run multiple searches with different parameters. ### `.env` -API keys and runtime config: `GEMINI_API_KEY`, `LLM_MODEL`, `CAPSOLVER_API_KEY` (optional). +API keys and runtime config: `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `LLM_URL`, `LLM_MODEL`, `CAPSOLVER_API_KEY` (optional). 
### Package configs (shipped with ApplyPilot) - `config/employers.yaml` - Workday employer registry (48 preconfigured) diff --git a/pyproject.toml b/pyproject.toml index f5116d8..51268f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ classifiers = [ dependencies = [ "typer>=0.9.0", "rich>=13.0", + "litellm", "httpx>=0.24", "beautifulsoup4>=4.12", "playwright>=1.40", diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py index 1fb7be6..f2fab0b 100644 --- a/src/applypilot/llm.py +++ b/src/applypilot/llm.py @@ -9,6 +9,7 @@ LLM_MODEL env var overrides the model name for any provider. """ +import json import logging import os import time @@ -73,6 +74,28 @@ def _detect_provider() -> tuple[str, str, str]: _GEMINI_COMPAT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai" _GEMINI_NATIVE_BASE = "https://generativelanguage.googleapis.com/v1beta" +_GEMINI_THINKING_LEVELS = {"none", "minimal", "low", "medium", "high"} +_GEMINI_COMPAT_REASONING_EFFORT = { + "none": "none", + "minimal": "low", + "low": "low", + "medium": "high", + "high": "high", +} +_GEMINI_25_THINKING_BUDGET = { + "none": 0, + "minimal": 1024, + "low": 1024, + "medium": 8192, + "high": 24576, +} +_GEMINI_NATIVE_THINKING_LEVEL = { + "none": "low", + "minimal": "low", + "low": "low", + "medium": "high", + "high": "high", +} class LLMClient: @@ -93,6 +116,20 @@ def __init__(self, base_url: str, model: str, api_key: str) -> None: self._use_native_gemini: bool = False self._is_gemini: bool = base_url.startswith(_GEMINI_COMPAT_BASE) + @staticmethod + def _normalize_thinking_level(thinking_level: str) -> str: + level = (thinking_level or "low").strip().lower() + if level not in _GEMINI_THINKING_LEVELS: + log.warning("Invalid thinking_level '%s', defaulting to 'low'.", thinking_level) + return "low" + return level + + def _gemini_native_thinking_config(self, thinking_level: str) -> dict: + level = self._normalize_thinking_level(thinking_level) + if "2.5" in self.model: + return 
{"thinkingBudget": _GEMINI_25_THINKING_BUDGET[level]} + return {"thinkingLevel": _GEMINI_NATIVE_THINKING_LEVEL[level]} + # -- Native Gemini API -------------------------------------------------- def _chat_native_gemini( @@ -100,6 +137,7 @@ def _chat_native_gemini( messages: list[dict], temperature: float, max_tokens: int, + thinking_level: str, ) -> str: """Call the native Gemini generateContent API. @@ -128,6 +166,7 @@ def _chat_native_gemini( "generationConfig": { "temperature": temperature, "maxOutputTokens": max_tokens, + "thinkingConfig": self._gemini_native_thinking_config(thinking_level), }, } if system_parts: @@ -151,6 +190,7 @@ def _chat_compat( messages: list[dict], temperature: float, max_tokens: int, + thinking_level: str, ) -> str: """Call the OpenAI-compatible endpoint.""" headers: dict[str, str] = {"Content-Type": "application/json"} @@ -163,6 +203,9 @@ def _chat_compat( "temperature": temperature, "max_tokens": max_tokens, } + if self._is_gemini: + level = self._normalize_thinking_level(thinking_level) + payload["reasoning_effort"] = _GEMINI_COMPAT_REASONING_EFFORT[level] resp = self._client.post( f"{self.base_url}/chat/completions", @@ -181,6 +224,10 @@ def _chat_compat( def _handle_compat_response(resp: httpx.Response) -> str: resp.raise_for_status() data = resp.json() + if resp.status_code == 200: + # Intentionally log the full JSON payload for every successful + # chat/completions response to aid truncation/debug analysis. 
+ log.info("LLM compat full response JSON:\n%s", json.dumps(data, indent=2, ensure_ascii=False)) return data["choices"][0]["message"]["content"] # -- public API --------------------------------------------------------- @@ -189,9 +236,13 @@ def chat( self, messages: list[dict], temperature: float = 0.0, - max_tokens: int = 4096, + max_tokens: int = 10000, + thinking_level: str = "low", ) -> str: - """Send a chat completion request and return the assistant message text.""" + """Send a chat completion request and return the assistant message text. + + thinking_level applies to Gemini requests and defaults to "low". + """ # Qwen3 optimization: prepend /no_think to skip chain-of-thought # reasoning, saving tokens on structured extraction tasks. if "qwen" in self.model.lower() and messages: @@ -203,9 +254,9 @@ def chat( try: # Route to native Gemini if we've already confirmed it's needed if self._use_native_gemini: - return self._chat_native_gemini(messages, temperature, max_tokens) + return self._chat_native_gemini(messages, temperature, max_tokens, thinking_level) - return self._chat_compat(messages, temperature, max_tokens) + return self._chat_compat(messages, temperature, max_tokens, thinking_level) except _GeminiCompatForbidden as exc: # Model not available on OpenAI-compat layer — switch to native. @@ -218,7 +269,7 @@ def chat( self._use_native_gemini = True # Retry immediately with native — don't count as a rate-limit wait try: - return self._chat_native_gemini(messages, temperature, max_tokens) + return self._chat_native_gemini(messages, temperature, max_tokens, thinking_level) except httpx.HTTPStatusError as native_exc: raise RuntimeError( f"Both Gemini endpoints failed. Compat: 403 Forbidden. 
" diff --git a/src/applypilot/scoring/cover_letter.py b/src/applypilot/scoring/cover_letter.py index c16cdd5..bbbb053 100644 --- a/src/applypilot/scoring/cover_letter.py +++ b/src/applypilot/scoring/cover_letter.py @@ -165,7 +165,7 @@ def generate_cover_letter( )}, ] - letter = client.chat(messages, max_tokens=1024, temperature=0.7) + letter = client.chat(messages, max_tokens=10000, temperature=0.7) letter = sanitize_text(letter) # auto-fix em dashes, smart quotes letter = _strip_preamble(letter) # remove any "Here is the letter:" prefix diff --git a/src/applypilot/wizard/init.py b/src/applypilot/wizard/init.py index 0f893c3..20bea2a 100644 --- a/src/applypilot/wizard/init.py +++ b/src/applypilot/wizard/init.py @@ -4,7 +4,7 @@ - resume.txt (and optionally resume.pdf) - profile.json - searches.yaml - - .env (LLM API key) + - .env (LLM API keys and runtime settings) """ from __future__ import annotations @@ -245,33 +245,51 @@ def _setup_ai_features() -> None: console.print("[dim]Discovery-only mode. You can configure AI later with [bold]applypilot init[/bold].[/dim]") return - console.print("Supported providers: [bold]Gemini[/bold] (recommended, free tier), OpenAI, local (Ollama/llama.cpp)") - provider = Prompt.ask( - "Provider", - choices=["gemini", "openai", "local"], - default="gemini", + console.print( + "Supported providers: [bold]Gemini[/bold] (recommended, free tier), " + "OpenAI, Claude, local (Ollama/llama.cpp)." ) + console.print("[dim]Enter any credentials you want to save now. 
Leave blank to skip each field.[/dim]") env_lines = ["# ApplyPilot configuration", ""] + configured_sources: list[str] = [] + + gemini_key = Prompt.ask("Gemini API key (optional, from aistudio.google.com)", default="").strip() + if gemini_key: + env_lines.append(f"GEMINI_API_KEY={gemini_key}") + configured_sources.append("gemini") + + openai_key = Prompt.ask("OpenAI API key (optional)", default="").strip() + if openai_key: + env_lines.append(f"OPENAI_API_KEY={openai_key}") + configured_sources.append("openai") + + anthropic_key = Prompt.ask("Anthropic API key (optional)", default="").strip() + if anthropic_key: + env_lines.append(f"ANTHROPIC_API_KEY={anthropic_key}") + configured_sources.append("anthropic") + + local_url = Prompt.ask("Local LLM endpoint URL (optional)", default="").strip() + if local_url: + env_lines.append(f"LLM_URL={local_url}") + configured_sources.append("local") + + if not configured_sources: + console.print("[dim]No AI provider configured. You can add one later with [bold]applypilot init[/bold].[/dim]") + return - if provider == "gemini": - api_key = Prompt.ask("Gemini API key (from aistudio.google.com)") - model = Prompt.ask("Model", default="gemini-2.0-flash") - env_lines.append(f"GEMINI_API_KEY={api_key}") - env_lines.append(f"LLM_MODEL={model}") - elif provider == "openai": - api_key = Prompt.ask("OpenAI API key") - model = Prompt.ask("Model", default="gpt-4o-mini") - env_lines.append(f"OPENAI_API_KEY={api_key}") - env_lines.append(f"LLM_MODEL={model}") - elif provider == "local": - url = Prompt.ask("Local LLM endpoint URL", default="http://localhost:8080/v1") - model = Prompt.ask("Model name", default="local-model") - env_lines.append(f"LLM_URL={url}") + model = Prompt.ask("LLM model override (optional, leave blank to use provider defaults)", default="").strip() + if model: env_lines.append(f"LLM_MODEL={model}") env_lines.append("") ENV_PATH.write_text("\n".join(env_lines), encoding="utf-8") + if len(configured_sources) > 1: + configured 
= ", ".join(configured_sources) + console.print( + f"[yellow]Multiple LLM providers saved ({configured}). " + "Deterministic provider resolution is added in the next phase.[/yellow]" + ) console.print(f"[green]AI configuration saved to {ENV_PATH}[/green]") From ce1007195b70908733d0cd74d22a3173055eddfc Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 07:40:42 -0800 Subject: [PATCH 2/8] Centralize provider resolution and refactor llm.py into a thin adapter --- src/applypilot/cli.py | 33 +- src/applypilot/config.py | 15 +- src/applypilot/llm.py | 551 +++++++++++++++++----------------- src/applypilot/wizard/init.py | 2 +- 4 files changed, 314 insertions(+), 287 deletions(-) diff --git a/src/applypilot/cli.py b/src/applypilot/cli.py index 6c8be91..d524174 100644 --- a/src/applypilot/cli.py +++ b/src/applypilot/cli.py @@ -379,21 +379,24 @@ def doctor() -> None: "pip install --no-deps python-jobspy && pip install pydantic tls-client requests markdownify regex")) # --- Tier 2 checks --- - import os - has_gemini = bool(os.environ.get("GEMINI_API_KEY")) - has_openai = bool(os.environ.get("OPENAI_API_KEY")) - has_local = bool(os.environ.get("LLM_URL")) - if has_gemini: - model = os.environ.get("LLM_MODEL", "gemini-2.0-flash") - results.append(("LLM API key", ok_mark, f"Gemini ({model})")) - elif has_openai: - model = os.environ.get("LLM_MODEL", "gpt-4o-mini") - results.append(("LLM API key", ok_mark, f"OpenAI ({model})")) - elif has_local: - results.append(("LLM API key", ok_mark, f"Local: {os.environ.get('LLM_URL')}")) - else: - results.append(("LLM API key", fail_mark, - "Set GEMINI_API_KEY in ~/.applypilot/.env (run 'applypilot init')")) + from applypilot.llm import resolve_llm_config + + try: + llm_cfg = resolve_llm_config() + if llm_cfg.provider == "local": + results.append(("LLM API key", ok_mark, f"Local: {llm_cfg.base_url} ({llm_cfg.model})")) + else: + label = { + "gemini": "Gemini", + "openai": "OpenAI", + "anthropic": "Anthropic", + 
}.get(llm_cfg.provider, llm_cfg.provider) + results.append(("LLM API key", ok_mark, f"{label} ({llm_cfg.model})")) + except RuntimeError: + results.append( + ("LLM API key", fail_mark, + "Set GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, or LLM_URL in ~/.applypilot/.env") + ) # --- Tier 3 checks --- # Claude Code CLI diff --git a/src/applypilot/config.py b/src/applypilot/config.py index 8c39780..9067245 100644 --- a/src/applypilot/config.py +++ b/src/applypilot/config.py @@ -206,7 +206,10 @@ def get_tier() -> int: """ load_env() - has_llm = any(os.environ.get(k) for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "LLM_URL")) + has_llm = any( + os.environ.get(k) + for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LLM_URL") + ) if not has_llm: return 1 @@ -238,8 +241,14 @@ def check_tier(required: int, feature: str) -> None: _console = Console(stderr=True) missing: list[str] = [] - if required >= 2 and not any(os.environ.get(k) for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "LLM_URL")): - missing.append("LLM API key — run [bold]applypilot init[/bold] or set GEMINI_API_KEY") + if required >= 2 and not any( + os.environ.get(k) + for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LLM_URL") + ): + missing.append( + "LLM API key — run [bold]applypilot init[/bold] or set " + "GEMINI_API_KEY / OPENAI_API_KEY / ANTHROPIC_API_KEY / LLM_URL" + ) if required >= 3: if not shutil.which("claude"): missing.append("Claude Code CLI — install from [bold]https://claude.ai/code[/bold]") diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py index f2fab0b..c60aed8 100644 --- a/src/applypilot/llm.py +++ b/src/applypilot/llm.py @@ -1,79 +1,43 @@ -""" -Unified LLM client for ApplyPilot. +"""Unified LLM client for ApplyPilot using LiteLLM. 
Auto-detects provider from environment: - GEMINI_API_KEY -> Google Gemini (default: gemini-2.0-flash) - OPENAI_API_KEY -> OpenAI (default: gpt-4o-mini) - LLM_URL -> Local llama.cpp / Ollama compatible endpoint + GEMINI_API_KEY -> Google Gemini (default: gemini-2.0-flash) + OPENAI_API_KEY -> OpenAI (default: gpt-4o-mini) + ANTHROPIC_API_KEY -> Anthropic Claude (default: claude-3-5-haiku-latest) + LLM_URL -> Local OpenAI-compatible endpoint LLM_MODEL env var overrides the model name for any provider. """ -import json +from __future__ import annotations + +from collections.abc import Mapping +from dataclasses import dataclass import logging import os import time -import httpx - log = logging.getLogger(__name__) -# --------------------------------------------------------------------------- -# Provider detection -# --------------------------------------------------------------------------- - -def _detect_provider() -> tuple[str, str, str]: - """Return (base_url, model, api_key) based on environment variables. - - Reads env at call time (not module import time) so that load_env() called - in _bootstrap() is always visible here. - """ - gemini_key = os.environ.get("GEMINI_API_KEY", "") - openai_key = os.environ.get("OPENAI_API_KEY", "") - local_url = os.environ.get("LLM_URL", "") - model_override = os.environ.get("LLM_MODEL", "") - - if gemini_key and not local_url: - return ( - "https://generativelanguage.googleapis.com/v1beta/openai", - model_override or "gemini-2.0-flash", - gemini_key, - ) - - if openai_key and not local_url: - return ( - "https://api.openai.com/v1", - model_override or "gpt-4o-mini", - openai_key, - ) - - if local_url: - return ( - local_url.rstrip("/"), - model_override or "local-model", - os.environ.get("LLM_API_KEY", ""), - ) - - raise RuntimeError( - "No LLM provider configured. " - "Set GEMINI_API_KEY, OPENAI_API_KEY, or LLM_URL in your environment." 
- ) - - -# --------------------------------------------------------------------------- -# Client -# --------------------------------------------------------------------------- +_OPENAI_BASE = "https://api.openai.com/v1" +_ANTHROPIC_BASE = "https://api.anthropic.com/v1" +_GEMINI_BASE = "https://generativelanguage.googleapis.com/v1beta/openai" +_PROVIDER_API_ENV_KEY = { + "gemini": "GEMINI_API_KEY", + "openai": "OPENAI_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", +} +_DEFAULT_MODEL_BY_PROVIDER = { + "local": "local-model", + "gemini": "gemini-2.0-flash", + "openai": "gpt-4o-mini", + "anthropic": "claude-3-5-haiku-latest", +} _MAX_RETRIES = 5 _TIMEOUT = 120 # seconds - -# Base wait on first 429/503 (doubles each retry, caps at 60s). -# Gemini free tier is 15 RPM = 4s minimum between requests; 10s gives headroom. _RATE_LIMIT_BASE_WAIT = 10 - -_GEMINI_COMPAT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai" -_GEMINI_NATIVE_BASE = "https://generativelanguage.googleapis.com/v1beta" _GEMINI_THINKING_LEVELS = {"none", "minimal", "low", "medium", "high"} _GEMINI_COMPAT_REASONING_EFFORT = { "none": "none", @@ -82,229 +46,284 @@ def _detect_provider() -> tuple[str, str, str]: "medium": "high", "high": "high", } -_GEMINI_25_THINKING_BUDGET = { - "none": 0, - "minimal": 1024, - "low": 1024, - "medium": 8192, - "high": 24576, -} -_GEMINI_NATIVE_THINKING_LEVEL = { - "none": "low", - "minimal": "low", - "low": "low", - "medium": "high", - "high": "high", -} -class LLMClient: - """Thin LLM client supporting OpenAI-compatible and native Gemini endpoints. - - For Gemini keys, starts on the OpenAI-compat layer. On a 403 (which - happens with preview/experimental models not exposed via compat), it - automatically switches to the native generateContent API and stays there - for the lifetime of the process. 
- """ - - def __init__(self, base_url: str, model: str, api_key: str) -> None: - self.base_url = base_url - self.model = model - self.api_key = api_key - self._client = httpx.Client(timeout=_TIMEOUT) - # True once we've confirmed the native Gemini API works for this model - self._use_native_gemini: bool = False - self._is_gemini: bool = base_url.startswith(_GEMINI_COMPAT_BASE) - - @staticmethod - def _normalize_thinking_level(thinking_level: str) -> str: - level = (thinking_level or "low").strip().lower() - if level not in _GEMINI_THINKING_LEVELS: - log.warning("Invalid thinking_level '%s', defaulting to 'low'.", thinking_level) - return "low" - return level - - def _gemini_native_thinking_config(self, thinking_level: str) -> dict: - level = self._normalize_thinking_level(thinking_level) - if "2.5" in self.model: - return {"thinkingBudget": _GEMINI_25_THINKING_BUDGET[level]} - return {"thinkingLevel": _GEMINI_NATIVE_THINKING_LEVEL[level]} - - # -- Native Gemini API -------------------------------------------------- - - def _chat_native_gemini( - self, - messages: list[dict], - temperature: float, - max_tokens: int, - thinking_level: str, - ) -> str: - """Call the native Gemini generateContent API. - - Used automatically when the OpenAI-compat endpoint returns 403, - which happens for preview/experimental models not exposed via compat. - - Converts OpenAI-style messages to Gemini's contents/systemInstruction - format transparently. 
- """ - contents: list[dict] = [] - system_parts: list[dict] = [] - - for msg in messages: - role = msg["role"] - text = msg.get("content", "") - if role == "system": - system_parts.append({"text": text}) - elif role == "user": - contents.append({"role": "user", "parts": [{"text": text}]}) - elif role == "assistant": - # Gemini uses "model" instead of "assistant" - contents.append({"role": "model", "parts": [{"text": text}]}) - - payload: dict = { - "contents": contents, - "generationConfig": { - "temperature": temperature, - "maxOutputTokens": max_tokens, - "thinkingConfig": self._gemini_native_thinking_config(thinking_level), - }, - } - if system_parts: - payload["systemInstruction"] = {"parts": system_parts} - - url = f"{_GEMINI_NATIVE_BASE}/models/{self.model}:generateContent" - resp = self._client.post( - url, - json=payload, - headers={"Content-Type": "application/json"}, - params={"key": self.api_key}, +@dataclass(frozen=True) +class LLMConfig: + """Normalized LLM configuration consumed by LLMClient.""" + + provider: str + base_url: str + model: str + api_key: str + + +def _env_get(env: Mapping[str, str], key: str) -> str: + value = env.get(key, "") + if value is None: + return "" + return str(value).strip() + + +def _normalize_thinking_level(thinking_level: str) -> str: + level = (thinking_level or "low").strip().lower() + if level not in _GEMINI_THINKING_LEVELS: + log.warning("Invalid thinking_level '%s', defaulting to 'low'.", thinking_level) + return "low" + return level + + +def _provider_model(provider: str, model: str) -> str: + if provider == "local": + return model + if model.startswith(f"{provider}/"): + return model + return f"{provider}/{model}" + + +def _default_model(provider: str) -> str: + return _DEFAULT_MODEL_BY_PROVIDER[provider] + + +def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig: + """Resolve provider configuration from environment with deterministic precedence.""" + env_map = env if env is not None else 
os.environ + + model_override = _env_get(env_map, "LLM_MODEL") + local_url = _env_get(env_map, "LLM_URL") + gemini_key = _env_get(env_map, "GEMINI_API_KEY") + openai_key = _env_get(env_map, "OPENAI_API_KEY") + anthropic_key = _env_get(env_map, "ANTHROPIC_API_KEY") + llm_provider = _env_get(env_map, "LLM_PROVIDER").lower() + + providers_present = { + "local": bool(local_url), + "gemini": bool(gemini_key), + "openai": bool(openai_key), + "anthropic": bool(anthropic_key), + } + precedence = ["local", "gemini", "openai", "anthropic"] + configured = [provider for provider in precedence if providers_present[provider]] + + if not configured: + raise RuntimeError( + "No LLM provider configured. " + "Set one of LLM_URL, GEMINI_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY." ) - resp.raise_for_status() - data = resp.json() - return data["candidates"][0]["content"]["parts"][0]["text"] - # -- OpenAI-compat API -------------------------------------------------- + chosen = "" + override_aliases = { + "local": "local", + "gemini": "gemini", + "openai": "openai", + "anthropic": "anthropic", + } + + # Optional override only when multiple providers are configured. + if len(configured) > 1 and llm_provider: + overridden = override_aliases.get(llm_provider) + if overridden and overridden in configured: + chosen = overridden + log.warning( + "Multiple LLM providers configured (%s). Using '%s' via LLM_PROVIDER override.", + ", ".join(configured), + chosen, + ) + else: + log.warning( + "Ignoring LLM_PROVIDER='%s' because it is not configured. " + "Using precedence: LLM_URL > GEMINI_API_KEY > OPENAI_API_KEY > ANTHROPIC_API_KEY.", + llm_provider, + ) + + if not chosen: + chosen = configured[0] + if len(configured) > 1: + log.warning( + "Multiple LLM providers configured (%s). 
Using '%s' based on precedence: " + "LLM_URL > GEMINI_API_KEY > OPENAI_API_KEY > ANTHROPIC_API_KEY.", + ", ".join(configured), + chosen, + ) + model = model_override or _default_model(chosen) + + if chosen == "local": + return LLMConfig( + provider="local", + base_url=local_url.rstrip("/"), + model=model, + api_key=_env_get(env_map, "LLM_API_KEY"), + ) + if chosen == "gemini": + return LLMConfig( + provider="gemini", + base_url=_GEMINI_BASE, + model=model, + api_key=gemini_key, + ) + if chosen == "openai": + return LLMConfig( + provider="openai", + base_url=_OPENAI_BASE, + model=model, + api_key=openai_key, + ) + return LLMConfig( + provider="anthropic", + base_url=_ANTHROPIC_BASE, + model=model, + api_key=anthropic_key, + ) - def _chat_compat( + +def _extract_status_code(exc: Exception) -> int | None: + status_code = getattr(exc, "status_code", None) + if isinstance(status_code, int): + return status_code + response = getattr(exc, "response", None) + if response is not None: + status_code = getattr(response, "status_code", None) + if isinstance(status_code, int): + return status_code + return None + + +def _extract_retry_after(exc: Exception) -> float | None: + response = getattr(exc, "response", None) + if response is None: + return None + headers = getattr(response, "headers", {}) or {} + retry_after = headers.get("Retry-After") or headers.get("X-RateLimit-Reset-Requests") + if not retry_after: + return None + try: + return float(retry_after) + except (TypeError, ValueError): + return None + + +def _is_timeout_error(exc: Exception) -> bool: + if isinstance(exc, TimeoutError): + return True + text = str(exc).lower() + return "timed out" in text or "timeout" in text + + +def _extract_text_content(resp: object) -> str: + choices = getattr(resp, "choices", None) + if choices is None and isinstance(resp, dict): + choices = resp.get("choices", []) + if not choices: + raise RuntimeError("LLM response contained no choices.") + + first = choices[0] + if isinstance(first, 
dict): + message = first.get("message", {}) + else: + message = getattr(first, "message", {}) + + content = message.get("content") if isinstance(message, dict) else getattr(message, "content", None) + if isinstance(content, str): + return content + if isinstance(content, list): + chunks: list[str] = [] + for part in content: + if isinstance(part, str): + chunks.append(part) + elif isinstance(part, dict): + text = part.get("text") + if isinstance(text, str): + chunks.append(text) + text = "".join(chunks).strip() + if text: + return text + raise RuntimeError("LLM response contained no text content.") + + +class LLMClient: + """Thin wrapper around LiteLLM completion().""" + + def __init__(self, config: LLMConfig) -> None: + self.config = config + self.provider = config.provider + self.model = config.model + self._apply_provider_env() + + def _apply_provider_env(self) -> None: + env_key = _PROVIDER_API_ENV_KEY.get(self.provider) + if env_key and self.config.api_key: + os.environ[env_key] = self.config.api_key + + def _build_completion_args( self, messages: list[dict], temperature: float, max_tokens: int, - thinking_level: str, - ) -> str: - """Call the OpenAI-compatible endpoint.""" - headers: dict[str, str] = {"Content-Type": "application/json"} - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - - payload = { - "model": self.model, + thinking_level: str | None, + completion_kwargs: Mapping[str, object] | None, + ) -> dict: + args: dict = { + "model": _provider_model(self.provider, self.model), "messages": messages, "temperature": temperature, "max_tokens": max_tokens, + "timeout": _TIMEOUT, + "num_retries": 0, # ApplyPilot handles retries centrally below. 
} - if self._is_gemini: - level = self._normalize_thinking_level(thinking_level) - payload["reasoning_effort"] = _GEMINI_COMPAT_REASONING_EFFORT[level] - - resp = self._client.post( - f"{self.base_url}/chat/completions", - json=payload, - headers=headers, - ) - - # 403 on Gemini compat = model not available on compat layer. - # Raise a specific sentinel so chat() can switch to native API. - if resp.status_code == 403 and self._is_gemini: - raise _GeminiCompatForbidden(resp) - - return self._handle_compat_response(resp) - @staticmethod - def _handle_compat_response(resp: httpx.Response) -> str: - resp.raise_for_status() - data = resp.json() - if resp.status_code == 200: - # Intentionally log the full JSON payload for every successful - # chat/completions response to aid truncation/debug analysis. - log.info("LLM compat full response JSON:\n%s", json.dumps(data, indent=2, ensure_ascii=False)) - return data["choices"][0]["message"]["content"] + if self.provider == "local": + args["model"] = self.model + args["api_base"] = self.config.base_url + if self.config.api_key: + args["api_key"] = self.config.api_key + elif self.provider == "gemini" and thinking_level is not None: + level = _normalize_thinking_level(thinking_level) + args["reasoning_effort"] = _GEMINI_COMPAT_REASONING_EFFORT[level] - # -- public API --------------------------------------------------------- + if completion_kwargs: + args.update(completion_kwargs) + return args def chat( self, messages: list[dict], temperature: float = 0.0, max_tokens: int = 10000, - thinking_level: str = "low", + thinking_level: str | None = None, + completion_kwargs: Mapping[str, object] | None = None, ) -> str: - """Send a chat completion request and return the assistant message text. - - thinking_level applies to Gemini requests and defaults to "low". - """ - # Qwen3 optimization: prepend /no_think to skip chain-of-thought - # reasoning, saving tokens on structured extraction tasks. 
- if "qwen" in self.model.lower() and messages: - first = messages[0] - if first.get("role") == "user" and not first["content"].startswith("/no_think"): - messages = [{"role": first["role"], "content": f"/no_think\n{first['content']}"}] + messages[1:] + """Send a completion request and return plain text content.""" + try: + from litellm import completion as litellm_completion + except ModuleNotFoundError as exc: + raise RuntimeError( + "LiteLLM is required for AI stages but is not installed. " + "Install dependencies and re-run." + ) from exc for attempt in range(_MAX_RETRIES): try: - # Route to native Gemini if we've already confirmed it's needed - if self._use_native_gemini: - return self._chat_native_gemini(messages, temperature, max_tokens, thinking_level) - - return self._chat_compat(messages, temperature, max_tokens, thinking_level) - - except _GeminiCompatForbidden as exc: - # Model not available on OpenAI-compat layer — switch to native. - log.warning( - "Gemini compat endpoint returned 403 for model '%s'. " - "Switching to native generateContent API. " - "(Preview/experimental models are often compat-only on native.)", - self.model, - ) - self._use_native_gemini = True - # Retry immediately with native — don't count as a rate-limit wait - try: - return self._chat_native_gemini(messages, temperature, max_tokens, thinking_level) - except httpx.HTTPStatusError as native_exc: - raise RuntimeError( - f"Both Gemini endpoints failed. Compat: 403 Forbidden. " - f"Native: {native_exc.response.status_code} — " - f"{native_exc.response.text[:200]}" - ) from native_exc - - except httpx.HTTPStatusError as exc: - resp = exc.response - if resp.status_code in (429, 503) and attempt < _MAX_RETRIES - 1: - # Respect Retry-After header if provided (Gemini sends this). 
- retry_after = ( - resp.headers.get("Retry-After") - or resp.headers.get("X-RateLimit-Reset-Requests") + response = litellm_completion( + **self._build_completion_args( + messages=messages, + temperature=temperature, + max_tokens=max_tokens, + thinking_level=thinking_level, + completion_kwargs=completion_kwargs, ) - if retry_after: - try: - wait = float(retry_after) - except (ValueError, TypeError): - wait = _RATE_LIMIT_BASE_WAIT * (2 ** attempt) - else: - wait = min(_RATE_LIMIT_BASE_WAIT * (2 ** attempt), 60) - + ) + return _extract_text_content(response) + except Exception as exc: # pragma: no cover - provider SDK exception types vary by backend/version. + status_code = _extract_status_code(exc) + if status_code in (429, 503, 529) and attempt < _MAX_RETRIES - 1: + wait = _extract_retry_after(exc) or min(_RATE_LIMIT_BASE_WAIT * (2 ** attempt), 60) log.warning( - "LLM rate limited (HTTP %s). Waiting %ds before retry %d/%d. " - "Tip: Gemini free tier = 15 RPM. Consider a paid account " - "or switching to a local model.", - resp.status_code, wait, attempt + 1, _MAX_RETRIES, + "LLM rate limited (HTTP %s). Waiting %ds before retry %d/%d.", + status_code, wait, attempt + 1, _MAX_RETRIES, ) time.sleep(wait) continue - raise - - except httpx.TimeoutException: - if attempt < _MAX_RETRIES - 1: + if _is_timeout_error(exc) and attempt < _MAX_RETRIES - 1: wait = min(_RATE_LIMIT_BASE_WAIT * (2 ** attempt), 60) log.warning( "LLM request timed out, retrying in %ds (attempt %d/%d)", @@ -312,7 +331,7 @@ def chat( ) time.sleep(wait) continue - raise + raise RuntimeError(f"LLM request failed ({self.provider}/{self.model}): {exc}") from exc raise RuntimeError("LLM request failed after all retries") @@ -321,19 +340,9 @@ def ask(self, prompt: str, **kwargs) -> str: return self.chat([{"role": "user", "content": prompt}], **kwargs) def close(self) -> None: - self._client.close() - - -class _GeminiCompatForbidden(Exception): - """Sentinel: Gemini OpenAI-compat returned 403. 
Switch to native API.""" - def __init__(self, response: httpx.Response) -> None: - self.response = response - super().__init__(f"Gemini compat 403: {response.text[:200]}") - + """No-op. LiteLLM completion() is stateless per call.""" + return None -# --------------------------------------------------------------------------- -# Singleton -# --------------------------------------------------------------------------- _instance: LLMClient | None = None @@ -342,7 +351,13 @@ def get_client() -> LLMClient: """Return (or create) the module-level LLMClient singleton.""" global _instance if _instance is None: - base_url, model, api_key = _detect_provider() - log.info("LLM provider: %s model: %s", base_url, model) - _instance = LLMClient(base_url, model, api_key) + try: + from applypilot.config import load_env + + load_env() + except ModuleNotFoundError: + log.debug("python-dotenv not installed; skipping .env auto-load in llm.get_client().") + config = resolve_llm_config() + log.info("LLM provider: %s model: %s", config.provider, config.model) + _instance = LLMClient(config) return _instance diff --git a/src/applypilot/wizard/init.py b/src/applypilot/wizard/init.py index 20bea2a..aad9783 100644 --- a/src/applypilot/wizard/init.py +++ b/src/applypilot/wizard/init.py @@ -288,7 +288,7 @@ def _setup_ai_features() -> None: configured = ", ".join(configured_sources) console.print( f"[yellow]Multiple LLM providers saved ({configured}). 
" - "Deterministic provider resolution is added in the next phase.[/yellow]" + "Runtime selects one deterministically by precedence.[/yellow]" ) console.print(f"[green]AI configuration saved to {ENV_PATH}[/green]") From 6dba6dcac56bafa79a34bea56929b2184a15d421 Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 07:40:46 -0800 Subject: [PATCH 3/8] Stabilize CLI and LiteLLM runtime behavior with tests and tooling updates --- pyproject.toml | 4 ++ src/applypilot/apply/dashboard.py | 2 +- src/applypilot/apply/launcher.py | 4 +- src/applypilot/cli.py | 30 +++++++++++---- src/applypilot/discovery/jobspy.py | 2 +- src/applypilot/discovery/smartextract.py | 10 ++--- src/applypilot/enrichment/detail.py | 6 +-- src/applypilot/llm.py | 15 +++++--- src/applypilot/pipeline.py | 6 +-- src/applypilot/scoring/cover_letter.py | 5 +-- src/applypilot/scoring/scorer.py | 5 +-- src/applypilot/scoring/tailor.py | 7 +--- src/applypilot/view.py | 3 +- src/applypilot/wizard/init.py | 1 - tests/test_llm_resolution.py | 47 ++++++++++++++++++++++++ 15 files changed, 103 insertions(+), 44 deletions(-) create mode 100644 tests/test_llm_resolution.py diff --git a/pyproject.toml b/pyproject.toml index 51268f7..489ada9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,3 +53,7 @@ artifacts = ["src/applypilot/config/*.yaml"] [tool.ruff] target-version = "py311" line-length = 120 + +[tool.pytest.ini_options] +pythonpath = ["src"] +testpaths = ["tests"] diff --git a/src/applypilot/apply/dashboard.py b/src/applypilot/apply/dashboard.py index c286009..ea85373 100644 --- a/src/applypilot/apply/dashboard.py +++ b/src/applypilot/apply/dashboard.py @@ -7,7 +7,7 @@ import logging import threading import time -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime from pathlib import Path diff --git a/src/applypilot/apply/launcher.py b/src/applypilot/apply/launcher.py index 341a11a..e726ae1 100644 --- a/src/applypilot/apply/launcher.py 
+++ b/src/applypilot/apply/launcher.py @@ -25,7 +25,7 @@ from applypilot import config from applypilot.database import get_connection -from applypilot.apply import chrome, dashboard, prompt as prompt_mod +from applypilot.apply import prompt as prompt_mod from applypilot.apply.chrome import ( launch_chrome, cleanup_worker, kill_all_chrome, reset_worker_dir, cleanup_on_exit, _kill_process_tree, @@ -125,7 +125,7 @@ def acquire_job(target_url: str | None = None, min_score: int = 7, params.extend(blocked_sites) url_clauses = "" if blocked_patterns: - url_clauses = " ".join(f"AND url NOT LIKE ?" for _ in blocked_patterns) + url_clauses = " ".join("AND url NOT LIKE ?" for _ in blocked_patterns) params.extend(blocked_patterns) row = conn.execute(f""" SELECT url, title, site, application_url, tailored_resume_path, diff --git a/src/applypilot/cli.py b/src/applypilot/cli.py index d524174..eaeed7f 100644 --- a/src/applypilot/cli.py +++ b/src/applypilot/cli.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import os from typing import Optional import typer @@ -11,11 +12,24 @@ from applypilot import __version__ -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s - %(levelname)s - %(message)s", - datefmt="%H:%M:%S", -) + +def _configure_logging() -> None: + """Set consistent logging output for CLI runs.""" + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%H:%M:%S", + ) + + # Keep LiteLLM internals quiet by default; warnings/errors still surface. 
+ for name in ("LiteLLM", "litellm"): + noisy = logging.getLogger(name) + noisy.handlers.clear() + noisy.setLevel(logging.WARNING) + noisy.propagate = True + + +_configure_logging() app = typer.Typer( name="applypilot", @@ -211,7 +225,7 @@ def apply( raise typer.Exit(code=1) if gen: - from applypilot.apply.launcher import gen_prompt, BASE_CDP_PORT + from applypilot.apply.launcher import gen_prompt target = url or "" if not target: console.print("[red]--gen requires --url to specify which job.[/red]") @@ -222,7 +236,7 @@ def apply( raise typer.Exit(code=1) mcp_path = _profile_path.parent / ".mcp-apply-0.json" console.print(f"[green]Wrote prompt to:[/green] {prompt_file}") - console.print(f"\n[bold]Run manually:[/bold]") + console.print("\n[bold]Run manually:[/bold]") console.print( f" claude --model {model} -p " f"--mcp-config {mcp_path} " @@ -338,7 +352,7 @@ def doctor() -> None: import shutil from applypilot.config import ( load_env, PROFILE_PATH, RESUME_PATH, RESUME_PDF_PATH, - SEARCH_CONFIG_PATH, ENV_PATH, get_chrome_path, + SEARCH_CONFIG_PATH, get_chrome_path, ) load_env() diff --git a/src/applypilot/discovery/jobspy.py b/src/applypilot/discovery/jobspy.py index b5e54ff..ce0c4c8 100644 --- a/src/applypilot/discovery/jobspy.py +++ b/src/applypilot/discovery/jobspy.py @@ -15,7 +15,7 @@ from jobspy import scrape_jobs from applypilot import config -from applypilot.database import get_connection, init_db, store_jobs +from applypilot.database import get_connection, init_db log = logging.getLogger(__name__) diff --git a/src/applypilot/discovery/smartextract.py b/src/applypilot/discovery/smartextract.py index cf49a9a..43c50c7 100644 --- a/src/applypilot/discovery/smartextract.py +++ b/src/applypilot/discovery/smartextract.py @@ -20,17 +20,15 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone -from pathlib import Path from urllib.parse import quote_plus -import httpx import yaml from bs4 import 
BeautifulSoup from playwright.sync_api import sync_playwright from applypilot import config from applypilot.config import CONFIG_DIR -from applypilot.database import get_connection, init_db, store_jobs, get_stats +from applypilot.database import init_db, get_stats from applypilot.llm import get_client log = logging.getLogger(__name__) @@ -393,7 +391,7 @@ def judge_api_responses(api_responses: list[dict]) -> list[dict]: ) try: - raw = client.ask(prompt, temperature=0.0, max_tokens=1024) + raw = client.ask(prompt, max_tokens=1024) verdict = extract_json(raw) is_relevant = verdict.get("relevant", False) reason = verdict.get("reason", "?") @@ -424,7 +422,7 @@ def format_strategy_briefing(intel: dict) -> str: sections.append(f"\nJSON-LD: {len(job_postings)} JobPosting entries found (usable!)") sections.append(f"First JobPosting:\n{json.dumps(job_postings[0], indent=2)[:3000]}") else: - sections.append(f"\nJSON-LD: NO JobPosting entries (json_ld strategy will NOT work)") + sections.append("\nJSON-LD: NO JobPosting entries (json_ld strategy will NOT work)") if other: types = [j.get("@type", "?") if isinstance(j, dict) else "?" for j in other] sections.append(f"Other JSON-LD types (NOT job data): {types}") @@ -642,7 +640,7 @@ def ask_llm(prompt: str) -> tuple[str, float, dict]: """Send prompt to LLM. 
Returns (response_text, seconds_taken, metadata).""" client = get_client() t0 = time.time() - text = client.ask(prompt, temperature=0.0, max_tokens=4096) + text = client.ask(prompt, max_tokens=4096) elapsed = time.time() - t0 meta = { "finish_reason": "stop", diff --git a/src/applypilot/enrichment/detail.py b/src/applypilot/enrichment/detail.py index 11b7926..8a79579 100644 --- a/src/applypilot/enrichment/detail.py +++ b/src/applypilot/enrichment/detail.py @@ -22,9 +22,7 @@ from bs4 import BeautifulSoup from playwright.sync_api import sync_playwright -from applypilot import config -from applypilot.config import DB_PATH -from applypilot.database import get_connection, init_db, ensure_columns +from applypilot.database import init_db from applypilot.llm import get_client log = logging.getLogger(__name__) @@ -465,7 +463,7 @@ def extract_with_llm(page, url: str) -> dict: try: client = get_client() t0 = time.time() - raw = client.ask(prompt, temperature=0.0, max_tokens=4096) + raw = client.ask(prompt, max_tokens=4096) elapsed = time.time() - t0 log.info("LLM: %d chars in, %.1fs", len(prompt), elapsed) diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py index c60aed8..bc758fc 100644 --- a/src/applypilot/llm.py +++ b/src/applypilot/llm.py @@ -257,7 +257,7 @@ def _apply_provider_env(self) -> None: def _build_completion_args( self, messages: list[dict], - temperature: float, + temperature: float | None, max_tokens: int, thinking_level: str | None, completion_kwargs: Mapping[str, object] | None, @@ -265,11 +265,12 @@ def _build_completion_args( args: dict = { "model": _provider_model(self.provider, self.model), "messages": messages, - "temperature": temperature, "max_tokens": max_tokens, "timeout": _TIMEOUT, "num_retries": 0, # ApplyPilot handles retries centrally below. 
} + if temperature is not None: + args["temperature"] = temperature if self.provider == "local": args["model"] = self.model @@ -287,23 +288,27 @@ def _build_completion_args( def chat( self, messages: list[dict], - temperature: float = 0.0, + temperature: float | None = None, max_tokens: int = 10000, thinking_level: str | None = None, completion_kwargs: Mapping[str, object] | None = None, ) -> str: """Send a completion request and return plain text content.""" try: - from litellm import completion as litellm_completion + import litellm except ModuleNotFoundError as exc: raise RuntimeError( "LiteLLM is required for AI stages but is not installed. " "Install dependencies and re-run." ) from exc + # Suppress LiteLLM's verbose multiline info logs (e.g. completion() traces). + litellm.set_verbose = False + litellm.suppress_debug_info = True + for attempt in range(_MAX_RETRIES): try: - response = litellm_completion( + response = litellm.completion( **self._build_completion_args( messages=messages, temperature=temperature, diff --git a/src/applypilot/pipeline.py b/src/applypilot/pipeline.py index 29881c5..8ae30ab 100644 --- a/src/applypilot/pipeline.py +++ b/src/applypilot/pipeline.py @@ -384,7 +384,7 @@ def _run_streaming(ordered: list[str], min_score: int, workers: int = 1, stop_event = threading.Event() pipeline_start = time.time() - console.print(f"\n [bold cyan]STREAMING MODE[/bold cyan] — stages run concurrently") + console.print("\n [bold cyan]STREAMING MODE[/bold cyan] — stages run concurrently") console.print(f" Poll interval: {_STREAM_POLL_INTERVAL}s\n") # Mark stages NOT in `ordered` as done so downstream doesn't wait for them @@ -492,7 +492,7 @@ def run_pipeline( for name in ordered: meta = STAGE_META[name] console.print(f" {name:<12s} {meta['desc']}") - console.print(f"\n No changes made.") + console.print("\n No changes made.") return {"stages": [], "errors": {}, "elapsed": 0.0} # Execute @@ -527,7 +527,7 @@ def run_pipeline( # Final DB stats final = 
get_stats() - console.print(f"\n [bold]DB Final State:[/bold]") + console.print("\n [bold]DB Final State:[/bold]") console.print(f" Total jobs: {final['total']}") console.print(f" With desc: {final['with_description']}") console.print(f" Scored: {final['scored']}") diff --git a/src/applypilot/scoring/cover_letter.py b/src/applypilot/scoring/cover_letter.py index bbbb053..77045b5 100644 --- a/src/applypilot/scoring/cover_letter.py +++ b/src/applypilot/scoring/cover_letter.py @@ -5,14 +5,13 @@ profile at runtime. No hardcoded personal information. """ -import json import logging import re import time from datetime import datetime, timezone from applypilot.config import COVER_LETTER_DIR, RESUME_PATH, load_profile -from applypilot.database import get_connection, get_jobs_by_stage +from applypilot.database import get_connection from applypilot.llm import get_client from applypilot.scoring.validator import ( BANNED_WORDS, @@ -165,7 +164,7 @@ def generate_cover_letter( )}, ] - letter = client.chat(messages, max_tokens=10000, temperature=0.7) + letter = client.chat(messages, max_tokens=10000) letter = sanitize_text(letter) # auto-fix em dashes, smart quotes letter = _strip_preamble(letter) # remove any "Here is the letter:" prefix diff --git a/src/applypilot/scoring/scorer.py b/src/applypilot/scoring/scorer.py index 97692d5..42efda3 100644 --- a/src/applypilot/scoring/scorer.py +++ b/src/applypilot/scoring/scorer.py @@ -5,13 +5,12 @@ profile and resume file. 
""" -import json import logging import re import time from datetime import datetime, timezone -from applypilot.config import RESUME_PATH, load_profile +from applypilot.config import RESUME_PATH from applypilot.database import get_connection, get_jobs_by_stage from applypilot.llm import get_client @@ -94,7 +93,7 @@ def score_job(resume_text: str, job: dict) -> dict: try: client = get_client() - response = client.chat(messages, max_tokens=512, temperature=0.2) + response = client.chat(messages, max_tokens=512) return _parse_score_response(response) except Exception as e: log.error("LLM error scoring job '%s': %s", job.get("title", "?"), e) diff --git a/src/applypilot/scoring/tailor.py b/src/applypilot/scoring/tailor.py index 352fb5f..28f2a37 100644 --- a/src/applypilot/scoring/tailor.py +++ b/src/applypilot/scoring/tailor.py @@ -14,17 +14,14 @@ import re import time from datetime import datetime, timezone -from pathlib import Path from applypilot.config import RESUME_PATH, TAILORED_DIR, load_profile from applypilot.database import get_connection, get_jobs_by_stage from applypilot.llm import get_client from applypilot.scoring.validator import ( BANNED_WORDS, - FABRICATION_WATCHLIST, sanitize_text, validate_json_fields, - validate_tailored_resume, ) log = logging.getLogger(__name__) @@ -326,7 +323,7 @@ def judge_tailored_resume( ] client = get_client() - response = client.chat(messages, max_tokens=512, temperature=0.1) + response = client.chat(messages, max_tokens=512) passed = "VERDICT: PASS" in response.upper() issues = "none" @@ -400,7 +397,7 @@ def tailor_resume( {"role": "user", "content": f"ORIGINAL RESUME:\n{resume_text}\n\n---\n\nTARGET JOB:\n{job_text}\n\nReturn the JSON:"}, ] - raw = client.chat(messages, max_tokens=2048, temperature=0.4) + raw = client.chat(messages, max_tokens=2048) # Parse JSON from response try: diff --git a/src/applypilot/view.py b/src/applypilot/view.py index ff42fec..82be192 100644 --- a/src/applypilot/view.py +++ 
b/src/applypilot/view.py @@ -10,14 +10,13 @@ from __future__ import annotations -import os import webbrowser from html import escape from pathlib import Path from rich.console import Console -from applypilot.config import APP_DIR, DB_PATH +from applypilot.config import APP_DIR from applypilot.database import get_connection console = Console() diff --git a/src/applypilot/wizard/init.py b/src/applypilot/wizard/init.py index aad9783..9367f91 100644 --- a/src/applypilot/wizard/init.py +++ b/src/applypilot/wizard/init.py @@ -13,7 +13,6 @@ import shutil from pathlib import Path -import typer from rich.console import Console from rich.panel import Panel from rich.prompt import Confirm, Prompt diff --git a/tests/test_llm_resolution.py b/tests/test_llm_resolution.py new file mode 100644 index 0000000..96c8c2a --- /dev/null +++ b/tests/test_llm_resolution.py @@ -0,0 +1,47 @@ +import logging + +import pytest + +from applypilot.llm import resolve_llm_config + + +def test_only_gemini_api_key_selects_gemini() -> None: + cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key"}) + assert cfg.provider == "gemini" + + +def test_only_openai_api_key_selects_openai() -> None: + cfg = resolve_llm_config({"OPENAI_API_KEY": "o-key"}) + assert cfg.provider == "openai" + +def test_llm_url_with_keys_selects_local() -> None: + cfg = resolve_llm_config( + { + "LLM_URL": "http://127.0.0.1:8080/v1", + "GEMINI_API_KEY": "g-key", + "OPENAI_API_KEY": "o-key", + "ANTHROPIC_API_KEY": "a-key", + } + ) + assert cfg.provider == "local" + + +def test_multiple_keys_selects_deterministically_and_warns(caplog: pytest.LogCaptureFixture) -> None: + with caplog.at_level(logging.WARNING): + cfg = resolve_llm_config( + { + "GEMINI_API_KEY": "g-key", + "OPENAI_API_KEY": "o-key", + "ANTHROPIC_API_KEY": "a-key", + } + ) + assert cfg.provider == "gemini" + assert any( + "Multiple LLM providers configured" in rec.message and "Using 'gemini' based on precedence" in rec.message + for rec in caplog.records + ) + + +def 
test_missing_everything_raises_clear_error() -> None: + with pytest.raises(RuntimeError, match="No LLM provider configured"): + resolve_llm_config({}) From 2aafd3d5aaf9743d5de24197ff483eaca3269644 Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 07:40:49 -0800 Subject: [PATCH 4/8] Migrate to Responses API and standardize auth/model handling --- README.md | 1 + pyproject.toml | 2 +- src/applypilot/discovery/smartextract.py | 4 +- src/applypilot/enrichment/detail.py | 2 +- src/applypilot/llm.py | 208 +++++++++-------------- src/applypilot/scoring/cover_letter.py | 2 +- src/applypilot/scoring/scorer.py | 2 +- src/applypilot/scoring/tailor.py | 4 +- src/applypilot/wizard/init.py | 5 +- tests/test_llm_client.py | 36 ++++ tests/test_llm_resolution.py | 18 ++ 11 files changed, 144 insertions(+), 140 deletions(-) create mode 100644 tests/test_llm_client.py diff --git a/README.md b/README.md index 140e43c..0f18a22 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,7 @@ Each stage is independent. Run them all or pick what you need. | Claude Code CLI | Auto-apply | Install from [claude.ai/code](https://claude.ai/code) | **Gemini API key is free.** Get one at [aistudio.google.com](https://aistudio.google.com). OpenAI, Claude, and local models (Ollama/llama.cpp/vLLM) are also supported. +ApplyPilot uses Gemini through LiteLLM's native Gemini provider path, and Gemini API version routing is owned by LiteLLM. 
### Optional diff --git a/pyproject.toml b/pyproject.toml index 489ada9..622aa10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ classifiers = [ dependencies = [ "typer>=0.9.0", "rich>=13.0", - "litellm", + "litellm~=1.63.0", "httpx>=0.24", "beautifulsoup4>=4.12", "playwright>=1.40", diff --git a/src/applypilot/discovery/smartextract.py b/src/applypilot/discovery/smartextract.py index 43c50c7..c8b5300 100644 --- a/src/applypilot/discovery/smartextract.py +++ b/src/applypilot/discovery/smartextract.py @@ -391,7 +391,7 @@ def judge_api_responses(api_responses: list[dict]) -> list[dict]: ) try: - raw = client.ask(prompt, max_tokens=1024) + raw = client.ask(prompt, max_output_tokens=1024) verdict = extract_json(raw) is_relevant = verdict.get("relevant", False) reason = verdict.get("reason", "?") @@ -640,7 +640,7 @@ def ask_llm(prompt: str) -> tuple[str, float, dict]: """Send prompt to LLM. Returns (response_text, seconds_taken, metadata).""" client = get_client() t0 = time.time() - text = client.ask(prompt, max_tokens=4096) + text = client.ask(prompt, max_output_tokens=4096) elapsed = time.time() - t0 meta = { "finish_reason": "stop", diff --git a/src/applypilot/enrichment/detail.py b/src/applypilot/enrichment/detail.py index 8a79579..c76081d 100644 --- a/src/applypilot/enrichment/detail.py +++ b/src/applypilot/enrichment/detail.py @@ -463,7 +463,7 @@ def extract_with_llm(page, url: str) -> dict: try: client = get_client() t0 = time.time() - raw = client.ask(prompt, max_tokens=4096) + raw = client.ask(prompt, max_output_tokens=4096) elapsed = time.time() - t0 log.info("LLM: %d chars in, %.1fs", len(prompt), elapsed) diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py index bc758fc..b71396a 100644 --- a/src/applypilot/llm.py +++ b/src/applypilot/llm.py @@ -7,6 +7,11 @@ LLM_URL -> Local OpenAI-compatible endpoint LLM_MODEL env var overrides the model name for any provider. 
+ +Gemini provider behavior: + - Uses LiteLLM's native Gemini provider path (no OpenAI-compat base URL). + - Google v1 is considered stable while v1beta can change; endpoint version choice is delegated to LiteLLM. + - Provider is inferred from configured credentials; model prefixes are handled internally. """ from __future__ import annotations @@ -15,13 +20,13 @@ from dataclasses import dataclass import logging import os -import time + +import litellm log = logging.getLogger(__name__) _OPENAI_BASE = "https://api.openai.com/v1" _ANTHROPIC_BASE = "https://api.anthropic.com/v1" -_GEMINI_BASE = "https://generativelanguage.googleapis.com/v1beta/openai" _PROVIDER_API_ENV_KEY = { "gemini": "GEMINI_API_KEY", "openai": "OPENAI_API_KEY", @@ -36,16 +41,8 @@ _MAX_RETRIES = 5 _TIMEOUT = 120 # seconds -_RATE_LIMIT_BASE_WAIT = 10 - -_GEMINI_THINKING_LEVELS = {"none", "minimal", "low", "medium", "high"} -_GEMINI_COMPAT_REASONING_EFFORT = { - "none": "none", - "minimal": "low", - "low": "low", - "medium": "high", - "high": "high", -} + +_THINKING_LEVELS = {"none", "low", "medium", "high"} @dataclass(frozen=True) @@ -67,7 +64,7 @@ def _env_get(env: Mapping[str, str], key: str) -> str: def _normalize_thinking_level(thinking_level: str) -> str: level = (thinking_level or "low").strip().lower() - if level not in _GEMINI_THINKING_LEVELS: + if level not in _THINKING_LEVELS: log.warning("Invalid thinking_level '%s', defaulting to 'low'.", thinking_level) return "low" return level @@ -85,6 +82,25 @@ def _default_model(provider: str) -> str: return _DEFAULT_MODEL_BY_PROVIDER[provider] +def _normalize_model_for_provider(provider: str, model: str) -> str: + normalized = model.strip() + if provider == "local": + return normalized + if normalized.startswith("models/"): + normalized = normalized.split("/", 1)[1] + + provider_prefix = f"{provider}/" + if normalized.startswith(provider_prefix): + return normalized[len(provider_prefix):] + + for other in ("gemini", "openai", "anthropic", 
"vertex_ai"): + other_prefix = f"{other}/" + if normalized.startswith(other_prefix): + return normalized.split("/", 1)[1] + + return normalized + + def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig: """Resolve provider configuration from environment with deterministic precedence.""" env_map = env if env is not None else os.environ @@ -146,6 +162,7 @@ def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig: chosen, ) model = model_override or _default_model(chosen) + model = _normalize_model_for_provider(chosen, model) if chosen == "local": return LLMConfig( @@ -157,7 +174,7 @@ def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig: if chosen == "gemini": return LLMConfig( provider="gemini", - base_url=_GEMINI_BASE, + base_url="", model=model, api_key=gemini_key, ) @@ -176,70 +193,6 @@ def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig: ) -def _extract_status_code(exc: Exception) -> int | None: - status_code = getattr(exc, "status_code", None) - if isinstance(status_code, int): - return status_code - response = getattr(exc, "response", None) - if response is not None: - status_code = getattr(response, "status_code", None) - if isinstance(status_code, int): - return status_code - return None - - -def _extract_retry_after(exc: Exception) -> float | None: - response = getattr(exc, "response", None) - if response is None: - return None - headers = getattr(response, "headers", {}) or {} - retry_after = headers.get("Retry-After") or headers.get("X-RateLimit-Reset-Requests") - if not retry_after: - return None - try: - return float(retry_after) - except (TypeError, ValueError): - return None - - -def _is_timeout_error(exc: Exception) -> bool: - if isinstance(exc, TimeoutError): - return True - text = str(exc).lower() - return "timed out" in text or "timeout" in text - - -def _extract_text_content(resp: object) -> str: - choices = getattr(resp, "choices", None) - if choices is None and 
isinstance(resp, dict): - choices = resp.get("choices", []) - if not choices: - raise RuntimeError("LLM response contained no choices.") - - first = choices[0] - if isinstance(first, dict): - message = first.get("message", {}) - else: - message = getattr(first, "message", {}) - - content = message.get("content") if isinstance(message, dict) else getattr(message, "content", None) - if isinstance(content, str): - return content - if isinstance(content, list): - chunks: list[str] = [] - for part in content: - if isinstance(part, str): - chunks.append(part) - elif isinstance(part, dict): - text = part.get("text") - if isinstance(text, str): - chunks.append(text) - text = "".join(chunks).strip() - if text: - return text - raise RuntimeError("LLM response contained no text content.") - - class LLMClient: """Thin wrapper around LiteLLM completion().""" @@ -258,16 +211,16 @@ def _build_completion_args( self, messages: list[dict], temperature: float | None, - max_tokens: int, + max_output_tokens: int, thinking_level: str | None, - completion_kwargs: Mapping[str, object] | None, + response_kwargs: Mapping[str, object] | None, ) -> dict: args: dict = { "model": _provider_model(self.provider, self.model), "messages": messages, - "max_tokens": max_tokens, + "max_tokens": max_output_tokens, "timeout": _TIMEOUT, - "num_retries": 0, # ApplyPilot handles retries centrally below. + "num_retries": _MAX_RETRIES, # Delegate retry handling to LiteLLM. 
         }
         if temperature is not None:
             args["temperature"] = temperature
@@ -277,68 +230,61 @@ def _build_completion_args(
             args["api_base"] = self.config.base_url
             if self.config.api_key:
                 args["api_key"] = self.config.api_key
-        elif self.provider == "gemini" and thinking_level is not None:
+        if thinking_level is not None:
             level = _normalize_thinking_level(thinking_level)
-            args["reasoning_effort"] = _GEMINI_COMPAT_REASONING_EFFORT[level]
+            args["reasoning_effort"] = level
 
-        if completion_kwargs:
-            args.update(completion_kwargs)
+        if response_kwargs:
+            args.update(response_kwargs)
         return args
 
     def chat(
         self,
         messages: list[dict],
         temperature: float | None = None,
-        max_tokens: int = 10000,
+        max_output_tokens: int = 10000,
         thinking_level: str | None = None,
-        completion_kwargs: Mapping[str, object] | None = None,
+        response_kwargs: Mapping[str, object] | None = None,
     ) -> str:
         """Send a completion request and return plain text content."""
+        # Suppress LiteLLM's verbose multiline info logs (e.g. request traces).
+        if hasattr(litellm, 'set_verbose'):
+            litellm.set_verbose = False
+        if hasattr(litellm, 'suppress_debug_info'):
+            litellm.suppress_debug_info = True
+
         try:
-            import litellm
-        except ModuleNotFoundError as exc:
-            raise RuntimeError(
-                "LiteLLM is required for AI stages but is not installed. "
-                "Install dependencies and re-run."
-            ) from exc
-
-        # Suppress LiteLLM's verbose multiline info logs (e.g. completion() traces).
- litellm.set_verbose = False - litellm.suppress_debug_info = True - - for attempt in range(_MAX_RETRIES): - try: - response = litellm.completion( - **self._build_completion_args( - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - thinking_level=thinking_level, - completion_kwargs=completion_kwargs, - ) + response = litellm.completion( + **self._build_completion_args( + messages=messages, + temperature=temperature, + max_output_tokens=max_output_tokens, + thinking_level=thinking_level, + response_kwargs=response_kwargs, ) - return _extract_text_content(response) - except Exception as exc: # pragma: no cover - provider SDK exception types vary by backend/version. - status_code = _extract_status_code(exc) - if status_code in (429, 503, 529) and attempt < _MAX_RETRIES - 1: - wait = _extract_retry_after(exc) or min(_RATE_LIMIT_BASE_WAIT * (2 ** attempt), 60) - log.warning( - "LLM rate limited (HTTP %s). Waiting %ds before retry %d/%d.", - status_code, wait, attempt + 1, _MAX_RETRIES, - ) - time.sleep(wait) - continue - if _is_timeout_error(exc) and attempt < _MAX_RETRIES - 1: - wait = min(_RATE_LIMIT_BASE_WAIT * (2 ** attempt), 60) - log.warning( - "LLM request timed out, retrying in %ds (attempt %d/%d)", - wait, attempt + 1, _MAX_RETRIES, - ) - time.sleep(wait) - continue - raise RuntimeError(f"LLM request failed ({self.provider}/{self.model}): {exc}") from exc - - raise RuntimeError("LLM request failed after all retries") + ) + + choices = getattr(response, "choices", None) + if not choices: + raise RuntimeError("LLM response contained no choices.") + content = choices[0].message.content + + if isinstance(content, str): + text = content.strip() + elif isinstance(content, list): + text = "".join( + part if isinstance(part, str) else part.get("text", "") + for part in content + if isinstance(part, (str, dict)) + ).strip() + else: + text = "" + + if not text: + raise RuntimeError("LLM response contained no text content.") + return text + except 
Exception as exc: # pragma: no cover - provider SDK exception types vary by backend/version. + raise RuntimeError(f"LLM request failed ({self.provider}/{self.model}): {exc}") from exc def ask(self, prompt: str, **kwargs) -> str: """Convenience: single user prompt -> assistant response.""" diff --git a/src/applypilot/scoring/cover_letter.py b/src/applypilot/scoring/cover_letter.py index 77045b5..06e9333 100644 --- a/src/applypilot/scoring/cover_letter.py +++ b/src/applypilot/scoring/cover_letter.py @@ -164,7 +164,7 @@ def generate_cover_letter( )}, ] - letter = client.chat(messages, max_tokens=10000) + letter = client.chat(messages, max_output_tokens=10000) letter = sanitize_text(letter) # auto-fix em dashes, smart quotes letter = _strip_preamble(letter) # remove any "Here is the letter:" prefix diff --git a/src/applypilot/scoring/scorer.py b/src/applypilot/scoring/scorer.py index 42efda3..61d6e5e 100644 --- a/src/applypilot/scoring/scorer.py +++ b/src/applypilot/scoring/scorer.py @@ -93,7 +93,7 @@ def score_job(resume_text: str, job: dict) -> dict: try: client = get_client() - response = client.chat(messages, max_tokens=512) + response = client.chat(messages, max_output_tokens=512) return _parse_score_response(response) except Exception as e: log.error("LLM error scoring job '%s': %s", job.get("title", "?"), e) diff --git a/src/applypilot/scoring/tailor.py b/src/applypilot/scoring/tailor.py index 28f2a37..aaf4021 100644 --- a/src/applypilot/scoring/tailor.py +++ b/src/applypilot/scoring/tailor.py @@ -323,7 +323,7 @@ def judge_tailored_resume( ] client = get_client() - response = client.chat(messages, max_tokens=512) + response = client.chat(messages, max_output_tokens=512) passed = "VERDICT: PASS" in response.upper() issues = "none" @@ -397,7 +397,7 @@ def tailor_resume( {"role": "user", "content": f"ORIGINAL RESUME:\n{resume_text}\n\n---\n\nTARGET JOB:\n{job_text}\n\nReturn the JSON:"}, ] - raw = client.chat(messages, max_tokens=2048) + raw = client.chat(messages, 
max_output_tokens=2048) # Parse JSON from response try: diff --git a/src/applypilot/wizard/init.py b/src/applypilot/wizard/init.py index 9367f91..88cbea1 100644 --- a/src/applypilot/wizard/init.py +++ b/src/applypilot/wizard/init.py @@ -277,7 +277,10 @@ def _setup_ai_features() -> None: console.print("[dim]No AI provider configured. You can add one later with [bold]applypilot init[/bold].[/dim]") return - model = Prompt.ask("LLM model override (optional, leave blank to use provider defaults)", default="").strip() + model = Prompt.ask( + "LLM model override (optional, leave blank to use provider defaults)", + default="", + ).strip() if model: env_lines.append(f"LLM_MODEL={model}") diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py new file mode 100644 index 0000000..0e6fe6f --- /dev/null +++ b/tests/test_llm_client.py @@ -0,0 +1,36 @@ +from applypilot.llm import LLMClient, LLMConfig, _normalize_thinking_level + + +def test_normalize_thinking_level_accepts_supported_levels() -> None: + assert _normalize_thinking_level("none") == "none" + assert _normalize_thinking_level("low") == "low" + assert _normalize_thinking_level("medium") == "medium" + assert _normalize_thinking_level("high") == "high" + + +def test_normalize_thinking_level_defaults_minimal_to_low() -> None: + assert _normalize_thinking_level("minimal") == "low" + + +def test_normalize_thinking_level_defaults_invalid_value_to_low() -> None: + assert _normalize_thinking_level("max") == "low" + + +def test_build_completion_args_applies_reasoning_effort_for_openai() -> None: + client = LLMClient( + LLMConfig( + provider="openai", + base_url="https://api.openai.com/v1", + model="gpt-4o-mini", + api_key="test-key", + ) + ) + args = client._build_completion_args( + messages=[{"role": "user", "content": "hello"}], + temperature=None, + max_output_tokens=128, + thinking_level="medium", + response_kwargs=None, + ) + assert args["reasoning_effort"] == "medium" + assert args["max_tokens"] == 128 diff --git 
a/tests/test_llm_resolution.py b/tests/test_llm_resolution.py index 96c8c2a..d6db6a1 100644 --- a/tests/test_llm_resolution.py +++ b/tests/test_llm_resolution.py @@ -8,12 +8,30 @@ def test_only_gemini_api_key_selects_gemini() -> None: cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key"}) assert cfg.provider == "gemini" + assert cfg.base_url == "" + assert cfg.model == "gemini-2.0-flash" def test_only_openai_api_key_selects_openai() -> None: cfg = resolve_llm_config({"OPENAI_API_KEY": "o-key"}) assert cfg.provider == "openai" + +def test_gemini_model_override_without_prefix_is_normalized() -> None: + cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key", "LLM_MODEL": "gemini-2.5-flash"}) + assert cfg.model == "gemini-2.5-flash" + + +def test_gemini_model_override_google_models_prefix_is_normalized() -> None: + cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key", "LLM_MODEL": "models/gemini-2.5-flash"}) + assert cfg.model == "gemini-2.5-flash" + + +def test_gemini_model_override_gemini_prefix_is_stripped() -> None: + cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key", "LLM_MODEL": "gemini/gemini-2.5-flash"}) + assert cfg.model == "gemini-2.5-flash" + + def test_llm_url_with_keys_selects_local() -> None: cfg = resolve_llm_config( { From e1c06eda5f94d31e8c48880389ff2cec26c70b48 Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 07:40:53 -0800 Subject: [PATCH 5/8] Remove provider side effects and tighten runtime verification defaults --- README.md | 6 +++ pyproject.toml | 3 ++ src/applypilot/cli.py | 2 +- src/applypilot/llm.py | 66 +++++---------------------- tests/test_gemini_smoke.py | 49 ++++++++++++++++++++ tests/test_llm_client.py | 88 +++++++++++++++++++++++++++++------- tests/test_llm_resolution.py | 2 +- 7 files changed, 143 insertions(+), 73 deletions(-) create mode 100644 tests/test_gemini_smoke.py diff --git a/README.md b/README.md index 0f18a22..5bc20b2 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,12 @@ ApplyPilot uses Gemini through 
LiteLLM's native Gemini provider path, and Gemini |-----------|-------------| | CapSolver API key | Solves CAPTCHAs during auto-apply (hCaptcha, reCAPTCHA, Turnstile, FunCaptcha). Without it, CAPTCHA-blocked applications just fail gracefully | +### Gemini Smoke Check (optional) + +```bash +GEMINI_API_KEY=your_key_here pytest -m smoke -q tests/test_gemini_smoke.py +``` + > **Note:** python-jobspy is installed separately with `--no-deps` because it pins an exact numpy version in its metadata that conflicts with pip's resolver. It works fine with modern numpy at runtime. --- diff --git a/pyproject.toml b/pyproject.toml index 622aa10..21a3afd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,3 +57,6 @@ line-length = 120 [tool.pytest.ini_options] pythonpath = ["src"] testpaths = ["tests"] +markers = [ + "smoke: live-provider smoke tests that require external API keys", +] diff --git a/src/applypilot/cli.py b/src/applypilot/cli.py index eaeed7f..85a859c 100644 --- a/src/applypilot/cli.py +++ b/src/applypilot/cli.py @@ -398,7 +398,7 @@ def doctor() -> None: try: llm_cfg = resolve_llm_config() if llm_cfg.provider == "local": - results.append(("LLM API key", ok_mark, f"Local: {llm_cfg.base_url} ({llm_cfg.model})")) + results.append(("LLM API key", ok_mark, f"Local: {llm_cfg.api_base} ({llm_cfg.model})")) else: label = { "gemini": "Gemini", diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py index b71396a..80f8d21 100644 --- a/src/applypilot/llm.py +++ b/src/applypilot/llm.py @@ -25,13 +25,6 @@ log = logging.getLogger(__name__) -_OPENAI_BASE = "https://api.openai.com/v1" -_ANTHROPIC_BASE = "https://api.anthropic.com/v1" -_PROVIDER_API_ENV_KEY = { - "gemini": "GEMINI_API_KEY", - "openai": "OPENAI_API_KEY", - "anthropic": "ANTHROPIC_API_KEY", -} _DEFAULT_MODEL_BY_PROVIDER = { "local": "local-model", "gemini": "gemini-2.0-flash", @@ -42,15 +35,13 @@ _MAX_RETRIES = 5 _TIMEOUT = 120 # seconds -_THINKING_LEVELS = {"none", "low", "medium", "high"} - 
@dataclass(frozen=True) class LLMConfig: """Normalized LLM configuration consumed by LLMClient.""" provider: str - base_url: str + api_base: str | None model: str api_key: str @@ -62,14 +53,6 @@ def _env_get(env: Mapping[str, str], key: str) -> str: return str(value).strip() -def _normalize_thinking_level(thinking_level: str) -> str: - level = (thinking_level or "low").strip().lower() - if level not in _THINKING_LEVELS: - log.warning("Invalid thinking_level '%s', defaulting to 'low'.", thinking_level) - return "low" - return level - - def _provider_model(provider: str, model: str) -> str: if provider == "local": return model @@ -167,27 +150,27 @@ def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig: if chosen == "local": return LLMConfig( provider="local", - base_url=local_url.rstrip("/"), + api_base=local_url.rstrip("/"), model=model, api_key=_env_get(env_map, "LLM_API_KEY"), ) if chosen == "gemini": return LLMConfig( provider="gemini", - base_url="", + api_base=None, model=model, api_key=gemini_key, ) if chosen == "openai": return LLMConfig( provider="openai", - base_url=_OPENAI_BASE, + api_base=None, model=model, api_key=openai_key, ) return LLMConfig( provider="anthropic", - base_url=_ANTHROPIC_BASE, + api_base=None, model=model, api_key=anthropic_key, ) @@ -200,19 +183,12 @@ def __init__(self, config: LLMConfig) -> None: self.config = config self.provider = config.provider self.model = config.model - self._apply_provider_env() - - def _apply_provider_env(self) -> None: - env_key = _PROVIDER_API_ENV_KEY.get(self.provider) - if env_key and self.config.api_key: - os.environ[env_key] = self.config.api_key def _build_completion_args( self, messages: list[dict], temperature: float | None, max_output_tokens: int, - thinking_level: str | None, response_kwargs: Mapping[str, object] | None, ) -> dict: args: dict = { @@ -225,14 +201,13 @@ def _build_completion_args( if temperature is not None: args["temperature"] = temperature + if 
self.config.api_key: + args["api_key"] = self.config.api_key + if self.provider == "local": args["model"] = self.model - args["api_base"] = self.config.base_url - if self.config.api_key: - args["api_key"] = self.config.api_key - if thinking_level is not None: - level = _normalize_thinking_level(thinking_level) - args["reasoning_effort"] = level + if self.config.api_base: + args["api_base"] = self.config.api_base if response_kwargs: args.update(response_kwargs) @@ -243,15 +218,10 @@ def chat( messages: list[dict], temperature: float | None = None, max_output_tokens: int = 10000, - thinking_level: str | None = None, response_kwargs: Mapping[str, object] | None = None, ) -> str: """Send a completion request and return plain text content.""" - # Suppress LiteLLM's verbose multiline info logs (e.g. request traces). - if hasattr(litellm, 'set_verbose'): - litellm.set_verbose(False) - if hasattr(litellm, 'suppress_debug_info'): - litellm.suppress_debug_info = True + litellm.suppress_debug_info = True try: response = litellm.completion( @@ -259,7 +229,6 @@ def chat( messages=messages, temperature=temperature, max_output_tokens=max_output_tokens, - thinking_level=thinking_level, response_kwargs=response_kwargs, ) ) @@ -267,18 +236,7 @@ def chat( choices = getattr(response, "choices", None) if not choices: raise RuntimeError("LLM response contained no choices.") - content = choices[0].message.content - - if isinstance(content, str): - text = content.strip() - elif isinstance(content, list): - text = "".join( - part if isinstance(part, str) else part.get("text", "") - for part in content - if isinstance(part, (str, dict)) - ).strip() - else: - text = "" + text = response.choices[0].message.content.strip() if not text: raise RuntimeError("LLM response contained no text content.") diff --git a/tests/test_gemini_smoke.py b/tests/test_gemini_smoke.py new file mode 100644 index 0000000..8b732af --- /dev/null +++ b/tests/test_gemini_smoke.py @@ -0,0 +1,49 @@ +import os + +import 
pytest + +litellm = pytest.importorskip("litellm") + + +def _gemini_smoke_model() -> str: + raw = os.getenv("GEMINI_SMOKE_MODEL", "gemini-2.0-flash").strip() + if raw.startswith("gemini/"): + return raw + if raw.startswith("models/"): + raw = raw.split("/", 1)[1] + return f"gemini/{raw}" + + +def _content_text(content: object) -> str: + if isinstance(content, str): + return content.strip() + if isinstance(content, list): + return "".join( + part if isinstance(part, str) else str(part.get("text", "")) + for part in content + if isinstance(part, (str, dict)) + ).strip() + return "" + + +@pytest.mark.smoke +def test_gemini_smoke_completion_returns_non_empty_content() -> None: + api_key = os.getenv("GEMINI_API_KEY", "").strip() + if not api_key: + pytest.skip("Set GEMINI_API_KEY to run Gemini smoke tests.") + + prompt = os.getenv("GEMINI_SMOKE_PROMPT", "Reply with a single word: ready.") + response = litellm.completion( + model=_gemini_smoke_model(), + api_key=api_key, + messages=[{"role": "user", "content": prompt}], + max_tokens=32, + timeout=60, + num_retries=1, + ) + + choices = getattr(response, "choices", None) + assert choices, "Gemini smoke call returned no choices." + + content = choices[0].message.content + assert _content_text(content), "Gemini smoke call returned empty choices[0].message.content." 
diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py index 0e6fe6f..e0a6203 100644 --- a/tests/test_llm_client.py +++ b/tests/test_llm_client.py @@ -1,26 +1,26 @@ -from applypilot.llm import LLMClient, LLMConfig, _normalize_thinking_level +import os +from applypilot.llm import LLMClient, LLMConfig -def test_normalize_thinking_level_accepts_supported_levels() -> None: - assert _normalize_thinking_level("none") == "none" - assert _normalize_thinking_level("low") == "low" - assert _normalize_thinking_level("medium") == "medium" - assert _normalize_thinking_level("high") == "high" - -def test_normalize_thinking_level_defaults_minimal_to_low() -> None: - assert _normalize_thinking_level("minimal") == "low" - - -def test_normalize_thinking_level_defaults_invalid_value_to_low() -> None: - assert _normalize_thinking_level("max") == "low" +def test_client_init_does_not_mutate_provider_env(monkeypatch) -> None: + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + LLMClient( + LLMConfig( + provider="openai", + api_base=None, + model="gpt-4o-mini", + api_key="test-key", + ) + ) + assert "OPENAI_API_KEY" not in os.environ -def test_build_completion_args_applies_reasoning_effort_for_openai() -> None: +def test_build_completion_args_does_not_include_reasoning_effort_by_default() -> None: client = LLMClient( LLMConfig( provider="openai", - base_url="https://api.openai.com/v1", + api_base=None, model="gpt-4o-mini", api_key="test-key", ) @@ -29,8 +29,62 @@ def test_build_completion_args_applies_reasoning_effort_for_openai() -> None: messages=[{"role": "user", "content": "hello"}], temperature=None, max_output_tokens=128, - thinking_level="medium", response_kwargs=None, ) - assert args["reasoning_effort"] == "medium" + assert "reasoning_effort" not in args assert args["max_tokens"] == 128 + + +def test_build_completion_args_uses_litellm_native_gemini_model_prefix() -> None: + client = LLMClient( + LLMConfig( + provider="gemini", + api_base=None, + 
model="gemini-2.0-flash", + api_key="g-key", + ) + ) + args = client._build_completion_args( + messages=[{"role": "user", "content": "hello"}], + temperature=None, + max_output_tokens=64, + response_kwargs=None, + ) + assert args["model"] == "gemini/gemini-2.0-flash" + + +def test_build_completion_args_includes_api_key_for_remote_provider() -> None: + client = LLMClient( + LLMConfig( + provider="gemini", + api_base=None, + model="gemini-2.0-flash", + api_key="g-key", + ) + ) + args = client._build_completion_args( + messages=[{"role": "user", "content": "hello"}], + temperature=None, + max_output_tokens=64, + response_kwargs=None, + ) + assert args["api_key"] == "g-key" + + +def test_build_completion_args_sets_local_api_base_and_api_key() -> None: + client = LLMClient( + LLMConfig( + provider="local", + api_base="http://127.0.0.1:8080/v1", + model="local-model", + api_key="local-key", + ) + ) + args = client._build_completion_args( + messages=[{"role": "user", "content": "hello"}], + temperature=None, + max_output_tokens=64, + response_kwargs=None, + ) + assert args["api_base"] == "http://127.0.0.1:8080/v1" + assert args["api_key"] == "local-key" diff --git a/tests/test_llm_resolution.py b/tests/test_llm_resolution.py index d6db6a1..c1c4fb5 100644 --- a/tests/test_llm_resolution.py +++ b/tests/test_llm_resolution.py @@ -8,7 +8,7 @@ def test_only_gemini_api_key_selects_gemini() -> None: cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key"}) assert cfg.provider == "gemini" - assert cfg.base_url == "" + assert cfg.api_base is None assert cfg.model == "gemini-2.0-flash" From a524b8aff963fd8aa8ad4400033f66f827c32e98 Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 07:41:08 -0800 Subject: [PATCH 6/8] Simplify model selection and remove legacy input-shape compatibility paths --- README.md | 4 +- src/applypilot/cli.py | 7 +- src/applypilot/config.py | 19 +- src/applypilot/discovery/smartextract.py | 4 +- src/applypilot/enrichment/detail.py | 2 +- 
src/applypilot/llm.py | 294 ++++++++++------------- src/applypilot/wizard/init.py | 16 +- tests/test_gemini_smoke.py | 2 +- tests/test_llm_client.py | 115 +++++---- tests/test_llm_resolution.py | 72 +++--- 10 files changed, 263 insertions(+), 272 deletions(-) diff --git a/README.md b/README.md index 5bc20b2..59df888 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ Each stage is independent. Run them all or pick what you need. |-----------|-------------|---------| | Python 3.11+ | Everything | Core runtime | | Node.js 18+ | Auto-apply | Needed for `npx` to run Playwright MCP server | -| LLM API key or local endpoint | Scoring, tailoring, cover letters | Set one of `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `LLM_URL` | +| LLM credentials or local endpoint | Scoring, tailoring, cover letters | Set one of `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `LLM_URL`. Optional: set `LLM_MODEL` (for example `gemini/gemini-3.0-flash`) to override the default model. | | Chrome/Chromium | Auto-apply | Auto-detected on most systems | | Claude Code CLI | Auto-apply | Install from [claude.ai/code](https://claude.ai/code) | @@ -122,7 +122,7 @@ Your personal data in one structured file: contact info, work authorization, com Job search queries, target titles, locations, boards. Run multiple searches with different parameters. ### `.env` -API keys and runtime config: `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `LLM_URL`, `LLM_MODEL`, `CAPSOLVER_API_KEY` (optional). +API keys and runtime config: `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `LLM_URL`, optional `LLM_MODEL`, optional `LLM_API_KEY`, and `CAPSOLVER_API_KEY`. 
### Package configs (shipped with ApplyPilot) - `config/employers.yaml` - Workday employer registry (48 preconfigured) diff --git a/src/applypilot/cli.py b/src/applypilot/cli.py index 85a859c..7c770ac 100644 --- a/src/applypilot/cli.py +++ b/src/applypilot/cli.py @@ -397,8 +397,8 @@ def doctor() -> None: try: llm_cfg = resolve_llm_config() - if llm_cfg.provider == "local": - results.append(("LLM API key", ok_mark, f"Local: {llm_cfg.api_base} ({llm_cfg.model})")) + if llm_cfg.api_base: + results.append(("LLM API key", ok_mark, f"Custom endpoint: {llm_cfg.api_base} ({llm_cfg.model})")) else: label = { "gemini": "Gemini", @@ -409,7 +409,8 @@ def doctor() -> None: except RuntimeError: results.append( ("LLM API key", fail_mark, - "Set GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, or LLM_URL in ~/.applypilot/.env") + "Set one of GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, LLM_URL, " + "or set LLM_MODEL with LLM_API_KEY in ~/.applypilot/.env") ) # --- Tier 3 checks --- diff --git a/src/applypilot/config.py b/src/applypilot/config.py index 9067245..090dec6 100644 --- a/src/applypilot/config.py +++ b/src/applypilot/config.py @@ -206,10 +206,14 @@ def get_tier() -> int: """ load_env() - has_llm = any( + has_provider_source = any( os.environ.get(k) for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LLM_URL") ) + has_model_and_generic_key = bool((os.environ.get("LLM_MODEL") or "").strip()) and bool( + (os.environ.get("LLM_API_KEY") or "").strip() + ) + has_llm = has_provider_source or has_model_and_generic_key if not has_llm: return 1 @@ -241,13 +245,18 @@ def check_tier(required: int, feature: str) -> None: _console = Console(stderr=True) missing: list[str] = [] - if required >= 2 and not any( + has_provider_source = any( os.environ.get(k) for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LLM_URL") - ): + ) + has_model_and_generic_key = bool((os.environ.get("LLM_MODEL") or "").strip()) and bool( + (os.environ.get("LLM_API_KEY") 
or "").strip() + ) + if required >= 2 and not (has_provider_source or has_model_and_generic_key): missing.append( - "LLM API key — run [bold]applypilot init[/bold] or set " - "GEMINI_API_KEY / OPENAI_API_KEY / ANTHROPIC_API_KEY / LLM_URL" + "LLM config — run [bold]applypilot init[/bold] or set one of " + "GEMINI_API_KEY / OPENAI_API_KEY / ANTHROPIC_API_KEY / LLM_URL " + "(or set LLM_MODEL with LLM_API_KEY)" ) if required >= 3: if not shutil.which("claude"): diff --git a/src/applypilot/discovery/smartextract.py b/src/applypilot/discovery/smartextract.py index c8b5300..c9bb18c 100644 --- a/src/applypilot/discovery/smartextract.py +++ b/src/applypilot/discovery/smartextract.py @@ -391,7 +391,7 @@ def judge_api_responses(api_responses: list[dict]) -> list[dict]: ) try: - raw = client.ask(prompt, max_output_tokens=1024) + raw = client.chat([{"role": "user", "content": prompt}], max_output_tokens=1024) verdict = extract_json(raw) is_relevant = verdict.get("relevant", False) reason = verdict.get("reason", "?") @@ -640,7 +640,7 @@ def ask_llm(prompt: str) -> tuple[str, float, dict]: """Send prompt to LLM. 
Returns (response_text, seconds_taken, metadata).""" client = get_client() t0 = time.time() - text = client.ask(prompt, max_output_tokens=4096) + text = client.chat([{"role": "user", "content": prompt}], max_output_tokens=4096) elapsed = time.time() - t0 meta = { "finish_reason": "stop", diff --git a/src/applypilot/enrichment/detail.py b/src/applypilot/enrichment/detail.py index c76081d..f415cc9 100644 --- a/src/applypilot/enrichment/detail.py +++ b/src/applypilot/enrichment/detail.py @@ -463,7 +463,7 @@ def extract_with_llm(page, url: str) -> dict: try: client = get_client() t0 = time.time() - raw = client.ask(prompt, max_output_tokens=4096) + raw = client.chat([{"role": "user", "content": prompt}], max_output_tokens=4096) elapsed = time.time() - t0 log.info("LLM: %d chars in, %.1fs", len(prompt), elapsed) diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py index 80f8d21..9888c9f 100644 --- a/src/applypilot/llm.py +++ b/src/applypilot/llm.py @@ -1,17 +1,13 @@ """Unified LLM client for ApplyPilot using LiteLLM. -Auto-detects provider from environment: - GEMINI_API_KEY -> Google Gemini (default: gemini-2.0-flash) - OPENAI_API_KEY -> OpenAI (default: gpt-4o-mini) - ANTHROPIC_API_KEY -> Anthropic Claude (default: claude-3-5-haiku-latest) - LLM_URL -> Local OpenAI-compatible endpoint - -LLM_MODEL env var overrides the model name for any provider. - -Gemini provider behavior: - - Uses LiteLLM's native Gemini provider path (no OpenAI-compat base URL). - - Google v1 is considered stable while v1beta can change; endpoint version choice is delegated to LiteLLM. - - Provider is inferred from configured credentials; model prefixes are handled internally. +Runtime contract: + - If set, LLM_MODEL must be a fully-qualified LiteLLM model string + (for example: openai/gpt-4o-mini, anthropic/claude-3-5-haiku-latest, + gemini/gemini-3.0-flash). 
+ - If LLM_MODEL is unset, provider is inferred by first configured source: + GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, then LLM_URL. + - Credentials come from provider env vars or generic LLM_API_KEY. + - LLM_URL is optional for custom OpenAI-compatible endpoints. """ from __future__ import annotations @@ -20,25 +16,32 @@ from dataclasses import dataclass import logging import os +from typing import Any, Literal, TypedDict, Unpack +import warnings import litellm log = logging.getLogger(__name__) -_DEFAULT_MODEL_BY_PROVIDER = { - "local": "local-model", - "gemini": "gemini-2.0-flash", - "openai": "gpt-4o-mini", - "anthropic": "claude-3-5-haiku-latest", -} - _MAX_RETRIES = 5 _TIMEOUT = 120 # seconds +_INFERRED_SOURCE_ORDER: tuple[tuple[str, str], ...] = ( + ("gemini", "GEMINI_API_KEY"), + ("openai", "OPENAI_API_KEY"), + ("anthropic", "ANTHROPIC_API_KEY"), + ("openai", "LLM_URL"), +) +_DEFAULT_MODEL_BY_PROVIDER = { + "gemini": "gemini/gemini-3.0-flash", + "openai": "openai/gpt-5-mini", + "anthropic": "anthropic/claude-haiku-4-5", +} +_DEFAULT_LOCAL_MODEL = "openai/local-model" @dataclass(frozen=True) class LLMConfig: - """Normalized LLM configuration consumed by LLMClient.""" + """LLM configuration consumed by LLMClient.""" provider: str api_base: str | None @@ -46,6 +49,22 @@ class LLMConfig: api_key: str +class ChatMessage(TypedDict): + role: Literal["system", "user", "assistant", "tool"] + content: str + + +class LiteLLMExtra(TypedDict, total=False): + stop: str | list[str] + top_p: float + seed: int + stream: bool + response_format: dict[str, Any] + tools: list[dict[str, Any]] + tool_choice: str | dict[str, Any] + fallbacks: list[str] + + def _env_get(env: Mapping[str, str], key: str) -> str: value = env.get(key, "") if value is None: @@ -53,126 +72,75 @@ def _env_get(env: Mapping[str, str], key: str) -> str: return str(value).strip() -def _provider_model(provider: str, model: str) -> str: - if provider == "local": - return model - if 
model.startswith(f"{provider}/"): - return model - return f"{provider}/{model}" - - -def _default_model(provider: str) -> str: - return _DEFAULT_MODEL_BY_PROVIDER[provider] - - -def _normalize_model_for_provider(provider: str, model: str) -> str: - normalized = model.strip() - if provider == "local": - return normalized - if normalized.startswith("models/"): - normalized = normalized.split("/", 1)[1] - - provider_prefix = f"{provider}/" - if normalized.startswith(provider_prefix): - return normalized[len(provider_prefix):] +def _provider_from_model(model: str) -> str: + provider, _, model_name = model.partition("/") + if not provider or not model_name: + raise RuntimeError( + "LLM_MODEL must include a provider prefix (for example 'openai/gpt-4o-mini')." + ) + return provider - for other in ("gemini", "openai", "anthropic", "vertex_ai"): - other_prefix = f"{other}/" - if normalized.startswith(other_prefix): - return normalized.split("/", 1)[1] - return normalized +def _infer_provider_and_source(env: Mapping[str, str]) -> tuple[str, str] | None: + for provider, env_key in _INFERRED_SOURCE_ORDER: + if _env_get(env, env_key): + return provider, env_key + return None def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig: - """Resolve provider configuration from environment with deterministic precedence.""" + """Resolve LLM configuration from environment.""" env_map = env if env is not None else os.environ - model_override = _env_get(env_map, "LLM_MODEL") + model = _env_get(env_map, "LLM_MODEL") local_url = _env_get(env_map, "LLM_URL") - gemini_key = _env_get(env_map, "GEMINI_API_KEY") - openai_key = _env_get(env_map, "OPENAI_API_KEY") - anthropic_key = _env_get(env_map, "ANTHROPIC_API_KEY") - llm_provider = _env_get(env_map, "LLM_PROVIDER").lower() - - providers_present = { - "local": bool(local_url), - "gemini": bool(gemini_key), - "openai": bool(openai_key), - "anthropic": bool(anthropic_key), - } - precedence = ["local", "gemini", "openai", 
"anthropic"] - configured = [provider for provider in precedence if providers_present[provider]] - - if not configured: - raise RuntimeError( - "No LLM provider configured. " - "Set one of LLM_URL, GEMINI_API_KEY, OPENAI_API_KEY, or ANTHROPIC_API_KEY." - ) - - chosen = "" - override_aliases = { - "local": "local", - "gemini": "gemini", - "openai": "openai", - "anthropic": "anthropic", - } - - # Optional override only when multiple providers are configured. - if len(configured) > 1 and llm_provider: - overridden = override_aliases.get(llm_provider) - if overridden and overridden in configured: - chosen = overridden - log.warning( - "Multiple LLM providers configured (%s). Using '%s' via LLM_PROVIDER override.", - ", ".join(configured), - chosen, - ) + inferred = _infer_provider_and_source(env_map) + if model: + if "/" in model: + provider = _provider_from_model(model) + elif inferred: + provider, _ = inferred + model = f"{provider}/{model}" else: - log.warning( - "Ignoring LLM_PROVIDER='%s' because it is not configured. " - "Using precedence: LLM_URL > GEMINI_API_KEY > OPENAI_API_KEY > ANTHROPIC_API_KEY.", - llm_provider, + raise RuntimeError( + "LLM_MODEL must include a provider prefix (for example 'openai/gpt-4o-mini')." ) - - if not chosen: - chosen = configured[0] - if len(configured) > 1: - log.warning( - "Multiple LLM providers configured (%s). Using '%s' based on precedence: " - "LLM_URL > GEMINI_API_KEY > OPENAI_API_KEY > ANTHROPIC_API_KEY.", - ", ".join(configured), - chosen, + else: + if not inferred: + raise RuntimeError( + "No LLM provider configured. Set one of GEMINI_API_KEY, OPENAI_API_KEY, " + "ANTHROPIC_API_KEY, LLM_URL, or LLM_MODEL." 
) - model = model_override or _default_model(chosen) - model = _normalize_model_for_provider(chosen, model) - - if chosen == "local": - return LLMConfig( - provider="local", - api_base=local_url.rstrip("/"), - model=model, - api_key=_env_get(env_map, "LLM_API_KEY"), - ) - if chosen == "gemini": - return LLMConfig( - provider="gemini", - api_base=None, - model=model, - api_key=gemini_key, + provider, source = inferred + if source == "LLM_URL": + model = _DEFAULT_LOCAL_MODEL + else: + model = _DEFAULT_MODEL_BY_PROVIDER[provider] + + provider_api_key_env = { + "gemini": "GEMINI_API_KEY", + "openai": "OPENAI_API_KEY", + "anthropic": "ANTHROPIC_API_KEY", + } + api_key_env = provider_api_key_env.get(provider, "LLM_API_KEY") + api_key = _env_get(env_map, api_key_env) or _env_get(env_map, "LLM_API_KEY") + + if not api_key and not local_url: + key_help = ( + f"{api_key_env} or LLM_API_KEY" + if provider in provider_api_key_env + else "LLM_API_KEY" ) - if chosen == "openai": - return LLMConfig( - provider="openai", - api_base=None, - model=model, - api_key=openai_key, + raise RuntimeError( + f"Missing credentials for LLM_MODEL '{model}'. Set {key_help}, or set LLM_URL for " + "a local OpenAI-compatible endpoint." ) + return LLMConfig( - provider="anthropic", - api_base=None, + provider=provider, + api_base=local_url.rstrip("/") if local_url else None, model=model, - api_key=anthropic_key, + api_key=api_key, ) @@ -183,60 +151,52 @@ def __init__(self, config: LLMConfig) -> None: self.config = config self.provider = config.provider self.model = config.model - - def _build_completion_args( - self, - messages: list[dict], - temperature: float | None, - max_output_tokens: int, - response_kwargs: Mapping[str, object] | None, - ) -> dict: - args: dict = { - "model": _provider_model(self.provider, self.model), - "messages": messages, - "max_tokens": max_output_tokens, - "timeout": _TIMEOUT, - "num_retries": _MAX_RETRIES, # Delegate retry handling to LiteLLM. 
- } - if temperature is not None: - args["temperature"] = temperature - - if self.config.api_key: - args["api_key"] = self.config.api_key - - if self.provider == "local": - args["model"] = self.model - if self.config.api_base: - args["api_base"] = self.config.api_base - - if response_kwargs: - args.update(response_kwargs) - return args + litellm.suppress_debug_info = True def chat( self, - messages: list[dict], - temperature: float | None = None, + messages: list[ChatMessage], + *, max_output_tokens: int = 10000, - response_kwargs: Mapping[str, object] | None = None, + temperature: float | None = None, + timeout: int = _TIMEOUT, + num_retries: int = _MAX_RETRIES, + drop_params: bool = True, + **extra: Unpack[LiteLLMExtra], ) -> str: """Send a completion request and return plain text content.""" - litellm.suppress_debug_info = True - try: - response = litellm.completion( - **self._build_completion_args( + if temperature is None: + response = litellm.completion( + model=self.model, + messages=messages, + max_tokens=max_output_tokens, + timeout=timeout, + num_retries=num_retries, + drop_params=drop_params, + api_key=self.config.api_key or None, + api_base=self.config.api_base or None, + **extra, + ) + else: + response = litellm.completion( + model=self.model, messages=messages, + max_tokens=max_output_tokens, temperature=temperature, - max_output_tokens=max_output_tokens, - response_kwargs=response_kwargs, + timeout=timeout, + num_retries=num_retries, + drop_params=drop_params, + api_key=self.config.api_key or None, + api_base=self.config.api_base or None, + **extra, ) - ) choices = getattr(response, "choices", None) if not choices: raise RuntimeError("LLM response contained no choices.") - text = response.choices[0].message.content.strip() + content = response.choices[0].message.content + text = content.strip() if isinstance(content, str) else str(content).strip() if not text: raise RuntimeError("LLM response contained no text content.") @@ -244,10 +204,6 @@ def 
chat( except Exception as exc: # pragma: no cover - provider SDK exception types vary by backend/version. raise RuntimeError(f"LLM request failed ({self.provider}/{self.model}): {exc}") from exc - def ask(self, prompt: str, **kwargs) -> str: - """Convenience: single user prompt -> assistant response.""" - return self.chat([{"role": "user", "content": prompt}], **kwargs) - def close(self) -> None: """No-op. LiteLLM completion() is stateless per call.""" return None diff --git a/src/applypilot/wizard/init.py b/src/applypilot/wizard/init.py index 88cbea1..06826bd 100644 --- a/src/applypilot/wizard/init.py +++ b/src/applypilot/wizard/init.py @@ -277,12 +277,18 @@ def _setup_ai_features() -> None: console.print("[dim]No AI provider configured. You can add one later with [bold]applypilot init[/bold].[/dim]") return + default_model_by_source = { + "gemini": "gemini/gemini-3.0-flash", + "openai": "openai/gpt-4o-mini", + "anthropic": "anthropic/claude-3-5-haiku-latest", + "local": "openai/local-model", + } + default_model = default_model_by_source.get(configured_sources[0], "openai/gpt-4o-mini") model = Prompt.ask( - "LLM model override (optional, leave blank to use provider defaults)", - default="", + "LLM model (required, include provider prefix)", + default=default_model, ).strip() - if model: - env_lines.append(f"LLM_MODEL={model}") + env_lines.append(f"LLM_MODEL={model}") env_lines.append("") ENV_PATH.write_text("\n".join(env_lines), encoding="utf-8") @@ -290,7 +296,7 @@ def _setup_ai_features() -> None: configured = ", ".join(configured_sources) console.print( f"[yellow]Multiple LLM providers saved ({configured}). 
" - "Runtime selects one deterministically by precedence.[/yellow]" + "Runtime routing follows LLM_MODEL's provider prefix.[/yellow]" ) console.print(f"[green]AI configuration saved to {ENV_PATH}[/green]") diff --git a/tests/test_gemini_smoke.py b/tests/test_gemini_smoke.py index 8b732af..fecc332 100644 --- a/tests/test_gemini_smoke.py +++ b/tests/test_gemini_smoke.py @@ -6,7 +6,7 @@ def _gemini_smoke_model() -> str: - raw = os.getenv("GEMINI_SMOKE_MODEL", "gemini-2.0-flash").strip() + raw = os.getenv("GEMINI_SMOKE_MODEL", "gemini-3.0-flash").strip() if raw.startswith("gemini/"): return raw if raw.startswith("models/"): diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py index e0a6203..6470ea9 100644 --- a/tests/test_llm_client.py +++ b/tests/test_llm_client.py @@ -1,5 +1,7 @@ import os +from types import SimpleNamespace +import applypilot.llm as llm_module from applypilot.llm import LLMClient, LLMConfig @@ -9,82 +11,105 @@ def test_client_init_does_not_mutate_provider_env(monkeypatch) -> None: LLMConfig( provider="openai", api_base=None, - model="gpt-4o-mini", + model="openai/gpt-4o-mini", api_key="test-key", ) ) assert "OPENAI_API_KEY" not in os.environ + assert llm_module.litellm.suppress_debug_info is True -def test_build_completion_args_does_not_include_reasoning_effort_by_default() -> None: +def _mock_response(content: str = "hello") -> SimpleNamespace: + return SimpleNamespace( + choices=[ + SimpleNamespace( + message=SimpleNamespace(content=content), + ) + ] + ) + + +def test_chat_passes_defaults_without_temperature(monkeypatch) -> None: client = LLMClient( LLMConfig( provider="openai", api_base=None, - model="gpt-4o-mini", + model="openai/gpt-4o-mini", api_key="test-key", ) ) - args = client._build_completion_args( - messages=[{"role": "user", "content": "hello"}], - temperature=None, - max_output_tokens=128, - response_kwargs=None, - ) - assert "reasoning_effort" not in args - assert args["max_tokens"] == 128 + captured: dict[str, object] = 
{} + def _fake_completion(**kwargs: object) -> SimpleNamespace: + captured.update(kwargs) + return _mock_response() -def test_build_completion_args_uses_litellm_native_gemini_model_prefix() -> None: - client = LLMClient( - LLMConfig( - provider="gemini", - api_base=None, - model="gemini-2.0-flash", - api_key="g-key", - ) - ) - args = client._build_completion_args( - messages=[{"role": "user", "content": "hello"}], - temperature=None, - max_output_tokens=64, - response_kwargs=None, - ) - assert args["model"] == "gemini/gemini-2.0-flash" + monkeypatch.setattr(llm_module.litellm, "completion", _fake_completion) + response = client.chat([{"role": "user", "content": "hello"}], max_output_tokens=128) + + assert response == "hello" + assert captured["model"] == "openai/gpt-4o-mini" + assert captured["max_tokens"] == 128 + assert captured["timeout"] == 120 + assert captured["num_retries"] == 5 + assert captured["drop_params"] is True + assert captured["api_key"] == "test-key" + assert captured["api_base"] is None + assert "temperature" not in captured + assert "reasoning_effort" not in captured -def test_build_completion_args_includes_api_key_for_remote_provider() -> None: +def test_chat_supports_temperature_and_typed_extra(monkeypatch) -> None: client = LLMClient( LLMConfig( provider="gemini", api_base=None, - model="gemini-2.0-flash", + model="gemini/gemini-3.0-flash", api_key="g-key", ) ) - args = client._build_completion_args( - messages=[{"role": "user", "content": "hello"}], - temperature=None, + captured: dict[str, object] = {} + + def _fake_completion(**kwargs: object) -> SimpleNamespace: + captured.update(kwargs) + return _mock_response("ok") + + monkeypatch.setattr(llm_module.litellm, "completion", _fake_completion) + response = client.chat( + [{"role": "user", "content": "hello"}], max_output_tokens=64, - response_kwargs=None, + temperature=0.2, + top_p=0.9, + stop=["\n\n"], + response_format={"type": "json_object"}, ) - assert args["api_key"] == "g-key" + + 
assert response == "ok" + assert captured["model"] == "gemini/gemini-3.0-flash" + assert captured["api_key"] == "g-key" + assert captured["temperature"] == 0.2 + assert captured["top_p"] == 0.9 + assert captured["stop"] == ["\n\n"] + assert captured["response_format"] == {"type": "json_object"} -def test_build_completion_args_sets_local_api_base_and_api_key() -> None: +def test_chat_sets_local_api_base_and_api_key(monkeypatch) -> None: client = LLMClient( LLMConfig( - provider="local", + provider="openai", api_base="http://127.0.0.1:8080/v1", - model="local-model", + model="openai/local-model", api_key="local-key", ) ) - args = client._build_completion_args( - messages=[{"role": "user", "content": "hello"}], - temperature=None, - max_output_tokens=64, - response_kwargs=None, - ) - assert args["api_base"] == "http://127.0.0.1:8080/v1" - assert args["api_key"] == "local-key" + captured: dict[str, object] = {} + + def _fake_completion(**kwargs: object) -> SimpleNamespace: + captured.update(kwargs) + return _mock_response() + + monkeypatch.setattr(llm_module.litellm, "completion", _fake_completion) + _ = client.chat([{"role": "user", "content": "hello"}], max_output_tokens=64) + + assert captured["api_base"] == "http://127.0.0.1:8080/v1" + assert captured["api_key"] == "local-key" diff --git a/tests/test_llm_resolution.py b/tests/test_llm_resolution.py index c1c4fb5..47022d6 100644 --- a/tests/test_llm_resolution.py +++ b/tests/test_llm_resolution.py @@ -1,63 +1,57 @@ -import logging - import pytest from applypilot.llm import resolve_llm_config -def test_only_gemini_api_key_selects_gemini() -> None: - cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key"}) +def test_infers_provider_from_first_configured_source() -> None: + cfg = resolve_llm_config( + { + "GEMINI_API_KEY": "g-key", + "OPENAI_API_KEY": "o-key", + "ANTHROPIC_API_KEY": "a-key", + "LLM_URL": "http://127.0.0.1:8080/v1", + } + ) assert cfg.provider == "gemini" - assert cfg.api_base is None - assert cfg.model == 
"gemini-2.0-flash" + assert cfg.model == "gemini/gemini-3.0-flash" + assert cfg.api_key == "g-key" -def test_only_openai_api_key_selects_openai() -> None: - cfg = resolve_llm_config({"OPENAI_API_KEY": "o-key"}) +def test_unprefixed_model_uses_inferred_provider() -> None: + cfg = resolve_llm_config({"LLM_MODEL": "gpt-4o-mini", "OPENAI_API_KEY": "o-key"}) assert cfg.provider == "openai" + assert cfg.model == "openai/gpt-4o-mini" -def test_gemini_model_override_without_prefix_is_normalized() -> None: - cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key", "LLM_MODEL": "gemini-2.5-flash"}) - assert cfg.model == "gemini-2.5-flash" +def test_requires_model_provider_prefix_without_inferable_provider() -> None: + with pytest.raises(RuntimeError, match="must include a provider prefix"): + resolve_llm_config({"LLM_MODEL": "gpt-4o-mini", "LLM_API_KEY": "generic"}) -def test_gemini_model_override_google_models_prefix_is_normalized() -> None: - cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key", "LLM_MODEL": "models/gemini-2.5-flash"}) - assert cfg.model == "gemini-2.5-flash" +def test_provider_and_api_key_come_from_model_contract() -> None: + cfg = resolve_llm_config({"LLM_MODEL": "gemini/gemini-3.0-flash", "GEMINI_API_KEY": "g-key"}) + assert cfg.provider == "gemini" + assert cfg.api_base is None + assert cfg.model == "gemini/gemini-3.0-flash" + assert cfg.api_key == "g-key" -def test_gemini_model_override_gemini_prefix_is_stripped() -> None: - cfg = resolve_llm_config({"GEMINI_API_KEY": "g-key", "LLM_MODEL": "gemini/gemini-2.5-flash"}) - assert cfg.model == "gemini-2.5-flash" +def test_uses_generic_api_key_for_unmapped_provider() -> None: + cfg = resolve_llm_config({"LLM_MODEL": "vertex_ai/gemini-3.0-flash", "LLM_API_KEY": "v-key"}) + assert cfg.provider == "vertex_ai" + assert cfg.api_key == "v-key" -def test_llm_url_with_keys_selects_local() -> None: +def test_llm_url_infers_local_default_model_and_allows_missing_api_key() -> None: cfg = resolve_llm_config( { - "LLM_URL": 
"http://127.0.0.1:8080/v1", - "GEMINI_API_KEY": "g-key", - "OPENAI_API_KEY": "o-key", - "ANTHROPIC_API_KEY": "a-key", + "LLM_URL": "http://127.0.0.1:8080/v1/", } ) - assert cfg.provider == "local" - - -def test_multiple_keys_selects_deterministically_and_warns(caplog: pytest.LogCaptureFixture) -> None: - with caplog.at_level(logging.WARNING): - cfg = resolve_llm_config( - { - "GEMINI_API_KEY": "g-key", - "OPENAI_API_KEY": "o-key", - "ANTHROPIC_API_KEY": "a-key", - } - ) - assert cfg.provider == "gemini" - assert any( - "Multiple LLM providers configured" in rec.message and "Using 'gemini' based on precedence" in rec.message - for rec in caplog.records - ) + assert cfg.provider == "openai" + assert cfg.model == "openai/local-model" + assert cfg.api_base == "http://127.0.0.1:8080/v1" + assert cfg.api_key == "" def test_missing_everything_raises_clear_error() -> None: From efaee0809a76b3d1ad9c7082f53a6abb806d9d4c Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 08:21:24 -0800 Subject: [PATCH 7/8] Cleanup gitignore and pyproject.toml --- .gitignore | 3 --- pyproject.toml | 7 ------- 2 files changed, 10 deletions(-) diff --git a/.gitignore b/.gitignore index 35c6a55..835589f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,8 +5,6 @@ resume.pdf *.env .env.* !.env.example -.venv/* - # Runtime artifacts *.db @@ -41,4 +39,3 @@ Thumbs.db # Claude Code .claude/ -tm_dev/nb.ipynb diff --git a/pyproject.toml b/pyproject.toml index 21a3afd..2b0e264 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,10 +53,3 @@ artifacts = ["src/applypilot/config/*.yaml"] [tool.ruff] target-version = "py311" line-length = 120 - -[tool.pytest.ini_options] -pythonpath = ["src"] -testpaths = ["tests"] -markers = [ - "smoke: live-provider smoke tests that require external API keys", -] From 8e29d13c3b19994fead7f2e192544128bd885cc5 Mon Sep 17 00:00:00 2001 From: Trevor Mells Date: Fri, 27 Feb 2026 15:10:25 -0800 Subject: [PATCH 8/8] increase tokens for tailor graph. 
Improve logging --- src/applypilot/cli.py | 13 +++++++++++++ src/applypilot/llm.py | 4 ++++ src/applypilot/scoring/tailor.py | 7 +++++-- src/applypilot/scoring/validator.py | 5 ++++- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/applypilot/cli.py b/src/applypilot/cli.py index 7c770ac..6344ce9 100644 --- a/src/applypilot/cli.py +++ b/src/applypilot/cli.py @@ -28,6 +28,19 @@ def _configure_logging() -> None: noisy.setLevel(logging.WARNING) noisy.propagate = True + # Route verbose tailor/cover loggers to a file instead of the terminal. + # Per-attempt warnings and validation details are useful for debugging + # but too noisy for normal CLI output. + from applypilot.config import LOG_DIR + LOG_DIR.mkdir(parents=True, exist_ok=True) + _file_fmt = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", datefmt="%H:%M:%S") + for logger_name in ("applypilot.scoring.tailor", "applypilot.scoring.cover_letter"): + file_log = logging.getLogger(logger_name) + file_log.propagate = False # suppress terminal output + fh = logging.FileHandler(LOG_DIR / f"{logger_name.split('.')[-1]}.log", encoding="utf-8") + fh.setFormatter(_file_fmt) + file_log.addHandler(fh) + _configure_logging() diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py index 9888c9f..030f2ce 100644 --- a/src/applypilot/llm.py +++ b/src/applypilot/llm.py @@ -21,6 +21,10 @@ import litellm +# Suppress pydantic serialization warnings from litellm internals when provider +# responses have fewer fields than the full ModelResponse schema. 
+warnings.filterwarnings("ignore", category=UserWarning, module="pydantic.*") + log = logging.getLogger(__name__) _MAX_RETRIES = 5 diff --git a/src/applypilot/scoring/tailor.py b/src/applypilot/scoring/tailor.py index aaf4021..0fb71d9 100644 --- a/src/applypilot/scoring/tailor.py +++ b/src/applypilot/scoring/tailor.py @@ -397,12 +397,14 @@ def tailor_resume( {"role": "user", "content": f"ORIGINAL RESUME:\n{resume_text}\n\n---\n\nTARGET JOB:\n{job_text}\n\nReturn the JSON:"}, ] - raw = client.chat(messages, max_output_tokens=2048) + raw = client.chat(messages, max_output_tokens=16000) # Parse JSON from response try: data = extract_json(raw) - except ValueError: + except ValueError as exc: + log.warning("Attempt %d JSON parse failed (%s). Raw response (first 500 chars):\n%s", + attempt + 1, exc, raw[:1000]) avoid_notes.append("Output was not valid JSON. Return ONLY a JSON object, nothing else.") continue @@ -412,6 +414,7 @@ def tailor_resume( if not validation["passed"]: # Only retry if there are hard errors (warnings never block) + log.warning("Attempt %d validation failed: %s", attempt + 1, validation["errors"]) avoid_notes.extend(validation["errors"]) if attempt < max_retries: continue diff --git a/src/applypilot/scoring/validator.py b/src/applypilot/scoring/validator.py index abb8f89..3d3ce17 100644 --- a/src/applypilot/scoring/validator.py +++ b/src/applypilot/scoring/validator.py @@ -114,9 +114,12 @@ def validate_json_fields(data: dict, profile: dict, mode: str = "normal") -> dic warnings: list[str] = [] # Required keys — always checked regardless of mode - for key in ("title", "summary", "skills", "experience", "projects", "education"): + # "projects" may be an empty list (model may drop all projects for some jobs) + for key in ("title", "summary", "skills", "experience", "education"): if key not in data or not data[key]: errors.append(f"Missing required field: {key}") + if "projects" not in data: + errors.append("Missing required field: projects") if errors: 
return {"passed": False, "errors": errors, "warnings": warnings}