diff --git a/.env.example b/.env.example
index df7cc38..fbf9543 100644
--- a/.env.example
+++ b/.env.example
@@ -2,10 +2,11 @@
 # Copy to ~/.applypilot/.env and fill in your values.
 
 # LLM Provider (pick one)
-GEMINI_API_KEY=    # Gemini 2.0 Flash (recommended, cheapest)
-# OPENAI_API_KEY=  # OpenAI (GPT-4o-mini)
-# LLM_URL=http://127.0.0.1:8080/v1  # Local LLM (llama.cpp, Ollama)
-# LLM_MODEL=       # Override model name
+GEMINI_API_KEY=       # Gemini (recommended, cheapest)
+# OPENAI_API_KEY=     # OpenAI
+# ANTHROPIC_API_KEY=  # Anthropic Claude
+# LLM_URL=http://127.0.0.1:8080/v1  # Local LLM (OpenAI-compatible: llama.cpp, Ollama, vLLM)
+# LLM_MODEL=          # Override model name (provider-specific)
 
 # Auto-Apply (optional)
 CAPSOLVER_API_KEY=  # For CAPTCHA solving during auto-apply
diff --git a/README.md b/README.md
index e7fe08e..59df888 100644
--- a/README.md
+++ b/README.md
@@ -43,12 +43,12 @@ applypilot apply --dry-run  # fill forms without submitting
 ## Two Paths
 
 ### Full Pipeline (recommended)
-**Requires:** Python 3.11+, Node.js (for npx), Gemini API key (free), Claude Code CLI, Chrome
+**Requires:** Python 3.11+, Node.js (for npx), an LLM key (Gemini/OpenAI/Claude) or `LLM_URL`, Claude Code CLI, Chrome
 
 Runs all 6 stages, from job discovery to autonomous application submission. This is the full power of ApplyPilot.
 
 ### Discovery + Tailoring Only
-**Requires:** Python 3.11+, Gemini API key (free)
+**Requires:** Python 3.11+, an LLM key (Gemini/OpenAI/Claude) or `LLM_URL`
 
 Runs stages 1-5: discovers jobs, scores them, tailors your resume, generates cover letters. You submit applications manually with the AI-prepared materials.
 
@@ -88,11 +88,12 @@ Each stage is independent. Run them all or pick what you need.
 |-----------|-------------|---------|
 | Python 3.11+ | Everything | Core runtime |
 | Node.js 18+ | Auto-apply | Needed for `npx` to run Playwright MCP server |
-| Gemini API key | Scoring, tailoring, cover letters | Free tier (15 RPM / 1M tokens/day) is enough |
+| LLM credentials or local endpoint | Scoring, tailoring, cover letters | Set one of `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, or `LLM_URL`. Optional: set `LLM_MODEL` (for example `gemini/gemini-3.0-flash`) to override the default model. |
 | Chrome/Chromium | Auto-apply | Auto-detected on most systems |
 | Claude Code CLI | Auto-apply | Install from [claude.ai/code](https://claude.ai/code) |
 
-**Gemini API key is free.** Get one at [aistudio.google.com](https://aistudio.google.com). OpenAI and local models (Ollama/llama.cpp) are also supported.
+**Gemini API key is free.** Get one at [aistudio.google.com](https://aistudio.google.com). OpenAI, Claude, and local models (Ollama/llama.cpp/vLLM) are also supported.
+ApplyPilot routes Gemini calls through LiteLLM's native Gemini provider, so Gemini API version routing is handled by LiteLLM rather than by ApplyPilot itself.
 
 ### Optional
 
@@ -100,6 +101,12 @@ Each stage is independent. Run them all or pick what you need.
 |-----------|-------------|
 | CapSolver API key | Solves CAPTCHAs during auto-apply (hCaptcha, reCAPTCHA, Turnstile, FunCaptcha). Without it, CAPTCHA-blocked applications just fail gracefully |
 
+### Gemini Smoke Check (optional)
+
+```bash
+GEMINI_API_KEY=your_key_here pytest -m smoke -q tests/test_gemini_smoke.py
+```
+
 > **Note:** python-jobspy is installed separately with `--no-deps` because it pins an exact numpy version in its metadata that conflicts with pip's resolver. It works fine with modern numpy at runtime.
 
 ---
@@ -115,7 +122,7 @@ Your personal data in one structured file: contact info, work authorization, com
 Job search queries, target titles, locations, boards. Run multiple searches with different parameters.
 
 ### `.env`
-API keys and runtime config: `GEMINI_API_KEY`, `LLM_MODEL`, `CAPSOLVER_API_KEY` (optional).
+API keys and runtime config: `GEMINI_API_KEY`, `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `LLM_URL`, optional `LLM_MODEL`, optional `LLM_API_KEY`, and `CAPSOLVER_API_KEY`.
 
 ### Package configs (shipped with ApplyPilot)
 - `config/employers.yaml` - Workday employer registry (48 preconfigured)
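For orientation, a filled-in `~/.applypilot/.env` under the new scheme might look like the sketch below (key values are placeholders; `LLM_MODEL` is only needed to override the provider default):

```bash
# ~/.applypilot/.env (illustrative values only)
GEMINI_API_KEY=AIza-your-key-here
# LLM_MODEL=gemini/gemini-3.0-flash   # optional override; keep the provider prefix
CAPSOLVER_API_KEY=                    # optional, for auto-apply CAPTCHA solving
```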
diff --git a/pyproject.toml b/pyproject.toml
index f5116d8..2b0e264 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,7 @@ classifiers = [
 dependencies = [
     "typer>=0.9.0",
     "rich>=13.0",
+    "litellm~=1.63.0",
    "httpx>=0.24",
     "beautifulsoup4>=4.12",
     "playwright>=1.40",
diff --git a/src/applypilot/apply/dashboard.py b/src/applypilot/apply/dashboard.py
index c286009..ea85373 100644
--- a/src/applypilot/apply/dashboard.py
+++ b/src/applypilot/apply/dashboard.py
@@ -7,7 +7,7 @@
 import logging
 import threading
 import time
 
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
diff --git a/src/applypilot/apply/launcher.py b/src/applypilot/apply/launcher.py
index 341a11a..e726ae1 100644
--- a/src/applypilot/apply/launcher.py
+++ b/src/applypilot/apply/launcher.py
@@ -25,7 +25,7 @@
 from applypilot import config
 from applypilot.database import get_connection
 
-from applypilot.apply import chrome, dashboard, prompt as prompt_mod
+from applypilot.apply import prompt as prompt_mod
 from applypilot.apply.chrome import (
     launch_chrome, cleanup_worker, kill_all_chrome,
     reset_worker_dir, cleanup_on_exit, _kill_process_tree,
@@ -125,7 +125,7 @@ def acquire_job(target_url: str | None = None, min_score: int = 7,
         params.extend(blocked_sites)
     url_clauses = ""
     if blocked_patterns:
-        url_clauses = " ".join(f"AND url NOT LIKE ?" for _ in blocked_patterns)
+        url_clauses = " ".join("AND url NOT LIKE ?" for _ in blocked_patterns)
         params.extend(blocked_patterns)
     row = conn.execute(f"""
         SELECT url, title, site, application_url, tailored_resume_path,
diff --git a/src/applypilot/cli.py b/src/applypilot/cli.py
index 6c8be91..6344ce9 100644
--- a/src/applypilot/cli.py
+++ b/src/applypilot/cli.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import os
 from typing import Optional
 
 import typer
@@ -11,11 +12,37 @@
 from applypilot import __version__
 
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s",
-    datefmt="%H:%M:%S",
-)
+
+def _configure_logging() -> None:
+    """Set consistent logging output for CLI runs."""
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(levelname)s - %(message)s",
+        datefmt="%H:%M:%S",
+    )
+
+    # Keep LiteLLM internals quiet by default; warnings/errors still surface.
+    for name in ("LiteLLM", "litellm"):
+        noisy = logging.getLogger(name)
+        noisy.handlers.clear()
+        noisy.setLevel(logging.WARNING)
+        noisy.propagate = True
+
+    # Route verbose tailor/cover loggers to a file instead of the terminal.
+    # Per-attempt warnings and validation details are useful for debugging
+    # but too noisy for normal CLI output.
+    from applypilot.config import LOG_DIR
+    LOG_DIR.mkdir(parents=True, exist_ok=True)
+    _file_fmt = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", datefmt="%H:%M:%S")
+    for logger_name in ("applypilot.scoring.tailor", "applypilot.scoring.cover_letter"):
+        file_log = logging.getLogger(logger_name)
+        file_log.propagate = False  # suppress terminal output
+        fh = logging.FileHandler(LOG_DIR / f"{logger_name.split('.')[-1]}.log", encoding="utf-8")
+        fh.setFormatter(_file_fmt)
+        file_log.addHandler(fh)
+
+
+_configure_logging()
 
 app = typer.Typer(
     name="applypilot",
@@ -211,7 +238,7 @@ def apply(
         raise typer.Exit(code=1)
 
     if gen:
-        from applypilot.apply.launcher import gen_prompt, BASE_CDP_PORT
+        from applypilot.apply.launcher import gen_prompt
         target = url or ""
         if not target:
             console.print("[red]--gen requires --url to specify which job.[/red]")
@@ -222,7 +249,7 @@
             raise typer.Exit(code=1)
         mcp_path = _profile_path.parent / ".mcp-apply-0.json"
         console.print(f"[green]Wrote prompt to:[/green] {prompt_file}")
-        console.print(f"\n[bold]Run manually:[/bold]")
+        console.print("\n[bold]Run manually:[/bold]")
         console.print(
             f"  claude --model {model} -p "
             f"--mcp-config {mcp_path} "
@@ -338,7 +365,7 @@ def doctor() -> None:
     import shutil
     from applypilot.config import (
         load_env, PROFILE_PATH, RESUME_PATH, RESUME_PDF_PATH,
-        SEARCH_CONFIG_PATH, ENV_PATH, get_chrome_path,
+        SEARCH_CONFIG_PATH, get_chrome_path,
     )
 
     load_env()
@@ -379,21 +406,25 @@
             "pip install --no-deps python-jobspy && pip install pydantic tls-client requests markdownify regex"))
 
     # --- Tier 2 checks ---
-    import os
-    has_gemini = bool(os.environ.get("GEMINI_API_KEY"))
-    has_openai = bool(os.environ.get("OPENAI_API_KEY"))
-    has_local = bool(os.environ.get("LLM_URL"))
-    if has_gemini:
-        model = os.environ.get("LLM_MODEL", "gemini-2.0-flash")
-        results.append(("LLM API key", ok_mark, f"Gemini ({model})"))
-    elif has_openai:
-        model = os.environ.get("LLM_MODEL", "gpt-4o-mini")
-        results.append(("LLM API key", ok_mark, f"OpenAI ({model})"))
-    elif has_local:
-        results.append(("LLM API key", ok_mark, f"Local: {os.environ.get('LLM_URL')}"))
-    else:
-        results.append(("LLM API key", fail_mark,
-            "Set GEMINI_API_KEY in ~/.applypilot/.env (run 'applypilot init')"))
+    from applypilot.llm import resolve_llm_config
+
+    try:
+        llm_cfg = resolve_llm_config()
+        if llm_cfg.api_base:
+            results.append(("LLM API key", ok_mark, f"Custom endpoint: {llm_cfg.api_base} ({llm_cfg.model})"))
+        else:
+            label = {
+                "gemini": "Gemini",
+                "openai": "OpenAI",
+                "anthropic": "Anthropic",
+            }.get(llm_cfg.provider, llm_cfg.provider)
+            results.append(("LLM API key", ok_mark, f"{label} ({llm_cfg.model})"))
+    except RuntimeError:
+        results.append(
+            ("LLM API key", fail_mark,
+             "Set one of GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, LLM_URL, "
+             "or set LLM_MODEL with LLM_API_KEY in ~/.applypilot/.env")
+        )
 
     # --- Tier 3 checks ---
     # Claude Code CLI
diff --git a/src/applypilot/config.py b/src/applypilot/config.py
index 8c39780..090dec6 100644
--- a/src/applypilot/config.py
+++ b/src/applypilot/config.py
@@ -206,7 +206,14 @@ def get_tier() -> int:
     """
     load_env()
 
-    has_llm = any(os.environ.get(k) for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "LLM_URL"))
+    has_provider_source = any(
+        os.environ.get(k)
+        for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LLM_URL")
+    )
+    has_model_and_generic_key = bool((os.environ.get("LLM_MODEL") or "").strip()) and bool(
+        (os.environ.get("LLM_API_KEY") or "").strip()
+    )
+
+    has_llm = has_provider_source or has_model_and_generic_key
     if not has_llm:
         return 1
@@ -238,8 +245,19 @@ def check_tier(required: int, feature: str) -> None:
     _console = Console(stderr=True)
     missing: list[str] = []
 
-    if required >= 2 and not any(os.environ.get(k) for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "LLM_URL")):
-        missing.append("LLM API key — run [bold]applypilot init[/bold] or set GEMINI_API_KEY")
+    has_provider_source = any(
+        os.environ.get(k)
+        for k in ("GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LLM_URL")
+    )
+    has_model_and_generic_key = bool((os.environ.get("LLM_MODEL") or "").strip()) and bool(
+        (os.environ.get("LLM_API_KEY") or "").strip()
+    )
+    if required >= 2 and not (has_provider_source or has_model_and_generic_key):
+        missing.append(
+            "LLM config — run [bold]applypilot init[/bold] or set one of "
+            "GEMINI_API_KEY / OPENAI_API_KEY / ANTHROPIC_API_KEY / LLM_URL "
+            "(or set LLM_MODEL with LLM_API_KEY)"
+        )
     if required >= 3:
         if not shutil.which("claude"):
             missing.append("Claude Code CLI — install from [bold]https://claude.ai/code[/bold]")
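The tier gate in `get_tier()`/`check_tier()` now accepts either a provider-specific source or the generic `LLM_MODEL` + `LLM_API_KEY` pair. A standalone sketch of which combinations pass (this mirrors the checks above for illustration; it is not part of the diff):

```python
# Mirror of the tier-2 condition in config.py, for illustration only.
def tier2_configured(env: dict[str, str]) -> bool:
    provider_keys = ("GEMINI_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "LLM_URL")
    has_provider_source = any(env.get(k) for k in provider_keys)
    has_model_and_generic_key = bool(env.get("LLM_MODEL", "").strip()) and bool(
        env.get("LLM_API_KEY", "").strip()
    )
    return has_provider_source or has_model_and_generic_key

assert tier2_configured({"ANTHROPIC_API_KEY": "sk-ant-placeholder"})
assert tier2_configured({"LLM_MODEL": "openai/gpt-4o-mini", "LLM_API_KEY": "generic"})
assert not tier2_configured({"LLM_MODEL": "openai/gpt-4o-mini"})  # model alone fails the gate
```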
diff --git a/src/applypilot/discovery/jobspy.py b/src/applypilot/discovery/jobspy.py
index b5e54ff..ce0c4c8 100644
--- a/src/applypilot/discovery/jobspy.py
+++ b/src/applypilot/discovery/jobspy.py
@@ -15,7 +15,7 @@
 from jobspy import scrape_jobs
 
 from applypilot import config
-from applypilot.database import get_connection, init_db, store_jobs
+from applypilot.database import get_connection, init_db
 
 log = logging.getLogger(__name__)
diff --git a/src/applypilot/discovery/smartextract.py b/src/applypilot/discovery/smartextract.py
index cf49a9a..c9bb18c 100644
--- a/src/applypilot/discovery/smartextract.py
+++ b/src/applypilot/discovery/smartextract.py
@@ -20,17 +20,15 @@
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime, timezone
-from pathlib import Path
 from urllib.parse import quote_plus
 
-import httpx
 import yaml
 from bs4 import BeautifulSoup
 from playwright.sync_api import sync_playwright
 
 from applypilot import config
 from applypilot.config import CONFIG_DIR
-from applypilot.database import get_connection, init_db, store_jobs, get_stats
+from applypilot.database import init_db, get_stats
 from applypilot.llm import get_client
 
 log = logging.getLogger(__name__)
@@ -393,7 +391,7 @@ def judge_api_responses(api_responses: list[dict]) -> list[dict]:
     )
 
     try:
-        raw = client.ask(prompt, temperature=0.0, max_tokens=1024)
+        raw = client.chat([{"role": "user", "content": prompt}], max_output_tokens=1024)
         verdict = extract_json(raw)
         is_relevant = verdict.get("relevant", False)
         reason = verdict.get("reason", "?")
@@ -424,7 +422,7 @@ def format_strategy_briefing(intel: dict) -> str:
         sections.append(f"\nJSON-LD: {len(job_postings)} JobPosting entries found (usable!)")
         sections.append(f"First JobPosting:\n{json.dumps(job_postings[0], indent=2)[:3000]}")
     else:
-        sections.append(f"\nJSON-LD: NO JobPosting entries (json_ld strategy will NOT work)")
+        sections.append("\nJSON-LD: NO JobPosting entries (json_ld strategy will NOT work)")
     if other:
         types = [j.get("@type", "?") if isinstance(j, dict) else "?" for j in other]
         sections.append(f"Other JSON-LD types (NOT job data): {types}")
@@ -642,7 +640,7 @@ def ask_llm(prompt: str) -> tuple[str, float, dict]:
     """Send prompt to LLM. Returns (response_text, seconds_taken, metadata)."""
     client = get_client()
     t0 = time.time()
-    text = client.ask(prompt, temperature=0.0, max_tokens=4096)
+    text = client.chat([{"role": "user", "content": prompt}], max_output_tokens=4096)
     elapsed = time.time() - t0
     meta = {
         "finish_reason": "stop",
diff --git a/src/applypilot/enrichment/detail.py b/src/applypilot/enrichment/detail.py
index 11b7926..f415cc9 100644
--- a/src/applypilot/enrichment/detail.py
+++ b/src/applypilot/enrichment/detail.py
@@ -22,9 +22,7 @@
 from bs4 import BeautifulSoup
 from playwright.sync_api import sync_playwright
 
-from applypilot import config
-from applypilot.config import DB_PATH
-from applypilot.database import get_connection, init_db, ensure_columns
+from applypilot.database import init_db
 from applypilot.llm import get_client
 
 log = logging.getLogger(__name__)
@@ -465,7 +463,7 @@ def extract_with_llm(page, url: str) -> dict:
     try:
         client = get_client()
         t0 = time.time()
-        raw = client.ask(prompt, temperature=0.0, max_tokens=4096)
+        raw = client.chat([{"role": "user", "content": prompt}], max_output_tokens=4096)
         elapsed = time.time() - t0
         log.info("LLM: %d chars in, %.1fs", len(prompt), elapsed)
diff --git a/src/applypilot/llm.py b/src/applypilot/llm.py
index 1fb7be6..030f2ce 100644
--- a/src/applypilot/llm.py
+++ b/src/applypilot/llm.py
@@ -1,288 +1,217 @@
+"""Unified LLM client for ApplyPilot using LiteLLM.
+
+Runtime contract:
+  - If set, LLM_MODEL must be a fully-qualified LiteLLM model string
+    (for example: openai/gpt-4o-mini, anthropic/claude-3-5-haiku-latest,
+    gemini/gemini-3.0-flash).
+  - If LLM_MODEL is unset, provider is inferred by first configured source:
+    GEMINI_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, then LLM_URL.
+  - Credentials come from provider env vars or generic LLM_API_KEY.
+  - LLM_URL is optional for custom OpenAI-compatible endpoints.
 """
-Unified LLM client for ApplyPilot.
-
-Auto-detects provider from environment:
-    GEMINI_API_KEY -> Google Gemini (default: gemini-2.0-flash)
-    OPENAI_API_KEY -> OpenAI (default: gpt-4o-mini)
-    LLM_URL        -> Local llama.cpp / Ollama compatible endpoint
-
-LLM_MODEL env var overrides the model name for any provider.
-"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass
 import logging
 import os
-import time
+from typing import Any, Literal, TypedDict, Unpack
+import warnings
 
-import httpx
+import litellm
+
+# Suppress pydantic serialization warnings from litellm internals when provider
+# responses have fewer fields than the full ModelResponse schema.
+warnings.filterwarnings("ignore", category=UserWarning, module="pydantic.*")
 
 log = logging.getLogger(__name__)
 
-# ---------------------------------------------------------------------------
-# Provider detection
-# ---------------------------------------------------------------------------
-
-def _detect_provider() -> tuple[str, str, str]:
-    """Return (base_url, model, api_key) based on environment variables.
-
-    Reads env at call time (not module import time) so that load_env() called
-    in _bootstrap() is always visible here.
-    """
-    gemini_key = os.environ.get("GEMINI_API_KEY", "")
-    openai_key = os.environ.get("OPENAI_API_KEY", "")
-    local_url = os.environ.get("LLM_URL", "")
-    model_override = os.environ.get("LLM_MODEL", "")
-
-    if gemini_key and not local_url:
-        return (
-            "https://generativelanguage.googleapis.com/v1beta/openai",
-            model_override or "gemini-2.0-flash",
-            gemini_key,
+_MAX_RETRIES = 5
+_TIMEOUT = 120  # seconds
+_INFERRED_SOURCE_ORDER: tuple[tuple[str, str], ...] = (
+    ("gemini", "GEMINI_API_KEY"),
+    ("openai", "OPENAI_API_KEY"),
+    ("anthropic", "ANTHROPIC_API_KEY"),
+    ("openai", "LLM_URL"),
+)
+_DEFAULT_MODEL_BY_PROVIDER = {
+    "gemini": "gemini/gemini-3.0-flash",
+    "openai": "openai/gpt-5-mini",
+    "anthropic": "anthropic/claude-haiku-4-5",
+}
+_DEFAULT_LOCAL_MODEL = "openai/local-model"
+
+
+@dataclass(frozen=True)
+class LLMConfig:
+    """LLM configuration consumed by LLMClient."""
+
+    provider: str
+    api_base: str | None
+    model: str
+    api_key: str
+
+
+class ChatMessage(TypedDict):
+    role: Literal["system", "user", "assistant", "tool"]
+    content: str
+
+
+class LiteLLMExtra(TypedDict, total=False):
+    stop: str | list[str]
+    top_p: float
+    seed: int
+    stream: bool
+    response_format: dict[str, Any]
+    tools: list[dict[str, Any]]
+    tool_choice: str | dict[str, Any]
+    fallbacks: list[str]
+
+
+def _env_get(env: Mapping[str, str], key: str) -> str:
+    value = env.get(key, "")
+    if value is None:
+        return ""
+    return str(value).strip()
+
+
+def _provider_from_model(model: str) -> str:
+    provider, _, model_name = model.partition("/")
+    if not provider or not model_name:
+        raise RuntimeError(
+            "LLM_MODEL must include a provider prefix (for example 'openai/gpt-4o-mini')."
         )
-
-    if openai_key and not local_url:
-        return (
-            "https://api.openai.com/v1",
-            model_override or "gpt-4o-mini",
-            openai_key,
+    return provider
+
+
+def _infer_provider_and_source(env: Mapping[str, str]) -> tuple[str, str] | None:
+    for provider, env_key in _INFERRED_SOURCE_ORDER:
+        if _env_get(env, env_key):
+            return provider, env_key
+    return None
+
+
+def resolve_llm_config(env: Mapping[str, str] | None = None) -> LLMConfig:
+    """Resolve LLM configuration from environment."""
+    env_map = env if env is not None else os.environ
+
+    model = _env_get(env_map, "LLM_MODEL")
+    local_url = _env_get(env_map, "LLM_URL")
+    inferred = _infer_provider_and_source(env_map)
+    if model:
+        if "/" in model:
+            provider = _provider_from_model(model)
+        elif inferred:
+            provider, _ = inferred
+            model = f"{provider}/{model}"
+        else:
+            raise RuntimeError(
+                "LLM_MODEL must include a provider prefix (for example 'openai/gpt-4o-mini')."
+            )
+    else:
+        if not inferred:
+            raise RuntimeError(
+                "No LLM provider configured. Set one of GEMINI_API_KEY, OPENAI_API_KEY, "
+                "ANTHROPIC_API_KEY, LLM_URL, or LLM_MODEL."
+            )
+        provider, source = inferred
+        if source == "LLM_URL":
+            model = _DEFAULT_LOCAL_MODEL
+        else:
+            model = _DEFAULT_MODEL_BY_PROVIDER[provider]
+
+    provider_api_key_env = {
+        "gemini": "GEMINI_API_KEY",
+        "openai": "OPENAI_API_KEY",
+        "anthropic": "ANTHROPIC_API_KEY",
+    }
+    api_key_env = provider_api_key_env.get(provider, "LLM_API_KEY")
+    api_key = _env_get(env_map, api_key_env) or _env_get(env_map, "LLM_API_KEY")
+
+    if not api_key and not local_url:
+        key_help = (
+            f"{api_key_env} or LLM_API_KEY"
+            if provider in provider_api_key_env
+            else "LLM_API_KEY"
         )
-
-    if local_url:
-        return (
-            local_url.rstrip("/"),
-            model_override or "local-model",
-            os.environ.get("LLM_API_KEY", ""),
+        raise RuntimeError(
+            f"Missing credentials for LLM_MODEL '{model}'. Set {key_help}, or set LLM_URL for "
+            "a local OpenAI-compatible endpoint."
         )
-
-    raise RuntimeError(
-        "No LLM provider configured. "
-        "Set GEMINI_API_KEY, OPENAI_API_KEY, or LLM_URL in your environment."
+    return LLMConfig(
+        provider=provider,
+        api_base=local_url.rstrip("/") if local_url else None,
+        model=model,
+        api_key=api_key,
     )
 
 
-# ---------------------------------------------------------------------------
-# Client
-# ---------------------------------------------------------------------------
-
-_MAX_RETRIES = 5
-_TIMEOUT = 120  # seconds
-
-# Base wait on first 429/503 (doubles each retry, caps at 60s).
-# Gemini free tier is 15 RPM = 4s minimum between requests; 10s gives headroom.
-_RATE_LIMIT_BASE_WAIT = 10
-
-
-_GEMINI_COMPAT_BASE = "https://generativelanguage.googleapis.com/v1beta/openai"
-_GEMINI_NATIVE_BASE = "https://generativelanguage.googleapis.com/v1beta"
-
-
 class LLMClient:
-    """Thin LLM client supporting OpenAI-compatible and native Gemini endpoints.
-
-    For Gemini keys, starts on the OpenAI-compat layer. On a 403 (which
-    happens with preview/experimental models not exposed via compat), it
-    automatically switches to the native generateContent API and stays there
-    for the lifetime of the process.
-    """
-
-    def __init__(self, base_url: str, model: str, api_key: str) -> None:
-        self.base_url = base_url
-        self.model = model
-        self.api_key = api_key
-        self._client = httpx.Client(timeout=_TIMEOUT)
-        # True once we've confirmed the native Gemini API works for this model
-        self._use_native_gemini: bool = False
-        self._is_gemini: bool = base_url.startswith(_GEMINI_COMPAT_BASE)
-
-    # -- Native Gemini API --------------------------------------------------
-
-    def _chat_native_gemini(
-        self,
-        messages: list[dict],
-        temperature: float,
-        max_tokens: int,
-    ) -> str:
-        """Call the native Gemini generateContent API.
-
-        Used automatically when the OpenAI-compat endpoint returns 403,
-        which happens for preview/experimental models not exposed via compat.
-
-        Converts OpenAI-style messages to Gemini's contents/systemInstruction
-        format transparently.
-        """
-        contents: list[dict] = []
-        system_parts: list[dict] = []
-
-        for msg in messages:
-            role = msg["role"]
-            text = msg.get("content", "")
-            if role == "system":
-                system_parts.append({"text": text})
-            elif role == "user":
-                contents.append({"role": "user", "parts": [{"text": text}]})
-            elif role == "assistant":
-                # Gemini uses "model" instead of "assistant"
-                contents.append({"role": "model", "parts": [{"text": text}]})
-
-        payload: dict = {
-            "contents": contents,
-            "generationConfig": {
-                "temperature": temperature,
-                "maxOutputTokens": max_tokens,
-            },
-        }
-        if system_parts:
-            payload["systemInstruction"] = {"parts": system_parts}
-
-        url = f"{_GEMINI_NATIVE_BASE}/models/{self.model}:generateContent"
-        resp = self._client.post(
-            url,
-            json=payload,
-            headers={"Content-Type": "application/json"},
-            params={"key": self.api_key},
-        )
-        resp.raise_for_status()
-        data = resp.json()
-        return data["candidates"][0]["content"]["parts"][0]["text"]
-
-    # -- OpenAI-compat API --------------------------------------------------
-
-    def _chat_compat(
-        self,
-        messages: list[dict],
-        temperature: float,
-        max_tokens: int,
-    ) -> str:
-        """Call the OpenAI-compatible endpoint."""
-        headers: dict[str, str] = {"Content-Type": "application/json"}
-        if self.api_key:
-            headers["Authorization"] = f"Bearer {self.api_key}"
-
-        payload = {
-            "model": self.model,
-            "messages": messages,
-            "temperature": temperature,
-            "max_tokens": max_tokens,
-        }
-
-        resp = self._client.post(
-            f"{self.base_url}/chat/completions",
-            json=payload,
-            headers=headers,
-        )
+    """Thin wrapper around LiteLLM completion()."""
 
-        # 403 on Gemini compat = model not available on compat layer.
-        # Raise a specific sentinel so chat() can switch to native API.
-        if resp.status_code == 403 and self._is_gemini:
-            raise _GeminiCompatForbidden(resp)
-
-        return self._handle_compat_response(resp)
-
-    @staticmethod
-    def _handle_compat_response(resp: httpx.Response) -> str:
-        resp.raise_for_status()
-        data = resp.json()
-        return data["choices"][0]["message"]["content"]
-
-    # -- public API ---------------------------------------------------------
+    def __init__(self, config: LLMConfig) -> None:
+        self.config = config
+        self.provider = config.provider
+        self.model = config.model
+        litellm.suppress_debug_info = True
 
     def chat(
         self,
-        messages: list[dict],
-        temperature: float = 0.0,
-        max_tokens: int = 4096,
+        messages: list[ChatMessage],
+        *,
+        max_output_tokens: int = 10000,
+        temperature: float | None = None,
+        timeout: int = _TIMEOUT,
+        num_retries: int = _MAX_RETRIES,
+        drop_params: bool = True,
+        **extra: Unpack[LiteLLMExtra],
     ) -> str:
-        """Send a chat completion request and return the assistant message text."""
-        # Qwen3 optimization: prepend /no_think to skip chain-of-thought
-        # reasoning, saving tokens on structured extraction tasks.
-        if "qwen" in self.model.lower() and messages:
-            first = messages[0]
-            if first.get("role") == "user" and not first["content"].startswith("/no_think"):
-                messages = [{"role": first["role"], "content": f"/no_think\n{first['content']}"}] + messages[1:]
-
-        for attempt in range(_MAX_RETRIES):
-            try:
-                # Route to native Gemini if we've already confirmed it's needed
-                if self._use_native_gemini:
-                    return self._chat_native_gemini(messages, temperature, max_tokens)
-
-                return self._chat_compat(messages, temperature, max_tokens)
-
-            except _GeminiCompatForbidden as exc:
-                # Model not available on OpenAI-compat layer — switch to native.
-                log.warning(
-                    "Gemini compat endpoint returned 403 for model '%s'. "
-                    "Switching to native generateContent API. "
-                    "(Preview/experimental models are often compat-only on native.)",
-                    self.model,
+        """Send a completion request and return plain text content."""
+        try:
+            if temperature is None:
+                response = litellm.completion(
+                    model=self.model,
+                    messages=messages,
+                    max_tokens=max_output_tokens,
+                    timeout=timeout,
+                    num_retries=num_retries,
+                    drop_params=drop_params,
+                    api_key=self.config.api_key or None,
+                    api_base=self.config.api_base or None,
+                    **extra,
+                )
+            else:
+                response = litellm.completion(
+                    model=self.model,
+                    messages=messages,
+                    max_tokens=max_output_tokens,
+                    temperature=temperature,
+                    timeout=timeout,
+                    num_retries=num_retries,
+                    drop_params=drop_params,
+                    api_key=self.config.api_key or None,
+                    api_base=self.config.api_base or None,
+                    **extra,
                 )
-                self._use_native_gemini = True
-                # Retry immediately with native — don't count as a rate-limit wait
-                try:
-                    return self._chat_native_gemini(messages, temperature, max_tokens)
-                except httpx.HTTPStatusError as native_exc:
-                    raise RuntimeError(
-                        f"Both Gemini endpoints failed. Compat: 403 Forbidden. "
-                        f"Native: {native_exc.response.status_code} — "
-                        f"{native_exc.response.text[:200]}"
-                    ) from native_exc
-
-            except httpx.HTTPStatusError as exc:
-                resp = exc.response
-                if resp.status_code in (429, 503) and attempt < _MAX_RETRIES - 1:
-                    # Respect Retry-After header if provided (Gemini sends this).
-                    retry_after = (
-                        resp.headers.get("Retry-After")
-                        or resp.headers.get("X-RateLimit-Reset-Requests")
-                    )
-                    if retry_after:
-                        try:
-                            wait = float(retry_after)
-                        except (ValueError, TypeError):
-                            wait = _RATE_LIMIT_BASE_WAIT * (2 ** attempt)
-                    else:
-                        wait = min(_RATE_LIMIT_BASE_WAIT * (2 ** attempt), 60)
-
-                    log.warning(
-                        "LLM rate limited (HTTP %s). Waiting %ds before retry %d/%d. "
-                        "Tip: Gemini free tier = 15 RPM. Consider a paid account "
-                        "or switching to a local model.",
-                        resp.status_code, wait, attempt + 1, _MAX_RETRIES,
-                    )
-                    time.sleep(wait)
-                    continue
-                raise
-
-            except httpx.TimeoutException:
-                if attempt < _MAX_RETRIES - 1:
-                    wait = min(_RATE_LIMIT_BASE_WAIT * (2 ** attempt), 60)
-                    log.warning(
-                        "LLM request timed out, retrying in %ds (attempt %d/%d)",
-                        wait, attempt + 1, _MAX_RETRIES,
-                    )
-                    time.sleep(wait)
-                    continue
-                raise
-
-        raise RuntimeError("LLM request failed after all retries")
-
-    def ask(self, prompt: str, **kwargs) -> str:
-        """Convenience: single user prompt -> assistant response."""
-        return self.chat([{"role": "user", "content": prompt}], **kwargs)
-
-    def close(self) -> None:
-        self._client.close()
+            choices = getattr(response, "choices", None)
+            if not choices:
+                raise RuntimeError("LLM response contained no choices.")
 
+            content = response.choices[0].message.content
+            text = content.strip() if isinstance(content, str) else str(content).strip()
 
-class _GeminiCompatForbidden(Exception):
-    """Sentinel: Gemini OpenAI-compat returned 403. Switch to native API."""
-    def __init__(self, response: httpx.Response) -> None:
-        self.response = response
-        super().__init__(f"Gemini compat 403: {response.text[:200]}")
+            if not text:
+                raise RuntimeError("LLM response contained no text content.")
 
+            return text
+        except Exception as exc:  # pragma: no cover - provider SDK exception types vary by backend/version.
+            raise RuntimeError(f"LLM request failed ({self.provider}/{self.model}): {exc}") from exc
 
+    def close(self) -> None:
+        """No-op. LiteLLM completion() is stateless per call."""
+        return None
 
-# ---------------------------------------------------------------------------
-# Singleton
-# ---------------------------------------------------------------------------
 
 _instance: LLMClient | None = None
@@ -291,7 +220,13 @@ def get_client() -> LLMClient:
     """Return (or create) the module-level LLMClient singleton."""
     global _instance
     if _instance is None:
-        base_url, model, api_key = _detect_provider()
-        log.info("LLM provider: %s model: %s", base_url, model)
-        _instance = LLMClient(base_url, model, api_key)
+        try:
+            from applypilot.config import load_env
+
+            load_env()
+        except ModuleNotFoundError:
+            log.debug("python-dotenv not installed; skipping .env auto-load in llm.get_client().")
+        config = resolve_llm_config()
+        log.info("LLM provider: %s model: %s", config.provider, config.model)
+        _instance = LLMClient(config)
     return _instance
diff --git a/src/applypilot/pipeline.py b/src/applypilot/pipeline.py
index 29881c5..8ae30ab 100644
--- a/src/applypilot/pipeline.py
+++ b/src/applypilot/pipeline.py
@@ -384,7 +384,7 @@ def _run_streaming(ordered: list[str], min_score: int, workers: int = 1,
     stop_event = threading.Event()
     pipeline_start = time.time()
 
-    console.print(f"\n [bold cyan]STREAMING MODE[/bold cyan] — stages run concurrently")
+    console.print("\n [bold cyan]STREAMING MODE[/bold cyan] — stages run concurrently")
     console.print(f" Poll interval: {_STREAM_POLL_INTERVAL}s\n")
 
     # Mark stages NOT in `ordered` as done so downstream doesn't wait for them
@@ -492,7 +492,7 @@ def run_pipeline(
         for name in ordered:
             meta = STAGE_META[name]
             console.print(f" {name:<12s} {meta['desc']}")
-        console.print(f"\n No changes made.")
+        console.print("\n No changes made.")
         return {"stages": [], "errors": {}, "elapsed": 0.0}
 
     # Execute
@@ -527,7 +527,7 @@
     # Final DB stats
     final = get_stats()
-    console.print(f"\n [bold]DB Final State:[/bold]")
+    console.print("\n [bold]DB Final State:[/bold]")
     console.print(f" Total jobs: {final['total']}")
     console.print(f" With desc: {final['with_description']}")
     console.print(f" Scored: {final['scored']}")
diff --git a/src/applypilot/scoring/cover_letter.py b/src/applypilot/scoring/cover_letter.py
index c16cdd5..06e9333 100644
--- a/src/applypilot/scoring/cover_letter.py
+++ b/src/applypilot/scoring/cover_letter.py
@@ -5,14 +5,13 @@
 profile at runtime. No hardcoded personal information.
 """
 
-import json
 import logging
 import re
 import time
 from datetime import datetime, timezone
 
 from applypilot.config import COVER_LETTER_DIR, RESUME_PATH, load_profile
-from applypilot.database import get_connection, get_jobs_by_stage
+from applypilot.database import get_connection
 from applypilot.llm import get_client
 from applypilot.scoring.validator import (
     BANNED_WORDS,
@@ -165,7 +164,7 @@ def generate_cover_letter(
         )},
     ]
 
-    letter = client.chat(messages, max_tokens=1024, temperature=0.7)
+    letter = client.chat(messages, max_output_tokens=10000)
 
     letter = sanitize_text(letter)    # auto-fix em dashes, smart quotes
     letter = _strip_preamble(letter)  # remove any "Here is the letter:" prefix
diff --git a/src/applypilot/scoring/scorer.py b/src/applypilot/scoring/scorer.py
index 97692d5..61d6e5e 100644
--- a/src/applypilot/scoring/scorer.py
+++ b/src/applypilot/scoring/scorer.py
@@ -5,13 +5,12 @@
 profile and resume file.
 """
 
-import json
 import logging
 import re
 import time
 from datetime import datetime, timezone
 
-from applypilot.config import RESUME_PATH, load_profile
+from applypilot.config import RESUME_PATH
 from applypilot.database import get_connection, get_jobs_by_stage
 from applypilot.llm import get_client
 
@@ -94,7 +93,7 @@ def score_job(resume_text: str, job: dict) -> dict:
     try:
         client = get_client()
-        response = client.chat(messages, max_tokens=512, temperature=0.2)
+        response = client.chat(messages, max_output_tokens=512)
         return _parse_score_response(response)
     except Exception as e:
         log.error("LLM error scoring job '%s': %s", job.get("title", "?"), e)
diff --git a/src/applypilot/scoring/tailor.py b/src/applypilot/scoring/tailor.py
index 352fb5f..0fb71d9 100644
--- a/src/applypilot/scoring/tailor.py
+++ b/src/applypilot/scoring/tailor.py
@@ -14,17 +14,14 @@
 import re
 import time
 from datetime import datetime, timezone
-from pathlib import Path
 
 from applypilot.config import RESUME_PATH, TAILORED_DIR, load_profile
 from applypilot.database import get_connection, get_jobs_by_stage
 from applypilot.llm import get_client
 from applypilot.scoring.validator import (
     BANNED_WORDS,
-    FABRICATION_WATCHLIST,
     sanitize_text,
     validate_json_fields,
-    validate_tailored_resume,
 )
 
 log = logging.getLogger(__name__)
@@ -326,7 +323,7 @@ def judge_tailored_resume(
     ]
 
     client = get_client()
-    response = client.chat(messages, max_tokens=512, temperature=0.1)
+    response = client.chat(messages, max_output_tokens=512)
 
     passed = "VERDICT: PASS" in response.upper()
     issues = "none"
@@ -400,12 +397,14 @@ def tailor_resume(
         {"role": "user", "content": f"ORIGINAL RESUME:\n{resume_text}\n\n---\n\nTARGET JOB:\n{job_text}\n\nReturn the JSON:"},
     ]
 
-    raw = client.chat(messages, max_tokens=2048, temperature=0.4)
+    raw = client.chat(messages, max_output_tokens=16000)
 
     # Parse JSON from response
     try:
         data = extract_json(raw)
-    except ValueError:
+    except ValueError as exc:
+        log.warning("Attempt %d JSON parse failed (%s). Raw response (first 1000 chars):\n%s",
+                    attempt + 1, exc, raw[:1000])
         avoid_notes.append("Output was not valid JSON. Return ONLY a JSON object, nothing else.")
         continue
 
@@ -415,6 +414,7 @@
 
     if not validation["passed"]:
         # Only retry if there are hard errors (warnings never block)
+        log.warning("Attempt %d validation failed: %s", attempt + 1, validation["errors"])
         avoid_notes.extend(validation["errors"])
         if attempt < max_retries:
             continue
diff --git a/src/applypilot/scoring/validator.py b/src/applypilot/scoring/validator.py
index abb8f89..3d3ce17 100644
--- a/src/applypilot/scoring/validator.py
+++ b/src/applypilot/scoring/validator.py
@@ -114,9 +114,12 @@ def validate_json_fields(data: dict, profile: dict, mode: str = "normal") -> dic
     warnings: list[str] = []
 
     # Required keys — always checked regardless of mode
-    for key in ("title", "summary", "skills", "experience", "projects", "education"):
+    # "projects" may be an empty list (model may drop all projects for some jobs)
+    for key in ("title", "summary", "skills", "experience", "education"):
         if key not in data or not data[key]:
             errors.append(f"Missing required field: {key}")
+    if "projects" not in data:
+        errors.append("Missing required field: projects")
 
     if errors:
         return {"passed": False, "errors": errors, "warnings": warnings}
diff --git a/src/applypilot/view.py b/src/applypilot/view.py
index ff42fec..82be192 100644
--- a/src/applypilot/view.py
+++ b/src/applypilot/view.py
@@ -10,14 +10,13 @@
 
 from __future__ import annotations
 
-import os
 import webbrowser
 from html import escape
 from pathlib import Path
 
 from rich.console import Console
 
-from applypilot.config import APP_DIR, DB_PATH
+from applypilot.config import APP_DIR
 from applypilot.database import get_connection
 
 console = Console()
diff --git a/src/applypilot/wizard/init.py b/src/applypilot/wizard/init.py
index 0f893c3..06826bd 100644
--- a/src/applypilot/wizard/init.py
+++ b/src/applypilot/wizard/init.py
@@ -4,7 +4,7 @@
   - resume.txt (and optionally resume.pdf)
   - profile.json
   - searches.yaml
-  - .env (LLM API key)
+  - .env (LLM API keys and runtime settings)
 """
 
 from __future__ import annotations
@@ -13,7 +13,6 @@
 import shutil
 from pathlib import Path
 
-import typer
 from rich.console import Console
 from rich.panel import Panel
 from rich.prompt import Confirm, Prompt
@@ -245,33 +244,60 @@ def _setup_ai_features() -> None:
         console.print("[dim]Discovery-only mode. You can configure AI later with [bold]applypilot init[/bold].[/dim]")
         return
 
-    console.print("Supported providers: [bold]Gemini[/bold] (recommended, free tier), OpenAI, local (Ollama/llama.cpp)")
-    provider = Prompt.ask(
-        "Provider",
-        choices=["gemini", "openai", "local"],
-        default="gemini",
+    console.print(
+        "Supported providers: [bold]Gemini[/bold] (recommended, free tier), "
+        "OpenAI, Claude, local (Ollama/llama.cpp)."
     )
+    console.print("[dim]Enter any credentials you want to save now. Leave blank to skip each field.[/dim]")
 
     env_lines = ["# ApplyPilot configuration", ""]
+    configured_sources: list[str] = []
+
+    gemini_key = Prompt.ask("Gemini API key (optional, from aistudio.google.com)", default="").strip()
+    if gemini_key:
+        env_lines.append(f"GEMINI_API_KEY={gemini_key}")
+        configured_sources.append("gemini")
+
+    openai_key = Prompt.ask("OpenAI API key (optional)", default="").strip()
+    if openai_key:
+        env_lines.append(f"OPENAI_API_KEY={openai_key}")
+        configured_sources.append("openai")
+
+    anthropic_key = Prompt.ask("Anthropic API key (optional)", default="").strip()
+    if anthropic_key:
+        env_lines.append(f"ANTHROPIC_API_KEY={anthropic_key}")
+        configured_sources.append("anthropic")
+
+    local_url = Prompt.ask("Local LLM endpoint URL (optional)", default="").strip()
+    if local_url:
+        env_lines.append(f"LLM_URL={local_url}")
+        configured_sources.append("local")
+
+    if not configured_sources:
+        console.print("[dim]No AI provider configured. You can add one later with [bold]applypilot init[/bold].[/dim]")
+        return
 
-    if provider == "gemini":
-        api_key = Prompt.ask("Gemini API key (from aistudio.google.com)")
-        model = Prompt.ask("Model", default="gemini-2.0-flash")
-        env_lines.append(f"GEMINI_API_KEY={api_key}")
-        env_lines.append(f"LLM_MODEL={model}")
-    elif provider == "openai":
-        api_key = Prompt.ask("OpenAI API key")
-        model = Prompt.ask("Model", default="gpt-4o-mini")
-        env_lines.append(f"OPENAI_API_KEY={api_key}")
-        env_lines.append(f"LLM_MODEL={model}")
-    elif provider == "local":
-        url = Prompt.ask("Local LLM endpoint URL", default="http://localhost:8080/v1")
-        model = Prompt.ask("Model name", default="local-model")
-        env_lines.append(f"LLM_URL={url}")
-        env_lines.append(f"LLM_MODEL={model}")
+    default_model_by_source = {
+        "gemini": "gemini/gemini-3.0-flash",
+        "openai": "openai/gpt-4o-mini",
+        "anthropic": "anthropic/claude-3-5-haiku-latest",
+        "local": "openai/local-model",
+    }
+    default_model = default_model_by_source.get(configured_sources[0], "openai/gpt-4o-mini")
+    model = Prompt.ask(
+        "LLM model (required, include provider prefix)",
+        default=default_model,
+    ).strip()
+    env_lines.append(f"LLM_MODEL={model}")
 
     env_lines.append("")
     ENV_PATH.write_text("\n".join(env_lines), encoding="utf-8")
+    if len(configured_sources) > 1:
+        configured = ", ".join(configured_sources)
+        console.print(
+            f"[yellow]Multiple LLM providers saved ({configured}). "
+            "Runtime routing follows LLM_MODEL's provider prefix.[/yellow]"
+        )
     console.print(f"[green]AI configuration saved to {ENV_PATH}[/green]")
diff --git a/tests/test_gemini_smoke.py b/tests/test_gemini_smoke.py
new file mode 100644
index 0000000..fecc332
--- /dev/null
+++ b/tests/test_gemini_smoke.py
@@ -0,0 +1,49 @@
+import os
+
+import pytest
+
+litellm = pytest.importorskip("litellm")
+
+
+def _gemini_smoke_model() -> str:
+    raw = os.getenv("GEMINI_SMOKE_MODEL", "gemini-3.0-flash").strip()
+    if raw.startswith("gemini/"):
+        return raw
+    if raw.startswith("models/"):
+        raw = raw.split("/", 1)[1]
+    return f"gemini/{raw}"
+
+
+def _content_text(content: object) -> str:
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, list):
+        return "".join(
+            part if isinstance(part, str) else str(part.get("text", ""))
+            for part in content
+            if isinstance(part, (str, dict))
+        ).strip()
+    return ""
+
+
+@pytest.mark.smoke
+def test_gemini_smoke_completion_returns_non_empty_content() -> None:
+    api_key = os.getenv("GEMINI_API_KEY", "").strip()
+    if not api_key:
+        pytest.skip("Set GEMINI_API_KEY to run Gemini smoke tests.")
+
+    prompt = os.getenv("GEMINI_SMOKE_PROMPT", "Reply with a single word: ready.")
+    response = litellm.completion(
+        model=_gemini_smoke_model(),
+        api_key=api_key,
+        messages=[{"role": "user", "content": prompt}],
+        max_tokens=32,
+        timeout=60,
+        num_retries=1,
+    )
+
+    choices = getattr(response, "choices", None)
+    assert choices, "Gemini smoke call returned no choices."
+
+    content = choices[0].message.content
+    assert _content_text(content), "Gemini smoke call returned empty choices[0].message.content."
diff --git a/tests/test_llm_client.py b/tests/test_llm_client.py
new file mode 100644
index 0000000..6470ea9
--- /dev/null
+++ b/tests/test_llm_client.py
@@ -0,0 +1,115 @@
+import os
+from types import SimpleNamespace
+
+import applypilot.llm as llm_module
+from applypilot.llm import LLMClient, LLMConfig
+
+
+def test_client_init_does_not_mutate_provider_env(monkeypatch) -> None:
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    LLMClient(
+        LLMConfig(
+            provider="openai",
+            api_base=None,
+            model="openai/gpt-4o-mini",
+            api_key="test-key",
+        )
+    )
+    assert "OPENAI_API_KEY" not in os.environ
+    assert llm_module.litellm.suppress_debug_info is True
+
+
+def _mock_response(content: str = "hello") -> SimpleNamespace:
+    return SimpleNamespace(
+        choices=[
+            SimpleNamespace(
+                message=SimpleNamespace(content=content),
+            )
+        ]
+    )
+
+
+def test_chat_passes_defaults_without_temperature(monkeypatch) -> None:
+    client = LLMClient(
+        LLMConfig(
+            provider="openai",
+            api_base=None,
+            model="openai/gpt-4o-mini",
+            api_key="test-key",
+        )
+    )
+    captured: dict[str, object] = {}
+
+    def _fake_completion(**kwargs: object) -> SimpleNamespace:
+        captured.update(kwargs)
+        return _mock_response()
+
+    monkeypatch.setattr(llm_module.litellm, "completion", _fake_completion)
+    response = client.chat([{"role": "user", "content": "hello"}], max_output_tokens=128)
+
+    assert response == "hello"
+    assert captured["model"] == "openai/gpt-4o-mini"
+    assert captured["max_tokens"] == 128
+    assert captured["timeout"] == 120
+    assert captured["num_retries"] == 5
+    assert captured["drop_params"] is True
+    assert captured["api_key"] == "test-key"
+    assert captured["api_base"] is None
+    assert "temperature" not in captured
+    assert "reasoning_effort" not in captured
+
+
+def test_chat_supports_temperature_and_typed_extra(monkeypatch) -> None:
+    client = LLMClient(
+        LLMConfig(
+            provider="gemini",
+            api_base=None,
+            model="gemini/gemini-3.0-flash",
+            api_key="g-key",
+        )
+    )
+    captured: dict[str, object] = {}
+
+    def _fake_completion(**kwargs: object) -> SimpleNamespace:
+        captured.update(kwargs)
+        return _mock_response("ok")
+
+    monkeypatch.setattr(llm_module.litellm, "completion", _fake_completion)
+    response = client.chat(
+        [{"role": "user", "content": "hello"}],
+        max_output_tokens=64,
+        temperature=0.2,
+        top_p=0.9,
+        stop=["\n\n"],
+        response_format={"type": "json_object"},
+    )
+
+    assert response == "ok"
+    assert captured["model"] == "gemini/gemini-3.0-flash"
+    assert captured["api_key"] == "g-key"
+    assert captured["temperature"] == 0.2
+    assert captured["top_p"] == 0.9
+    assert captured["stop"] == ["\n\n"]
+    assert captured["response_format"] == {"type": "json_object"}
+
+
+def test_chat_sets_local_api_base_and_api_key(monkeypatch) -> None:
+    client = LLMClient(
+        LLMConfig(
+            provider="openai",
+            api_base="http://127.0.0.1:8080/v1",
+            model="openai/local-model",
+            api_key="local-key",
+        )
+    )
+    captured: dict[str, object] = {}
+
+    def _fake_completion(**kwargs: object) -> SimpleNamespace:
+        captured.update(kwargs)
+        return _mock_response()
+
+    monkeypatch.setattr(llm_module.litellm, "completion", _fake_completion)
+    _ = client.chat([{"role": "user", "content": "hello"}], max_output_tokens=64)
+
+    assert captured["api_base"] == "http://127.0.0.1:8080/v1"
+    assert captured["api_key"] == "local-key"
diff --git a/tests/test_llm_resolution.py b/tests/test_llm_resolution.py
new file mode 100644
index 0000000..47022d6
--- /dev/null
+++ b/tests/test_llm_resolution.py
@@ -0,0 +1,59 @@
+import pytest
+
+from applypilot.llm import resolve_llm_config
+
+
+def test_infers_provider_from_first_configured_source() -> None:
+    cfg = resolve_llm_config(
+        {
+            "GEMINI_API_KEY": "g-key",
+            "OPENAI_API_KEY": "o-key",
+            "ANTHROPIC_API_KEY": "a-key",
+            "LLM_URL": "http://127.0.0.1:8080/v1",
+        }
+    )
+    assert cfg.provider == "gemini"
+    assert cfg.model == "gemini/gemini-3.0-flash"
+    assert cfg.api_key == "g-key"
+
+
+def test_unprefixed_model_uses_inferred_provider() -> None:
+    cfg = resolve_llm_config({"LLM_MODEL": "gpt-4o-mini", "OPENAI_API_KEY": "o-key"})
+    assert cfg.provider == "openai"
+    assert cfg.model == "openai/gpt-4o-mini"
+
+
+def test_requires_model_provider_prefix_without_inferable_provider() -> None:
+    with pytest.raises(RuntimeError, match="must include a provider prefix"):
+        resolve_llm_config({"LLM_MODEL": "gpt-4o-mini", "LLM_API_KEY": "generic"})
+
+
+def test_provider_and_api_key_come_from_model_contract() -> None:
+    cfg = resolve_llm_config({"LLM_MODEL": "gemini/gemini-3.0-flash", "GEMINI_API_KEY": "g-key"})
+    assert cfg.provider == "gemini"
+    assert cfg.api_base is None
+    assert cfg.model == "gemini/gemini-3.0-flash"
+    assert cfg.api_key == "g-key"
+
+
+def test_uses_generic_api_key_for_unmapped_provider() -> None:
+    cfg = resolve_llm_config({"LLM_MODEL": "vertex_ai/gemini-3.0-flash", "LLM_API_KEY": "v-key"})
+    assert cfg.provider == "vertex_ai"
+    assert cfg.api_key == "v-key"
+
+
+def test_llm_url_infers_local_default_model_and_allows_missing_api_key() -> None:
+    cfg = resolve_llm_config(
+        {
+            "LLM_URL": "http://127.0.0.1:8080/v1/",
+        }
+    )
+    assert cfg.provider == "openai"
+    assert cfg.model == "openai/local-model"
+    assert cfg.api_base == "http://127.0.0.1:8080/v1"
+    assert cfg.api_key == ""
+
+
+def test_missing_everything_raises_clear_error() -> None:
+    with pytest.raises(RuntimeError, match="No LLM provider configured"):
+        resolve_llm_config({})
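The resolution and client tests above stub out `litellm.completion`, so they run offline; only the smoke test makes a live call. A typical local invocation (test paths from this diff; assumes the `smoke` marker is registered in your pytest configuration):

```bash
pytest -q tests/test_llm_resolution.py tests/test_llm_client.py        # no network needed
GEMINI_API_KEY=your_key pytest -m smoke -q tests/test_gemini_smoke.py  # opt-in live check
```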