3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"chatgpt.commentCodeLensEnabled": false
}
19 changes: 19 additions & 0 deletions AGENTS.md
@@ -0,0 +1,19 @@
# AGENTS.md

## Testing

- Ruff Lint: `uv run ruff check [files]`
- Lint: `uv run basedpyright [files]`
- Pyright warnings are okay but not ideal: you don't have to fix existing ones, but try not to introduce new ones.

## Code style

- Use type hints wherever appropriate.
- The codebase uses a modern version of Python. Use `list[...]` instead of `typing.List[...]`. No need to import `annotations`.
- Try to minimize indentation.
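
A minimal sketch of these style points (the function and names are illustrative, not from the codebase):

```python
def shortest_names(names: list[str], limit: int = 3) -> list[str]:
    """Return up to `limit` names, shortest first."""
    # Early return keeps the happy path at one indentation level.
    if not names:
        return []
    # Built-in generics (list[str]) instead of typing.List.
    return sorted(names, key=len)[:limit]
```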

## Notes for agents

- Always load environment variables from `.envrc` before executing commands.
- All python commands should be invoked through `uv run`.
- If some code looks out of place or could be simplified, go ahead and fix the code, even if that is not directly part of the request.
32 changes: 32 additions & 0 deletions download_model.py
@@ -0,0 +1,32 @@
"""
Download model from Hub to a local path.

Usage:
uv run download_model.py \
--repo_name Qwen/Qwen3-0.6B \
--local_path /model-weights/Qwen3-0.6B

uv run download_model.py \
--repo_name Qwen/Qwen3-4B-Thinking-2507 \
--local_path /model-weights/Qwen3-4B-Thinking-2507

uv run download_model.py \
--repo_name Qwen/Qwen3-1.7B \
--local_path /model-weights/Qwen3-1.7B
"""

import argparse

from transformers import AutoModelForCausalLM, AutoTokenizer


parser = argparse.ArgumentParser()
parser.add_argument("--repo_name", required=True)
parser.add_argument("--local_path", required=True)

if __name__ == "__main__":
args = parser.parse_args()
AutoTokenizer.from_pretrained(args.repo_name).save_pretrained(args.local_path)
AutoModelForCausalLM.from_pretrained(args.repo_name).save_pretrained(
args.local_path
)
15 changes: 14 additions & 1 deletion pyproject.toml
@@ -24,6 +24,19 @@ dependencies = [
"pillow>=10.0.0,<11.0.0",
"ruff>=0.1.0,<1.0.0",
"pre-commit>=3.0.0,<4.0.0",
"pydantic>=2.11.7",
"rich>=14.1.0",
"openai-agents>=0.2.11",
"vec-inf==0.6.1",
"basedpyright>=1.31.4",
"langfuse>=3.3.4",
"nest-asyncio>=1.6.0",
"pydantic-ai[logfire]>=1.0.6",
]

[dependency-groups]
dev = [
"basedpyright>=1.31.4",
]

[tool.ruff]
@@ -38,7 +51,7 @@ docstring-code-format = true
[tool.ruff.lint]
select = ["A","B","COM","C4","RET","SIM","ICN","Q","RSE","D","E","F","I","W","N","ERA","PL"]
fixable = ["A","B","COM","C4","RET","SIM","ICN","Q","RSE","D","E","F","I","W","N","ERA","PL"]
ignore = ["B905","E501","D203","D213","PLR2004","PLR0913","COM812"]
ignore = ["B905","E501","D203","D213","PLC0415","PLR2004","PLR0913","COM812", "ERA001"]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402","F401","F403","F811"]
21 changes: 21 additions & 0 deletions scripts.md
@@ -0,0 +1,21 @@
# Scripts

## RLVR

```bash
uv run starters/llm_fine_tuning/rlvr/grpo_trainer.py
```

## vec-inf Placeholder

```bash
singularity exec --nv --bind /model-weights/Qwen3-0.6B --containall /model-weights/vec-inf-shared/vector-inference_0.10.0.sif \
vllm serve /model-weights/Qwen3-0.6B \
--served-model-name Qwen3-0.6B \
--host "0.0.0.0" \
--port 8000 \
--max-model-len 40960 \
--max-num-seqs 256 \
--enable-auto-tool-choice \
--tool-call-parser hermes
```
6 changes: 6 additions & 0 deletions starters/llm_fine_tuning/README.md
@@ -0,0 +1,6 @@
### LLM fine-tuning starters

This directory includes minimal examples for LLM fine-tuning with vec-tool:

- [text_classification](text_classification/): Fine-tunes a small Transformer on AG News using Hugging Face Trainer, with Submitit-compatible checkpointing and resume.
- [rlvr](rlvr/): RL fine-tuning, where reward is extrinsic and verifiable.
176 changes: 176 additions & 0 deletions starters/llm_fine_tuning/rlvr/agents/examples.py
@@ -0,0 +1,176 @@
"""
OpenAI Agents SDK demo: function tool `get_weather` + minimal agent.

Setup (with astral-uv):
uv venv && uv pip install -U pip
uv add openai-agents pydantic
# Or, if you prefer pip: `pip install openai-agents pydantic`

Environment:
export OPENAI_API_KEY=... # Required for the default OpenAI client

Run:
    uv run starters/llm_fine_tuning/rlvr/agents/examples.py
"""

from __future__ import annotations

from datetime import date, datetime, timedelta, timezone
from typing import Literal

from agents import Agent, Runner, function_tool
from pydantic import BaseModel, Field


class WeatherReport(BaseModel):
"""Structured output for a weather report.

Attributes
----------
city:
Echo of the requested city (canonicalized).
unit:
"c" for Celsius or "f" for Fahrenheit.
temperature:
Air temperature in the requested unit.
feels_like:
Apparent temperature in the requested unit.
condition:
One of: "clear", "partly cloudy", "cloudy", "rain", "snow", "windy".
humidity:
Relative humidity percentage (0–100).
wind_kph:
Wind speed in kilometers per hour.
observation_time:
UTC timestamp when the reading was generated.
"""

city: str
unit: Literal["c", "f"]
temperature: float
feels_like: float
condition: Literal["clear", "partly cloudy", "cloudy", "rain", "snow", "windy"]
humidity: int = Field(ge=0, le=100)
wind_kph: float = Field(ge=0)
observation_time: datetime


@function_tool
def get_weather(
city: str,
unit: Literal["c", "f"] = "c",
when: Literal["now", "today", "tomorrow"] = "now",
) -> str:
"""Return a deterministic, mock weather report for demos.

The function is *offline* and *stable across runs* for a given `(city, date)`
so it's ideal for showcasing **function-tool** calls without network flakiness.

Args:
city:
Human-readable city name (e.g., "Vancouver").
unit:
Temperature unit: "c" for Celsius, "f" for Fahrenheit. Defaults to "c".
when:
Time window for the report: "now", "today", or "tomorrow". Defaults to "now".

Returns:
JSON string representing a `WeatherReport`.
"""
canonical = city.strip()

# City baselines (°C). Extend this mapping to taste.
baselines: dict[str, float] = {
"vancouver": 14.0,
"new york": 12.0,
"london": 11.0,
"singapore": 28.0,
"shanghai": 28.0,
"auckland": 20.0,
"tokyo": 17.0,
"paris": 13.0,
"san francisco": 16.0,
"berlin": 12.0,
"mexico city": 19.0,
}

key = canonical.lower()
base_c = baselines.get(key, 15.0)

# Reference date for deterministic seeding
today = date.today()
ref_date = today if when in ("now", "today") else today + timedelta(days=1)

# Seeded pseudo-random values derived from (city, date). Built-in hash() is
# salted per process, so use a stable digest to keep results reproducible.
import hashlib

seed = int.from_bytes(hashlib.sha256(f"{key}|{ref_date.isoformat()}".encode()).digest()[:8], "big")

def prand(a: float, b: float, salt: int) -> float:
# Deterministic pseudo-random in [a, b]
return a + (seed ^ salt) % 10 / 9.0 * (b - a)

temp_c = base_c + prand(-4.0, 4.0, 0xA5A5) - 0.5
humidity = int(round(prand(40, 90, 0xB6B6)))
wind_kph = round(prand(0.0, 30.0, 0xC7C7), 1)

band = seed % 100
if band < 20:
condition = "clear"
elif band < 45:
condition = "partly cloudy"
elif band < 65:
condition = "cloudy"
elif band < 85:
condition = "rain"
elif band < 95:
condition = "windy"
else:
condition = "snow"

feels_c = temp_c - 0.1 * wind_kph + 0.02 * (humidity - 50)

def to_unit(tc: float, u: Literal["c", "f"]) -> float:
return round(tc if u == "c" else (tc * 9 / 5 + 32), 1)

report = WeatherReport(
city=canonical,
unit=unit,
temperature=to_unit(temp_c, unit),
feels_like=to_unit(feels_c, unit),
condition=condition, # type: ignore[arg-type]
humidity=humidity,
wind_kph=wind_kph,
observation_time=datetime.now(timezone.utc),
)

# Agents SDK tools should return a string (or something that stringifies cleanly).
return report.model_dump_json()


# --- Minimal agent wiring ----------------------------------------------------
weather_agent = Agent(
name="Weather Helper",
instructions=(
"You answer weather questions. When the user asks about weather, "
"call the `get_weather` tool. If it returns JSON, parse it and reply "
"concisely with temperature, feels-like, condition, and units."
),
tools=[get_weather], # register the function tool
)


def main() -> None:
"""Run a single demo turn with the agent and print the final output."""
# Example inputs that strongly encourage tool use
user_inputs: list[str] = [
"What's the weather in Vancouver today in celsius?",
"NYC now, in Fahrenheit — include feels-like and wind, please.",
]

for i, prompt in enumerate(user_inputs, start=1):
print(f"\n=== Demo turn {i} ===")
result = Runner.run_sync(weather_agent, prompt)
print(result.final_output)


if __name__ == "__main__":
main()
104 changes: 104 additions & 0 deletions starters/llm_fine_tuning/rlvr/agents/notes.md
@@ -0,0 +1,104 @@
# Agent SDK output to HuggingFace

The Hugging Face Transformers chat template expects two inputs: `messages` and `tools`.

Messages follow the usual OpenAI Chat Completions format, including a special "tool" role for tool outputs.

```json
[
{ "role": "user", "content": "Turn on the living room lights." },
{
"role": "assistant",
"tool_calls": [
{
"type": "function",
"function": {
"name": "control_light",
"arguments": { "room": "living room", "state": "on" }
}
}
]
},
{
"role": "tool",
"name": "control_light",
"content": "The lights in the living room are now on."
},
{ "role": "assistant", "content": "Done!" }
]
```
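
For example, the conversation above can be assembled in plain Python; `tool_turn` is a hypothetical helper for readability, not part of any SDK:

```python
def tool_turn(name: str, content: str) -> dict:
    """Build a 'tool' role message in the format shown above."""
    return {"role": "tool", "name": name, "content": content}


messages = [
    {"role": "user", "content": "Turn on the living room lights."},
    {
        "role": "assistant",
        "tool_calls": [
            {
                "type": "function",
                "function": {
                    "name": "control_light",
                    "arguments": {"room": "living room", "state": "on"},
                },
            }
        ],
    },
    tool_turn("control_light", "The lights in the living room are now on."),
    {"role": "assistant", "content": "Done!"},
]
```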

Tools are provided as a list of dicts in the following format:

```json
{
"type": "function",
"function": {
"name": "control_light",
"description": "Controls the lights in a room.",
"parameters": {
"type": "object",
"properties": {
"room": { "type": "string", "description": "The name of the room." },
"state": {
"type": "string",
"description": "The desired state of the light (\"on\" or \"off\")."
}
},
"required": ["room", "state"]
},
"return": {
"type": "string",
"description": "str: A message indicating the new state of the lights."
}
}
}
```
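
A schema like the one above can be built with a small helper; `make_tool_schema` is a sketch for this note, not a library function. (If the tool is a plain Python function with a typed docstring, `transformers.utils.get_json_schema` can also generate the schema automatically.)

```python
def make_tool_schema(
    name: str, description: str, properties: dict, required: list[str]
) -> dict:
    """Wrap a JSON-schema parameter spec in the tool format Transformers expects."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


tools = [
    make_tool_schema(
        "control_light",
        "Controls the lights in a room.",
        {
            "room": {"type": "string", "description": "The name of the room."},
            "state": {
                "type": "string",
                "description": 'The desired state of the light ("on" or "off").',
            },
        },
        ["room", "state"],
    )
]
```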

Test the format using `tokenizer.apply_chat_template`:

```python
formatted = tokenizer.apply_chat_template(
messages,
tools=(tools or None), # type: ignore[arg-type]
tokenize=False,
add_generation_prompt=False,
)
```

Run the following to see the output:

```bash
PYTHONPATH="." uv run starters/llm_fine_tuning/rlvr/agents/main.py
```

Example output from Qwen3 format:

```
<|im_start|>system
# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get_weather", "parameters": {"properties": {"city": {"description": "\nHuman-readable city name (e.g., \"Vancouver\").", "title": "City", "type": "string"}, "unit": {"default": "c", "description": "\nTemperature unit: \"c\" for Celsius, \"f\" for Fahrenheit. Defaults to \"c\".", "enum": ["c", "f"], "title": "Unit", "type": "string"}, "when": {"default": "now", "description": "\nTime window for the report: \"now\", \"today\", or \"tomorrow\". Defaults to \"now\".", "enum": ["now", "today", "tomorrow"], "title": "When", "type": "string"}}, "required": ["city", "unit", "when"], "title": "get_weather_args", "type": "object", "additionalProperties": false}, "description": "Return a deterministic, mock weather report for demos.\n\nThe function is *offline* and *stable across runs* for a given `(city, date)`\nso it's ideal for showcasing **function-tool** calls without network flakiness."}}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
Weather in Auckland<|im_end|>
<|im_start|>user
<tool_response>
{"city":"Auckland","unit":"c","temperature":16.4,"feels_like":16.5,"condition":"cloudy","humidity":73,"wind_kph":3.3,"observation_time":"2025-09-12T18:01:56.767763Z"}
</tool_response><|im_end|>
<|im_start|>assistant
<think>
Okay, let me process the tool response. The user asked for the weather in Auckland, so I need to present the data concisely. The temperature is 16.4°C, feels like 16.5°C. The condition is cloudy. Humidity is 73%, wind speed 3.3 kph. I should mention the time as well. Let me check the units again to make sure they're correct. Celsius for temperature and feels-like, and the condition. No need for extra info unless specified. Alright, putting it all together in a clear sentence.
</think>

The weather in Auckland is currently 16.4°C with a feels-like temperature of 16.5°C, with a cloudy condition and 73% humidity. Wind is blowing at 3.3 kph.<|im_end|>
```
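
Going the other direction, recovering tool calls from a raw Qwen3-style completion can be sketched with a small regex parser (assuming one JSON object per `<tool_call>` block, as in the template above):

```python
import json
import re

# Non-greedy match anchored on the closing tag, so nested braces stay inside the group.
TOOL_CALL_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)


def parse_tool_calls(completion: str) -> list[dict]:
    """Extract {"name": ..., "arguments": ...} objects from <tool_call> tags."""
    return [json.loads(match) for match in TOOL_CALL_RE.findall(completion)]


demo = '<tool_call>\n{"name": "get_weather", "arguments": {"city": "Auckland"}}\n</tool_call>'
```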