From ae8a3d882679e826c8ecb2bd47573192dcafbb36 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 30 Sep 2025 03:08:10 +0000 Subject: [PATCH] feat: Add Claude Sonnet 4.5 and documentation This commit introduces documentation and scripts to add Claude Sonnet 4.5 to the LLM benchmarks. It includes detailed instructions, utility scripts for adding and testing models, and comprehensive research summaries. Co-authored-by: drose --- ADD_SONNET_4.5_INSTRUCTIONS.md | 136 +++++++++++ FILES_CREATED.md | 290 ++++++++++++++++++++++++ QUICK_START.md | 80 +++++++ README_ADDING_MODELS.md | 358 +++++++++++++++++++++++++++++ RESEARCH_SUMMARY.md | 374 +++++++++++++++++++++++++++++++ scripts/add_model_to_db.py | 217 ++++++++++++++++++ scripts/add_sonnet_4.5.sh | 117 ++++++++++ scripts/test_anthropic_models.py | 210 +++++++++++++++++ 8 files changed, 1782 insertions(+) create mode 100644 ADD_SONNET_4.5_INSTRUCTIONS.md create mode 100644 FILES_CREATED.md create mode 100644 QUICK_START.md create mode 100644 README_ADDING_MODELS.md create mode 100644 RESEARCH_SUMMARY.md create mode 100755 scripts/add_model_to_db.py create mode 100755 scripts/add_sonnet_4.5.sh create mode 100755 scripts/test_anthropic_models.py diff --git a/ADD_SONNET_4.5_INSTRUCTIONS.md b/ADD_SONNET_4.5_INSTRUCTIONS.md new file mode 100644 index 0000000..171be4b --- /dev/null +++ b/ADD_SONNET_4.5_INSTRUCTIONS.md @@ -0,0 +1,136 @@ +# Adding Claude Sonnet 4.5 to LLM Benchmarks + +## Overview +This document explains how to add the new Claude Sonnet 4.5 model to your LLM benchmarking system. + +## Current Status +- **Release Date**: September 29, 2025 +- **Model Type**: Cloud API-based (not for local deployment) +- **Provider**: Anthropic + +## Model Identifier +⚠️ **IMPORTANT**: You need to verify the exact model ID from Anthropic's documentation. + +Based on existing naming patterns, it's likely one of: +- `claude-4-sonnet-20250929` +- `claude-4-5-sonnet-20250929` +- `claude-4-sonnet-20250514` + +Check: https://docs.anthropic.com/en/docs/models-overview + +## How to Add the Model + +### Prerequisites +- Access to your MongoDB instance +- `mongosh` installed on your machine +- Environment variables set: + - `MONGODB_URI`: e.g., `mongodb+srv://user:pass@cluster.mongodb.net` + - `MONGODB_DB`: e.g., `llm-bench` or `llmbench_staging` + +### Method 1: Add to Database (Recommended for Production) + +1. **Verify the correct model ID** from Anthropic's API documentation + +2. **Seed the model into MongoDB**: + ```bash + # Replace MODEL_ID with the actual model identifier + PROVIDER=anthropic MODEL_ID=claude-4-sonnet-20250929 mongosh "$MONGODB_URI/$MONGODB_DB" scripts/seed_model.js + ``` + +3. **Verify it was added**: + ```bash + mongosh "$MONGODB_URI/$MONGODB_DB" --eval 'db.models.find({provider: "anthropic", enabled: true}).pretty()' + ``` + +### Method 2: Update Configuration File + +Edit `/workspace/cloud/models.json` and add the new model to the `anthropic` array: + +```json +"anthropic": [ + "claude-2.1", + "claude-3-haiku-20240307", + "claude-3-5-haiku-20241022", + "claude-3-sonnet-20240229", + "claude-3-5-sonnet-20240620", + "claude-3-7-sonnet-20250219", + "claude-3-opus-20240229", + "claude-4-sonnet-20250929" // <-- ADD THIS (verify exact ID) +] +``` + +**Note**: This method updates the reference configuration. The actual benchmarking system reads from MongoDB. + +### Method 3: Enqueue a Job (Optional) + +If you want to immediately queue a benchmark job: +```bash +PROVIDER=anthropic MODEL=claude-4-sonnet-20250929 IGNORE_FRESHNESS=true mongosh "$MONGODB_URI/$MONGODB_DB" scripts/enqueue_job.js +``` + +## Testing the Integration + +### 1. Test with headless benchmark (local, no Docker): +```bash +python api/bench_headless.py --providers anthropic --limit 1 --fresh-minutes 0 +``` + +### 2. Check for errors: +```bash +mongosh "$MONGODB_URI/$MONGODB_DB" --eval 'db.errors_cloud.find({provider: "anthropic"}).sort({ts: -1}).limit(5).pretty()' +``` + +### 3. View results: +```bash +mongosh "$MONGODB_URI/$MONGODB_DB" --eval 'db.metrics_cloud_staging.find({provider: "anthropic", model_name: "claude-4-sonnet-20250929"}).sort({gen_ts: -1}).limit(1).pretty()' +``` + +## Database Schema + +Models are stored with the following structure: +```javascript +{ + provider: "anthropic", + model_id: "claude-4-sonnet-20250929", + enabled: true, + added_at: ISODate("2025-09-30T...") +} +``` + +## Code Files Involved + +1. **`/workspace/api/llm_bench/models_db.py`**: Loads models from MongoDB +2. **`/workspace/cloud/models.json`**: Reference configuration +3. **`/workspace/scripts/seed_model.js`**: Script to add models to DB +4. **`/workspace/api/llm_bench/cloud/providers/anthropic.py`**: Anthropic API integration + +## Required Environment Variables + +Ensure you have: +- `ANTHROPIC_API_KEY`: Your Anthropic API key +- `MONGODB_URI`: MongoDB connection string +- `MONGODB_DB`: Database name +- `MONGODB_COLLECTION_MODELS`: Collection name (default: `models`) + +## Next Steps + +1. ✅ Find the exact model ID from Anthropic docs +2. ✅ Ensure `ANTHROPIC_API_KEY` is set in your environment +3. ✅ Run the seed_model.js script with the correct model ID +4. ✅ Test with a benchmark run +5. ✅ Monitor for any errors + +## Troubleshooting + +**If the model doesn't appear in benchmarks:** +- Check it's marked as `enabled: true` in the database +- Verify the model_id matches exactly what Anthropic's API expects +- Check logs for API errors (might indicate wrong model ID) + +**If you get authentication errors:** +- Verify `ANTHROPIC_API_KEY` is set correctly +- Check the API key has access to the new model + +**If the model runs but fails:** +- The model ID might be incorrect +- Check Anthropic's API status/changelog for any breaking changes \ No newline at end of file diff --git a/FILES_CREATED.md b/FILES_CREATED.md new file mode 100644 index 0000000..36437d7 --- /dev/null +++ b/FILES_CREATED.md @@ -0,0 +1,290 @@ +# Files Created for Adding Claude Sonnet 4.5 + +This document lists all the new files created to help you add the Claude Sonnet 4.5 model to your LLM Benchmarks database. + +--- + +## 📚 Documentation Files + +### 1. **QUICK_START.md** +**Purpose**: Fast reference - get started in 30 seconds +**Use when**: You just want to add the model quickly + +```bash +cat QUICK_START.md +``` + +### 2. **RESEARCH_SUMMARY.md** +**Purpose**: Complete research findings and all available methods +**Use when**: You want to understand how everything works + +Includes: +- How the system works +- All 4 methods to add models +- Testing procedures +- Troubleshooting guide +- Key files in codebase + +### 3. **ADD_SONNET_4.5_INSTRUCTIONS.md** +**Purpose**: Detailed step-by-step instructions +**Use when**: You want comprehensive guidance + +Includes: +- Prerequisites checklist +- All methods with examples +- Testing procedures +- Environment variables +- Next steps + +### 4. **FILES_CREATED.md** (this file) +**Purpose**: Index of all created files +**Use when**: You want to see what was created + +--- + +## 🔧 Utility Scripts + +### 5. **scripts/add_model_to_db.py** ⭐ RECOMMENDED +**Purpose**: Python script to add models to MongoDB +**Language**: Python +**Prerequisites**: `pymongo` installed + +**Features**: +- ✅ Add any model to the database +- ✅ Check for duplicates before inserting +- ✅ Update existing models +- ✅ List all models +- ✅ Filter by provider +- ✅ Interactive confirmation +- ✅ Verification after insertion +- ✅ Helpful error messages + +**Usage**: +```bash +# Add Sonnet 4.5 +python scripts/add_model_to_db.py \ + --provider anthropic \ + --model-id claude-4-sonnet-20250929 + +# List all Anthropic models +python scripts/add_model_to_db.py --list --provider anthropic + +# Add a disabled model +python scripts/add_model_to_db.py \ + --provider openai \ + --model-id gpt-5 \ + --disabled + +# Get help +python scripts/add_model_to_db.py --help +``` + +### 6. **scripts/add_sonnet_4.5.sh** +**Purpose**: Bash script specifically for adding Sonnet 4.5 +**Language**: Bash +**Prerequisites**: `mongosh` installed + +**Features**: +- ✅ Pre-configured for Anthropic Sonnet 4.5 +- ✅ Validation checks +- ✅ Warns about default model ID +- ✅ Verifies insertion +- ✅ Shows next steps + +**Usage**: +```bash +# Edit the MODEL_ID in the script first, then: +bash scripts/add_sonnet_4.5.sh +``` + +### 7. **scripts/test_anthropic_models.py** ⭐ VERY USEFUL +**Purpose**: Test Anthropic API access and discover model IDs +**Language**: Python +**Prerequisites**: `anthropic` package installed + +**Features**: +- ✅ Verify ANTHROPIC_API_KEY is working +- ✅ Test connectivity to Anthropic API +- ✅ Test specific model IDs +- ✅ List known models +- ✅ Test all known models +- ✅ Get actual responses from models +- ✅ Helpful error messages + +**Usage**: +```bash +# Test a specific model ID to verify it works +python scripts/test_anthropic_models.py \ + --test-model claude-4-sonnet-20250929 + +# Test all known models +python scripts/test_anthropic_models.py --test-all-known + +# Just show info and known models +python scripts/test_anthropic_models.py +``` + +**This is the best way to find the correct model ID!** + +--- + +## 🗂️ File Structure + +``` +/workspace/ +├── QUICK_START.md # Quick reference +├── RESEARCH_SUMMARY.md # Complete research +├── ADD_SONNET_4.5_INSTRUCTIONS.md # Detailed guide +├── FILES_CREATED.md # This file +└── scripts/ + ├── add_model_to_db.py # Python script (recommended) + ├── add_sonnet_4.5.sh # Bash script + ├── test_anthropic_models.py # Test Anthropic API + ├── seed_model.js # Original MongoDB script + └── ... +``` + +--- + +## 🚀 Recommended Workflow + +### Step 1: Find the Correct Model ID + +```bash +# First, test what model IDs work +python scripts/test_anthropic_models.py \ + --test-model claude-4-sonnet-20250929 +``` + +If that fails, try variations: +- `claude-4-5-sonnet-20250929` +- `claude-sonnet-4-5-20250929` +- Check Anthropic's docs + +### Step 2: Add to Database + +```bash +# Once you know the correct ID, add it +python scripts/add_model_to_db.py \ + --provider anthropic \ + --model-id +``` + +### Step 3: Test Benchmark + +```bash +# Run a test benchmark +python api/bench_headless.py \ + --providers anthropic \ + --limit 1 +``` + +### Step 4: Monitor + +```bash +# Check for errors +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.errors_cloud.find({ + provider: "anthropic" +}).sort({ts: -1}).limit(5).pretty() +' +``` + +--- + +## 📝 Key Takeaways + +### ✅ What You Can Do + +1. **Add models via code** - No manual database editing needed +2. **Test before adding** - Verify model IDs work first +3. **List existing models** - See what's already in the DB +4. **Update models** - Change enabled/disabled status + +### ✅ What You Need + +1. **MongoDB access** - Connection string and credentials +2. **Anthropic API key** - For testing and benchmarking +3. **Correct model ID** - From Anthropic's documentation +4. **Python/mongosh** - To run the scripts + +### ✅ What You Don't Need + +1. ❌ Local model files - Anthropic is API-only +2. ❌ GPU/special hardware - API handles compute +3. ❌ Code changes - Everything is config-driven +4. ❌ Restart services - MongoDB changes are live + +--- + +## 🆘 If Something Goes Wrong + +### Script fails to run +```bash +# Check Python dependencies +pip install pymongo anthropic + +# Check mongosh is installed +mongosh --version +``` + +### Can't connect to MongoDB +```bash +# Verify connection +mongosh "$MONGODB_URI" --eval 'db.runCommand({ping: 1})' +``` + +### Model test fails +```bash +# Check API key +echo $ANTHROPIC_API_KEY + +# Try a known working model first +python scripts/test_anthropic_models.py \ + --test-model claude-3-5-sonnet-20240620 +``` + +### Model not appearing in benchmarks +```bash +# Check it's enabled +python scripts/add_model_to_db.py --list --provider anthropic + +# Verify in database +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.models.find({provider: "anthropic", enabled: true}).pretty() +' +``` + +--- + +## 📖 Additional Resources + +- **Anthropic Docs**: https://docs.anthropic.com/en/docs/models-overview +- **MongoDB Shell**: https://www.mongodb.com/docs/mongodb-shell/ +- **PyMongo Docs**: https://pymongo.readthedocs.io/ + +--- + +## 💡 Pro Tips + +1. **Always test the model ID first** using `test_anthropic_models.py` +2. **Use the Python script** (`add_model_to_db.py`) - it's more robust +3. **List models frequently** to see what's in your database +4. **Start with one model** - don't add multiple until first one works +5. **Check the dates** - Model IDs use YYYYMMDD format (20250929 = Sep 29, 2025) + +--- + +## Summary + +I've created 7 files for you: +- **4 documentation files** explaining everything +- **3 utility scripts** to make adding models easy + +**Start here**: +1. Read `QUICK_START.md` (30 seconds) +2. Run `test_anthropic_models.py --test-model ` +3. Run `add_model_to_db.py --provider anthropic --model-id ` +4. Done! + +Good luck! 🚀 \ No newline at end of file diff --git a/QUICK_START.md b/QUICK_START.md new file mode 100644 index 0000000..799112a --- /dev/null +++ b/QUICK_START.md @@ -0,0 +1,80 @@ +# Quick Start: Add Claude Sonnet 4.5 + +## 🚀 TL;DR + +```bash +# 1. Get the exact model ID from Anthropic docs +# https://docs.anthropic.com/en/docs/models-overview + +# 2. Set environment variables (if not already set) +export MONGODB_URI="your-mongodb-uri" +export MONGODB_DB="llm-bench" +export ANTHROPIC_API_KEY="your-api-key" + +# 3. Add the model (replace MODEL_ID with actual ID) +python scripts/add_model_to_db.py \ + --provider anthropic \ + --model-id claude-4-sonnet-20250929 + +# 4. Test it +python api/bench_headless.py --providers anthropic --limit 1 +``` + +## ✅ That's it! + +--- + +## 📋 Checklist + +- [ ] Get exact model ID from Anthropic +- [ ] Set `MONGODB_URI` environment variable +- [ ] Set `ANTHROPIC_API_KEY` environment variable +- [ ] Run the add_model_to_db.py script +- [ ] Test with a benchmark run +- [ ] Check for errors + +--- + +## 📚 Full Documentation + +For detailed information, see: +- **RESEARCH_SUMMARY.md** - Complete research and all methods +- **ADD_SONNET_4.5_INSTRUCTIONS.md** - Step-by-step guide +- **scripts/add_model_to_db.py** - Python script with `--help` + +--- + +## 🔍 Verify Model ID + +The model ID likely follows this pattern: +``` +claude-4-sonnet-YYYYMMDD +``` + +Where YYYYMMDD is the release date (e.g., 20250929 for Sep 29, 2025). + +**Check Anthropic's docs to confirm!** + +--- + +## ❓ Common Questions + +**Q: Do I need local server access?** +A: Only if your MongoDB is running locally. If it's remote, you can run from anywhere. + +**Q: Will this work?** +A: Yes! The system is designed for this. Just need the correct model ID. + +**Q: What if the model ID is wrong?** +A: The benchmark will fail with an API error. Update the ID in the database. + +**Q: Can I test without adding to database?** +A: No, the system reads models from MongoDB only. + +--- + +## 🆘 Need Help? + +1. Check `/workspace/RESEARCH_SUMMARY.md` for complete details +2. Run with `--help`: `python scripts/add_model_to_db.py --help` +3. List existing models: `python scripts/add_model_to_db.py --list --provider anthropic` \ No newline at end of file diff --git a/README_ADDING_MODELS.md b/README_ADDING_MODELS.md new file mode 100644 index 0000000..4989860 --- /dev/null +++ b/README_ADDING_MODELS.md @@ -0,0 +1,358 @@ +# Adding New Models to LLM Benchmarks + +**Quick Navigation**: 🚀 [Quick Start](#quick-start) | 📚 [Documentation](#documentation) | 🔧 [Tools](#tools) | ❓ [FAQ](#faq) + +--- + +## 🎯 Goal + +Add **Claude Sonnet 4.5** (or any new model) to your LLM Benchmarks database. + +## ✅ Answer: Is This Fixable Via Code? + +**YES!** The system is designed for this. You can add models through: +- MongoDB scripts +- Python scripts (recommended) +- Direct database commands + +**No code changes needed** - it's all configuration-driven. + +--- + +## 🚀 Quick Start + +### The Fastest Way (3 commands) + +```bash +# 1. Find the correct model ID +python scripts/test_anthropic_models.py --test-model claude-4-sonnet-20250929 + +# 2. Add it to the database +python scripts/add_model_to_db.py --provider anthropic --model-id claude-4-sonnet-20250929 + +# 3. Test it +python api/bench_headless.py --providers anthropic --limit 1 +``` + +**Done!** 🎉 + +For more details, see [QUICK_START.md](QUICK_START.md) + +--- + +## 📚 Documentation + +I've created comprehensive documentation for you: + +| File | Purpose | When to Use | +|------|---------|-------------| +| **[QUICK_START.md](QUICK_START.md)** | 30-second guide | You want to add the model NOW | +| **[RESEARCH_SUMMARY.md](RESEARCH_SUMMARY.md)** | Complete research & all methods | You want to understand everything | +| **[ADD_SONNET_4.5_INSTRUCTIONS.md](ADD_SONNET_4.5_INSTRUCTIONS.md)** | Step-by-step guide | You want detailed instructions | +| **[FILES_CREATED.md](FILES_CREATED.md)** | Index of new files | You want to see what was created | + +--- + +## 🔧 Tools + +I've created three utility scripts: + +### 1. Test Anthropic Models ⭐ START HERE + +**Purpose**: Find the correct model ID and verify API access + +```bash +python scripts/test_anthropic_models.py --test-model claude-4-sonnet-20250929 +``` + +**This answers**: "What's the correct model ID?" + +### 2. Add Model to Database ⭐ RECOMMENDED + +**Purpose**: Add any model to MongoDB + +```bash +python scripts/add_model_to_db.py --provider anthropic --model-id +``` + +**Features**: Duplicate checking, verification, list models, interactive + +### 3. Add Sonnet 4.5 (Bash) + +**Purpose**: Bash alternative for adding Sonnet 4.5 + +```bash +bash scripts/add_sonnet_4.5.sh +``` + +**Note**: Edit the script first to set the correct model ID + +--- + +## 🔍 Research Findings + +### How Models Are Stored + +- **Database**: MongoDB +- **Collection**: `models` (configurable) +- **Structure**: + ```javascript + { + provider: "anthropic", + model_id: "claude-4-sonnet-20250929", + enabled: true, + added_at: ISODate(...) + } + ``` + +### How Models Are Loaded + +The Python code (`api/llm_bench/models_db.py`) queries MongoDB: + +```python +db.models.find({"enabled": True}) +``` + +This means: +- ✅ Add models = add to MongoDB +- ✅ Enable/disable = update `enabled` field +- ✅ Changes are immediate (no restart needed) + +### Current Anthropic Models in Your System + +From your database/config: +- `claude-3-7-sonnet-20250219` ← **Newest currently** +- `claude-3-5-sonnet-20240620` +- `claude-3-5-haiku-20241022` +- `claude-3-opus-20240229` +- Others... + +--- + +## 🎯 About Claude Sonnet 4.5 + +### Release Info +- **Date**: September 29, 2025 +- **Type**: Cloud API only +- **Capabilities**: Improved coding, finance, cybersecurity, long-duration tasks + +### Model Naming Pattern + +Anthropic uses: `claude-{family}-{variant}-{YYYYMMDD}` + +Examples: +- `claude-3-5-sonnet-20240620` (June 20, 2024) +- `claude-3-7-sonnet-20250219` (Feb 19, 2025) + +### Likely Model ID + +⚠️ **Must verify from Anthropic docs!** + +Based on pattern, likely: +- `claude-4-sonnet-20250929` (most likely) +- `claude-4-5-sonnet-20250929` +- `claude-sonnet-4-5-20250929` + +**Verify at**: https://docs.anthropic.com/en/docs/models-overview + +--- + +## ❓ FAQ + +### Q: Do I need to be on my server locally? + +**A**: Only if: +- Your MongoDB is local-only (no remote access) +- You need to set environment variables on the server + +If MongoDB is accessible remotely, you can add models from anywhere. + +### Q: Will this require code changes? + +**A**: No! The system is configuration-driven through MongoDB. + +### Q: What if I use the wrong model ID? + +**A**: The benchmark will fail with an API error. Just update it in MongoDB: + +```bash +python scripts/add_model_to_db.py --provider anthropic --model-id CORRECT_ID +``` + +### Q: How do I know if it worked? + +**A**: Run a test benchmark: + +```bash +python api/bench_headless.py --providers anthropic --limit 1 +``` + +Check for errors in the `errors_cloud` collection. + +### Q: Can I add multiple models at once? + +**A**: Yes! Run the script multiple times: + +```bash +python scripts/add_model_to_db.py --provider anthropic --model-id model-1 +python scripts/add_model_to_db.py --provider anthropic --model-id model-2 +``` + +### Q: How do I list what's already in the database? + +**A**: + +```bash +python scripts/add_model_to_db.py --list --provider anthropic +``` + +--- + +## 📋 Prerequisites + +Before you start, ensure you have: + +### Environment Variables +```bash +export MONGODB_URI="mongodb+srv://..." +export MONGODB_DB="llm-bench" +export ANTHROPIC_API_KEY="sk-ant-..." +``` + +### Tools Installed +- Python 3.9+ +- `pymongo` package: `pip install pymongo` +- `anthropic` package: `pip install anthropic` +- OR `mongosh` for bash scripts + +### Access +- MongoDB read/write access +- Valid Anthropic API key +- Correct model ID from Anthropic docs + +--- + +## 🎓 Step-by-Step Tutorial + +### Step 1: Verify Environment + +```bash +# Check MongoDB connection +mongosh "$MONGODB_URI/$MONGODB_DB" --eval 'db.runCommand({ping: 1})' + +# Check Anthropic API key +echo $ANTHROPIC_API_KEY +``` + +### Step 2: Find the Correct Model ID + +```bash +# Test a likely model ID +python scripts/test_anthropic_models.py --test-model claude-4-sonnet-20250929 + +# If it fails, try variations or check Anthropic docs +``` + +### Step 3: Add to Database + +```bash +# Add the model +python scripts/add_model_to_db.py \ + --provider anthropic \ + --model-id claude-4-sonnet-20250929 +``` + +### Step 4: Verify + +```bash +# List all Anthropic models +python scripts/add_model_to_db.py --list --provider anthropic +``` + +### Step 5: Test + +```bash +# Run a single benchmark +python api/bench_headless.py --providers anthropic --limit 1 --fresh-minutes 0 +``` + +### Step 6: Check Results + +```bash +# Check for errors +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.errors_cloud.find({provider: "anthropic"}).sort({ts: -1}).limit(5).pretty() +' + +# Check for successful results +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.metrics_cloud_staging.find({ + provider: "anthropic", + model_name: /claude-4/ +}).sort({gen_ts: -1}).limit(1).pretty() +' +``` + +--- + +## 🔗 Key Files in Codebase + +### Runtime +- `api/llm_bench/models_db.py` - Loads models from MongoDB +- `api/llm_bench/cloud/providers/anthropic.py` - Anthropic integration +- `api/bench_headless.py` - Benchmark runner + +### Configuration +- `cloud/models.json` - Reference file (NOT used at runtime) +- Environment variables - Actual configuration + +### Scripts (Original) +- `scripts/seed_model.js` - MongoDB script to add models +- `scripts/enqueue_job.js` - Queue a benchmark job + +### Scripts (NEW - Created for You) +- `scripts/add_model_to_db.py` ⭐ - Python helper +- `scripts/test_anthropic_models.py` ⭐ - Test API & find model IDs +- `scripts/add_sonnet_4.5.sh` - Bash helper + +--- + +## 🎉 Summary + +### What You Learned + +1. ✅ Models are stored in **MongoDB** (not code) +2. ✅ Adding models = **updating the database** +3. ✅ The system is **configuration-driven** +4. ✅ Changes are **immediate** (no restart needed) + +### What You Got + +1. 📚 **4 documentation files** explaining everything +2. 🔧 **3 utility scripts** to make it easy +3. 🎯 **Step-by-step instructions** for success + +### What You Need to Do + +1. 🔍 **Find the correct model ID** (use `test_anthropic_models.py`) +2. ➕ **Add it to MongoDB** (use `add_model_to_db.py`) +3. ✅ **Test it** (run a benchmark) +4. 🎊 **Done!** + +--- + +## 🚀 Ready to Start? + +Pick your starting point: + +- **Just want to do it**: [QUICK_START.md](QUICK_START.md) +- **Want to understand first**: [RESEARCH_SUMMARY.md](RESEARCH_SUMMARY.md) +- **Want step-by-step guide**: [ADD_SONNET_4.5_INSTRUCTIONS.md](ADD_SONNET_4.5_INSTRUCTIONS.md) +- **Want to see what's available**: [FILES_CREATED.md](FILES_CREATED.md) + +Or just run: +```bash +python scripts/test_anthropic_models.py +``` + +Good luck! 🎯 \ No newline at end of file diff --git a/RESEARCH_SUMMARY.md b/RESEARCH_SUMMARY.md new file mode 100644 index 0000000..570b631 --- /dev/null +++ b/RESEARCH_SUMMARY.md @@ -0,0 +1,374 @@ +# Research Summary: Adding Models to LLM Benchmarks Database + +**Date**: September 30, 2025 +**Topic**: How models are added to the database & adding Claude Sonnet 4.5 + +--- + +## Executive Summary + +✅ **Yes, this is fixable via code!** You can add new models to the database using: +1. MongoDB scripts (recommended) +2. Python script (created for you) +3. Direct MongoDB commands + +You may need to be on your server locally only to ensure you have access to the MongoDB instance and can run the seeding scripts. + +--- + +## How the System Works + +### Database Architecture + +The LLM Benchmarks system uses **MongoDB** to manage model configurations: + +- **Database**: `MONGODB_DB` (default: `llm-bench`) +- **Collection**: `MONGODB_COLLECTION_MODELS` (default: `models`) +- **Document Structure**: + ```javascript + { + provider: "anthropic", + model_id: "claude-3-5-sonnet-20240620", + enabled: true, + added_at: ISODate("2024-06-20T...") + } + ``` + +### How Models Are Loaded + +The Python code (`/workspace/api/llm_bench/models_db.py`) queries MongoDB for enabled models: + +```python +def load_provider_models() -> Dict[str, List[str]]: + # Connects to MongoDB + # Queries: db.models.find({"enabled": True}) + # Returns: {provider: [model_ids]} +``` + +This means: +- **Runtime**: Models are loaded from MongoDB +- **Configuration**: `/workspace/cloud/models.json` is a reference file (not used by runtime) +- **Control**: Enable/disable models by changing the `enabled` field in MongoDB + +--- + +## Current Anthropic Models in System + +From `/workspace/cloud/models.json`: + +```json +"anthropic": [ + "claude-2.1", + "claude-3-haiku-20240307", + "claude-3-5-haiku-20241022", + "claude-3-sonnet-20240229", + "claude-3-5-sonnet-20240620", + "claude-3-7-sonnet-20250219", // This is the newest in your system + "claude-3-opus-20240229" +] +``` + +**Note**: Claude 3.7 Sonnet (released Feb 2025) is already in the system. + +--- + +## About Claude Sonnet 4.5 + +### Release Information +- **Announced**: September 29, 2025 +- **Type**: Cloud API only (no local deployment) +- **Improvements**: Coding, finance, cybersecurity, long-duration autonomous work + +### Model Naming Pattern + +Anthropic uses date-based model identifiers following the pattern: +``` +claude-{family}-{variant}-{YYYYMMDD} +``` + +Examples: +- `claude-3-5-sonnet-20240620` (June 20, 2024) +- `claude-3-7-sonnet-20250219` (Feb 19, 2025) +- `claude-3-5-haiku-20241022` (Oct 22, 2024) + +### Likely Model ID for Sonnet 4.5 + +⚠️ **UNCONFIRMED** - Based on naming patterns, likely one of: +- `claude-4-sonnet-20250929` (most likely) +- `claude-4-5-sonnet-20250929` +- `claude-sonnet-4-5-20250929` + +**YOU MUST VERIFY** the exact model ID from: +- Anthropic's API documentation: https://docs.anthropic.com/en/docs/models-overview +- Your Anthropic console/dashboard +- Or test via their API + +--- + +## How to Add Claude Sonnet 4.5 + +### Prerequisites + +1. **Access to MongoDB**: + ```bash + export MONGODB_URI="mongodb+srv://user:pass@cluster.mongodb.net" + export MONGODB_DB="llm-bench" # or llmbench_staging + ``` + +2. **Tools installed**: + - Either `mongosh` (for shell script) + - Or `pymongo` (for Python script) + +3. **Anthropic API Key**: + ```bash + export ANTHROPIC_API_KEY="sk-ant-..." + ``` + +4. **Model ID**: Get the exact model identifier from Anthropic + +--- + +### Method 1: Using the Python Script (Recommended) + +I created a helper script for you at `/workspace/scripts/add_model_to_db.py`: + +```bash +# First, verify the model ID from Anthropic's docs +# Then run: + +python scripts/add_model_to_db.py \ + --provider anthropic \ + --model-id claude-4-sonnet-20250929 + +# To list existing models: +python scripts/add_model_to_db.py --list --provider anthropic +``` + +**Features**: +- ✅ Checks for existing models +- ✅ Confirms before updating +- ✅ Verifies the insertion +- ✅ Shows next steps +- ✅ Can list all models + +--- + +### Method 2: Using the Shell Script + +I created a bash script at `/workspace/scripts/add_sonnet_4.5.sh`: + +```bash +# Edit the script to set the correct MODEL_ID +# Then run: + +bash scripts/add_sonnet_4.5.sh +``` + +--- + +### Method 3: Using the Original MongoDB Script + +The existing seed script: + +```bash +PROVIDER=anthropic \ +MODEL_ID=claude-4-sonnet-20250929 \ +mongosh "$MONGODB_URI/$MONGODB_DB" scripts/seed_model.js +``` + +--- + +### Method 4: Direct MongoDB Command + +```bash +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.models.insertOne({ + provider: "anthropic", + model_id: "claude-4-sonnet-20250929", + enabled: true, + added_at: new Date() +}) +' +``` + +--- + +## Testing the Integration + +### 1. Verify the model was added: + +```bash +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.models.find({ + provider: "anthropic", + model_id: "claude-4-sonnet-20250929" +}).pretty() +' +``` + +### 2. Run a test benchmark: + +```bash +python api/bench_headless.py \ + --providers anthropic \ + --limit 1 \ + --fresh-minutes 0 +``` + +### 3. Check for errors: + +```bash +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.errors_cloud.find({ + provider: "anthropic", + model_name: /claude-4/ +}).sort({ts: -1}).limit(5).pretty() +' +``` + +### 4. View results: + +```bash +mongosh "$MONGODB_URI/$MONGODB_DB" --eval ' +db.metrics_cloud_staging.find({ + provider: "anthropic", + model_name: "claude-4-sonnet-20250929" +}).sort({gen_ts: -1}).limit(1).pretty() +' +``` + +--- + +## Files I Created for You + +1. **`/workspace/ADD_SONNET_4.5_INSTRUCTIONS.md`** + - Complete step-by-step guide + - Troubleshooting tips + - Environment setup + +2. **`/workspace/scripts/add_model_to_db.py`** + - Python script to add models + - List existing models + - Interactive confirmation + - Error handling + +3. **`/workspace/scripts/add_sonnet_4.5.sh`** + - Bash script specifically for Sonnet 4.5 + - Validation checks + - Verification steps + +4. **`/workspace/RESEARCH_SUMMARY.md`** (this file) + - Complete research findings + - All methods to add models + +--- + +## Key Files in the Codebase + +### Runtime Code +- `/workspace/api/llm_bench/models_db.py` - Loads models from MongoDB +- `/workspace/api/llm_bench/cloud/providers/anthropic.py` - Anthropic API integration +- `/workspace/api/bench_headless.py` - Runs benchmarks + +### Configuration +- `/workspace/cloud/models.json` - Reference only (not used at runtime) +- Environment variables - The actual configuration source + +### Scripts +- `/workspace/scripts/seed_model.js` - MongoDB script to add single model +- `/workspace/scripts/add_model_to_db.py` - **NEW**: Python helper script +- `/workspace/scripts/add_sonnet_4.5.sh` - **NEW**: Bash helper script + +--- + +## Important Notes + +### Do You Need Local Server Access? + +**Maybe** - You need access to: +1. ✅ **MongoDB instance**: To add the model to the database +2. ✅ **Environment variables**: To set `MONGODB_URI`, `ANTHROPIC_API_KEY`, etc. +3. ❌ **Not needed**: Local model files (Anthropic is cloud API only) +4. ❌ **Not needed**: Special hardware/GPU (for API-based models) + +**If you can**: +- Connect to the MongoDB instance from anywhere → Can add the model remotely +- Set environment variables on the server → Can run benchmarks + +### Cloud vs Local Benchmarks + +This system has two types: +- **Cloud benchmarks**: Test API-based models (OpenAI, Anthropic, etc.) - This is what you want +- **Local benchmarks**: Test locally hosted models (Llama, etc.) - Not relevant here + +Anthropic models are **cloud benchmarks** - they just need API access. + +--- + +## Next Steps + +1. **Get the exact model ID**: + - Check https://docs.anthropic.com/en/docs/models-overview + - Or use Anthropic's API to list available models + - The date suffix is usually the release date (YYYYMMDD) + +2. **Ensure you have MongoDB access**: + ```bash + mongosh "$MONGODB_URI/$MONGODB_DB" --eval 'db.models.countDocuments()' + ``` + +3. **Add the model** using one of the methods above + +4. **Test it** with a single benchmark run + +5. **Monitor** the first few runs for any issues + +--- + +## Troubleshooting + +### "Model not found" errors +- Check the model_id exactly matches what Anthropic expects +- Verify with a direct API call first + +### "Authentication failed" +- Check `ANTHROPIC_API_KEY` is set correctly +- Verify the key has access to the new model + +### Model not appearing in benchmarks +- Check `enabled: true` in database +- Verify `fresh-minutes` isn't skipping it +- Check the scheduler picked it up + +### API rate limits +- Anthropic has rate limits per model +- May need to adjust `SLEEP_SECONDS` between runs + +--- + +## Questions to Answer + +Before proceeding, you should: + +1. ✅ **Verify the model ID**: What is the exact identifier from Anthropic? +2. ✅ **Check API access**: Can you call this model via Anthropic's API? +3. ✅ **Confirm MongoDB access**: Can you connect and write to the database? +4. ✅ **Test environment**: Are all environment variables set correctly? + +--- + +## Summary + +**The good news**: This is 100% fixable via code. The system is designed to support adding new models easily through the database. + +**What you need**: +1. The correct model ID from Anthropic +2. Access to your MongoDB instance +3. Your Anthropic API key + +**What you do**: +1. Run one of the scripts I created +2. Test with a benchmark +3. Monitor for any issues + +**No code changes needed** - everything is configuration-driven through MongoDB! \ No newline at end of file diff --git a/scripts/add_model_to_db.py b/scripts/add_model_to_db.py new file mode 100755 index 0000000..55969fe --- /dev/null +++ b/scripts/add_model_to_db.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python3 +""" +Script to add a model to the LLM Benchmarks MongoDB database. + +This provides a Python alternative to using the seed_model.js script. + +Usage: + python scripts/add_model_to_db.py --provider anthropic --model-id claude-4-sonnet-20250929 + +Requirements: + - pymongo installed (pip install pymongo) + - MONGODB_URI environment variable set +""" + +import argparse +import os +import sys +from datetime import datetime + +try: + from pymongo import MongoClient +except ImportError: + print("❌ Error: pymongo not installed") + print(" Install it with: pip install pymongo") + sys.exit(1) + + +def add_model_to_db(provider: str, model_id: str, enabled: bool = True) -> bool: + """Add a model to the MongoDB models collection.""" + + # Get MongoDB configuration from environment + uri = os.getenv("MONGODB_URI") + if not uri: + print("❌ Error: MONGODB_URI environment variable not set") + print(" Example: export MONGODB_URI='mongodb+srv://user:pass@cluster.mongodb.net'") + return False + + db_name = os.getenv("MONGODB_DB", "llm-bench") + coll_name = os.getenv("MONGODB_COLLECTION_MODELS", "models") + + print(f"Configuration:") + print(f" Provider: {provider}") + print(f" Model ID: {model_id}") + print(f" Enabled: {enabled}") + print(f" Database: {db_name}") + print(f" Collection: {coll_name}") + print() + + # Create the document + doc = { + "provider": provider, + "model_id": model_id, + "enabled": enabled, + "added_at": datetime.now(), + } + + try: + # Connect to MongoDB + print("Connecting to MongoDB...") + client = MongoClient(uri) + + # Get collection + coll = client[db_name][coll_name] + + # Check if model already exists + existing = coll.find_one({"provider": provider, "model_id": model_id}) + if existing: + print(f"⚠️ Warning: Model already exists in database") + print(f" Existing document: {existing}") + + response = input("\nUpdate it? (y/N): ").strip().lower() + if response == 'y': + result = coll.update_one( + {"provider": provider, "model_id": model_id}, + {"$set": {"enabled": enabled, "updated_at": datetime.now()}} + ) + print(f"✅ Model updated successfully! (matched: {result.matched_count}, modified: {result.modified_count})") + else: + print("Skipped update.") + return True + else: + # Insert the document + print("Inserting model into database...") + result = coll.insert_one(doc) + print(f"✅ Model added successfully! ID: {result.inserted_id}") + + # Verify + print("\nVerifying...") + found = coll.find_one({"provider": provider, "model_id": model_id}) + if found: + print("Document in database:") + for key, value in found.items(): + if key != "_id": + print(f" {key}: {value}") + + client.close() + return True + + except Exception as e: + print(f"❌ Error: {e}") + return False + + +def list_models(provider: str = None): + """List all models in the database.""" + + uri = os.getenv("MONGODB_URI") + if not uri: + print("❌ Error: MONGODB_URI environment variable not set") + return False + + db_name = os.getenv("MONGODB_DB", "llm-bench") + coll_name = os.getenv("MONGODB_COLLECTION_MODELS", "models") + + try: + client = MongoClient(uri) + coll = client[db_name][coll_name] + + query = {"enabled": True} + if provider: + query["provider"] = provider + + models = list(coll.find(query, {"provider": 1, "model_id": 1, "enabled": 1, "_id": 0})) + + if not models: + print(f"No enabled models found" + (f" for provider: {provider}" if provider else "")) + return True + + print(f"\nEnabled models" + (f" for provider: {provider}" if provider else "") + ":") + print(f"{'Provider':<20} {'Model ID':<50} {'Enabled':<10}") + print("-" * 80) + + for model in sorted(models, key=lambda x: (x.get('provider', ''), x.get('model_id', ''))): + provider_name = model.get('provider', 'N/A') + model_id = model.get('model_id', 'N/A') + enabled = model.get('enabled', False) + print(f"{provider_name:<20} {model_id:<50} {enabled!s:<10}") + + print(f"\nTotal: {len(models)} models") + + client.close() + return True + + except Exception as e: + print(f"❌ Error: {e}") + return False + + +def main(): + parser = argparse.ArgumentParser( + description="Add or manage models in the LLM Benchmarks database", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Add Claude Sonnet 4.5 + python scripts/add_model_to_db.py --provider anthropic --model-id claude-4-sonnet-20250929 + + # Add a disabled model (won't be benchmarked) + python scripts/add_model_to_db.py --provider openai --model-id gpt-5 --disabled + + # List all enabled Anthropic models + python scripts/add_model_to_db.py --list --provider anthropic + + # List all enabled models + python scripts/add_model_to_db.py --list + +Environment Variables: + MONGODB_URI - MongoDB connection string (required) + MONGODB_DB - Database name (default: llm-bench) + MONGODB_COLLECTION_MODELS - Collection name (default: models) + """ + ) + + parser.add_argument("--provider", help="Provider name (e.g., anthropic, openai)") + parser.add_argument("--model-id", help="Model identifier (e.g., claude-4-sonnet-20250929)") + parser.add_argument("--disabled", action="store_true", help="Add model as disabled") + parser.add_argument("--list", action="store_true", help="List models instead of adding") + + args = parser.parse_args() + + if args.list: + success = list_models(provider=args.provider) + sys.exit(0 if success else 1) + + if not args.provider or not args.model_id: + parser.print_help() + print("\n❌ Error: --provider and --model-id are required (unless using --list)") + sys.exit(1) + + # Special check for Claude Sonnet 4.5 + if args.provider == "anthropic" and "claude-4" in args.model_id: + print("⚠️ Note: Please verify this is the correct model ID from Anthropic's documentation:") + print(" https://docs.anthropic.com/en/docs/models-overview") + print() + + enabled = not args.disabled + success = add_model_to_db(args.provider, args.model_id, enabled) + + if success: + print("\n" + "="*80) + print("Next Steps:") + print("="*80) + if args.provider == "anthropic": + print("1. Ensure ANTHROPIC_API_KEY is set in your environment") + print("2. Test the model with a benchmark run:") + print(f" python api/bench_headless.py --providers {args.provider} --limit 1") + print("3. Check for any errors in the errors_cloud collection") + print() + print("To list all enabled models for this provider:") + print(f" python scripts/add_model_to_db.py --list --provider {args.provider}") + print() + + sys.exit(0 if success else 1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/add_sonnet_4.5.sh b/scripts/add_sonnet_4.5.sh new file mode 100755 index 0000000..5c9b4d2 --- /dev/null +++ b/scripts/add_sonnet_4.5.sh @@ -0,0 +1,117 @@ +#!/bin/bash + +# Script to add Claude Sonnet 4.5 to the LLM Benchmarks database +# +# Prerequisites: +# - mongosh installed +# - MONGODB_URI and MONGODB_DB environment variables set +# +# Usage: +# 1. First, find the exact model ID from Anthropic's documentation +# 2. Update the MODEL_ID variable below with the correct value +# 3. Ensure MONGODB_URI and MONGODB_DB are set in your environment +# 4. Run this script: bash scripts/add_sonnet_4.5.sh + +set -e # Exit on error + +# ==================================== +# CONFIGURATION - UPDATE THESE VALUES +# ==================================== + +# TODO: Replace with the actual model ID from Anthropic's docs +# Common possibilities based on their naming pattern: +# - claude-4-sonnet-20250929 +# - claude-4-5-sonnet-20250929 +# - claude-sonnet-4-5-20250929 +MODEL_ID="claude-4-sonnet-20250929" # <-- VERIFY AND UPDATE THIS + +PROVIDER="anthropic" +ENABLED="true" + +# ==================================== +# VALIDATION +# ==================================== + +echo "==========================================" +echo "Add Claude Sonnet 4.5 to LLM Benchmarks" +echo "==========================================" +echo "" + +# Check if mongosh is installed +if ! command -v mongosh &> /dev/null; then + echo "❌ Error: mongosh is not installed" + echo " Install it from: https://www.mongodb.com/docs/mongodb-shell/install/" + exit 1 +fi + +# Check if required environment variables are set +if [ -z "$MONGODB_URI" ]; then + echo "❌ Error: MONGODB_URI environment variable is not set" + echo " Example: export MONGODB_URI='mongodb+srv://user:pass@cluster.mongodb.net'" + exit 1 +fi + +if [ -z "$MONGODB_DB" ]; then + echo "⚠️ Warning: MONGODB_DB not set, using default: llm-bench" + MONGODB_DB="llm-bench" +fi + +echo "Configuration:" +echo " Provider: $PROVIDER" +echo " Model ID: $MODEL_ID" +echo " Enabled: $ENABLED" +echo " Database: $MONGODB_DB" +echo "" + +# Warn about model ID +if [[ "$MODEL_ID" == "claude-4-sonnet-20250929" ]]; then + echo "⚠️ WARNING: You are using the default model ID" + echo " Please verify this is correct from Anthropic's documentation:" + echo " https://docs.anthropic.com/en/docs/models-overview" + echo "" + read -p "Continue anyway? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Aborted." + exit 0 + fi +fi + +# ==================================== +# ADD MODEL TO DATABASE +# ==================================== + +echo "" +echo "Adding model to database..." + +PROVIDER=$PROVIDER MODEL_ID=$MODEL_ID ENABLED=$ENABLED \ + mongosh "$MONGODB_URI/$MONGODB_DB" scripts/seed_model.js + +if [ $? -eq 0 ]; then + echo "✅ Model added successfully!" +else + echo "❌ Failed to add model" + exit 1 +fi + +# ==================================== +# VERIFY +# ==================================== + +echo "" +echo "Verifying model was added..." + +QUERY="db.models.find({provider: '$PROVIDER', model_id: '$MODEL_ID'}).pretty()" +mongosh "$MONGODB_URI/$MONGODB_DB" --eval "$QUERY" + +echo "" +echo "==========================================" +echo "Next Steps:" +echo "==========================================" +echo "1. Ensure ANTHROPIC_API_KEY is set in your environment" +echo "2. Test with: python api/bench_headless.py --providers anthropic --limit 1" +echo "3. Check for errors in the errors_cloud collection" +echo "" +echo "To list all enabled Anthropic models:" +echo " mongosh \"\$MONGODB_URI/\$MONGODB_DB\" --eval 'db.models.find({provider: \"anthropic\", enabled: true}).pretty()'" +echo "" \ No newline at end of file diff --git a/scripts/test_anthropic_models.py b/scripts/test_anthropic_models.py new file mode 100755 index 0000000..4a3fdc8 --- /dev/null +++ b/scripts/test_anthropic_models.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python3 +""" +Test script to verify Anthropic API access and list available models. + +This helps you: +1. Verify your ANTHROPIC_API_KEY is set correctly +2. Test connectivity to Anthropic's API +3. Discover the exact model IDs available to you + +Usage: + python scripts/test_anthropic_models.py + + # Test a specific model + python scripts/test_anthropic_models.py --test-model claude-4-sonnet-20250929 + +Requirements: + pip install anthropic +""" + +import argparse +import os +import sys +from datetime import datetime + +try: + from anthropic import Anthropic +except ImportError: + print("❌ Error: anthropic package not installed") + print(" Install it with: pip install anthropic") + sys.exit(1) + + +def test_api_connection(): + """Test basic connection to Anthropic API.""" + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + print("❌ Error: ANTHROPIC_API_KEY environment variable not set") + print(" Get your API key from: https://console.anthropic.com/") + return False + + print(f"API Key: {api_key[:10]}...{api_key[-4:]} (length: {len(api_key)})") + print() + + try: + client = Anthropic(api_key=api_key) + print("✅ Successfully created Anthropic client") + return client + except Exception as e: + print(f"❌ Failed to create Anthropic client: {e}") + return None + + +def test_model(client, model_id: str): + """Test a specific model with a simple query.""" + + print(f"\n{'='*80}") + print(f"Testing Model: {model_id}") + print('='*80) + + try: + print("Sending test message...") + start_time = datetime.now() + + # Use non-streaming for simplicity + message = client.messages.create( + model=model_id, + max_tokens=50, + messages=[ + {"role": "user", "content": "Say 'Hello, I am working!' and nothing else."} + ] + ) + + end_time = datetime.now() + duration = (end_time - start_time).total_seconds() + + print(f"✅ Model {model_id} is working!") + print(f"\nResponse:") + print(f" Content: {message.content[0].text if message.content else 'N/A'}") + print(f" Input tokens: {message.usage.input_tokens}") + print(f" Output tokens: {message.usage.output_tokens}") + print(f" Duration: {duration:.2f}s") + print(f" Stop reason: {message.stop_reason}") + + return True + + except Exception as e: + error_msg = str(e) + print(f"❌ Model test failed: {error_msg}") + + if "model:" in error_msg.lower() or "not found" in error_msg.lower(): + print("\n💡 This might mean:") + print(" - The model ID is incorrect") + print(" - The model is not available to your API key") + print(" - The model name has changed") + elif "authentication" in error_msg.lower(): + print("\n💡 Check your ANTHROPIC_API_KEY") + elif "rate" in error_msg.lower(): + print("\n💡 You may have hit rate limits") + + return False + + +def list_known_models(): + """List models we know about from the codebase.""" + + known_models = [ + ("claude-2.1", "Claude 2.1", "Legacy"), + ("claude-3-haiku-20240307", "Claude 3 Haiku", "Fast & affordable"), + ("claude-3-5-haiku-20241022", "Claude 3.5 Haiku", "Latest Haiku"), + ("claude-3-sonnet-20240229", "Claude 3 Sonnet", "Balanced"), + ("claude-3-5-sonnet-20240620", "Claude 3.5 Sonnet", "Previous flagship"), + ("claude-3-7-sonnet-20250219", "Claude 3.7 Sonnet", "Latest in your DB"), + ("claude-3-opus-20240229", "Claude 3 Opus", "Most capable (expensive)"), + ] + + print("\n" + "="*80) + print("Known Anthropic Models") + print("="*80) + print(f"{'Model ID':<35} {'Name':<25} {'Notes':<20}") + print("-"*80) + + for model_id, name, notes in known_models: + print(f"{model_id:<35} {name:<25} {notes:<20}") + + print("\nFor Claude Sonnet 4.5 (released Sep 29, 2025), likely IDs:") + print(" - claude-4-sonnet-20250929") + print(" - claude-4-5-sonnet-20250929") + print("\nVerify at: https://docs.anthropic.com/en/docs/models-overview") + + +def main(): + parser = argparse.ArgumentParser( + description="Test Anthropic API access and available models", + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + "--test-model", + help="Test a specific model ID (e.g., claude-4-sonnet-20250929)" + ) + + parser.add_argument( + "--test-all-known", + action="store_true", + help="Test all known models from the codebase" + ) + + args = parser.parse_args() + + print("="*80) + print("Anthropic API Model Test") + print("="*80) + print() + + # Test API connection + client = test_api_connection() + if not client: + sys.exit(1) + + # List known models + list_known_models() + + # Test specific model + if args.test_model: + success = test_model(client, args.test_model) + sys.exit(0 if success else 1) + + # Test all known models + if args.test_all_known: + print("\n" + "="*80) + print("Testing All Known Models") + print("="*80) + + models_to_test = [ + "claude-3-5-sonnet-20240620", + "claude-3-7-sonnet-20250219", + "claude-3-5-haiku-20241022", + ] + + results = {} + for model_id in models_to_test: + print() + success = test_model(client, model_id) + results[model_id] = success + + print("\n" + "="*80) + print("Summary") + print("="*80) + for model_id, success in results.items(): + status = "✅ Working" if success else "❌ Failed" + print(f"{status:<15} {model_id}") + + sys.exit(0) + + # Default: instructions + print("\n" + "="*80) + print("Next Steps") + print("="*80) + print("\n1. Test a specific model:") + print(" python scripts/test_anthropic_models.py --test-model claude-4-sonnet-20250929") + print("\n2. Test all known models:") + print(" python scripts/test_anthropic_models.py --test-all-known") + print("\n3. Once you find the correct model ID, add it to the database:") + print(" python scripts/add_model_to_db.py --provider anthropic --model-id ") + print() + + +if __name__ == "__main__": + main() \ No newline at end of file