From 28215e8577c4e38ef42e72d04be0e372036110e4 Mon Sep 17 00:00:00 2001 From: Simantak Dabhade Date: Thu, 29 Jan 2026 13:25:33 -0800 Subject: [PATCH 1/2] revamped the skill.md to switch to bash and have better descriptions to help people get an API key --- skills/tinyfish-web-agent/SKILL.md | 139 ++++++++++++------- skills/tinyfish-web-agent/scripts/extract.py | 79 ----------- skills/tinyfish-web-agent/scripts/extract.sh | 70 ++++++++++ 3 files changed, 160 insertions(+), 128 deletions(-) delete mode 100644 skills/tinyfish-web-agent/scripts/extract.py create mode 100755 skills/tinyfish-web-agent/scripts/extract.sh diff --git a/skills/tinyfish-web-agent/SKILL.md b/skills/tinyfish-web-agent/SKILL.md index 66a4e9c..dfb9392 100644 --- a/skills/tinyfish-web-agent/SKILL.md +++ b/skills/tinyfish-web-agent/SKILL.md @@ -7,6 +7,39 @@ description: Use TinyFish/Mino web agent to extract/scrape websites, extract dat Requires: `MINO_API_KEY` environment variable +## Pre-flight Check (REQUIRED) + +Before making any API call, **always** run this first to verify the key is available: + +```bash +[ -n "$MINO_API_KEY" ] && echo "MINO_API_KEY is set" || echo "MINO_API_KEY is NOT set" +``` + +If the key is **not set**, you **MUST stop and ask the user** to add their API key. Do **NOT** fall back to other tools or approaches — the task requires TinyFish/Mino. + +Tell the user: + +> You need a Mino API key. Get one at: https://mino.ai/api-keys +> +> Then set it so the agent can use it: +> +> **Option 1 — Environment variable (works everywhere):** +> ``` +> export MINO_API_KEY="your-key-here" +> ``` +> +> **Option 2 — Claude Code settings (Claude Code only):** +> Add to `~/.claude/settings.local.json`: +> ```json +> { +> "env": { +> "MINO_API_KEY": "your-key-here" +> } +> } +> ``` + +Do NOT proceed until the key is confirmed available. + ## Best Practices 1. 
**Specify JSON format**: Always describe the exact structure you want returned @@ -16,91 +49,99 @@ Requires: `MINO_API_KEY` environment variable Extract data from a page. Specify the JSON structure you want: -```python -import requests -import json -import os - -response = requests.post( - "https://mino.ai/v1/automation/run-sse", - headers={ - "X-API-Key": os.environ["MINO_API_KEY"], - "Content-Type": "application/json", - }, - json={ - "url": "https://example.com", - "goal": "Extract product info as JSON: {\"name\": str, \"price\": str, \"in_stock\": bool}", - }, - stream=True, -) - -for line in response.iter_lines(): - if line: - line_str = line.decode("utf-8") - if line_str.startswith("data: "): - event = json.loads(line_str[6:]) - if event.get("type") == "COMPLETE" and event.get("status") == "COMPLETED": - print(json.dumps(event["resultJson"], indent=2)) +```bash +curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: $MINO_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.com", + "goal": "Extract product info as JSON: {\"name\": str, \"price\": str, \"in_stock\": bool}" + }' ``` ## Multiple Items Extract lists of data with explicit structure: -```python -json={ +```bash +curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: $MINO_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ "url": "https://example.com/products", - "goal": "Extract all products as JSON array: [{\"name\": str, \"price\": str, \"url\": str}]", -} + "goal": "Extract all products as JSON array: [{\"name\": str, \"price\": str, \"url\": str}]" + }' ``` ## Stealth Mode -For bot-protected sites: +For bot-protected sites, add `"browser_profile": "stealth"` to the request body: -```python -json={ +```bash +curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: $MINO_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ "url": "https://protected-site.com", "goal": "Extract product 
data as JSON: {\"name\": str, \"price\": str, \"description\": str}", - "browser_profile": "stealth", -} + "browser_profile": "stealth" + }' ``` ## Proxy -Route through specific country: +Route through a specific country by adding `"proxy_config"` to the body: -```python -json={ +```bash +curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: $MINO_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ "url": "https://geo-restricted-site.com", "goal": "Extract pricing data as JSON: {\"item\": str, \"price\": str, \"currency\": str}", "browser_profile": "stealth", - "proxy_config": { - "enabled": True, - "country_code": "US", - }, -} + "proxy_config": {"enabled": true, "country_code": "US"} + }' ``` ## Output -Results are in `event["resultJson"]` when `event["type"] == "COMPLETE"` +The SSE stream returns `data: {...}` lines. The final result is the event where `type == "COMPLETE"` and `status == "COMPLETED"` — the extracted data is in the `resultJson` field. Claude reads the raw SSE output directly; no script-side parsing is needed. 
## Parallel Extraction -When extracting from multiple independent sources, make separate parallel API calls instead of combining into one prompt: +When extracting from multiple independent sources, make separate parallel curl calls instead of combining into one prompt: **Good** - Parallel calls: -```python +```bash # Compare pizza prices - run these simultaneously -call_1 = extract("https://pizzahut.com", "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]") -call_2 = extract("https://dominos.com", "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]") +curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: $MINO_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://pizzahut.com", + "goal": "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]" + }' + +curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: $MINO_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://dominos.com", + "goal": "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]" + }' ``` **Bad** - Single combined call: -```python +```bash # Don't do this - less reliable and slower -extract("https://pizzahut.com", "Extract prices from Pizza Hut and also go to Dominos...") +curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: $MINO_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://pizzahut.com", + "goal": "Extract prices from Pizza Hut and also go to Dominos..." + }' ``` Each independent extraction task should be its own API call. This is faster (parallel execution) and more reliable. 
diff --git a/skills/tinyfish-web-agent/scripts/extract.py b/skills/tinyfish-web-agent/scripts/extract.py deleted file mode 100644 index 0238039..0000000 --- a/skills/tinyfish-web-agent/scripts/extract.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python3 -""" -TinyFish web extract/scrape helper - -Usage: - extract.py <url> <goal> [--stealth] [--proxy US] - -Best practice: Specify the JSON format you want in the goal for better results. - -Examples: - extract.py "https://example.com" "Extract product as JSON: {\"name\": str, \"price\": str}" - extract.py "https://site.com" "Get all links as JSON: [{\"text\": str, \"url\": str}]" --stealth - extract.py "https://site.com" "Extract items as JSON: [{\"title\": str, \"price\": str}]" --stealth --proxy US -""" - -import os -import sys -import json -import urllib.request -import argparse - - -def extract(url, goal, stealth=False, proxy_country=None): - """Extract/scrape data from a website using TinyFish""" - api_key = os.environ.get("MINO_API_KEY") - if not api_key: - print("Error: MINO_API_KEY environment variable not set", file=sys.stderr) - sys.exit(1) - - payload = { - "url": url, - "goal": goal, - } - - if stealth: - payload["browser_profile"] = "stealth" - - if proxy_country: - payload["proxy_config"] = { - "enabled": True, - "country_code": proxy_country, - } - - req = urllib.request.Request( - "https://mino.ai/v1/automation/run-sse", - data=json.dumps(payload).encode(), - headers={ - "X-API-Key": api_key, - "Content-Type": "application/json", - } - ) - - print(f"Extracting from {url}...", file=sys.stderr) - - with urllib.request.urlopen(req) as response: - for line in response: - line_str = line.decode("utf-8").strip() - if line_str.startswith("data: "): - event = json.loads(line_str[6:]) - - # Print status updates - if event.get("type") == "STATUS_UPDATE": - print(f"[{event.get('status')}] {event.get('message', '')}", file=sys.stderr) - - # Print final result - if event.get("type") == "COMPLETE" and event.get("status") == 
"COMPLETED": - print(json.dumps(event["resultJson"], indent=2)) - return event["resultJson"] - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="TinyFish web extract/scrape tool") - parser.add_argument("url", help="URL to extract/scrape from") - parser.add_argument("goal", help="What to extract (natural language)") - parser.add_argument("--stealth", action="store_true", help="Use stealth mode") - parser.add_argument("--proxy", help="Proxy country code (e.g., US, UK, DE)") - - args = parser.parse_args() - extract(args.url, args.goal, args.stealth, args.proxy) diff --git a/skills/tinyfish-web-agent/scripts/extract.sh b/skills/tinyfish-web-agent/scripts/extract.sh new file mode 100755 index 0000000..022a10a --- /dev/null +++ b/skills/tinyfish-web-agent/scripts/extract.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# +# TinyFish web extract/scrape helper +# +# Usage: +# extract.sh <url> <goal> [--stealth] [--proxy COUNTRY] +# +# Examples: +# extract.sh "https://example.com" 'Extract product as JSON: {"name": str, "price": str}' +# extract.sh "https://site.com" 'Get all links as JSON: [{"text": str, "url": str}]' --stealth +# extract.sh "https://site.com" 'Extract items' --stealth --proxy US + +set -euo pipefail + +if [ $# -lt 2 ]; then + echo "Usage: extract.sh <url> <goal> [--stealth] [--proxy COUNTRY]" >&2 + exit 1 +fi + +if [ -z "${MINO_API_KEY:-}" ]; then + echo "Error: MINO_API_KEY environment variable not set" >&2 + exit 1 +fi + +URL="$1" +GOAL="$2" +shift 2 + +STEALTH=false +PROXY_COUNTRY="" + +while [ $# -gt 0 ]; do + case "$1" in + --stealth) + STEALTH=true + shift + ;; + --proxy) + PROXY_COUNTRY="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" >&2 + exit 1 + ;; + esac +done + +# Build JSON payload — escape URL and goal for safe embedding +JSON_URL=$(printf '%s' "$URL" | sed 's/\\/\\\\/g; s/"/\\"/g') +JSON_GOAL=$(printf '%s' "$GOAL" | sed 's/\\/\\\\/g; s/"/\\"/g') + +PAYLOAD="{\"url\":\"${JSON_URL}\",\"goal\":\"${JSON_GOAL}\"" + +if [ "$STEALTH" = true ]; 
then + PAYLOAD="${PAYLOAD},\"browser_profile\":\"stealth\"" +fi + +if [ -n "$PROXY_COUNTRY" ]; then + PAYLOAD="${PAYLOAD},\"proxy_config\":{\"enabled\":true,\"country_code\":\"${PROXY_COUNTRY}\"}" +fi + +PAYLOAD="${PAYLOAD}}" + +echo "Extracting from ${URL}..." >&2 + +exec curl -N -s -X POST "https://mino.ai/v1/automation/run-sse" \ + -H "X-API-Key: ${MINO_API_KEY}" \ + -H "Content-Type: application/json" \ + -d "$PAYLOAD" From c2076560518af32c2b78d62bff5a4c7cf8c94b06 Mon Sep 17 00:00:00 2001 From: Simantak Dabhade Date: Thu, 29 Jan 2026 13:46:59 -0800 Subject: [PATCH 2/2] fixing up some md issues --- skills/tinyfish-web-agent/SKILL.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skills/tinyfish-web-agent/SKILL.md b/skills/tinyfish-web-agent/SKILL.md index dfb9392..8ca62ef 100644 --- a/skills/tinyfish-web-agent/SKILL.md +++ b/skills/tinyfish-web-agent/SKILL.md @@ -19,12 +19,12 @@ If the key is **not set**, you **MUST stop and ask the user** to add their API k Tell the user: -> You need a Mino API key. Get one at: https://mino.ai/api-keys +> You need a Mino API key. Get one at: <https://mino.ai/api-keys> > > Then set it so the agent can use it: > > **Option 1 — Environment variable (works everywhere):** -> ``` +> ```bash > export MINO_API_KEY="your-key-here" > ``` >