**`skills/tinyfish-web-agent/SKILL.md`** (new file, 106 additions)
---
name: tinyfish
description: Use the TinyFish/Mino web agent to extract/scrape data from websites and automate browser actions using natural language. Use when you need to extract data from websites, handle bot-protected sites, or automate web tasks.
---

# TinyFish Web Agent

Requires: `MINO_API_KEY` environment variable

## Best Practices

1. **Specify JSON format**: Always describe the exact structure you want returned
2. **Parallel calls**: When extracting from multiple independent sites, make separate parallel calls instead of combining into one prompt

## Basic Extract/Scrape

Extract data from a page. Specify the JSON structure you want:

```python
import requests
import json
import os

response = requests.post(
    "https://mino.ai/v1/automation/run-sse",
    headers={
        "X-API-Key": os.environ["MINO_API_KEY"],
        "Content-Type": "application/json",
    },
    json={
        "url": "https://example.com",
        "goal": "Extract product info as JSON: {\"name\": str, \"price\": str, \"in_stock\": bool}",
    },
    stream=True,
)

for line in response.iter_lines():
    if line:
        line_str = line.decode("utf-8")
        if line_str.startswith("data: "):
            event = json.loads(line_str[6:])
            if event.get("type") == "COMPLETE" and event.get("status") == "COMPLETED":
                print(json.dumps(event["resultJson"], indent=2))
```

## Multiple Items

Extract lists of data with explicit structure:

```python
json={
    "url": "https://example.com/products",
    "goal": "Extract all products as JSON array: [{\"name\": str, \"price\": str, \"url\": str}]",
}
```

## Stealth Mode

For bot-protected sites:

```python
json={
    "url": "https://protected-site.com",
    "goal": "Extract product data as JSON: {\"name\": str, \"price\": str, \"description\": str}",
    "browser_profile": "stealth",
}
```

## Proxy

Route through specific country:

```python
json={
    "url": "https://geo-restricted-site.com",
    "goal": "Extract pricing data as JSON: {\"item\": str, \"price\": str, \"currency\": str}",
    "browser_profile": "stealth",
    "proxy_config": {
        "enabled": True,
        "country_code": "US",
    },
}
```

## Output

Results are in `event["resultJson"]` when `event["type"] == "COMPLETE"` and `event["status"] == "COMPLETED"`.
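
If you call the API from more than one place, a small helper that drains the stream keeps the parsing in one spot. This is a minimal sketch, assuming the `requests` streaming response from the basic example above; the `read_result` name and the choice to raise when the stream ends without a completed event are illustrative, not part of the Mino API:

```python
import json

def read_result(response):
    """Drain an SSE response and return resultJson from the terminal event."""
    for line in response.iter_lines():
        if not line:
            continue
        line_str = line.decode("utf-8")
        if line_str.startswith("data: "):
            event = json.loads(line_str[6:])
            # Same completion check as the basic example above
            if event.get("type") == "COMPLETE" and event.get("status") == "COMPLETED":
                return event["resultJson"]
    # Assumption: no COMPLETED event means the run failed or was cut short
    raise RuntimeError("stream ended without a COMPLETED event")
```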

## Parallel Extraction

When extracting from multiple independent sources, make separate parallel API calls instead of combining into one prompt:

**Good** - Parallel calls:
```python
# Compare pizza prices - run these simultaneously
call_1 = extract("https://pizzahut.com", "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]")
call_2 = extract("https://dominos.com", "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]")
```

**Bad** - Single combined call:
```python
# Don't do this - less reliable and slower
extract("https://pizzahut.com", "Extract prices from Pizza Hut and also go to Dominos...")
```

Each independent extraction task should be its own API call. This is faster (parallel execution) and more reliable.
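
To make the parallelism literal rather than implied, a thread pool works well, since each call spends most of its time waiting on the network. A minimal sketch, assuming an `extract(url, goal)` helper like the one in `scripts/extract.py` that returns the parsed `resultJson`:

```python
from concurrent.futures import ThreadPoolExecutor

GOAL = 'Extract pizza prices as JSON: [{"name": str, "price": str}]'
URLS = ["https://pizzahut.com", "https://dominos.com"]

# Each URL gets its own API call; the pool runs them concurrently.
with ThreadPoolExecutor(max_workers=len(URLS)) as pool:
    results = list(pool.map(lambda url: extract(url, GOAL), URLS))

# results preserves input order: results[0] is Pizza Hut, results[1] is Domino's.
```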
**`skills/tinyfish-web-agent/scripts/extract.py`** (new file, 79 additions)
```python
#!/usr/bin/env python3
"""
TinyFish web extract/scrape helper

Usage:
    extract.py <url> <goal> [--stealth] [--proxy US]

Best practice: Specify the JSON format you want in the goal for better results.

Examples:
    extract.py "https://example.com" "Extract product as JSON: {\"name\": str, \"price\": str}"
    extract.py "https://site.com" "Get all links as JSON: [{\"text\": str, \"url\": str}]" --stealth
    extract.py "https://site.com" "Extract items as JSON: [{\"title\": str, \"price\": str}]" --stealth --proxy US
"""

import os
import sys
import json
import urllib.request
import argparse
```

Comment on lines +16 to +21

⚠️ Potential issue | 🟠 Major


Add timeout + HTTP/URL error handling for the API call.

The blocking network call at line 55 lacks both a timeout parameter and exception handling. Without a timeout, this call can hang indefinitely, and unhandled HTTP/URL errors will crash the script with raw tracebacks instead of graceful error messages.

🛠️ Suggested fix (timeout + friendly errors)

```diff
-import urllib.request
+import urllib.request
+import urllib.error
@@
-    with urllib.request.urlopen(req) as response:
-        for line in response:
-            line_str = line.decode("utf-8").strip()
-            if line_str.startswith("data: "):
-                event = json.loads(line_str[6:])
+    try:
+        with urllib.request.urlopen(req, timeout=30) as response:
+            for line in response:
+                line_str = line.decode("utf-8").strip()
+                if line_str.startswith("data: "):
+                    event = json.loads(line_str[6:])
@@
-                if event.get("type") == "COMPLETE" and event.get("status") == "COMPLETED":
-                    print(json.dumps(event["resultJson"], indent=2))
-                    return event["resultJson"]
+                    if event.get("type") == "COMPLETE" and event.get("status") == "COMPLETED":
+                        print(json.dumps(event["resultJson"], indent=2))
+                        return event["resultJson"]
+    except urllib.error.HTTPError as e:
+        raise RuntimeError(f"MINO API HTTP {e.code}: {e.reason}") from e
+    except urllib.error.URLError as e:
+        raise RuntimeError(f"MINO API connection error: {e.reason}") from e
```
🤖 Prompt for AI Agents
In `@skills/tinyfish-web-agent/scripts/extract.py` around lines 16-21: the
network call in extract.py (the urllib.request.urlopen call) needs a timeout and
friendly error handling: add a configurable timeout (e.g., TIMEOUT constant or
argparse --timeout) and pass it to urllib.request.urlopen(..., timeout=timeout),
wrap the call in try/except catching urllib.error.HTTPError,
urllib.error.URLError, socket.timeout (and a fallback Exception), and on each
print a concise user-facing message and exit with a non-zero code instead of
letting raw tracebacks propagate; update the code around the existing
urllib.request.urlopen usage to implement this handling.


```python
def extract(url, goal, stealth=False, proxy_country=None):
    """Extract/scrape data from a website using TinyFish"""
    api_key = os.environ.get("MINO_API_KEY")
    if not api_key:
        print("Error: MINO_API_KEY environment variable not set", file=sys.stderr)
        sys.exit(1)
```

Comment on lines +25 to +28

⚠️ Potential issue | 🟠 Major



Avoid sys.exit inside the reusable extract() function.

extract() is a library function that should raise exceptions rather than exit the process. Calling sys.exit() here (line 28) means any code importing and calling this function will terminate unexpectedly when the environment variable is missing. Raise an exception instead and handle it in the CLI entry point.

Also, the CLI entry point (line 79) calls extract() without catching potential exceptions. Add error handling there to catch the exception and exit gracefully.

🛠️ Suggested fix

```diff
 def extract(url, goal, stealth=False, proxy_country=None):
     """Extract/scrape data from a website using TinyFish"""
     api_key = os.environ.get("MINO_API_KEY")
     if not api_key:
-        print("Error: MINO_API_KEY environment variable not set", file=sys.stderr)
-        sys.exit(1)
+        raise RuntimeError("MINO_API_KEY environment variable not set")
@@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="TinyFish web extract/scrape tool")
     parser.add_argument("url", help="URL to extract/scrape from")
     parser.add_argument("goal", help="What to extract (natural language)")
     parser.add_argument("--stealth", action="store_true", help="Use stealth mode")
     parser.add_argument("--proxy", help="Proxy country code (e.g., US, UK, DE)")

     args = parser.parse_args()
-    extract(args.url, args.goal, args.stealth, args.proxy)
+    try:
+        extract(args.url, args.goal, args.stealth, args.proxy)
+    except RuntimeError as exc:
+        print(f"Error: {exc}", file=sys.stderr)
+        sys.exit(1)
```
🤖 Prompt for AI Agents
In `@skills/tinyfish-web-agent/scripts/extract.py` around lines 25-28: replace
the in-function process exit with an exception: inside extract() (where it
checks MINO_API_KEY) raise a descriptive exception (e.g., EnvironmentError or
ValueError) instead of calling sys.exit; then update the CLI entry point that
invokes extract() (the top-level main/if __name__ == '__main__' caller) to wrap
the extract() call in try/except, print the error to stderr and call sys.exit(1)
on failure so library callers get an exception while the CLI still exits
gracefully.


```python
    payload = {
        "url": url,
        "goal": goal,
    }

    if stealth:
        payload["browser_profile"] = "stealth"

    if proxy_country:
        payload["proxy_config"] = {
            "enabled": True,
            "country_code": proxy_country,
        }

    req = urllib.request.Request(
        "https://mino.ai/v1/automation/run-sse",
        data=json.dumps(payload).encode(),
        headers={
            "X-API-Key": api_key,
            "Content-Type": "application/json",
        }
    )

    print(f"Extracting from {url}...", file=sys.stderr)

    with urllib.request.urlopen(req) as response:
        for line in response:
            line_str = line.decode("utf-8").strip()
            if line_str.startswith("data: "):
                event = json.loads(line_str[6:])

                # Print status updates
                if event.get("type") == "STATUS_UPDATE":
                    print(f"[{event.get('status')}] {event.get('message', '')}", file=sys.stderr)

                # Print final result
                if event.get("type") == "COMPLETE" and event.get("status") == "COMPLETED":
                    print(json.dumps(event["resultJson"], indent=2))
                    return event["resultJson"]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="TinyFish web extract/scrape tool")
    parser.add_argument("url", help="URL to extract/scrape from")
    parser.add_argument("goal", help="What to extract (natural language)")
    parser.add_argument("--stealth", action="store_true", help="Use stealth mode")
    parser.add_argument("--proxy", help="Proxy country code (e.g., US, UK, DE)")

    args = parser.parse_args()
    extract(args.url, args.goal, args.stealth, args.proxy)
```