diff --git a/README.md b/README.md index aa7b0719a..4584d9a16 100644 --- a/README.md +++ b/README.md @@ -207,7 +207,7 @@ docker compose --profile gateway up -d > [!TIP] > Set your API key in `~/.picoclaw/config.json`. > Get API keys: [OpenRouter](https://openrouter.ai/keys) (LLM) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) (LLM) -> Web Search is **optional** - get free [Tavily API](https://tavily.com) (1000 free queries/month) or [Brave Search API](https://brave.com/search/api) (2000 free queries/month) or use built-in auto fallback. +> Web Search is **optional** - get free [Tavily API](https://tavily.com) (1000 free queries/month), [SearXNG](https://github.com/searxng/searxng) (free, self-hosted) or [Brave Search API](https://brave.com/search/api) (2000 free queries/month) or use built-in auto fallback. **1. Initialize** @@ -255,6 +255,16 @@ picoclaw onboard "duckduckgo": { "enabled": true, "max_results": 5 + }, + "perplexity": { + "enabled": false, + "api_key": "YOUR_PERPLEXITY_API_KEY", + "max_results": 5 + }, + "searxng": { + "enabled": false, + "base_url": "http://your-searxng-instance:8888", + "max_results": 5 } } } @@ -266,7 +276,12 @@ picoclaw onboard **3. Get API Keys** * **LLM Provider**: [OpenRouter](https://openrouter.ai/keys) · [Zhipu](https://open.bigmodel.cn/usercenter/proj-mgmt/apikeys) · [Anthropic](https://console.anthropic.com) · [OpenAI](https://platform.openai.com) · [Gemini](https://aistudio.google.com/api-keys) -* **Web Search** (optional): [Tavily](https://tavily.com) - Optimized for AI Agents (1000 requests/month) · [Brave Search](https://brave.com/search/api) - Free tier available (2000 requests/month) +* **Web Search** (optional): + * [Brave Search](https://brave.com/search/api) - Paid ($5/1000 queries, ~$5-6/month) + * [Perplexity](https://www.perplexity.ai) - AI-powered search with chat interface + * [SearXNG](https://github.com/searxng/searxng) - Self-hosted metasearch engine (free, no API key needed) + * [Tavily](https://tavily.com) - Optimized for AI Agents (1000 requests/month) + * DuckDuckGo - Built-in fallback (no API key required) > **Note**: See `config.example.json` for a complete configuration template. @@ -1090,6 +1105,16 @@ picoclaw agent -m "Hello" "duckduckgo": { "enabled": true, "max_results": 5 + }, + "perplexity": { + "enabled": false, + "api_key": "", + "max_results": 5 + }, + "searxng": { + "enabled": false, + "base_url": "http://localhost:8888", + "max_results": 5 } }, "cron": { @@ -1147,10 +1172,69 @@ discord: This is normal if you haven't configured a search API key yet. PicoClaw will provide helpful links for manual searching. -To enable web search: +#### Search Provider Priority -1. **Option 1 (Recommended)**: Get a free API key at [https://brave.com/search/api](https://brave.com/search/api) (2000 free queries/month) for the best results. -2. **Option 2 (No Credit Card)**: If you don't have a key, we automatically fall back to **DuckDuckGo** (no key required). +PicoClaw automatically selects the best available search provider in this order: +1. **Perplexity** (if enabled and API key configured) - AI-powered search with citations +2. **Brave Search** (if enabled and API key configured) - Privacy-focused paid API ($5/1000 queries) +3. **SearXNG** (if enabled and base_url configured) - Self-hosted metasearch aggregating 70+ engines (free) +4. **DuckDuckGo** (if enabled, default fallback) - No API key required (free) + +#### Web Search Configuration Options + +**Option 1 (Best Results)**: Perplexity AI Search +```json +{ + "tools": { + "web": { + "perplexity": { + "enabled": true, + "api_key": "YOUR_PERPLEXITY_API_KEY", + "max_results": 5 + } + } + } +} +``` + +**Option 2 (Paid API)**: Get an API key at [https://brave.com/search/api](https://brave.com/search/api) ($5/1000 queries, ~$5-6/month) +```json +{ + "tools": { + "web": { + "brave": { + "enabled": true, + "api_key": "YOUR_BRAVE_API_KEY", + "max_results": 5 + } + } + } +} +``` + +**Option 3 (Self-Hosted)**: Deploy your own [SearXNG](https://github.com/searxng/searxng) instance +```json +{ + "tools": { + "web": { + "searxng": { + "enabled": true, + "base_url": "http://your-server:8888", + "max_results": 5 + } + } + } +} +``` + +Benefits of SearXNG: +- **Zero cost**: No API fees or rate limits +- **Privacy-focused**: Self-hosted, no tracking +- **Aggregate results**: Queries 70+ search engines simultaneously +- **Perfect for cloud VMs**: Solves datacenter IP blocking issues (Oracle Cloud, GCP, AWS, Azure) +- **No API key needed**: Just deploy and configure the base URL + +**Option 4 (No Setup Required)**: DuckDuckGo is enabled by default as fallback (no API key needed) Add the key to `~/.picoclaw/config.json` if using Brave: @@ -1166,6 +1250,16 @@ Add the key to `~/.picoclaw/config.json` if using Brave: "duckduckgo": { "enabled": true, "max_results": 5 + }, + "perplexity": { + "enabled": false, + "api_key": "YOUR_PERPLEXITY_API_KEY", + "max_results": 5 + }, + "searxng": { + "enabled": false, + "base_url": "http://your-searxng-instance:8888", + "max_results": 5 } } } @@ -1184,10 +1278,11 @@ This happens when another instance of the bot is running. Make sure only one `pi ## 📝 API Key Comparison -| Service | Free Tier | Use Case | -| ---------------- | ------------------- | ------------------------------------- | -| **OpenRouter** | 200K tokens/month | Multiple models (Claude, GPT-4, etc.) | -| **Zhipu** | 200K tokens/month | Best for Chinese users | -| **Brave Search** | 2000 queries/month | Web search functionality | -| **Groq** | Free tier available | Fast inference (Llama, Mixtral) | -| **Cerebras** | Free tier available | Fast inference (Llama, Qwen, etc.) | +| Service | Free Tier | Use Case | +| ---------------- | ------------------------ | ------------------------------------- | +| **OpenRouter** | 200K tokens/month | Multiple models (Claude, GPT-4, etc.) | +| **Zhipu** | 200K tokens/month | Best for Chinese users | +| **Brave Search** | Paid ($5/1000 queries) | Web search functionality | +| **SearXNG** | Unlimited (self-hosted) | Privacy-focused metasearch (70+ engines) | +| **Groq** | Free tier available | Fast inference (Llama, Mixtral) | +| **Cerebras** | Free tier available | Fast inference (Llama, Qwen, etc.) | diff --git a/config/config.example.json b/config/config.example.json index 9575039f8..605f9dc1d 100644 --- a/config/config.example.json +++ b/config/config.example.json @@ -218,6 +218,11 @@ "api_key": "pplx-xxx", "max_results": 5 }, + "searxng": { + "enabled": false, + "base_url": "http://localhost:8888", + "max_results": 5 + }, "proxy": "" }, "cron": { diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 693f2227b..37591fa79 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -106,6 +106,9 @@ func registerSharedTools( PerplexityAPIKey: cfg.Tools.Web.Perplexity.APIKey, PerplexityMaxResults: cfg.Tools.Web.Perplexity.MaxResults, PerplexityEnabled: cfg.Tools.Web.Perplexity.Enabled, + SearXNGBaseURL: cfg.Tools.Web.SearXNG.BaseURL, + SearXNGMaxResults: cfg.Tools.Web.SearXNG.MaxResults, + SearXNGEnabled: cfg.Tools.Web.SearXNG.Enabled, Proxy: cfg.Tools.Web.Proxy, }); searchTool != nil { agent.Tools.Register(searchTool) diff --git a/pkg/config/config.go b/pkg/config/config.go index 6f76614cf..f7c78136b 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -448,11 +448,18 @@ type PerplexityConfig struct { MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_PERPLEXITY_MAX_RESULTS"` } +type SearXNGConfig struct { + Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_WEB_SEARXNG_ENABLED"` + BaseURL string `json:"base_url" env:"PICOCLAW_TOOLS_WEB_SEARXNG_BASE_URL"` + MaxResults int `json:"max_results" env:"PICOCLAW_TOOLS_WEB_SEARXNG_MAX_RESULTS"` +} + type WebToolsConfig struct { Brave BraveConfig `json:"brave"` Tavily TavilyConfig `json:"tavily"` DuckDuckGo DuckDuckGoConfig `json:"duckduckgo"` Perplexity PerplexityConfig `json:"perplexity"` + SearXNG SearXNGConfig `json:"searxng"` // Proxy is an optional proxy URL for web tools (http/https/socks5/socks5h). // For authenticated proxies, prefer HTTP_PROXY/HTTPS_PROXY env vars instead of embedding credentials in config. Proxy string `json:"proxy,omitempty" env:"PICOCLAW_TOOLS_WEB_PROXY"` diff --git a/pkg/config/defaults.go b/pkg/config/defaults.go index cc6de9399..47c51fc9d 100644 --- a/pkg/config/defaults.go +++ b/pkg/config/defaults.go @@ -292,6 +292,11 @@ func DefaultConfig() *Config { APIKey: "", MaxResults: 5, }, + SearXNG: SearXNGConfig{ + Enabled: false, + BaseURL: "", + MaxResults: 5, + }, }, Cron: CronToolsConfig{ ExecTimeoutMinutes: 5, diff --git a/pkg/tools/web.go b/pkg/tools/web.go index 44df28215..e95185599 100644 --- a/pkg/tools/web.go +++ b/pkg/tools/web.go @@ -384,6 +384,68 @@ func (p *PerplexitySearchProvider) Search(ctx context.Context, query string, cou return fmt.Sprintf("Results for: %s (via Perplexity)\n%s", query, searchResp.Choices[0].Message.Content), nil } +type SearXNGSearchProvider struct { + baseURL string +} + +func (p *SearXNGSearchProvider) Search(ctx context.Context, query string, count int) (string, error) { + searchURL := fmt.Sprintf("%s/search?q=%s&format=json&categories=general", + strings.TrimSuffix(p.baseURL, "/"), + url.QueryEscape(query)) + + req, err := http.NewRequestWithContext(ctx, "GET", searchURL, nil) + if err != nil { + return "", fmt.Errorf("failed to create request: %w", err) + } + + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("request failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("SearXNG returned status %d", resp.StatusCode) + } + + var result struct { + Results []struct { + Title string `json:"title"` + URL string `json:"url"` + Content string `json:"content"` + Engine string `json:"engine"` + Score float64 `json:"score"` + } `json:"results"` + } + + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", fmt.Errorf("failed to parse response: %w", err) + } + + if len(result.Results) == 0 { + return fmt.Sprintf("No results for: %s", query), nil + } + + // Limit results to requested count + if len(result.Results) > count { + result.Results = result.Results[:count] + } + + // Format results in standard PicoClaw format + var b strings.Builder + b.WriteString(fmt.Sprintf("Results for: %s (via SearXNG)\n", query)) + for i, r := range result.Results { + b.WriteString(fmt.Sprintf("%d. %s\n", i+1, r.Title)) + b.WriteString(fmt.Sprintf(" %s\n", r.URL)) + if r.Content != "" { + b.WriteString(fmt.Sprintf(" %s\n", r.Content)) + } + } + + return b.String(), nil +} + type WebSearchTool struct { provider SearchProvider maxResults int @@ -402,6 +464,9 @@ type WebSearchToolOptions struct { PerplexityAPIKey string PerplexityMaxResults int PerplexityEnabled bool + SearXNGBaseURL string + SearXNGMaxResults int + SearXNGEnabled bool Proxy string } @@ -409,7 +474,7 @@ func NewWebSearchTool(opts WebSearchToolOptions) *WebSearchTool { var provider SearchProvider maxResults := 5 - // Priority: Perplexity > Brave > Tavily > DuckDuckGo + // Priority: Perplexity > Brave > SearXNG > Tavily > DuckDuckGo if opts.PerplexityEnabled && opts.PerplexityAPIKey != "" { provider = &PerplexitySearchProvider{apiKey: opts.PerplexityAPIKey, proxy: opts.Proxy} if opts.PerplexityMaxResults > 0 { @@ -420,6 +485,11 @@ func NewWebSearchTool(opts WebSearchToolOptions) *WebSearchTool { if opts.BraveMaxResults > 0 { maxResults = opts.BraveMaxResults } + } else if opts.SearXNGEnabled && opts.SearXNGBaseURL != "" { + provider = &SearXNGSearchProvider{baseURL: opts.SearXNGBaseURL} + if opts.SearXNGMaxResults > 0 { + maxResults = opts.SearXNGMaxResults + } } else if opts.TavilyEnabled && opts.TavilyAPIKey != "" { provider = &TavilySearchProvider{ apiKey: opts.TavilyAPIKey,