From c3549e8ce0ecd9c08118e2a3013bf49df14f9f5c Mon Sep 17 00:00:00 2001
From: "Bingxi Zhao (Frank)" <150592536+pancacake@users.noreply.github.com>
Date: Thu, 15 Jan 2026 18:30:55 +0800
Subject: [PATCH 2/9] update discord
---
CONTRIBUTING.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 535e8d83..0ec57968 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,7 @@ Contributing to DeepTutor 🚀
Thank you for your interest in contributing to DeepTutor! We are committed to building a smooth and robust intelligent learning companion, and we welcome developers of all skill levels to join us.
Join our community for discussion, support, and collaboration:
-
+
From 5b8b9d6de8e5b88a26dd1b7fcf320a9a92917ff9 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 15 Jan 2026 12:54:48 +0000
Subject: [PATCH 3/9] chore: update repo roster images [skip ci]
---
assets/roster/forkers.svg | 14 +++++++-------
assets/roster/stargazers.svg | 2 +-
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/assets/roster/forkers.svg b/assets/roster/forkers.svg
index 5a23cf1d..d42ce14a 100644
--- a/assets/roster/forkers.svg
+++ b/assets/roster/forkers.svg
@@ -10,21 +10,21 @@
Forkers
-
+
-
+
-
+
-
+
-
+
-
-and 1,108 others
+
+and 1,127 others
\ No newline at end of file
diff --git a/assets/roster/stargazers.svg b/assets/roster/stargazers.svg
index 300b5b44..25514294 100644
--- a/assets/roster/stargazers.svg
+++ b/assets/roster/stargazers.svg
@@ -26,5 +26,5 @@
-and 8,489 others
+and 8,672 others
\ No newline at end of file
From 42f8c42e00e0a67604d6a49ec5ad812fd2b3b038 Mon Sep 17 00:00:00 2001
From: andres
Date: Thu, 15 Jan 2026 14:07:01 -0600
Subject: [PATCH 4/9] "Add support for SearXNG metasearch provider, including
templates, context handling, and API integration."
---
src/tools/web_search/consolidation.py | 65 ++++-
src/tools/web_search/providers/__init__.py | 2 +-
src/tools/web_search/providers/searxng.py | 292 +++++++++++++++++++++
3 files changed, 355 insertions(+), 4 deletions(-)
create mode 100644 src/tools/web_search/providers/searxng.py
diff --git a/src/tools/web_search/consolidation.py b/src/tools/web_search/consolidation.py
index c888bb49..4f7b735c 100644
--- a/src/tools/web_search/consolidation.py
+++ b/src/tools/web_search/consolidation.py
@@ -136,6 +136,50 @@
{% endfor %}
---
*{{ results|length }} academic papers found via Google Scholar*""",
+ # -------------------------------------------------------------------------
+ # SEARXNG TEMPLATE
+ # -------------------------------------------------------------------------
+ "searxng": """{% if answers %}
+### Direct Answers
+{% for answer in answers %}
+{{ answer }}
+{% endfor %}
+
+---
+{% endif %}
+{% if infoboxes %}
+{% for infobox in infoboxes %}
+## {{ infobox.infobox }}{% if infobox.id %} ({{ infobox.id }}){% endif %}
+
+{{ infobox.content }}
+{% if infobox.urls %}
+{% for url in infobox.urls[:3] %}
+- [{{ url.title }}]({{ url.url }})
+{% endfor %}
+{% endif %}
+
+---
+{% endfor %}
+{% endif %}
+### Search Results for "{{ query }}"
+
+{% for result in results[:max_results] %}
+**[{{ loop.index }}] {{ result.title }}**
+{{ result.snippet }}
+{% if result.date %}*{{ result.date }}*{% endif %}
+{% if result.attributes.engine %}*via {{ result.attributes.engine }}*{% endif %}
+{{ result.url }}
+
+{% endfor %}
+{% if suggestions %}
+---
+*Suggestions: {% for s in suggestions[:5] %}{{ s }}{% if not loop.last %}, {% endif %}{% endfor %}*
+{% endif %}
+{% if corrections %}
+*Did you mean: {% for c in corrections[:3] %}{{ c }}{% if not loop.last %}, {% endif %}{% endfor %}*
+{% endif %}
+---
+*{{ results|length }} results from SearXNG metasearch*""",
}
@@ -157,6 +201,7 @@ class AnswerConsolidator:
"serper": "serper",
"jina": "jina",
"serper_scholar": "serper_scholar",
+ "searxng": "searxng",
}
def __init__(
@@ -315,6 +360,15 @@ def _build_provider_context(self, response: WebSearchResponse) -> dict[str, Any]
context["links"] = metadata.get("links", {})
context["images"] = metadata.get("images", {})
+ # -----------------------------------------------------------------
+ # SEARXNG-specific context
+ # -----------------------------------------------------------------
+ elif provider_lower == "searxng":
+ context["answers"] = metadata.get("answers", [])
+ context["infoboxes"] = metadata.get("infoboxes", [])
+ context["suggestions"] = metadata.get("suggestions", [])
+ context["corrections"] = metadata.get("corrections", [])
+
return context
def _consolidate_with_template(self, response: WebSearchResponse) -> str:
@@ -327,13 +381,18 @@ def _consolidate_with_template(self, response: WebSearchResponse) -> str:
# Build context with provider-specific fields
context = self._build_provider_context(response)
- _logger.debug(
- f"Context has {len(context.get('results', []))} results, {len(context.get('citations', []))} citations"
+ _logger.info(
+ f"[Consolidation] Context: {len(context.get('results', []))} results, "
+ f"{len(context.get('citations', []))} citations, max_results={context.get('max_results')}"
)
+ if context.get('results'):
+ first_result = context['results'][0]
+ _logger.info(f"[Consolidation] First result in context: title='{first_result.get('title', '')[:50]}', snippet='{first_result.get('snippet', '')[:100]}'...")
try:
rendered = template.render(**context)
- _logger.debug("Template rendered successfully")
+ _logger.info(f"[Consolidation] Template rendered ({len(rendered)} chars)")
+ _logger.info(f"[Consolidation] Rendered content:\n{rendered}")
return rendered
except Exception as e:
_logger.error(f"Template rendering failed: {e}")
diff --git a/src/tools/web_search/providers/__init__.py b/src/tools/web_search/providers/__init__.py
index 9d123453..add60bc9 100644
--- a/src/tools/web_search/providers/__init__.py
+++ b/src/tools/web_search/providers/__init__.py
@@ -117,7 +117,7 @@ def get_default_provider(**kwargs) -> BaseSearchProvider:
# Auto-import all providers to trigger registration
-from . import baidu, exa, jina, perplexity, serper, tavily
+from . import baidu, exa, jina, perplexity, searxng, serper, tavily
__all__ = [
"register_provider",
diff --git a/src/tools/web_search/providers/searxng.py b/src/tools/web_search/providers/searxng.py
new file mode 100644
index 00000000..09af6ccf
--- /dev/null
+++ b/src/tools/web_search/providers/searxng.py
@@ -0,0 +1,292 @@
+"""
+SearXNG Metasearch Provider
+
+SearXNG is a free, open-source metasearch engine that aggregates results
+from multiple search engines while protecting user privacy.
+
+Features:
+- Privacy-focused: No API keys required for self-hosted instances
+- Configurable: Choose specific engines and categories
+- Free: No per-query costs
+- Self-hosted: Full control over search sources
+
+Configuration:
+- SEARXNG_BASE_URL: Base URL of SearXNG instance (default: http://localhost:8888)
+
+Note: JSON format must be enabled in SearXNG settings.yml:
+ search:
+ formats:
+ - html
+ - json
+"""
+
+from datetime import datetime
+import os
+from typing import Any
+
+import requests
+
+from ..base import BaseSearchProvider
+from ..types import Citation, SearchResult, WebSearchResponse
+from . import register_provider
+
+
+class SearXNGAPIError(Exception):
+ """SearXNG API error"""
+
+ pass
+
+
+@register_provider("searxng")
+class SearXNGProvider(BaseSearchProvider):
+ """SearXNG metasearch engine provider"""
+
+ display_name = "SearXNG"
+ description = "Privacy-focused metasearch engine"
+ api_key_env_var = "SEARXNG_BASE_URL"
+ requires_api_key = False
+ supports_answer = False
+
+ DEFAULT_BASE_URL = "http://localhost:8888"
+
+ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None:
+ """
+ Initialize SearXNG provider.
+
+ Args:
+ api_key: Not used (SearXNG doesn't require an API key).
+ **kwargs: Additional configuration options.
+ """
+ super().__init__(api_key=api_key, **kwargs)
+ self.base_url = (
+ kwargs.get("base_url")
+ or os.environ.get("SEARXNG_BASE_URL")
+ or self.DEFAULT_BASE_URL
+ ).rstrip("/")
+
+ DEFAULT_ENGINES = "brave,bing,wikipedia,wikidata,wikinews"
+ DEFAULT_CATEGORIES = "general"
+
+ def search(
+ self,
+ query: str,
+ categories: str | None = None,
+ engines: str | None = None,
+ language: str | None = None,
+ time_range: str | None = None,
+ page: int = 1,
+ safesearch: int = 1,
+ timeout: int = 30,
+ **kwargs: Any,
+ ) -> WebSearchResponse:
+ """
+ Perform search using SearXNG API.
+
+ Args:
+ query: Search query.
+ categories: Comma-separated categories (e.g., 'general,science').
+ engines: Comma-separated engines (e.g., 'google,duckduckgo').
+ language: Language code (e.g., "en", "es"); omit or use "auto" for defaults.
+ time_range: Time filter ('day', 'month', 'year').
+ page: Page number (default 1).
+ safesearch: Safe search level (0, 1, 2).
+ timeout: Request timeout in seconds.
+ **kwargs: Additional SearXNG parameters.
+
+ Returns:
+ WebSearchResponse: Standardized search response.
+ """
+ effective_engines = engines or os.environ.get("SEARXNG_ENGINES") or self.DEFAULT_ENGINES
+ effective_categories = categories or os.environ.get("SEARXNG_CATEGORIES") or self.DEFAULT_CATEGORIES
+ effective_language = None if not language or language == "auto" else language
+ self.logger.info(
+ f"[SearXNG] Request: base_url={self.base_url}, language={effective_language or 'auto'}, "
+ f"categories={effective_categories}, engines={effective_engines}"
+ )
+
+ params: dict[str, Any] = {
+ "q": query,
+ "format": "json",
+ "pageno": page,
+ "safesearch": safesearch,
+ }
+
+ if effective_language:
+ params["language"] = effective_language
+
+ if effective_categories:
+ params["categories"] = effective_categories
+
+ if effective_engines:
+ params["engines"] = effective_engines
+ if time_range:
+ params["time_range"] = time_range
+
+ params.update(kwargs)
+
+ search_endpoint = f"{self.base_url}/search"
+
+ headers = {
+ "Accept": "application/json",
+ "User-Agent": "DeepTutor/1.0 (SearXNG API Client)",
+ }
+ if effective_language:
+ headers["Accept-Language"] = effective_language
+
+ self.logger.info(f"[SearXNG] Endpoint: {search_endpoint}")
+ self.logger.info(f"[SearXNG] Query params: {params}")
+
+ try:
+ response = requests.get(
+ search_endpoint,
+ params=params,
+ headers=headers,
+ timeout=timeout,
+ )
+ self.logger.info(f"[SearXNG] Request URL: {response.url}")
+ except requests.exceptions.RequestException as e:
+ self.logger.error(f"SearXNG request failed: {e}")
+ raise SearXNGAPIError(f"SearXNG request failed: {e}") from e
+
+ if response.status_code == 403:
+ self.logger.error(
+ "SearXNG returned 403 Forbidden. "
+ "JSON format must be enabled in SearXNG settings.yml: "
+ "search.formats: [html, json]"
+ )
+ raise SearXNGAPIError(
+ "SearXNG API returned 403 Forbidden. "
+ "Ensure JSON format is enabled in your SearXNG instance settings.yml: "
+ "search:\n formats:\n - html\n - json"
+ )
+
+ if response.status_code != 200:
+ self.logger.error(f"SearXNG API error: {response.status_code} - {response.text}")
+ raise SearXNGAPIError(
+ f"SearXNG API error: {response.status_code} - {response.text}"
+ )
+
+ data = response.json()
+
+ self.logger.info(f"[SearXNG] Response status: {response.status_code}")
+ self.logger.info(f"[SearXNG] Response keys: {list(data.keys())}")
+ self.logger.info(f"[SearXNG] Results count: {len(data.get('results', []))}")
+ self.logger.info(f"[SearXNG] Answers count: {len(data.get('answers', []))}")
+ self.logger.info(f"[SearXNG] Suggestions: {data.get('suggestions', [])}")
+ self.logger.info(f"[SearXNG] Corrections: {data.get('corrections', [])}")
+ self.logger.info(f"[SearXNG] Infoboxes count: {len(data.get('infoboxes', []))}")
+
+ unresponsive = data.get("unresponsive_engines", [])
+ if unresponsive:
+ self.logger.warning(f"[SearXNG] Unresponsive engines: {unresponsive}")
+
+ if data.get('results'):
+ engine_counts: dict[str, int] = {}
+ for r in data['results']:
+ eng = r.get('engine', 'unknown')
+ engine_counts[eng] = engine_counts.get(eng, 0) + 1
+ self.logger.info(f"[SearXNG] Results by engine: {engine_counts}")
+ self.logger.info(f"[SearXNG] First result: {data['results'][0]}")
+ elif unresponsive:
+ engine_errors = ", ".join([f"{e[0]}({e[1]})" for e in unresponsive])
+ self.logger.error(
+ f"[SearXNG] No results - all engines failed: {engine_errors}. "
+ "Configure working engines in SearXNG settings.yml (brave, bing, wikipedia, wikidata, arxiv)"
+ )
+ else:
+ self.logger.warning(f"[SearXNG] No results returned. Full response: {data}")
+
+ citations: list[Citation] = []
+ search_results: list[SearchResult] = []
+
+ for i, result in enumerate(data.get("results", []), 1):
+ title = result.get("title", "")
+ url = result.get("url", "")
+ snippet = result.get("content", "")
+ date = result.get("publishedDate", "")
+ engine = result.get("engine", "")
+ category = result.get("category", "web")
+ score = result.get("score", 0.0)
+
+ attributes: dict[str, Any] = {}
+ if result.get("img_src"):
+ attributes["img_src"] = result["img_src"]
+ if engine:
+ attributes["engine"] = engine
+
+ sr = SearchResult(
+ title=title,
+ url=url,
+ snippet=snippet,
+ date=date,
+ source=engine,
+ score=score,
+ attributes=attributes,
+ )
+ search_results.append(sr)
+
+ citations.append(
+ Citation(
+ id=i,
+ reference=f"[{i}]",
+ url=url,
+ title=title,
+ snippet=snippet,
+ date=date,
+ source=engine,
+ type=category,
+ )
+ )
+
+ raw_answers = data.get("answers", [])
+ self.logger.info(f"[SearXNG] Raw answers: {raw_answers}")
+
+ answer_texts = []
+ for ans in raw_answers:
+ if isinstance(ans, str):
+ answer_texts.append(ans)
+ elif isinstance(ans, dict) and ans.get("content"):
+ answer_texts.append(ans["content"])
+
+ answer = "\n\n".join(answer_texts) if answer_texts else ""
+ self.logger.info(f"[SearXNG] Parsed answer: {answer[:200] if answer else 'None'}")
+
+ if not answer and search_results:
+ answer = search_results[0].snippet
+
+ metadata: dict[str, Any] = {
+ "finish_reason": "stop",
+ "base_url": self.base_url,
+ "answers": answer_texts,
+ "infoboxes": data.get("infoboxes", []),
+ "suggestions": data.get("suggestions", []),
+ "corrections": data.get("corrections", []),
+ }
+
+ self.logger.info(f"[SearXNG] Final results: {len(search_results)} search_results, {len(citations)} citations")
+ self.logger.info(f"[SearXNG] Final answer length: {len(answer)} chars")
+
+ return WebSearchResponse(
+ query=query,
+ answer=answer,
+ provider="searxng",
+ timestamp=datetime.now().isoformat(),
+ model="searxng",
+ citations=citations,
+ search_results=search_results,
+ usage={},
+ metadata=metadata,
+ )
+
+ def is_available(self) -> bool:
+ """
+ Check if SearXNG instance is reachable.
+
+ Returns:
+ bool: True if instance responds, False otherwise.
+ """
+ try:
+ response = requests.get(f"{self.base_url}/", timeout=5)
+ return response.status_code == 200
+ except Exception:
+ return False
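A minimal usage sketch for the provider added in this patch: it assumes a self-hosted SearXNG instance at http://localhost:8888 with JSON output enabled, that the repository root is on sys.path, and that WebSearchResponse and Citation expose their constructor fields as attributes.

from src.tools.web_search.providers.searxng import SearXNGAPIError, SearXNGProvider

provider = SearXNGProvider(base_url="http://localhost:8888")

if not provider.is_available():
    raise SystemExit("SearXNG instance is not reachable")

try:
    response = provider.search(
        query="open source metasearch engines",
        engines="brave,wikipedia",
        time_range="year",
    )
except SearXNGAPIError as exc:
    raise SystemExit(f"Search failed: {exc}")

print(response.answer)
for citation in response.citations[:3]:
    print(citation.reference, citation.title, citation.url)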
From 1d8648b0f89e5f7e7e4565f12fcff91d65cecc9c Mon Sep 17 00:00:00 2001
From: andres
Date: Thu, 15 Jan 2026 14:10:23 -0600
Subject: [PATCH 5/9] "Remove SearXNG provider implementation and all
associated code from the web search module."
---
src/tools/web_search/providers/searxng.py | 292 ----------------------
1 file changed, 292 deletions(-)
delete mode 100644 src/tools/web_search/providers/searxng.py
diff --git a/src/tools/web_search/providers/searxng.py b/src/tools/web_search/providers/searxng.py
deleted file mode 100644
index 09af6ccf..00000000
--- a/src/tools/web_search/providers/searxng.py
+++ /dev/null
@@ -1,292 +0,0 @@
-"""
-SearXNG Metasearch Provider
-
-SearXNG is a free, open-source metasearch engine that aggregates results
-from multiple search engines while protecting user privacy.
-
-Features:
-- Privacy-focused: No API keys required for self-hosted instances
-- Configurable: Choose specific engines and categories
-- Free: No per-query costs
-- Self-hosted: Full control over search sources
-
-Configuration:
-- SEARXNG_BASE_URL: Base URL of SearXNG instance (default: http://localhost:8888)
-
-Note: JSON format must be enabled in SearXNG settings.yml:
- search:
- formats:
- - html
- - json
-"""
-
-from datetime import datetime
-import os
-from typing import Any
-
-import requests
-
-from ..base import BaseSearchProvider
-from ..types import Citation, SearchResult, WebSearchResponse
-from . import register_provider
-
-
-class SearXNGAPIError(Exception):
- """SearXNG API error"""
-
- pass
-
-
-@register_provider("searxng")
-class SearXNGProvider(BaseSearchProvider):
- """SearXNG metasearch engine provider"""
-
- display_name = "SearXNG"
- description = "Privacy-focused metasearch engine"
- api_key_env_var = "SEARXNG_BASE_URL"
- requires_api_key = False
- supports_answer = False
-
- DEFAULT_BASE_URL = "http://localhost:8888"
-
- def __init__(self, api_key: str | None = None, **kwargs: Any) -> None:
- """
- Initialize SearXNG provider.
-
- Args:
- api_key: Not used (SearXNG doesn't require an API key).
- **kwargs: Additional configuration options.
- """
- super().__init__(api_key=api_key, **kwargs)
- self.base_url = (
- kwargs.get("base_url")
- or os.environ.get("SEARXNG_BASE_URL")
- or self.DEFAULT_BASE_URL
- ).rstrip("/")
-
- DEFAULT_ENGINES = "brave,bing,wikipedia,wikidata,wikinews"
- DEFAULT_CATEGORIES = "general"
-
- def search(
- self,
- query: str,
- categories: str | None = None,
- engines: str | None = None,
- language: str | None = None,
- time_range: str | None = None,
- page: int = 1,
- safesearch: int = 1,
- timeout: int = 30,
- **kwargs: Any,
- ) -> WebSearchResponse:
- """
- Perform search using SearXNG API.
-
- Args:
- query: Search query.
- categories: Comma-separated categories (e.g., 'general,science').
- engines: Comma-separated engines (e.g., 'google,duckduckgo').
- language: Language code (e.g., "en", "es"); omit or use "auto" for defaults.
- time_range: Time filter ('day', 'month', 'year').
- page: Page number (default 1).
- safesearch: Safe search level (0, 1, 2).
- timeout: Request timeout in seconds.
- **kwargs: Additional SearXNG parameters.
-
- Returns:
- WebSearchResponse: Standardized search response.
- """
- effective_engines = engines or os.environ.get("SEARXNG_ENGINES") or self.DEFAULT_ENGINES
- effective_categories = categories or os.environ.get("SEARXNG_CATEGORIES") or self.DEFAULT_CATEGORIES
- effective_language = None if not language or language == "auto" else language
- self.logger.info(
- f"[SearXNG] Request: base_url={self.base_url}, language={effective_language or 'auto'}, "
- f"categories={effective_categories}, engines={effective_engines}"
- )
-
- params: dict[str, Any] = {
- "q": query,
- "format": "json",
- "pageno": page,
- "safesearch": safesearch,
- }
-
- if effective_language:
- params["language"] = effective_language
-
- if effective_categories:
- params["categories"] = effective_categories
-
- if effective_engines:
- params["engines"] = effective_engines
- if time_range:
- params["time_range"] = time_range
-
- params.update(kwargs)
-
- search_endpoint = f"{self.base_url}/search"
-
- headers = {
- "Accept": "application/json",
- "User-Agent": "DeepTutor/1.0 (SearXNG API Client)",
- }
- if effective_language:
- headers["Accept-Language"] = effective_language
-
- self.logger.info(f"[SearXNG] Endpoint: {search_endpoint}")
- self.logger.info(f"[SearXNG] Query params: {params}")
-
- try:
- response = requests.get(
- search_endpoint,
- params=params,
- headers=headers,
- timeout=timeout,
- )
- self.logger.info(f"[SearXNG] Request URL: {response.url}")
- except requests.exceptions.RequestException as e:
- self.logger.error(f"SearXNG request failed: {e}")
- raise SearXNGAPIError(f"SearXNG request failed: {e}") from e
-
- if response.status_code == 403:
- self.logger.error(
- "SearXNG returned 403 Forbidden. "
- "JSON format must be enabled in SearXNG settings.yml: "
- "search.formats: [html, json]"
- )
- raise SearXNGAPIError(
- "SearXNG API returned 403 Forbidden. "
- "Ensure JSON format is enabled in your SearXNG instance settings.yml: "
- "search:\n formats:\n - html\n - json"
- )
-
- if response.status_code != 200:
- self.logger.error(f"SearXNG API error: {response.status_code} - {response.text}")
- raise SearXNGAPIError(
- f"SearXNG API error: {response.status_code} - {response.text}"
- )
-
- data = response.json()
-
- self.logger.info(f"[SearXNG] Response status: {response.status_code}")
- self.logger.info(f"[SearXNG] Response keys: {list(data.keys())}")
- self.logger.info(f"[SearXNG] Results count: {len(data.get('results', []))}")
- self.logger.info(f"[SearXNG] Answers count: {len(data.get('answers', []))}")
- self.logger.info(f"[SearXNG] Suggestions: {data.get('suggestions', [])}")
- self.logger.info(f"[SearXNG] Corrections: {data.get('corrections', [])}")
- self.logger.info(f"[SearXNG] Infoboxes count: {len(data.get('infoboxes', []))}")
-
- unresponsive = data.get("unresponsive_engines", [])
- if unresponsive:
- self.logger.warning(f"[SearXNG] Unresponsive engines: {unresponsive}")
-
- if data.get('results'):
- engine_counts: dict[str, int] = {}
- for r in data['results']:
- eng = r.get('engine', 'unknown')
- engine_counts[eng] = engine_counts.get(eng, 0) + 1
- self.logger.info(f"[SearXNG] Results by engine: {engine_counts}")
- self.logger.info(f"[SearXNG] First result: {data['results'][0]}")
- elif unresponsive:
- engine_errors = ", ".join([f"{e[0]}({e[1]})" for e in unresponsive])
- self.logger.error(
- f"[SearXNG] No results - all engines failed: {engine_errors}. "
- "Configure working engines in SearXNG settings.yml (brave, bing, wikipedia, wikidata, arxiv)"
- )
- else:
- self.logger.warning(f"[SearXNG] No results returned. Full response: {data}")
-
- citations: list[Citation] = []
- search_results: list[SearchResult] = []
-
- for i, result in enumerate(data.get("results", []), 1):
- title = result.get("title", "")
- url = result.get("url", "")
- snippet = result.get("content", "")
- date = result.get("publishedDate", "")
- engine = result.get("engine", "")
- category = result.get("category", "web")
- score = result.get("score", 0.0)
-
- attributes: dict[str, Any] = {}
- if result.get("img_src"):
- attributes["img_src"] = result["img_src"]
- if engine:
- attributes["engine"] = engine
-
- sr = SearchResult(
- title=title,
- url=url,
- snippet=snippet,
- date=date,
- source=engine,
- score=score,
- attributes=attributes,
- )
- search_results.append(sr)
-
- citations.append(
- Citation(
- id=i,
- reference=f"[{i}]",
- url=url,
- title=title,
- snippet=snippet,
- date=date,
- source=engine,
- type=category,
- )
- )
-
- raw_answers = data.get("answers", [])
- self.logger.info(f"[SearXNG] Raw answers: {raw_answers}")
-
- answer_texts = []
- for ans in raw_answers:
- if isinstance(ans, str):
- answer_texts.append(ans)
- elif isinstance(ans, dict) and ans.get("content"):
- answer_texts.append(ans["content"])
-
- answer = "\n\n".join(answer_texts) if answer_texts else ""
- self.logger.info(f"[SearXNG] Parsed answer: {answer[:200] if answer else 'None'}")
-
- if not answer and search_results:
- answer = search_results[0].snippet
-
- metadata: dict[str, Any] = {
- "finish_reason": "stop",
- "base_url": self.base_url,
- "answers": answer_texts,
- "infoboxes": data.get("infoboxes", []),
- "suggestions": data.get("suggestions", []),
- "corrections": data.get("corrections", []),
- }
-
- self.logger.info(f"[SearXNG] Final results: {len(search_results)} search_results, {len(citations)} citations")
- self.logger.info(f"[SearXNG] Final answer length: {len(answer)} chars")
-
- return WebSearchResponse(
- query=query,
- answer=answer,
- provider="searxng",
- timestamp=datetime.now().isoformat(),
- model="searxng",
- citations=citations,
- search_results=search_results,
- usage={},
- metadata=metadata,
- )
-
- def is_available(self) -> bool:
- """
- Check if SearXNG instance is reachable.
-
- Returns:
- bool: True if instance responds, False otherwise.
- """
- try:
- response = requests.get(f"{self.base_url}/", timeout=5)
- return response.status_code == 200
- except Exception:
- return False
From 746dcc9854f760a4b9e93612ab43a1443b713a44 Mon Sep 17 00:00:00 2001
From: andres
Date: Thu, 15 Jan 2026 14:50:20 -0600
Subject: [PATCH 6/9] "Add SearXNG metasearch provider integration with
configuration updates and API support."
---
src/services/config/unified_config.py | 2 +-
src/services/search/consolidation.py | 8 +-
src/services/search/providers/searxng.py | 292 +++++++++++++++++++++
web/app/settings/components/ConfigForm.tsx | 4 +-
web/app/settings/constants.ts | 21 +-
5 files changed, 319 insertions(+), 8 deletions(-)
create mode 100644 src/services/search/providers/searxng.py
diff --git a/src/services/config/unified_config.py b/src/services/config/unified_config.py
index f33f2397..8a17992e 100644
--- a/src/services/config/unified_config.py
+++ b/src/services/config/unified_config.py
@@ -49,7 +49,7 @@ class ConfigType(str, Enum):
],
ConfigType.EMBEDDING: ["openai", "azure_openai", "ollama", "jina", "cohere", "huggingface"],
ConfigType.TTS: ["openai", "azure_openai"],
- ConfigType.SEARCH: ["perplexity", "tavily", "exa", "jina", "serper", "baidu"],
+ ConfigType.SEARCH: ["perplexity", "tavily", "exa", "jina", "serper", "baidu", "searxng"],
}
# Environment variable mappings for each service type
diff --git a/src/services/search/consolidation.py b/src/services/search/consolidation.py
index 8ae834cb..021712fb 100644
--- a/src/services/search/consolidation.py
+++ b/src/services/search/consolidation.py
@@ -387,9 +387,11 @@ def _consolidate_with_template(self, response: WebSearchResponse) -> str:
f"[Consolidation] Context: {len(context.get('results', []))} results, "
f"{len(context.get('citations', []))} citations, max_results={context.get('max_results')}"
)
- if context.get('results'):
- first_result = context['results'][0]
- _logger.info(f"[Consolidation] First result in context: title='{first_result.get('title', '')[:50]}', snippet='{first_result.get('snippet', '')[:100]}'...")
+ if context.get("results"):
+ first_result = context["results"][0]
+ _logger.info(
+ f"[Consolidation] First result in context: title='{first_result.get('title', '')[:50]}', snippet='{first_result.get('snippet', '')[:100]}'..."
+ )
try:
rendered = template.render(**context)
diff --git a/src/services/search/providers/searxng.py b/src/services/search/providers/searxng.py
new file mode 100644
index 00000000..19eceb87
--- /dev/null
+++ b/src/services/search/providers/searxng.py
@@ -0,0 +1,292 @@
+"""
+SearXNG Metasearch Provider
+
+SearXNG is a free, open-source metasearch engine that aggregates results
+from multiple search engines while protecting user privacy.
+
+Features:
+- Privacy-focused: No API keys required for self-hosted instances
+- Configurable: Choose specific engines and categories
+- Free: No per-query costs
+- Self-hosted: Full control over search sources
+
+Configuration:
+- SEARXNG_BASE_URL: Base URL of SearXNG instance (default: http://localhost:8888)
+
+Note: JSON format must be enabled in SearXNG settings.yml:
+ search:
+ formats:
+ - html
+ - json
+"""
+
+from datetime import datetime
+import os
+from typing import Any
+
+import requests
+
+from ..base import BaseSearchProvider
+from ..types import Citation, SearchResult, WebSearchResponse
+from . import register_provider
+
+
+class SearXNGAPIError(Exception):
+ """SearXNG API error"""
+
+ pass
+
+
+@register_provider("searxng")
+class SearXNGProvider(BaseSearchProvider):
+ """SearXNG metasearch engine provider"""
+
+ display_name = "SearXNG"
+ description = "Privacy-focused metasearch engine"
+ api_key_env_var = "SEARXNG_BASE_URL"
+ requires_api_key = False
+ supports_answer = False
+
+ DEFAULT_BASE_URL = "http://localhost:8888"
+
+ def __init__(self, api_key: str | None = None, **kwargs: Any) -> None:
+ """
+ Initialize SearXNG provider.
+
+ Args:
+ api_key: Not used (SearXNG doesn't require an API key).
+ **kwargs: Additional configuration options.
+ """
+ super().__init__(api_key=api_key, **kwargs)
+ self.base_url = (
+ kwargs.get("base_url") or os.environ.get("SEARXNG_BASE_URL") or self.DEFAULT_BASE_URL
+ ).rstrip("/")
+
+ DEFAULT_ENGINES = "brave,bing,wikipedia,wikidata,wikinews"
+ DEFAULT_CATEGORIES = "general"
+
+ def search(
+ self,
+ query: str,
+ categories: str | None = None,
+ engines: str | None = None,
+ language: str | None = None,
+ time_range: str | None = None,
+ page: int = 1,
+ safesearch: int = 1,
+ timeout: int = 30,
+ **kwargs: Any,
+ ) -> WebSearchResponse:
+ """
+ Perform search using SearXNG API.
+
+ Args:
+ query: Search query.
+ categories: Comma-separated categories (e.g., 'general,science').
+ engines: Comma-separated engines (e.g., 'google,duckduckgo').
+ language: Language code (e.g., "en", "es"); omit or use "auto" for defaults.
+ time_range: Time filter ('day', 'month', 'year').
+ page: Page number (default 1).
+ safesearch: Safe search level (0, 1, 2).
+ timeout: Request timeout in seconds.
+ **kwargs: Additional SearXNG parameters.
+
+ Returns:
+ WebSearchResponse: Standardized search response.
+ """
+ effective_engines = engines or os.environ.get("SEARXNG_ENGINES") or self.DEFAULT_ENGINES
+ effective_categories = (
+ categories or os.environ.get("SEARXNG_CATEGORIES") or self.DEFAULT_CATEGORIES
+ )
+ effective_language = None if not language or language == "auto" else language
+ self.logger.info(
+ f"[SearXNG] Request: base_url={self.base_url}, language={effective_language or 'auto'}, "
+ f"categories={effective_categories}, engines={effective_engines}"
+ )
+
+ params: dict[str, Any] = {
+ "q": query,
+ "format": "json",
+ "pageno": page,
+ "safesearch": safesearch,
+ }
+
+ if effective_language:
+ params["language"] = effective_language
+
+ if effective_categories:
+ params["categories"] = effective_categories
+
+ if effective_engines:
+ params["engines"] = effective_engines
+ if time_range:
+ params["time_range"] = time_range
+
+ params.update(kwargs)
+
+ search_endpoint = f"{self.base_url}/search"
+
+ headers = {
+ "Accept": "application/json",
+ "User-Agent": "DeepTutor/1.0 (SearXNG API Client)",
+ }
+ if effective_language:
+ headers["Accept-Language"] = effective_language
+
+ self.logger.info(f"[SearXNG] Endpoint: {search_endpoint}")
+ self.logger.info(f"[SearXNG] Query params: {params}")
+
+ try:
+ response = requests.get(
+ search_endpoint,
+ params=params,
+ headers=headers,
+ timeout=timeout,
+ )
+ self.logger.info(f"[SearXNG] Request URL: {response.url}")
+ except requests.exceptions.RequestException as e:
+ self.logger.error(f"SearXNG request failed: {e}")
+ raise SearXNGAPIError(f"SearXNG request failed: {e}") from e
+
+ if response.status_code == 403:
+ self.logger.error(
+ "SearXNG returned 403 Forbidden. "
+ "JSON format must be enabled in SearXNG settings.yml: "
+ "search.formats: [html, json]"
+ )
+ raise SearXNGAPIError(
+ "SearXNG API returned 403 Forbidden. "
+ "Ensure JSON format is enabled in your SearXNG instance settings.yml: "
+ "search:\n formats:\n - html\n - json"
+ )
+
+ if response.status_code != 200:
+ self.logger.error(f"SearXNG API error: {response.status_code} - {response.text}")
+ raise SearXNGAPIError(f"SearXNG API error: {response.status_code} - {response.text}")
+
+ data = response.json()
+
+ self.logger.info(f"[SearXNG] Response status: {response.status_code}")
+ self.logger.info(f"[SearXNG] Response keys: {list(data.keys())}")
+ self.logger.info(f"[SearXNG] Results count: {len(data.get('results', []))}")
+ self.logger.info(f"[SearXNG] Answers count: {len(data.get('answers', []))}")
+ self.logger.info(f"[SearXNG] Suggestions: {data.get('suggestions', [])}")
+ self.logger.info(f"[SearXNG] Corrections: {data.get('corrections', [])}")
+ self.logger.info(f"[SearXNG] Infoboxes count: {len(data.get('infoboxes', []))}")
+
+ unresponsive = data.get("unresponsive_engines", [])
+ if unresponsive:
+ self.logger.warning(f"[SearXNG] Unresponsive engines: {unresponsive}")
+
+ if data.get("results"):
+ engine_counts: dict[str, int] = {}
+ for r in data["results"]:
+ eng = r.get("engine", "unknown")
+ engine_counts[eng] = engine_counts.get(eng, 0) + 1
+ self.logger.info(f"[SearXNG] Results by engine: {engine_counts}")
+ self.logger.info(f"[SearXNG] First result: {data['results'][0]}")
+ elif unresponsive:
+ engine_errors = ", ".join([f"{e[0]}({e[1]})" for e in unresponsive])
+ self.logger.error(
+ f"[SearXNG] No results - all engines failed: {engine_errors}. "
+ "Configure working engines in SearXNG settings.yml (brave, bing, wikipedia, wikidata, arxiv)"
+ )
+ else:
+ self.logger.warning(f"[SearXNG] No results returned. Full response: {data}")
+
+ citations: list[Citation] = []
+ search_results: list[SearchResult] = []
+
+ for i, result in enumerate(data.get("results", []), 1):
+ title = result.get("title", "")
+ url = result.get("url", "")
+ snippet = result.get("content", "")
+ date = result.get("publishedDate", "")
+ engine = result.get("engine", "")
+ category = result.get("category", "web")
+ score = result.get("score", 0.0)
+
+ attributes: dict[str, Any] = {}
+ if result.get("img_src"):
+ attributes["img_src"] = result["img_src"]
+ if engine:
+ attributes["engine"] = engine
+
+ sr = SearchResult(
+ title=title,
+ url=url,
+ snippet=snippet,
+ date=date,
+ source=engine,
+ score=score,
+ attributes=attributes,
+ )
+ search_results.append(sr)
+
+ citations.append(
+ Citation(
+ id=i,
+ reference=f"[{i}]",
+ url=url,
+ title=title,
+ snippet=snippet,
+ date=date,
+ source=engine,
+ type=category,
+ )
+ )
+
+ raw_answers = data.get("answers", [])
+ self.logger.info(f"[SearXNG] Raw answers: {raw_answers}")
+
+ answer_texts = []
+ for ans in raw_answers:
+ if isinstance(ans, str):
+ answer_texts.append(ans)
+ elif isinstance(ans, dict) and ans.get("content"):
+ answer_texts.append(ans["content"])
+
+ answer = "\n\n".join(answer_texts) if answer_texts else ""
+ self.logger.info(f"[SearXNG] Parsed answer: {answer[:200] if answer else 'None'}")
+
+ if not answer and search_results:
+ answer = search_results[0].snippet
+
+ metadata: dict[str, Any] = {
+ "finish_reason": "stop",
+ "base_url": self.base_url,
+ "answers": answer_texts,
+ "infoboxes": data.get("infoboxes", []),
+ "suggestions": data.get("suggestions", []),
+ "corrections": data.get("corrections", []),
+ }
+
+ self.logger.info(
+ f"[SearXNG] Final results: {len(search_results)} search_results, {len(citations)} citations"
+ )
+ self.logger.info(f"[SearXNG] Final answer length: {len(answer)} chars")
+
+ return WebSearchResponse(
+ query=query,
+ answer=answer,
+ provider="searxng",
+ timestamp=datetime.now().isoformat(),
+ model="searxng",
+ citations=citations,
+ search_results=search_results,
+ usage={},
+ metadata=metadata,
+ )
+
+ def is_available(self) -> bool:
+ """
+ Check if SearXNG instance is reachable.
+
+ Returns:
+ bool: True if instance responds, False otherwise.
+ """
+ try:
+ response = requests.get(f"{self.base_url}/", timeout=5)
+ return response.status_code == 200
+ except Exception:
+ return False
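The copy under src/services resolves its settings from environment variables; a short sketch of that configuration path follows, with hypothetical values and the same assumptions about import path and response attributes as in the earlier sketch.

import os

# Hypothetical values; the provider falls back to its built-in defaults
# (DEFAULT_BASE_URL, DEFAULT_ENGINES, DEFAULT_CATEGORIES) when these are unset.
os.environ["SEARXNG_BASE_URL"] = "http://localhost:8888"
os.environ["SEARXNG_ENGINES"] = "brave,wikipedia,wikidata"
os.environ["SEARXNG_CATEGORIES"] = "general"

from src.services.search.providers.searxng import SearXNGProvider

provider = SearXNGProvider()  # base_url is read from SEARXNG_BASE_URL
response = provider.search("large language models", language="en")
print(len(response.search_results), "results")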
diff --git a/web/app/settings/components/ConfigForm.tsx b/web/app/settings/components/ConfigForm.tsx
index de398cd9..c128b6d4 100644
--- a/web/app/settings/components/ConfigForm.tsx
+++ b/web/app/settings/components/ConfigForm.tsx
@@ -296,8 +296,8 @@ export default function ConfigForm({
- {/* Base URL (not for search) */}
- {!isSearchConfig && (
+ {/* Base URL (not for search, except searxng) */}
+ {(!isSearchConfig || provider === "searxng") && (