From 673364d6a25c802ec536903253c83b1d889567c9 Mon Sep 17 00:00:00 2001 From: "Bingxi Zhao (Frank)" <150592536+pancacake@users.noreply.github.com> Date: Thu, 15 Jan 2026 18:30:08 +0800 Subject: [PATCH 1/9] update discord --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 47f62b1d..a362fead 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ [![License](https://img.shields.io/badge/License-AGPL--3.0-blue?style=flat-square)](LICENSE)

- Discord + Discord    Feishu    From c3549e8ce0ecd9c08118e2a3013bf49df14f9f5c Mon Sep 17 00:00:00 2001 From: "Bingxi Zhao (Frank)" <150592536+pancacake@users.noreply.github.com> Date: Thu, 15 Jan 2026 18:30:55 +0800 Subject: [PATCH 2/9] update discord --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 535e8d83..0ec57968 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ Contributing to DeepTutor 🚀 Thank you for your interest in contributing to DeepTutor! We are committed to building a smooth and robust intelligent learning companion, and we welcome developers of all skill levels to join us. Join our community for discussion, support, and collaboration:

-Discord  +Discord  WeChat  Feishu

From 5b8b9d6de8e5b88a26dd1b7fcf320a9a92917ff9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 15 Jan 2026 12:54:48 +0000 Subject: [PATCH 3/9] chore: update repo roster images [skip ci] --- assets/roster/forkers.svg | 14 +++++++------- assets/roster/stargazers.svg | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/assets/roster/forkers.svg b/assets/roster/forkers.svg index 5a23cf1d..d42ce14a 100644 --- a/assets/roster/forkers.svg +++ b/assets/roster/forkers.svg @@ -10,21 +10,21 @@ Forkers - + - + - + - + - + - -and 1,108 others + +and 1,127 others \ No newline at end of file diff --git a/assets/roster/stargazers.svg b/assets/roster/stargazers.svg index 300b5b44..25514294 100644 --- a/assets/roster/stargazers.svg +++ b/assets/roster/stargazers.svg @@ -26,5 +26,5 @@ -and 8,489 others +and 8,672 others \ No newline at end of file From 42f8c42e00e0a67604d6a49ec5ad812fd2b3b038 Mon Sep 17 00:00:00 2001 From: andres Date: Thu, 15 Jan 2026 14:07:01 -0600 Subject: [PATCH 4/9] "Add support for SearXNG metasearch provider, including templates, context handling, and API integration." --- src/tools/web_search/consolidation.py | 65 ++++- src/tools/web_search/providers/__init__.py | 2 +- src/tools/web_search/providers/searxng.py | 292 +++++++++++++++++++++ 3 files changed, 355 insertions(+), 4 deletions(-) create mode 100644 src/tools/web_search/providers/searxng.py diff --git a/src/tools/web_search/consolidation.py b/src/tools/web_search/consolidation.py index c888bb49..4f7b735c 100644 --- a/src/tools/web_search/consolidation.py +++ b/src/tools/web_search/consolidation.py @@ -136,6 +136,50 @@ {% endfor %} --- *{{ results|length }} academic papers found via Google Scholar*""", + # ------------------------------------------------------------------------- + # SEARXNG TEMPLATE + # ------------------------------------------------------------------------- + "searxng": """{% if answers %} +### Direct Answers +{% for answer in answers %} +{{ answer }} +{% endfor %} + +--- +{% endif %} +{% if infoboxes %} +{% for infobox in infoboxes %} +## {{ infobox.infobox }}{% if infobox.id %} ({{ infobox.id }}){% endif %} + +{{ infobox.content }} +{% if infobox.urls %} +{% for url in infobox.urls[:3] %} +- [{{ url.title }}]({{ url.url }}) +{% endfor %} +{% endif %} + +--- +{% endfor %} +{% endif %} +### Search Results for "{{ query }}" + +{% for result in results[:max_results] %} +**[{{ loop.index }}] {{ result.title }}** +{{ result.snippet }} +{% if result.date %}*{{ result.date }}*{% endif %} +{% if result.attributes.engine %}*via {{ result.attributes.engine }}*{% endif %} +{{ result.url }} + +{% endfor %} +{% if suggestions %} +--- +*Suggestions: {% for s in suggestions[:5] %}{{ s }}{% if not loop.last %}, {% endif %}{% endfor %}* +{% endif %} +{% if corrections %} +*Did you mean: {% for c in corrections[:3] %}{{ c }}{% if not loop.last %}, {% endif %}{% endfor %}* +{% endif %} +--- +*{{ results|length }} results from SearXNG metasearch*""", } @@ -157,6 +201,7 @@ class AnswerConsolidator: "serper": "serper", "jina": "jina", "serper_scholar": "serper_scholar", + "searxng": "searxng", } def __init__( @@ -315,6 +360,15 @@ def _build_provider_context(self, response: WebSearchResponse) -> dict[str, Any] context["links"] = metadata.get("links", {}) context["images"] = metadata.get("images", {}) + # ----------------------------------------------------------------- + # SEARXNG-specific context + # ----------------------------------------------------------------- + elif provider_lower == 
"searxng": + context["answers"] = metadata.get("answers", []) + context["infoboxes"] = metadata.get("infoboxes", []) + context["suggestions"] = metadata.get("suggestions", []) + context["corrections"] = metadata.get("corrections", []) + return context def _consolidate_with_template(self, response: WebSearchResponse) -> str: @@ -327,13 +381,18 @@ def _consolidate_with_template(self, response: WebSearchResponse) -> str: # Build context with provider-specific fields context = self._build_provider_context(response) - _logger.debug( - f"Context has {len(context.get('results', []))} results, {len(context.get('citations', []))} citations" + _logger.info( + f"[Consolidation] Context: {len(context.get('results', []))} results, " + f"{len(context.get('citations', []))} citations, max_results={context.get('max_results')}" ) + if context.get('results'): + first_result = context['results'][0] + _logger.info(f"[Consolidation] First result in context: title='{first_result.get('title', '')[:50]}', snippet='{first_result.get('snippet', '')[:100]}'...") try: rendered = template.render(**context) - _logger.debug("Template rendered successfully") + _logger.info(f"[Consolidation] Template rendered ({len(rendered)} chars)") + _logger.info(f"[Consolidation] Rendered content:\n{rendered}") return rendered except Exception as e: _logger.error(f"Template rendering failed: {e}") diff --git a/src/tools/web_search/providers/__init__.py b/src/tools/web_search/providers/__init__.py index 9d123453..add60bc9 100644 --- a/src/tools/web_search/providers/__init__.py +++ b/src/tools/web_search/providers/__init__.py @@ -117,7 +117,7 @@ def get_default_provider(**kwargs) -> BaseSearchProvider: # Auto-import all providers to trigger registration -from . import baidu, exa, jina, perplexity, serper, tavily +from . import baidu, exa, jina, perplexity, searxng, serper, tavily __all__ = [ "register_provider", diff --git a/src/tools/web_search/providers/searxng.py b/src/tools/web_search/providers/searxng.py new file mode 100644 index 00000000..09af6ccf --- /dev/null +++ b/src/tools/web_search/providers/searxng.py @@ -0,0 +1,292 @@ +""" +SearXNG Metasearch Provider + +SearXNG is a free, open-source metasearch engine that aggregates results +from multiple search engines while protecting user privacy. + +Features: +- Privacy-focused: No API keys required for self-hosted instances +- Configurable: Choose specific engines and categories +- Free: No per-query costs +- Self-hosted: Full control over search sources + +Configuration: +- SEARXNG_BASE_URL: Base URL of SearXNG instance (default: http://localhost:8888) + +Note: JSON format must be enabled in SearXNG settings.yml: + search: + formats: + - html + - json +""" + +from datetime import datetime +import os +from typing import Any + +import requests + +from ..base import BaseSearchProvider +from ..types import Citation, SearchResult, WebSearchResponse +from . import register_provider + + +class SearXNGAPIError(Exception): + """SearXNG API error""" + + pass + + +@register_provider("searxng") +class SearXNGProvider(BaseSearchProvider): + """SearXNG metasearch engine provider""" + + display_name = "SearXNG" + description = "Privacy-focused metasearch engine" + api_key_env_var = "SEARXNG_BASE_URL" + requires_api_key = False + supports_answer = False + + DEFAULT_BASE_URL = "http://192.168.1.90:8888" + + def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """ + Initialize SearXNG provider. + + Args: + api_key: Not used (SearXNG doesn't require API key). 
+ **kwargs: Additional configuration options. + """ + super().__init__(api_key=api_key, **kwargs) + self.base_url = ( + kwargs.get("base_url") + or os.environ.get("SEARXNG_BASE_URL") + or self.DEFAULT_BASE_URL + ).rstrip("/") + + DEFAULT_ENGINES = "brave,bing,wikipedia,wikidata,wikinews" + DEFAULT_CATEGORIES = "general" + + def search( + self, + query: str, + categories: str | None = None, + engines: str | None = None, + language: str | None = None, + time_range: str | None = None, + page: int = 1, + safesearch: int = 1, + timeout: int = 30, + **kwargs: Any, + ) -> WebSearchResponse: + """ + Perform search using SearXNG API. + + Args: + query: Search query. + categories: Comma-separated categories (e.g., 'general,science'). + engines: Comma-separated engines (e.g., 'google,duckduckgo'). + language: Language code (e.g., "en", "es"); omit or use "auto" for defaults. + time_range: Time filter ('day', 'month', 'year'). + page: Page number (default 1). + safesearch: Safe search level (0, 1, 2). + timeout: Request timeout in seconds. + **kwargs: Additional SearXNG parameters. + + Returns: + WebSearchResponse: Standardized search response. + """ + effective_engines = engines or os.environ.get("SEARXNG_ENGINES") or self.DEFAULT_ENGINES + effective_categories = categories or os.environ.get("SEARXNG_CATEGORIES") or self.DEFAULT_CATEGORIES + effective_language = None if not language or language == "auto" else language + self.logger.info( + f"[SearXNG] Request: base_url={self.base_url}, language={effective_language or 'auto'}, " + f"categories={effective_categories}, engines={effective_engines}" + ) + + params: dict[str, Any] = { + "q": query, + "format": "json", + "pageno": page, + "safesearch": safesearch, + } + + if effective_language: + params["language"] = effective_language + + if effective_categories: + params["categories"] = effective_categories + + if effective_engines: + params["engines"] = effective_engines + if time_range: + params["time_range"] = time_range + + params.update(kwargs) + + search_endpoint = f"{self.base_url}/search" + + headers = { + "Accept": "application/json", + "User-Agent": "DeepTutor/1.0 (SearXNG API Client)", + } + if effective_language: + headers["Accept-Language"] = effective_language + + self.logger.info(f"[SearXNG] Endpoint: {search_endpoint}") + self.logger.info(f"[SearXNG] Query params: {params}") + + try: + response = requests.get( + search_endpoint, + params=params, + headers=headers, + timeout=timeout, + ) + self.logger.info(f"[SearXNG] Request URL: {response.url}") + except requests.exceptions.RequestException as e: + self.logger.error(f"SearXNG request failed: {e}") + raise SearXNGAPIError(f"SearXNG request failed: {e}") from e + + if response.status_code == 403: + self.logger.error( + "SearXNG returned 403 Forbidden. " + "JSON format must be enabled in SearXNG settings.yml: " + "search.formats: [html, json]" + ) + raise SearXNGAPIError( + "SearXNG API returned 403 Forbidden. 
" + "Ensure JSON format is enabled in your SearXNG instance settings.yml: " + "search:\n formats:\n - html\n - json" + ) + + if response.status_code != 200: + self.logger.error(f"SearXNG API error: {response.status_code} - {response.text}") + raise SearXNGAPIError( + f"SearXNG API error: {response.status_code} - {response.text}" + ) + + data = response.json() + + self.logger.info(f"[SearXNG] Response status: {response.status_code}") + self.logger.info(f"[SearXNG] Response keys: {list(data.keys())}") + self.logger.info(f"[SearXNG] Results count: {len(data.get('results', []))}") + self.logger.info(f"[SearXNG] Answers count: {len(data.get('answers', []))}") + self.logger.info(f"[SearXNG] Suggestions: {data.get('suggestions', [])}") + self.logger.info(f"[SearXNG] Corrections: {data.get('corrections', [])}") + self.logger.info(f"[SearXNG] Infoboxes count: {len(data.get('infoboxes', []))}") + + unresponsive = data.get("unresponsive_engines", []) + if unresponsive: + self.logger.warning(f"[SearXNG] Unresponsive engines: {unresponsive}") + + if data.get('results'): + engine_counts: dict[str, int] = {} + for r in data['results']: + eng = r.get('engine', 'unknown') + engine_counts[eng] = engine_counts.get(eng, 0) + 1 + self.logger.info(f"[SearXNG] Results by engine: {engine_counts}") + self.logger.info(f"[SearXNG] First result: {data['results'][0]}") + elif unresponsive: + engine_errors = ", ".join([f"{e[0]}({e[1]})" for e in unresponsive]) + self.logger.error( + f"[SearXNG] No results - all engines failed: {engine_errors}. " + "Configure working engines in SearXNG settings.yml (brave, bing, wikipedia, wikidata, arxiv)" + ) + else: + self.logger.warning(f"[SearXNG] No results returned. Full response: {data}") + + citations: list[Citation] = [] + search_results: list[SearchResult] = [] + + for i, result in enumerate(data.get("results", []), 1): + title = result.get("title", "") + url = result.get("url", "") + snippet = result.get("content", "") + date = result.get("publishedDate", "") + engine = result.get("engine", "") + category = result.get("category", "web") + score = result.get("score", 0.0) + + attributes: dict[str, Any] = {} + if result.get("img_src"): + attributes["img_src"] = result["img_src"] + if engine: + attributes["engine"] = engine + + sr = SearchResult( + title=title, + url=url, + snippet=snippet, + date=date, + source=engine, + score=score, + attributes=attributes, + ) + search_results.append(sr) + + citations.append( + Citation( + id=i, + reference=f"[{i}]", + url=url, + title=title, + snippet=snippet, + date=date, + source=engine, + type=category, + ) + ) + + raw_answers = data.get("answers", []) + self.logger.info(f"[SearXNG] Raw answers: {raw_answers}") + + answer_texts = [] + for ans in raw_answers: + if isinstance(ans, str): + answer_texts.append(ans) + elif isinstance(ans, dict) and ans.get("content"): + answer_texts.append(ans["content"]) + + answer = "\n\n".join(answer_texts) if answer_texts else "" + self.logger.info(f"[SearXNG] Parsed answer: {answer[:200] if answer else 'None'}") + + if not answer and search_results: + answer = search_results[0].snippet + + metadata: dict[str, Any] = { + "finish_reason": "stop", + "base_url": self.base_url, + "answers": answer_texts, + "infoboxes": data.get("infoboxes", []), + "suggestions": data.get("suggestions", []), + "corrections": data.get("corrections", []), + } + + self.logger.info(f"[SearXNG] Final results: {len(search_results)} search_results, {len(citations)} citations") + self.logger.info(f"[SearXNG] Final answer length: 
{len(answer)} chars") + + return WebSearchResponse( + query=query, + answer=answer, + provider="searxng", + timestamp=datetime.now().isoformat(), + model="searxng", + citations=citations, + search_results=search_results, + usage={}, + metadata=metadata, + ) + + def is_available(self) -> bool: + """ + Check if SearXNG instance is reachable. + + Returns: + bool: True if instance responds, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/", timeout=5) + return response.status_code == 200 + except Exception: + return False From 1d8648b0f89e5f7e7e4565f12fcff91d65cecc9c Mon Sep 17 00:00:00 2001 From: andres Date: Thu, 15 Jan 2026 14:10:23 -0600 Subject: [PATCH 5/9] "Remove SearXNG provider implementation and all associated code from the web search module." --- src/tools/web_search/providers/searxng.py | 292 ---------------------- 1 file changed, 292 deletions(-) delete mode 100644 src/tools/web_search/providers/searxng.py diff --git a/src/tools/web_search/providers/searxng.py b/src/tools/web_search/providers/searxng.py deleted file mode 100644 index 09af6ccf..00000000 --- a/src/tools/web_search/providers/searxng.py +++ /dev/null @@ -1,292 +0,0 @@ -""" -SearXNG Metasearch Provider - -SearXNG is a free, open-source metasearch engine that aggregates results -from multiple search engines while protecting user privacy. - -Features: -- Privacy-focused: No API keys required for self-hosted instances -- Configurable: Choose specific engines and categories -- Free: No per-query costs -- Self-hosted: Full control over search sources - -Configuration: -- SEARXNG_BASE_URL: Base URL of SearXNG instance (default: http://localhost:8888) - -Note: JSON format must be enabled in SearXNG settings.yml: - search: - formats: - - html - - json -""" - -from datetime import datetime -import os -from typing import Any - -import requests - -from ..base import BaseSearchProvider -from ..types import Citation, SearchResult, WebSearchResponse -from . import register_provider - - -class SearXNGAPIError(Exception): - """SearXNG API error""" - - pass - - -@register_provider("searxng") -class SearXNGProvider(BaseSearchProvider): - """SearXNG metasearch engine provider""" - - display_name = "SearXNG" - description = "Privacy-focused metasearch engine" - api_key_env_var = "SEARXNG_BASE_URL" - requires_api_key = False - supports_answer = False - - DEFAULT_BASE_URL = "http://192.168.1.90:8888" - - def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: - """ - Initialize SearXNG provider. - - Args: - api_key: Not used (SearXNG doesn't require API key). - **kwargs: Additional configuration options. - """ - super().__init__(api_key=api_key, **kwargs) - self.base_url = ( - kwargs.get("base_url") - or os.environ.get("SEARXNG_BASE_URL") - or self.DEFAULT_BASE_URL - ).rstrip("/") - - DEFAULT_ENGINES = "brave,bing,wikipedia,wikidata,wikinews" - DEFAULT_CATEGORIES = "general" - - def search( - self, - query: str, - categories: str | None = None, - engines: str | None = None, - language: str | None = None, - time_range: str | None = None, - page: int = 1, - safesearch: int = 1, - timeout: int = 30, - **kwargs: Any, - ) -> WebSearchResponse: - """ - Perform search using SearXNG API. - - Args: - query: Search query. - categories: Comma-separated categories (e.g., 'general,science'). - engines: Comma-separated engines (e.g., 'google,duckduckgo'). - language: Language code (e.g., "en", "es"); omit or use "auto" for defaults. - time_range: Time filter ('day', 'month', 'year'). 
- page: Page number (default 1). - safesearch: Safe search level (0, 1, 2). - timeout: Request timeout in seconds. - **kwargs: Additional SearXNG parameters. - - Returns: - WebSearchResponse: Standardized search response. - """ - effective_engines = engines or os.environ.get("SEARXNG_ENGINES") or self.DEFAULT_ENGINES - effective_categories = categories or os.environ.get("SEARXNG_CATEGORIES") or self.DEFAULT_CATEGORIES - effective_language = None if not language or language == "auto" else language - self.logger.info( - f"[SearXNG] Request: base_url={self.base_url}, language={effective_language or 'auto'}, " - f"categories={effective_categories}, engines={effective_engines}" - ) - - params: dict[str, Any] = { - "q": query, - "format": "json", - "pageno": page, - "safesearch": safesearch, - } - - if effective_language: - params["language"] = effective_language - - if effective_categories: - params["categories"] = effective_categories - - if effective_engines: - params["engines"] = effective_engines - if time_range: - params["time_range"] = time_range - - params.update(kwargs) - - search_endpoint = f"{self.base_url}/search" - - headers = { - "Accept": "application/json", - "User-Agent": "DeepTutor/1.0 (SearXNG API Client)", - } - if effective_language: - headers["Accept-Language"] = effective_language - - self.logger.info(f"[SearXNG] Endpoint: {search_endpoint}") - self.logger.info(f"[SearXNG] Query params: {params}") - - try: - response = requests.get( - search_endpoint, - params=params, - headers=headers, - timeout=timeout, - ) - self.logger.info(f"[SearXNG] Request URL: {response.url}") - except requests.exceptions.RequestException as e: - self.logger.error(f"SearXNG request failed: {e}") - raise SearXNGAPIError(f"SearXNG request failed: {e}") from e - - if response.status_code == 403: - self.logger.error( - "SearXNG returned 403 Forbidden. " - "JSON format must be enabled in SearXNG settings.yml: " - "search.formats: [html, json]" - ) - raise SearXNGAPIError( - "SearXNG API returned 403 Forbidden. 
" - "Ensure JSON format is enabled in your SearXNG instance settings.yml: " - "search:\n formats:\n - html\n - json" - ) - - if response.status_code != 200: - self.logger.error(f"SearXNG API error: {response.status_code} - {response.text}") - raise SearXNGAPIError( - f"SearXNG API error: {response.status_code} - {response.text}" - ) - - data = response.json() - - self.logger.info(f"[SearXNG] Response status: {response.status_code}") - self.logger.info(f"[SearXNG] Response keys: {list(data.keys())}") - self.logger.info(f"[SearXNG] Results count: {len(data.get('results', []))}") - self.logger.info(f"[SearXNG] Answers count: {len(data.get('answers', []))}") - self.logger.info(f"[SearXNG] Suggestions: {data.get('suggestions', [])}") - self.logger.info(f"[SearXNG] Corrections: {data.get('corrections', [])}") - self.logger.info(f"[SearXNG] Infoboxes count: {len(data.get('infoboxes', []))}") - - unresponsive = data.get("unresponsive_engines", []) - if unresponsive: - self.logger.warning(f"[SearXNG] Unresponsive engines: {unresponsive}") - - if data.get('results'): - engine_counts: dict[str, int] = {} - for r in data['results']: - eng = r.get('engine', 'unknown') - engine_counts[eng] = engine_counts.get(eng, 0) + 1 - self.logger.info(f"[SearXNG] Results by engine: {engine_counts}") - self.logger.info(f"[SearXNG] First result: {data['results'][0]}") - elif unresponsive: - engine_errors = ", ".join([f"{e[0]}({e[1]})" for e in unresponsive]) - self.logger.error( - f"[SearXNG] No results - all engines failed: {engine_errors}. " - "Configure working engines in SearXNG settings.yml (brave, bing, wikipedia, wikidata, arxiv)" - ) - else: - self.logger.warning(f"[SearXNG] No results returned. Full response: {data}") - - citations: list[Citation] = [] - search_results: list[SearchResult] = [] - - for i, result in enumerate(data.get("results", []), 1): - title = result.get("title", "") - url = result.get("url", "") - snippet = result.get("content", "") - date = result.get("publishedDate", "") - engine = result.get("engine", "") - category = result.get("category", "web") - score = result.get("score", 0.0) - - attributes: dict[str, Any] = {} - if result.get("img_src"): - attributes["img_src"] = result["img_src"] - if engine: - attributes["engine"] = engine - - sr = SearchResult( - title=title, - url=url, - snippet=snippet, - date=date, - source=engine, - score=score, - attributes=attributes, - ) - search_results.append(sr) - - citations.append( - Citation( - id=i, - reference=f"[{i}]", - url=url, - title=title, - snippet=snippet, - date=date, - source=engine, - type=category, - ) - ) - - raw_answers = data.get("answers", []) - self.logger.info(f"[SearXNG] Raw answers: {raw_answers}") - - answer_texts = [] - for ans in raw_answers: - if isinstance(ans, str): - answer_texts.append(ans) - elif isinstance(ans, dict) and ans.get("content"): - answer_texts.append(ans["content"]) - - answer = "\n\n".join(answer_texts) if answer_texts else "" - self.logger.info(f"[SearXNG] Parsed answer: {answer[:200] if answer else 'None'}") - - if not answer and search_results: - answer = search_results[0].snippet - - metadata: dict[str, Any] = { - "finish_reason": "stop", - "base_url": self.base_url, - "answers": answer_texts, - "infoboxes": data.get("infoboxes", []), - "suggestions": data.get("suggestions", []), - "corrections": data.get("corrections", []), - } - - self.logger.info(f"[SearXNG] Final results: {len(search_results)} search_results, {len(citations)} citations") - self.logger.info(f"[SearXNG] Final answer length: 
{len(answer)} chars") - - return WebSearchResponse( - query=query, - answer=answer, - provider="searxng", - timestamp=datetime.now().isoformat(), - model="searxng", - citations=citations, - search_results=search_results, - usage={}, - metadata=metadata, - ) - - def is_available(self) -> bool: - """ - Check if SearXNG instance is reachable. - - Returns: - bool: True if instance responds, False otherwise. - """ - try: - response = requests.get(f"{self.base_url}/", timeout=5) - return response.status_code == 200 - except Exception: - return False From 746dcc9854f760a4b9e93612ab43a1443b713a44 Mon Sep 17 00:00:00 2001 From: andres Date: Thu, 15 Jan 2026 14:50:20 -0600 Subject: [PATCH 6/9] "Add SearXNG metasearch provider integration with configuration updates and API support." --- src/services/config/unified_config.py | 2 +- src/services/search/consolidation.py | 8 +- src/services/search/providers/searxng.py | 292 +++++++++++++++++++++ web/app/settings/components/ConfigForm.tsx | 4 +- web/app/settings/constants.ts | 21 +- 5 files changed, 319 insertions(+), 8 deletions(-) create mode 100644 src/services/search/providers/searxng.py diff --git a/src/services/config/unified_config.py b/src/services/config/unified_config.py index f33f2397..8a17992e 100644 --- a/src/services/config/unified_config.py +++ b/src/services/config/unified_config.py @@ -49,7 +49,7 @@ class ConfigType(str, Enum): ], ConfigType.EMBEDDING: ["openai", "azure_openai", "ollama", "jina", "cohere", "huggingface"], ConfigType.TTS: ["openai", "azure_openai"], - ConfigType.SEARCH: ["perplexity", "tavily", "exa", "jina", "serper", "baidu"], + ConfigType.SEARCH: ["perplexity", "tavily", "exa", "jina", "serper", "baidu", "searxng"], } # Environment variable mappings for each service type diff --git a/src/services/search/consolidation.py b/src/services/search/consolidation.py index 8ae834cb..021712fb 100644 --- a/src/services/search/consolidation.py +++ b/src/services/search/consolidation.py @@ -387,9 +387,11 @@ def _consolidate_with_template(self, response: WebSearchResponse) -> str: f"[Consolidation] Context: {len(context.get('results', []))} results, " f"{len(context.get('citations', []))} citations, max_results={context.get('max_results')}" ) - if context.get('results'): - first_result = context['results'][0] - _logger.info(f"[Consolidation] First result in context: title='{first_result.get('title', '')[:50]}', snippet='{first_result.get('snippet', '')[:100]}'...") + if context.get("results"): + first_result = context["results"][0] + _logger.info( + f"[Consolidation] First result in context: title='{first_result.get('title', '')[:50]}', snippet='{first_result.get('snippet', '')[:100]}'..." + ) try: rendered = template.render(**context) diff --git a/src/services/search/providers/searxng.py b/src/services/search/providers/searxng.py new file mode 100644 index 00000000..19eceb87 --- /dev/null +++ b/src/services/search/providers/searxng.py @@ -0,0 +1,292 @@ +""" +SearXNG Metasearch Provider + +SearXNG is a free, open-source metasearch engine that aggregates results +from multiple search engines while protecting user privacy. 
+ +Features: +- Privacy-focused: No API keys required for self-hosted instances +- Configurable: Choose specific engines and categories +- Free: No per-query costs +- Self-hosted: Full control over search sources + +Configuration: +- SEARXNG_BASE_URL: Base URL of SearXNG instance (default: http://localhost:8888) + +Note: JSON format must be enabled in SearXNG settings.yml: + search: + formats: + - html + - json +""" + +from datetime import datetime +import os +from typing import Any + +import requests + +from ..base import BaseSearchProvider +from ..types import Citation, SearchResult, WebSearchResponse +from . import register_provider + + +class SearXNGAPIError(Exception): + """SearXNG API error""" + + pass + + +@register_provider("searxng") +class SearXNGProvider(BaseSearchProvider): + """SearXNG metasearch engine provider""" + + display_name = "SearXNG" + description = "Privacy-focused metasearch engine" + api_key_env_var = "SEARXNG_BASE_URL" + requires_api_key = False + supports_answer = False + + DEFAULT_BASE_URL = "http://192.168.1.90:8888" + + def __init__(self, api_key: str | None = None, **kwargs: Any) -> None: + """ + Initialize SearXNG provider. + + Args: + api_key: Not used (SearXNG doesn't require API key). + **kwargs: Additional configuration options. + """ + super().__init__(api_key=api_key, **kwargs) + self.base_url = ( + kwargs.get("base_url") or os.environ.get("SEARXNG_BASE_URL") or self.DEFAULT_BASE_URL + ).rstrip("/") + + DEFAULT_ENGINES = "brave,bing,wikipedia,wikidata,wikinews" + DEFAULT_CATEGORIES = "general" + + def search( + self, + query: str, + categories: str | None = None, + engines: str | None = None, + language: str | None = None, + time_range: str | None = None, + page: int = 1, + safesearch: int = 1, + timeout: int = 30, + **kwargs: Any, + ) -> WebSearchResponse: + """ + Perform search using SearXNG API. + + Args: + query: Search query. + categories: Comma-separated categories (e.g., 'general,science'). + engines: Comma-separated engines (e.g., 'google,duckduckgo'). + language: Language code (e.g., "en", "es"); omit or use "auto" for defaults. + time_range: Time filter ('day', 'month', 'year'). + page: Page number (default 1). + safesearch: Safe search level (0, 1, 2). + timeout: Request timeout in seconds. + **kwargs: Additional SearXNG parameters. + + Returns: + WebSearchResponse: Standardized search response. 
+ """ + effective_engines = engines or os.environ.get("SEARXNG_ENGINES") or self.DEFAULT_ENGINES + effective_categories = ( + categories or os.environ.get("SEARXNG_CATEGORIES") or self.DEFAULT_CATEGORIES + ) + effective_language = None if not language or language == "auto" else language + self.logger.info( + f"[SearXNG] Request: base_url={self.base_url}, language={effective_language or 'auto'}, " + f"categories={effective_categories}, engines={effective_engines}" + ) + + params: dict[str, Any] = { + "q": query, + "format": "json", + "pageno": page, + "safesearch": safesearch, + } + + if effective_language: + params["language"] = effective_language + + if effective_categories: + params["categories"] = effective_categories + + if effective_engines: + params["engines"] = effective_engines + if time_range: + params["time_range"] = time_range + + params.update(kwargs) + + search_endpoint = f"{self.base_url}/search" + + headers = { + "Accept": "application/json", + "User-Agent": "DeepTutor/1.0 (SearXNG API Client)", + } + if effective_language: + headers["Accept-Language"] = effective_language + + self.logger.info(f"[SearXNG] Endpoint: {search_endpoint}") + self.logger.info(f"[SearXNG] Query params: {params}") + + try: + response = requests.get( + search_endpoint, + params=params, + headers=headers, + timeout=timeout, + ) + self.logger.info(f"[SearXNG] Request URL: {response.url}") + except requests.exceptions.RequestException as e: + self.logger.error(f"SearXNG request failed: {e}") + raise SearXNGAPIError(f"SearXNG request failed: {e}") from e + + if response.status_code == 403: + self.logger.error( + "SearXNG returned 403 Forbidden. " + "JSON format must be enabled in SearXNG settings.yml: " + "search.formats: [html, json]" + ) + raise SearXNGAPIError( + "SearXNG API returned 403 Forbidden. " + "Ensure JSON format is enabled in your SearXNG instance settings.yml: " + "search:\n formats:\n - html\n - json" + ) + + if response.status_code != 200: + self.logger.error(f"SearXNG API error: {response.status_code} - {response.text}") + raise SearXNGAPIError(f"SearXNG API error: {response.status_code} - {response.text}") + + data = response.json() + + self.logger.info(f"[SearXNG] Response status: {response.status_code}") + self.logger.info(f"[SearXNG] Response keys: {list(data.keys())}") + self.logger.info(f"[SearXNG] Results count: {len(data.get('results', []))}") + self.logger.info(f"[SearXNG] Answers count: {len(data.get('answers', []))}") + self.logger.info(f"[SearXNG] Suggestions: {data.get('suggestions', [])}") + self.logger.info(f"[SearXNG] Corrections: {data.get('corrections', [])}") + self.logger.info(f"[SearXNG] Infoboxes count: {len(data.get('infoboxes', []))}") + + unresponsive = data.get("unresponsive_engines", []) + if unresponsive: + self.logger.warning(f"[SearXNG] Unresponsive engines: {unresponsive}") + + if data.get("results"): + engine_counts: dict[str, int] = {} + for r in data["results"]: + eng = r.get("engine", "unknown") + engine_counts[eng] = engine_counts.get(eng, 0) + 1 + self.logger.info(f"[SearXNG] Results by engine: {engine_counts}") + self.logger.info(f"[SearXNG] First result: {data['results'][0]}") + elif unresponsive: + engine_errors = ", ".join([f"{e[0]}({e[1]})" for e in unresponsive]) + self.logger.error( + f"[SearXNG] No results - all engines failed: {engine_errors}. " + "Configure working engines in SearXNG settings.yml (brave, bing, wikipedia, wikidata, arxiv)" + ) + else: + self.logger.warning(f"[SearXNG] No results returned. 
Full response: {data}") + + citations: list[Citation] = [] + search_results: list[SearchResult] = [] + + for i, result in enumerate(data.get("results", []), 1): + title = result.get("title", "") + url = result.get("url", "") + snippet = result.get("content", "") + date = result.get("publishedDate", "") + engine = result.get("engine", "") + category = result.get("category", "web") + score = result.get("score", 0.0) + + attributes: dict[str, Any] = {} + if result.get("img_src"): + attributes["img_src"] = result["img_src"] + if engine: + attributes["engine"] = engine + + sr = SearchResult( + title=title, + url=url, + snippet=snippet, + date=date, + source=engine, + score=score, + attributes=attributes, + ) + search_results.append(sr) + + citations.append( + Citation( + id=i, + reference=f"[{i}]", + url=url, + title=title, + snippet=snippet, + date=date, + source=engine, + type=category, + ) + ) + + raw_answers = data.get("answers", []) + self.logger.info(f"[SearXNG] Raw answers: {raw_answers}") + + answer_texts = [] + for ans in raw_answers: + if isinstance(ans, str): + answer_texts.append(ans) + elif isinstance(ans, dict) and ans.get("content"): + answer_texts.append(ans["content"]) + + answer = "\n\n".join(answer_texts) if answer_texts else "" + self.logger.info(f"[SearXNG] Parsed answer: {answer[:200] if answer else 'None'}") + + if not answer and search_results: + answer = search_results[0].snippet + + metadata: dict[str, Any] = { + "finish_reason": "stop", + "base_url": self.base_url, + "answers": answer_texts, + "infoboxes": data.get("infoboxes", []), + "suggestions": data.get("suggestions", []), + "corrections": data.get("corrections", []), + } + + self.logger.info( + f"[SearXNG] Final results: {len(search_results)} search_results, {len(citations)} citations" + ) + self.logger.info(f"[SearXNG] Final answer length: {len(answer)} chars") + + return WebSearchResponse( + query=query, + answer=answer, + provider="searxng", + timestamp=datetime.now().isoformat(), + model="searxng", + citations=citations, + search_results=search_results, + usage={}, + metadata=metadata, + ) + + def is_available(self) -> bool: + """ + Check if SearXNG instance is reachable. + + Returns: + bool: True if instance responds, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/", timeout=5) + return response.status_code == 200 + except Exception: + return False diff --git a/web/app/settings/components/ConfigForm.tsx b/web/app/settings/components/ConfigForm.tsx index de398cd9..c128b6d4 100644 --- a/web/app/settings/components/ConfigForm.tsx +++ b/web/app/settings/components/ConfigForm.tsx @@ -296,8 +296,8 @@ export default function ConfigForm({ - {/* Base URL (not for search) */} - {!isSearchConfig && ( + {/* Base URL (not for search, except searxng) */} + {(!isSearchConfig || (isSearchConfig && provider === "searxng")) && (
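
The patches above register a "searxng" provider, feed its metadata (answers, infoboxes, suggestions, corrections) into the consolidation template, and expose the Base URL field in the settings UI for this provider. A minimal usage sketch follows; it is illustrative only and not part of the patch series. It assumes the module layout introduced in PATCH 6/9 (src/services/search/providers/searxng.py), a reachable instance at SEARXNG_BASE_URL with JSON output enabled in settings.yml (search.formats including json, as the module docstring notes), and the WebSearchResponse/Citation fields shown in the diff.

    import os

    # Point at a SearXNG instance; localhost:8888 here is an assumption for the sketch,
    # not necessarily what the shipped DEFAULT_BASE_URL resolves to.
    os.environ.setdefault("SEARXNG_BASE_URL", "http://localhost:8888")
    os.environ.setdefault("SEARXNG_ENGINES", "brave,bing,wikipedia")  # optional override of DEFAULT_ENGINES

    # Import path mirrors the file added in PATCH 6/9; adjust to your package layout.
    from src.services.search.providers.searxng import SearXNGProvider

    provider = SearXNGProvider()
    if provider.is_available():
        # search() accepts the parameters documented in the diff (categories, engines,
        # language, time_range, page, safesearch, timeout).
        response = provider.search("open source metasearch engines", language="en", time_range="year")
        print(f"{len(response.search_results)} results via {response.provider}")
        for citation in response.citations[:3]:
            print(citation.reference, citation.title, citation.url)
        print("answers:", response.metadata.get("answers", []))
    else:
        print(f"SearXNG not reachable at {provider.base_url}; check SEARXNG_BASE_URL and settings.yml")

Because supports_answer is False and the consolidation layer selects the "searxng" template by provider name, downstream code should rely on the rendered search results and metadata rather than expecting a synthesized answer beyond the first snippet fallback shown in the diff.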