From bbb48fa69318378d3257244fd7ea1a9794e3954d Mon Sep 17 00:00:00 2001
From: Rashad Philizaire
Date: Fri, 31 May 2024 16:46:33 -0400
Subject: [PATCH] added bing search

---
 README.md                            | 13 +++++--
 src/backend/search/providers/bing.py | 54 ++++++++++++++++++++++++++++
 src/backend/search/search_service.py | 15 +++++++-
 3 files changed, 78 insertions(+), 4 deletions(-)
 create mode 100644 src/backend/search/providers/bing.py

diff --git a/README.md b/README.md
index b1d6c99..5e5d7ee 100644
--- a/README.md
+++ b/README.md
@@ -35,14 +35,14 @@ Please feel free to contact me on [Twitter](https://twitter.com/rashadphz) or [c
 
 - Frontend: [Next.js](https://nextjs.org/)
 - Backend: [FastAPI](https://fastapi.tiangolo.com/)
-- Search API: [SearXNG](https://github.com/searxng/searxng), [Tavily](https://tavily.com/), [Serper](https://serper.dev/)
+- Search API: [SearXNG](https://github.com/searxng/searxng), [Tavily](https://tavily.com/), [Serper](https://serper.dev/), [Bing](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)
 - Logging: [Logfire](https://pydantic.dev/logfire)
 - Rate Limiting: [Redis](https://redis.io/)
 - Components: [shadcn/ui](https://ui.shadcn.com/)
 
 ## Features
 
-- Search with multiple search providers (Tavily, Searxng, Serper)
+- Search with multiple search providers (Tavily, Searxng, Serper, Bing)
 - Answer questions with cloud models (OpenAI/gpt4-o, OpenAI/gpt3.5-turbo, Groq/Llama3)
 - Answer questions with local models (llama3, mistral, gemma, phi3)
 
@@ -60,6 +60,7 @@ Please feel free to contact me on [Twitter](https://twitter.com/rashadphz) or [c
 - [Tavily (Optional)](https://app.tavily.com/home)
 - [Serper (Optional)](https://serper.dev/dashboard)
 - [OpenAI (Optional)](https://platform.openai.com/api-keys)
+- [Bing (Optional)](https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)
 - [Groq (Optional)](https://console.groq.com/keys)
 
 ### 1. Clone the Repo
@@ -77,7 +78,7 @@ touch .env
 Add the following variables to the .env file:
 
 #### Search Provider
-You can use Tavily, Searxng, or Serper as the search provider.
+You can use Tavily, Searxng, Serper, or Bing as the search provider.
 
 **Searxng** (No API Key Required)
 ```
@@ -95,6 +96,12 @@ SERPER_API_KEY=...
 SEARCH_PROVIDER=serper
 ```
 
+**Bing** (Requires API Key)
+```
+BING_API_KEY=...
+SEARCH_PROVIDER=bing
+```
+
 #### Optional
 
 ```
diff --git a/src/backend/search/providers/bing.py b/src/backend/search/providers/bing.py
new file mode 100644
index 0000000..716d931
--- /dev/null
+++ b/src/backend/search/providers/bing.py
@@ -0,0 +1,54 @@
+import asyncio
+
+import httpx
+
+from backend.schemas import SearchResponse, SearchResult
+from backend.search.providers.base import SearchProvider
+
+
+class BingSearchProvider(SearchProvider):
+    def __init__(self, api_key: str):
+        self.host = "https://api.bing.microsoft.com/v7.0"
+        self.headers = {
+            "Ocp-Apim-Subscription-Key": api_key,
+            "Content-Type": "application/json",
+        }
+
+    async def search(self, query: str) -> SearchResponse:
+        async with httpx.AsyncClient() as client:
+            link_results, image_results = await asyncio.gather(
+                self.get_link_results(client, query),
+                self.get_image_results(client, query),
+            )
+
+        return SearchResponse(results=link_results, images=image_results)
+
+    async def get_link_results(
+        self, client: httpx.AsyncClient, query: str, num_results: int = 6
+    ) -> list[SearchResult]:
+        response = await client.get(
+            f"{self.host}/search",
+            headers=self.headers,
+            params={"q": query, "count": num_results},
+        )
+        results = response.json()
+
+        return [
+            SearchResult(
+                title=result["name"],
+                url=result["url"],
+                content=result["snippet"],
+            )
+            for result in results["webPages"]["value"][:num_results]
+        ]
+
+    async def get_image_results(
+        self, client: httpx.AsyncClient, query: str, num_results: int = 5
+    ) -> list[str]:
+        response = await client.get(
+            f"{self.host}/images/search",
+            headers=self.headers,
+            params={"q": query, "count": num_results},
+        )
+        results = response.json()
+        return [result["contentUrl"] for result in results["value"][:num_results]]
diff --git a/src/backend/search/search_service.py b/src/backend/search/search_service.py
index 48bb004..504bb9e 100644
--- a/src/backend/search/search_service.py
+++ b/src/backend/search/search_service.py
@@ -7,6 +7,7 @@
 
 from backend.schemas import SearchResponse
 from backend.search.providers.base import SearchProvider
+from backend.search.providers.bing import BingSearchProvider
 from backend.search.providers.searxng import SearxngSearchProvider
 from backend.search.providers.serper import SerperSearchProvider
 from backend.search.providers.tavily import TavilySearchProvider
@@ -48,9 +49,18 @@ def get_serper_api_key():
     return serper_api_key
 
 
+def get_bing_api_key():
+    bing_api_key = os.getenv("BING_API_KEY")
+    if not bing_api_key:
+        raise HTTPException(
+            status_code=500,
+            detail="Bing API key is not set in the environment variables. Please set the BING_API_KEY environment variable or set SEARCH_PROVIDER to 'searxng', 'tavily', or 'serper'.",
+        )
+    return bing_api_key
+
+
 def get_search_provider() -> SearchProvider:
     search_provider = os.getenv("SEARCH_PROVIDER", "tavily")
-    print(f"Search provider: {search_provider}")
 
     match search_provider:
         case "searxng":
@@ -62,6 +72,9 @@ def get_search_provider() -> SearchProvider:
         case "serper":
             serper_api_key = get_serper_api_key()
             return SerperSearchProvider(serper_api_key)
+        case "bing":
+            bing_api_key = get_bing_api_key()
+            return BingSearchProvider(bing_api_key)
         case _:
             raise HTTPException(
                 status_code=500,