From ac85053630ffab1fdb1e45fe8f8adb416d6ecac8 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 17 Sep 2025 14:14:19 -0600
Subject: [PATCH 1/5] Add `PatchedSerpApiClient`

---
 elm/web/search/base.py | 61 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/elm/web/search/base.py b/elm/web/search/base.py
index add72d49..7b61e595 100644
--- a/elm/web/search/base.py
+++ b/elm/web/search/base.py
@@ -4,10 +4,12 @@
 import random
 import asyncio
 import logging
+import requests
 from urllib.parse import quote
 from abc import ABC, abstractmethod
 from contextlib import asynccontextmanager
 
+from serpapi.serp_api_client import SerpApiClient
 from rebrowser_playwright.async_api import async_playwright
 from playwright_stealth import StealthConfig
 
@@ -286,6 +288,65 @@ def __init__(self, api_key=None):
         self.api_key = api_key or os.environ.get(self.API_KEY_VAR or "")
 
 
+class PatchedSerpApiClient(SerpApiClient):
+    """SerpApiClient patched to allow bypassing of SSL verification"""
+
+    def __init__(self, params_dict, engine=None, timeout=60000, verify=True):
+        """
+
+        Parameters
+        ----------
+        params_dict : dict
+            Search parameters forwarded to ``SerpApiClient``.
+        engine : str, optional
+            Search engine name forwarded to ``SerpApiClient``.
+            By default, ``None``.
+        timeout : int, default=60000
+            Timeout forwarded to ``SerpApiClient``. ``get_response``
+            passes ``self.timeout`` to ``requests.get``, which
+            interprets the value in seconds.
+        verify : bool, default=True
+            Option to use SSL verification when making request to API
+            endpoint. By default, ``True``.
+        """
+        super().__init__(params_dict=params_dict, engine=engine,
+                         timeout=timeout)
+        self.verify = verify
+
+    def get_response(self, path='/search'):
+        """Get search response
+
+        Parameters
+        ----------
+        path : str, default='/search'
+            API path to use for the search.
+
+        Returns
+        -------
+        Response object provided by ``requests.get``.
+
+        Raises
+        ------
+        requests.RequestException
+            If the request cannot be completed (e.g. connection,
+            timeout, or SSL failure). The failure is logged before
+            being re-raised.
+        """
+        url = None
+        try:
+            url, parameter = self.construct_url(path)
+            return requests.get(url, parameter, timeout=self.timeout,
+                                verify=self.verify)
+        except requests.RequestException as e:
+            # ``requests.get`` does not raise on HTTP error statuses, so
+            # catch the base class to log the failures that do occur;
+            # such errors may carry no response (and so no status code)
+            status_code = getattr(e.response, "status_code", None)
+            logger.error("fail: %s", url)
+            logger.error("%s (status code: %s)", e, status_code)
+            raise
+
+
 async def _navigate_to_se_url(page, se_url, timeout=90_000):
     """Navigate to search engine url"""
     await page.goto(se_url)

From 239191907f69c56ae933580b43793416b4c4d0aa Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 17 Sep 2025 14:16:27 -0600
Subject: [PATCH 2/5] Add `SerpAPIGoogleSearch`

---
 elm/web/search/google.py | 63 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/elm/web/search/google.py b/elm/web/search/google.py
index 525a6ca4..7214103c 100644
--- a/elm/web/search/google.py
+++ b/elm/web/search/google.py
@@ -13,7 +13,8 @@
 from playwright.async_api import TimeoutError as PlaywrightTimeoutError
 
 from elm.web.search.base import (PlaywrightSearchEngineLinkSearch,
-                                 APISearchEngineLinkSearch)
+                                 APISearchEngineLinkSearch,
+                                 PatchedSerpApiClient)
 
 
 logger = logging.getLogger(__name__)
@@ -239,6 +240,66 @@ async def _search(self, query, num_results=10):
         return list(filter(None, (info.get("link") for info in results)))
 
 
+class SerpAPIGoogleSearch(APISearchEngineLinkSearch):
+    """Search Google for links using the SerpAPI service"""
+
+    _SE_NAME = "SerpAPI (Google)"
+
+    API_KEY_VAR = "SERPAPI_KEY"
+    """Environment variable that should contain the SerpAPI key"""
+
+    def __init__(self, api_key=None, verify=False):
+        """
+
+        Parameters
+        ----------
+        api_key : str, optional
+            API key for the SerpAPI search API. If ``None``, will look
+            up the API key using the ``"SERPAPI_KEY"`` environment
+            variable. By default, ``None``.
+        verify : bool, default=False
+            Option to use SSL verification when making request to API
+            endpoint. Note that the default disables certificate
+            checks; pass ``True`` unless verification must be
+            bypassed. By default, ``False``.
+        """
+        super().__init__(api_key=api_key)
+        self.verify = verify
+
+    async def _search(self, query, num_results=10, **param_kwargs):
+        """Search web for links related to a query
+
+        Parameters
+        ----------
+        query : str
+            Search query sent as the ``"q"`` parameter.
+        num_results : int, default=10
+            Maximum number of links to return. Results are truncated
+            after the response is received.
+        **param_kwargs
+            Extra request parameters; these override the defaults.
+
+        Returns
+        -------
+        list
+            Links from the ``"organic_results"`` of the response.
+        """
+        params = {"q": query, "hl": "en", "gl": "us", "api_key": self.api_key}
+        params.update(param_kwargs)
+
+        client = PatchedSerpApiClient(params, engine="google",
+                                      verify=self.verify)
+        results = client.get_dict()
+        if "error" in results:
+            # failed searches report an "error" message instead of results
+            logger.warning("SerpAPI returned an error: %s", results["error"])
+
+        # a present-but-null "link" must not break the "+" escaping
+        links = ((info.get("link") or "").replace("+", "%20")
+                 for info in results.get("organic_results", []))
+        return list(filter(None, links))[:num_results]
+
+
 class APISerperSearch(APISearchEngineLinkSearch):
     """Search the web for links using the Google Serper API"""
 

From bb0132a723fa431bdefa45da32a191d2e74f787c Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 17 Sep 2025 14:17:30 -0600
Subject: [PATCH 3/5] Add `SerpAPIGoogleSearch` as option

---
 elm/web/search/run.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/elm/web/search/run.py b/elm/web/search/run.py
index 79838538..a997dac7 100644
--- a/elm/web/search/run.py
+++ b/elm/web/search/run.py
@@ -14,6 +14,7 @@
                                        PlaywrightDuckDuckGoLinkSearch)
 from elm.web.search.dux import DuxDistributedGlobalSearch
 from elm.web.search.google import (APIGoogleCSESearch, APISerperSearch,
+                                   SerpAPIGoogleSearch,
                                    CamoufoxGoogleLinkSearch,
                                    PlaywrightGoogleCSELinkSearch,
                                    PlaywrightGoogleLinkSearch)
@@ -34,6 +35,8 @@
                                   "google_cse_api_kwargs"),
     "APISerperSearch": _SE_OPT(APISerperSearch, False,
                                "google_serper_api_kwargs"),
+    "SerpAPIGoogleSearch": _SE_OPT(SerpAPIGoogleSearch, False,
+                                   "google_serpapi_kwargs"),
     "APITavilySearch": _SE_OPT(APITavilySearch, False, "tavily_api_kwargs"),
     "CamoufoxGoogleLinkSearch": _SE_OPT(CamoufoxGoogleLinkSearch, True,
                                         "cf_google_se_kwargs"),

From 316223bbc7baf4025b5fbaa29d27fc866c3ec300 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 17 Sep 2025 14:19:19 -0600
Subject: [PATCH 4/5] Bump version

---
 elm/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/elm/version.py b/elm/version.py
index 9f8fb8d7..30843a73 100644
--- a/elm/version.py
+++ b/elm/version.py
@@ -2,4 +2,4 @@
 ELM version number
 """
 
-__version__ = "0.0.25"
+__version__ = "0.0.26"

From 1313596979529b946063353a120206b7f2ea3c06 Mon Sep 17 00:00:00 2001
From: ppinchuk
Date: Wed, 17 Sep 2025 14:22:17 -0600
Subject: [PATCH 5/5] Add new dep

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index c74c83a2..b65e220a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,7 @@ crawl4ai
 ddgs
 fake_useragent>=2.0.3
 google-api-python-client
+google-search-results
 html2text
 httpx
 langchain