From 5626c40766a8330a9681efd8e19112d97cceacc9 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta
Date: Sun, 22 Dec 2024 07:21:25 -0800
Subject: [PATCH] quote url and use linkSite to improve accuracy

---
 scripts/1-fetch/gcs_fetch.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/scripts/1-fetch/gcs_fetch.py b/scripts/1-fetch/gcs_fetch.py
index 49a2886..031cfd5 100755
--- a/scripts/1-fetch/gcs_fetch.py
+++ b/scripts/1-fetch/gcs_fetch.py
@@ -203,8 +203,16 @@ def query_gcs(args, service, last_completed_plan_index, plan):
     for plan_row in plan[start:stop]:  # noqa: E203
         index = plan.index(plan_row) + 1
         query_info = f"index: {index}, tool: {plan_row['TOOL_IDENTIFIER']}"
-        encoded_tool_url = urllib.parse.quote(plan_row["TOOL_URL"], safe=":/")
-        query_params = {"cx": GCS_CX, "q": encoded_tool_url}
+        # Note that the URL is quoted, which improves accuracy
+        # https://blog.google/products/search/how-were-improving-search-results-when-you-use-quotes/
+        encoded_tool_url = urllib.parse.quote(
+            f'"{plan_row["TOOL_URL"]}"', safe=":/"
+        )
+        query_params = {
+            "cx": GCS_CX,
+            "linkSite": plan_row["TOOL_URL"].lstrip("/"),
+            "q": encoded_tool_url,
+        }
         if plan_row["COUNTRY"]:
             query_info = f"{query_info}, country: {plan_row['COUNTRY']}"
             query_params["cr"] = plan_row["CR"]