Skip to content

Commit

Permalink
quote url and use linkSite to improve accuracy
Browse files Browse the repository at this point in the history
  • Loading branch information
TimidRobot committed Dec 22, 2024
1 parent 4c6dbd7 commit 5626c40
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions scripts/1-fetch/gcs_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,16 @@ def query_gcs(args, service, last_completed_plan_index, plan):
for plan_row in plan[start:stop]: # noqa: E203
index = plan.index(plan_row) + 1
query_info = f"index: {index}, tool: {plan_row['TOOL_IDENTIFIER']}"
encoded_tool_url = urllib.parse.quote(plan_row["TOOL_URL"], safe=":/")
query_params = {"cx": GCS_CX, "q": encoded_tool_url}
# Note that the URL is wrapped in double quotes (in addition to being
# percent-encoded), which improves accuracy
# https://blog.google/products/search/how-were-improving-search-results-when-you-use-quotes/
encoded_tool_url = urllib.parse.quote(
f'"{plan_row["TOOL_URL"]}"', safe=":/"
)
query_params = {
"cx": GCS_CX,
"linkSite": plan_row["TOOL_URL"].lstrip("/"),
"q": encoded_tool_url,
}
if plan_row["COUNTRY"]:
query_info = f"{query_info}, country: {plan_row['COUNTRY']}"
query_params["cr"] = plan_row["CR"]
Expand Down

0 comments on commit 5626c40

Please sign in to comment.