From 35290e521475a39b0e02275dfb90a3251c3e0922 Mon Sep 17 00:00:00 2001 From: Lizhen You Date: Mon, 23 Dec 2024 23:40:18 -0800 Subject: [PATCH] Add User-Agent header to download_dataset function Fix the HTTP Error 403 by replacing urlretrieve() with a urlopen() request that sends a User-Agent header Signed-off-by: Lizhen You --- python/cuvs_bench/cuvs_bench/get_dataset/__main__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py b/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py index a6b154ef2..f26a24a40 100644 --- a/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py +++ b/python/cuvs_bench/cuvs_bench/get_dataset/__main__.py @@ -17,7 +17,7 @@ import os import subprocess import sys -from urllib.request import urlretrieve +import urllib.request def get_dataset_path(name, ann_bench_data_path): @@ -29,7 +29,10 @@ def get_dataset_path(name, ann_bench_data_path): def download_dataset(url, path): if not os.path.exists(path): print(f"downloading {url} -> {path}...") - urlretrieve(url, path) + req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"}) + with urllib.request.urlopen(req) as response, open(path, 'wb') as out_file: + data = response.read() + out_file.write(data) def convert_hdf5_to_fbin(path, normalize):