From 018ff1e63bcf30453542fad8708af42eefd23fdb Mon Sep 17 00:00:00 2001 From: vinny Date: Tue, 19 Nov 2024 13:57:49 -0500 Subject: [PATCH 1/5] HARMONY-1929: Convert opendap get data to post --- harmony/harmony.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/harmony/harmony.py b/harmony/harmony.py index 6fbc14a..0e48228 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -36,6 +36,7 @@ from enum import Enum from typing import Any, ContextManager, IO, Iterator, List, Mapping, NamedTuple, Optional, \ Tuple, Generator, Union +from urllib import parse import curlify import dateutil.parser @@ -1308,10 +1309,17 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) print(filename) return filename else: + data_dict = {} + parse_result = parse.urlparse(url) + is_opendap = parse_result.netloc.startswith('opendap') + method = 'post' if is_opendap else 'get' + if is_opendap: # remove the query params from the URL and convert to dict + url = parse.urlunparse(parse_result._replace(query="")) + data_dict = dict(parse.parse_qsl(parse.urlsplit(url).query)) headers = { "Accept-Encoding": "identity" } - with session.get(url, stream=True, headers=headers) as r: + with getattr(session, method)(url, data=data_dict, stream=True, headers=headers) as r: with open(filename, 'wb') as f: shutil.copyfileobj(r.raw, f, length=chunksize) if verbose and verbose.upper() == 'TRUE': From 0332a43d6bef39c471a6dd95da2792aadd77b88b Mon Sep 17 00:00:00 2001 From: vinny Date: Tue, 19 Nov 2024 13:59:15 -0500 Subject: [PATCH 2/5] HARMONY-1929: Init data_dict to None --- harmony/harmony.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmony/harmony.py b/harmony/harmony.py index 0e48228..a8f171f 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -1309,7 +1309,7 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) print(filename) return filename else: - data_dict = {} + data_dict = None parse_result = parse.urlparse(url) is_opendap = parse_result.netloc.startswith('opendap') method = 'post' if is_opendap else 'get' From 3c27921c4f4b2c3880ebcfb61afaff534f65d2aa Mon Sep 17 00:00:00 2001 From: vinny Date: Wed, 20 Nov 2024 08:54:22 -0500 Subject: [PATCH 3/5] HARMONY-1929: test_download_opendap_file --- harmony/harmony.py | 7 +++++-- tests/test_client.py | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/harmony/harmony.py b/harmony/harmony.py index a8f171f..2a68eb5 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -1243,6 +1243,8 @@ def _is_staged_result(self, url: str) -> str: Returns: A boolean indicating whether the data is staged data. """ + if 'harmony' not in url: + return False url_parts = url.split('/') possible_uuid = url_parts[-3] possible_item_id = url_parts[-2] @@ -1266,10 +1268,11 @@ def get_download_filename_from_url(self, url: str) -> str: Returns: The filename that will be used to name the downloaded file. """ - url_parts = url.split('/') + url_no_query = parse.urlunparse(parse.urlparse(url)._replace(query="")) + url_parts = url_no_query.split('/') original_filename = url_parts[-1] - is_staged_result = self._is_staged_result(url) + is_staged_result = self._is_staged_result(url_no_query) if not is_staged_result: return original_filename item_id = url_parts[-2] diff --git a/tests/test_client.py b/tests/test_client.py index e08459f..d9a87a3 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1007,6 +1007,29 @@ def test_download_file(overwrite): if not overwrite: os.unlink(expected_filename) +def test_download_opendap_file(): + expected_data = bytes('abcde', encoding='utf-8') + expected_filename = 'SC:ATL03.006:264549068' + query = '?dap4.ce=/ds_surf_type[0:1:4]' + path = 'https://opendap.uat.earthdata.nasa.gov/collections/C1261703111-EEDTEST/granules/' + url = path + expected_filename + query + actual_output = None + + with io.BytesIO() as file_obj: + file_obj.write(expected_data) + file_obj.seek(0) + with responses.RequestsMock() as resp_mock: + resp_mock.add(responses.POST, path + expected_filename, body=file_obj.read(), stream=True) + client = Client(should_validate_auth=False) + actual_output = client._download_file(url, overwrite=False) + + # TODO assert POST body params are as expected + + assert actual_output == expected_filename + with open(expected_filename, 'rb') as temp_file: + data = temp_file.read() + assert data == expected_data + os.unlink(actual_output) def test_download_all(mocker): expected_urls = [ From 9d6f6852f6d5e5f1401e630ce5d6e8617897a37f Mon Sep 17 00:00:00 2001 From: vinny Date: Thu, 21 Nov 2024 08:15:46 -0500 Subject: [PATCH 4/5] HARMONY-1929: Test that POST body matches opendap query --- harmony/harmony.py | 5 +++-- tests/test_client.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/harmony/harmony.py b/harmony/harmony.py index 2a68eb5..02f77c8 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -1302,6 +1302,7 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) chunksize = int(self.config.DOWNLOAD_CHUNK_SIZE) session = self._session() filename = self.get_download_filename_from_url(url) + new_url = url if directory: filename = os.path.join(directory, filename) @@ -1317,12 +1318,12 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) is_opendap = parse_result.netloc.startswith('opendap') method = 'post' if is_opendap else 'get' if is_opendap: # remove the query params from the URL and convert to dict - url = parse.urlunparse(parse_result._replace(query="")) + new_url = parse.urlunparse(parse_result._replace(query="")) data_dict = dict(parse.parse_qsl(parse.urlsplit(url).query)) headers = { "Accept-Encoding": "identity" } - with getattr(session, method)(url, data=data_dict, stream=True, headers=headers) as r: + with getattr(session, method)(new_url, data=data_dict, stream=True, headers=headers) as r: with open(filename, 'wb') as f: shutil.copyfileobj(r.raw, f, length=chunksize) if verbose and verbose.upper() == 'TRUE': diff --git a/tests/test_client.py b/tests/test_client.py index d9a87a3..da8de54 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1019,16 +1019,16 @@ def test_download_opendap_file(): file_obj.write(expected_data) file_obj.seek(0) with responses.RequestsMock() as resp_mock: - resp_mock.add(responses.POST, path + expected_filename, body=file_obj.read(), stream=True) + resp_mock.add(responses.POST, path + expected_filename, body=file_obj.read(), stream=True, + match=[responses.matchers.urlencoded_params_matcher({"dap4.ce": "/ds_surf_type[0:1:4]"})]) client = Client(should_validate_auth=False) actual_output = client._download_file(url, overwrite=False) - # TODO assert POST body params are as expected - assert actual_output == expected_filename with open(expected_filename, 'rb') as temp_file: data = temp_file.read() assert data == expected_data + os.unlink(actual_output) def test_download_all(mocker): From a19fcd0b614a35d3b8bd8d1dd50bbf06bef7da99 Mon Sep 17 00:00:00 2001 From: vinny Date: Thu, 21 Nov 2024 08:17:55 -0500 Subject: [PATCH 5/5] HARMONY-1929: Lint cleanup --- harmony/harmony.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/harmony/harmony.py b/harmony/harmony.py index 02f77c8..9f88040 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -1317,13 +1317,14 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) parse_result = parse.urlparse(url) is_opendap = parse_result.netloc.startswith('opendap') method = 'post' if is_opendap else 'get' - if is_opendap: # remove the query params from the URL and convert to dict + if is_opendap: # remove the query params from the URL and convert to dict new_url = parse.urlunparse(parse_result._replace(query="")) data_dict = dict(parse.parse_qsl(parse.urlsplit(url).query)) headers = { "Accept-Encoding": "identity" } - with getattr(session, method)(new_url, data=data_dict, stream=True, headers=headers) as r: + with getattr(session, method)( + new_url, data=data_dict, stream=True, headers=headers) as r: with open(filename, 'wb') as f: shutil.copyfileobj(r.raw, f, length=chunksize) if verbose and verbose.upper() == 'TRUE':