diff --git a/harmony/harmony.py b/harmony/harmony.py index 6fbc14a..9f88040 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -36,6 +36,7 @@ from enum import Enum from typing import Any, ContextManager, IO, Iterator, List, Mapping, NamedTuple, Optional, \ Tuple, Generator, Union +from urllib import parse import curlify import dateutil.parser @@ -1242,6 +1243,8 @@ def _is_staged_result(self, url: str) -> str: Returns: A boolean indicating whether the data is staged data. """ + if 'harmony' not in url: + return False url_parts = url.split('/') possible_uuid = url_parts[-3] possible_item_id = url_parts[-2] @@ -1265,10 +1268,11 @@ def get_download_filename_from_url(self, url: str) -> str: Returns: The filename that will be used to name the downloaded file. """ - url_parts = url.split('/') + url_no_query = parse.urlunparse(parse.urlparse(url)._replace(query="")) + url_parts = url_no_query.split('/') original_filename = url_parts[-1] - is_staged_result = self._is_staged_result(url) + is_staged_result = self._is_staged_result(url_no_query) if not is_staged_result: return original_filename item_id = url_parts[-2] @@ -1298,6 +1302,7 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) chunksize = int(self.config.DOWNLOAD_CHUNK_SIZE) session = self._session() filename = self.get_download_filename_from_url(url) + new_url = url if directory: filename = os.path.join(directory, filename) @@ -1308,10 +1313,18 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) print(filename) return filename else: + data_dict = None + parse_result = parse.urlparse(url) + is_opendap = parse_result.netloc.startswith('opendap') + method = 'post' if is_opendap else 'get' + if is_opendap: # remove the query params from the URL and convert to dict + new_url = parse.urlunparse(parse_result._replace(query="")) + data_dict = dict(parse.parse_qsl(parse.urlsplit(url).query)) headers = { "Accept-Encoding": "identity" } - with session.get(url, stream=True, headers=headers) as r: + with getattr(session, method)( + new_url, data=data_dict, stream=True, headers=headers) as r: with open(filename, 'wb') as f: shutil.copyfileobj(r.raw, f, length=chunksize) if verbose and verbose.upper() == 'TRUE': diff --git a/tests/test_client.py b/tests/test_client.py index e08459f..da8de54 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1007,6 +1007,29 @@ def test_download_file(overwrite): if not overwrite: os.unlink(expected_filename) +def test_download_opendap_file(): + expected_data = bytes('abcde', encoding='utf-8') + expected_filename = 'SC:ATL03.006:264549068' + query = '?dap4.ce=/ds_surf_type[0:1:4]' + path = 'https://opendap.uat.earthdata.nasa.gov/collections/C1261703111-EEDTEST/granules/' + url = path + expected_filename + query + actual_output = None + + with io.BytesIO() as file_obj: + file_obj.write(expected_data) + file_obj.seek(0) + with responses.RequestsMock() as resp_mock: + resp_mock.add(responses.POST, path + expected_filename, body=file_obj.read(), stream=True, + match=[responses.matchers.urlencoded_params_matcher({"dap4.ce": "/ds_surf_type[0:1:4]"})]) + client = Client(should_validate_auth=False) + actual_output = client._download_file(url, overwrite=False) + + assert actual_output == expected_filename + with open(expected_filename, 'rb') as temp_file: + data = temp_file.read() + assert data == expected_data + + os.unlink(actual_output) def test_download_all(mocker): expected_urls = [