Skip to content

Commit

Permalink
Merge pull request #97 from nasa/HARMONY-1929
Browse files: browse the repository at this point in the history.
Harmony 1929 - Ensure OPeNDAP downloads use POST
  • Loading branch information
vinnyinverso authored Nov 22, 2024
2 parents 5d218d5 + a19fcd0 commit 26f014e
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
19 changes: 16 additions & 3 deletions harmony/harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from enum import Enum
from typing import Any, ContextManager, IO, Iterator, List, Mapping, NamedTuple, Optional, \
Tuple, Generator, Union
from urllib import parse

import curlify
import dateutil.parser
Expand Down Expand Up @@ -1242,6 +1243,8 @@ def _is_staged_result(self, url: str) -> str:
Returns:
A boolean indicating whether the data is staged data.
"""
if 'harmony' not in url:
return False
url_parts = url.split('/')
possible_uuid = url_parts[-3]
possible_item_id = url_parts[-2]
Expand All @@ -1265,10 +1268,11 @@ def get_download_filename_from_url(self, url: str) -> str:
Returns:
The filename that will be used to name the downloaded file.
"""
url_parts = url.split('/')
url_no_query = parse.urlunparse(parse.urlparse(url)._replace(query=""))
url_parts = url_no_query.split('/')
original_filename = url_parts[-1]

is_staged_result = self._is_staged_result(url)
is_staged_result = self._is_staged_result(url_no_query)
if not is_staged_result:
return original_filename
item_id = url_parts[-2]
Expand Down Expand Up @@ -1298,6 +1302,7 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False)
chunksize = int(self.config.DOWNLOAD_CHUNK_SIZE)
session = self._session()
filename = self.get_download_filename_from_url(url)
new_url = url

if directory:
filename = os.path.join(directory, filename)
Expand All @@ -1308,10 +1313,18 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False)
print(filename)
return filename
else:
data_dict = None
parse_result = parse.urlparse(url)
is_opendap = parse_result.netloc.startswith('opendap')
method = 'post' if is_opendap else 'get'
if is_opendap: # remove the query params from the URL and convert to dict
new_url = parse.urlunparse(parse_result._replace(query=""))
data_dict = dict(parse.parse_qsl(parse.urlsplit(url).query))
headers = {
"Accept-Encoding": "identity"
}
with session.get(url, stream=True, headers=headers) as r:
with getattr(session, method)(
new_url, data=data_dict, stream=True, headers=headers) as r:
with open(filename, 'wb') as f:
shutil.copyfileobj(r.raw, f, length=chunksize)
if verbose and verbose.upper() == 'TRUE':
Expand Down
23 changes: 23 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,29 @@ def test_download_file(overwrite):
if not overwrite:
os.unlink(expected_filename)

def test_download_opendap_file():
    """Check that an OPeNDAP download is issued as a POST request.

    The URL handed to ``Client._download_file`` carries a ``dap4.ce``
    query string. The mocked endpoint is registered for POST only, at the
    URL *without* the query string, and only matches when ``dap4.ce`` is
    present in the url-encoded request body. The test therefore fails if
    the client sends a GET or leaves the constraint expression in the URL.

    The downloaded file is written to the current working directory, so it
    is removed in a ``finally`` clause; otherwise a failing assertion would
    leave it behind for subsequent runs.
    """
    expected_data = bytes('abcde', encoding='utf-8')
    expected_filename = 'SC:ATL03.006:264549068'
    query = '?dap4.ce=/ds_surf_type[0:1:4]'
    path = 'https://opendap.uat.earthdata.nasa.gov/collections/C1261703111-EEDTEST/granules/'
    granule_url = path + expected_filename
    url = granule_url + query

    try:
        with responses.RequestsMock() as resp_mock:
            resp_mock.add(
                responses.POST, granule_url, body=expected_data, stream=True,
                match=[responses.matchers.urlencoded_params_matcher(
                    {"dap4.ce": "/ds_surf_type[0:1:4]"})])
            client = Client(should_validate_auth=False)
            actual_output = client._download_file(url, overwrite=False)

        # The query string must not leak into the name of the saved file.
        assert actual_output == expected_filename
        with open(expected_filename, 'rb') as temp_file:
            assert temp_file.read() == expected_data
    finally:
        # Clean up even when an assertion above fails; the file may not
        # exist at all if the download itself raised.
        if os.path.exists(expected_filename):
            os.unlink(expected_filename)

def test_download_all(mocker):
expected_urls = [
Expand Down

0 comments on commit 26f014e

Please sign in to comment.