Merge pull request #97 from nasa/HARMONY-1929

Harmony 1929 - Ensure OPeNDAP downloads use POST
nasa · Nov 22, 2024 · 26f014e
2 parents 5d218d5 + a19fcd0
commit 26f014e
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 3 deletions.
diff --git a/harmony/harmony.py b/harmony/harmony.py
@@ -36,6 +36,7 @@
 from enum import Enum
 from typing import Any, ContextManager, IO, Iterator, List, Mapping, NamedTuple, Optional, \
     Tuple, Generator, Union
+from urllib import parse
 
 import curlify
 import dateutil.parser
@@ -1242,6 +1243,8 @@ def _is_staged_result(self, url: str) -> str:
         Returns:
             A boolean indicating whether the data is staged data.
         """
+        if 'harmony' not in url:
+            return False
         url_parts = url.split('/')
         possible_uuid = url_parts[-3]
         possible_item_id = url_parts[-2]
@@ -1265,10 +1268,11 @@ def get_download_filename_from_url(self, url: str) -> str:
         Returns:
             The filename that will be used to name the downloaded file.
         """
-        url_parts = url.split('/')
+        url_no_query = parse.urlunparse(parse.urlparse(url)._replace(query=""))
+        url_parts = url_no_query.split('/')
         original_filename = url_parts[-1]
 
-        is_staged_result = self._is_staged_result(url)
+        is_staged_result = self._is_staged_result(url_no_query)
         if not is_staged_result:
             return original_filename
         item_id = url_parts[-2]
@@ -1298,6 +1302,7 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False)
         chunksize = int(self.config.DOWNLOAD_CHUNK_SIZE)
         session = self._session()
         filename = self.get_download_filename_from_url(url)
+        new_url = url
 
         if directory:
             filename = os.path.join(directory, filename)
@@ -1308,10 +1313,18 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False)
                 print(filename)
             return filename
         else:
+            data_dict = None
+            parse_result = parse.urlparse(url)
+            is_opendap = parse_result.netloc.startswith('opendap')
+            method = 'post' if is_opendap else 'get'
+            if is_opendap:  # remove the query params from the URL and convert to dict
+                new_url = parse.urlunparse(parse_result._replace(query=""))
+                data_dict = dict(parse.parse_qsl(parse.urlsplit(url).query))
             headers = {
                 "Accept-Encoding": "identity"
             }
-            with session.get(url, stream=True, headers=headers) as r:
+            with getattr(session, method)(
+                    new_url, data=data_dict, stream=True, headers=headers) as r:
                 with open(filename, 'wb') as f:
                     shutil.copyfileobj(r.raw, f, length=chunksize)
             if verbose and verbose.upper() == 'TRUE':

diff --git a/tests/test_client.py b/tests/test_client.py
@@ -1007,6 +1007,29 @@ def test_download_file(overwrite):
     if not overwrite:
         os.unlink(expected_filename)
 
+def test_download_opendap_file():
+    expected_data = bytes('abcde', encoding='utf-8')
+    expected_filename = 'SC:ATL03.006:264549068'
+    query = '?dap4.ce=/ds_surf_type[0:1:4]'
+    path = 'https://opendap.uat.earthdata.nasa.gov/collections/C1261703111-EEDTEST/granules/'
+    url = path + expected_filename + query
+    actual_output = None
+
+    with io.BytesIO() as file_obj:
+        file_obj.write(expected_data)
+        file_obj.seek(0)
+        with responses.RequestsMock() as resp_mock:
+            resp_mock.add(responses.POST, path + expected_filename, body=file_obj.read(), stream=True,
+                match=[responses.matchers.urlencoded_params_matcher({"dap4.ce": "/ds_surf_type[0:1:4]"})])
+            client = Client(should_validate_auth=False)
+            actual_output = client._download_file(url, overwrite=False)
+
+    assert actual_output == expected_filename
+    with open(expected_filename, 'rb') as temp_file:
+        data = temp_file.read()
+        assert data == expected_data
+
+    os.unlink(actual_output)
 
 def test_download_all(mocker):
     expected_urls = [