From 89aec7376110aa140c97df350cbc1bcd9f58b17a Mon Sep 17 00:00:00 2001 From: Florian Ziemen Date: Tue, 16 Jul 2024 18:40:31 +0200 Subject: [PATCH] Update to current slk and add tests for slk --- slkspec/core.py | 6 +++--- slkspec/tests/conftest.py | 12 ++++++------ slkspec/tests/verify_slk.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 slkspec/tests/verify_slk.py diff --git a/slkspec/core.py b/slkspec/core.py index e430b06..cfc9410 100644 --- a/slkspec/core.py +++ b/slkspec/core.py @@ -147,11 +147,11 @@ def _retrieve_items(self, retrieve_files: list[tuple[str, str]]) -> None: self._mkdirs(output_dir) retrieval_requests.append(inp_file) logger.debug("Creating slk query for %i files", len(retrieve_files)) - search_id = pyslk.search(pyslk.slk_gen_file_query(retrieval_requests)) + search_id = pyslk.search(pyslk.gen_file_query(retrieval_requests)) if search_id is None: raise FileNotFoundError("No files found in archive.") logger.debug("Retrieving files for search id: %i", search_id) - pyslk.slk_retrieve(search_id, str(self.slk_cache), preserve_path=True) + pyslk.retrieve(search_id, str(self.slk_cache), preserve_path=True) logger.debug("Adjusting file permissions") for out_file in retrieval_requests: local_path = self.slk_cache / Path(out_file.strip("/")) @@ -194,7 +194,7 @@ def seek(self, target: int) -> int: # type: ignore self._cache_files() return self._file_obj.seek(target) # type: ignore - def _mkdirs(self, path): + def _mkdirs(self, path : Union[str, Path]) -> None: rp = os.path.realpath(path) if os.access(rp, os.F_OK): if not os.access(rp, os.W_OK): diff --git a/slkspec/tests/conftest.py b/slkspec/tests/conftest.py index 2aba586..15a07f9 100644 --- a/slkspec/tests/conftest.py +++ b/slkspec/tests/conftest.py @@ -38,19 +38,19 @@ def search(self, inp_f: builtins.list[str]) -> int | None: self._cache[hash_value] = inp_f return hash_value - def slk_gen_file_query(self, inp_files: builtins.list[str]) -> builtins.list[str]: + def gen_file_query(self, resources: builtins.list[str], **kwargs) -> builtins.list[str]: """Mock slk_gen_file_qeury.""" - return [f for f in inp_files if Path(f).exists()] + return [f for f in resources if Path(f).exists()] - def slk_retrieve(self, search_id: int, out_dir: str, preserve_path: bool) -> None: + def retrieve(self, resource: int, dest_dir: str, recursive:bool = False, group: Union[bool, None] = None, delayed: bool= False, preserve_path: bool = True, **kwargs) -> None: """Mock slk_retrieve.""" - for inp_file in map(Path, self._cache[search_id]): + for inp_file in map(Path, self._cache[resource]): if preserve_path: - outfile = Path(out_dir) / Path(str(inp_file).strip(inp_file.root)) + outfile = Path(dest_dir) / Path(str(inp_file).strip(inp_file.root)) outfile.parent.mkdir(parents=True, exist_ok=True) shutil.copy(inp_file, outfile) else: - shutil.copy(inp_file, Path(out_dir) / inp_file.name) + shutil.copy(inp_file, Path(dest_dir) / inp_file.name) def create_data(variable_name: str, size: int) -> xr.Dataset: diff --git a/slkspec/tests/verify_slk.py b/slkspec/tests/verify_slk.py new file mode 100644 index 0000000..02adef1 --- /dev/null +++ b/slkspec/tests/verify_slk.py @@ -0,0 +1,37 @@ +import pyslk +import os +from pathlib import Path + +test_files = [ + dict( + name="/arch/bk1040/dyamond_winter_post_processed/ECMWF/IFS-4km/DW-CPL/atmos/1hr/tas/r1i1p1f1/2d/gn/tas_1hr_IFS-4km_DW-CPL_r1i1p1f1_2d_gn_20200220000000-20200220230000.nc", + size=0.8215, + query='{"$and":[{"path":{"$gte":"/arch/bk1040/dyamond_winter_post_processed/ECMWF/IFS-4km/DW-CPL/atmos/1hr/tas/r1i1p1f1/2d/gn","$max_depth":1}},{"resources.name":{"$regex":"tas_1hr_IFS-4km_DW-CPL_r1i1p1f1_2d_gn_20200220000000-20200220230000.nc"}}]}', + ), + dict( + name="/arch/bk1040/dyamond_winter_post_processed/ECMWF/IFS-4km/DW-CPL/atmos/1hr/tas/r1i1p1f1/2d/gn/tas_1hr_IFS-4km_DW-CPL_r1i1p1f1_2d_gn_20200229000000-20200229000000.nc" + ), + ] + +def test_gen_file_query() -> None: + query = pyslk.gen_file_query(test_files[0]["name"]) + assert (query == test_files[0]["query"]) + +def test_retrieve() -> None: + import tempfile + + filename = "/arch/bm0146/k204221/iow/INDEX.txt" + td = tempfile.TemporaryDirectory() + pyslk.retrieve(filename, td.name, preserve_path=True) + assert(os.stat(Path(td.name, filename[1:])).st_size == 1268945) + td.cleanup() + +def test_search() -> None: + assert (isinstance(pyslk.search(test_files[0]['query']), int)) + + +def test_slk_list() -> None: + assert (pyslk.slk_list('/arch/bm0146/k204221/iow/') == + '-rwxr-xr-x- k204221 bm0146 1.2M 10 Jun 2020 08:25 INDEX.txt\n-rw-r--r--t k204221 bm0146 19.5G 05 Jun 2020 17:36 iow_data2_001.tar\n-rw-r--r--t k204221 bm0146 19.0G 05 Jun 2020 17:38 iow_data2_002.tar\n-rw-r--r--t k204221 bm0146 19.4G 05 Jun 2020 17:38 iow_data2_003.tar\n-rw-r--r--t k204221 bm0146 19.3G 05 Jun 2020 17:40 iow_data2_004.tar\n-rw-r--r--t k204221 bm0146 19.1G 05 Jun 2020 17:40 iow_data2_005.tar\n-rw-r--r--t k204221 bm0146 7.8G 05 Jun 2020 17:41 iow_data2_006.tar\n-rw-r--r--t k204221 bm0146 186.9G 05 Jun 2020 19:37 iow_data3_001.tar\n-rw-r--r--t k204221 bm0146 24.6G 05 Jun 2020 19:14 iow_data3_002.tar\n-rw-r--r--- k204221 bm0146 4.0M 05 Jun 2020 19:43 iow_data4_001.tar\n-rw-r--r--t k204221 bm0146 10.5G 05 Jun 2020 19:46 iow_data4_002.tar\n-rw-r--r--t k204221 bm0146 19.5G 10 Jun 2020 08:21 iow_data5_001.tar\n-rw-r--r--t k204221 bm0146 19.0G 10 Jun 2020 08:23 iow_data5_002.tar\n-rw-r--r--t k204221 bm0146 19.4G 10 Jun 2020 08:23 iow_data5_003.tar\n-rw-r--r--t k204221 bm0146 19.3G 10 Jun 2020 08:24 iow_data5_004.tar\n-rw-r--r--t k204221 bm0146 19.1G 10 Jun 2020 08:25 iow_data5_005.tar\n-rw-r--r--t k204221 bm0146 7.8G 10 Jun 2020 08:25 iow_data5_006.tar\n-rw-r--r--t k204221 bm0146 19.5G 05 Jun 2020 17:53 iow_data_001.tar\n-rw-r--r--t k204221 bm0146 19.0G 05 Jun 2020 17:53 iow_data_002.tar\n-rw-r--r--t k204221 bm0146 19.4G 05 Jun 2020 17:56 iow_data_003.tar\n-rw-r--r--t k204221 bm0146 19.3G 05 Jun 2020 17:56 iow_data_004.tar\n-rw-r--r--t k204221 bm0146 19.1G 05 Jun 2020 17:58 iow_data_005.tar\n-rw-r-----t k204221 bm0146 7.8G 05 Jun 2020 17:57 iow_data_006.tar\nFiles: 23\n\x1b[?25h') + pass +