diff --git a/documentation/index.md b/documentation/index.md index 3edaf16..4adf2cf 100644 --- a/documentation/index.md +++ b/documentation/index.md @@ -33,7 +33,7 @@ The code for the library is [here](https://github.com/OCHA-DAP/hdx-python-utilit The library has detailed API documentation which can be found in the menu at the top. ## Breaking Changes -From 4.0.0, Python 3.10 or later is required +From 4.0.0, Python 3.10 or later is required From 3.8.0, multiple_replace, match_template_variables, earliest_index, get_matching_text_in_strs, get_matching_text, diff --git a/src/hdx/utilities/base_downloader.py b/src/hdx/utilities/base_downloader.py index ac16245..6198484 100644 --- a/src/hdx/utilities/base_downloader.py +++ b/src/hdx/utilities/base_downloader.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod from collections.abc import Iterator, Sequence +from pathlib import Path from typing import Any @@ -32,7 +33,7 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: """ @abstractmethod - def download_file(self, url: str, *args: Any, **kwargs: Any) -> str: + def download_file(self, url: str, *args: Any, **kwargs: Any) -> Path: """Download file from url. Args: diff --git a/src/hdx/utilities/compare.py b/src/hdx/utilities/compare.py index 736ff86..575d38f 100755 --- a/src/hdx/utilities/compare.py +++ b/src/hdx/utilities/compare.py @@ -1,6 +1,7 @@ """File compare utilities.""" from os import linesep +from pathlib import Path try: from cydifflib import ndiff @@ -8,7 +9,9 @@ from difflib import ndiff -def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]: +def compare_files( + path1: Path | str, path2: Path | str, encoding: str = "utf-8" +) -> list[str]: """Returns the delta between two files using -, ?, + format excluding lines that are the same. @@ -26,7 +29,9 @@ def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]: return [x for x in diff if x[0] in ["-", "+", "?"]] -def assert_files_same(path1: str, path2: str, encoding: str = "utf-8") -> None: +def assert_files_same( + path1: Path | str, path2: Path | str, encoding: str = "utf-8" +) -> None: """Asserts that two files are the same and returns delta using. 
-, ?, + format if not diff --git a/src/hdx/utilities/dictandlist.py b/src/hdx/utilities/dictandlist.py index a1c0e33..18a60b9 100755 --- a/src/hdx/utilities/dictandlist.py +++ b/src/hdx/utilities/dictandlist.py @@ -3,6 +3,7 @@ import itertools import warnings from collections.abc import Callable, Mapping, MutableMapping, Sequence +from pathlib import Path from typing import Any from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource @@ -410,7 +411,7 @@ def read_list_from_csv( def write_list_to_csv( - filepath: str, + filepath: Path | str, rows: list[Sequence | Mapping], headers: int | Sequence[str] | None = None, columns: Sequence[int] | Sequence[str] | None = None, diff --git a/src/hdx/utilities/downloader.py b/src/hdx/utilities/downloader.py index f510d92..f236e09 100755 --- a/src/hdx/utilities/downloader.py +++ b/src/hdx/utilities/downloader.py @@ -5,7 +5,7 @@ from collections.abc import Callable, Iterator, Sequence from copy import deepcopy from os import remove -from os.path import exists, isfile, join, split, splitext +from os.path import exists, isfile, split, splitext from pathlib import Path from typing import Any from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit @@ -133,12 +133,12 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: @staticmethod def get_path_for_url( url: str, - folder: str | None = None, + folder: Path | str | None = None, filename: str | None = None, - path: str | None = None, + path: Path | str | None = None, overwrite: bool = False, keep: bool = False, - ) -> str: + ) -> Path: """Get filename from url and join to provided folder or temporary folder if no folder supplied, ensuring uniqueness. @@ -164,7 +164,8 @@ def get_path_for_url( filename, extension = splitext(filename) if not folder: folder = get_temp_dir() - path = join(folder, f"{filename}{extension}") + folder = Path(folder) + path = folder / f"{filename}{extension}" if overwrite: try: remove(path) @@ -174,7 +175,7 @@ def get_path_for_url( count = 0 while exists(path): count += 1 - path = join(folder, f"{filename}{count}{extension}") + path = folder / f"{filename}{count}{extension}" return path def get_full_url(self, url: str) -> str: @@ -254,7 +255,7 @@ def hxl_row( def normal_setup( self, - url: str, + url: Path | str, stream: bool = True, post: bool = False, parameters: dict | None = None, @@ -281,6 +282,7 @@ def normal_setup( self.close_response() self.response = None try: + url = str(url) spliturl = urlsplit(url) if not spliturl.scheme: if isfile(url): @@ -336,7 +338,7 @@ def set_bearer_token(self, bearer_token: str) -> None: } ) - def hash_stream(self, url: str) -> str: + def hash_stream(self, url: Path | str) -> str: """Stream file from url and hash it using MD5. Must call setup method first. @@ -357,7 +359,7 @@ def hash_stream(self, url: str) -> str: f"Download of {url} failed in retrieval of stream!" % url ) - def stream_path(self, path: str, errormsg: str): + def stream_path(self, path: Path | str, errormsg: str) -> Path: """Stream file from url and store in provided path. Must call setup method first. 
@@ -370,12 +372,13 @@ def stream_path(self, path: str, errormsg: str): """ f = None try: - f = open(path, "wb") + path = Path(path) + f = path.open("wb") for chunk in self.response.iter_content(chunk_size=10240): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() - return f.name + return path except Exception as e: raise DownloadError(errormsg) from e finally: @@ -384,10 +387,10 @@ def stream_path(self, path: str, errormsg: str): def stream_file( self, - url: str, - folder: str | None = None, + url: Path | str, + folder: Path | str | None = None, filename: str | None = None, - path: str | None = None, + path: Path | str | None = None, overwrite: bool = False, keep: bool = False, ) -> str: @@ -414,9 +417,9 @@ def stream_file( def download_file( self, - url: str, + url: Path | str, **kwargs: Any, - ) -> str: + ) -> Path: """Download file from url and store in provided folder or temporary folder if no folder supplied. @@ -460,7 +463,7 @@ def download_file( path, f"Download of {url} failed in retrieval of stream!" ) - def download(self, url: str, **kwargs: Any) -> requests.Response: + def download(self, url: Path | str, **kwargs: Any) -> requests.Response: """Download url. Args: @@ -539,7 +542,7 @@ def get_json(self) -> Any: """ return self.response.json() - def download_text(self, url: str, **kwargs: Any) -> str: + def download_text(self, url: Path | str, **kwargs: Any) -> str: """Download url as text. Args: @@ -557,7 +560,7 @@ def download_text(self, url: str, **kwargs: Any) -> str: self.download(url, **kwargs) return self.get_text() - def download_yaml(self, url: str, **kwargs: Any) -> Any: + def download_yaml(self, url: Path | str, **kwargs: Any) -> Any: """Download url as YAML. Args: @@ -575,7 +578,7 @@ def download_yaml(self, url: str, **kwargs: Any) -> Any: self.download(url, **kwargs) return self.get_yaml() - def download_json(self, url: str, **kwargs: Any) -> Any: + def download_json(self, url: Path | str, **kwargs: Any) -> Any: """Download url as JSON. 
Args: @@ -595,7 +598,7 @@ def download_json(self, url: str, **kwargs: Any) -> Any: def get_frictionless_tableresource( self, - url: str, + url: Path | str, ignore_blank_rows: bool = True, infer_types: bool = False, **kwargs: Any, @@ -641,7 +644,7 @@ def get_frictionless_tableresource( def _get_tabular_rows( self, - url: str, + url: Path | str, headers: int | Sequence[int] | Sequence[str] = 1, dict_form: bool = False, include_headers: bool = False, @@ -704,7 +707,7 @@ def _get_tabular_rows( xlsx2csv = kwargs.pop("xlsx2csv", False) if xlsx2csv: path = self.download_file(url) - outpath = path.replace(".xlsx", ".csv") + outpath = path.with_suffix(".csv") sheet = kwargs.pop("sheet", 1) if isinstance(sheet, int): sheet_args = {"sheetid": sheet} diff --git a/src/hdx/utilities/file_hashing.py b/src/hdx/utilities/file_hashing.py index 58896e6..9764067 100644 --- a/src/hdx/utilities/file_hashing.py +++ b/src/hdx/utilities/file_hashing.py @@ -3,6 +3,7 @@ import zipfile from io import BytesIO, IOBase from os import fstat +from pathlib import Path from openpyxl import load_workbook from openpyxl.utils.exceptions import InvalidFileException @@ -90,7 +91,7 @@ def crc_zip_fp(fp: IOBase) -> str: return get_crc_sum(file_crcs) -def get_size_and_hash(filepath: str, file_format: str) -> tuple[int, str]: +def get_size_and_hash(filepath: Path | str, file_format: str) -> tuple[int, str]: """Return the size and hash of file Args: diff --git a/src/hdx/utilities/frictionless_wrapper.py b/src/hdx/utilities/frictionless_wrapper.py index 8156948..f5f4fd5 100644 --- a/src/hdx/utilities/frictionless_wrapper.py +++ b/src/hdx/utilities/frictionless_wrapper.py @@ -188,7 +188,7 @@ def get_frictionless_tableresource( http_session = kwargs.pop("http_session", session) with system.use_context(http_session=http_session): if url: - resource = TableResource(path=url, **kwargs) + resource = TableResource(path=str(url), **kwargs) else: resource = TableResource(data=data, **kwargs) resource.open() diff --git a/src/hdx/utilities/loader.py b/src/hdx/utilities/loader.py index 580fb23..f81eb6e 100755 --- a/src/hdx/utilities/loader.py +++ b/src/hdx/utilities/loader.py @@ -1,7 +1,8 @@ """Loading utilities for YAML, JSON etc.""" import json -from collections.abc import Mapping, Sequence +from collections.abc import Mapping, MutableMapping, Sequence +from pathlib import Path from typing import Any from warnings import warn @@ -15,7 +16,7 @@ class LoadError(Exception): def load_text( - path: str, + path: Path | str, encoding: str = "utf-8", strip: bool = False, replace_newlines: str | None = None, @@ -59,7 +60,7 @@ def load_text( def load_yaml( - path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True + path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True ) -> Any: """Load YAML file into an ordered dictionary. @@ -83,7 +84,7 @@ def load_yaml( def load_json( - path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True + path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True ) -> Any: """Load JSON file into an ordered dictionary (dict for Python 3.7+) @@ -152,11 +153,11 @@ def load_and_merge_json( def load_yaml_into_existing_dict( - data: dict, - path: str, + data: MutableMapping, + path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True, -) -> Mapping: +) -> MutableMapping: """Merge YAML file that is in dictionary form into existing dictionary. 
Args: @@ -173,11 +174,11 @@ def load_yaml_into_existing_dict( def load_json_into_existing_dict( - data: dict, - path: str, + data: MutableMapping, + path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True, -) -> Mapping: +) -> MutableMapping: """Merge JSON file that is in dictionary form into existing dictionary. Args: diff --git a/src/hdx/utilities/path.py b/src/hdx/utilities/path.py index b6eb1a4..33beaee 100755 --- a/src/hdx/utilities/path.py +++ b/src/hdx/utilities/path.py @@ -4,17 +4,15 @@ import inspect import logging import sys -from collections.abc import Generator, Iterable, Sequence +from collections.abc import Iterable, Iterator, Sequence from os import getenv, makedirs, remove from os.path import ( - abspath, basename, dirname, exists, - join, - realpath, splitext, ) +from pathlib import Path from shutil import rmtree from tempfile import gettempdir from typing import Any @@ -33,7 +31,7 @@ class NotFoundError(Exception): pass -def script_dir(pyobject: Any, follow_symlinks: bool = True) -> str: +def script_dir(pyobject: Any, follow_symlinks: bool = True) -> Path: """Get current script's directory. Args: @@ -43,36 +41,42 @@ def script_dir(pyobject: Any, follow_symlinks: bool = True) -> str: Returns: Current script's directory """ - if getattr(sys, "frozen", False): # py2exe, PyInstaller, cx_Freeze - path = abspath(sys.executable) # pragma: no cover + if getattr(sys, "frozen", False): + # Frozen (PyInstaller, etc.): Use the executable path + path = Path(sys.executable) # pragma: no cover else: - path = inspect.getabsfile(pyobject) - if follow_symlinks: - path = realpath(path) - return dirname(path) + # Standard: Use the object's file path + path = Path(inspect.getfile(pyobject)) + + # Resolve symlinks if requested, otherwise just make absolute + path = path.resolve() if follow_symlinks else path.absolute() + + return path.parent def script_dir_plus_file( filename: str, pyobject: Any, follow_symlinks: bool = True -) -> str: +) -> Path: """Get current script's directory and then append a filename. Args: filename: Filename to append to directory path pyobject: Any Python object in the script + follow_symlinks: Follow symlinks or not. Defaults to True. Returns: Current script's directory and with filename appended """ - return join(script_dir(pyobject, follow_symlinks), filename) + return script_dir(pyobject, follow_symlinks) / filename def get_temp_dir( - folder: str | None = None, + folder: Path | str | None = None, delete_if_exists: bool = False, - tempdir: str | None = None, -) -> str: + tempdir: Path | str | None = None, +) -> Path: """Get a temporary directory. Looks for environment variable TEMP_DIR and falls back on os.gettempdir if a root temporary directory is not supplied.
If a folder is supplied, creates that folder within the temporary @@ -88,8 +92,9 @@ def get_temp_dir( """ if tempdir is None: tempdir = getenv("TEMP_DIR", gettempdir()) + tempdir = Path(tempdir) if folder: - tempdir = join(tempdir, folder) + tempdir = tempdir / folder if exists(tempdir): if delete_if_exists: rmtree(tempdir) @@ -101,12 +106,12 @@ def get_temp_dir( @contextlib.contextmanager def temp_dir( - folder: str | None = None, + folder: Path | str | None = None, delete_if_exists: bool = False, delete_on_success: bool = True, delete_on_failure: bool = True, - tempdir: str | None = None, -) -> Generator[str, Any, None]: + tempdir: Path | str | None = None, +) -> Iterator[Path]: """Get a temporary directory optionally with folder appended (and created if it doesn't exist) @@ -132,7 +137,7 @@ def temp_dir( raise -def read_or_create_batch(folder: str, batch: str | None = None) -> str: +def read_or_create_batch(folder: Path, batch: str | None = None) -> str: """Get batch or create it if it doesn't exist. Args: @@ -142,7 +147,7 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str: Returns: Batch """ - batch_file = join(folder, "batch.txt") + batch_file = folder / "batch.txt" if exists(batch_file): batch = load_text(batch_file, strip=True) logger.info(f"File BATCH = {batch}") @@ -156,13 +161,13 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str: @contextlib.contextmanager def temp_dir_batch( - folder: str | None = None, + folder: Path | str | None = None, delete_if_exists: bool = False, delete_on_success: bool = True, delete_on_failure: bool = True, batch: str | None = None, - tempdir: str | None = None, -) -> Generator[dict, Any, None]: + tempdir: Path | str | None = None, +) -> Iterator[dict]: """Get a temporary directory and batch id. Yields a dictionary with key folder which is the temporary directory optionally with folder appended (and created if it doesn't exist). In key batch is a batch code to be @@ -219,7 +224,7 @@ def progress_storing_folder( iterator: Iterable[dict], key: str, wheretostart: str | None = None, -) -> Generator[tuple[dict, dict], Any, None]: +) -> Iterator[tuple[dict, dict]]: """Store progress in folder in key folder of info dictionary parameter. Yields 2 dictionaries. The first is the info dictionary. It contains in key folder the folder being used to store progress and in key progress the @@ -238,7 +243,7 @@ def progress_storing_folder( A tuple of the form (info dictionary, next object in iterator) """ folder = info["folder"] - progress_file = join(folder, "progress.txt") + progress_file = folder / "progress.txt" if not wheretostart: contents = getenv("WHERETOSTART") @@ -277,8 +282,8 @@ def progress_storing_folder( @contextlib.contextmanager def wheretostart_tempdir_batch( - folder: str, batch: str | None = None, tempdir: str | None = None -) -> Generator[dict, Any, None]: + folder: Path | str, batch: str | None = None, tempdir: Path | str | None = None +) -> Iterator[dict]: """Get a temporary directory and batch id. Deletes any existing folder if WHERETOSTART environment variable is set to RESET. 
Yields a dictionary with key folder which is the temporary directory optionally with folder appended @@ -311,12 +316,12 @@ def wheretostart_tempdir_batch( def progress_storing_tempdir( - folder: str, + folder: Path | str, iterator: Iterable[dict], key: str, batch: str | None = None, - tempdir: str | None = None, -) -> Generator[tuple[dict, dict], Any, None]: + tempdir: Path | str | None = None, +) -> Iterator[tuple[dict, dict]]: """Store progress in temporary directory. The folder persists until the final iteration allowing which iteration to start at and the batch code to be persisted between runs. Yields 2 dictionaries. The first contains key @@ -344,11 +349,11 @@ def progress_storing_tempdir( def multiple_progress_storing_tempdir( - folder: str, + folder: Path | str, iterators: Sequence[Iterable[dict]], keys: Sequence[str], batch: str | None = None, -) -> Generator[tuple[int, dict, dict], Any, None]: +) -> Iterator[tuple[int, dict, dict]]: """Store progress in temporary directory. The folder persists until the final iteration of the last iterator allowing which iteration to start at and the batch code to be persisted between runs. Yields 2 dictionaries. The @@ -387,7 +392,7 @@ def multiple_progress_storing_tempdir( tempdir = info["folder"] batch = info["batch"] for i, key in enumerate(keys): - progress_file = join(tempdir, "progress.txt") + progress_file = tempdir / "progress.txt" if wheretostartenv: wheretostart = get_wheretostart( wheretostartenv, "Environment variable", key @@ -416,18 +421,19 @@ def multiple_progress_storing_tempdir( def get_filename_extension_from_url( - url: str, second_last: bool = False, use_query: bool = False + url: Path | str, second_last: bool = False, use_query: bool = False ) -> tuple[str, str]: """Get separately filename and extension from url. Args: - url: URL to download + url: URL or path to download second_last: Get second last segment of url as well. Defaults to False. use_query: Include query parameters as well. Defaults to False. Returns: Tuple of (filename, extension) """ + url = str(url) split_url = urlsplit(unquote_plus(url)) urlpath = split_url.path last_part = basename(urlpath) @@ -448,12 +454,12 @@ def get_filename_extension_from_url( def get_filename_from_url( - url: str, second_last: bool = False, use_query: bool = False + url: Path | str, second_last: bool = False, use_query: bool = False ) -> str: """Get filename including extension from url. Args: - url: URL + url: URL or path second_last: Get second last segment of url as well. Defaults to False. use_query: Include query parameters as well. Defaults to False. 
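Taken together, the changes above mean the download and path helpers now return pathlib.Path objects while still accepting plain strings. A minimal usage sketch, not part of the patch: the folder name, user-agent string and reuse of the test fixture URL are illustrative assumptions.

```python
from pathlib import Path

from hdx.utilities.downloader import Download
from hdx.utilities.path import get_temp_dir, temp_dir

# get_temp_dir()/temp_dir() now return/yield Path, so callers join with "/"
# instead of os.path.join.
assert isinstance(get_temp_dir(), Path)

# Fixture URL from the tests, used here purely as an example download target.
url = (
    "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-utilities/"
    "master/tests/fixtures/test_data.csv"
)
with temp_dir("path_api_example") as folder:  # folder is a Path
    with Download(user_agent="path_api_example") as downloader:
        # download_file accepts str or Path folders and now returns a Path
        path = downloader.download_file(url, folder=folder)
        assert isinstance(path, Path)
        print(path.name, path.stat().st_size)
```

The str(url), str(path) calls added in the tests below exercise the other half of the contract: every argument that became Path | str still works when given a plain string.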
diff --git a/src/hdx/utilities/retriever.py b/src/hdx/utilities/retriever.py index 9241327..67263db 100644 --- a/src/hdx/utilities/retriever.py +++ b/src/hdx/utilities/retriever.py @@ -2,7 +2,7 @@ from collections.abc import Iterator, Sequence from copy import deepcopy from os import mkdir -from os.path import join +from pathlib import Path from shutil import rmtree from typing import Any @@ -39,9 +39,9 @@ class Retrieve(BaseDownload): def __init__( self, downloader: Download, - fallback_dir: str, - saved_dir: str, - temp_dir: str, + fallback_dir: Path | str, + saved_dir: Path | str, + temp_dir: Path | str, save: bool = False, use_saved: bool = False, prefix: str = "", @@ -49,9 +49,9 @@ def __init__( log_level: int = logging.INFO, ): self.downloader = downloader - self.fallback_dir = fallback_dir - self.saved_dir = saved_dir - self.temp_dir = temp_dir + self.fallback_dir = Path(fallback_dir) + self.saved_dir = Path(saved_dir) + self.temp_dir = Path(temp_dir) self.save = save self.use_saved = use_saved self.prefix = prefix @@ -59,7 +59,9 @@ def __init__( self.log_level = log_level @staticmethod - def check_flags(saved_dir: str, save: bool, use_saved: bool, delete: bool) -> None: + def check_flags( + saved_dir: Path | str, save: bool, use_saved: bool, delete: bool + ) -> None: """Check flags. Also delete saved_dir if save and delete are True. Args: @@ -81,7 +83,7 @@ def check_flags(saved_dir: str, save: bool, use_saved: bool, delete: bool) -> No mkdir(saved_dir) @staticmethod - def get_url_logstr(url: str) -> str: + def get_url_logstr(url: Path | str) -> str: """Url string that will be logged. It is limited to 100 characters if necessary. @@ -91,6 +93,7 @@ def get_url_logstr(url: str) -> str: Returns: Url string to use in logs """ + url = str(url) if len(url) > 100: return f"{url[:100]}..." return url @@ -183,7 +186,7 @@ def download_file( fallback: bool = False, log_level: int = None, **kwargs: Any, - ) -> str: + ) -> Path: """Retrieve file. Args: @@ -206,8 +209,8 @@ def download_file( folder = self.saved_dir else: folder = self.temp_dir - output_path = join(folder, filename) - saved_path = join(self.saved_dir, filename) + output_path = folder / filename + saved_path = self.saved_dir / filename if self.use_saved: logger.log(log_level, f"Using saved {logstr} in {saved_path}") return saved_path @@ -220,7 +223,7 @@ def download_file( except DownloadError: if not fallback: raise - fallback_path = join(self.fallback_dir, filename) + fallback_path = self.fallback_dir / filename logger.exception( f"{logstr} download failed, using static data {fallback_path}!" ) @@ -253,7 +256,7 @@ def download_text( filename, kwargs = self.get_filename(url, filename, **kwargs) if not logstr: logstr = filename - saved_path = join(self.saved_dir, filename) + saved_path = self.saved_dir / filename if self.use_saved: logger.log(log_level, f"Using saved {logstr} in {saved_path}") text = load_text(saved_path) @@ -270,7 +273,7 @@ def download_text( except DownloadError: if not fallback: raise - fallback_path = join(self.fallback_dir, filename) + fallback_path = self.fallback_dir / filename logger.exception( f"{logstr} download failed, using static data {fallback_path}!" 
) @@ -304,7 +307,7 @@ def download_yaml( filename, kwargs = self.get_filename(url, filename, ("yaml", "yml"), **kwargs) if not logstr: logstr = filename - saved_path = join(self.saved_dir, filename) + saved_path = self.saved_dir / filename if self.use_saved: logger.log(log_level, f"Using saved {logstr} in {saved_path}") ryaml = load_yaml(saved_path) @@ -321,7 +324,7 @@ def download_yaml( except DownloadError: if not fallback: raise - fallback_path = join(self.fallback_dir, filename) + fallback_path = self.fallback_dir / filename logger.exception( f"{logstr} download failed, using static data {fallback_path}!" ) @@ -355,7 +358,7 @@ def download_json( filename, kwargs = self.get_filename(url, filename, ("json",), **kwargs) if not logstr: logstr = filename - saved_path = join(self.saved_dir, filename) + saved_path = self.saved_dir / filename if self.use_saved: logger.log(log_level, f"Using saved {logstr} in {saved_path}") rjson = load_json(saved_path) @@ -372,7 +375,7 @@ def download_json( except DownloadError: if not fallback: raise - fallback_path = join(self.fallback_dir, filename) + fallback_path = self.fallback_dir / filename logger.exception( f"{logstr} download failed, using static data {fallback_path}!" ) @@ -438,9 +441,9 @@ def get_tabular_rows( @classmethod def generate_retrievers( cls, - fallback_dir: str, - saved_dir: str, - temp_dir: str, + fallback_dir: Path | str, + saved_dir: Path | str, + temp_dir: Path | str, save: bool = False, use_saved: bool = False, ignore: Sequence[str] = tuple(), diff --git a/src/hdx/utilities/saver.py b/src/hdx/utilities/saver.py index 0ebe4cc..bd63eb8 100644 --- a/src/hdx/utilities/saver.py +++ b/src/hdx/utilities/saver.py @@ -4,7 +4,7 @@ import json from collections import OrderedDict from collections.abc import Callable, Iterable, Mapping, Sequence -from os.path import join +from pathlib import Path from typing import Any from ruamel.yaml import ( @@ -50,7 +50,7 @@ def represent_none(self, data: Any) -> Any: } -def save_text(string: str, path: str, encoding: str = "utf-8") -> None: +def save_text(string: str, path: Path | str, encoding: str = "utf-8") -> None: """Save text string to file. Args: @@ -67,7 +67,7 @@ def save_text(string: str, path: str, encoding: str = "utf-8") -> None: def save_yaml( object: Any, - path: str, + path: Path | str, encoding: str = "utf-8", pretty: bool = False, sortkeys: bool = False, @@ -101,7 +101,7 @@ def save_yaml( def save_json( object: Any, - path: str, + path: Path | str, encoding: str = "utf-8", pretty: bool = False, sortkeys: bool = False, @@ -139,7 +139,7 @@ def save_hxlated_output( rows: Sequence[Sequence | Mapping], includes_header: bool = True, includes_hxltags: bool = False, - output_dir: str = "", + output_dir: Path | str = "", **kwargs: Any, ) -> None: """Save rows with header and HXL hashtags. 
Currently, JSON and/or csv @@ -160,6 +160,7 @@ def save_hxlated_output( Returns: None """ + output_dir = Path(output_dir) row0 = rows[0] if includes_header: if isinstance(row0, dict): @@ -190,8 +191,7 @@ def save_hxlated_output( if csv_configuration: csv_hxltags = csv_configuration.get("hxltags", hxltags) csv_headers = [hxltag_to_header[hxltag] for hxltag in csv_hxltags] - csv_file = open( - join(output_dir, csv_configuration["filename"]), + csv_file = (output_dir / csv_configuration["filename"]).open( "w", encoding="utf-8", newline="\n", @@ -221,7 +221,7 @@ def save_hxlated_output( else: metadata_json = None - output_json = open(join(output_dir, json_configuration["filename"]), "w") + output_json = (output_dir / json_configuration["filename"]).open("w") if metadata_json: metadata_key = metadata_configuration.get("key", "metadata") @@ -272,7 +272,7 @@ def get_outrow(file_hxltags): def save_iterable( - filepath: str, + filepath: Path | str, rows: Iterable[Sequence | Mapping], headers: int | Sequence[str] | None = None, columns: Sequence[int] | Sequence[str] | None = None, diff --git a/src/hdx/utilities/state.py b/src/hdx/utilities/state.py index 57ab6a3..5016da0 100644 --- a/src/hdx/utilities/state.py +++ b/src/hdx/utilities/state.py @@ -2,6 +2,7 @@ import logging from collections.abc import Callable +from pathlib import Path from typing import Any from hdx.utilities.dateparse import iso_string_from_datetime, parse_date @@ -27,11 +28,11 @@ class State: def __init__( self, - path: str, + path: Path | str, read_fn: Callable[[str], Any] = lambda x: x, write_fn: Callable[[Any], str] = lambda x: x, ) -> None: - self.path = path + self.path = Path(path) self.read_fn = read_fn self.write_fn = write_fn self.state = self.read() diff --git a/tests/hdx/conftest.py b/tests/hdx/conftest.py index 8c393df..7eceaa3 100755 --- a/tests/hdx/conftest.py +++ b/tests/hdx/conftest.py @@ -1,7 +1,7 @@ """Global fixtures""" import smtplib -from os.path import join +from pathlib import Path import pytest @@ -10,12 +10,12 @@ @pytest.fixture(scope="session") def fixturesfolder(): - return join("tests", "fixtures") + return Path("tests", "fixtures") @pytest.fixture(scope="session") def configfolder(fixturesfolder): - return join(fixturesfolder, "config") + return Path(fixturesfolder, "config") @pytest.fixture(scope="function") diff --git a/tests/hdx/utilities/test_compare.py b/tests/hdx/utilities/test_compare.py index 4ed1f92..c742fbd 100755 --- a/tests/hdx/utilities/test_compare.py +++ b/tests/hdx/utilities/test_compare.py @@ -1,7 +1,5 @@ """Compare Utility Tests""" -from os.path import join - import pytest from hdx.utilities.compare import assert_files_same, compare_files @@ -10,11 +8,11 @@ class TestCompare: @pytest.fixture(scope="class") def testfile1(self, fixturesfolder): - return join(fixturesfolder, "compare", "test_csv_processing.csv") + return fixturesfolder / "compare" / "test_csv_processing.csv" @pytest.fixture(scope="class") def testfile2(self, fixturesfolder): - return join(fixturesfolder, "compare", "test_csv_processing2.csv") + return fixturesfolder / "compare" / "test_csv_processing2.csv" def test_compare_files(self, testfile1, testfile2): result = compare_files(testfile1, testfile2, encoding="utf-8") diff --git a/tests/hdx/utilities/test_dictandlist.py b/tests/hdx/utilities/test_dictandlist.py index d544fad..954871b 100755 --- a/tests/hdx/utilities/test_dictandlist.py +++ b/tests/hdx/utilities/test_dictandlist.py @@ -1,7 +1,6 @@ """Dictionary Tests""" from os import remove -from os.path import join 
import pytest @@ -277,7 +276,7 @@ def test_read_write_list_to_csv(self): delete_on_failure=False, ) as tempdir: filename = "test_read_write_list_to_csv.csv" - filepath = join(tempdir, filename) + filepath = tempdir / filename write_list_to_csv(filepath, list_of_lists, headers=["h1", "h2", "h3", "h4"]) newll = read_list_from_csv(filepath) newld = read_list_from_csv(filepath, headers=1, dict_form=True) @@ -360,7 +359,9 @@ def test_read_write_list_to_csv(self): {"h1": 4, "h2": 5, "h3": 6, "h4": "b"}, {"h1": 7, "h2": 8, "h3": 9, "h4": "c"}, ] - write_list_to_csv(filepath, list_of_dicts, headers=["h1", "h2", "h3", "h4"]) + write_list_to_csv( + str(filepath), list_of_dicts, headers=["h1", "h2", "h3", "h4"] + ) newll = read_list_from_csv(filepath) remove(filepath) assert newll == [ @@ -370,7 +371,7 @@ def test_read_write_list_to_csv(self): ["7", "8", "9", "c"], ] write_list_to_csv(filepath, list_of_dicts) - newll = read_list_from_csv(filepath) + newll = read_list_from_csv(str(filepath)) remove(filepath) assert newll == [ ["h1", "h2", "h3", "h4"], diff --git a/tests/hdx/utilities/test_downloader.py b/tests/hdx/utilities/test_downloader.py index 6195a1d..0952391 100755 --- a/tests/hdx/utilities/test_downloader.py +++ b/tests/hdx/utilities/test_downloader.py @@ -5,7 +5,8 @@ from collections import OrderedDict from contextlib import contextmanager from os import remove -from os.path import abspath, join +from os.path import join +from pathlib import Path from shutil import copytree, rmtree from tempfile import gettempdir @@ -40,11 +41,11 @@ def useragent(self): @pytest.fixture(scope="class") def downloaderfolder(self, fixturesfolder): - return join(fixturesfolder, self.downloaderfoldername) + return fixturesfolder / self.downloaderfoldername @pytest.fixture(scope="class") def fixturefile(self, downloaderfolder): - return join(downloaderfolder, "extra_params_tree.yaml") + return downloaderfolder / "extra_params_tree.yaml" @pytest.fixture(scope="class") def fixtureurl(self): @@ -89,23 +90,22 @@ def fixturejsonurl(self): def test_get_path_for_url( self, tmp_path, fixtureurl, configfolder, downloaderfolder ): - tmp_path = str(tmp_path) filename = "test_data.csv" path = Download.get_path_for_url(fixtureurl, configfolder) - assert abspath(path) == abspath(join(configfolder, filename)) + assert path.absolute() == configfolder.absolute() / filename path = Download.get_path_for_url(fixtureurl, downloaderfolder) - assert abspath(path) == abspath(join(downloaderfolder, "test_data3.csv")) - testfolder = join(tmp_path, self.downloaderfoldername) + assert path.absolute() == downloaderfolder.absolute() / "test_data3.csv" + testfolder = tmp_path / self.downloaderfoldername rmtree(testfolder, ignore_errors=True) copytree(downloaderfolder, testfolder) path = Download.get_path_for_url(fixtureurl, testfolder, overwrite=True) - assert abspath(path) == abspath(join(testfolder, filename)) + assert path.absolute() == testfolder.absolute() / filename rmtree(testfolder) filename = "myfilename.txt" path = Download.get_path_for_url(fixtureurl, filename=filename) - assert abspath(path) == abspath(join(gettempdir(), filename)) + assert path.absolute() == Path(join(gettempdir(), filename)).absolute() path = Download.get_path_for_url(fixtureurl, downloaderfolder, filename) - assert abspath(path) == abspath(join(downloaderfolder, filename)) + assert path.absolute() == downloaderfolder.absolute() / filename def test_init(self, monkeypatch, downloaderfolder): with Download(auth=("u", "p")) as downloader: @@ -113,7 +113,7 @@ def 
test_init(self, monkeypatch, downloaderfolder): basicauth = "Basic dXNlcjpwYXNz" with Download(basic_auth=basicauth) as downloader: assert downloader.session.auth == ("user", "pass") - basicauthfile = join(downloaderfolder, "basicauth.txt") + basicauthfile = downloaderfolder / "basicauth.txt" with Download(basic_auth_file=basicauthfile) as downloader: assert downloader.session.auth == ("testuser", "testpass") bearertoken = "ABCDE" @@ -122,14 +122,14 @@ def test_init(self, monkeypatch, downloaderfolder): assert ( downloader.session.headers["Authorization"] == f"Bearer {bearertoken}" ) - bearertokenfile = join(downloaderfolder, "bearertoken.txt") + bearertokenfile = downloaderfolder / "bearertoken.txt" bearertoken = "12345" with Download(bearer_token_file=bearertokenfile) as downloader: assert downloader.session.headers["Accept"] == "application/json" assert ( downloader.session.headers["Authorization"] == f"Bearer {bearertoken}" ) - extraparamsyamltree = join(downloaderfolder, "extra_params_tree.yaml") + extraparamsyamltree = downloaderfolder / "extra_params_tree.yaml" with Download( extra_params_yaml=extraparamsyamltree, extra_params_lookup="mykey" ) as downloader: @@ -172,7 +172,7 @@ def test_init(self, monkeypatch, downloaderfolder): ) with pytest.raises(SessionError): Download(auth=("u", "p"), basic_auth="Basic xxxxxxxxxxxxxxxx") - extraparamsjson = join(downloaderfolder, "extra_params.json") + extraparamsjson = downloaderfolder / "extra_params.json" with pytest.raises(SessionError): Download(auth=("u", "p"), basic_auth_file=extraparamsjson) with pytest.raises(SessionError): @@ -215,7 +215,7 @@ def test_init(self, monkeypatch, downloaderfolder): Download(basic_auth_file="NOTEXIST") with pytest.raises(IOError): Download(bearer_token_file="NOTEXIST") - extraparamsyaml = join(downloaderfolder, "extra_params.yaml") + extraparamsyaml = downloaderfolder / "extra_params.yaml" test_url = "http://www.lalala.com/lala" with Download( basic_auth_file=basicauthfile, extra_params_dict={"key1": "val1"} @@ -421,7 +421,6 @@ def test_download_file( getfixtureurl, postfixtureurl, ): - tmp_path = str(tmp_path) with pytest.raises(DownloadError), Download() as downloader: downloader.download_file("NOTEXIST://NOTEXIST.csv", folder=tmp_path) with pytest.raises(DownloadError), Download() as downloader: @@ -429,40 +428,42 @@ def test_download_file( filename = "myfilename.txt" with pytest.raises(DownloadError), Download() as downloader: downloader.download_file( - fixturefile, folder=tmp_path, path=join(tmp_path, filename) + fixturefile, folder=tmp_path, path=tmp_path / filename ) with pytest.raises(DownloadError), Download() as downloader: downloader.download_file( - fixturefile, filename=filename, path=join(tmp_path, filename) + fixturefile, filename=filename, path=tmp_path / filename ) with Download() as downloader: f = downloader.download_file(fixturefile, folder=tmp_path) - fpath = abspath(f) + fpath = f.absolute() remove(f) - assert fpath == abspath(join(tmp_path, "extra_params_tree.yaml")) + assert fpath == tmp_path.absolute() / "extra_params_tree.yaml" f = downloader.download_file(fixtureurl, folder=tmp_path) - fpath = abspath(f) + fpath = f.absolute() remove(f) - assert fpath == abspath(join(tmp_path, "test_data.csv")) - f = downloader.download_file(fixtureurl, folder=tmp_path, filename=filename) - fpath = abspath(f) - assert fpath == abspath(join(tmp_path, filename)) + assert fpath == tmp_path.absolute() / "test_data.csv" f = downloader.download_file( - fixtureurl, path=join(tmp_path, filename), 
overwrite=True + fixtureurl, folder=str(tmp_path), filename=filename ) - fpath = abspath(f) - assert fpath == abspath(join(tmp_path, filename)) + fpath = f.absolute() + assert fpath == tmp_path.absolute() / filename f = downloader.download_file( - fixtureurl, path=join(tmp_path, filename), overwrite=False + fixtureurl, path=tmp_path / filename, overwrite=True ) - fpath = abspath(f) - assert fpath == abspath(join(tmp_path, filename.replace(".txt", "1.txt"))) + fpath = f.absolute() + assert fpath == tmp_path.absolute() / filename f = downloader.download_file( - fixtureurl, path=join(tmp_path, filename), keep=True + fixtureurl, path=tmp_path / filename, overwrite=False ) - fpath = abspath(f) + fpath = f.absolute() + assert fpath == tmp_path.absolute() / filename.replace(".txt", "1.txt") + f = downloader.download_file( + fixtureurl, path=str(tmp_path / filename), keep=True + ) + fpath = f.absolute() remove(f) - assert fpath == abspath(join(tmp_path, filename)) + assert fpath == tmp_path.absolute() / filename f = downloader.download_file( f"{getfixtureurl}?id=10&lala=a", post=False, @@ -470,7 +471,7 @@ def test_download_file( folder=tmp_path, filename=filename, ) - fpath = abspath(f) + fpath = f.absolute() with open(fpath, encoding="utf-8") as fi: text = fi.read() assert '"id": "10"' in text @@ -478,7 +479,7 @@ def test_download_file( assert '"b": "4"' in text assert '"d": "3"' in text remove(f) - assert fpath == abspath(join(tmp_path, filename)) + assert fpath == tmp_path.absolute() / filename f = downloader.download_file( f"{postfixtureurl}?id=3&lala=b", post=True, @@ -486,7 +487,7 @@ def test_download_file( folder=tmp_path, filename=filename, ) - fpath = abspath(f) + fpath = f.absolute() with open(fpath, encoding="utf-8") as fi: text = fi.read() assert '"id": "3"' in text @@ -494,7 +495,7 @@ def test_download_file( assert '"a": "3"' in text assert '"c": "2"' in text remove(f) - assert fpath == abspath(join(tmp_path, filename)) + assert fpath == tmp_path.absolute() / filename def test_download( self, diff --git a/tests/hdx/utilities/test_email.py b/tests/hdx/utilities/test_email.py index 90b39de..bd905ea 100755 --- a/tests/hdx/utilities/test_email.py +++ b/tests/hdx/utilities/test_email.py @@ -1,7 +1,5 @@ """Email Tests""" -from os.path import join - import pytest from hdx.utilities.email import Email, EmailConfigurationError @@ -10,11 +8,11 @@ class TestEmail: @pytest.fixture(scope="class") def email_json(self, configfolder): - return join(configfolder, "hdx_email_configuration.json") + return configfolder / "hdx_email_configuration.json" @pytest.fixture(scope="class") def email_yaml(self, configfolder): - return join(configfolder, "hdx_email_configuration.yaml") + return configfolder / "hdx_email_configuration.yaml" def test_mail(self, mocksmtp): smtp_initargs = { diff --git a/tests/hdx/utilities/test_file_hashing.py b/tests/hdx/utilities/test_file_hashing.py index 1ac93a0..c4218b8 100644 --- a/tests/hdx/utilities/test_file_hashing.py +++ b/tests/hdx/utilities/test_file_hashing.py @@ -1,5 +1,3 @@ -from os.path import join - import pytest from hdx.utilities.file_hashing import ( @@ -23,31 +21,31 @@ class TestZipCRC: @pytest.fixture def zipfolder(self, fixturesfolder): - return join(fixturesfolder, "file_hashing") + return fixturesfolder / "file_hashing" @pytest.fixture def shpfile(self, zipfolder): - return join(zipfolder, "test_shapefile.zip") + return zipfolder / "test_shapefile.zip" @pytest.fixture def xlsxfile(self, zipfolder): - return join(zipfolder, "test.xlsx") + return zipfolder / 
"test.xlsx" @pytest.fixture def emptyfile(self, zipfolder): - return join(zipfolder, "empty.zip") + return zipfolder / "empty.zip" @pytest.fixture def badzipheader(self, zipfolder): - return join(zipfolder, "bad_header.zip") + return zipfolder / "bad_header.zip" @pytest.fixture def valid_sig_invalid_body(self, zipfolder): - return join(zipfolder, "valid_sig_invalid_body.zip") + return zipfolder / "valid_sig_invalid_body.zip" @pytest.fixture def bad_index(self, zipfolder): - return join(zipfolder, "bad_index.xlsx") + return zipfolder / "bad_index.xlsx" def test_hash_excel_fp(self, xlsxfile): with open(xlsxfile, "rb") as fp: @@ -72,7 +70,7 @@ def test_get_size_and_hash( bad_index, ): assert get_size_and_hash(shpfile, "shp") == (1330530, "31662cb7") - assert get_size_and_hash(shpfile, "zip") == (1330530, "31662cb7") + assert get_size_and_hash(str(shpfile), "zip") == (1330530, "31662cb7") assert get_size_and_hash(shpfile, "xlsx") == (1330530, "31662cb7") assert get_size_and_hash(xlsxfile, "xlsx") == ( diff --git a/tests/hdx/utilities/test_html.py b/tests/hdx/utilities/test_html.py index 9f7e54d..e8d7fd5 100755 --- a/tests/hdx/utilities/test_html.py +++ b/tests/hdx/utilities/test_html.py @@ -1,7 +1,5 @@ """HTML Tests""" -from os.path import join - import pytest from hdx.utilities.html import extract_table, get_soup @@ -13,7 +11,7 @@ class TestHTML: @pytest.fixture(scope="function") def htmltext(self, fixturesfolder): - return load_text(join(fixturesfolder, "html", "response.html")) + return load_text(fixturesfolder / "html" / "response.html") @pytest.fixture(scope="function") def downloader(self, htmltext): diff --git a/tests/hdx/utilities/test_loader.py b/tests/hdx/utilities/test_loader.py index 420af32..58c1181 100755 --- a/tests/hdx/utilities/test_loader.py +++ b/tests/hdx/utilities/test_loader.py @@ -1,7 +1,6 @@ """Loader Tests""" from collections import OrderedDict -from os.path import join import pytest @@ -115,19 +114,19 @@ class TestLoader: @pytest.fixture(scope="class") def loaderfolder(self, fixturesfolder): - return join(fixturesfolder, "loader") + return fixturesfolder / "loader" @pytest.fixture(scope="class") def empty_yaml(self, loaderfolder): - return join(loaderfolder, "empty.yaml") + return loaderfolder / "empty.yaml" @pytest.fixture(scope="class") def empty_json(self, loaderfolder): - return join(loaderfolder, "empty.json") + return loaderfolder / "empty.json" @pytest.fixture(scope="class") def empty_list(self, loaderfolder): - return join(loaderfolder, "empty_list.json") + return loaderfolder / "empty_list.json" def test_load_empty(self, empty_yaml, empty_json, empty_list): with pytest.raises(LoadError): @@ -144,8 +143,8 @@ def test_load_empty(self, empty_yaml, empty_json, empty_list): def test_load_and_merge_yaml(self, configfolder): result = load_and_merge_yaml( [ - join(configfolder, "hdx_config.yaml"), - join(configfolder, "project_configuration.yaml"), + configfolder / "hdx_config.yaml", + configfolder / "project_configuration.yaml", ] ) assert list(result.items()) == list(TestLoader.expected_yaml.items()) @@ -153,35 +152,36 @@ def test_load_and_merge_yaml(self, configfolder): def test_load_and_merge_json(self, configfolder): result = load_and_merge_json( [ - join(configfolder, "hdx_config.json"), - join(configfolder, "project_configuration.json"), + configfolder / "hdx_config.json", + configfolder / "project_configuration.json", ] ) assert list(result.items()) == list(TestLoader.expected_json.items()) def test_load_yaml_into_existing_dict(self, configfolder): - 
existing_dict = load_yaml(join(configfolder, "hdx_config.yaml")) + existing_dict = load_yaml(configfolder / "hdx_config.yaml") result = load_yaml_into_existing_dict( - existing_dict, join(configfolder, "project_configuration.yaml") + existing_dict, configfolder / "project_configuration.yaml" ) assert list(result.items()) == list(TestLoader.expected_yaml.items()) def test_load_json_into_existing_dict(self, configfolder): - existing_dict = load_json(join(configfolder, "hdx_config.json")) + existing_dict = load_json(configfolder / "hdx_config.json") result = load_json_into_existing_dict( - existing_dict, join(configfolder, "project_configuration.json") + existing_dict, configfolder / "project_configuration.json" ) assert list(result.items()) == list(TestLoader.expected_json.items()) def test_load_file_to_str(self): with temp_dir(folder="test_text") as tmp_path: - text_file = join(tmp_path, "text_file.txt") + text_file = tmp_path / "text_file.txt" save_text(TestLoader.text, text_file) result = load_text(text_file) assert result == TestLoader.text - result = load_text(text_file, strip=True) + result = load_text(str(text_file), strip=True) + save_text(TestLoader.text, str(text_file)) assert result == TestLoader.expected_text_strip result = load_text(text_file, replace_line_separators=" ") assert result == TestLoader.expected_text_newlines_to_spaces with pytest.raises(IOError): - load_text(join(tmp_path, "NOTEXIST.txt")) + load_text(tmp_path / "NOTEXIST.txt") diff --git a/tests/hdx/utilities/test_path.py b/tests/hdx/utilities/test_path.py index 646aa49..f646544 100755 --- a/tests/hdx/utilities/test_path.py +++ b/tests/hdx/utilities/test_path.py @@ -1,7 +1,8 @@ """Path Utility Tests""" import copy -from os.path import exists, join +from os.path import exists +from pathlib import Path from shutil import rmtree from tempfile import gettempdir @@ -21,33 +22,34 @@ class TestPath: @pytest.fixture(scope="class") def mytestdir(self): - return join("haha", "lala") + return Path("haha") / "lala" @pytest.fixture(scope="class") def fixtureurl(self): return "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-utilities/master/tests/fixtures/test_data.csv" def test_get_temp_dir(self, monkeypatch, mytestdir): - assert get_temp_dir() == gettempdir() - assert get_temp_dir("TEST") == join(gettempdir(), "TEST") - monkeypatch.setenv("TEMP_DIR", mytestdir) + expected_tmpdir = Path(gettempdir()) + assert get_temp_dir() == expected_tmpdir + assert get_temp_dir("TEST") == expected_tmpdir / "TEST" + monkeypatch.setenv("TEMP_DIR", str(mytestdir)) assert get_temp_dir() == mytestdir monkeypatch.delenv("TEMP_DIR") def test_temp_dir(self, monkeypatch, mytestdir): - monkeypatch.setenv("TEMP_DIR", mytestdir) + monkeypatch.setenv("TEMP_DIR", str(mytestdir)) with temp_dir() as tempdir: assert tempdir == mytestdir monkeypatch.delenv("TEMP_DIR") tempfolder = "papa" - expected_dir = join(gettempdir(), tempfolder) + expected_dir = Path(gettempdir(), tempfolder) with temp_dir(tempfolder) as tempdir: assert tempdir == expected_dir assert exists(tempdir) is False try: - with temp_dir(tempfolder) as tempdir: + with temp_dir(Path(tempfolder)) as tempdir: assert tempdir == expected_dir raise ValueError("Fail!") except ValueError: @@ -119,7 +121,7 @@ def test_temp_dir(self, monkeypatch, mytestdir): def test_progress_storing_tempdir(self, monkeypatch): tempfolder = "papa" - expected_dir = join(gettempdir(), tempfolder) + expected_dir = Path(gettempdir(), tempfolder) rmtree(expected_dir, ignore_errors=True) iterator = [ {"iso3": "AFG", 
"name": "Afghanistan"}, @@ -127,7 +129,7 @@ def test_progress_storing_tempdir(self, monkeypatch): {"iso3": "YEM", "name": "Yemen"}, {"iso3": "ZAM", "name": "Zambia"}, ] - expected_batch_file = join(expected_dir, "batch.txt") + expected_batch_file = expected_dir / "batch.txt" result = list() for info, nextdict in progress_storing_tempdir(tempfolder, iterator, "iso3"): assert info["folder"] == expected_dir @@ -243,7 +245,7 @@ def test_progress_storing_tempdir(self, monkeypatch): def test_multiple_progress_storing_tempdir(self, monkeypatch): tempfolder = "gaga" - expected_dir = join(gettempdir(), tempfolder) + expected_dir = Path(gettempdir(), tempfolder) rmtree(expected_dir, ignore_errors=True) iterator1 = [{"emergency_id": "911"}] iterator2 = [ @@ -263,7 +265,7 @@ def test_multiple_progress_storing_tempdir(self, monkeypatch): ( 0, { - "folder": join(expected_dir, "0"), + "folder": expected_dir / "0", "batch": "1234", "progress": "emergency_id=911", }, @@ -272,7 +274,7 @@ def test_multiple_progress_storing_tempdir(self, monkeypatch): ( 1, { - "folder": join(expected_dir, "1"), + "folder": expected_dir / "1", "batch": "1234", "progress": "iso3=AFG", }, @@ -281,7 +283,7 @@ def test_multiple_progress_storing_tempdir(self, monkeypatch): ( 1, { - "folder": join(expected_dir, "1"), + "folder": expected_dir / "1", "batch": "1234", "progress": "iso3=SDN", }, @@ -290,7 +292,7 @@ def test_multiple_progress_storing_tempdir(self, monkeypatch): ( 1, { - "folder": join(expected_dir, "1"), + "folder": expected_dir / "1", "batch": "1234", "progress": "iso3=YEM", }, @@ -299,7 +301,7 @@ def test_multiple_progress_storing_tempdir(self, monkeypatch): ( 1, { - "folder": join(expected_dir, "1"), + "folder": expected_dir / "1", "batch": "1234", "progress": "iso3=ZAM", }, @@ -329,7 +331,7 @@ def test_multiple_progress_storing_tempdir(self, monkeypatch): tempfolder, iterators, keys ): assert exists(info["folder"]) is True - assert info["folder"] == join(expected_dir, "1") + assert info["folder"] == expected_dir / "1" assert info["batch"] == start_batch result.append(nextdict) assert result == iterator2[2:] @@ -377,7 +379,7 @@ def test_multiple_progress_storing_tempdir(self, monkeypatch): tempfolder, iterators, keys ): assert exists(info["folder"]) is True - assert info["folder"] == join(expected_dir, "1") + assert info["folder"] == expected_dir / "1" assert info["batch"] == start_batch result.append(nextdict) assert result == iterator2[1:] diff --git a/tests/hdx/utilities/test_retriever.py b/tests/hdx/utilities/test_retriever.py index 58669e0..d69cd65 100755 --- a/tests/hdx/utilities/test_retriever.py +++ b/tests/hdx/utilities/test_retriever.py @@ -3,7 +3,6 @@ import random import string from os import mkdir -from os.path import join from shutil import rmtree import pytest @@ -25,17 +24,16 @@ def useragent(self): @pytest.fixture(scope="class") def retrieverfolder(self, fixturesfolder): - return join(fixturesfolder, self.retrieverfoldername) + return fixturesfolder / self.retrieverfoldername @pytest.fixture(scope="class") def fallback_dir(self, retrieverfolder): - return join(retrieverfolder, "fallbacks") + return retrieverfolder / "fallbacks" @pytest.fixture(scope="function") def dirs(self, tmp_path): - tmp_path = str(tmp_path) - saved_dir = join(tmp_path, "saved") - temp_dir = join(tmp_path, "temp") + saved_dir = tmp_path / "saved" + temp_dir = tmp_path / "temp" rmtree(temp_dir, ignore_errors=True) mkdir(temp_dir) return saved_dir, temp_dir @@ -190,15 +188,15 @@ def test_download_nosave(self, dirs, retrieverfolder, 
fallback_dir):
             use_saved=False,
         ) as retriever:
             filename = "test.txt"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             path = retriever.download_file(
                 url, filename, logstr="test file", fallback=True
             )
-            assert path == join(temp_dir, filename)
+            assert path == temp_dir / filename
             path = retriever.download_file(
                 "NOTEXIST", filename, logstr="test file", fallback=True
             )
-            assert path == join(fallback_dir, filename)
+            assert path == fallback_dir / filename
             with pytest.raises(DownloadError):
                 retriever.download_file("NOTEXIST", filename, fallback=False)
             with pytest.raises(DownloadError):
@@ -220,7 +218,7 @@ def test_download_nosave(self, dirs, retrieverfolder, fallback_dir):
             with pytest.raises(DownloadError):
                 retriever.download_text("NOTEXIST", filename, fallback=False)
             filename = "test.yaml"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             data = retriever.download_yaml(
                 url, filename, logstr="test file", fallback=False
             )
@@ -232,7 +230,7 @@ def test_download_nosave(self, dirs, retrieverfolder, fallback_dir):
             with pytest.raises(DownloadError):
                 retriever.download_yaml("NOTEXIST", filename, fallback=False)
             filename = "test.json"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             data = retriever.download_json(
                 url, filename, logstr="test file", fallback=False
             )
@@ -244,7 +242,7 @@ def test_download_nosave(self, dirs, retrieverfolder, fallback_dir):
             with pytest.raises(DownloadError):
                 retriever.download_json("NOTEXIST", filename, fallback=False)
             filename = "test.csv"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             headers, iterator = retriever.get_tabular_rows(
                 url, logstr="test file", fallback=False
             )
@@ -281,7 +279,7 @@ def test_get_tabular_rows_multi_url(self, dirs, retrieverfolder, fallback_dir):
             use_saved=False,
         ) as retriever:
             filename = "test.csv"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             headers, iterator = retriever.get_tabular_rows(
                 [url, url], logstr="test file", fallback=False
             )
@@ -293,7 +291,7 @@ def test_get_tabular_rows_multi_url(self, dirs, retrieverfolder, fallback_dir):
                 ["gas", "2", "6.5", "'n/a'"],
             ]
             filename = "test_hxl.csv"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             headers, iterator = retriever.get_tabular_rows(
                 [url, url],
                 has_hxl=True,
@@ -321,15 +319,15 @@ def test_download_save(self, dirs, retrieverfolder, fallback_dir):
             use_saved=False,
         ) as retriever:
             filename = "test.txt"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             path = retriever.download_file(
                 url, filename, logstr="test file", fallback=True
             )
-            assert path == join(saved_dir, filename)
+            assert path == saved_dir / filename
             path = retriever.download_file(
                 "NOTEXIST", filename, logstr="test file", fallback=True
             )
-            assert path == join(fallback_dir, filename)
+            assert path == fallback_dir / filename
             with pytest.raises(DownloadError):
                 retriever.download_file("NOTEXIST", filename, fallback=False)
             text = retriever.download_text(
@@ -343,7 +341,7 @@ def test_download_save(self, dirs, retrieverfolder, fallback_dir):
             with pytest.raises(DownloadError):
                 retriever.download_text("NOTEXIST", filename, fallback=False)
             filename = "test.yaml"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             data = retriever.download_yaml(
                 url, filename, logstr="test file", fallback=False
             )
@@ -355,7 +353,7 @@ def test_download_save(self, dirs, retrieverfolder, fallback_dir):
             with pytest.raises(DownloadError):
                 retriever.download_yaml("NOTEXIST", filename, fallback=False)
             filename = "test.json"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             data = retriever.download_json(
                 url, filename, logstr="test file", fallback=False
             )
@@ -367,7 +365,7 @@ def test_download_save(self, dirs, retrieverfolder, fallback_dir):
             with pytest.raises(DownloadError):
                 retriever.download_json("NOTEXIST", filename, fallback=False)
             filename = "test.csv"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             headers, iterator = retriever.get_tabular_rows(
                 url, logstr="test file", fallback=False
             )
@@ -391,6 +389,20 @@ def test_download_save(self, dirs, retrieverfolder, fallback_dir):
                     logstr="test file",
                     fallback=False,
                 )
+        with Retrieve(
+            downloader,
+            str(fallback_dir),
+            str(saved_dir),
+            str(temp_dir),
+            save=True,
+            use_saved=False,
+        ) as retriever:
+            filename = "test.txt"
+            url = retrieverfolder / filename
+            path = retriever.download_file(
+                url, filename, logstr="test file", fallback=True
+            )
+            assert path == saved_dir / filename

     def test_download_usesaved(self, dirs, retrieverfolder, fallback_dir):
         _, temp_dir = dirs
@@ -405,17 +417,17 @@ def test_download_usesaved(self, dirs, retrieverfolder, fallback_dir):
             use_saved=True,
         ) as retriever:
             filename = "test.txt"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             path = retriever.download_file(
                 url, filename, logstr="test file", fallback=True
             )
-            assert path == join(saved_dir, filename)
+            assert path == saved_dir / filename
             path = retriever.download_file(
                 "NOTEXIST", filename, logstr="test file", fallback=True
             )
-            assert path == join(saved_dir, filename)
+            assert path == saved_dir / filename
             path = retriever.download_file("NOTEXIST", filename, fallback=False)
-            assert path == join(saved_dir, filename)
+            assert path == saved_dir / filename
             text = retriever.download_text(
                 url, filename, logstr="test file", fallback=False
             )
@@ -427,7 +439,7 @@ def test_download_usesaved(self, dirs, retrieverfolder, fallback_dir):
             text = retriever.download_text("NOTEXIST", filename, fallback=False)
             assert text == "hello"
             filename = "test.yaml"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             data = retriever.download_yaml(
                 url, filename, logstr="test file", fallback=False
             )
@@ -439,7 +451,7 @@ def test_download_usesaved(self, dirs, retrieverfolder, fallback_dir):
             data = retriever.download_yaml("NOTEXIST", filename, fallback=False)
             assert data["param_1"] == "ABC"
             filename = "test.json"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             data = retriever.download_json(
                 url, filename, logstr="test file", fallback=False
             )
@@ -451,7 +463,7 @@ def test_download_usesaved(self, dirs, retrieverfolder, fallback_dir):
             data = retriever.download_json("NOTEXIST", filename, fallback=False)
             assert data["my_param"] == "abc"
             filename = "test.csv"
-            url = join(retrieverfolder, filename)
+            url = retrieverfolder / filename
             headers, iterator = retriever.get_tabular_rows(
                 url, logstr="test file", fallback=False
             )
diff --git a/tests/hdx/utilities/test_saver.py b/tests/hdx/utilities/test_saver.py
index 6b3c72a..f1bf0ce 100755
--- a/tests/hdx/utilities/test_saver.py
+++ b/tests/hdx/utilities/test_saver.py
@@ -4,7 +4,8 @@
 from collections import OrderedDict
 from copy import deepcopy
 from os import remove
-from os.path import exists, join
+from os.path import exists
+from pathlib import Path

 import pytest
@@ -98,11 +99,11 @@ class TestLoader:
     @pytest.fixture(scope="class")
     def saverfolder(self, fixturesfolder):
-        return join(fixturesfolder, "saver")
+        return fixturesfolder / "saver"

     @pytest.fixture(scope="class")
     def json_csv_configuration(self, fixturesfolder):
-        return load_yaml(join(fixturesfolder, "config", "json_csv.yaml"))
+        return load_yaml(fixturesfolder / "config" / "json_csv.yaml")

     @pytest.mark.parametrize(
         "filename,pretty,sortkeys",
         [
@@ -114,8 +115,8 @@ def json_csv_configuration(self, fixturesfolder):
         ],
     )
     def test_save_yaml(self, tmp_path, saverfolder, filename, pretty, sortkeys):
-        test_path = join(str(tmp_path), filename)
-        ref_path = join(saverfolder, filename)
+        test_path = Path(tmp_path, filename)
+        ref_path = saverfolder / filename
         save_yaml(
             TestLoader.yaml_to_write,
             test_path,
@@ -124,7 +125,7 @@ def test_save_yaml(self, tmp_path, saverfolder, filename, pretty, sortkeys):
         )
         assert_files_same(ref_path, test_path)
         dct = json.loads(json.dumps(TestLoader.yaml_to_write))
-        save_yaml(dct, test_path, pretty=pretty, sortkeys=sortkeys)
+        save_yaml(dct, str(test_path), pretty=pretty, sortkeys=sortkeys)
         assert_files_same(ref_path, test_path)

     @pytest.mark.parametrize(
@@ -137,11 +138,11 @@ def test_save_yaml(self, tmp_path, saverfolder, filename, pretty, sortkeys):
         ],
     )
     def test_save_json(self, tmp_path, saverfolder, filename, pretty, sortkeys):
-        test_path = join(str(tmp_path), filename)
-        ref_path = join(saverfolder, filename)
+        test_path = Path(tmp_path, filename)
+        ref_path = saverfolder / filename
         save_json(
             TestLoader.json_to_write,
-            test_path,
+            str(test_path),
             pretty=pretty,
             sortkeys=sortkeys,
         )
@@ -157,7 +158,7 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             (1, "2", 3),
             (4, "5", 6),
         )
-        output_dir = str(tmp_path)
+        output_dir = tmp_path

         save_hxlated_output(
             json_csv_configuration["test1"],
@@ -167,9 +168,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             output_dir=output_dir,
         )
         filename = "out.csv"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)
         filename = "out.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

         row0 = rows[0]
         rowsdict = []
@@ -186,9 +187,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             output_dir=output_dir,
         )
         filename = "out.csv"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)
         filename = "out.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

         save_hxlated_output(
             json_csv_configuration["test2"],
@@ -198,9 +199,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             output_dir=output_dir,
         )
         filename = "out2.csv"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)
         filename = "out2.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

         rowsdict = []
         for row in rows[1:]:
@@ -213,12 +214,12 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             rowsdict,
             includes_header=True,
             includes_hxltags=True,
-            output_dir=output_dir,
+            output_dir=str(output_dir),
         )
         filename = "out2.csv"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)
         filename = "out2.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

         save_hxlated_output(
             json_csv_configuration["test3"],
@@ -228,9 +229,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             output_dir=output_dir,
         )
         filename = "out3.csv"
-        assert_files_same(join(saverfolder, "out.csv"), join(output_dir, filename))
+        assert_files_same(saverfolder / "out.csv", output_dir / filename)
         filename = "out3.json"
-        assert exists(join(output_dir, filename)) is False
+        assert exists(output_dir / filename) is False

         save_hxlated_output(
             json_csv_configuration["test4"],
@@ -240,9 +241,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             output_dir=output_dir,
         )
         filename = "out4.csv"
-        assert exists(join(output_dir, filename)) is False
+        assert exists(output_dir / filename) is False
         filename = "out4.json"
-        assert_files_same(join(saverfolder, "out2.json"), join(output_dir, filename))
+        assert_files_same(saverfolder / "out2.json", output_dir / filename)

         save_hxlated_output(
             json_csv_configuration["test5"],
@@ -252,9 +253,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             output_dir=output_dir,
         )
         filename = "out5.csv"
-        assert_files_same(join(saverfolder, "out2.csv"), join(output_dir, filename))
+        assert_files_same(saverfolder / "out2.csv", output_dir / filename)
         filename = "out5.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

         save_hxlated_output(
             json_csv_configuration["test6"],
@@ -265,9 +266,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             today="today!",
         )
         filename = "out6.csv"
-        assert_files_same(join(saverfolder, "out2.csv"), join(output_dir, filename))
+        assert_files_same(saverfolder / "out2.csv", output_dir / filename)
         filename = "out6.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

         save_hxlated_output(
             json_csv_configuration["test7"],
@@ -278,9 +279,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             today="today!",
         )
         filename = "out7.csv"
-        assert_files_same(join(saverfolder, "out2.csv"), join(output_dir, filename))
+        assert_files_same(saverfolder / "out2.csv", output_dir / filename)
         filename = "out7.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

         save_hxlated_output(
             json_csv_configuration["test8"],
@@ -291,9 +292,9 @@ def test_save_hxlated_output(self, tmp_path, saverfolder, json_csv_configuration
             today="today!",
         )
         filename = "out8.csv"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)
         filename = "out8.json"
-        assert_files_same(join(saverfolder, filename), join(output_dir, filename))
+        assert_files_same(saverfolder / filename, output_dir / filename)

     def test_save_iterable(self):
         list_of_tuples = [(1, 2, 3, "a"), (4, 5, 6, "b"), (7, 8, 9, "c")]
@@ -310,7 +311,7 @@ def test_save_iterable(self):
             delete_on_failure=False,
         ) as tempdir:
             filename = "test_save_iterable_to_csv.csv"
-            filepath = join(tempdir, filename)
+            filepath = tempdir / filename
             rows = save_iterable(
                 filepath, list_of_lists, headers=["h1", "h2", "h3", "h4"]
             )
@@ -343,7 +344,7 @@ def test_save_iterable(self):
                 ["9", "8", "7", "c"],
             ]

-            save_iterable(filepath, list_of_dicts, columns=["h2", "h3", "h1"])
+            save_iterable(str(filepath), list_of_dicts, columns=["h2", "h3", "h1"])
             newll = read_list_from_csv(filepath)
             remove(filepath)
             assert newll == [
@@ -353,7 +354,7 @@ def test_save_iterable(self):
                 ["8", "9", "7"],
             ]

-            xlfilepath = filepath.replace("csv", "xlsx")
+            xlfilepath = filepath.with_suffix(".xlsx")
             rows = save_iterable(
                 xlfilepath,
                 list_of_lists,
@@ -506,7 +507,7 @@ def row_func(row):
                 headers=["h1", "h2", "h3", "h4"],
                 row_function=row_func,
             )
-            newll = read_list_from_csv(filepath)
+            newll = read_list_from_csv(str(filepath))
             remove(filepath)
             assert newll == [
                 ["h1", "h2", "h3", "h4"],
diff --git a/tests/hdx/utilities/test_state.py b/tests/hdx/utilities/test_state.py
index 60a5638..c677346 100755
--- a/tests/hdx/utilities/test_state.py
+++ b/tests/hdx/utilities/test_state.py
@@ -1,7 +1,6 @@
 """State Utility Tests"""

 from datetime import datetime, timezone
-from os.path import join
 from shutil import copyfile

 import pytest
@@ -13,7 +12,7 @@ class TestState:
     @pytest.fixture(scope="class")
     def statefolder(self, fixturesfolder):
-        return join(fixturesfolder, "state")
+        return fixturesfolder / "state"

     @pytest.fixture(scope="class")
     def statefile(self):
@@ -32,8 +31,8 @@ def date2(self):
         return datetime(2022, 5, 12, 10, 15, tzinfo=timezone.utc)

     def test_state(self, tmp_path, statefolder, statefile, date1, date2):
-        statepath = join(tmp_path, statefile)
-        copyfile(join(statefolder, statefile), statepath)
+        statepath = tmp_path / statefile
+        copyfile(statefolder / statefile, statepath)
         with State(statepath, parse_date, iso_string_from_datetime) as state:
             assert state.get() == date1
         with State(statepath, parse_date, iso_string_from_datetime) as state:
@@ -45,8 +44,8 @@ def test_multi_date_state(
         self, tmp_path, statefolder, multidatestatefile, date1, date2
     ):
-        statepath = join(tmp_path, multidatestatefile)
-        copyfile(join(statefolder, multidatestatefile), statepath)
+        statepath = tmp_path / multidatestatefile
+        copyfile(statefolder / multidatestatefile, statepath)
         with State(
             statepath,
             State.dates_str_to_country_date_dict,
diff --git a/tests/hdx/utilities/test_useragent.py b/tests/hdx/utilities/test_useragent.py
index c3706c0..bbd057c 100755
--- a/tests/hdx/utilities/test_useragent.py
+++ b/tests/hdx/utilities/test_useragent.py
@@ -1,7 +1,5 @@
 """User Agent Tests"""

-from os.path import join
-
 import pytest

 from hdx.utilities import __version__
@@ -12,23 +10,23 @@ class TestUserAgent:
     @pytest.fixture(scope="class")
     def user_agent_config_yaml(self, configfolder):
-        return join(configfolder, "user_agent_config.yaml")
+        return configfolder / "user_agent_config.yaml"

     @pytest.fixture(scope="class")
     def user_agent_config2_yaml(self, configfolder):
-        return join(configfolder, "user_agent_config2.yaml")
+        return configfolder / "user_agent_config2.yaml"

     @pytest.fixture(scope="class")
     def user_agent_config3_yaml(self, configfolder):
-        return join(configfolder, "user_agent_config3.yaml")
+        return configfolder / "user_agent_config3.yaml"

     @pytest.fixture(scope="class")
     def empty_yaml(self, configfolder):
-        return join(configfolder, "empty.yaml")
+        return configfolder / "empty.yaml"

     @pytest.fixture(scope="class")
     def user_agent_config_wrong_yaml(self, configfolder):
-        return join(configfolder, "user_agent_config_wrong.yaml")
+        return configfolder / "user_agent_config_wrong.yaml"

     def test_user_agent(
         self,
diff --git a/tests/hdx/utilities/test_zip_crc.py b/tests/hdx/utilities/test_zip_crc.py
index cc2c3f0..c6dbd19 100644
--- a/tests/hdx/utilities/test_zip_crc.py
+++ b/tests/hdx/utilities/test_zip_crc.py
@@ -1,5 +1,3 @@
-from os.path import join
-
 import pytest

 from hdx.utilities.zip_crc import (
@@ -26,19 +24,19 @@ class TestZipCRC:
     @pytest.fixture
     def zipfolder(self, fixturesfolder):
-        return join(fixturesfolder, "file_hashing")
+        return fixturesfolder / "file_hashing"

     @pytest.fixture
     def shpfile(self, zipfolder):
-        return join(zipfolder, "test_shapefile.zip")
+        return zipfolder / "test_shapefile.zip"

     @pytest.fixture
     def xlsxfile(self, zipfolder):
-        return join(zipfolder, "test.xlsx")
+        return zipfolder / "test.xlsx"

     @pytest.fixture
     def emptyfile(self, zipfolder):
-        return join(zipfolder, "empty.zip")
+        return zipfolder / "empty.zip"

     def test_get_zip_tail_header(self):
         assert get_zip_tail_header(65535) == {"Range": "bytes=0-"}