Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion documentation/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ The code for the library is [here](https://github.com/OCHA-DAP/hdx-python-utilit
The library has detailed API documentation which can be found in the menu at the top.

## Breaking Changes
From 4.0.0, Python 3.10 or later is required
From 4.0.0, Python 3.10 or later is required

From 3.8.0, multiple_replace, match_template_variables, earliest_index,
get_matching_text_in_strs, get_matching_text,
Expand Down
3 changes: 2 additions & 1 deletion src/hdx/utilities/base_downloader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
from collections.abc import Iterator, Sequence
from pathlib import Path
from typing import Any


Expand Down Expand Up @@ -32,7 +33,7 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
"""

@abstractmethod
def download_file(self, url: str, *args: Any, **kwargs: Any) -> str:
def download_file(self, url: str, *args: Any, **kwargs: Any) -> Path:
"""Download file from url.

Args:
Expand Down
9 changes: 7 additions & 2 deletions src/hdx/utilities/compare.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
"""File compare utilities."""

from os import linesep
from pathlib import Path

try:
from cydifflib import ndiff
except ImportError:
from difflib import ndiff


def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]:
def compare_files(
path1: Path | str, path2: Path | str, encoding: str = "utf-8"
) -> list[str]:
"""Returns the delta between two files using -, ?, + format excluding lines
that are the same.

Expand All @@ -26,7 +29,9 @@ def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]:
return [x for x in diff if x[0] in ["-", "+", "?"]]


def assert_files_same(path1: str, path2: str, encoding: str = "utf-8") -> None:
def assert_files_same(
path1: Path | str, path2: Path | str, encoding: str = "utf-8"
) -> None:
"""Asserts that two files are the same and returns delta using -, ?, + format if not.
Expand Down
3 changes: 2 additions & 1 deletion src/hdx/utilities/dictandlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import itertools
import warnings
from collections.abc import Callable, Mapping, MutableMapping, Sequence
from pathlib import Path
from typing import Any

from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource
Expand Down Expand Up @@ -410,7 +411,7 @@ def read_list_from_csv(


def write_list_to_csv(
filepath: str,
filepath: Path | str,
rows: list[Sequence | Mapping],
headers: int | Sequence[str] | None = None,
columns: Sequence[int] | Sequence[str] | None = None,
Expand Down
49 changes: 26 additions & 23 deletions src/hdx/utilities/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from collections.abc import Callable, Iterator, Sequence
from copy import deepcopy
from os import remove
from os.path import exists, isfile, join, split, splitext
from os.path import exists, isfile, split, splitext
from pathlib import Path
from typing import Any
from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit
Expand Down Expand Up @@ -133,12 +133,12 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
@staticmethod
def get_path_for_url(
url: str,
folder: str | None = None,
folder: Path | str | None = None,
filename: str | None = None,
path: str | None = None,
path: Path | str | None = None,
overwrite: bool = False,
keep: bool = False,
) -> str:
) -> Path:
"""Get filename from url and join to provided folder or temporary
folder if no folder supplied, ensuring uniqueness.

Expand All @@ -164,7 +164,8 @@ def get_path_for_url(
filename, extension = splitext(filename)
if not folder:
folder = get_temp_dir()
path = join(folder, f"{filename}{extension}")
folder = Path(folder)
path = folder / f"{filename}{extension}"
if overwrite:
try:
remove(path)
Expand All @@ -174,7 +175,7 @@ def get_path_for_url(
count = 0
while exists(path):
count += 1
path = join(folder, f"{filename}{count}{extension}")
path = folder / f"{filename}{count}{extension}"
return path

def get_full_url(self, url: str) -> str:
Expand Down Expand Up @@ -254,7 +255,7 @@ def hxl_row(

def normal_setup(
self,
url: str,
url: Path | str,
stream: bool = True,
post: bool = False,
parameters: dict | None = None,
Expand All @@ -281,6 +282,7 @@ def normal_setup(
self.close_response()
self.response = None
try:
url = str(url)
spliturl = urlsplit(url)
if not spliturl.scheme:
if isfile(url):
Expand Down Expand Up @@ -336,7 +338,7 @@ def set_bearer_token(self, bearer_token: str) -> None:
}
)

def hash_stream(self, url: str) -> str:
def hash_stream(self, url: Path | str) -> str:
"""Stream file from url and hash it using MD5. Must call setup method
first.

Expand All @@ -357,7 +359,7 @@ def hash_stream(self, url: str) -> str:
f"Download of {url} failed in retrieval of stream!" % url
)

def stream_path(self, path: str, errormsg: str):
def stream_path(self, path: Path | str, errormsg: str) -> Path:
"""Stream file from url and store in provided path. Must call setup
method first.

Expand All @@ -370,12 +372,13 @@ def stream_path(self, path: str, errormsg: str):
"""
f = None
try:
f = open(path, "wb")
path = Path(path)
f = path.open("wb")
for chunk in self.response.iter_content(chunk_size=10240):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
f.flush()
return f.name
return path
except Exception as e:
raise DownloadError(errormsg) from e
finally:
Expand All @@ -384,10 +387,10 @@ def stream_path(self, path: str, errormsg: str):

def stream_file(
self,
url: str,
folder: str | None = None,
url: Path | str,
folder: Path | str | None = None,
filename: str | None = None,
path: str | None = None,
path: Path | str | None = None,
overwrite: bool = False,
keep: bool = False,
) -> str:
Expand All @@ -414,9 +417,9 @@ def stream_file(

def download_file(
self,
url: str,
url: Path | str,
**kwargs: Any,
) -> str:
) -> Path:
"""Download file from url and store in provided folder or temporary
folder if no folder supplied.

Expand Down Expand Up @@ -460,7 +463,7 @@ def download_file(
path, f"Download of {url} failed in retrieval of stream!"
)

def download(self, url: str, **kwargs: Any) -> requests.Response:
def download(self, url: Path | str, **kwargs: Any) -> requests.Response:
"""Download url.

Args:
Expand Down Expand Up @@ -539,7 +542,7 @@ def get_json(self) -> Any:
"""
return self.response.json()

def download_text(self, url: str, **kwargs: Any) -> str:
def download_text(self, url: Path | str, **kwargs: Any) -> str:
"""Download url as text.

Args:
Expand All @@ -557,7 +560,7 @@ def download_text(self, url: str, **kwargs: Any) -> str:
self.download(url, **kwargs)
return self.get_text()

def download_yaml(self, url: str, **kwargs: Any) -> Any:
def download_yaml(self, url: Path | str, **kwargs: Any) -> Any:
"""Download url as YAML.

Args:
Expand All @@ -575,7 +578,7 @@ def download_yaml(self, url: str, **kwargs: Any) -> Any:
self.download(url, **kwargs)
return self.get_yaml()

def download_json(self, url: str, **kwargs: Any) -> Any:
def download_json(self, url: Path | str, **kwargs: Any) -> Any:
"""Download url as JSON.

Args:
Expand All @@ -595,7 +598,7 @@ def download_json(self, url: str, **kwargs: Any) -> Any:

def get_frictionless_tableresource(
self,
url: str,
url: Path | str,
ignore_blank_rows: bool = True,
infer_types: bool = False,
**kwargs: Any,
Expand Down Expand Up @@ -641,7 +644,7 @@ def get_frictionless_tableresource(

def _get_tabular_rows(
self,
url: str,
url: Path | str,
headers: int | Sequence[int] | Sequence[str] = 1,
dict_form: bool = False,
include_headers: bool = False,
Expand Down Expand Up @@ -704,7 +707,7 @@ def _get_tabular_rows(
xlsx2csv = kwargs.pop("xlsx2csv", False)
if xlsx2csv:
path = self.download_file(url)
outpath = path.replace(".xlsx", ".csv")
outpath = path.with_suffix(".csv")
sheet = kwargs.pop("sheet", 1)
if isinstance(sheet, int):
sheet_args = {"sheetid": sheet}
Expand Down
3 changes: 2 additions & 1 deletion src/hdx/utilities/file_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import zipfile
from io import BytesIO, IOBase
from os import fstat
from pathlib import Path

from openpyxl import load_workbook
from openpyxl.utils.exceptions import InvalidFileException
Expand Down Expand Up @@ -90,7 +91,7 @@ def crc_zip_fp(fp: IOBase) -> str:
return get_crc_sum(file_crcs)


def get_size_and_hash(filepath: str, file_format: str) -> tuple[int, str]:
def get_size_and_hash(filepath: Path | str, file_format: str) -> tuple[int, str]:
"""Return the size and hash of file

Args:
Expand Down
2 changes: 1 addition & 1 deletion src/hdx/utilities/frictionless_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def get_frictionless_tableresource(
http_session = kwargs.pop("http_session", session)
with system.use_context(http_session=http_session):
if url:
resource = TableResource(path=url, **kwargs)
resource = TableResource(path=str(url), **kwargs)
else:
resource = TableResource(data=data, **kwargs)
resource.open()
Expand Down
21 changes: 11 additions & 10 deletions src/hdx/utilities/loader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""Loading utilities for YAML, JSON etc."""

import json
from collections.abc import Mapping, Sequence
from collections.abc import Mapping, MutableMapping, Sequence
from pathlib import Path
from typing import Any
from warnings import warn

Expand All @@ -15,7 +16,7 @@ class LoadError(Exception):


def load_text(
path: str,
path: Path | str,
encoding: str = "utf-8",
strip: bool = False,
replace_newlines: str | None = None,
Expand Down Expand Up @@ -59,7 +60,7 @@ def load_text(


def load_yaml(
path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True
path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True
) -> Any:
"""Load YAML file into an ordered dictionary.

Expand All @@ -83,7 +84,7 @@ def load_yaml(


def load_json(
path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True
path: Path | str, encoding: str = "utf-8", loaderror_if_empty: bool = True
) -> Any:
"""Load JSON file into an ordered dictionary (dict for Python 3.7+)

Expand Down Expand Up @@ -152,11 +153,11 @@ def load_and_merge_json(


def load_yaml_into_existing_dict(
data: dict,
path: str,
data: MutableMapping,
path: Path | str,
encoding: str = "utf-8",
loaderror_if_empty: bool = True,
) -> Mapping:
) -> MutableMapping:
"""Merge YAML file that is in dictionary form into existing dictionary.

Args:
Expand All @@ -173,11 +174,11 @@ def load_yaml_into_existing_dict(


def load_json_into_existing_dict(
data: dict,
path: str,
data: MutableMapping,
path: Path | str,
encoding: str = "utf-8",
loaderror_if_empty: bool = True,
) -> Mapping:
) -> MutableMapping:
"""Merge JSON file that is in dictionary form into existing dictionary.

Args:
Expand Down
Loading
Loading