From 7684192ce3dcba1050718859685a4cbfed8a6642 Mon Sep 17 00:00:00 2001 From: mcarans Date: Mon, 12 Jan 2026 16:07:45 +1300 Subject: [PATCH 1/4] Modernise all type hints --- .pre-commit-config.yaml | 14 +- hatch.toml | 2 +- pyproject.toml | 2 +- requirements.txt | 140 ++------------ ruff.toml | 8 +- src/hdx/utilities/base_downloader.py | 17 +- src/hdx/utilities/compare.py | 3 +- src/hdx/utilities/dateparse.py | 23 +-- src/hdx/utilities/dictandlist.py | 63 ++++--- src/hdx/utilities/downloader.py | 218 +++++++++++----------- src/hdx/utilities/easy_logging.py | 3 +- src/hdx/utilities/email.py | 26 +-- src/hdx/utilities/encoding.py | 3 +- src/hdx/utilities/error_handler.py | 12 +- src/hdx/utilities/file_hashing.py | 3 +- src/hdx/utilities/frictionless_wrapper.py | 20 +- src/hdx/utilities/html.py | 10 +- src/hdx/utilities/loader.py | 32 ++-- src/hdx/utilities/matching.py | 39 ++-- src/hdx/utilities/path.py | 63 +++---- src/hdx/utilities/retriever.py | 49 ++--- src/hdx/utilities/saver.py | 29 +-- src/hdx/utilities/session.py | 12 +- src/hdx/utilities/state.py | 7 +- src/hdx/utilities/text.py | 12 +- src/hdx/utilities/typehint.py | 7 - src/hdx/utilities/useragent.py | 24 +-- src/hdx/utilities/zip_crc.py | 15 +- 28 files changed, 367 insertions(+), 489 deletions(-) delete mode 100644 src/hdx/utilities/typehint.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 81b2a53..536844a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_language_version: python: python3.13 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: trailing-whitespace exclude: (test_loader.py|pretty-false_sortkeys-false.yaml|pretty-false_sortkeys-true.yaml) @@ -10,7 +10,7 @@ repos: exclude: (test_csv_processing_blanks.csv|test.txt) - id: check-ast - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.0 + rev: v0.14.10 hooks: # Run the linter. 
- id: ruff-check @@ -18,10 +18,16 @@ repos: # Run the formatter. - id: ruff-format - repo: https://github.com/astral-sh/uv-pre-commit - rev: 0.7.14 + rev: 0.9.22 hooks: # Run the pip compile - id: pip-compile name: pip-compile requirements.txt files: pyproject.toml - args: [ pyproject.toml, --resolver=backtracking, --all-extras, --upgrade, -q, -o, requirements.txt ] + args: [ pyproject.toml, --resolver=backtracking, --upgrade, -q, + -o, requirements.txt ] + - id: pip-compile + name: pip-compile requirements-test.txt + files: pyproject.toml + args: [ pyproject.toml, --resolver=backtracking, --upgrade, -q, + --extra, test, -c, requirements.txt, -o, requirements-test.txt ] diff --git a/hatch.toml b/hatch.toml index e8157b2..914cace 100644 --- a/hatch.toml +++ b/hatch.toml @@ -31,4 +31,4 @@ run = """ [envs.hatch-static-analysis] config-path = "none" -dependencies = ["ruff==0.9.10"] +dependencies = ["ruff==0.14.10"] diff --git a/pyproject.toml b/pyproject.toml index efe8149..47463f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ classifiers = [ "Operating System :: MacOS", "Operating System :: Microsoft :: Windows", ] -requires-python = ">=3.8" +requires-python = ">=3.10" # Extras for frictionless[excel,json] added explicitly # for conda-forge compatibility dependencies = [ diff --git a/requirements.txt b/requirements.txt index 7dab205..6b79234 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,181 +1,75 @@ # This file was autogenerated by uv via the following command: -# uv pip compile pyproject.toml --resolver=backtracking --all-extras -o requirements.txt +# uv pip compile pyproject.toml --resolver=backtracking -o requirements.txt annotated-types==0.7.0 # via pydantic -astdoc==1.3.2 - # via mkapi attrs==25.4.0 # via # frictionless # jsonlines # jsonschema # referencing -babel==2.17.0 - # via mkdocs-material -backrefs==6.1 - # via mkdocs-material -beautifulsoup4==4.14.3 - # via hdx-python-utilities (pyproject.toml) certifi==2026.1.4 # via 
requests -cfgv==3.5.0 - # via pre-commit chardet==5.2.0 # via frictionless charset-normalizer==3.4.4 # via requests click==8.3.1 - # via - # mkdocs - # typer -colorama==0.4.6 - # via mkdocs-material -coverage==7.13.1 - # via pytest-cov -cydifflib==1.2.0 - # via hdx-python-utilities (pyproject.toml) -distlib==0.4.0 - # via virtualenv -dnspython==2.8.0 - # via email-validator -email-validator==2.3.0 - # via hdx-python-utilities (pyproject.toml) + # via typer et-xmlfile==2.0.0 # via openpyxl -filelock==3.20.2 - # via virtualenv frictionless==5.18.1 # via hdx-python-utilities (pyproject.toml) -ghp-import==2.1.0 - # via mkdocs -html5lib==1.1 - # via hdx-python-utilities (pyproject.toml) humanize==4.15.0 # via frictionless -identify==2.6.15 - # via pre-commit idna==3.11 - # via - # email-validator - # requests + # via requests ijson==3.4.0.post0 # via hdx-python-utilities (pyproject.toml) -iniconfig==2.3.0 - # via pytest isodate==0.7.2 # via frictionless jinja2==3.1.6 - # via - # frictionless - # mkapi - # mkdocs - # mkdocs-material + # via frictionless jsonlines==4.0.0 # via hdx-python-utilities (pyproject.toml) -jsonschema==4.25.1 +jsonschema==4.26.0 # via # frictionless # tableschema-to-template jsonschema-specifications==2025.9.1 # via jsonschema loguru==0.7.3 - # via - # hdx-python-utilities (pyproject.toml) - # pytest-loguru -markdown==3.10 - # via - # mkdocs - # mkdocs-material - # pymdown-extensions + # via hdx-python-utilities (pyproject.toml) markdown-it-py==4.0.0 # via rich marko==2.2.2 # via frictionless markupsafe==3.0.3 - # via - # jinja2 - # mkdocs + # via jinja2 mdurl==0.1.2 # via markdown-it-py -mergedeep==1.3.4 - # via - # mkdocs - # mkdocs-get-deps -mkapi==4.5.0 - # via hdx-python-utilities (pyproject.toml) -mkdocs==1.6.1 - # via - # mkapi - # mkdocs-material -mkdocs-get-deps==0.2.0 - # via mkdocs -mkdocs-material==9.7.1 - # via mkapi -mkdocs-material-extensions==1.3.1 - # via mkdocs-material -nodeenv==1.10.0 - # via pre-commit openpyxl==3.1.5 # via 
hdx-python-utilities (pyproject.toml) -packaging==25.0 - # via - # mkdocs - # pytest -paginate==0.5.7 - # via mkdocs-material -pathspec==0.12.1 - # via mkdocs petl==1.7.17 # via frictionless -platformdirs==4.5.1 - # via - # mkdocs-get-deps - # virtualenv -pluggy==1.6.0 - # via - # pytest - # pytest-cov -pre-commit==4.5.1 - # via hdx-python-utilities (pyproject.toml) pydantic==2.12.5 # via frictionless pydantic-core==2.41.5 # via pydantic pygments==2.19.2 - # via - # mkdocs-material - # pytest - # rich -pymdown-extensions==10.20 - # via mkdocs-material + # via rich pyphonetics==0.5.3 # via hdx-python-utilities (pyproject.toml) -pytest==9.0.2 - # via - # hdx-python-utilities (pyproject.toml) - # pytest-cov -pytest-cov==7.0.0 - # via hdx-python-utilities (pyproject.toml) -pytest-loguru==0.4.0 - # via hdx-python-utilities (pyproject.toml) python-dateutil==2.9.0.post0 # via # hdx-python-utilities (pyproject.toml) # frictionless - # ghp-import python-slugify==8.0.4 # via frictionless pyyaml==6.0.3 # via # frictionless - # mkdocs - # mkdocs-get-deps - # pre-commit - # pymdown-extensions - # pyyaml-env-tag # tableschema-to-template -pyyaml-env-tag==1.1 - # via mkdocs ratelimit==2.2.1 # via hdx-python-utilities (pyproject.toml) referencing==0.37.0 @@ -185,7 +79,6 @@ referencing==0.37.0 requests==2.32.5 # via # frictionless - # mkdocs-material # requests-file requests-file==3.0.1 # via hdx-python-utilities (pyproject.toml) @@ -204,22 +97,17 @@ shellingham==1.5.4 simpleeval==1.0.3 # via frictionless six==1.17.0 - # via - # html5lib - # python-dateutil -soupsieve==2.8.1 - # via beautifulsoup4 + # via python-dateutil tableschema-to-template==0.0.13 # via hdx-python-utilities (pyproject.toml) tabulate==0.9.0 # via frictionless text-unidecode==1.3 # via python-slugify -typer==0.21.0 +typer==0.21.1 # via frictionless typing-extensions==4.15.0 # via - # beautifulsoup4 # frictionless # pydantic # pydantic-core @@ -229,16 +117,10 @@ typing-inspection==0.4.2 # via pydantic 
unidecode==1.4.0 # via pyphonetics -urllib3==2.6.2 +urllib3==2.6.3 # via requests validators==0.35.0 # via frictionless -virtualenv==20.35.4 - # via pre-commit -watchdog==6.0.0 - # via mkdocs -webencodings==0.5.1 - # via html5lib xlrd==2.0.2 # via hdx-python-utilities (pyproject.toml) xlsx2csv==0.8.4 diff --git a/ruff.toml b/ruff.toml index cf4db09..c9c666e 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,11 +1,11 @@ +target-version = "py310" exclude = ["_version.py"] [lint] # List of rules: https://docs.astral.sh/ruff/rules/ -select = [ - "E", # pycodestyle - default - "F", # pyflakes - default - "I" # isort +extend-select = [ + "I", # isort + "UP" # Upgrade Python ] ignore = [ "E501" # Line too long diff --git a/src/hdx/utilities/base_downloader.py b/src/hdx/utilities/base_downloader.py index e230125..57f4c8f 100644 --- a/src/hdx/utilities/base_downloader.py +++ b/src/hdx/utilities/base_downloader.py @@ -1,7 +1,8 @@ from abc import ABC, abstractmethod -from typing import Any, Iterator, List, Tuple, Union +from collections.abc import Iterator +from typing import Any, Sequence + -from hdx.utilities.typehint import ListDict, ListTuple class DownloadError(Exception): @@ -87,13 +88,13 @@ def download_json(self, url: str, *args: Any, **kwargs: Any) -> Any: @abstractmethod def get_tabular_rows( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, dict_form: bool = False, *args: Any, **kwargs: Any, - ) -> Tuple[List[str], Iterator[ListDict]]: + ) -> tuple[list[str], Iterator[list | dict]]: """Returns header of tabular file pointed to by url and an iterator where each row is returned as a list or dictionary depending on the dict_rows argument. @@ -107,13 +108,13 @@ def get_tabular_rows( or a list, defaulting to a list. 
Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. dict_form (bool): Return dict or list for each row. Defaults to False (list) *args (Any): Positional arguments **kwargs (Any): Keyword arguments Returns: - Tuple[List[str],Iterator[ListDict]]: Tuple (headers, iterator where each row is a list or dictionary) + Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) """ diff --git a/src/hdx/utilities/compare.py b/src/hdx/utilities/compare.py index 2b5f759..4e75b39 100755 --- a/src/hdx/utilities/compare.py +++ b/src/hdx/utilities/compare.py @@ -1,7 +1,6 @@ """File compare utilities.""" from os import linesep -from typing import List try: from cydifflib import ndiff @@ -9,7 +8,7 @@ from difflib import ndiff -def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> List[str]: +def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]: """Returns the delta between two files using -, ?, + format excluding lines that are the same. 
diff --git a/src/hdx/utilities/dateparse.py b/src/hdx/utilities/dateparse.py index 57a483e..1f4e5b4 100755 --- a/src/hdx/utilities/dateparse.py +++ b/src/hdx/utilities/dateparse.py @@ -5,7 +5,6 @@ from calendar import monthrange from datetime import datetime, timedelta, timezone from io import StringIO -from typing import Dict, Optional, Tuple import dateutil from dateutil.parser import ParserError, parserinfo @@ -94,7 +93,7 @@ -9.5 MART MIT""" -def get_tzinfos(timezone_info: str) -> Dict[str, int]: +def get_tzinfos(timezone_info: str) -> dict[str, int]: """Get tzinfos dictionary used by dateutil from timezone information string. @@ -117,7 +116,7 @@ def get_tzinfos(timezone_info: str) -> Dict[str, int]: # Ugly copy and paste of the whole _timelex class from dateutil to prevent # deprecation warnings -class _timelex(object): +class _timelex: # Fractional seconds are sometimes split by a comma _split_decimal = re.compile("([.,])") @@ -129,9 +128,7 @@ def __init__(self, instream): instream = StringIO(instream) elif getattr(instream, "read", None) is None: raise TypeError( - "Parser must be a string or character stream, not {itype}".format( - itype=instream.__class__.__name__ - ) + f"Parser must be a string or character stream, not {instream.__class__.__name__}" ) self.instream = instream @@ -823,15 +820,15 @@ def now_utc_notz() -> datetime: def parse_date_range( string: str, - date_format: Optional[str] = None, + date_format: str | None = None, timezone_handling: int = 0, - fuzzy: Optional[Dict] = None, + fuzzy: dict | None = None, include_microseconds: bool = False, zero_time: bool = False, max_starttime: bool = False, max_endtime: bool = False, - default_timezones: Optional[str] = None, -) -> Tuple[datetime, datetime]: + default_timezones: str | None = None, +) -> tuple[datetime, datetime]: """Parse date from string using specified date_format if given and return datetime date range in dictionary keys startdate and enddate. 
If no date_format is supplied, the function will guess, which for unambiguous @@ -993,13 +990,13 @@ def parse_date_range( def parse_date( string: str, - date_format: Optional[str] = None, + date_format: str | None = None, timezone_handling: int = 0, - fuzzy: Optional[Dict] = None, + fuzzy: dict | None = None, include_microseconds: bool = False, zero_time: bool = False, max_time: bool = False, - default_timezones: Optional[str] = None, + default_timezones: str | None = None, ) -> datetime: """Parse date from string using specified date_format and return a datetime object. Raises exception for dates that are missing year, month or day. If diff --git a/src/hdx/utilities/dictandlist.py b/src/hdx/utilities/dictandlist.py index c6f4292..08a5caf 100755 --- a/src/hdx/utilities/dictandlist.py +++ b/src/hdx/utilities/dictandlist.py @@ -2,13 +2,14 @@ import itertools import warnings -from typing import Any, Callable, Dict, List, MutableMapping, Optional, Union +from collections.abc import Callable, MutableMapping +from typing import Any, Sequence, Mapping from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource -from hdx.utilities.typehint import ListDict, ListTuple, ListTupleDict -def invert_dictionary(d: MutableMapping) -> Dict: + +def invert_dictionary(d: MutableMapping) -> dict: """Invert a dictionary from key - value to value - key. Assumes one to one mapping between keys and values. @@ -77,13 +78,13 @@ def merge_two_dictionaries( def merge_dictionaries( - dicts: ListTuple[MutableMapping], merge_lists: bool = False + dicts: Sequence[MutableMapping], merge_lists: bool = False ) -> MutableMapping: """Merges all dictionaries in dicts into a single dictionary and returns result. Args: - dicts (ListTuple[MutableMapping]): Dictionaries to merge into the first one in the list + dicts (Sequence[MutableMapping]): Dictionaries to merge into the first one in the list merge_lists (bool): Whether to merge lists (True) or replace lists (False). Default is False. 
Returns: @@ -97,7 +98,7 @@ def merge_dictionaries( def dict_diff( d1: MutableMapping, d2: MutableMapping, no_key: str = "" -) -> Dict: +) -> dict: """Compares two dictionaries. Args: @@ -169,14 +170,14 @@ def dict_of_dicts_add( def list_distribute_contents_simple( - input_list: ListTuple, function: Callable[[Any], Any] = lambda x: x -) -> List: + input_list: Sequence, function: Callable[[Any], Any] = lambda x: x +) -> list: """Distribute the contents of a list eg. [1, 1, 1, 2, 2, 3] -> [1, 2, 3, 1, 2, 1]. List can contain complex types like dictionaries in which case the function can return the appropriate value eg. lambda x: x[KEY] Args: - input_list (ListTuple): List to distribute values + input_list (Sequence): List to distribute values function (Callable[[Any], Any]): Return value to use for distributing. Defaults to lambda x: x. Returns: @@ -202,14 +203,14 @@ def list_distribute_contents_simple( def list_distribute_contents( - input_list: ListTuple, function: Callable[[Any], Any] = lambda x: x -) -> List: + input_list: Sequence, function: Callable[[Any], Any] = lambda x: x +) -> list: """Distribute the contents of a list eg. [1, 1, 1, 2, 2, 3] -> [1, 2, 1, 2, 1, 3]. List can contain complex types like dictionaries in which case the function can return the appropriate value eg. lambda x: x[KEY] Args: - input_list (ListTuple): List to distribute values + input_list (Sequence): List to distribute values function (Callable[[Any], Any]): Return value to use for distributing. Defaults to lambda x: x. Returns: @@ -247,12 +248,12 @@ def grouper(n, iterable, fillvalue=None): return riffle_shuffle(intermediate_list) -def extract_list_from_list_of_dict(list_of_dict: ListTuple[Dict], key: Any) -> List: +def extract_list_from_list_of_dict(list_of_dict: Sequence[dict], key: Any) -> list: """Extract a list by looking up key in each member of a list of dictionaries. 
Args: - list_of_dict (ListTuple[Dict]): List of dictionaries + list_of_dict (Sequence[Dict]): List of dictionaries key (Any): Key to find in each dictionary Returns: @@ -271,7 +272,7 @@ def key_value_convert( dropfailedkeys: bool = False, dropfailedvalues: bool = False, exception: Exception = ValueError, -) -> Dict: +) -> dict: """Convert keys and/or values of dictionary using functions passed in as parameters. @@ -305,7 +306,7 @@ def key_value_convert( return dictout -def integer_key_convert(dictin: MutableMapping, dropfailedkeys: bool = False) -> Dict: +def integer_key_convert(dictin: MutableMapping, dropfailedkeys: bool = False) -> dict: """Convert keys of dictionary to integers. Args: @@ -320,7 +321,7 @@ def integer_key_convert(dictin: MutableMapping, dropfailedkeys: bool = False) -> def integer_value_convert( dictin: MutableMapping, dropfailedvalues: bool = False -) -> Dict: +) -> dict: """Convert values of dictionary to integers. Args: @@ -333,7 +334,7 @@ def integer_value_convert( return key_value_convert(dictin, valuefn=int, dropfailedvalues=dropfailedvalues) -def float_value_convert(dictin: MutableMapping, dropfailedvalues: bool = False) -> Dict: +def float_value_convert(dictin: MutableMapping, dropfailedvalues: bool = False) -> dict: """Convert values of dictionary to floats. Args: @@ -348,7 +349,7 @@ def float_value_convert(dictin: MutableMapping, dropfailedvalues: bool = False) def avg_dicts( dictin1: MutableMapping, dictin2: MutableMapping, dropmissing: bool = True -) -> Dict: +) -> dict: """Create a new dictionary from two dictionaries by averaging values. Args: @@ -374,10 +375,10 @@ def avg_dicts( def read_list_from_csv( url: str, - headers: Union[int, ListTuple[int], ListTuple[str], None] = None, + headers: int | Sequence[int] | Sequence[str] | None = None, dict_form: bool = False, **kwargs: Any, -) -> List[ListDict]: +) -> list[list | dict]: """Read a list of rows in dict or list form from a csv. 
The headers argument is either a row number or list of row numbers (in case of multi- line headers) to be considered as headers (rows start counting at 1), or @@ -386,12 +387,12 @@ def read_list_from_csv( Args: url (str): URL or path to read from - headers (Union[int, ListTuple[int], ListTuple[str], None]): Row number of headers. Defaults to None. + headers (Union[int, Sequence[int], Sequence[str], None]): Row number of headers. Defaults to None. dict_form (bool): Return dict (requires headers parameter) or list for each row. Defaults to False (list) **kwargs: Other arguments to pass to Tabulator Stream Returns: - List[ListDict]: List of rows in dict or list form + List[list | dict]: List of rows in dict or list form """ if dict_form and headers is None: raise ValueError("If dict_form is True, headers must not be None!") @@ -411,10 +412,10 @@ def read_list_from_csv( def write_list_to_csv( filepath: str, - rows: List[ListTupleDict], - headers: Union[int, ListTuple[str], None] = None, - columns: Union[ListTuple[int], ListTuple[str], None] = None, - encoding: Optional[str] = None, + rows: list[Sequence | Mapping], + headers: int | Sequence[str] | None = None, + columns: Sequence[int] | Sequence[str] | None = None, + encoding: str | None = None, ) -> None: """Write a list of rows in dict or list form to a csv. (The headers argument is either a row number (rows start counting at 1), or the actual @@ -423,9 +424,9 @@ def write_list_to_csv( Args: filepath (str): Path to write to - rows (List[ListTupleDict]): List of rows in dict or list form - headers (Union[int, ListTuple[str], None]): Headers to write. Defaults to None. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to write. Defaults to all. + rows (List[Sequence | Mapping]): List of rows in dict or list form + headers (Union[int, Sequence[str], None]): Headers to write. Defaults to None. + columns (Union[Sequence[int], Sequence[str], None]): Columns to write. Defaults to all. 
encoding (Optional[str]): Encoding to use. Defaults to None (infer encoding). Returns: @@ -476,7 +477,7 @@ def write_list_to_csv( resource.close() -def args_to_dict(args: str) -> Dict: +def args_to_dict(args: str) -> dict: """Convert command line arguments in a comma separated string to a dictionary. diff --git a/src/hdx/utilities/downloader.py b/src/hdx/utilities/downloader.py index b24a5b7..88eace6 100755 --- a/src/hdx/utilities/downloader.py +++ b/src/hdx/utilities/downloader.py @@ -2,17 +2,19 @@ import hashlib import logging +from collections.abc import Callable, Iterator from copy import deepcopy from os import remove from os.path import exists, isfile, join, split, splitext from pathlib import Path -from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union +from typing import Any, Sequence from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit import requests from frictionless import FrictionlessException from frictionless.resources import TableResource -from ratelimit import RateLimitDecorator, sleep_and_retry +from ratelimit import sleep_and_retry +from ratelimit.decorators import RateLimitDecorator from requests import Request from ruamel.yaml import YAML from xlsx2csv import Xlsx2csv @@ -21,7 +23,7 @@ from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource from hdx.utilities.path import get_filename_from_url, get_temp_dir from hdx.utilities.session import get_session -from hdx.utilities.typehint import ListDict, ListTuple + logger = logging.getLogger(__name__) @@ -55,7 +57,7 @@ class Download(BaseDownload): extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict. headers (Dict): Additional headers to add to request. use_auth (str): If more than one auth found, specify which one to use, rather than failing. 
- status_forcelist (ListTuple[int]): HTTP statuses for which to force retry + status_forcelist (Sequence[int]): HTTP statuses for which to force retry allowed_methods (iterable): HTTP methods for which to force retry. Defaults t0 frozenset(['GET']). """ @@ -63,13 +65,13 @@ class Download(BaseDownload): def __init__( self, - user_agent: Optional[str] = None, - user_agent_config_yaml: Optional[str] = None, - user_agent_lookup: Optional[str] = None, + user_agent: str | None = None, + user_agent_config_yaml: str | None = None, + user_agent_lookup: str | None = None, use_env: bool = True, fail_on_missing_file: bool = True, verify: bool = True, - rate_limit: Optional[Dict] = None, + rate_limit: dict | None = None, **kwargs: Any, ) -> None: session = kwargs.get("session") @@ -132,9 +134,9 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: @staticmethod def get_path_for_url( url: str, - folder: Optional[str] = None, - filename: Optional[str] = None, - path: Optional[str] = None, + folder: str | None = None, + filename: str | None = None, + path: str | None = None, overwrite: bool = False, keep: bool = False, ) -> str: @@ -190,7 +192,7 @@ def get_full_url(self, url: str) -> str: return preparedrequest.url @staticmethod - def get_url_for_get(url: str, parameters: Optional[Dict] = None) -> str: + def get_url_for_get(url: str, parameters: dict | None = None) -> str: """Get full url for GET request including parameters. Args: @@ -209,8 +211,8 @@ def get_url_for_get(url: str, parameters: Optional[Dict] = None) -> str: @staticmethod def get_url_params_for_post( - url: str, parameters: Optional[Dict] = None - ) -> Tuple[str, Dict]: + url: str, parameters: dict | None = None + ) -> tuple[str, dict]: """Get full url for POST request and all parameters including any in the url. 
@@ -231,16 +233,16 @@ def get_url_params_for_post( @staticmethod def hxl_row( - headers: ListTuple[str], - hxltags: Dict[str, str], + headers: Sequence[str], + hxltags: dict[str, str], dict_form: bool = False, - ) -> Union[List[str], Dict[str, str]]: + ) -> list[str] | dict[str, str]: """Return HXL tag row for header row given list of headers and dictionary with header to HXL hashtag mappings. Return list or dictionary depending upon the dict_form argument. Args: - headers (ListTuple[str]): Headers for which to get HXL hashtags + headers (Sequence[str]): Headers for which to get HXL hashtags hxltags (Dict[str,str]): Header to HXL hashtag mapping dict_form (bool): Return dict or list. Defaults to False (list) @@ -256,10 +258,10 @@ def normal_setup( url: str, stream: bool = True, post: bool = False, - parameters: Optional[Dict] = None, - timeout: Optional[float] = None, - headers: Optional[Dict] = None, - encoding: Optional[str] = None, + parameters: dict | None = None, + timeout: float | None = None, + headers: dict | None = None, + encoding: str | None = None, json_string: bool = False, ) -> requests.Response: """Setup download from provided url returning the response. @@ -384,9 +386,9 @@ def stream_path(self, path: str, errormsg: str): def stream_file( self, url: str, - folder: Optional[str] = None, - filename: Optional[str] = None, - path: Optional[str] = None, + folder: str | None = None, + filename: str | None = None, + path: str | None = None, overwrite: bool = False, keep: bool = False, ) -> str: @@ -607,8 +609,8 @@ def get_frictionless_tableresource( infer_types (bool): Whether to infer types. Defaults to False (strings). **kwargs: has_header (bool): Whether data has a header. Defaults to True. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. 
+ headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. encoding (Optional[str]): Type of encoding. Defaults to inferring. @@ -618,7 +620,7 @@ def get_frictionless_tableresource( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""]. @@ -642,15 +644,15 @@ def get_frictionless_tableresource( def _get_tabular_rows( self, url: str, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, dict_form: bool = False, include_headers: bool = False, ignore_blank_rows: bool = True, infer_types: bool = False, - header_insertions: Optional[ListTuple[Tuple[int, str]]] = None, - row_function: Optional[Callable[[List[str], ListDict], ListDict]] = None, + header_insertions: Sequence[tuple[int, str]] | None = None, + row_function: Callable[[list[str], list | dict], list | dict] | None = None, **kwargs: Any, - ) -> Tuple[List[str], Iterator[ListDict]]: + ) -> tuple[list[str], Iterator[list | dict]]: """Returns header of tabular file pointed to by url and an iterator where each row is returned as a list or dictionary depending on the dict_form argument. 
The headers argument is either a row number or list @@ -669,13 +671,13 @@ def _get_tabular_rows( Args: url (str): URL or path to read from - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. dict_form (bool): Return dict or list for each row. Defaults to False (list) include_headers (bool): Whether to include headers in iterator. Defaults to False. ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],ListDict],ListDict]]): Function to call for each row. Defaults to None. + header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. + row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. **kwargs: format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. @@ -687,7 +689,7 @@ def _get_tabular_rows( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""]. 
@@ -697,7 +699,7 @@ def _get_tabular_rows( schema (Schema): This can be set to override the above. See Frictionless docs. Returns: - Tuple[List[str],Iterator[ListDict]]: Tuple (headers, iterator where each row is a list or dictionary) + Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) """ if headers is None: raise DownloadError("Argument headers cannot be None!") @@ -751,17 +753,17 @@ def get_next(): def get_tabular_rows( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, dict_form: bool = False, include_headers: bool = False, ignore_blank_rows: bool = True, infer_types: bool = False, - header_insertions: Optional[ListTuple[Tuple[int, str]]] = None, - row_function: Optional[Callable[[List[str], ListDict], ListDict]] = None, + header_insertions: Sequence[tuple[int, str]] | None = None, + row_function: Callable[[list[str], list | dict], list | dict] | None = None, **kwargs: Any, - ) -> Tuple[List[str], Iterator[ListDict]]: + ) -> tuple[list[str], Iterator[list | dict]]: """Returns header of tabular file(s) pointed to by url and an iterator where each row is returned as a list or dictionary depending on the dict_rows argument. @@ -782,15 +784,15 @@ def get_tabular_rows( outputs a modified row or None to ignore the row. Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. 
dict_form (bool): Return dict or list for each row. Defaults to False (list) include_headers (bool): Whether to include headers in iterator. Defaults to False. ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],ListDict],ListDict]]): Function to call for each row. Defaults to None. + header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. + row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. **kwargs: format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. @@ -802,7 +804,7 @@ def get_tabular_rows( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""]. @@ -812,7 +814,7 @@ def get_tabular_rows( schema (Schema): This can be set to override the above. See Frictionless docs. 
Returns: - Tuple[List[str],Iterator[ListDict]]: Tuple (headers, iterator where each row is a list or dictionary) + Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) """ if isinstance(url, list): is_list = True @@ -858,16 +860,16 @@ def make_iterator(): def get_tabular_rows_as_list( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, include_headers: bool = True, ignore_blank_rows: bool = True, infer_types: bool = False, - header_insertions: Optional[ListTuple[Tuple[int, str]]] = None, - row_function: Optional[Callable[[List[str], ListDict], ListDict]] = None, + header_insertions: Sequence[tuple[int, str]] | None = None, + row_function: Callable[[list[str], list | dict], list | dict] | None = None, **kwargs: Any, - ) -> Tuple[List[str], Iterator[List]]: + ) -> tuple[list[str], Iterator[list]]: """Returns headers and an iterator where each row is returned as a list. @@ -886,14 +888,14 @@ def get_tabular_rows_as_list( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. include_headers (bool): Whether to include headers in iterator. Defaults to True. ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. infer_types (bool): Whether to infer types. Defaults to False (strings). 
- header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],ListDict],ListDict]]): Function to call for each row. Defaults to None. + header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. + row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. **kwargs: format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. @@ -905,7 +907,7 @@ def get_tabular_rows_as_list( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""]. 
@@ -934,15 +936,15 @@ def get_tabular_rows_as_list( def get_tabular_rows_as_dict( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, ignore_blank_rows: bool = True, infer_types: bool = False, - header_insertions: Optional[ListTuple[Tuple[int, str]]] = None, - row_function: Optional[Callable[[List[str], ListDict], ListDict]] = None, + header_insertions: Sequence[tuple[int, str]] | None = None, + row_function: Callable[[list[str], list | dict], list | dict] | None = None, **kwargs: Any, - ) -> Tuple[List[str], Iterator[Dict]]: + ) -> tuple[list[str], Iterator[dict]]: """Returns headers and an iterator where each row is returned as a dictionary. @@ -961,13 +963,13 @@ def get_tabular_rows_as_dict( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],ListDict],ListDict]]): Function to call for each row. Defaults to None. + header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. 
+ row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. **kwargs: format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. @@ -979,7 +981,7 @@ def get_tabular_rows_as_dict( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""]. @@ -1008,16 +1010,16 @@ def get_tabular_rows_as_dict( def download_tabular_key_value( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, include_headers: bool = True, ignore_blank_rows: bool = True, infer_types: bool = False, - header_insertions: Optional[ListTuple[Tuple[int, str]]] = None, - row_function: Optional[Callable[[List[str], ListDict], ListDict]] = None, + header_insertions: Sequence[tuple[int, str]] | None = None, + row_function: Callable[[list[str], list | dict], list | dict] | None = None, **kwargs: Any, - ) -> Dict: + ) -> dict: """Download 2 column csv from url and return a dictionary of keys (first column) and values (second column). 
@@ -1037,14 +1039,14 @@ def download_tabular_key_value( Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. include_headers (bool): Whether to include headers in iterator. Defaults to True. ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],ListDict],ListDict]]): Function to call for each row. Defaults to None. + header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. + row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. **kwargs: format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. @@ -1055,7 +1057,7 @@ def download_tabular_key_value( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. 
float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""] @@ -1087,16 +1089,16 @@ def download_tabular_key_value( def download_tabular_rows_as_dicts( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, keycolumn: int = 1, ignore_blank_rows: bool = True, infer_types: bool = False, - header_insertions: Optional[ListTuple[Tuple[int, str]]] = None, - row_function: Optional[Callable[[List[str], ListDict], ListDict]] = None, + header_insertions: Sequence[tuple[int, str]] | None = None, + row_function: Callable[[list[str], list | dict], list | dict] | None = None, **kwargs: Any, - ) -> Dict[str, Dict]: + ) -> dict[str, dict]: """Download multicolumn csv from url and return dictionary where keys are first column and values are dictionaries with keys from column headers and values from columns beneath. @@ -1116,14 +1118,14 @@ def download_tabular_rows_as_dicts( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. keycolumn (int): Number of column to be used for key. Defaults to 1. ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. infer_types (bool): Whether to infer types. Defaults to False (strings). 
- header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],ListDict],ListDict]]): Function to call for each row. Defaults to None. + header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. + row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. **kwargs: format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. @@ -1134,7 +1136,7 @@ def download_tabular_rows_as_dicts( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""]. 
@@ -1170,16 +1172,16 @@ def download_tabular_rows_as_dicts( def download_tabular_cols_as_dicts( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, keycolumn: int = 1, ignore_blank_rows: bool = True, infer_types: bool = False, - header_insertions: Optional[ListTuple[Tuple[int, str]]] = None, - row_function: Optional[Callable[[List[str], ListDict], ListDict]] = None, + header_insertions: Sequence[tuple[int, str]] | None = None, + row_function: Callable[[list[str], list | dict], list | dict] | None = None, **kwargs: Any, - ) -> Dict[str, Dict]: + ) -> dict[str, dict]: """Download multicolumn csv from url and return dictionary where keys are header names and values are dictionaries with keys from first column and values from other columns. @@ -1199,14 +1201,14 @@ def download_tabular_cols_as_dicts( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. keycolumn (int): Number of column to be used for key. Defaults to 1. ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[ListTuple[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],ListDict],ListDict]]): Function to call for each row. Defaults to None. 
+ header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. + row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. **kwargs: format (Optional[str]): Type of file. Defaults to inferring. file_type (Optional[str]): Type of file. Defaults to inferring. @@ -1217,7 +1219,7 @@ def download_tabular_cols_as_dicts( sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. default_type (Optional[str]): Default field type if infer_types False. Defaults to string. float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. null_values (List[Any]): Values that will return None. Defaults to [""]. @@ -1254,11 +1256,11 @@ def download_tabular_cols_as_dicts( return output_dict @staticmethod - def get_column_positions(headers: ListTuple[str]) -> Dict[str, int]: + def get_column_positions(headers: Sequence[str]) -> dict[str, int]: """Get mapping of headers to column positions. 
Args: - headers (ListTuple[str]): List of headers + headers (Sequence[str]): List of headers Returns: Dict[str,int]: Dictionary where keys are header names and values are header positions @@ -1271,13 +1273,13 @@ def get_column_positions(headers: ListTuple[str]) -> Dict[str, int]: @classmethod def generate_downloaders( cls, - custom_configs: Dict[str, Dict], - user_agent: Optional[str] = None, - user_agent_config_yaml: Optional[str] = None, - user_agent_lookup: Optional[str] = None, + custom_configs: dict[str, dict], + user_agent: str | None = None, + user_agent_config_yaml: str | None = None, + user_agent_lookup: str | None = None, use_env: bool = True, fail_on_missing_file: bool = True, - rate_limit: Optional[Dict] = None, + rate_limit: dict | None = None, **kwargs: Any, ) -> None: """Generate downloaders. Requires either global user agent to be set or @@ -1308,8 +1310,8 @@ def generate_downloaders( extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict. headers (Dict): Additional headers to add to request. use_auth (str): If more than one auth found, specify which one to use, rather than failing. - status_forcelist (ListTuple[int]): HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). - allowed_methods (ListTuple[str]): HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). + status_forcelist (Sequence[int]): HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). + allowed_methods (Sequence[str]): HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). Returns: None @@ -1328,7 +1330,7 @@ def generate_downloaders( cls.downloaders[name] = cls(**args_copy) @classmethod - def get_downloader(cls, name: Optional[str] = None) -> "Download": + def get_downloader(cls, name: str | None = None) -> "Download": """Get a generated downloader given a name. 
If name is not supplied, the default one will be returned. diff --git a/src/hdx/utilities/easy_logging.py b/src/hdx/utilities/easy_logging.py index 66e819d..dfe3559 100755 --- a/src/hdx/utilities/easy_logging.py +++ b/src/hdx/utilities/easy_logging.py @@ -4,14 +4,13 @@ import logging.config import sys from sys import stderr -from typing import Optional from loguru import logger def setup_logging( console_log_level: str = "INFO", - log_file: Optional[str] = None, + log_file: str | None = None, file_log_level: str = "ERROR", ) -> None: """Setup logging configuration. Intercepts standard logging and outputs diff --git a/src/hdx/utilities/email.py b/src/hdx/utilities/email.py index dd61a95..b215a9b 100755 --- a/src/hdx/utilities/email.py +++ b/src/hdx/utilities/email.py @@ -5,10 +5,10 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from os.path import expanduser, join -from typing import Any, List, Optional, Union +from typing import Any, Sequence from hdx.utilities.loader import load_json, load_yaml -from hdx.utilities.typehint import ListTuple + try: from email_validator import EmailNotValidError, validate_email @@ -168,12 +168,12 @@ def get_normalised_email(email: str, check_deliverability: bool = False) -> str: @classmethod def get_normalised_emails( cls, - emails: Union[str, ListTuple[str]], - ) -> List[str]: + emails: str | Sequence[str], + ) -> list[str]: """Get list of normalised emails. 
Args: - emails (Union[str, ListTuple[str]]): Email address or addresses + emails (Union[str, Sequence[str]]): Email address or addresses Returns: List[str]: Normalised emails @@ -190,26 +190,26 @@ def get_normalised_emails( def send( self, - to: Union[str, ListTuple[str]], + to: str | Sequence[str], subject: str, text_body: str, - html_body: Optional[str] = None, - sender: Optional[str] = None, - cc: Union[str, ListTuple[str], None] = None, - bcc: Union[str, ListTuple[str], None] = None, + html_body: str | None = None, + sender: str | None = None, + cc: str | Sequence[str] | None = None, + bcc: str | Sequence[str] | None = None, **kwargs: Any, ) -> None: """Send email. to, cc and bcc take either a string email address or a list of string email addresses. cc and bcc default to None. Args: - to (Union[str, ListTuple[str]]): Email recipient(s) + to (Union[str, Sequence[str]]): Email recipient(s) subject (str): Email subject text_body (str): Plain text email body html_body (Optional[str]): HTML email body sender (Optional[str]): Email sender. Defaults to global sender. - cc (Union[str, ListTuple[str], None]): Email cc. Defaults to None. - bcc (Union[str, ListTuple[str], None]): Email bcc. Defaults to None. + cc (Union[str, Sequence[str], None]): Email cc. Defaults to None. + bcc (Union[str, Sequence[str], None]): Email bcc. Defaults to None. 
**kwargs: See below mail_options (List): Mail options (see smtplib documentation) rcpt_options (List): Recipient options (see smtplib documentation) diff --git a/src/hdx/utilities/encoding.py b/src/hdx/utilities/encoding.py index 2544dfd..ded7c06 100755 --- a/src/hdx/utilities/encoding.py +++ b/src/hdx/utilities/encoding.py @@ -1,7 +1,6 @@ """Encoding utilities.""" import base64 -from typing import Tuple from urllib.parse import quote, unquote @@ -49,7 +48,7 @@ def basicauth_encode(username: str, password: str) -> str: return "Basic " + str_to_base64(username_password) -def basicauth_decode(encoded_string: str) -> Tuple[str, str]: +def basicauth_decode(encoded_string: str) -> tuple[str, str]: """Decode a HTTP basic authentication string. Returns a tuple of the form (username, password), and raises ValueError if decoding fails. diff --git a/src/hdx/utilities/error_handler.py b/src/hdx/utilities/error_handler.py index f78c7f4..1aca20e 100644 --- a/src/hdx/utilities/error_handler.py +++ b/src/hdx/utilities/error_handler.py @@ -2,10 +2,10 @@ import logging import sys -from typing import Any, Optional +from typing import Any, Sequence from hdx.utilities.dictandlist import dict_of_sets_add -from hdx.utilities.typehint import ListTuple + logger = logging.getLogger(__name__) @@ -95,7 +95,7 @@ def add_missing_value( message_type, ) - def multi_valued_message(self, text: str, values: ListTuple) -> Optional[str]: + def multi_valued_message(self, text: str, values: Sequence) -> str | None: """ Generate a formatted message for a list of values in a fixed format: error category - n {text}. First 10 values: n1,n2,n3... 
@@ -104,7 +104,7 @@ def multi_valued_message(self, text: str, values: ListTuple) -> Optional[str]: Args: text (str): Descriptive text for the issue (e.g., "invalid values") - values (ListTuple): The list of related values of concern + values (Sequence): The list of related values of concern Returns: Optional[str]: A formatted string in the format defined above @@ -122,7 +122,7 @@ def multi_valued_message(self, text: str, values: ListTuple) -> Optional[str]: def add_multi_valued( self, text: str, - values: ListTuple, + values: Sequence, category: str = "", message_type: str = "error", ) -> bool: @@ -135,7 +135,7 @@ def add_multi_valued( Args: text (str): Text to use e.g. "negative values removed" - values (ListTuple): List of values of concern + values (Sequence): List of values of concern category (str): Error category. Defaults to "". message_type (str): The type of message (error or warning). Default is "error" Returns: diff --git a/src/hdx/utilities/file_hashing.py b/src/hdx/utilities/file_hashing.py index 4ad516e..c7ecffb 100644 --- a/src/hdx/utilities/file_hashing.py +++ b/src/hdx/utilities/file_hashing.py @@ -3,7 +3,6 @@ import zipfile from io import BytesIO, IOBase from os import fstat -from typing import Tuple from openpyxl import load_workbook from openpyxl.utils.exceptions import InvalidFileException @@ -91,7 +90,7 @@ def crc_zip_fp(fp: IOBase) -> str: return get_crc_sum(file_crcs) -def get_size_and_hash(filepath: str, file_format: str) -> Tuple[int, str]: +def get_size_and_hash(filepath: str, file_format: str) -> tuple[int, str]: """Return the size and hash of file Args: diff --git a/src/hdx/utilities/frictionless_wrapper.py b/src/hdx/utilities/frictionless_wrapper.py index bf32e37..69ba2f0 100644 --- a/src/hdx/utilities/frictionless_wrapper.py +++ b/src/hdx/utilities/frictionless_wrapper.py @@ -1,6 +1,6 @@ """Frictionless wrapper""" -from typing import Any, Optional, Tuple +from typing import Any import requests from frictionless import ( @@ -15,7 +15,7 
@@ from frictionless.resources import TableResource -def get_frictionless_control(**kwargs: Any) -> Tuple[Control, Any]: +def get_frictionless_control(**kwargs: Any) -> tuple[Control, Any]: """Get Frictionless Control. Args: @@ -68,7 +68,7 @@ def get_frictionless_control(**kwargs: Any) -> Tuple[Control, Any]: return control, kwargs -def get_frictionless_detector(infer_types: bool, **kwargs: Any) -> Tuple[Detector, Any]: +def get_frictionless_detector(infer_types: bool, **kwargs: Any) -> tuple[Detector, Any]: """Get Frictionless Detector. Args: @@ -98,13 +98,13 @@ def get_frictionless_detector(infer_types: bool, **kwargs: Any) -> Tuple[Detecto def get_frictionless_dialect( ignore_blank_rows: bool, **kwargs: Any -) -> Tuple[Dialect, Any]: +) -> tuple[Dialect, Any]: """Get Frictionless Dialect. Args: ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. **kwargs: - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. dialect (Dialect): This can be set to override the above. See Frictionless docs. Returns: @@ -119,11 +119,11 @@ def get_frictionless_dialect( def get_frictionless_tableresource( - url: Optional[str] = None, + url: str | None = None, ignore_blank_rows: bool = True, infer_types: bool = False, - session: Optional[requests.Session] = None, - data: Optional[Any] = None, + session: requests.Session | None = None, + data: Any | None = None, **kwargs: Any, ) -> TableResource: """Get Frictionless TableResource. Either url or data must be supplied. @@ -136,8 +136,8 @@ def get_frictionless_tableresource( data (Optional[Any]): Data to parse. Defaults to None. **kwargs: has_header (bool): Whether data has a header. Defaults to True. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. 
# pylint: disable=line-too-long - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to pick. Defaults to all. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. # pylint: disable=line-too-long + columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. file_type (Optional[str]): Type of file. Defaults to inferring. format (Optional[str]): Type of file. Defaults to inferring. encoding (Optional[str]): Type of encoding. Defaults to inferring. diff --git a/src/hdx/utilities/html.py b/src/hdx/utilities/html.py index 9383a4a..d40e801 100755 --- a/src/hdx/utilities/html.py +++ b/src/hdx/utilities/html.py @@ -1,7 +1,7 @@ """HTML parsing utilities.""" import logging -from typing import Any, Dict, List, Optional +from typing import Any try: from bs4 import BeautifulSoup, Tag @@ -19,9 +19,9 @@ def get_soup( url: str, downloader: Download = None, - user_agent: Optional[str] = None, - user_agent_config_yaml: Optional[str] = None, - user_agent_lookup: Optional[str] = None, + user_agent: str | None = None, + user_agent_config_yaml: str | None = None, + user_agent_lookup: str | None = None, **kwargs: Any, ) -> BeautifulSoup: """Get BeautifulSoup object for a url. Requires either global user @@ -56,7 +56,7 @@ def get_text(tag: Tag) -> str: """ return tag.get_text().strip(" \t\n\r").replace("\xa0", " ") - def extract_table(tabletag: Tag) -> List[Dict]: + def extract_table(tabletag: Tag) -> list[dict]: """Extract HTML table as list of dictionaries. 
Args: diff --git a/src/hdx/utilities/loader.py b/src/hdx/utilities/loader.py index 74b193d..222d5fd 100755 --- a/src/hdx/utilities/loader.py +++ b/src/hdx/utilities/loader.py @@ -1,13 +1,13 @@ """Loading utilities for YAML, JSON etc.""" import json -from typing import Any, Dict, Optional +from typing import Any, Sequence, Mapping from warnings import warn from ruamel.yaml import YAML from hdx.utilities.dictandlist import merge_dictionaries, merge_two_dictionaries -from hdx.utilities.typehint import ListTuple + class LoadError(Exception): @@ -18,8 +18,8 @@ def load_text( path: str, encoding: str = "utf-8", strip: bool = False, - replace_newlines: Optional[str] = None, - replace_line_separators: Optional[str] = None, + replace_newlines: str | None = None, + replace_line_separators: str | None = None, loaderror_if_empty: bool = True, default_line_separator: str = "\n", ) -> str: @@ -106,20 +106,20 @@ def load_json( def load_and_merge_yaml( - paths: ListTuple[str], + paths: Sequence[str], encoding: str = "utf-8", loaderror_if_empty: bool = True, -) -> Dict: +) -> Mapping: """Load multiple YAML files that are in dictionary form and merge into one dictionary. Args: - paths (ListTuple[str]): Paths to YAML files + paths (Sequence[str]): Paths to YAML files encoding (str): Encoding of file. Defaults to utf-8. loaderror_if_empty (bool): Whether to raise LoadError if any file is empty. Default to True. Returns: - Dict: Dictionary of merged YAML files + Mapping: Dictionary of merged YAML files """ configs = [ load_yaml(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) @@ -129,20 +129,20 @@ def load_and_merge_yaml( def load_and_merge_json( - paths: ListTuple[str], + paths: Sequence[str], encoding: str = "utf-8", loaderror_if_empty: bool = True, -) -> Dict: +) -> Mapping: """Load multiple JSON files that are in dictionary form and merge into one dictionary. 
Args: - paths (ListTuple[str]): Paths to JSON files + paths (Sequence[str]): Paths to JSON files encoding (str): Encoding of file. Defaults to utf-8. loaderror_if_empty (bool): Whether to raise LoadError if any file is empty. Default to True. Returns: - Dict: Dictionary of merged JSON files + Mapping: Dictionary of merged JSON files """ configs = [ load_json(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) @@ -156,7 +156,7 @@ def load_yaml_into_existing_dict( path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True, -) -> Dict: +) -> Mapping: """Merge YAML file that is in dictionary form into existing dictionary. Args: @@ -166,7 +166,7 @@ def load_yaml_into_existing_dict( loaderror_if_empty (bool): Whether to raise LoadError if file is empty. Default to True. Returns: - Dict: YAML file merged into dictionary + Mapping: YAML file merged into dictionary """ yamldict = load_yaml(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) return merge_two_dictionaries(data, yamldict) @@ -177,7 +177,7 @@ def load_json_into_existing_dict( path: str, encoding: str = "utf-8", loaderror_if_empty: bool = True, -) -> Dict: +) -> Mapping: """Merge JSON file that is in dictionary form into existing dictionary. Args: @@ -187,7 +187,7 @@ def load_json_into_existing_dict( loaderror_if_empty (bool): Whether to raise LoadError if file is empty. Default to True. 
Returns: - dict: JSON file merged into dictionary + Mapping: JSON file merged into dictionary """ jsondict = load_json(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) return merge_two_dictionaries(data, jsondict) diff --git a/src/hdx/utilities/matching.py b/src/hdx/utilities/matching.py index 31828f7..4ad2aaa 100644 --- a/src/hdx/utilities/matching.py +++ b/src/hdx/utilities/matching.py @@ -1,11 +1,12 @@ import difflib import re -from typing import Callable, Dict, List, Optional, Tuple +from collections.abc import Callable +from typing import Sequence from pyphonetics import RefinedSoundex from hdx.utilities.text import normalise -from hdx.utilities.typehint import ListTuple + TEMPLATE_VARIABLES = re.compile("{{.*?}}") @@ -13,21 +14,21 @@ class Phonetics(RefinedSoundex): def match( self, - possible_names: ListTuple, + possible_names: Sequence, name: str, - alternative_name: Optional[str] = None, - transform_possible_names: ListTuple[Callable] = [], + alternative_name: str | None = None, + transform_possible_names: Sequence[Callable] = [], threshold: int = 2, - ) -> Optional[int]: + ) -> int | None: """ Match name to one of the given possible names. Returns None if no match or the index of the matching name Args: - possible_names (ListTuple): Possible names + possible_names (Sequence): Possible names name (str): Name to match alternative_name (str): Alternative name to match. Defaults to None. - transform_possible_names (ListTuple[Callable]): Functions to transform possible names. + transform_possible_names (Sequence[Callable]): Functions to transform possible names. threshold: Match threshold. Defaults to 2. 
Returns: @@ -61,11 +62,11 @@ def check_name(name, possible_name): def get_code_from_name( name: str, - code_lookup: Dict[str, str], - unmatched: List[str], + code_lookup: dict[str, str], + unmatched: list[str], fuzzy_match: bool = True, match_threshold: int = 5, -) -> Optional[str]: +) -> str | None: """ Given a name (org type, sector, etc), return the corresponding code. @@ -111,7 +112,7 @@ def get_code_from_name( return code -def multiple_replace(string: str, replacements: Dict[str, str]) -> str: +def multiple_replace(string: str, replacements: dict[str, str]) -> str: """Simultaneously replace multiple strings in a string. Args: @@ -132,7 +133,7 @@ def multiple_replace(string: str, replacements: Dict[str, str]) -> str: def match_template_variables( string: str, -) -> Tuple[Optional[str], Optional[str]]: +) -> tuple[str | None, str | None]: """Try to match {{XXX}} in input string. Args: @@ -148,15 +149,13 @@ def match_template_variables( return None, None -def earliest_index( - string_to_search: str, strings_to_try: ListTuple[str] -) -> Optional[int]: +def earliest_index(string_to_search: str, strings_to_try: Sequence[str]) -> int | None: """Search a string for each of a list of strings and return the earliest index. Args: string_to_search (str): String to search - strings_to_try (ListTuple[str]): Strings to try + strings_to_try (Sequence[str]): Strings to try Returns: Optional[int]: Earliest index of the strings to try in string to search or None @@ -182,7 +181,7 @@ def get_matching_text_in_strs( match_min_size: int = 30, ignore: str = "", end_characters: str = "", -) -> List[str]: +) -> list[str]: """Returns a list of matching blocks of text in a and b. 
Args: @@ -216,7 +215,7 @@ def get_matching_text_in_strs( def get_matching_text( - string_list: List[str], + string_list: list[str], match_min_size: int = 30, ignore: str = "", end_characters: str = ".!\r\n", @@ -248,7 +247,7 @@ def get_matching_text( def get_matching_then_nonmatching_text( - string_list: List[str], + string_list: list[str], separator: str = "", match_min_size: int = 30, ignore: str = "", diff --git a/src/hdx/utilities/path.py b/src/hdx/utilities/path.py index 177cadf..f01eec4 100755 --- a/src/hdx/utilities/path.py +++ b/src/hdx/utilities/path.py @@ -4,6 +4,7 @@ import inspect import logging import sys +from collections.abc import Iterable from os import getenv, makedirs, remove from os.path import ( abspath, @@ -16,14 +17,14 @@ ) from shutil import rmtree from tempfile import gettempdir -from typing import Any, Dict, Iterable, Optional, Tuple +from typing import Any, Sequence from urllib.parse import unquote_plus, urlsplit from slugify import slugify from hdx.utilities.loader import load_text from hdx.utilities.saver import save_text -from hdx.utilities.typehint import ListTuple + from hdx.utilities.uuid import get_uuid logger = logging.getLogger(__name__) @@ -69,9 +70,9 @@ def script_dir_plus_file( def get_temp_dir( - folder: Optional[str] = None, + folder: str | None = None, delete_if_exists: bool = False, - tempdir: Optional[str] = None, + tempdir: str | None = None, ) -> str: """Get a temporary directory. Looks for environment variable TEMP_DIR and falls back on os.gettempdir if a root temporary directory is not supplied. 
@@ -101,11 +102,11 @@ def get_temp_dir( @contextlib.contextmanager def temp_dir( - folder: Optional[str] = None, + folder: str | None = None, delete_if_exists: bool = False, delete_on_success: bool = True, delete_on_failure: bool = True, - tempdir: Optional[str] = None, + tempdir: str | None = None, ) -> str: """Get a temporary directory optionally with folder appended (and created if it doesn't exist) @@ -132,7 +133,7 @@ def temp_dir( raise -def read_or_create_batch(folder: str, batch: Optional[str] = None) -> str: +def read_or_create_batch(folder: str, batch: str | None = None) -> str: """Get batch or create it if it doesn't exist. Args: @@ -156,13 +157,13 @@ def read_or_create_batch(folder: str, batch: Optional[str] = None) -> str: @contextlib.contextmanager def temp_dir_batch( - folder: Optional[str] = None, + folder: str | None = None, delete_if_exists: bool = False, delete_on_success: bool = True, delete_on_failure: bool = True, - batch: Optional[str] = None, - tempdir: Optional[str] = None, -) -> Dict: + batch: str | None = None, + tempdir: str | None = None, +) -> dict: """Get a temporary directory and batch id. Yields a dictionary with key folder which is the temporary directory optionally with folder appended (and created if it doesn't exist). In key batch is a batch code to be @@ -192,7 +193,7 @@ def temp_dir_batch( } -def get_wheretostart(text: str, message: str, key: str) -> Optional[str]: +def get_wheretostart(text: str, message: str, key: str) -> str | None: """Evaluate WHERETOSTART. Args: @@ -215,11 +216,11 @@ def get_wheretostart(text: str, message: str, key: str) -> Optional[str]: def progress_storing_folder( - info: Dict, - iterator: Iterable[Dict], + info: dict, + iterator: Iterable[dict], key: str, - wheretostart: Optional[str] = None, -) -> Tuple[Dict, Dict]: + wheretostart: str | None = None, +) -> tuple[dict, dict]: """Store progress in folder in key folder of info dictionary parameter. Yields 2 dictionaries. 
The first is the info dictionary. It contains in key folder the folder being used to store progress and in key progress the @@ -262,9 +263,7 @@ def progress_storing_folder( logger.info(f"Starting run from WHERETOSTART {wheretostart}") else: logger.info( - "Run not started. Ignoring {}. WHERETOSTART ({}) not matched.".format( - current, wheretostart - ) + f"Run not started. Ignoring {current}. WHERETOSTART ({wheretostart}) not matched." ) continue output = f"{key}={current}" @@ -279,8 +278,8 @@ def progress_storing_folder( @contextlib.contextmanager def wheretostart_tempdir_batch( - folder: str, batch: Optional[str] = None, tempdir: Optional[str] = None -) -> Dict: + folder: str, batch: str | None = None, tempdir: str | None = None +) -> dict: """Get a temporary directory and batch id. Deletes any existing folder if WHERETOSTART environment variable is set to RESET. Yields a dictionary with key folder which is the temporary directory optionally with folder appended @@ -314,11 +313,11 @@ def wheretostart_tempdir_batch( def progress_storing_tempdir( folder: str, - iterator: Iterable[Dict], + iterator: Iterable[dict], key: str, - batch: Optional[str] = None, - tempdir: Optional[str] = None, -) -> Tuple[Dict, Dict]: + batch: str | None = None, + tempdir: str | None = None, +) -> tuple[dict, dict]: """Store progress in temporary directory. The folder persists until the final iteration allowing which iteration to start at and the batch code to be persisted between runs. Yields 2 dictionaries. The first contains key @@ -347,10 +346,10 @@ def progress_storing_tempdir( def multiple_progress_storing_tempdir( folder: str, - iterators: ListTuple[Iterable[Dict]], - keys: ListTuple[str], - batch: Optional[str] = None, -) -> Tuple[Dict, Dict]: + iterators: Sequence[Iterable[dict]], + keys: Sequence[str], + batch: str | None = None, +) -> tuple[dict, dict]: """Store progress in temporary directory. 
The folder persists until the final iteration of the last iterator allowing which iteration to start at and the batch code to be persisted between runs. Yields 2 dictionaries. The @@ -366,8 +365,8 @@ def multiple_progress_storing_tempdir( Args: folder (str): Folder to create in temporary folder - iterators (ListTuple[Iterable[Dict]): Iterate over each iterator in the list consecutively persisting progress - keys (ListTuple[str]): Key to examine from dictionary from each iterator in the above list + iterators (Sequence[Iterable[Dict]): Iterate over each iterator in the list consecutively persisting progress + keys (Sequence[str]): Key to examine from dictionary from each iterator in the above list batch (Optional[str]): Batch to use if there isn't one in a file already. Returns: @@ -419,7 +418,7 @@ def multiple_progress_storing_tempdir( def get_filename_extension_from_url( url: str, second_last: bool = False, use_query: bool = False -) -> Tuple[str, str]: +) -> tuple[str, str]: """Get separately filename and extension from url. 
Args: diff --git a/src/hdx/utilities/retriever.py b/src/hdx/utilities/retriever.py index b73951f..f4000e3 100644 --- a/src/hdx/utilities/retriever.py +++ b/src/hdx/utilities/retriever.py @@ -1,9 +1,10 @@ import logging +from collections.abc import Iterator from copy import deepcopy from os import mkdir from os.path import join from shutil import rmtree -from typing import Any, Iterator, List, Optional, Tuple, Union +from typing import Any, Sequence from slugify import slugify @@ -12,7 +13,7 @@ from hdx.utilities.loader import load_json, load_text, load_yaml from hdx.utilities.path import get_filename_extension_from_url from hdx.utilities.saver import save_json, save_text, save_yaml -from hdx.utilities.typehint import ListDict, ListTuple + logger = logging.getLogger(__name__) @@ -118,10 +119,10 @@ def clone(self, downloader: Download) -> "Retrieve": def get_filename( self, url: str, - filename: Optional[str] = None, - possible_extensions: Tuple[str, ...] = tuple(), + filename: str | None = None, + possible_extensions: tuple[str, ...] = tuple(), **kwargs: Any, - ) -> Tuple[str, Any]: + ) -> tuple[str, Any]: """Get filename from url and given parameters. 
Args: @@ -178,8 +179,8 @@ def set_bearer_token(self, bearer_token: str) -> None: def download_file( self, url: str, - filename: Optional[str] = None, - logstr: Optional[str] = None, + filename: str | None = None, + logstr: str | None = None, fallback: bool = False, log_level: int = None, **kwargs: Any, @@ -229,8 +230,8 @@ def download_file( def download_text( self, url: str, - filename: Optional[str] = None, - logstr: Optional[str] = None, + filename: str | None = None, + logstr: str | None = None, fallback: bool = False, log_level: int = None, **kwargs: Any, @@ -280,8 +281,8 @@ def download_text( def download_yaml( self, url: str, - filename: Optional[str] = None, - logstr: Optional[str] = None, + filename: str | None = None, + logstr: str | None = None, fallback: bool = False, log_level: int = None, **kwargs: Any, @@ -331,8 +332,8 @@ def download_yaml( def download_json( self, url: str, - filename: Optional[str] = None, - logstr: Optional[str] = None, + filename: str | None = None, + logstr: str | None = None, fallback: bool = False, log_level: int = None, **kwargs: Any, @@ -381,15 +382,15 @@ def download_json( def get_tabular_rows( self, - url: Union[str, ListTuple[str]], + url: str | Sequence[str], has_hxl: bool = False, - headers: Union[int, ListTuple[int], ListTuple[str]] = 1, + headers: int | Sequence[int] | Sequence[str] = 1, dict_form: bool = False, - filename: Optional[str] = None, - logstr: Optional[str] = None, + filename: str | None = None, + logstr: str | None = None, fallback: bool = False, **kwargs: Any, - ) -> Tuple[List[str], Iterator[ListDict]]: + ) -> tuple[list[str], Iterator[list | dict]]: """Returns header of tabular file(s) pointed to by url and an iterator where each row is returned as a list or dictionary depending on the dict_rows argument. @@ -403,9 +404,9 @@ def get_tabular_rows( or a list, defaulting to a list. 
Args: - url (Union[str, ListTuple[str]]): A single or list of URLs or paths to read from + url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from has_hxl (bool): Whether files have HXL hashtags. Defaults to False. - headers (Union[int, ListTuple[int], ListTuple[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. + headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. dict_form (bool): Return dict or list for each row. Defaults to False (list) filename (Optional[str]): Filename of saved file. Defaults to getting from url. logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename. @@ -413,7 +414,7 @@ def get_tabular_rows( **kwargs: Parameters to pass to download_file and get_tabular_rows calls Returns: - Tuple[List[str],Iterator[ListDict]]: Tuple (headers, iterator where each row is a list or dictionary) + Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) """ if isinstance(url, list): is_list = True @@ -443,7 +444,7 @@ def generate_retrievers( temp_dir: str, save: bool = False, use_saved: bool = False, - ignore: ListTuple[str] = tuple(), + ignore: Sequence[str] = tuple(), delete: bool = True, **kwargs: Any, ) -> None: @@ -459,7 +460,7 @@ def generate_retrievers( temp_dir (str): Temporary directory for when data is not needed after downloading save (bool): Whether to save downloaded data. Defaults to False. use_saved (bool): Whether to use saved data. Defaults to False. - ignore (ListTuple[str]): Don't generate retrievers for these downloaders + ignore (Sequence[str]): Don't generate retrievers for these downloaders delete (bool): Whether to delete saved_dir if save is True. Defaults to True. **kwargs (Any): Any other arguments to pass. 
@@ -483,7 +484,7 @@ def generate_retrievers( ) @classmethod - def get_retriever(cls, name: Optional[str] = None) -> "Retrieve": + def get_retriever(cls, name: str | None = None) -> "Retrieve": """Get a generated retriever given a name. If name is not supplied, the default one will be returned. diff --git a/src/hdx/utilities/saver.py b/src/hdx/utilities/saver.py index 6d484af..0837cdf 100644 --- a/src/hdx/utilities/saver.py +++ b/src/hdx/utilities/saver.py @@ -3,8 +3,9 @@ import csv import json from collections import OrderedDict +from collections.abc import Callable, Iterable from os.path import join -from typing import Any, Callable, Dict, Iterable, List, Optional, Union +from typing import Any, Sequence, Mapping from ruamel.yaml import ( YAML, @@ -15,7 +16,7 @@ from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource from hdx.utilities.matching import match_template_variables -from hdx.utilities.typehint import ListTuple, ListTupleDict + class UnPrettyRTRepresenter(RoundTripRepresenter): @@ -135,8 +136,8 @@ def save_json( def save_hxlated_output( - configuration: Dict, - rows: ListTuple[ListTupleDict], + configuration: dict, + rows: Sequence[Sequence | Mapping], includes_header: bool = True, includes_hxltags: bool = False, output_dir: str = "", @@ -151,7 +152,7 @@ def save_hxlated_output( Args: configuration (Dict): Configuration for input and output - rows (ListTuple[ListTupleDict]): Rows of data + rows (Sequence[Sequence | Mapping]): Rows of data includes_header (bool): Whether rows includes header. Defaults to True, includes_hxltags (bool): Whether rows includes HXL hashtags. Defaults to False. output_dir (str): Output directory. Defaults to "". 
@@ -273,13 +274,13 @@ def get_outrow(file_hxltags): def save_iterable( filepath: str, - rows: Iterable[ListTupleDict], - headers: Union[int, ListTuple[str], None] = None, - columns: Union[ListTuple[int], ListTuple[str], None] = None, + rows: Iterable[Sequence | Mapping], + headers: int | Sequence[str] | None = None, + columns: Sequence[int] | Sequence[str] | None = None, format: str = "csv", - encoding: Optional[str] = None, - row_function: Optional[Callable[[Dict], Optional[Dict]]] = None, -) -> List: + encoding: str | None = None, + row_function: Callable[[dict], dict | None] | None = None, +) -> list: """Save an iterable of rows in dict or list form to a csv. (The headers argument is either a row number (rows start counting at 1), or the actual headers defined as a list of strings. If not set, all rows will be treated @@ -287,9 +288,9 @@ def save_iterable( Args: filepath (str): Path to write to - rows (Iterable[ListTupleDict]): List of rows in dict or list form - headers (Union[int, ListTuple[str], None]): Headers to write. Defaults to None. - columns (Union[ListTuple[int], ListTuple[str], None]): Columns to write. Defaults to all. + rows (Iterable[Sequence | Mapping]): List of rows in dict or list form + headers (Union[int, Sequence[str], None]): Headers to write. Defaults to None. + columns (Union[Sequence[int], Sequence[str], None]): Columns to write. Defaults to all. format (str): Format to write. Defaults to csv. encoding (Optional[str]): Encoding to use. Defaults to None (infer encoding). row_function (Optional[Callable[[Dict],Optional[Dict]]]): Row function to call for each row. Defaults to None. 
diff --git a/src/hdx/utilities/session.py b/src/hdx/utilities/session.py index fa8779e..d120ea6 100755 --- a/src/hdx/utilities/session.py +++ b/src/hdx/utilities/session.py @@ -2,7 +2,7 @@ import logging import os -from typing import Any, Optional +from typing import Any import requests from requests.adapters import HTTPAdapter @@ -21,9 +21,9 @@ class SessionError(Exception): def get_session( - user_agent: Optional[str] = None, - user_agent_config_yaml: Optional[str] = None, - user_agent_lookup: Optional[str] = None, + user_agent: str | None = None, + user_agent_config_yaml: str | None = None, + user_agent_lookup: str | None = None, use_env: bool = True, fail_on_missing_file: bool = True, verify: bool = True, @@ -60,8 +60,8 @@ def get_session( extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict. headers (Dict): Additional headers to add to request. use_auth (str): If more than one auth found, specify which one to use, rather than failing. - status_forcelist (ListTuple[int]): HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). - allowed_methods (ListTuple[str]): HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). + status_forcelist (Sequence[int]): HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). + allowed_methods (Sequence[str]): HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). 
""" s = requests.Session() s.verify = verify diff --git a/src/hdx/utilities/state.py b/src/hdx/utilities/state.py index c4ca533..61a7548 100644 --- a/src/hdx/utilities/state.py +++ b/src/hdx/utilities/state.py @@ -1,7 +1,8 @@ """Utility to save state to a file and read it back.""" import logging -from typing import Any, Callable, Dict +from collections.abc import Callable +from typing import Any from hdx.utilities.dateparse import iso_string_from_datetime, parse_date from hdx.utilities.loader import load_text @@ -95,7 +96,7 @@ def set(self, state: Any): self.state = state @staticmethod - def dates_str_to_country_date_dict(dates_str: str) -> Dict: + def dates_str_to_country_date_dict(dates_str: str) -> dict: """Convert a comma separated string of key=date string pairs eg. "default=2017-01-01,afg=2019-01-01" to a dictionary of key date mappings eg. @@ -114,7 +115,7 @@ def dates_str_to_country_date_dict(dates_str: str) -> Dict: return result @staticmethod - def country_date_dict_to_dates_str(country_date_dict: Dict) -> str: + def country_date_dict_to_dates_str(country_date_dict: dict) -> str: """Convert a dictionary of key date mappings eg. {"default": 2017-01-01 as datetime, "afg": 2019-01-01 as datetime} to a comma separated string of key=date string pairs eg. diff --git a/src/hdx/utilities/text.py b/src/hdx/utilities/text.py index b128ab9..832fa40 100755 --- a/src/hdx/utilities/text.py +++ b/src/hdx/utilities/text.py @@ -5,7 +5,7 @@ import string import unicodedata from string import punctuation -from typing import Any, List, Optional, Set +from typing import Any logger = logging.getLogger(__name__) @@ -67,8 +67,8 @@ def remove_end_characters(string: str, characters_to_remove: str = punctuation) def remove_from_end( string: str, - things_to_remove: List[str], - logging_text: Optional[str] = None, + things_to_remove: list[str], + logging_text: str | None = None, whole_words: bool = True, ) -> str: """Remove list of items from end of string, stripping any whitespace. 
@@ -123,7 +123,7 @@ def remove_string( return f"{newstring}{string[index + len(toremove) :]}" -def get_words_in_sentence(sentence: str) -> List[str]: +def get_words_in_sentence(sentence: str) -> list[str]: """Returns list of words in a sentence. Args: @@ -156,7 +156,7 @@ def number_format(val: Any, format: str = "%.4f", trailing_zeros: bool = True) - def get_fraction_str( numerator: Any, - denominator: Optional[Any] = None, + denominator: Any | None = None, format: str = "%.4f", trailing_zeros: bool = True, ) -> str: @@ -185,7 +185,7 @@ def get_fraction_str( return "" -def only_allowed_in_str(test_str: str, allowed_chars: Set) -> bool: +def only_allowed_in_str(test_str: str, allowed_chars: set) -> bool: """Returns True if test string contains only allowed characters, False if not. diff --git a/src/hdx/utilities/typehint.py b/src/hdx/utilities/typehint.py deleted file mode 100644 index cc50a3d..0000000 --- a/src/hdx/utilities/typehint.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import Dict, List, Tuple, TypeVar, Union - -T = TypeVar("T") -ExceptionUpperBound = TypeVar("ExceptionUpperBound", bound="Exception") -ListTuple = Union[List[T], Tuple[T, ...]] -ListDict = Union[List, Dict] -ListTupleDict = Union[List, Tuple, Dict] diff --git a/src/hdx/utilities/useragent.py b/src/hdx/utilities/useragent.py index e8b9821..53a5f52 100755 --- a/src/hdx/utilities/useragent.py +++ b/src/hdx/utilities/useragent.py @@ -3,7 +3,7 @@ import logging import os from os.path import expanduser, isfile, join -from typing import Any, Dict, Optional +from typing import Any from hdx.utilities import __version__ from hdx.utilities.loader import load_yaml @@ -39,7 +39,7 @@ def _environment_variables(**kwargs: Any) -> Any: return kwargs @staticmethod - def _construct(configdict: Dict, prefix: str, ua: str) -> str: + def _construct(configdict: dict, prefix: str, ua: str) -> str: """Construct user agent. 
Args: @@ -69,7 +69,7 @@ def _load( cls, prefix: str, user_agent_config_yaml: str, - user_agent_lookup: Optional[str] = None, + user_agent_lookup: str | None = None, ) -> str: """Load user agent YAML file. @@ -107,9 +107,9 @@ def _load( @classmethod def _create( cls, - user_agent: Optional[str] = None, - user_agent_config_yaml: Optional[str] = None, - user_agent_lookup: Optional[str] = None, + user_agent: str | None = None, + user_agent_config_yaml: str | None = None, + user_agent_lookup: str | None = None, **kwargs: Any, ) -> str: """Get full user agent string. @@ -145,9 +145,9 @@ def clear_global(cls) -> None: @classmethod def set_global( cls, - user_agent: Optional[str] = None, - user_agent_config_yaml: Optional[str] = None, - user_agent_lookup: Optional[str] = None, + user_agent: str | None = None, + user_agent_config_yaml: str | None = None, + user_agent_lookup: str | None = None, **kwargs: Any, ) -> None: """Set global user agent string. @@ -167,9 +167,9 @@ def set_global( @classmethod def get( cls, - user_agent: Optional[str] = None, - user_agent_config_yaml: Optional[str] = None, - user_agent_lookup: Optional[str] = None, + user_agent: str | None = None, + user_agent_config_yaml: str | None = None, + user_agent_lookup: str | None = None, **kwargs: Any, ) -> str: """Get full user agent string from parameters if supplied falling back diff --git a/src/hdx/utilities/zip_crc.py b/src/hdx/utilities/zip_crc.py index 70b5be9..5526313 100644 --- a/src/hdx/utilities/zip_crc.py +++ b/src/hdx/utilities/zip_crc.py @@ -1,7 +1,6 @@ import struct from io import IOBase from os import fstat -from typing import Dict, Tuple EOCD_MIN_SIZE = 22 MAX_COMMENT_SIZE = 65535 @@ -9,7 +8,7 @@ CD_HEADER_SIGNATURE = b"PK\x01\x02" -def find_eocd_signature(tail_data: bytes) -> Tuple[int, int, int]: +def find_eocd_signature(tail_data: bytes) -> tuple[int, int, int]: """Find EOCD Signature in zip file Args: @@ -29,7 +28,7 @@ def find_eocd_signature(tail_data: bytes) -> Tuple[int, int, int]: 
return total_records, cd_offset, cd_end -def parse_central_directory(data: bytes, num_records: int) -> Dict[str, int]: +def parse_central_directory(data: bytes, num_records: int) -> dict[str, int]: """Parse zip file Central Directory and return dictionary with filepaths as keys and CRC32 as values. @@ -77,7 +76,7 @@ def get_tail_start(size: int) -> int: return size - read_size -def get_zip_tail_header(size: int) -> Dict[str, str]: +def get_zip_tail_header(size: int) -> dict[str, str]: """Get a header for a GET request with range from starting offset of the tail to the end of a zip. @@ -90,7 +89,7 @@ def get_zip_tail_header(size: int) -> Dict[str, str]: return {"Range": f"bytes={get_tail_start(size)}-"} -def get_zip_cd_header(tail_data: bytes) -> Tuple[int, Dict]: +def get_zip_cd_header(tail_data: bytes) -> tuple[int, dict]: """Get a header for a GET request with range for the Central Directory of a zip. Args: @@ -105,7 +104,7 @@ def get_zip_cd_header(tail_data: bytes) -> Tuple[int, Dict]: return total_records, {"Range": f"bytes={cd_offset}-{cd_end - 1}"} -def get_zip_crcs_buffer(buffer: bytes) -> Dict[str, int]: +def get_zip_crcs_buffer(buffer: bytes) -> dict[str, int]: """Get CRC32 for each file in a zip given a buffer Args: @@ -122,7 +121,7 @@ def get_zip_crcs_buffer(buffer: bytes) -> Dict[str, int]: return parse_central_directory(cd_data, num_records) -def get_zip_crcs_fp(fp: IOBase) -> Dict[str, int]: +def get_zip_crcs_fp(fp: IOBase) -> dict[str, int]: """Get CRC32 for each file in a zip given a file pointer Args: @@ -143,7 +142,7 @@ def get_zip_crcs_fp(fp: IOBase) -> Dict[str, int]: return parse_central_directory(cd_data, num_records) -def get_crc_sum(file_crcs: Dict[str, int]) -> str: +def get_crc_sum(file_crcs: dict[str, int]) -> str: """Calculate the sum of the CRC32 for all files in a zip Args: From 467b98af659863128db8522ac56b803e98fc0f25 Mon Sep 17 00:00:00 2001 From: mcarans Date: Mon, 12 Jan 2026 16:20:09 +1300 Subject: [PATCH 2/4] Remove types from 
docstrings --- src/hdx/utilities/base_downloader.py | 22 +- src/hdx/utilities/compare.py | 8 +- src/hdx/utilities/dateparse.py | 60 +- src/hdx/utilities/dictandlist.py | 98 ++-- src/hdx/utilities/downloader.py | 634 +++++++++++----------- src/hdx/utilities/easy_logging.py | 6 +- src/hdx/utilities/email.py | 28 +- src/hdx/utilities/encoding.py | 10 +- src/hdx/utilities/error_handler.py | 32 +- src/hdx/utilities/file_hashing.py | 10 +- src/hdx/utilities/frictionless_wrapper.py | 90 +-- src/hdx/utilities/html.py | 14 +- src/hdx/utilities/loader.py | 54 +- src/hdx/utilities/matching.py | 56 +- src/hdx/utilities/path.py | 92 ++-- src/hdx/utilities/retriever.py | 112 ++-- src/hdx/utilities/saver.py | 50 +- src/hdx/utilities/session.py | 42 +- src/hdx/utilities/state.py | 16 +- src/hdx/utilities/text.py | 42 +- src/hdx/utilities/useragent.py | 32 +- src/hdx/utilities/uuid.py | 4 +- src/hdx/utilities/zip_crc.py | 18 +- 23 files changed, 765 insertions(+), 765 deletions(-) diff --git a/src/hdx/utilities/base_downloader.py b/src/hdx/utilities/base_downloader.py index 57f4c8f..be1ce40 100644 --- a/src/hdx/utilities/base_downloader.py +++ b/src/hdx/utilities/base_downloader.py @@ -25,9 +25,9 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: """Subclasses should define this to allow with usage. Args: - exc_type (Any): Exception type - exc_value (Any): Exception value - traceback (Any): Traceback + exc_type: Exception type + exc_value: Exception value + traceback: Traceback Returns: None @@ -38,7 +38,7 @@ def download_file(self, url: str, *args: Any, **kwargs: Any) -> str: """Download file from url. Args: - url (str): URL or path to download + url: URL or path to download *args (Any): Positional arguments **kwargs (Any): Keyword arguments @@ -51,7 +51,7 @@ def download_text(self, url: str, *args: Any, **kwargs: Any) -> str: """Download text from url. 
Args: - url (str): URL or path to download + url: URL or path to download *args (Any): Positional arguments **kwargs (Any): Keyword arguments @@ -64,7 +64,7 @@ def download_yaml(self, url: str, *args: Any, **kwargs: Any) -> Any: """Download YAML from url. Args: - url (str): URL or path to download + url: URL or path to download *args (Any): Positional arguments **kwargs (Any): Keyword arguments @@ -77,7 +77,7 @@ def download_json(self, url: str, *args: Any, **kwargs: Any) -> Any: """Download JSON from url. Args: - url (str): URL or path to download + url: URL or path to download *args (Any): Positional arguments **kwargs (Any): Keyword arguments @@ -108,10 +108,10 @@ def get_tabular_rows( or a list, defaulting to a list. Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - dict_form (bool): Return dict or list for each row. Defaults to False (list) + url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Ignored for single url. Defaults to False. + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + dict_form: Return dict or list for each row. Defaults to False (list) *args (Any): Positional arguments **kwargs (Any): Keyword arguments diff --git a/src/hdx/utilities/compare.py b/src/hdx/utilities/compare.py index 4e75b39..dc929fc 100755 --- a/src/hdx/utilities/compare.py +++ b/src/hdx/utilities/compare.py @@ -13,8 +13,8 @@ def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]: that are the same. 
Args: - path1 (str): Path to first file - path2 (str): Path to second file + path1: Path to first file + path2: Path to second file Returns: List[str]: Delta between the two files @@ -32,8 +32,8 @@ def assert_files_same(path1: str, path2: str, encoding: str = "utf-8") -> None: -, ?, + format if not Args: - path1 (str): Path to first file - path2 (str): Path to second file + path1: Path to first file + path2: Path to second file Returns: None diff --git a/src/hdx/utilities/dateparse.py b/src/hdx/utilities/dateparse.py index 1f4e5b4..b330390 100755 --- a/src/hdx/utilities/dateparse.py +++ b/src/hdx/utilities/dateparse.py @@ -98,7 +98,7 @@ def get_tzinfos(timezone_info: str) -> dict[str, int]: string. Args: - timezone_info (str): Timezones information string + timezone_info: Timezones information string Returns: Dict[str, int]: tzinfos dictionary @@ -861,15 +861,15 @@ def parse_date_range( -10 W CKT HAST HST TAHT TKT Args: - string (str): Dataset date string - date_format (Optional[str]): Date format. If None is given, will attempt to guess. Defaults to None. - timezone_handling (int): Timezone handling. See description. Defaults to 0 (ignore timezone, return UTC). - fuzzy (Optional[Dict]): If dict supplied, fuzzy matching will be used and results returned in dict - include_microseconds (bool): Includes microseconds if True. Defaults to False. - zero_time (bool): Zero time elements of datetime if True. Defaults to False. - max_starttime (bool): Make start date time component 23:59:59:999999. Defaults to False. - max_endtime (bool): Make end date time component 23:59:59:999999. Defaults to False. - default_timezones (Optional[str]): Timezone information. Defaults to None. (Internal default). + string: Dataset date string + date_format: Date format. If None is given, will attempt to guess. Defaults to None. + timezone_handling: Timezone handling. See description. Defaults to 0 (ignore timezone, return UTC). 
+ fuzzy: If dict supplied, fuzzy matching will be used and results returned in dict + include_microseconds: Includes microseconds if True. Defaults to False. + zero_time: Zero time elements of datetime if True. Defaults to False. + max_starttime: Make start date time component 23:59:59:999999. Defaults to False. + max_endtime: Make end date time component 23:59:59:999999. Defaults to False. + default_timezones: Timezone information. Defaults to None. (Internal default). Returns: Tuple[datetime,datetime]: Tuple containing start date and end date @@ -1030,14 +1030,14 @@ def parse_date( -10 W CKT HAST HST TAHT TKT Args: - string (str): Dataset date string - date_format (Optional[str]): Date format. If None is given, will attempt to guess. Defaults to None. - timezone_handling (int): Timezone handling. See description. Defaults to 0 (ignore timezone, return UTC). - fuzzy (Optional[Dict]): If dict supplied, fuzzy matching will be used and results returned in dict - include_microseconds (bool): Includes microseconds if True. Defaults to False. - zero_time (bool): Zero time elements of datetime if True. Defaults to False. - max_time (bool): Make date time component 23:59:59:999999. Defaults to False. - default_timezones (Optional[str]): Timezone information. Defaults to None. (Internal default). + string: Dataset date string + date_format: Date format. If None is given, will attempt to guess. Defaults to None. + timezone_handling: Timezone handling. See description. Defaults to 0 (ignore timezone, return UTC). + fuzzy: If dict supplied, fuzzy matching will be used and results returned in dict + include_microseconds: Includes microseconds if True. Defaults to False. + zero_time: Zero time elements of datetime if True. Defaults to False. + max_time: Make date time component 23:59:59:999999. Defaults to False. + default_timezones: Timezone information. Defaults to None. (Internal default). 
Returns: datetime: The parsed date @@ -1070,7 +1070,7 @@ def get_timestamp_from_datetime(date: datetime) -> float: """Convert datetime to timestamp. Args: - date (datetime): Date to convert + date: Date to convert Returns: float: Timestamp @@ -1104,9 +1104,9 @@ def get_datetime_from_timestamp( """Convert timestamp to datetime. Args: - timestamp (float): Timestamp to convert - timezone (datetime.tzinfo): Timezone to use - today (datetime): Today's date. Defaults to now_utc. + timestamp: Timestamp to convert + timezone: Timezone to use + today: Today's date. Defaults to now_utc. Returns: datetime: Date of timestamp @@ -1120,7 +1120,7 @@ def iso_string_from_datetime(date: datetime) -> str: """Convert datetime to ISO formatted date without any time elements Args: - date (datetime): Date to convert to string + date: Date to convert to string Returns: str: ISO formatted date without any time elements @@ -1132,7 +1132,7 @@ def get_quarter(date: datetime) -> int: """Get the quarter of the given date Args: - date (datetime): Date + date: Date Returns: int: Quarter in which the given date is contained @@ -1144,8 +1144,8 @@ def get_quarter_start(year: int, quarter: int) -> datetime: """Get the first day of the quarter in which a given date is contained Args: - year (int): Year - quarter (int): Quarter + year: Year + quarter: Quarter Returns: datetime: First day of quarter @@ -1160,10 +1160,10 @@ def get_quarter_end( """Get the last day of the quarter in which a given date is contained Args: - year (int): Year - quarter (int): Quarter - max_time (bool): Make date time component 23:59:59:999999. Defaults to True. - include_microseconds (bool): Includes microseconds if True. Defaults to False. + year: Year + quarter: Quarter + max_time: Make date time component 23:59:59:999999. Defaults to True. + include_microseconds: Includes microseconds if True. Defaults to False. 
Returns: datetime: First day of quarter diff --git a/src/hdx/utilities/dictandlist.py b/src/hdx/utilities/dictandlist.py index 08a5caf..da3d1e0 100755 --- a/src/hdx/utilities/dictandlist.py +++ b/src/hdx/utilities/dictandlist.py @@ -14,7 +14,7 @@ def invert_dictionary(d: MutableMapping) -> dict: one to one mapping between keys and values. Args: - d (MutableMapping): Dictionary + d: Dictionary Returns: Dict: Return inverse of dictionary @@ -30,9 +30,9 @@ def merge_two_dictionaries( NOTE: tuples and arbitrary objects are not handled as it is totally ambiguous what should happen Args: - a (MutableMapping): dictionary to merge into - b (MutableMapping): dictionary to merge from - merge_lists (bool): Whether to merge lists (True) or replace lists (False). Default is False. + a: dictionary to merge into + b: dictionary to merge from + merge_lists: Whether to merge lists (True) or replace lists (False). Default is False. Returns: MutableMapping: Merged dictionary @@ -84,8 +84,8 @@ def merge_dictionaries( result. Args: - dicts (Sequence[MutableMapping]): Dictionaries to merge into the first one in the list - merge_lists (bool): Whether to merge lists (True) or replace lists (False). Default is False. + dicts: Dictionaries to merge into the first one in the list + merge_lists: Whether to merge lists (True) or replace lists (False). Default is False. Returns: MutableMapping: Merged dictionary @@ -102,9 +102,9 @@ def dict_diff( """Compares two dictionaries. Args: - d1 (Dict): First dictionary to compare - d2 (Dict): Second dictionary to compare - no_key (str): What value to use if key is not found Defaults to ''. + d1: First dictionary to compare + d2: Second dictionary to compare + no_key: What value to use if key is not found Defaults to ''. Returns: Dict: Comparison dictionary @@ -122,9 +122,9 @@ def dict_of_lists_add(dictionary: MutableMapping, key: Any, value: Any) -> None: """Add value to a list in a dictionary by key. 
Args: - dictionary (MutableMapping): Dictionary to which to add values - key (Any): Key within dictionary - value (Any): Value to add to list in dictionary + dictionary: Dictionary to which to add values + key: Key within dictionary + value: Value to add to list in dictionary Returns: None @@ -138,9 +138,9 @@ def dict_of_sets_add(dictionary: MutableMapping, key: Any, value: Any) -> None: """Add value to a set in a dictionary by key. Args: - dictionary (MutableMapping): Dictionary to which to add values - key (Any): Key within dictionary - value (Any): Value to add to set in dictionary + dictionary: Dictionary to which to add values + key: Key within dictionary + value: Value to add to set in dictionary Returns: None @@ -156,10 +156,10 @@ def dict_of_dicts_add( """Add key value pair to a dictionary within a dictionary by key. Args: - dictionary (MutableMapping): Dictionary to which to add values - parent_key (Any): Key within parent dictionary - key (Any): Key within dictionary - value (Any): Value to add to set in dictionary + dictionary: Dictionary to which to add values + parent_key: Key within parent dictionary + key: Key within dictionary + value: Value to add to set in dictionary Returns: None @@ -177,8 +177,8 @@ def list_distribute_contents_simple( function can return the appropriate value eg. lambda x: x[KEY] Args: - input_list (Sequence): List to distribute values - function (Callable[[Any], Any]): Return value to use for distributing. Defaults to lambda x: x. + input_list: List to distribute values + function: Return value to use for distributing. Defaults to lambda x: x. Returns: List: Distributed list @@ -210,8 +210,8 @@ def list_distribute_contents( function can return the appropriate value eg. lambda x: x[KEY] Args: - input_list (Sequence): List to distribute values - function (Callable[[Any], Any]): Return value to use for distributing. Defaults to lambda x: x. + input_list: List to distribute values + function: Return value to use for distributing. 
Defaults to lambda x: x. Returns: List: Distributed list @@ -253,8 +253,8 @@ def extract_list_from_list_of_dict(list_of_dict: Sequence[dict], key: Any) -> li dictionaries. Args: - list_of_dict (Sequence[Dict]): List of dictionaries - key (Any): Key to find in each dictionary + list_of_dict: List of dictionaries + key: Key to find in each dictionary Returns: List: List containing values returned from each dictionary @@ -277,12 +277,12 @@ def key_value_convert( parameters. Args: - dictin (MutableMapping): Input dictionary - keyfn (Callable[[Any], Any]): Function to convert keys. Defaults to lambda x: x - valuefn (Callable[[Any], Any]): Function to convert values. Defaults to lambda x: x - dropfailedkeys (bool): Whether to drop dictionary entries where key conversion fails. Defaults to False. - dropfailedvalues (bool): Whether to drop dictionary entries where value conversion fails. Defaults to False. - exception (Exception): The exception to expect if keyfn or valuefn fail. Defaults to ValueError. + dictin: Input dictionary + keyfn: Function to convert keys. Defaults to lambda x: x + valuefn: Function to convert values. Defaults to lambda x: x + dropfailedkeys: Whether to drop dictionary entries where key conversion fails. Defaults to False. + dropfailedvalues: Whether to drop dictionary entries where value conversion fails. Defaults to False. + exception: The exception to expect if keyfn or valuefn fail. Defaults to ValueError. Returns: Dict: New dictionary with converted keys and/or values @@ -310,8 +310,8 @@ def integer_key_convert(dictin: MutableMapping, dropfailedkeys: bool = False) -> """Convert keys of dictionary to integers. Args: - dictin (MutableMapping): Input dictionary - dropfailedkeys (bool): Whether to drop dictionary entries where key conversion fails. Defaults to False. + dictin: Input dictionary + dropfailedkeys: Whether to drop dictionary entries where key conversion fails. Defaults to False. 
Returns: Dict: Dictionary with keys converted to integers @@ -325,8 +325,8 @@ def integer_value_convert( """Convert values of dictionary to integers. Args: - dictin (MutableMapping): Input dictionary - dropfailedvalues (bool): Whether to drop dictionary entries where key conversion fails. Defaults to False. + dictin: Input dictionary + dropfailedvalues: Whether to drop dictionary entries where key conversion fails. Defaults to False. Returns: Dict: Dictionary with values converted to integers @@ -338,8 +338,8 @@ def float_value_convert(dictin: MutableMapping, dropfailedvalues: bool = False) """Convert values of dictionary to floats. Args: - dictin (MutableMapping): Input dictionary - dropfailedvalues (bool): Whether to drop dictionary entries where key conversion fails. Defaults to False. + dictin: Input dictionary + dropfailedvalues: Whether to drop dictionary entries where key conversion fails. Defaults to False. Returns: Dict: Dictionary with values converted to floats @@ -353,9 +353,9 @@ def avg_dicts( """Create a new dictionary from two dictionaries by averaging values. Args: - dictin1 (MutableMapping): First input dictionary - dictin2 (MutableMapping): Second input dictionary - dropmissing (bool): Whether to drop keys missing in one dictionary. Defaults to True. + dictin1: First input dictionary + dictin2: Second input dictionary + dropmissing: Whether to drop keys missing in one dictionary. Defaults to True. Returns: Dict: Dictionary with values being average of 2 input dictionaries @@ -386,9 +386,9 @@ def read_list_from_csv( be treated as containing values. Args: - url (str): URL or path to read from - headers (Union[int, Sequence[int], Sequence[str], None]): Row number of headers. Defaults to None. - dict_form (bool): Return dict (requires headers parameter) or list for each row. Defaults to False (list) + url: URL or path to read from + headers: Row number of headers. Defaults to None. 
+ dict_form: Return dict (requires headers parameter) or list for each row. Defaults to False (list) **kwargs: Other arguments to pass to Tabulator Stream Returns: @@ -423,11 +423,11 @@ def write_list_to_csv( as containing values.) Args: - filepath (str): Path to write to - rows (List[Sequence | Mapping]): List of rows in dict or list form - headers (Union[int, Sequence[str], None]): Headers to write. Defaults to None. - columns (Union[Sequence[int], Sequence[str], None]): Columns to write. Defaults to all. - encoding (Optional[str]): Encoding to use. Defaults to None (infer encoding). + filepath: Path to write to + rows: List of rows in dict or list form + headers: Headers to write. Defaults to None. + columns: Columns to write. Defaults to all. + encoding: Encoding to use. Defaults to None (infer encoding). Returns: None @@ -482,7 +482,7 @@ def args_to_dict(args: str) -> dict: dictionary. Args: - args (str): Command line arguments + args: Command line arguments Returns: Dict: Dictionary of arguments diff --git a/src/hdx/utilities/downloader.py b/src/hdx/utilities/downloader.py index 88eace6..2e89b62 100755 --- a/src/hdx/utilities/downloader.py +++ b/src/hdx/utilities/downloader.py @@ -38,27 +38,27 @@ class Download(BaseDownload): extra_params_json and extra_params_yaml. Args: - user_agent (Optional[str]): User agent string. HDXPythonUtilities/X.X.X- is prefixed. - user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. - use_env (bool): Whether to read environment variables. Defaults to True. - fail_on_missing_file (bool): Raise an exception if any specified configuration files are missing. Defaults to True. - verify (bool): Whether to verify SSL certificates. Defaults to True. - rate_limit (Optional[Dict]): Rate limiting per host eg. {"calls": 1, "period": 0.1}. Defaults to None. 
+ user_agent: User agent string. HDXPythonUtilities/X.X.X- is prefixed. + user_agent_config_yaml: Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. + user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. + use_env: Whether to read environment variables. Defaults to True. + fail_on_missing_file: Raise an exception if any specified configuration files are missing. Defaults to True. + verify: Whether to verify SSL certificates. Defaults to True. + rate_limit: Rate limiting per host eg. {"calls": 1, "period": 0.1}. Defaults to None. **kwargs: See below - auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR - basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR - basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) - bearer_token (str): Bearer token string OR - bearer_token_file (str): Path to file containing bearer token string OR - extra_params_dict (Dict[str, str]): Extra parameters to put on end of url as a dictionary OR - extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR - extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url - extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict. - headers (Dict): Additional headers to add to request. - use_auth (str): If more than one auth found, specify which one to use, rather than failing. - status_forcelist (Sequence[int]): HTTP statuses for which to force retry - allowed_methods (iterable): HTTP methods for which to force retry. Defaults t0 frozenset(['GET']). 
+ auth: Authorisation information in tuple form (user, pass) OR + basic_auth: Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR + basic_auth_file: Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) + bearer_token: Bearer token string OR + bearer_token_file: Path to file containing bearer token string OR + extra_params_dict: Extra parameters to put on end of url as a dictionary OR + extra_params_json: Path to JSON file containing extra parameters to put on end of url OR + extra_params_yaml: Path to YAML file containing extra parameters to put on end of url + extra_params_lookup: Lookup key for parameters. If not given assumes parameters are at root of the dict. + headers: Additional headers to add to request. + use_auth: If more than one auth found, specify which one to use, rather than failing. + status_forcelist: HTTP statuses for which to force retry + allowed_methods: HTTP methods for which to force retry. Defaults t0 frozenset(['GET']). """ downloaders = {} @@ -122,9 +122,9 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: """Allow usage of with. Args: - exc_type (Any): Exception type - exc_value (Any): Exception value - traceback (Any): Traceback + exc_type: Exception type + exc_value: Exception value + traceback: Traceback Returns: None @@ -144,12 +144,12 @@ def get_path_for_url( folder if no folder supplied, ensuring uniqueness. Args: - url (str): URL to download - folder (Optional[str]): Folder to download it to. Defaults to None (temporary folder). - filename (Optional[str]): Filename to use for downloaded file. Defaults to None (derive from the url). - path (Optional[str]): Full path to use for downloaded file. Defaults to None (use folder and filename). - overwrite (bool): Whether to overwrite existing file. Defaults to False. - keep (bool): Whether to keep already downloaded file. Defaults to False. 
+ url: URL to download + folder: Folder to download it to. Defaults to None (temporary folder). + filename: Filename to use for downloaded file. Defaults to None (derive from the url). + path: Full path to use for downloaded file. Defaults to None (use folder and filename). + overwrite: Whether to overwrite existing file. Defaults to False. + keep: Whether to keep already downloaded file. Defaults to False. Returns: str: Path of downloaded file @@ -182,7 +182,7 @@ def get_full_url(self, url: str) -> str: """Get full url including any additional parameters. Args: - url (str): URL for which to get full url + url: URL for which to get full url Returns: str: Full url including any additional parameters @@ -196,8 +196,8 @@ def get_url_for_get(url: str, parameters: dict | None = None) -> str: """Get full url for GET request including parameters. Args: - url (str): URL to download - parameters (Optional[Dict]): Parameters to pass. Defaults to None. + url: URL to download + parameters: Parameters to pass. Defaults to None. Returns: str: Full url @@ -217,8 +217,8 @@ def get_url_params_for_post( the url. Args: - url (str): URL to download - parameters (Optional[Dict]): Parameters to pass. Defaults to None. + url: URL to download + parameters: Parameters to pass. Defaults to None. Returns: Tuple[str, Dict]: (Full url, parameters) @@ -242,9 +242,9 @@ def hxl_row( dictionary depending upon the dict_form argument. Args: - headers (Sequence[str]): Headers for which to get HXL hashtags - hxltags (Dict[str,str]): Header to HXL hashtag mapping - dict_form (bool): Return dict or list. Defaults to False (list) + headers: Headers for which to get HXL hashtags + hxltags: Header to HXL hashtag mapping + dict_form: Return dict or list. Defaults to False (list) Returns: Union[List[str],Dict[str,str]]: Return either a list or dictionary conating HXL hashtags @@ -267,14 +267,14 @@ def normal_setup( """Setup download from provided url returning the response. 
Args: - url (str): URL or path to download - stream (bool): Whether to stream download. Defaults to True. - post (bool): Whether to use POST instead of GET. Defaults to False. - parameters (Optional[Dict]): Parameters to pass. Defaults to None. - timeout (Optional[float]): Timeout for connecting to URL. Defaults to None (no timeout). - headers (Optional[Dict]): Headers to pass. Defaults to None. - encoding (Optional[str]): Encoding to use for text response. Defaults to None (best guess). - json_string (bool): Whether to post parameters as JSON string. Defaults to False. + url: URL or path to download + stream: Whether to stream download. Defaults to True. + post: Whether to use POST instead of GET. Defaults to False. + parameters: Parameters to pass. Defaults to None. + timeout: Timeout for connecting to URL. Defaults to None (no timeout). + headers: Headers to pass. Defaults to None. + encoding: Encoding to use for text response. Defaults to None (best guess). + json_string: Whether to post parameters as JSON string. Defaults to False. Returns: requests.Response: requests.Response object @@ -325,7 +325,7 @@ def set_bearer_token(self, bearer_token: str) -> None: """Set bearer token Args: - bearer_token (str): Bearer token + bearer_token: Bearer token Returns: None @@ -342,7 +342,7 @@ def hash_stream(self, url: str) -> str: first. Args: - url (str): URL or path to download + url: URL or path to download Returns: str: MD5 hash of file @@ -363,8 +363,8 @@ def stream_path(self, path: str, errormsg: str): method first. Args: - path (str): Path for downloaded file - errormsg (str): Error message to display if there is a problem + path: Path for downloaded file + errormsg: Error message to display if there is a problem Returns: str: Path of downloaded file @@ -396,12 +396,12 @@ def stream_file( folder if no folder supplied. Must call setup method first. Args: - url (str): URL or path to download - folder (Optional[str]): Folder to download it to. 
Defaults to None (temporary folder). - filename (Optional[str]): Filename to use for downloaded file. Defaults to None (derive from the url). - path (Optional[str]): Full path to use for downloaded file. Defaults to None (use folder and filename). - overwrite (bool): Whether to overwrite existing file. Defaults to False. - keep (bool): Whether to keep already downloaded file. Defaults to False. + url: URL or path to download + folder: Folder to download it to. Defaults to None (temporary folder). + filename: Filename to use for downloaded file. Defaults to None (derive from the url). + path: Full path to use for downloaded file. Defaults to None (use folder and filename). + overwrite: Whether to overwrite existing file. Defaults to False. + keep: Whether to keep already downloaded file. Defaults to False. Returns: str: Path of downloaded file @@ -422,19 +422,19 @@ def download_file( folder if no folder supplied. Args: - url (str): URL or path to download + url: URL or path to download **kwargs: See below - folder (str): Folder to download it to. Defaults to temporary folder. - filename (str): Filename to use for downloaded file. Defaults to deriving from url. - path (str): Full path to use for downloaded file instead of folder and filename. - overwrite (bool): Whether to overwrite existing file. Defaults to False. - keep (bool): Whether to keep already downloaded file. Defaults to False. - post (bool): Whether to use POST instead of GET. Defaults to False. - parameters (Dict): Parameters to pass. Defaults to None. - timeout (float): Timeout for connecting to URL. Defaults to None (no timeout). - headers (Dict): Headers to pass. Defaults to None. - encoding (str): Encoding to use for text response. Defaults to None (best guess). - json_string (bool): Whether to post parameters as JSON string. Defaults to False. + folder: Folder to download it to. Defaults to temporary folder. + filename: Filename to use for downloaded file. Defaults to deriving from url. 
+ path: Full path to use for downloaded file instead of folder and filename. + overwrite: Whether to overwrite existing file. Defaults to False. + keep: Whether to keep already downloaded file. Defaults to False. + post: Whether to use POST instead of GET. Defaults to False. + parameters: Parameters to pass. Defaults to None. + timeout: Timeout for connecting to URL. Defaults to None (no timeout). + headers: Headers to pass. Defaults to None. + encoding: Encoding to use for text response. Defaults to None (best guess). + json_string: Whether to post parameters as JSON string. Defaults to False. Returns: str: Path of downloaded file @@ -465,14 +465,14 @@ def download(self, url: str, **kwargs: Any) -> requests.Response: """Download url. Args: - url (str): URL or path to download + url: URL or path to download **kwargs: See below - post (bool): Whether to use POST instead of GET. Defaults to False. - parameters (Dict): Parameters to pass. Defaults to None. - timeout (float): Timeout for connecting to URL. Defaults to None (no timeout). - headers (Dict): Headers to pass. Defaults to None. - encoding (str): Encoding to use for text response. Defaults to None (best guess). - json_string (bool): Whether to post parameters as JSON string. Defaults to False. + post: Whether to use POST instead of GET. Defaults to False. + parameters: Parameters to pass. Defaults to None. + timeout: Timeout for connecting to URL. Defaults to None (no timeout). + headers: Headers to pass. Defaults to None. + encoding: Encoding to use for text response. Defaults to None (best guess). + json_string: Whether to post parameters as JSON string. Defaults to False. Returns: requests.Response: Response @@ -492,7 +492,7 @@ def get_header(self, header: str) -> Any: """Get a particular response header of download. 
Args: - header (str): Header for which to get value + header: Header for which to get value Returns: Any: Response header's value @@ -544,13 +544,13 @@ def download_text(self, url: str, **kwargs: Any) -> str: """Download url as text. Args: - url (str): URL or path to download + url: URL or path to download **kwargs: See below - post (bool): Whether to use POST instead of GET. Defaults to False. - parameters (Dict): Parameters to pass. Defaults to None. - timeout (float): Timeout for connecting to URL. Defaults to None (no timeout). - headers (Dict): Headers to pass. Defaults to None. - encoding (str): Encoding to use for text response. Defaults to None (best guess). + post: Whether to use POST instead of GET. Defaults to False. + parameters: Parameters to pass. Defaults to None. + timeout: Timeout for connecting to URL. Defaults to None (no timeout). + headers: Headers to pass. Defaults to None. + encoding: Encoding to use for text response. Defaults to None (best guess). Returns: str: Text content of download @@ -562,13 +562,13 @@ def download_yaml(self, url: str, **kwargs: Any) -> Any: """Download url as YAML. Args: - url (str): URL or path to download + url: URL or path to download **kwargs: See below - post (bool): Whether to use POST instead of GET. Defaults to False. - parameters (Dict): Parameters to pass. Defaults to None. - timeout (float): Timeout for connecting to URL. Defaults to None (no timeout). - headers (Dict): Headers to pass. Defaults to None. - encoding (str): Encoding to use for text response. Defaults to None (best guess). + post: Whether to use POST instead of GET. Defaults to False. + parameters: Parameters to pass. Defaults to None. + timeout: Timeout for connecting to URL. Defaults to None (no timeout). + headers: Headers to pass. Defaults to None. + encoding: Encoding to use for text response. Defaults to None (best guess). 
Returns: str: YAML content of download @@ -580,13 +580,13 @@ def download_json(self, url: str, **kwargs: Any) -> Any: """Download url as JSON. Args: - url (str): URL or path to download + url: URL or path to download **kwargs: See below - post (bool): Whether to use POST instead of GET. Defaults to False. - parameters (Dict): Parameters to pass. Defaults to None. - timeout (float): Timeout for connecting to URL. Defaults to None (no timeout). - headers (Dict): Headers to pass. Defaults to None. - encoding (str): Encoding to use for text response. Defaults to None (best guess). + post: Whether to use POST instead of GET. Defaults to False. + parameters: Parameters to pass. Defaults to None. + timeout: Timeout for connecting to URL. Defaults to None (no timeout). + headers: Headers to pass. Defaults to None. + encoding: Encoding to use for text response. Defaults to None (best guess). Returns: str: JSON content of download @@ -604,30 +604,30 @@ def get_frictionless_tableresource( """Get Frictionless TableResource. Args: - url (str): URL or path to download - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). + url: URL or path to download + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). **kwargs: - has_header (bool): Whether data has a header. Defaults to True. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. 
Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + has_header: Whether data has a header. Defaults to True. + headers: Number of row(s) containing headers or list of headers + columns: Columns to pick. Defaults to all. + format: Type of file. Defaults to inferring. + file_type: Type of file. Defaults to inferring. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. 
Defaults to [""]. + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: TableResource: frictionless TableResource object @@ -670,33 +670,33 @@ def _get_tabular_rows( outputs a modified row or None to ignore the row. Args: - url (str): URL or path to read from - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - dict_form (bool): Return dict or list for each row. Defaults to False (list) - include_headers (bool): Whether to include headers in iterator. Defaults to False. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. + url: URL or path to read from + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + dict_form: Return dict or list for each row. Defaults to False (list) + include_headers: Whether to include headers in iterator. Defaults to False. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + header_insertions: List of (position, header) to insert. Defaults to None. + row_function: Function to call for each row. Defaults to None. **kwargs: - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - xlsx2csv (bool): Whether to convert xlsx files. Defaults to False. - encoding (Optional[str]): Type of encoding. 
Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + format: Type of file. Defaults to inferring. + file_type: Type of file. Defaults to inferring. + xlsx2csv: Whether to convert xlsx files. Defaults to False. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. 
+ null_values: Values that will return None. Defaults to [""]. + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) @@ -784,34 +784,34 @@ def get_tabular_rows( outputs a modified row or None to ignore the row. Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - dict_form (bool): Return dict or list for each row. Defaults to False (list) - include_headers (bool): Whether to include headers in iterator. Defaults to False. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. + url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Ignored for single url. Defaults to False. + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + dict_form: Return dict or list for each row. Defaults to False (list) + include_headers: Whether to include headers in iterator. Defaults to False. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + header_insertions: List of (position, header) to insert. 
Defaults to None. + row_function: Function to call for each row. Defaults to None. **kwargs: - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - xlsx2csv (bool): Whether to convert xlsx files. Defaults to False. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + format: Type of file. Defaults to inferring. + file_type: Type of file. Defaults to inferring. + xlsx2csv: Whether to convert xlsx files. Defaults to False. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. 
+ fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. Defaults to [""]. + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) @@ -888,33 +888,33 @@ def get_tabular_rows_as_list( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - include_headers (bool): Whether to include headers in iterator. Defaults to True. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. + url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Ignored for single url. Defaults to False. + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + include_headers: Whether to include headers in iterator. 
Defaults to True. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + header_insertions: List of (position, header) to insert. Defaults to None. + row_function: Function to call for each row. Defaults to None. **kwargs: - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - xlsx2csv (bool): Whether to convert xlsx files. Defaults to False. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + format: Type of file. Defaults to inferring. + file_type: Type of file. Defaults to inferring. + xlsx2csv: Whether to convert xlsx files. Defaults to False. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. 
Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. Defaults to [""]. + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: Tuple[List[str],Iterator[List]]: Tuple (headers, iterator where each row is a list) @@ -963,32 +963,32 @@ def get_tabular_rows_as_dict( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. + url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Ignored for single url. Defaults to False. 
+ headers: Number of row(s) containing headers or list of headers. Defaults to 1. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + header_insertions: List of (position, header) to insert. Defaults to None. + row_function: Function to call for each row. Defaults to None. **kwargs: - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - xlsx2csv (bool): Whether to convert xlsx files. Defaults to False. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + format: Type of file. Defaults to inferring. + file_type: Type of file. Defaults to inferring. + xlsx2csv: Whether to convert xlsx files. Defaults to False. + encoding: Type of encoding. 
Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. Defaults to [""]. + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: Tuple[List[str], Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary) @@ -1039,32 +1039,32 @@ def download_tabular_key_value( Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - include_headers (bool): Whether to include headers in iterator. Defaults to True. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. 
+ url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Ignored for single url. Defaults to False. + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + include_headers: Whether to include headers in iterator. Defaults to True. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + header_insertions: List of (position, header) to insert. Defaults to None. + row_function: Function to call for each row. Defaults to None. **kwargs: - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""] - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + format: Type of file. Defaults to inferring. 
+ file_type: Type of file. Defaults to inferring. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. Defaults to [""] + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: Dict: Dictionary keys (first column) and values (second column) @@ -1118,32 +1118,32 @@ def download_tabular_rows_as_dicts( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - keycolumn (int): Number of column to be used for key. Defaults to 1. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). - header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. 
- row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. + url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Ignored for single url. Defaults to False. + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + keycolumn: Number of column to be used for key. Defaults to 1. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + header_insertions: List of (position, header) to insert. Defaults to None. + row_function: Function to call for each row. Defaults to None. **kwargs: - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. 
- schema (Schema): This can be set to override the above. See Frictionless docs. + format: Type of file. Defaults to inferring. + file_type: Type of file. Defaults to inferring. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. Defaults to [""]. + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: Dict[str,Dict]: Dictionary where keys are first column and values are dictionaries with keys from column @@ -1201,32 +1201,32 @@ def download_tabular_cols_as_dicts( argument) and outputs a modified row or None to ignore the row. Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Ignored for single url. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - keycolumn (int): Number of column to be used for key. Defaults to 1. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). 
- header_insertions (Optional[Sequence[Tuple[int,str]]]): List of (position, header) to insert. Defaults to None. - row_function (Optional[Callable[[List[str],list | dict],list | dict]]): Function to call for each row. Defaults to None. + url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Ignored for single url. Defaults to False. + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + keycolumn: Number of column to be used for key. Defaults to 1. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + header_insertions: List of (position, header) to insert. Defaults to None. + row_function: Function to call for each row. Defaults to None. **kwargs: - format (Optional[str]): Type of file. Defaults to inferring. - file_type (Optional[str]): Type of file. Defaults to inferring. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - http_session (Session): Session object to use. Defaults to downloader session. - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - default_type (Optional[str]): Default field type if infer_types False. Defaults to string. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. 
See Frictionless docs. - layout (Layout): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + format: Type of file. Defaults to inferring. + file_type: Type of file. Defaults to inferring. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + http_session: Session object to use. Defaults to downloader session. + columns: Columns to pick. Defaults to all. + default_type: Default field type if infer_types False. Defaults to string. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. Defaults to [""]. + dialect: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + layout: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: Dict[str,Dict]: Dictionary where keys are header names and values are dictionaries with keys from first column @@ -1260,7 +1260,7 @@ def get_column_positions(headers: Sequence[str]) -> dict[str, int]: """Get mapping of headers to column positions. Args: - headers (Sequence[str]): List of headers + headers: List of headers Returns: Dict[str,int]: Dictionary where keys are header names and values are header positions @@ -1291,27 +1291,27 @@ def generate_downloaders( this method). Args: - custom_configs (Dict[str, Dict]): Optional dictionary of custom configurations. - user_agent (Optional[str]): User agent string. HDXPythonUtilities/X.X.X- is prefixed. 
- user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. - use_env (bool): Whether to read environment variables. Defaults to True. - fail_on_missing_file (bool): Raise an exception if any specified configuration files are missing. Defaults to True. - rate_limit (Optional[Dict]): Rate limiting per host eg. {"calls": 1, "period": 0.1}. Defaults to None. + custom_configs: Optional dictionary of custom configurations. + user_agent: User agent string. HDXPythonUtilities/X.X.X- is prefixed. + user_agent_config_yaml: Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. + user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. + use_env: Whether to read environment variables. Defaults to True. + fail_on_missing_file: Raise an exception if any specified configuration files are missing. Defaults to True. + rate_limit: Rate limiting per host eg. {"calls": 1, "period": 0.1}. Defaults to None. **kwargs: See below - auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR - basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR - basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) - bearer_token (str): Bearer token string OR - bearer_token_file (str): Path to file containing bearer token string OR - extra_params_dict (Dict[str, str]): Extra parameters to put on end of url as a dictionary OR - extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR - extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url - extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict. 
- headers (Dict): Additional headers to add to request. - use_auth (str): If more than one auth found, specify which one to use, rather than failing. - status_forcelist (Sequence[int]): HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). - allowed_methods (Sequence[str]): HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). + auth: Authorisation information in tuple form (user, pass) OR + basic_auth: Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR + basic_auth_file: Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) + bearer_token: Bearer token string OR + bearer_token_file: Path to file containing bearer token string OR + extra_params_dict: Extra parameters to put on end of url as a dictionary OR + extra_params_json: Path to JSON file containing extra parameters to put on end of url OR + extra_params_yaml: Path to YAML file containing extra parameters to put on end of url + extra_params_lookup: Lookup key for parameters. If not given assumes parameters are at root of the dict. + headers: Additional headers to add to request. + use_auth: If more than one auth found, specify which one to use, rather than failing. + status_forcelist: HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). + allowed_methods: HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). Returns: None @@ -1335,7 +1335,7 @@ def get_downloader(cls, name: str | None = None) -> "Download": the default one will be returned. Args: - name (Optional[str]): Name of downloader. Defaults to None (get default). + name: Name of downloader. Defaults to None (get default). 
Returns: Download: Downloader object diff --git a/src/hdx/utilities/easy_logging.py b/src/hdx/utilities/easy_logging.py index dfe3559..43ae538 100755 --- a/src/hdx/utilities/easy_logging.py +++ b/src/hdx/utilities/easy_logging.py @@ -17,9 +17,9 @@ def setup_logging( errors to a file. Args: - console_log_level (str): Log level to use for console output. Defaults to INFO. - log_file (Optional[str]): Path of log file. Defaults to None (No log file). - file_log_level (str): Log level to use for console output. Defaults to ERROR. + console_log_level: Log level to use for console output. Defaults to INFO. + log_file: Path of log file. Defaults to None (No log file). + file_log_level: Log level to use for file output. Defaults to ERROR. Returns: None diff --git a/src/hdx/utilities/email.py b/src/hdx/utilities/email.py index b215a9b..4944ea6 100755 --- a/src/hdx/utilities/email.py +++ b/src/hdx/utilities/email.py @@ -40,9 +40,9 @@ class Email: Args: **kwargs: See below - email_config_dict (dict): HDX configuration dictionary OR - email_config_json (str): Path to JSON HDX configuration OR - email_config_yaml (str): Path to YAML HDX configuration. Defaults to ~/hdx_email_configuration.yaml. + email_config_dict: HDX configuration dictionary OR + email_config_json: Path to JSON HDX configuration OR + email_config_yaml: Path to YAML HDX configuration. Defaults to ~/hdx_email_configuration.yaml. """ default_email_config_yaml = join(expanduser("~"), "hdx_email_configuration.yaml") @@ -152,7 +152,7 @@ def get_normalised_email(email: str, check_deliverability: bool = False) -> str: """Get normalised email. Args: - email (str): Email address to normalise + email: Email address to normalise Returns: str: Normalised email @@ -173,7 +173,7 @@ def get_normalised_emails( """Get list of normalised emails.
Args: - emails (Union[str, Sequence[str]]): Email address or addresses + emails: Email address or addresses Returns: List[str]: Normalised emails @@ -203,16 +203,16 @@ def send( list of string email addresses. cc and bcc default to None. Args: - to (Union[str, Sequence[str]]): Email recipient(s) - subject (str): Email subject - text_body (str): Plain text email body - html_body (Optional[str]): HTML email body - sender (Optional[str]): Email sender. Defaults to global sender. - cc (Union[str, Sequence[str], None]): Email cc. Defaults to None. - bcc (Union[str, Sequence[str], None]): Email bcc. Defaults to None. + to: Email recipient(s) + subject: Email subject + text_body: Plain text email body + html_body: HTML email body + sender: Email sender. Defaults to global sender. + cc: Email cc. Defaults to None. + bcc: Email bcc. Defaults to None. **kwargs: See below - mail_options (List): Mail options (see smtplib documentation) - rcpt_options (List): Recipient options (see smtplib documentation) + mail_options: Mail options (see smtplib documentation) + rcpt_options: Recipient options (see smtplib documentation) Returns: None diff --git a/src/hdx/utilities/encoding.py b/src/hdx/utilities/encoding.py index ded7c06..4f1ffff 100755 --- a/src/hdx/utilities/encoding.py +++ b/src/hdx/utilities/encoding.py @@ -8,7 +8,7 @@ def str_to_base64(string: str) -> str: """Base 64 encode string. Args: - string (str): String to encode + string: String to encode Returns: str: Base 64 encoded string @@ -20,7 +20,7 @@ def base64_to_str(bstring: str) -> str: """Base 64 decode string. 
Args: - bstring (str): Base 64 encoded string to encode + bstring: Base 64 encoded string to encode Returns: str: Decoded string @@ -35,8 +35,8 @@ def basicauth_encode(username: str, password: str) -> str: Inspired by: https://github.com/rdegges/python-basicauth/blob/master/basicauth.py#L16 Args: - username (str): Username - password (str): Password + username: Username + password: Password Returns: str: Basic authentication string @@ -55,7 +55,7 @@ def basicauth_decode(encoded_string: str) -> tuple[str, str]: Inspired by: https://github.com/rdegges/python-basicauth/blob/master/basicauth.py#L27 Args: - encoded_string (str): String to decode + encoded_string: String to decode Returns: Tuple[str, str]: Tuple of form (username, password) diff --git a/src/hdx/utilities/error_handler.py b/src/hdx/utilities/error_handler.py index 1aca20e..8546e33 100644 --- a/src/hdx/utilities/error_handler.py +++ b/src/hdx/utilities/error_handler.py @@ -18,7 +18,7 @@ class ErrorHandler: sorted. Args: - should_exit_on_error (bool): Whether to exit with a 1 code if there are errors. Default is False. + should_exit_on_error: Whether to exit with a 1 code if there are errors. Default is False. """ @@ -39,9 +39,9 @@ def add( error category - {text} Args: - message (str): Error message - category (str): Error category. Defaults to "". - message_type (str): The type of message (error or warning). Default is "error" + message: Error message + category: Error category. Defaults to "". + message_type: The type of message (error or warning). 
Default is "error" Returns: None @@ -61,8 +61,8 @@ def missing_value_message(value_type: str, value: Any) -> str: error category - type n not found Args: - value_type (str): The type of value that is missing - value (Any): The specific missing value + value_type: The type of value that is missing + value: The specific missing value Returns: str: A formatted message stating the missing value and its type @@ -82,10 +82,10 @@ def add_missing_value( error category - type n not found identifier is usually a dataset name. Args: - value_type (str): Type of value e.g. "sector" - value (Any): Missing value - category (str): Error category. Defaults to "". - message_type (str): The type of message (error or warning). Default is "error" + value_type: Type of value e.g. "sector" + value: Missing value + category: Error category. Defaults to "". + message_type: The type of message (error or warning). Default is "error" Returns: None """ @@ -103,8 +103,8 @@ def multi_valued_message(self, text: str, values: Sequence) -> str | None: a dataset name. Values are cast to string. Args: - text (str): Descriptive text for the issue (e.g., "invalid values") - values (Sequence): The list of related values of concern + text: Descriptive text for the issue (e.g., "invalid values") + values: The list of related values of concern Returns: Optional[str]: A formatted string in the format defined above @@ -134,10 +134,10 @@ def add_multi_valued( a dataset name. Values are cast to string. Args: - text (str): Text to use e.g. "negative values removed" - values (Sequence): List of values of concern - category (str): Error category. Defaults to "". - message_type (str): The type of message (error or warning). Default is "error" + text: Text to use e.g. "negative values removed" + values: List of values of concern + category: Error category. Defaults to "". + message_type: The type of message (error or warning). 
Default is "error" Returns: bool: True if a message was added, False if not """ diff --git a/src/hdx/utilities/file_hashing.py b/src/hdx/utilities/file_hashing.py index c7ecffb..ed0dced 100644 --- a/src/hdx/utilities/file_hashing.py +++ b/src/hdx/utilities/file_hashing.py @@ -18,7 +18,7 @@ def hash_excel_buffer(buffer: bytes) -> str: """Hash the sheets in an Excel XLSX file given in a buffer using MD5 Args: - buffer (bytes): Excel XLSX file buffer + buffer: Excel XLSX file buffer Returns: str: MD5 hash of the sheets @@ -55,7 +55,7 @@ def hash_excel_fp(fp: IOBase) -> str: """Hash the sheets in an Excel XLSX file given as a file pointer using MD5 Args: - fp (IOBase): Excel file pointer + fp: Excel file pointer Returns: str: MD5 hash of the sheets @@ -67,7 +67,7 @@ def crc_zip_buffer(buffer: bytes) -> str: """Get sum of CRC32s for all files in a zip given a buffer Args: - buffer (bytes): Zip in buffer + buffer: Zip in buffer Returns: str: Sum of the CRC32 @@ -81,7 +81,7 @@ def crc_zip_fp(fp: IOBase) -> str: """Get sum of CRC32s for all files in a zip given a file pointer Args: - fp (IOBase): Zip file pointer + fp: Zip file pointer Returns: str: Sum of the CRC32 @@ -95,7 +95,7 @@ def get_size_and_hash(filepath: str, file_format: str) -> tuple[int, str]: Args: filepath: Path to file - file_format (str): File format + file_format: File format Returns: Tuple[int, str]: Tuple (size, hash) diff --git a/src/hdx/utilities/frictionless_wrapper.py b/src/hdx/utilities/frictionless_wrapper.py index 69ba2f0..60dd139 100644 --- a/src/hdx/utilities/frictionless_wrapper.py +++ b/src/hdx/utilities/frictionless_wrapper.py @@ -20,16 +20,16 @@ def get_frictionless_control(**kwargs: Any) -> tuple[Control, Any]: Args: **kwargs: - file_type (Optional[str]): Type of file. Defaults to inferring. - format (Optional[str]): Type of file. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. 
- skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - keyed (bool): Whether JSON is keyed. Defaults to True. - keys (Optional[List[str]]): JSON keys to get. Defaults to None (all of them). - property (Optional[str]): Path to table in JSON. Defaults to None. - control (Control): This can be set to override the above. See Frictionless docs. + file_type: Type of file. Defaults to inferring. + format: Type of file. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + keyed: Whether JSON is keyed. Defaults to True. + keys: JSON keys to get. Defaults to None (all of them). + property: Path to table in JSON. Defaults to None. + control: This can be set to override the above. See Frictionless docs. Returns: Tuple[Control, Any]: (frictionless Control object, kwargs) @@ -72,12 +72,12 @@ def get_frictionless_detector(infer_types: bool, **kwargs: Any) -> tuple[Detecto """Get Frictionless Detector. Args: - infer_types (bool): Whether to infer types. Defaults to False (strings). + infer_types: Whether to infer types. Defaults to False (strings). **kwargs: - default_type (Optional[str]): Default field type if infer_types False. Defaults to any. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - detector (Detector): This can be set to override the above. See Frictionless docs. + default_type: Default field type if infer_types False. Defaults to any. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. 
+ null_values: Values that will return None. Defaults to [""]. + detector: This can be set to override the above. See Frictionless docs. Returns: Tuple[Detector, Any]: (frictionless Detector object, kwargs) @@ -102,10 +102,10 @@ def get_frictionless_dialect( """Get Frictionless Dialect. Args: - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. **kwargs: - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - dialect (Dialect): This can be set to override the above. See Frictionless docs. + columns: Columns to pick. Defaults to all. + dialect: This can be set to override the above. See Frictionless docs. Returns: Tuple[Dialect, Any]: (frictionless Dialect object, Any) @@ -129,34 +129,34 @@ def get_frictionless_tableresource( """Get Frictionless TableResource. Either url or data must be supplied. Args: - url (Optional[str]): URL or path to download. Defaults to None. - ignore_blank_rows (bool): Whether to ignore blank rows. Defaults to True. - infer_types (bool): Whether to infer types. Defaults to False (strings). - session (Optional[requests.Session]): Session to use. Defaults to not setting a session. - data (Optional[Any]): Data to parse. Defaults to None. + url: URL or path to download. Defaults to None. + ignore_blank_rows: Whether to ignore blank rows. Defaults to True. + infer_types: Whether to infer types. Defaults to False (strings). + session: Session to use. Defaults to not setting a session. + data: Data to parse. Defaults to None. **kwargs: - has_header (bool): Whether data has a header. Defaults to True. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. # pylint: disable=line-too-long - columns (Union[Sequence[int], Sequence[str], None]): Columns to pick. Defaults to all. - file_type (Optional[str]): Type of file. Defaults to inferring. 
- format (Optional[str]): Type of file. Defaults to inferring. - encoding (Optional[str]): Type of encoding. Defaults to inferring. - compression (Optional[str]): Type of compression. Defaults to inferring. - delimiter (Optional[str]): Delimiter for values in csv rows. Defaults to inferring. - skip_initial_space (bool): Ignore whitespace straight after delimiter. Defaults to False. - sheet (Optional[Union[int, str]): Sheet in Excel. Defaults to inferring. - fill_merged_cells (bool): Whether to fill merged cells. Defaults to True. - keyed (bool): Whether JSON is keyed. Defaults to True. - keys (Optional[List[str]]): JSON keys to get. Defaults to None (all of them). - property (Optional[str]): Path to table in JSON. Defaults to None. - http_session (Session): Session object to use. Defaults to downloader session. - default_type (Optional[str]): Default field type if infer_types False. Defaults to any. - float_numbers (bool): Use float not Decimal if infer_types True. Defaults to True. - null_values (List[Any]): Values that will return None. Defaults to [""]. - control (Control): This can be set to override the above. See Frictionless docs. - detector (Detector): This can be set to override the above. See Frictionless docs. - dialect (Dialect): This can be set to override the above. See Frictionless docs. - schema (Schema): This can be set to override the above. See Frictionless docs. + has_header: Whether data has a header. Defaults to True. + headers: Number of row(s) containing headers or list of headers. # pylint: disable=line-too-long + columns: Columns to pick. Defaults to all. + file_type: Type of file. Defaults to inferring. + format: Type of file. Defaults to inferring. + encoding: Type of encoding. Defaults to inferring. + compression: Type of compression. Defaults to inferring. + delimiter: Delimiter for values in csv rows. Defaults to inferring. + skip_initial_space: Ignore whitespace straight after delimiter. Defaults to False. + sheet: Sheet in Excel. 
Defaults to inferring. + fill_merged_cells: Whether to fill merged cells. Defaults to True. + keyed: Whether JSON is keyed. Defaults to True. + keys: JSON keys to get. Defaults to None (all of them). + property: Path to table in JSON. Defaults to None. + http_session: Session object to use. Defaults to downloader session. + default_type: Default field type if infer_types False. Defaults to any. + float_numbers: Use float not Decimal if infer_types True. Defaults to True. + null_values: Values that will return None. Defaults to [""]. + control: This can be set to override the above. See Frictionless docs. + detector: This can be set to override the above. See Frictionless docs. + dialect: This can be set to override the above. See Frictionless docs. + schema: This can be set to override the above. See Frictionless docs. Returns: TableResource: frictionless TableResource object diff --git a/src/hdx/utilities/html.py b/src/hdx/utilities/html.py index d40e801..60bb0e7 100755 --- a/src/hdx/utilities/html.py +++ b/src/hdx/utilities/html.py @@ -28,11 +28,11 @@ def get_soup( agent to be set or appropriate user agent parameter(s) to be completed. Args: - url (str): url to read - downloader (Download): Download object. Defaults to creating a Download object with given user agent values. - user_agent (Optional[str]): User agent string. HDXPythonUtilities/X.X.X- is prefixed. - user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. + url: url to read + downloader: Download object. Defaults to creating a Download object with given user agent values. + user_agent: User agent string. HDXPythonUtilities/X.X.X- is prefixed. + user_agent_config_yaml: Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. + user_agent_lookup: Lookup key for YAML. 
Ignored if user_agent supplied. Returns: BeautifulSoup: The BeautifulSoup object for a url @@ -49,7 +49,7 @@ def get_text(tag: Tag) -> str: newlines and with   replaced with space. Args: - tag (Tag): BeautifulSoup tag + tag: BeautifulSoup tag Returns: str: Text of tag stripped of leading and trailing whitespace and newlines and with   replaced with space @@ -60,7 +60,7 @@ def extract_table(tabletag: Tag) -> list[dict]: """Extract HTML table as list of dictionaries. Args: - tabletag (Tag): BeautifulSoup tag + tabletag: BeautifulSoup tag Returns: str: Text of tag stripped of leading and trailing whitespace and newlines and with   replaced with space diff --git a/src/hdx/utilities/loader.py b/src/hdx/utilities/loader.py index 222d5fd..e11ed2d 100755 --- a/src/hdx/utilities/loader.py +++ b/src/hdx/utilities/loader.py @@ -26,13 +26,13 @@ def load_text( """Load file into a string removing newlines. Args: - path (str): Path to file - encoding (str): Encoding of file. Defaults to utf-8. - strip (bool): Whether to strip whitespace from start and end. Defaults to False. - replace_newlines (Optional[str]): String with which to replace newlines. Defaults to None (don't replace). (deprecated 2024-02-07) - replace_line_separators (Optional[str]): String with which to replace newlines. Defaults to None (don't replace). - loaderror_if_empty (bool): Whether to raise LoadError if file is empty. Default to True. - default_line_separator (str): line separator to be replaced if replace_line_separators is not None + path: Path to file + encoding: Encoding of file. Defaults to utf-8. + strip: Whether to strip whitespace from start and end. Defaults to False. + replace_newlines: String with which to replace newlines. Defaults to None (don't replace). (deprecated 2024-02-07) + replace_line_separators: String with which to replace newlines. Defaults to None (don't replace). + loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. 
+ default_line_separator: line separator to be replaced if replace_line_separators is not None Returns: str: String contents of file @@ -64,9 +64,9 @@ def load_yaml( """Load YAML file into an ordered dictionary. Args: - path (str): Path to YAML file - encoding (str): Encoding of file. Defaults to utf-8. - loaderror_if_empty (bool): Whether to raise LoadError if file is empty. Default to True. + path: Path to YAML file + encoding: Encoding of file. Defaults to utf-8. + loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: Any: The data from the YAML file @@ -88,9 +88,9 @@ def load_json( """Load JSON file into an ordered dictionary (dict for Python 3.7+) Args: - path (str): Path to JSON file - encoding (str): Encoding of file. Defaults to utf-8. - loaderror_if_empty (bool): Whether to raise LoadError if file is empty. Default to True. + path: Path to JSON file + encoding: Encoding of file. Defaults to utf-8. + loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: Any: The data from the JSON file @@ -114,9 +114,9 @@ def load_and_merge_yaml( dictionary. Args: - paths (Sequence[str]): Paths to YAML files - encoding (str): Encoding of file. Defaults to utf-8. - loaderror_if_empty (bool): Whether to raise LoadError if any file is empty. Default to True. + paths: Paths to YAML files + encoding: Encoding of file. Defaults to utf-8. + loaderror_if_empty: Whether to raise LoadError if any file is empty. Default to True. Returns: Mapping: Dictionary of merged YAML files @@ -137,9 +137,9 @@ def load_and_merge_json( dictionary. Args: - paths (Sequence[str]): Paths to JSON files - encoding (str): Encoding of file. Defaults to utf-8. - loaderror_if_empty (bool): Whether to raise LoadError if any file is empty. Default to True. + paths: Paths to JSON files + encoding: Encoding of file. Defaults to utf-8. + loaderror_if_empty: Whether to raise LoadError if any file is empty. Default to True. 
Returns: Mapping: Dictionary of merged JSON files @@ -160,10 +160,10 @@ def load_yaml_into_existing_dict( """Merge YAML file that is in dictionary form into existing dictionary. Args: - data (dict): Dictionary to merge into - path (str): YAML file to load and merge - encoding (str): Encoding of file. Defaults to utf-8. - loaderror_if_empty (bool): Whether to raise LoadError if file is empty. Default to True. + data: Dictionary to merge into + path: YAML file to load and merge + encoding: Encoding of file. Defaults to utf-8. + loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: Mapping: YAML file merged into dictionary @@ -181,10 +181,10 @@ def load_json_into_existing_dict( """Merge JSON file that is in dictionary form into existing dictionary. Args: - data (dict): Dictionary to merge into - path (str): JSON file to load and merge - encoding (str): Encoding of file. Defaults to utf-8. - loaderror_if_empty (bool): Whether to raise LoadError if file is empty. Default to True. + data: Dictionary to merge into + path: JSON file to load and merge + encoding: Encoding of file. Defaults to utf-8. + loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: Mapping: JSON file merged into dictionary diff --git a/src/hdx/utilities/matching.py b/src/hdx/utilities/matching.py index 4ad2aaa..db87c64 100644 --- a/src/hdx/utilities/matching.py +++ b/src/hdx/utilities/matching.py @@ -25,10 +25,10 @@ def match( or the index of the matching name Args: - possible_names (Sequence): Possible names - name (str): Name to match - alternative_name (str): Alternative name to match. Defaults to None. - transform_possible_names (Sequence[Callable]): Functions to transform possible names. + possible_names: Possible names + name: Name to match + alternative_name: Alternative name to match. Defaults to None. + transform_possible_names: Functions to transform possible names. threshold: Match threshold. Defaults to 2. 
Returns: @@ -71,11 +71,11 @@ def get_code_from_name( Given a name (org type, sector, etc), return the corresponding code. Args: - name (str): Name to match - code_lookup (dict): Dictionary of official names and codes - unmatched (List[str]): List of unmatched names - fuzzy_match (bool): Allow fuzzy matching or not - match_threshold (int): Match threshold + name: Name to match + code_lookup: Dictionary of official names and codes + unmatched: List of unmatched names + fuzzy_match: Allow fuzzy matching or not + match_threshold: Match threshold Returns: Optional[str]: Matching code @@ -116,8 +116,8 @@ def multiple_replace(string: str, replacements: dict[str, str]) -> str: """Simultaneously replace multiple strings in a string. Args: - string (str): Input string - replacements (Dict[str,str]): Replacements dictionary + string: Input string + replacements: Replacements dictionary Returns: str: String with replacements @@ -137,7 +137,7 @@ def match_template_variables( """Try to match {{XXX}} in input string. Args: - string (str): String in which to look for template + string: String in which to look for template Returns: Tuple[Optional[str], Optional[str]]: (Matched string with brackets, matched string without brackets) @@ -154,8 +154,8 @@ def earliest_index(string_to_search: str, strings_to_try: Sequence[str]) -> int index. Args: - string_to_search (str): String to search - strings_to_try (Sequence[str]): Strings to try + string_to_search: String to search + strings_to_try: Strings to try Returns: Optional[int]: Earliest index of the strings to try in string to search or None @@ -185,11 +185,11 @@ def get_matching_text_in_strs( """Returns a list of matching blocks of text in a and b. Args: - a (str): First string to match - b (str): Second string to match - match_min_size (int): Minimum block size to match on. Defaults to 30. - ignore (str): Any characters to ignore in matching. Defaults to ''. - end_characters (str): End characters to look for. Defaults to ''. 
+ a: First string to match + b: Second string to match + match_min_size: Minimum block size to match on. Defaults to 30. + ignore: Any characters to ignore in matching. Defaults to ''. + end_characters: End characters to look for. Defaults to ''. Returns: List[str]: List of matching blocks of text @@ -224,10 +224,10 @@ def get_matching_text( followed by non-matching. Args: - string_list (List[str]): List of strings to match - match_min_size (int): Minimum block size to match on. Defaults to 30. - ignore (str): Any characters to ignore in matching. Defaults to ''. - end_characters (str): End characters to look for. Defaults to '.\r\n'. + string_list: List of strings to match + match_min_size: Minimum block size to match on. Defaults to 30. + ignore: Any characters to ignore in matching. Defaults to ''. + end_characters: End characters to look for. Defaults to '.\r\n'. Returns: str: String containing matching blocks of text followed by non-matching @@ -257,11 +257,11 @@ def get_matching_then_nonmatching_text( followed by non-matching. Args: - string_list (List[str]): List of strings to match - separator (str): Separator to add between blocks of text. Defaults to ''. - match_min_size (int): Minimum block size to match on. Defaults to 30. - ignore (str): Any characters to ignore in matching. Defaults to ''. - end_characters (str): End characters to look for. Defaults to '.\r\n'. + string_list: List of strings to match + separator: Separator to add between blocks of text. Defaults to ''. + match_min_size: Minimum block size to match on. Defaults to 30. + ignore: Any characters to ignore in matching. Defaults to ''. + end_characters: End characters to look for. Defaults to '.\r\n'. 
Returns: str: String containing matching blocks of text followed by non-matching diff --git a/src/hdx/utilities/path.py b/src/hdx/utilities/path.py index f01eec4..3fcf290 100755 --- a/src/hdx/utilities/path.py +++ b/src/hdx/utilities/path.py @@ -38,8 +38,8 @@ def script_dir(pyobject: Any, follow_symlinks: bool = True) -> str: """Get current script's directory. Args: - pyobject (Any): Any Python object in the script - follow_symlinks (bool): Follow symlinks or not. Defaults to True. + pyobject: Any Python object in the script + follow_symlinks: Follow symlinks or not. Defaults to True. Returns: str: Current script's directory @@ -59,9 +59,9 @@ def script_dir_plus_file( """Get current script's directory and then append a filename. Args: - filename (str): Filename to append to directory path - pyobject (Any): Any Python object in the script - follow_symlinks (bool): Follow symlinks or not. Defaults to True. + filename: Filename to append to directory path + pyobject: Any Python object in the script + follow_symlinks: Follow symlinks or not. Defaults to True. Returns: str: Current script's directory and with filename appended @@ -80,9 +80,9 @@ def get_temp_dir( directory. Optionally deletes and recreates it if it already exists. Args: - folder (Optional[str]): Folder to create in temporary folder. Defaults to None. - delete_if_exists (bool): Whether to delete the folder if it exists. Defaults to False. - tempdir (Optional[str]): Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). + folder: Folder to create in temporary folder. Defaults to None. + delete_if_exists: Whether to delete the folder if it exists. Defaults to False. + tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: str: A temporary directory @@ -112,11 +112,11 @@ def temp_dir( if it doesn't exist) Args: - folder (Optional[str]): Folder to create in temporary folder. Defaults to None. 
- delete_if_exists (bool): Whether to delete the folder if it exists. Defaults to False. - delete_on_success (bool): Whether to delete folder (if folder supplied) on exiting with statement successfully. Defaults to True. - delete_on_failure (bool): Whether to delete folder (if folder supplied) on exiting with statement unsuccessfully. Defaults to True. - tempdir (Optional[str]): Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). + folder: Folder to create in temporary folder. Defaults to None. + delete_if_exists: Whether to delete the folder if it exists. Defaults to False. + delete_on_success: Whether to delete folder (if folder supplied) on exiting with statement successfully. Defaults to True. + delete_on_failure: Whether to delete folder (if folder supplied) on exiting with statement unsuccessfully. Defaults to True. + tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: str: A temporary directory @@ -137,8 +137,8 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str: """Get batch or create it if it doesn't exist. Args: - folder (str): Folder in which to look for or create batch file. - batch (Optional[str]): Batch to use if there isn't one in a file already. + folder: Folder in which to look for or create batch file. + batch: Batch to use if there isn't one in a file already. Returns: str: Batch @@ -170,12 +170,12 @@ def temp_dir_batch( passed as the batch parameter in create_in_hdx or update_in_hdx calls. Args: - folder (Optional[str]): Folder to create in temporary folder. Defaults to None. - delete_if_exists (bool): Whether to delete the folder if it exists. Defaults to False. - delete_on_success (bool): Whether to delete folder (if folder supplied) on exiting with statement successfully. Defaults to True. - delete_on_failure (bool): Whether to delete folder (if folder supplied) on exiting with statement unsuccessfully. Defaults to True. 
- batch (Optional[str]): Batch to use if there isn't one in a file already. - tempdir (Optional[str]): Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). + folder: Folder to create in temporary folder. Defaults to None. + delete_if_exists: Whether to delete the folder if it exists. Defaults to False. + delete_on_success: Whether to delete folder (if folder supplied) on exiting with statement successfully. Defaults to True. + delete_on_failure: Whether to delete folder (if folder supplied) on exiting with statement unsuccessfully. Defaults to True. + batch: Batch to use if there isn't one in a file already. + tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: Dict: Dictionary containing temporary directory in key folder and batch id in key batch @@ -197,9 +197,9 @@ def get_wheretostart(text: str, message: str, key: str) -> str | None: """Evaluate WHERETOSTART. Args: - text (str): String to process - message (str): Text for logging - key (str): Key to comapre with + text: String to process + message: Text for logging + key: Key to compare with Returns: Optional[str]: A string or None @@ -230,10 +230,10 @@ def progress_storing_folder( dictionary is the next dictionary in the iterator. Args: - info (Dict): Dictionary containing folder and anything else to be yielded - iterator (Iterable[Dict]): Iterate over this object persisting progress - key (str): Key to examine from dictionary from iterator - wheretostart (Optional[str]): Where in iterator to start + info: Dictionary containing folder and anything else to be yielded + iterator: Iterate over this object persisting progress + key: Key to examine from dictionary from iterator + wheretostart: Where in iterator to start Returns: Tuple[Dict,Dict]: A tuple of the form (info dictionary, next object in iterator) @@ -287,9 +287,9 @@ def wheretostart_tempdir_batch( passed as the batch parameter in create_in_hdx or update_in_hdx calls. 
Args: - folder (str): Folder to create in temporary folder - batch (Optional[str]): Batch to use if there isn't one in a file already. - tempdir (Optional[str]): Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). + folder: Folder to create in temporary folder + batch: Batch to use if there isn't one in a file already. + tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: Dict: Dictionary containing temporary directory in key folder and batch id in key batch @@ -331,11 +331,11 @@ def progress_storing_tempdir( form key=value eg. iso3=PAK indicating where to start. Args: - folder (str): Folder to create in temporary folder - iterator (Iterable[Dict]): Iterate over the iterator persisting progress - key (str): Key to examine from dictionary from iterator - batch (Optional[str]): Batch to use if there isn't one in a file already. - tempdir (Optional[str]): Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). + folder: Folder to create in temporary folder + iterator: Iterate over the iterator persisting progress + key: Key to examine from dictionary from iterator + batch: Batch to use if there isn't one in a file already. + tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: Tuple[Dict,Dict]: A tuple of the form (info dictionary, next object in iterator) @@ -364,10 +364,10 @@ def multiple_progress_storing_tempdir( indicating where to start. Args: - folder (str): Folder to create in temporary folder - iterators (Sequence[Iterable[Dict]): Iterate over each iterator in the list consecutively persisting progress - keys (Sequence[str]): Key to examine from dictionary from each iterator in the above list - batch (Optional[str]): Batch to use if there isn't one in a file already. 
+ folder: Folder to create in temporary folder + iterators: Iterate over each iterator in the list consecutively persisting progress + keys: Key to examine from dictionary from each iterator in the above list + batch: Batch to use if there isn't one in a file already. Returns: Tuple[int, Dict,Dict]: A tuple of the form (iterator index, info dictionary, next object in iterator) @@ -422,9 +422,9 @@ def get_filename_extension_from_url( """Get separately filename and extension from url. Args: - url (str): URL to download - second_last (bool): Get second last segment of url as well. Defaults to False. - use_query (bool): Include query parameters as well. Defaults to False. + url: URL to download + second_last: Get second last segment of url as well. Defaults to False. + use_query: Include query parameters as well. Defaults to False. Returns: Tuple[str,str]: Tuple of (filename, extension) @@ -454,9 +454,9 @@ def get_filename_from_url( """Get filename including extension from url. Args: - url (str): URL - second_last (bool): Get second last segment of url as well. Defaults to False. - use_query (bool): Include query parameters as well. Defaults to False. + url: URL + second_last: Get second last segment of url as well. Defaults to False. + use_query: Include query parameters as well. Defaults to False. Returns: str: filename diff --git a/src/hdx/utilities/retriever.py b/src/hdx/utilities/retriever.py index f4000e3..1fc0ea3 100644 --- a/src/hdx/utilities/retriever.py +++ b/src/hdx/utilities/retriever.py @@ -24,15 +24,15 @@ class Retrieve(BaseDownload): allows the use of a static fallback when downloading fails. Args: - downloader (Download): Download object - fallback_dir (str): Directory containing static fallback data - saved_dir (str): Directory to save or load downloaded data - temp_dir (str): Temporary directory for when data is not needed after downloading - save (bool): Whether to save downloaded data. Defaults to False. 
- use_saved (bool): Whether to use saved data. Defaults to False. - prefix (str): Prefix to add to filenames. Defaults to "". - delete (bool): Whether to delete saved_dir if save is True. Defaults to True. - log_level (int): Level at which to log messages. Defaults to logging.INFO. + downloader: Download object + fallback_dir: Directory containing static fallback data + saved_dir: Directory to save or load downloaded data + temp_dir: Temporary directory for when data is not needed after downloading + save: Whether to save downloaded data. Defaults to False. + use_saved: Whether to use saved data. Defaults to False. + prefix: Prefix to add to filenames. Defaults to "". + delete: Whether to delete saved_dir if save is True. Defaults to True. + log_level: Level at which to log messages. Defaults to logging.INFO. """ retrievers = {} @@ -64,10 +64,10 @@ def check_flags(saved_dir: str, save: bool, use_saved: bool, delete: bool) -> No """Check flags. Also delete saved_dir if save and delete are True. Args: - saved_dir (str): Directory to save or load downloaded data - save (bool): Whether to save downloaded data - use_saved (bool): Whether to use saved data - delete (bool): Whether to delete saved_dir if save is True + saved_dir: Directory to save or load downloaded data + save: Whether to save downloaded data + use_saved: Whether to use saved data + delete: Whether to delete saved_dir if save is True Returns: None @@ -87,7 +87,7 @@ def get_url_logstr(url: str) -> str: necessary. Args: - url (str): URL to download + url: URL to download Returns: str: Url string to use in logs @@ -100,7 +100,7 @@ def clone(self, downloader: Download) -> "Retrieve": """Clone a given retriever but use the given downloader. Args: - downloader (Download): Downloader to use + downloader: Downloader to use Returns: Retrieve: Cloned retriever @@ -126,12 +126,12 @@ def get_filename( """Get filename from url and given parameters. 
Args: - url (str): Url from which to get filename - filename (optional[str]): Filename to use. Defaults to None (infer from url). - possible_extensions (Tuple[str, ...]): Possible extensions to look for in url + url: Url from which to get filename + filename: Filename to use. Defaults to None (infer from url). + possible_extensions: Possible extensions to look for in url **kwargs: See below - format (str): Given extension to look for in url - file_type (str): Given extension to look for in url + format: Given extension to look for in url + file_type: Given extension to look for in url Returns: Tuple[str, Any]: Tuple of (filename, kwargs) @@ -169,7 +169,7 @@ def set_bearer_token(self, bearer_token: str) -> None: """Set bearer token in downloader Args: - bearer_token (str): Bearer token + bearer_token: Bearer token Returns: None @@ -188,11 +188,11 @@ def download_file( """Retrieve file. Args: - url (str): URL to download - filename (Optional[str]): Filename of saved file. Defaults to getting from url. - logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename. - fallback (bool): Whether to use static fallback if download fails. Defaults to False. - log_level (int): Level at which to log messages. Overrides level from constructor. + url: URL to download + filename: Filename of saved file. Defaults to getting from url. + logstr: Text to use in log string to describe download. Defaults to filename. + fallback: Whether to use static fallback if download fails. Defaults to False. + log_level: Level at which to log messages. Overrides level from constructor. **kwargs: Parameters to pass to download_file call Returns: @@ -239,11 +239,11 @@ def download_text( """Download text. Args: - url (str): URL to download - filename (Optional[str]): Filename of saved file. Defaults to getting from url. - logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename. 
- fallback (bool): Whether to use static fallback if download fails. Defaults to False. - log_level (int): Level at which to log messages. Overrides level from constructor. + url: URL to download + filename: Filename of saved file. Defaults to getting from url. + logstr: Text to use in log string to describe download. Defaults to filename. + fallback: Whether to use static fallback if download fails. Defaults to False. + log_level: Level at which to log messages. Overrides level from constructor. **kwargs: Parameters to pass to download_text call Returns: @@ -290,11 +290,11 @@ def download_yaml( """Retrieve YAML. Args: - url (str): URL to download - filename (Optional[str]): Filename of saved file. Defaults to getting from url. - logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename. - fallback (bool): Whether to use static fallback if download fails. Defaults to False. - log_level (int): Level at which to log messages. Overrides level from constructor. + url: URL to download + filename: Filename of saved file. Defaults to getting from url. + logstr: Text to use in log string to describe download. Defaults to filename. + fallback: Whether to use static fallback if download fails. Defaults to False. + log_level: Level at which to log messages. Overrides level from constructor. **kwargs: Parameters to pass to download_yaml call Returns: @@ -341,11 +341,11 @@ def download_json( """Retrieve JSON. Args: - url (str): URL to download - filename (Optional[str]): Filename of saved file. Defaults to getting from url. - logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename. - fallback (bool): Whether to use static fallback if download fails. Defaults to False. - log_level (int): Level at which to log messages. Overrides level from constructor. + url: URL to download + filename: Filename of saved file. Defaults to getting from url. + logstr: Text to use in log string to describe download. 
Defaults to filename. + fallback: Whether to use static fallback if download fails. Defaults to False. + log_level: Level at which to log messages. Overrides level from constructor. **kwargs: Parameters to pass to download_json call Returns: @@ -404,13 +404,13 @@ def get_tabular_rows( or a list, defaulting to a list. Args: - url (Union[str, Sequence[str]]): A single or list of URLs or paths to read from - has_hxl (bool): Whether files have HXL hashtags. Defaults to False. - headers (Union[int, Sequence[int], Sequence[str]]): Number of row(s) containing headers or list of headers. Defaults to 1. - dict_form (bool): Return dict or list for each row. Defaults to False (list) - filename (Optional[str]): Filename of saved file. Defaults to getting from url. - logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename. - fallback (bool): Whether to use static fallback if download fails. Defaults to False. + url: A single or list of URLs or paths to read from + has_hxl: Whether files have HXL hashtags. Defaults to False. + headers: Number of row(s) containing headers or list of headers. Defaults to 1. + dict_form: Return dict or list for each row. Defaults to False (list) + filename: Filename of saved file. Defaults to getting from url. + logstr: Text to use in log string to describe download. Defaults to filename. + fallback: Whether to use static fallback if download fails. Defaults to False. **kwargs: Parameters to pass to download_file and get_tabular_rows calls Returns: @@ -455,13 +455,13 @@ def generate_retrievers( when downloading fails. Args: - fallback_dir (str): Directory containing static fallback data - saved_dir (str): Directory to save or load downloaded data - temp_dir (str): Temporary directory for when data is not needed after downloading - save (bool): Whether to save downloaded data. Defaults to False. - use_saved (bool): Whether to use saved data. Defaults to False. 
- ignore (Sequence[str]): Don't generate retrievers for these downloaders - delete (bool): Whether to delete saved_dir if save is True. Defaults to True. + fallback_dir: Directory containing static fallback data + saved_dir: Directory to save or load downloaded data + temp_dir: Temporary directory for when data is not needed after downloading + save: Whether to save downloaded data. Defaults to False. + use_saved: Whether to use saved data. Defaults to False. + ignore: Don't generate retrievers for these downloaders + delete: Whether to delete saved_dir if save is True. Defaults to True. **kwargs (Any): Any other arguments to pass. Returns: @@ -489,7 +489,7 @@ def get_retriever(cls, name: str | None = None) -> "Retrieve": default one will be returned. Args: - name (Optional[str]): Name of retriever. Defaults to None (get default). + name: Name of retriever. Defaults to None (get default). Returns: Retriever: Retriever object diff --git a/src/hdx/utilities/saver.py b/src/hdx/utilities/saver.py index 0837cdf..d73e7d4 100644 --- a/src/hdx/utilities/saver.py +++ b/src/hdx/utilities/saver.py @@ -55,9 +55,9 @@ def save_text(string: str, path: str, encoding: str = "utf-8") -> None: """Save text string to file. Args: - string (str): String to save - path (str): Path to file - encoding (str): Encoding of file. Defaults to utf-8. + string: String to save + path: Path to file + encoding: Encoding of file. Defaults to utf-8. Returns: None @@ -76,11 +76,11 @@ def save_yaml( """Save dictionary to YAML file preserving order if it is an OrderedDict. Args: - object (Any): Python object to save - path (str): Path to YAML file - encoding (str): Encoding of file. Defaults to utf-8. - pretty (bool): Whether to pretty print. Defaults to False. - sortkeys (bool): Whether to sort dictionary keys. Defaults to False. + object: Python object to save + path: Path to YAML file + encoding: Encoding of file. Defaults to utf-8. + pretty: Whether to pretty print. Defaults to False. 
+ sortkeys: Whether to sort dictionary keys. Defaults to False. Returns: None @@ -110,11 +110,11 @@ def save_json( """Save dictionary to JSON file preserving order if it is an OrderedDict. Args: - object (Any): Python object to save - path (str): Path to JSON file - encoding (str): Encoding of file. Defaults to utf-8. - pretty (bool): Whether to pretty print. Defaults to False. - sortkeys (bool): Whether to sort dictionary keys. Defaults to False. + object: Python object to save + path: Path to JSON file + encoding: Encoding of file. Defaults to utf-8. + pretty: Whether to pretty print. Defaults to False. + sortkeys: Whether to sort dictionary keys. Defaults to False. Returns: None @@ -151,11 +151,11 @@ def save_hxlated_output( variables needed by the metadata defined in the configuration. Args: - configuration (Dict): Configuration for input and output - rows (Sequence[Sequence | Mapping]): Rows of data - includes_header (bool): Whether rows includes header. Defaults to True, - includes_hxltags (bool): Whether rows includes HXL hashtags. Defaults to False. - output_dir (str): Output directory. Defaults to "". + configuration: Configuration for input and output + rows: Rows of data + includes_header: Whether rows includes header. Defaults to True. + includes_hxltags: Whether rows includes HXL hashtags. Defaults to False. + output_dir: Output directory. Defaults to "". **kwargs: Variables to use when evaluating template arguments Returns: @@ -287,13 +287,13 @@ def save_iterable( as containing values.) Args: - filepath (str): Path to write to - rows (Iterable[Sequence | Mapping]): List of rows in dict or list form - headers (Union[int, Sequence[str], None]): Headers to write. Defaults to None. - columns (Union[Sequence[int], Sequence[str], None]): Columns to write. Defaults to all. - format (str): Format to write. Defaults to csv. - encoding (Optional[str]): Encoding to use. Defaults to None (infer encoding). 
- row_function (Optional[Callable[[Dict],Optional[Dict]]]): Row function to call for each row. Defaults to None. + filepath: Path to write to + rows: List of rows in dict or list form + headers: Headers to write. Defaults to None. + columns: Columns to write. Defaults to all. + format: Format to write. Defaults to csv. + encoding: Encoding to use. Defaults to None (infer encoding). + row_function: Row function to call for each row. Defaults to None. Returns: List: List of rows written to file diff --git a/src/hdx/utilities/session.py b/src/hdx/utilities/session.py index d120ea6..2f4e075 100755 --- a/src/hdx/utilities/session.py +++ b/src/hdx/utilities/session.py @@ -40,28 +40,28 @@ def get_session( extra_params_json and extra_params_yaml. Args: - user_agent (Optional[str]): User agent string. HDXPythonUtilities/X.X.X- is prefixed. - user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. - use_env (bool): Whether to read environment variables. Defaults to True. - fail_on_missing_file (bool): Raise an exception if any specified configuration files are missing. Defaults to True. - verify (bool): Whether to verify SSL certificates. Defaults to True. - retry_attempts (int): Number of retry attempts. Defaults to 5. - backoff_factor (int): Backoff factor for retry. Defaults to 1 (0s, 2s, 4s, 8s, 16s, 32s). + user_agent: User agent string. HDXPythonUtilities/X.X.X- is prefixed. + user_agent_config_yaml: Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. + user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. + use_env: Whether to read environment variables. Defaults to True. + fail_on_missing_file: Raise an exception if any specified configuration files are missing. Defaults to True. + verify: Whether to verify SSL certificates. 
Defaults to True. + retry_attempts: Number of retry attempts. Defaults to 5. + backoff_factor: Backoff factor for retry. Defaults to 1 (0s, 2s, 4s, 8s, 16s, 32s). **kwargs: See below - auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR - basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR - basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR - bearer_token (str): Bearer token string OR - bearer_token_file (str): Path to file containing bearer token string OR - extra_params_dict (Dict): Extra parameters to put on end of url as a dictionary OR - extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR - extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url - extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict. - headers (Dict): Additional headers to add to request. - use_auth (str): If more than one auth found, specify which one to use, rather than failing. - status_forcelist (Sequence[int]): HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). - allowed_methods (Sequence[str]): HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). 
+ auth: Authorisation information in tuple form (user, pass) OR + basic_auth: Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR + basic_auth_file: Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR + bearer_token: Bearer token string OR + bearer_token_file: Path to file containing bearer token string OR + extra_params_dict: Extra parameters to put on end of url as a dictionary OR + extra_params_json: Path to JSON file containing extra parameters to put on end of url OR + extra_params_yaml: Path to YAML file containing extra parameters to put on end of url + extra_params_lookup: Lookup key for parameters. If not given assumes parameters are at root of the dict. + headers: Additional headers to add to request. + use_auth: If more than one auth found, specify which one to use, rather than failing. + status_forcelist: HTTP statuses for which to force retry. Defaults to (429, 500, 502, 503, 504). + allowed_methods: HTTP methods for which to force retry. Defaults to ("HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"). """ s = requests.Session() s.verify = verify diff --git a/src/hdx/utilities/state.py b/src/hdx/utilities/state.py index 61a7548..28a50ec 100644 --- a/src/hdx/utilities/state.py +++ b/src/hdx/utilities/state.py @@ -20,8 +20,8 @@ class State: run the state is available in the repository. Args: - path (str): Path to save state file - read_fn (Callable[[str], Any]): Input state transformation. Defaults to lambda x: x. + path: Path to save state file + read_fn: Input state transformation. Defaults to lambda x: x. write_fn: Callable[[Any], str]: Output state transformation. Defaults to lambda x: x. """ @@ -48,9 +48,9 @@ def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: """Allow usage of with. 
Args: - exc_type (Any): Exception type - exc_value (Any): Exception value - traceback (Any): Traceback + exc_type: Exception type + exc_value: Exception value + traceback: Traceback Returns: None @@ -88,7 +88,7 @@ def set(self, state: Any): """Set the state Args: - state (Any): State + state: State Returns: None @@ -103,7 +103,7 @@ def dates_str_to_country_date_dict(dates_str: str) -> dict: {"default": 2017-01-01 as datetime, "afg": 2019-01-01 as datetime} Args: - dates_str (str): Comma separated string of key=date string pairs + dates_str: Comma separated string of key=date string pairs Returns: Dict: Dictionary of key date mappings @@ -122,7 +122,7 @@ def country_date_dict_to_dates_str(country_date_dict: dict) -> str: "default=2017-01-01,afg=2019-01-01" Args: - country_date_dict (Dict): Dictionary of key date mappings + country_date_dict: Dictionary of key date mappings Returns: str: Comma separated string of key=date string pairs diff --git a/src/hdx/utilities/text.py b/src/hdx/utilities/text.py index 832fa40..1ab04ac 100755 --- a/src/hdx/utilities/text.py +++ b/src/hdx/utilities/text.py @@ -28,7 +28,7 @@ def normalise(text: str) -> str: characters are removed. Args: - text (str): Text to normalise + text: Text to normalise Returns: str: Normalised text @@ -54,8 +54,8 @@ def remove_end_characters(string: str, characters_to_remove: str = punctuation) """Remove any characters at end of string that are in characters_to_remove. Args: - string (str): Input string - characters_to_remove (str): Characters to remove. Defaults to punctuation. + string: Input string + characters_to_remove: Characters to remove. Defaults to punctuation. Returns: str: String with any characters at end of string that are in characters_to_remove removed @@ -74,10 +74,10 @@ def remove_from_end( """Remove list of items from end of string, stripping any whitespace. 
Args: - string (str): Input string - things_to_remove (List[str]): Things to remove from the end of string - logging_text (Optional[str]): Text to log. Defaults to None. - whole_words (bool): Remove parts of or whole words. Defaults to True (whole words only). + string: Input string + things_to_remove: Things to remove from the end of string + logging_text: Text to log. Defaults to None. + whole_words: Remove parts of or whole words. Defaults to True (whole words only). Returns: str: String with text removed @@ -107,9 +107,9 @@ def remove_string( and any whitespace following the punctuation Args: - string (str): String to process - toremove (str): String to remove - end_characters_to_remove (str): Characters to remove. Defaults to punctuation. + string: String to process + toremove: String to remove + end_characters_to_remove: Characters to remove. Defaults to punctuation. Returns: str: String with other string removed @@ -127,7 +127,7 @@ def get_words_in_sentence(sentence: str) -> list[str]: """Returns list of words in a sentence. Args: - sentence (str): Sentence + sentence: Sentence Returns: List[str]: List of words in sentence @@ -139,9 +139,9 @@ def number_format(val: Any, format: str = "%.4f", trailing_zeros: bool = True) - """Format float-castable input as string. Args: - val (float): Number to format - format (str): Format to use. Defaults to %.4f. - trailing_zeros (bool): Leave trailing zeros. Defaults to True. + val: Number to format + format: Format to use. Defaults to %.4f. + trailing_zeros: Leave trailing zeros. Defaults to True. Returns: str: Formatted number as string @@ -164,10 +164,10 @@ def get_fraction_str( format as string, returning '' for invalid numerator or 0 denominator. Args: - numerator (float): Numerator - denominator (Optional[float]): Denominator. Defaults to None. - format (str): Format to use. Defaults to %.4f. - trailing_zeros (bool): Leave trailing zeros. Defaults to True. + numerator: Numerator + denominator: Denominator. 
Defaults to None. + format: Format to use. Defaults to %.4f. + trailing_zeros: Leave trailing zeros. Defaults to True. Returns: str: Formatted number as string @@ -190,8 +190,8 @@ def only_allowed_in_str(test_str: str, allowed_chars: set) -> bool: not. Args: - test_str (str): Test string - allowed_chars (Set): Set of allowed characters + test_str: Test string + allowed_chars: Set of allowed characters Returns: bool: True if test string contains only allowed characters, False if not @@ -207,7 +207,7 @@ def get_numeric_if_possible(value: Any) -> Any: float or int, taking into account commas and periods. Args: - value (Any): Value + value: Value Returns: Any: Value diff --git a/src/hdx/utilities/useragent.py b/src/hdx/utilities/useragent.py index 53a5f52..c325533 100755 --- a/src/hdx/utilities/useragent.py +++ b/src/hdx/utilities/useragent.py @@ -25,7 +25,7 @@ def _environment_variables(**kwargs: Any) -> Any: Args: **kwargs: See below - user_agent (str): User agent string. + user_agent: User agent string. Returns: kwargs: Changed keyword arguments @@ -43,9 +43,9 @@ def _construct(configdict: dict, prefix: str, ua: str) -> str: """Construct user agent. Args: - configdict (str): Additional configuration for user agent - prefix (str): Text to put at start of user agent - ua (str): Custom user agent text + configdict: Additional configuration for user agent + prefix: Text to put at start of user agent + ua: Custom user agent text Returns: str: Full user agent string @@ -74,9 +74,9 @@ def _load( """Load user agent YAML file. Args: - prefix (str): Text to put at start of user agent - user_agent_config_yaml (str): Path to user agent YAML file - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. + prefix: Text to put at start of user agent + user_agent_config_yaml: Path to user agent YAML file + user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. 
Returns: str: user agent @@ -115,9 +115,9 @@ def _create( """Get full user agent string. Args: - user_agent (Optional[str]): User agent string. HDXPythonLibrary/X.X.X- is prefixed. - user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. + user_agent: User agent string. HDXPythonLibrary/X.X.X- is prefixed. + user_agent_config_yaml: Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. + user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. Returns: str: Full user agent string @@ -153,9 +153,9 @@ def set_global( """Set global user agent string. Args: - user_agent (Optional[str]): User agent string. HDXPythonLibrary/X.X.X- is prefixed. - user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. + user_agent: User agent string. HDXPythonLibrary/X.X.X- is prefixed. + user_agent_config_yaml: Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. + user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. Returns: None @@ -176,9 +176,9 @@ def get( on global user agent if set. Args: - user_agent (Optional[str]): User agent string. HDXPythonLibrary/X.X.X- is prefixed. - user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. - user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied. + user_agent: User agent string. HDXPythonLibrary/X.X.X- is prefixed. + user_agent_config_yaml: Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yaml. 
+ user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. Returns: str: Full user agent string diff --git a/src/hdx/utilities/uuid.py b/src/hdx/utilities/uuid.py index fe4e44b..ea15073 100644 --- a/src/hdx/utilities/uuid.py +++ b/src/hdx/utilities/uuid.py @@ -14,8 +14,8 @@ def is_valid_uuid(uuid_to_test: str, version: int = 4) -> bool: """Check if uuid_to_test is a valid UUID. Args: - uuid_to_test (str): UUID to test for validity - version (int): UUID version. Defaults to 4. + uuid_to_test: UUID to test for validity + version: UUID version. Defaults to 4. Returns: str: Current script's directory diff --git a/src/hdx/utilities/zip_crc.py b/src/hdx/utilities/zip_crc.py index 5526313..63b3706 100644 --- a/src/hdx/utilities/zip_crc.py +++ b/src/hdx/utilities/zip_crc.py @@ -12,7 +12,7 @@ def find_eocd_signature(tail_data: bytes) -> tuple[int, int, int]: """Find EOCD Signature in zip file Args: - tail_data (bytes): Data to search for EOCD + tail_data: Data to search for EOCD Returns: Tuple[int, int, int]: (total_records, cd_offset, cd_end) or (-1, -1, -1) on failure @@ -33,8 +33,8 @@ def parse_central_directory(data: bytes, num_records: int) -> dict[str, int]: and CRC32 as values. Args: - data (bytes): Data to parse - num_records (int): Number of files in zip + data: Data to parse + num_records: Number of files in zip Returns: Dict[str, int]: Dictionary of filepath to file CRC32 @@ -67,7 +67,7 @@ def get_tail_start(size: int) -> int: """Get the starting offset of the tail of a zip. Args: - size (int): File size + size: File size Returns: int: Starting offset of the tail of a zip @@ -81,7 +81,7 @@ def get_zip_tail_header(size: int) -> dict[str, str]: to the end of a zip. Args: - size (int): File size + size: File size Returns: Dict[str, str]: Header for GET request @@ -93,7 +93,7 @@ def get_zip_cd_header(tail_data: bytes) -> tuple[int, dict]: """Get a header for a GET request with range for the Central Directory of a zip. 
Args: - tail_data (bytes): Data to search for EOCD + tail_data: Data to search for EOCD Returns: Tuple[int, Dict]: (total_records, CD range header) or (-1, {}) on failure @@ -108,7 +108,7 @@ def get_zip_crcs_buffer(buffer: bytes) -> dict[str, int]: """Get CRC32 for each file in a zip given a buffer Args: - buffer (bytes): Zip in buffer + buffer: Zip in buffer Returns: Dict[str, int]: Dictionary of filepath to file CRC32 @@ -125,7 +125,7 @@ def get_zip_crcs_fp(fp: IOBase) -> dict[str, int]: """Get CRC32 for each file in a zip given a file pointer Args: - fp (IOBase): Zip file pointer + fp: Zip file pointer Returns: Dict[str, int]: Dictionary of filepath to file CRC32 @@ -146,7 +146,7 @@ def get_crc_sum(file_crcs: dict[str, int]) -> str: """Calculate the sum of the CRC32 for all files in a zip Args: - file_crcs (Dict[str, int]): Dictionary of filepath to file CRC32 + file_crcs: Dictionary of filepath to file CRC32 Returns: str: Sum of the CRC32 From 29ac8b98b2bee01cd6969a060515d4475ca99a0d Mon Sep 17 00:00:00 2001 From: mcarans Date: Mon, 12 Jan 2026 16:30:34 +1300 Subject: [PATCH 3/4] Remove types from docstrings --- src/hdx/utilities/base_downloader.py | 12 ++--- src/hdx/utilities/compare.py | 2 +- src/hdx/utilities/dateparse.py | 22 ++++----- src/hdx/utilities/dictandlist.py | 28 +++++------ src/hdx/utilities/downloader.py | 60 +++++++++++------------ src/hdx/utilities/email.py | 4 +- src/hdx/utilities/encoding.py | 8 +-- src/hdx/utilities/error_handler.py | 6 +-- src/hdx/utilities/file_hashing.py | 10 ++-- src/hdx/utilities/frictionless_wrapper.py | 8 +-- src/hdx/utilities/html.py | 6 +-- src/hdx/utilities/loader.py | 14 +++--- src/hdx/utilities/matching.py | 16 +++--- src/hdx/utilities/path.py | 40 +++++++-------- src/hdx/utilities/retriever.py | 18 +++---- src/hdx/utilities/saver.py | 2 +- src/hdx/utilities/state.py | 10 ++-- src/hdx/utilities/text.py | 18 +++---- src/hdx/utilities/useragent.py | 10 ++-- src/hdx/utilities/uuid.py | 4 +- 
src/hdx/utilities/zip_crc.py | 16 +++--- 21 files changed, 157 insertions(+), 157 deletions(-) diff --git a/src/hdx/utilities/base_downloader.py b/src/hdx/utilities/base_downloader.py index be1ce40..ea13bf6 100644 --- a/src/hdx/utilities/base_downloader.py +++ b/src/hdx/utilities/base_downloader.py @@ -17,7 +17,7 @@ def __enter__(self) -> "BaseDownload": """Allow usage of with. Returns: - BaseDownload: Download object + Download object """ return self @@ -43,7 +43,7 @@ def download_file(self, url: str, *args: Any, **kwargs: Any) -> str: **kwargs (Any): Keyword arguments Returns: - str: Path of downloaded file + Path of downloaded file """ @abstractmethod @@ -56,7 +56,7 @@ def download_text(self, url: str, *args: Any, **kwargs: Any) -> str: **kwargs (Any): Keyword arguments Returns: - str: The text from the file + The text from the file """ @abstractmethod @@ -69,7 +69,7 @@ def download_yaml(self, url: str, *args: Any, **kwargs: Any) -> Any: **kwargs (Any): Keyword arguments Returns: - Any: The data from the YAML file + The data from the YAML file """ @abstractmethod @@ -82,7 +82,7 @@ def download_json(self, url: str, *args: Any, **kwargs: Any) -> Any: **kwargs (Any): Keyword arguments Returns: - Any: The data from the JSON file + The data from the JSON file """ @abstractmethod @@ -116,5 +116,5 @@ def get_tabular_rows( **kwargs (Any): Keyword arguments Returns: - Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) + Tuple (headers, iterator where each row is a list or dictionary) """ diff --git a/src/hdx/utilities/compare.py b/src/hdx/utilities/compare.py index dc929fc..736ff86 100755 --- a/src/hdx/utilities/compare.py +++ b/src/hdx/utilities/compare.py @@ -17,7 +17,7 @@ def compare_files(path1: str, path2: str, encoding: str = "utf-8") -> list[str]: path2: Path to second file Returns: - List[str]: Delta between the two files + Delta between the two files """ diff = ndiff( open(path1, 
encoding=encoding).read().splitlines(), diff --git a/src/hdx/utilities/dateparse.py b/src/hdx/utilities/dateparse.py index b330390..d008d02 100755 --- a/src/hdx/utilities/dateparse.py +++ b/src/hdx/utilities/dateparse.py @@ -101,7 +101,7 @@ def get_tzinfos(timezone_info: str) -> dict[str, int]: timezone_info: Timezones information string Returns: - Dict[str, int]: tzinfos dictionary + tzinfos dictionary """ tzinfos = {} for tz_descr in map(str.split, timezone_info.split("\n")): @@ -804,7 +804,7 @@ def now_utc() -> datetime: """Return now with UTC timezone. Returns: - datetime: Now with UTC timezone + Now with UTC timezone """ return datetime.now(timezone.utc) @@ -813,7 +813,7 @@ def now_utc_notz() -> datetime: """Return now in UTC but with timezone removed. Returns: - datetime: Now in UTC but with timezone removed + Now in UTC but with timezone removed """ return datetime.now(timezone.utc).replace(tzinfo=None) @@ -872,7 +872,7 @@ def parse_date_range( default_timezones: Timezone information. Defaults to None. (Internal default). Returns: - Tuple[datetime,datetime]: Tuple containing start date and end date + Tuple containing start date and end date """ if date_format is None or fuzzy is not None: if timezone_handling >= 2: @@ -1040,7 +1040,7 @@ def parse_date( default_timezones: Timezone information. Defaults to None. (Internal default). Returns: - datetime: The parsed date + The parsed date """ if max_time: max_starttime = True @@ -1073,7 +1073,7 @@ def get_timestamp_from_datetime(date: datetime) -> float: date: Date to convert Returns: - float: Timestamp + Timestamp """ if date.tzinfo is None: return ( @@ -1109,7 +1109,7 @@ def get_datetime_from_timestamp( today: Today's date. Defaults to now_utc. 
Returns: - datetime: Date of timestamp + Date of timestamp """ if timestamp > get_timestamp_from_datetime(today): timestamp = timestamp / 1000 @@ -1123,7 +1123,7 @@ def iso_string_from_datetime(date: datetime) -> str: date: Date to convert to string Returns: - str: ISO formatted date without any time elements + ISO formatted date without any time elements """ return date.date().isoformat() @@ -1135,7 +1135,7 @@ def get_quarter(date: datetime) -> int: date: Date Returns: - int: Quarter in which the given date is contained + Quarter in which the given date is contained """ return (date.month - 1) // 3 + 1 @@ -1148,7 +1148,7 @@ def get_quarter_start(year: int, quarter: int) -> datetime: quarter: Quarter Returns: - datetime: First day of quarter + First day of quarter """ month = 3 * (quarter - 1) + 1 return datetime(year, month, 1, tzinfo=timezone.utc) @@ -1166,7 +1166,7 @@ def get_quarter_end( include_microseconds: Includes microseconds if True. Defaults to False. Returns: - datetime: First day of quarter + Last day of quarter """ year = year + 3 * quarter // 12 month = 3 * quarter % 12 + 1 diff --git a/src/hdx/utilities/dictandlist.py b/src/hdx/utilities/dictandlist.py index da3d1e0..03c15db 100755 --- a/src/hdx/utilities/dictandlist.py +++ b/src/hdx/utilities/dictandlist.py @@ -17,7 +17,7 @@ def invert_dictionary(d: MutableMapping) -> dict: d: Dictionary Returns: - Dict: Return inverse of dictionary + Return inverse of dictionary """ return dict(zip(d.values(), d.keys())) @@ -35,7 +35,7 @@ def merge_two_dictionaries( merge_lists: Whether to merge lists (True) or replace lists (False). Default is False. Returns: - MutableMapping: Merged dictionary + Merged dictionary """ key = None # ## debug output @@ -88,7 +88,7 @@ def merge_dictionaries( merge_lists: Whether to merge lists (True) or replace lists (False). Default is False. 
Returns: - MutableMapping: Merged dictionary + Merged dictionary """ dict1 = dicts[0] for other_dict in dicts[1:]: @@ -107,7 +107,7 @@ def dict_diff( no_key: What value to use if key is not found Defaults to ''. Returns: - Dict: Comparison dictionary + Comparison dictionary """ d1keys = set(d1.keys()) d2keys = set(d2.keys()) @@ -181,7 +181,7 @@ def list_distribute_contents_simple( function: Return value to use for distributing. Defaults to lambda x: x. Returns: - List: Distributed list + Distributed list """ dictionary = {} for obj in input_list: @@ -214,7 +214,7 @@ def list_distribute_contents( function: Return value to use for distributing. Defaults to lambda x: x. Returns: - List: Distributed list + Distributed list """ def riffle_shuffle(piles_list): @@ -257,7 +257,7 @@ def extract_list_from_list_of_dict(list_of_dict: Sequence[dict], key: Any) -> li key: Key to find in each dictionary Returns: - List: List containing values returned from each dictionary + List containing values returned from each dictionary """ result = [] for dictionary in list_of_dict: @@ -285,7 +285,7 @@ def key_value_convert( exception: The exception to expect if keyfn or valuefn fail. Defaults to ValueError. Returns: - Dict: New dictionary with converted keys and/or values + New dictionary with converted keys and/or values """ dictout = {} for key in dictin: @@ -314,7 +314,7 @@ def integer_key_convert(dictin: MutableMapping, dropfailedkeys: bool = False) -> dropfailedkeys: Whether to drop dictionary entries where key conversion fails. Defaults to False. Returns: - Dict: Dictionary with keys converted to integers + Dictionary with keys converted to integers """ return key_value_convert(dictin, keyfn=int, dropfailedkeys=dropfailedkeys) @@ -329,7 +329,7 @@ def integer_value_convert( dropfailedvalues: Whether to drop dictionary entries where key conversion fails. Defaults to False. 
Returns: - Dict: Dictionary with values converted to integers + Dictionary with values converted to integers """ return key_value_convert(dictin, valuefn=int, dropfailedvalues=dropfailedvalues) @@ -342,7 +342,7 @@ def float_value_convert(dictin: MutableMapping, dropfailedvalues: bool = False) dropfailedvalues: Whether to drop dictionary entries where key conversion fails. Defaults to False. Returns: - Dict: Dictionary with values converted to floats + Dictionary with values converted to floats """ return key_value_convert(dictin, valuefn=float, dropfailedvalues=dropfailedvalues) @@ -358,7 +358,7 @@ def avg_dicts( dropmissing: Whether to drop keys missing in one dictionary. Defaults to True. Returns: - Dict: Dictionary with values being average of 2 input dictionaries + Dictionary with values being average of 2 input dictionaries """ dictout = {} for key in dictin1: @@ -392,7 +392,7 @@ def read_list_from_csv( **kwargs: Other arguments to pass to Tabulator Stream Returns: - List[list | dict]: List of rows in dict or list form + List of rows in dict or list form """ if dict_form and headers is None: raise ValueError("If dict_form is True, headers must not be None!") @@ -485,7 +485,7 @@ def args_to_dict(args: str) -> dict: args: Command line arguments Returns: - Dict: Dictionary of arguments + Dictionary of arguments """ arguments = {} for arg in args.split(","): diff --git a/src/hdx/utilities/downloader.py b/src/hdx/utilities/downloader.py index 2e89b62..7d2a4fa 100755 --- a/src/hdx/utilities/downloader.py +++ b/src/hdx/utilities/downloader.py @@ -152,7 +152,7 @@ def get_path_for_url( keep: Whether to keep already downloaded file. Defaults to False. 
Returns: - str: Path of downloaded file + Path of downloaded file """ if path: if folder or filename: @@ -185,7 +185,7 @@ def get_full_url(self, url: str) -> str: url: URL for which to get full url Returns: - str: Full url including any additional parameters + Full url including any additional parameters """ request = Request("GET", url) preparedrequest = self.session.prepare_request(request) @@ -200,7 +200,7 @@ def get_url_for_get(url: str, parameters: dict | None = None) -> str: parameters: Parameters to pass. Defaults to None. Returns: - str: Full url + Full url """ spliturl = urlsplit(url) getparams = dict(parse_qsl(spliturl.query)) @@ -221,7 +221,7 @@ def get_url_params_for_post( parameters: Parameters to pass. Defaults to None. Returns: - Tuple[str, Dict]: (Full url, parameters) + (Full url, parameters) """ spliturl = urlsplit(url) getparams = dict(parse_qsl(spliturl.query)) @@ -247,7 +247,7 @@ def hxl_row( dict_form: Return dict or list. Defaults to False (list) Returns: - Union[List[str],Dict[str,str]]: Return either a list or dictionary conating HXL hashtags + Return either a list or dictionary containing HXL hashtags """ if dict_form: return {header: hxltags.get(header, "") for header in headers} @@ -277,7 +277,7 @@ def normal_setup( json_string: Whether to post parameters as JSON string. Defaults to False. Returns: - requests.Response: requests.Response object + requests.Response object """ self.close_response() self.response = None @@ -345,7 +345,7 @@ def hash_stream(self, url: str) -> str: url: URL or path to download Returns: - str: MD5 hash of file + MD5 hash of file """ md5hash = hashlib.md5() try: @@ -367,7 +367,7 @@ def stream_path(self, path: str, errormsg: str): errormsg: Error message to display if there is a problem Returns: - str: Path of downloaded file + Path of downloaded file """ f = None try: @@ -404,7 +404,7 @@ def stream_file( keep: Whether to keep already downloaded file. Defaults to False. 
Returns: - str: Path of downloaded file + Path of downloaded file """ path = self.get_path_for_url(url, folder, filename, path, overwrite, keep) if keep and exists(path): @@ -437,7 +437,7 @@ def download_file( json_string: Whether to post parameters as JSON string. Defaults to False. Returns: - str: Path of downloaded file + Path of downloaded file """ folder = kwargs.get("folder") filename = kwargs.get("filename") @@ -475,7 +475,7 @@ def download(self, url: str, **kwargs: Any) -> requests.Response: json_string: Whether to post parameters as JSON string. Defaults to False. Returns: - requests.Response: Response + Response """ return self.setup( url, @@ -495,7 +495,7 @@ def get_header(self, header: str) -> Any: header: Header for which to get value Returns: - Any: Response header's value + Response header's value """ return self.response.headers.get(header) @@ -503,7 +503,7 @@ def get_headers(self) -> Any: """Get response headers of download. Returns: - Any: Response headers + Response headers """ return self.response.headers @@ -511,7 +511,7 @@ def get_status(self) -> int: """Get response status code. Returns: - int: Response status code + Response status code """ return self.response.status_code @@ -519,7 +519,7 @@ def get_text(self) -> str: """Get text content of download. Returns: - str: Text content of download + Text content of download """ return self.response.text @@ -527,7 +527,7 @@ def get_yaml(self) -> Any: """Get YAML content of download. Returns: - Any: YAML content of download + YAML content of download """ with YAML() as yaml: return yaml.load(self.response.text) @@ -536,7 +536,7 @@ def get_json(self) -> Any: """Get JSON content of download. Returns: - Any: JSON content of download + JSON content of download """ return self.response.json() @@ -553,7 +553,7 @@ def download_text(self, url: str, **kwargs: Any) -> str: encoding: Encoding to use for text response. Defaults to None (best guess). 
Returns: - str: Text content of download + Text content of download """ self.download(url, **kwargs) return self.get_text() @@ -571,7 +571,7 @@ def download_yaml(self, url: str, **kwargs: Any) -> Any: encoding: Encoding to use for text response. Defaults to None (best guess). Returns: - str: YAML content of download + YAML content of download """ self.download(url, **kwargs) return self.get_yaml() @@ -589,7 +589,7 @@ def download_json(self, url: str, **kwargs: Any) -> Any: encoding: Encoding to use for text response. Defaults to None (best guess). Returns: - str: JSON content of download + JSON content of download """ self.download(url, **kwargs) return self.get_json() @@ -630,7 +630,7 @@ def get_frictionless_tableresource( schema: This can be set to override the above. See Frictionless docs. Returns: - TableResource: frictionless TableResource object + frictionless TableResource object """ self.close_response() try: @@ -699,7 +699,7 @@ def _get_tabular_rows( schema: This can be set to override the above. See Frictionless docs. Returns: - Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) + Tuple (headers, iterator where each row is a list or dictionary) """ if headers is None: raise DownloadError("Argument headers cannot be None!") @@ -814,7 +814,7 @@ def get_tabular_rows( schema: This can be set to override the above. See Frictionless docs. Returns: - Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator where each row is a list or dictionary) + Tuple (headers, iterator where each row is a list or dictionary) """ if isinstance(url, list): is_list = True @@ -917,7 +917,7 @@ def get_tabular_rows_as_list( schema: This can be set to override the above. See Frictionless docs. 
Returns: - Tuple[List[str],Iterator[List]]: Tuple (headers, iterator where each row is a list) + Tuple (headers, iterator where each row is a list) """ headers, iterator = self.get_tabular_rows( @@ -991,7 +991,7 @@ def get_tabular_rows_as_dict( schema: This can be set to override the above. See Frictionless docs. Returns: - Tuple[List[str], Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary) + Tuple (headers, iterator where each row is a dictionary) """ headers, iterator = self.get_tabular_rows( @@ -1067,7 +1067,7 @@ def download_tabular_key_value( schema: This can be set to override the above. See Frictionless docs. Returns: - Dict: Dictionary keys (first column) and values (second column) + Dictionary keys (first column) and values (second column) """ output_dict = {} _, rows = self.get_tabular_rows_as_list( @@ -1146,7 +1146,7 @@ def download_tabular_rows_as_dicts( schema: This can be set to override the above. See Frictionless docs. Returns: - Dict[str,Dict]: Dictionary where keys are first column and values are dictionaries with keys from column + Dictionary where keys are first column and values are dictionaries with keys from column headers and values from columns beneath """ headers, iterator = self.get_tabular_rows_as_dict( @@ -1229,7 +1229,7 @@ def download_tabular_cols_as_dicts( schema: This can be set to override the above. See Frictionless docs. 
Returns: - Dict[str,Dict]: Dictionary where keys are header names and values are dictionaries with keys from first column + Dictionary where keys are header names and values are dictionaries with keys from first column and values from other columns """ headers, iterator = self.get_tabular_rows_as_dict( @@ -1263,7 +1263,7 @@ def get_column_positions(headers: Sequence[str]) -> dict[str, int]: headers: List of headers Returns: - Dict[str,int]: Dictionary where keys are header names and values are header positions + Dictionary where keys are header names and values are header positions """ columnpositions = {} for i, header in enumerate(headers): @@ -1338,6 +1338,6 @@ def get_downloader(cls, name: str | None = None) -> "Download": name: Name of downloader. Defaults to None (get default). Returns: - Download: Downloader object + Downloader object """ return cls.downloaders.get(name, cls.downloaders["default"]) diff --git a/src/hdx/utilities/email.py b/src/hdx/utilities/email.py index 4944ea6..9a053f3 100755 --- a/src/hdx/utilities/email.py +++ b/src/hdx/utilities/email.py @@ -155,7 +155,7 @@ def get_normalised_email(email: str, check_deliverability: bool = False) -> str: email: Email address to normalise Returns: - str: Normalised email + Normalised email """ try: v = validate_email( @@ -176,7 +176,7 @@ def get_normalised_emails( emails: Email address or addresses Returns: - List[str]: Normalised emails + Normalised emails """ if isinstance(emails, str): emails = (emails,) diff --git a/src/hdx/utilities/encoding.py b/src/hdx/utilities/encoding.py index 4f1ffff..96492be 100755 --- a/src/hdx/utilities/encoding.py +++ b/src/hdx/utilities/encoding.py @@ -11,7 +11,7 @@ def str_to_base64(string: str) -> str: string: String to encode Returns: - str: Base 64 encoded string + Base 64 encoded string """ return base64.urlsafe_b64encode(string.encode("utf-8")).decode("utf-8") @@ -23,7 +23,7 @@ def base64_to_str(bstring: str) -> str: bstring: Base 64 encoded string to encode 
Returns: - str: Decoded string + Decoded string """ return base64.urlsafe_b64decode(bstring.encode("utf-8")).decode("utf-8") @@ -39,7 +39,7 @@ def basicauth_encode(username: str, password: str) -> str: password: Password Returns: - str: Basic authentication string + Basic authentication string """ if ":" in username: raise ValueError @@ -58,7 +58,7 @@ def basicauth_decode(encoded_string: str) -> tuple[str, str]: encoded_string: String to decode Returns: - Tuple[str, str]: Tuple of form (username, password) + Tuple of form (username, password) """ split_encoded_string = encoded_string.strip().split(" ") diff --git a/src/hdx/utilities/error_handler.py b/src/hdx/utilities/error_handler.py index 8546e33..ac8851e 100644 --- a/src/hdx/utilities/error_handler.py +++ b/src/hdx/utilities/error_handler.py @@ -65,7 +65,7 @@ def missing_value_message(value_type: str, value: Any) -> str: value: The specific missing value Returns: - str: A formatted message stating the missing value and its type + A formatted message stating the missing value and its type """ return f"{value_type} {str(value)} not found" @@ -107,7 +107,7 @@ def multi_valued_message(self, text: str, values: Sequence) -> str | None: values: The list of related values of concern Returns: - Optional[str]: A formatted string in the format defined above + A formatted string in the format defined above """ if not values: return None @@ -139,7 +139,7 @@ def add_multi_valued( category: Error category. Defaults to "". message_type: The type of message (error or warning). 
Default is "error" Returns: - bool: True if a message was added, False if not + True if a message was added, False if not """ message = self.multi_valued_message(text, values) if message is None: diff --git a/src/hdx/utilities/file_hashing.py b/src/hdx/utilities/file_hashing.py index ed0dced..58896e6 100644 --- a/src/hdx/utilities/file_hashing.py +++ b/src/hdx/utilities/file_hashing.py @@ -21,7 +21,7 @@ def hash_excel_buffer(buffer: bytes) -> str: buffer: Excel XLSX file buffer Returns: - str: MD5 hash of the sheets + MD5 hash of the sheets """ file_stream = BytesIO(buffer) md5hash = hashlib.md5() @@ -58,7 +58,7 @@ def hash_excel_fp(fp: IOBase) -> str: fp: Excel file pointer Returns: - str: MD5 hash of the sheets + MD5 hash of the sheets """ return hash_excel_buffer(fp.read()) @@ -70,7 +70,7 @@ def crc_zip_buffer(buffer: bytes) -> str: buffer: Zip in buffer Returns: - str: Sum of the CRC32 + Sum of the CRC32 """ file_crcs = get_zip_crcs_buffer(buffer) @@ -84,7 +84,7 @@ def crc_zip_fp(fp: IOBase) -> str: fp: Zip file pointer Returns: - str: Sum of the CRC32 + Sum of the CRC32 """ file_crcs = get_zip_crcs_fp(fp) return get_crc_sum(file_crcs) @@ -98,7 +98,7 @@ def get_size_and_hash(filepath: str, file_format: str) -> tuple[int, str]: file_format: File format Returns: - Tuple[int, str]: Tuple (size, hash) + Tuple (size, hash) """ with open(filepath, "rb") as fp: size = fstat(fp.fileno()).st_size diff --git a/src/hdx/utilities/frictionless_wrapper.py b/src/hdx/utilities/frictionless_wrapper.py index 60dd139..a5083dc 100644 --- a/src/hdx/utilities/frictionless_wrapper.py +++ b/src/hdx/utilities/frictionless_wrapper.py @@ -32,7 +32,7 @@ def get_frictionless_control(**kwargs: Any) -> tuple[Control, Any]: control: This can be set to override the above. See Frictionless docs. 
Returns: - Tuple[Control, Any]: (frictionless Control object, kwargs) + (frictionless Control object, kwargs) """ control = kwargs.get("control") file_type = kwargs.pop("file_type", None) @@ -80,7 +80,7 @@ def get_frictionless_detector(infer_types: bool, **kwargs: Any) -> tuple[Detecto detector: This can be set to override the above. See Frictionless docs. Returns: - Tuple[Detector, Any]: (frictionless Detector object, kwargs) + (frictionless Detector object, kwargs) """ detector = kwargs.get("detector", Detector()) if infer_types: @@ -108,7 +108,7 @@ def get_frictionless_dialect( dialect: This can be set to override the above. See Frictionless docs. Returns: - Tuple[Dialect, Any]: (frictionless Dialect object, Any) + (frictionless Dialect object, Any) """ dialect = kwargs.get("dialect", Dialect()) columns = kwargs.pop("columns", None) @@ -159,7 +159,7 @@ def get_frictionless_tableresource( schema: This can be set to override the above. See Frictionless docs. Returns: - TableResource: frictionless TableResource object + frictionless TableResource object """ if not url and not data: error = ResourceError(note="Neither url or data supplied!") diff --git a/src/hdx/utilities/html.py b/src/hdx/utilities/html.py index 60bb0e7..dda9bb3 100755 --- a/src/hdx/utilities/html.py +++ b/src/hdx/utilities/html.py @@ -35,7 +35,7 @@ def get_soup( user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. 
Returns: - BeautifulSoup: The BeautifulSoup object for a url + The BeautifulSoup object for a url """ if not downloader: downloader = Download( @@ -52,7 +52,7 @@ def get_text(tag: Tag) -> str: tag: BeautifulSoup tag Returns: - str: Text of tag stripped of leading and trailing whitespace and newlines and with   replaced with space + Text of tag stripped of leading and trailing whitespace and newlines and with   replaced with space """ return tag.get_text().strip(" \t\n\r").replace("\xa0", " ") @@ -63,7 +63,7 @@ def extract_table(tabletag: Tag) -> list[dict]: tabletag: BeautifulSoup tag Returns: - str: Text of tag stripped of leading and trailing whitespace and newlines and with   replaced with space + Text of tag stripped of leading and trailing whitespace and newlines and with   replaced with space """ theadtag = tabletag.find_next("thead") diff --git a/src/hdx/utilities/loader.py b/src/hdx/utilities/loader.py index e11ed2d..33aef53 100755 --- a/src/hdx/utilities/loader.py +++ b/src/hdx/utilities/loader.py @@ -35,7 +35,7 @@ def load_text( default_line_separator: line separator to be replaced if replace_line_separators is not None Returns: - str: String contents of file + String contents of file """ if replace_newlines is not None: warn( @@ -69,7 +69,7 @@ def load_yaml( loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: - Any: The data from the YAML file + The data from the YAML file """ with open(path, encoding=encoding) as f: data = f.read() @@ -93,7 +93,7 @@ def load_json( loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: - Any: The data from the JSON file + The data from the JSON file """ with open(path, encoding=encoding) as f: data = f.read() @@ -119,7 +119,7 @@ def load_and_merge_yaml( loaderror_if_empty: Whether to raise LoadError if any file is empty. Default to True. 
Returns: - Mapping: Dictionary of merged YAML files + Dictionary of merged YAML files """ configs = [ load_yaml(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) @@ -142,7 +142,7 @@ def load_and_merge_json( loaderror_if_empty: Whether to raise LoadError if any file is empty. Default to True. Returns: - Mapping: Dictionary of merged JSON files + Dictionary of merged JSON files """ configs = [ load_json(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) @@ -166,7 +166,7 @@ def load_yaml_into_existing_dict( loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: - Mapping: YAML file merged into dictionary + YAML file merged into dictionary """ yamldict = load_yaml(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) return merge_two_dictionaries(data, yamldict) @@ -187,7 +187,7 @@ def load_json_into_existing_dict( loaderror_if_empty: Whether to raise LoadError if file is empty. Default to True. Returns: - Mapping: JSON file merged into dictionary + JSON file merged into dictionary """ jsondict = load_json(path, encoding=encoding, loaderror_if_empty=loaderror_if_empty) return merge_two_dictionaries(data, jsondict) diff --git a/src/hdx/utilities/matching.py b/src/hdx/utilities/matching.py index db87c64..73e128e 100644 --- a/src/hdx/utilities/matching.py +++ b/src/hdx/utilities/matching.py @@ -32,7 +32,7 @@ def match( threshold: Match threshold. Defaults to 2. 
Returns: - Optional[int]: Index of matching name from possible names or None + Index of matching name from possible names or None """ mindistance = None matching_index = None @@ -78,7 +78,7 @@ def get_code_from_name( match_threshold: Match threshold Returns: - Optional[str]: Matching code + Matching code """ code = code_lookup.get(name) if code: @@ -120,7 +120,7 @@ def multiple_replace(string: str, replacements: dict[str, str]) -> str: replacements: Replacements dictionary Returns: - str: String with replacements + String with replacements """ if not replacements: return string @@ -140,7 +140,7 @@ def match_template_variables( string: String in which to look for template Returns: - Tuple[Optional[str], Optional[str]]: (Matched string with brackets, matched string without brackets) + (Matched string with brackets, matched string without brackets) """ match = TEMPLATE_VARIABLES.search(string) if match: @@ -158,7 +158,7 @@ def earliest_index(string_to_search: str, strings_to_try: Sequence[str]) -> int strings_to_try: Strings to try Returns: - Optional[int]: Earliest index of the strings to try in string to search or None + Earliest index of the strings to try in string to search or None """ after_string = len(string_to_search) + 1 indices = [] @@ -192,7 +192,7 @@ def get_matching_text_in_strs( end_characters: End characters to look for. Defaults to ''. Returns: - List[str]: List of matching blocks of text + List of matching blocks of text """ compare = difflib.SequenceMatcher(lambda x: x in ignore) compare.set_seqs(a=a, b=b) @@ -230,7 +230,7 @@ def get_matching_text( end_characters: End characters to look for. Defaults to '.\r\n'. Returns: - str: String containing matching blocks of text followed by non-matching + String containing matching blocks of text followed by non-matching """ a = string_list[0] for i in range(1, len(string_list)): @@ -264,7 +264,7 @@ def get_matching_then_nonmatching_text( end_characters: End characters to look for. Defaults to '.\r\n'. 
Returns: - str: String containing matching blocks of text followed by non-matching + String containing matching blocks of text followed by non-matching """ def add_separator_if_needed(text_list): diff --git a/src/hdx/utilities/path.py b/src/hdx/utilities/path.py index 3fcf290..6de51cf 100755 --- a/src/hdx/utilities/path.py +++ b/src/hdx/utilities/path.py @@ -17,7 +17,7 @@ ) from shutil import rmtree from tempfile import gettempdir -from typing import Any, Sequence +from typing import Any, Sequence, Generator from urllib.parse import unquote_plus, urlsplit from slugify import slugify @@ -42,7 +42,7 @@ def script_dir(pyobject: Any, follow_symlinks: bool = True) -> str: follow_symlinks: Follow symlinks or not. Defaults to True. Returns: - str: Current script's directory + Current script's directory """ if getattr(sys, "frozen", False): # py2exe, PyInstaller, cx_Freeze path = abspath(sys.executable) # pragma: no cover @@ -64,7 +64,7 @@ def script_dir_plus_file( follow_symlinks: Follow symlinks or not. Defaults to True. Returns: - str: Current script's directory and with filename appended + Current script's directory and with filename appended """ return join(script_dir(pyobject, follow_symlinks), filename) @@ -85,7 +85,7 @@ def get_temp_dir( tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: - str: A temporary directory + A temporary directory """ if tempdir is None: tempdir = getenv("TEMP_DIR", gettempdir()) @@ -107,7 +107,7 @@ def temp_dir( delete_on_success: bool = True, delete_on_failure: bool = True, tempdir: str | None = None, -) -> str: +) -> Generator[str, Any, None]: """Get a temporary directory optionally with folder appended (and created if it doesn't exist) @@ -119,7 +119,7 @@ def temp_dir( tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). 
Returns: - str: A temporary directory + A temporary directory """ tempdir = get_temp_dir(folder, delete_if_exists=delete_if_exists, tempdir=tempdir) try: @@ -141,7 +141,7 @@ def read_or_create_batch(folder: str, batch: str | None = None) -> str: batch: Batch to use if there isn't one in a file already. Returns: - str: Batch + Batch """ batch_file = join(folder, "batch.txt") if exists(batch_file): @@ -163,7 +163,7 @@ def temp_dir_batch( delete_on_failure: bool = True, batch: str | None = None, tempdir: str | None = None, -) -> dict: +) -> Generator[dict, Any, None]: """Get a temporary directory and batch id. Yields a dictionary with key folder which is the temporary directory optionally with folder appended (and created if it doesn't exist). In key batch is a batch code to be @@ -178,7 +178,7 @@ def temp_dir_batch( tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: - Dict: Dictionary containing temporary directory in key folder and batch id in key batch + Dictionary containing temporary directory in key folder and batch id in key batch """ with temp_dir( folder, @@ -202,7 +202,7 @@ def get_wheretostart(text: str, message: str, key: str) -> str | None: key: Key to comapre with Returns: - Optional[str]: A string or None + A string or None """ upper_text = text.upper() if upper_text == "RESET": @@ -220,7 +220,7 @@ def progress_storing_folder( iterator: Iterable[dict], key: str, wheretostart: str | None = None, -) -> tuple[dict, dict]: +) -> Generator[tuple[dict, dict], Any, None]: """Store progress in folder in key folder of info dictionary parameter. Yields 2 dictionaries. The first is the info dictionary. 
It contains in key folder the folder being used to store progress and in key progress the @@ -236,7 +236,7 @@ def progress_storing_folder( wheretostart: Where in iterator to start Returns: - Tuple[Dict,Dict]: A tuple of the form (info dictionary, next object in iterator) + A tuple of the form (info dictionary, next object in iterator) """ folder = info["folder"] progress_file = join(folder, "progress.txt") @@ -279,7 +279,7 @@ def progress_storing_folder( @contextlib.contextmanager def wheretostart_tempdir_batch( folder: str, batch: str | None = None, tempdir: str | None = None -) -> dict: +) -> Generator[dict, Any, None]: """Get a temporary directory and batch id. Deletes any existing folder if WHERETOSTART environment variable is set to RESET. Yields a dictionary with key folder which is the temporary directory optionally with folder appended @@ -292,7 +292,7 @@ def wheretostart_tempdir_batch( tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). Returns: - Dict: Dictionary containing temporary directory in key folder and batch id in key batch + Dictionary containing temporary directory in key folder and batch id in key batch """ delete_if_exists = False wheretostart = getenv("WHERETOSTART") @@ -317,7 +317,7 @@ def progress_storing_tempdir( key: str, batch: str | None = None, tempdir: str | None = None, -) -> tuple[dict, dict]: +) -> Generator[tuple[dict, dict], Any, None]: """Store progress in temporary directory. The folder persists until the final iteration allowing which iteration to start at and the batch code to be persisted between runs. Yields 2 dictionaries. The first contains key @@ -338,7 +338,7 @@ def progress_storing_tempdir( tempdir: Folder to use as temporary directory. Defaults to None (TEMP_DIR or os.gettempdir). 
Returns: - Tuple[Dict,Dict]: A tuple of the form (info dictionary, next object in iterator) + A tuple of the form (info dictionary, next object in iterator) """ with wheretostart_tempdir_batch(folder, batch=batch, tempdir=tempdir) as info: yield from progress_storing_folder(info, iterator, key) @@ -349,7 +349,7 @@ def multiple_progress_storing_tempdir( iterators: Sequence[Iterable[dict]], keys: Sequence[str], batch: str | None = None, -) -> tuple[dict, dict]: +) -> Generator[tuple[int, dict, dict], Any, None]: """Store progress in temporary directory. The folder persists until the final iteration of the last iterator allowing which iteration to start at and the batch code to be persisted between runs. Yields 2 dictionaries. The @@ -370,7 +370,7 @@ def multiple_progress_storing_tempdir( batch: Batch to use if there isn't one in a file already. Returns: - Tuple[int, Dict,Dict]: A tuple of the form (iterator index, info dictionary, next object in iterator) + A tuple of the form (iterator index, info dictionary, next object in iterator) """ delete_if_exists = False wheretostartenv = getenv("WHERETOSTART") @@ -427,7 +427,7 @@ def get_filename_extension_from_url( use_query: Include query parameters as well. Defaults to False. Returns: - Tuple[str,str]: Tuple of (filename, extension) + Tuple of (filename, extension) """ split_url = urlsplit(unquote_plus(url)) urlpath = split_url.path @@ -459,7 +459,7 @@ def get_filename_from_url( use_query: Include query parameters as well. Defaults to False. 
Returns: - str: filename + filename """ filename, extension = get_filename_extension_from_url(url, second_last, use_query) return f"{filename}{extension}" diff --git a/src/hdx/utilities/retriever.py b/src/hdx/utilities/retriever.py index 1fc0ea3..d6e1277 100644 --- a/src/hdx/utilities/retriever.py +++ b/src/hdx/utilities/retriever.py @@ -90,7 +90,7 @@ def get_url_logstr(url: str) -> str: url: URL to download Returns: - str: Url string to use in logs + Url string to use in logs """ if len(url) > 100: return f"{url[:100]}..." @@ -103,7 +103,7 @@ def clone(self, downloader: Download) -> "Retrieve": downloader: Downloader to use Returns: - Retrieve: Cloned retriever + Cloned retriever """ return Retrieve( downloader, @@ -134,7 +134,7 @@ def get_filename( file_type: Given extension to look for in url Returns: - Tuple[str, Any]: Tuple of (filename, kwargs) + Tuple of (filename, kwargs) """ prefix = kwargs.pop("file_prefix", self.prefix) if prefix: @@ -196,7 +196,7 @@ def download_file( **kwargs: Parameters to pass to download_file call Returns: - str: Path to downloaded file + Path to downloaded file """ if log_level is None: log_level = self.log_level @@ -247,7 +247,7 @@ def download_text( **kwargs: Parameters to pass to download_text call Returns: - str: The text from the file + The text from the file """ if log_level is None: log_level = self.log_level @@ -298,7 +298,7 @@ def download_yaml( **kwargs: Parameters to pass to download_yaml call Returns: - Any: The data from the YAML file + The data from the YAML file """ if log_level is None: log_level = self.log_level @@ -349,7 +349,7 @@ def download_json( **kwargs: Parameters to pass to download_json call Returns: - Any: The data from the JSON file + The data from the JSON file """ if log_level is None: log_level = self.log_level @@ -414,7 +414,7 @@ def get_tabular_rows( **kwargs: Parameters to pass to download_file and get_tabular_rows calls Returns: - Tuple[List[str],Iterator[list | dict]]: Tuple (headers, iterator
where each row is a list or dictionary) + Tuple (headers, iterator where each row is a list or dictionary) """ if isinstance(url, list): is_list = True @@ -492,6 +492,6 @@ def get_retriever(cls, name: str | None = None) -> "Retrieve": name: Name of retriever. Defaults to None (get default). Returns: - Retriever: Retriever object + Retriever object """ return cls.retrievers.get(name, cls.retrievers["default"]) diff --git a/src/hdx/utilities/saver.py b/src/hdx/utilities/saver.py index d73e7d4..207bcd8 100644 --- a/src/hdx/utilities/saver.py +++ b/src/hdx/utilities/saver.py @@ -296,7 +296,7 @@ def save_iterable( row_function: Row function to call for each row. Defaults to None. Returns: - List: List of rows written to file + List of rows written to file """ if row_function is None: diff --git a/src/hdx/utilities/state.py b/src/hdx/utilities/state.py index 28a50ec..57ab6a3 100644 --- a/src/hdx/utilities/state.py +++ b/src/hdx/utilities/state.py @@ -40,7 +40,7 @@ def __enter__(self) -> "State": """Allow usage of with. 
Returns: - State: SavedState object + SavedState object """ return self @@ -61,7 +61,7 @@ def read(self) -> Any: """Read state from file Returns: - Any: State + State """ value = self.read_fn(load_text(self.path)) logger.info(f"State read from {self.path} = {value}") @@ -80,7 +80,7 @@ def get(self) -> Any: """Get the state Returns: - Any: State + State """ return self.state @@ -106,7 +106,7 @@ def dates_str_to_country_date_dict(dates_str: str) -> dict: dates_str: Comma separated string of key=date string pairs Returns: - Dict: Dictionary of key date mappings + Dictionary of key date mappings """ result = {} for keyvalue in dates_str.split(","): @@ -125,7 +125,7 @@ def country_date_dict_to_dates_str(country_date_dict: dict) -> str: country_date_dict: Dictionary of key date mappings Returns: - str: Comma separated string of key=date string pairs + Comma separated string of key=date string pairs """ strlist = [] for key, value in country_date_dict.items(): diff --git a/src/hdx/utilities/text.py b/src/hdx/utilities/text.py index 1ab04ac..32dd7bf 100755 --- a/src/hdx/utilities/text.py +++ b/src/hdx/utilities/text.py @@ -31,7 +31,7 @@ def normalise(text: str) -> str: text: Text to normalise Returns: - str: Normalised text + Normalised text """ chars = [] space = False @@ -58,7 +58,7 @@ def remove_end_characters(string: str, characters_to_remove: str = punctuation) characters_to_remove: Characters to remove. Defaults to punctuation. Returns: - str: String with any characters at end of string that are in characters_to_remove removed + String with any characters at end of string that are in characters_to_remove removed """ while string[-1] in characters_to_remove: string = string[:-1] @@ -80,7 +80,7 @@ def remove_from_end( whole_words: Remove parts of or whole words. Defaults to True (whole words only). 
Returns: - str: String with text removed + String with text removed """ for thing in things_to_remove: thing_len = len(thing) @@ -112,7 +112,7 @@ def remove_string( end_characters_to_remove: Characters to remove. Defaults to punctuation. Returns: - str: String with other string removed + String with other string removed """ index = string.find(toremove) @@ -130,7 +130,7 @@ def get_words_in_sentence(sentence: str) -> list[str]: sentence: Sentence Returns: - List[str]: List of words in sentence + List of words in sentence """ return re.sub("[" + punctuation.replace("'", "") + "]", " ", sentence).split() @@ -144,7 +144,7 @@ def number_format(val: Any, format: str = "%.4f", trailing_zeros: bool = True) - trailing_zeros: Leave trailing zeros. Defaults to True. Returns: - str: Formatted number as string + Formatted number as string """ if val == "" or val is None: return "" @@ -170,7 +170,7 @@ def get_fraction_str( trailing_zeros: Leave trailing zeros. Defaults to True. Returns: - str: Formatted number as string + Formatted number as string """ try: numerator = float(numerator) @@ -194,7 +194,7 @@ def only_allowed_in_str(test_str: str, allowed_chars: set) -> bool: allowed_chars: Set of allowed characters Returns: - bool: True if test string contains only allowed characters, False if not + True if test string contains only allowed characters, False if not """ return set(test_str) <= allowed_chars @@ -210,7 +210,7 @@ def get_numeric_if_possible(value: Any) -> Any: value: Value Returns: - Any: Value + Value """ def get_int_value(val, denominator): diff --git a/src/hdx/utilities/useragent.py b/src/hdx/utilities/useragent.py index c325533..cffe3fd 100755 --- a/src/hdx/utilities/useragent.py +++ b/src/hdx/utilities/useragent.py @@ -28,7 +28,7 @@ def _environment_variables(**kwargs: Any) -> Any: user_agent: User agent string. 
Returns: - kwargs: Changed keyword arguments + Changed keyword arguments """ user_agent = os.getenv("USER_AGENT") if user_agent is not None: @@ -48,7 +48,7 @@ def _construct(configdict: dict, prefix: str, ua: str) -> str: ua: Custom user agent text Returns: - str: Full user agent string + Full user agent string """ if not ua: raise UserAgentError( @@ -79,7 +79,7 @@ def _load( user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. Returns: - str: user agent + user agent """ if not user_agent_config_yaml: user_agent_config_yaml = cls.default_user_agent_config_yaml @@ -120,7 +120,7 @@ def _create( user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. Returns: - str: Full user agent string + Full user agent string """ kwargs = UserAgent._environment_variables(**kwargs) user_agent = kwargs.pop("user_agent", user_agent) @@ -181,7 +181,7 @@ def get( user_agent_lookup: Lookup key for YAML. Ignored if user_agent supplied. Returns: - str: Full user agent string + Full user agent string """ if ( user_agent diff --git a/src/hdx/utilities/uuid.py b/src/hdx/utilities/uuid.py index ea15073..0605d40 100644 --- a/src/hdx/utilities/uuid.py +++ b/src/hdx/utilities/uuid.py @@ -5,7 +5,7 @@ def get_uuid() -> str: """Get an UUID. Returns: - str: A UUID + A UUID """ return str(uuid4()) @@ -18,7 +18,7 @@ def is_valid_uuid(uuid_to_test: str, version: int = 4) -> bool: version: UUID version. Defaults to 4. 
Returns: - str: Current script's directory + True if the string is a valid UUID, False if not """ try: uuid_obj = UUID(uuid_to_test, version=version) diff --git a/src/hdx/utilities/zip_crc.py b/src/hdx/utilities/zip_crc.py index 63b3706..57a2fd1 100644 --- a/src/hdx/utilities/zip_crc.py +++ b/src/hdx/utilities/zip_crc.py @@ -15,7 +15,7 @@ def find_eocd_signature(tail_data: bytes) -> tuple[int, int, int]: tail_data: Data to search for EOCD Returns: - Tuple[int, int, int]: (total_records, cd_offset, cd_end) or (-1, -1, -1) on failure + (total_records, cd_offset, cd_end) or (-1, -1, -1) on failure """ eocd_pos = tail_data.rfind(EOCD_SIGNATURE) if eocd_pos == -1: @@ -37,7 +37,7 @@ def parse_central_directory(data: bytes, num_records: int) -> dict[str, int]: num_records: Number of files in zip Returns: - Dict[str, int]: Dictionary of filepath to file CRC32 + Dictionary of filepath to file CRC32 """ results = {} offset = 0 @@ -70,7 +70,7 @@ def get_tail_start(size: int) -> int: size: File size Returns: - int: Starting offset of the tail of a zip + Starting offset of the tail of a zip """ read_size = min(size, MAX_COMMENT_SIZE + EOCD_MIN_SIZE) return size - read_size @@ -84,7 +84,7 @@ def get_zip_tail_header(size: int) -> dict[str, str]: size: File size Returns: - Dict[str, str]: Header for GET request + Header for GET request """ return {"Range": f"bytes={get_tail_start(size)}-"} @@ -96,7 +96,7 @@ def get_zip_cd_header(tail_data: bytes) -> tuple[int, dict]: tail_data: Data to search for EOCD Returns: - Tuple[int, Dict]: (total_records, CD range header) or (-1, {}) on failure + (total_records, CD range header) or (-1, {}) on failure """ total_records, cd_offset, cd_end = find_eocd_signature(tail_data) if total_records == -1: @@ -111,7 +111,7 @@ def get_zip_crcs_buffer(buffer: bytes) -> dict[str, int]: buffer: Zip in buffer Returns: - Dict[str, int]: Dictionary of filepath to file CRC32 + Dictionary of filepath to file CRC32 """ tail_data = buffer[get_tail_start(len(buffer)) :] num_records,
cd_offset, cd_end = find_eocd_signature(tail_data) @@ -128,7 +128,7 @@ def get_zip_crcs_fp(fp: IOBase) -> dict[str, int]: fp: Zip file pointer Returns: - Dict[str, int]: Dictionary of filepath to file CRC32 + Dictionary of filepath to file CRC32 """ size = fstat(fp.fileno()).st_size tail_start = get_tail_start(size) @@ -149,7 +149,7 @@ def get_crc_sum(file_crcs: dict[str, int]) -> str: file_crcs: Dictionary of filepath to file CRC32 Returns: - str: Sum of the CRC32 + Sum of the CRC32 """ crc_sum = 0 for crc in file_crcs.values(): From 6fb76c2ef1a57588864a3e1c0c0cb4c7c7b7cf40 Mon Sep 17 00:00:00 2001 From: mcarans Date: Mon, 12 Jan 2026 16:32:23 +1300 Subject: [PATCH 4/4] Remove types from docstrings --- src/hdx/utilities/base_downloader.py | 6 ++---- src/hdx/utilities/dictandlist.py | 5 ++--- src/hdx/utilities/downloader.py | 5 ++--- src/hdx/utilities/email.py | 4 ++-- src/hdx/utilities/error_handler.py | 4 ++-- src/hdx/utilities/loader.py | 4 ++-- src/hdx/utilities/matching.py | 4 +--- src/hdx/utilities/path.py | 5 ++--- src/hdx/utilities/retriever.py | 5 ++--- src/hdx/utilities/saver.py | 5 ++--- 10 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/hdx/utilities/base_downloader.py b/src/hdx/utilities/base_downloader.py index ea13bf6..ac16245 100644 --- a/src/hdx/utilities/base_downloader.py +++ b/src/hdx/utilities/base_downloader.py @@ -1,8 +1,6 @@ from abc import ABC, abstractmethod -from collections.abc import Iterator -from typing import Any, Sequence - - +from collections.abc import Iterator, Sequence +from typing import Any class DownloadError(Exception): diff --git a/src/hdx/utilities/dictandlist.py b/src/hdx/utilities/dictandlist.py index 03c15db..a1c0e33 100755 --- a/src/hdx/utilities/dictandlist.py +++ b/src/hdx/utilities/dictandlist.py @@ -2,13 +2,12 @@ import itertools import warnings -from collections.abc import Callable, MutableMapping -from typing import Any, Sequence, Mapping +from collections.abc import Callable, Mapping, 
MutableMapping, Sequence +from typing import Any from hdx.utilities.frictionless_wrapper import get_frictionless_tableresource - def invert_dictionary(d: MutableMapping) -> dict: """Invert a dictionary from key - value to value - key. Assumes one to one mapping between keys and values. diff --git a/src/hdx/utilities/downloader.py b/src/hdx/utilities/downloader.py index 7d2a4fa..9beb496 100755 --- a/src/hdx/utilities/downloader.py +++ b/src/hdx/utilities/downloader.py @@ -2,12 +2,12 @@ import hashlib import logging -from collections.abc import Callable, Iterator +from collections.abc import Callable, Iterator, Sequence from copy import deepcopy from os import remove from os.path import exists, isfile, join, split, splitext from pathlib import Path -from typing import Any, Sequence +from typing import Any from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit import requests @@ -24,7 +24,6 @@ from hdx.utilities.path import get_filename_from_url, get_temp_dir from hdx.utilities.session import get_session - logger = logging.getLogger(__name__) diff --git a/src/hdx/utilities/email.py b/src/hdx/utilities/email.py index 9a053f3..4619d99 100755 --- a/src/hdx/utilities/email.py +++ b/src/hdx/utilities/email.py @@ -2,14 +2,14 @@ import logging import smtplib +from collections.abc import Sequence from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from os.path import expanduser, join -from typing import Any, Sequence +from typing import Any from hdx.utilities.loader import load_json, load_yaml - try: from email_validator import EmailNotValidError, validate_email except ImportError: diff --git a/src/hdx/utilities/error_handler.py b/src/hdx/utilities/error_handler.py index ac8851e..e447caa 100644 --- a/src/hdx/utilities/error_handler.py +++ b/src/hdx/utilities/error_handler.py @@ -2,11 +2,11 @@ import logging import sys -from typing import Any, Sequence +from collections.abc import Sequence +from typing import Any from 
hdx.utilities.dictandlist import dict_of_sets_add - logger = logging.getLogger(__name__) diff --git a/src/hdx/utilities/loader.py b/src/hdx/utilities/loader.py index 33aef53..580fb23 100755 --- a/src/hdx/utilities/loader.py +++ b/src/hdx/utilities/loader.py @@ -1,7 +1,8 @@ """Loading utilities for YAML, JSON etc.""" import json -from typing import Any, Sequence, Mapping +from collections.abc import Mapping, Sequence +from typing import Any from warnings import warn from ruamel.yaml import YAML @@ -9,7 +10,6 @@ from hdx.utilities.dictandlist import merge_dictionaries, merge_two_dictionaries - class LoadError(Exception): pass diff --git a/src/hdx/utilities/matching.py b/src/hdx/utilities/matching.py index 73e128e..b263644 100644 --- a/src/hdx/utilities/matching.py +++ b/src/hdx/utilities/matching.py @@ -1,13 +1,11 @@ import difflib import re -from collections.abc import Callable -from typing import Sequence +from collections.abc import Callable, Sequence from pyphonetics import RefinedSoundex from hdx.utilities.text import normalise - TEMPLATE_VARIABLES = re.compile("{{.*?}}") diff --git a/src/hdx/utilities/path.py b/src/hdx/utilities/path.py index 6de51cf..b6eb1a4 100755 --- a/src/hdx/utilities/path.py +++ b/src/hdx/utilities/path.py @@ -4,7 +4,7 @@ import inspect import logging import sys -from collections.abc import Iterable +from collections.abc import Generator, Iterable, Sequence from os import getenv, makedirs, remove from os.path import ( abspath, @@ -17,14 +17,13 @@ ) from shutil import rmtree from tempfile import gettempdir -from typing import Any, Sequence, Generator +from typing import Any from urllib.parse import unquote_plus, urlsplit from slugify import slugify from hdx.utilities.loader import load_text from hdx.utilities.saver import save_text - from hdx.utilities.uuid import get_uuid logger = logging.getLogger(__name__) diff --git a/src/hdx/utilities/retriever.py b/src/hdx/utilities/retriever.py index d6e1277..6019fa2 100644 --- 
a/src/hdx/utilities/retriever.py +++ b/src/hdx/utilities/retriever.py @@ -1,10 +1,10 @@ import logging -from collections.abc import Iterator +from collections.abc import Iterator, Sequence from copy import deepcopy from os import mkdir from os.path import join from shutil import rmtree -from typing import Any, Sequence +from typing import Any from slugify import slugify @@ -14,7 +14,6 @@ from hdx.utilities.path import get_filename_extension_from_url from hdx.utilities.saver import save_json, save_text, save_yaml - logger = logging.getLogger(__name__) diff --git a/src/hdx/utilities/saver.py b/src/hdx/utilities/saver.py index 207bcd8..0ebe4cc 100644 --- a/src/hdx/utilities/saver.py +++ b/src/hdx/utilities/saver.py @@ -3,9 +3,9 @@ import csv import json from collections import OrderedDict -from collections.abc import Callable, Iterable +from collections.abc import Callable, Iterable, Mapping, Sequence from os.path import join -from typing import Any, Sequence, Mapping +from typing import Any from ruamel.yaml import ( YAML, @@ -18,7 +18,6 @@ from hdx.utilities.matching import match_template_variables - class UnPrettyRTRepresenter(RoundTripRepresenter): def represent_none(self, data: Any) -> Any: return self.represent_scalar("tag:yaml.org,2002:null", "null")