From 5105e99748a928e8d384a4cffb7b6f6fef0b40f1 Mon Sep 17 00:00:00 2001 From: Alex Parsons Date: Thu, 12 Dec 2024 14:00:01 +0000 Subject: [PATCH] Add '--no-progress' regmem option - Don't pollute the cron email --- pyscraper/regmem/__main__.py | 27 ++++++++++++++---- pyscraper/regmem/commons/process.py | 44 ++++++++++++++++++++++------- scripts/dailyupdate | 2 +- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/pyscraper/regmem/__main__.py b/pyscraper/regmem/__main__.py index 0574d777..2f411f9c 100644 --- a/pyscraper/regmem/__main__.py +++ b/pyscraper/regmem/__main__.py @@ -14,11 +14,17 @@ def cli(): @click.option("--chamber", type=str, default="commons") @click.option("--force-refresh", is_flag=True) @click.option("--quiet", is_flag=True) +@click.option("--no-progress", is_flag=True) def download_all_registers( - chamber: str, force_refresh: bool = False, quiet: bool = False + chamber: str, + force_refresh: bool = False, + quiet: bool = False, + no_progress: bool = False, ): if chamber == "commons": - commons_process.download_all_registers(force_refresh=force_refresh, quiet=quiet) + commons_process.download_all_registers( + force_refresh=force_refresh, quiet=quiet, no_progress=no_progress + ) else: raise ValueError(f"Unknown chamber: {chamber}") @@ -28,12 +34,17 @@ def download_all_registers( @click.option("--date", type=datetime.date) @click.option("--force-refresh", is_flag=True) @click.option("--quiet", is_flag=True) +@click.option("--no-progress", is_flag=True) def download_register_from_date( - chamber: str, date: datetime.date, force_refresh: bool = False, quiet: bool = False + chamber: str, + date: datetime.date, + force_refresh: bool = False, + quiet: bool = False, + no_progress: bool = False, ): if chamber == "commons": commons_process.download_register_from_date( - date, force_refresh=force_refresh, quiet=quiet + date, force_refresh=force_refresh, quiet=quiet, no_progress=no_progress ) else: raise ValueError(f"Unknown chamber: {chamber}") 
@@ -43,12 +54,16 @@ def download_register_from_date( @click.option("--chamber", type=str, default="commons") @click.option("--force-refresh", is_flag=True) @click.option("--quiet", is_flag=True) +@click.option("--no-progress", is_flag=True) def download_latest_register( - chamber: str, force_refresh: bool = False, quiet: bool = False + chamber: str, + force_refresh: bool = False, + quiet: bool = False, + no_progress: bool = False, ): if chamber == "commons": commons_process.download_latest_register( - force_refresh=force_refresh, quiet=quiet + force_refresh=force_refresh, quiet=quiet, no_progress=no_progress ) else: raise ValueError(f"Unknown chamber: {chamber}") diff --git a/pyscraper/regmem/commons/process.py b/pyscraper/regmem/commons/process.py index 4f8e7a77..4b5fb186 100644 --- a/pyscraper/regmem/commons/process.py +++ b/pyscraper/regmem/commons/process.py @@ -50,7 +50,10 @@ def get_popolo() -> Popolo: def recursive_fetch( - url: str, params: Optional[dict[str, Any]] = None, quiet: bool = False + url: str, + params: Optional[dict[str, Any]] = None, + quiet: bool = False, + no_progress: bool = False, ): """ Meta API handler @@ -66,7 +69,7 @@ def recursive_fetch( "Take must be less than or equal to 20 - API limit annoyingly." 
) - bar = tqdm(desc="Fetching ", unit="items", disable=quiet) + bar = tqdm(desc="Fetching ", unit="items", disable=quiet or no_progress) while continue_fetching: send_params = {"Take": take, "Skip": skip} @@ -98,9 +101,11 @@ def get_single_item( return interest -def get_list_of_registers(quiet: bool = False): +def get_list_of_registers( + quiet: bool = False, no_progress: bool = False +) -> list[PublishedRegister]: url = REGISTER_BASE + "api/v1/Registers" - items = recursive_fetch(url, quiet=quiet) + items = recursive_fetch(url, quiet=quiet, no_progress=no_progress) registers = TypeAdapter(list[PublishedRegister]).validate_python(items) return registers @@ -236,11 +241,15 @@ class RegisterManager: register_date: datetime.date parldata_dir: Path quiet: bool = False + no_progress: bool = False def get_register_from_api(self): url = REGISTER_BASE + "api/v1/Interests/" items = recursive_fetch( - url, params={"RegisterId": self.register_id}, quiet=self.quiet + url, + params={"RegisterId": self.register_id}, + quiet=self.quiet, + no_progress=self.no_progress, ) interests = TypeAdapter(list[PublishedInterest]).validate_python(items) return interests @@ -376,6 +385,7 @@ def download_register_from_id_and_date( date: datetime.date, force_refresh: bool = False, quiet: bool = False, + no_progress: bool = False, ): if not quiet: rich.print(f"Downloading commons register {register_id} from {date}") @@ -384,34 +394,47 @@ def download_register_from_id_and_date( register_date=date, parldata_dir=parldata_path, quiet=quiet, + no_progress=no_progress, ) manager.write_mysoc_regmem(force_refresh=force_refresh, quiet=quiet) def download_register_from_date( - date: datetime.date, force_refresh: bool = False, quiet: bool = False + date: datetime.date, + force_refresh: bool = False, + quiet: bool = False, + no_progress: bool = False, ): - for register in get_list_of_registers(): + for register in get_list_of_registers(quiet=quiet, no_progress=no_progress): if register.published_date == 
date: return download_register_from_id_and_date( register.id, register.published_date, force_refresh=force_refresh, quiet=quiet, + no_progress=no_progress, ) raise ValueError(f"No register found for {date}") -def download_latest_register(force_refresh: bool = False, quiet: bool = False): +def download_latest_register( + force_refresh: bool = False, quiet: bool = False, no_progress: bool = False +): - registers = get_list_of_registers(quiet) + registers = get_list_of_registers(quiet=quiet, no_progress=no_progress) latest = max(registers, key=lambda x: x.published_date) return download_register_from_id_and_date( - latest.id, latest.published_date, force_refresh=force_refresh, quiet=quiet + latest.id, + latest.published_date, + force_refresh=force_refresh, + quiet=quiet, + no_progress=no_progress, ) -def download_all_registers(force_refresh: bool = False, quiet: bool = False): +def download_all_registers( + force_refresh: bool = False, quiet: bool = False, no_progress: bool = False +): - registers = get_list_of_registers(quiet) + registers = get_list_of_registers(quiet=quiet, no_progress=no_progress) for register in registers: download_register_from_id_and_date( @@ -419,6 +442,7 @@ def download_all_registers(force_refresh: bool = False, quiet: bool = False): register.published_date, force_refresh=force_refresh, quiet=quiet, + no_progress=no_progress, ) diff --git a/scripts/dailyupdate b/scripts/dailyupdate index bd3bdb05..710cae02 100755 --- a/scripts/dailyupdate +++ b/scripts/dailyupdate @@ -6,7 +6,7 @@ source ~/parlparse/scripts/consts # Update register of members interests cd ~/parlparse -poetry run python -m pyscraper.regmem download-all-registers --chamber commons +poetry run python -m pyscraper.regmem download-all-registers --chamber commons --no-progress # Get updated members list all-members.xml, in case it changed #cd ~/parlparse/members