diff --git a/blazingai/kaggle.py b/blazingai/kaggle.py deleted file mode 100644 index d771e17..0000000 --- a/blazingai/kaggle.py +++ /dev/null @@ -1,50 +0,0 @@ -from pathlib import Path -from typing import Protocol, Type - -from loguru import logger - - -class KaggleAPITemplate(Protocol): - def competition_download_cli(self, competition, path) -> None: - pass - - -def download_comp_data( - comp_name: str, path: Path, api: Type[KaggleAPITemplate] -) -> Path: - """Download competition data from Kaggle, and save it to `path` - destination. - - Args: - api: Kaggle API client. - comp_name: Short name of Kaggle competition. - path: Destination. - """ - api.competition_download_cli(competition=comp_name, path=path) - return path / f"{comp_name}.zip" - - -def extract_comp_data(fname: Path, path: Path) -> None: - """Inflate zip archive to `path` Destination. - - Args: - fname: Zip archive to inflate. - path: Destination. - """ - if not path.exists(): - logger.info(f"Extracting {fname} to {path}") - - import zipfile - - from tqdm import tqdm - - with zipfile.ZipFile(fname) as zf: - for member in tqdm(zf.infolist()): - try: - zf.extract(member, path) - except zipfile.error as e: - logger.warning(f"{member}: {e}") - else: - # NOTE: does not check that the content of `path` and the `fname` - # zip archive matches - logger.info(f"{path}: Skipping. Found local copy of extract archive") diff --git a/tests/test_kaggle.py b/tests/test_kaggle.py deleted file mode 100644 index f449185..0000000 --- a/tests/test_kaggle.py +++ /dev/null @@ -1,44 +0,0 @@ -import zipfile -from pathlib import Path - -from blazingai.kaggle import download_comp_data, extract_comp_data - - -class KaggleAPIFake: - def __init__(self) -> None: - pass - - def competition_download_cli(self, competition, path) -> None: - pass - - -def test_download_from_kaggle(): - result: Path = download_comp_data( - api=KaggleAPIFake(), comp_name="test", path=Path("tmp") - ) - assert result == Path("tmp/test.zip") - - -def test_extract_comp_data(tmp_path): - # create empty folder - p: Path = tmp_path / "sub" - p.mkdir() - - assert len(list(p.iterdir())) == 0 - - # create one file - fname: Path = p / "file.txt" - fname.write_text("content") - - assert len(list(p.iterdir())) == 1 - - # create zip archive - with zipfile.ZipFile(p / "file.zip", "w") as zf: - zf.write(fname) - - assert len(list(p.iterdir())) == 2 - - # extract zip archive - extract_comp_data(fname=p / "file.zip", path=p / "plus.txt") - - assert len(list(p.iterdir())) == 3