From d9076c323a222c01e8fd9522094bc7e4af54b97e Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Fri, 23 Aug 2024 14:14:19 +0800 Subject: [PATCH 1/7] dev(narugo): add ils command --- hfutils/entry/cli.py | 2 + hfutils/entry/ils.py | 184 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 hfutils/entry/ils.py diff --git a/hfutils/entry/cli.py b/hfutils/entry/cli.py index b7d31bef53..df4d96b82e 100644 --- a/hfutils/entry/cli.py +++ b/hfutils/entry/cli.py @@ -1,6 +1,7 @@ from .clone import _add_clone_subcommand from .dispatch import hfutilcli from .download import _add_download_subcommand +from .ils import _add_ils_subcommand from .index import _add_index_subcommand from .ls import _add_ls_subcommand from .ls_repo import _add_ls_repo_subcommand @@ -19,6 +20,7 @@ _add_rollback_subcommand, _add_clone_subcommand, _add_tree_subcommand, + _add_ils_subcommand, ] cli = hfutilcli diff --git a/hfutils/entry/ils.py b/hfutils/entry/ils.py new file mode 100644 index 0000000000..80a614d1cd --- /dev/null +++ b/hfutils/entry/ils.py @@ -0,0 +1,184 @@ +import os.path +import warnings +from typing import Optional, Literal + +import click +import numpy as np +import pandas as pd +from hbutils.scale import size_to_bytes_str +from hbutils.string import plural_word, titleize +from huggingface_hub import configure_http_backend + +from .base import CONTEXT_SETTINGS +from ..index import hf_tar_get_index +from ..operate.base import REPO_TYPES +from ..utils import get_requests_session, get_file_type, FileItemType +from ..utils.path import RepoTypeTyping, hf_normpath + +_FT_NAME_MAP = { + FileItemType.IMAGE: 'image', + FileItemType.ARCHIVE: 'archive/compressed', + FileItemType.MODEL: 'model', + FileItemType.DATA: 'data', + FileItemType.FILE: 'other', +} + + +def _add_ils_subcommand(cli: click.Group) -> click.Group: + @cli.command('ils', help='List files from HuggingFace repository\'s index tar file.\n\n' + 'Set environment $HF_TOKEN to use your own 
access token.', + context_settings=CONTEXT_SETTINGS) + @click.option('-r', '--repository', 'repo_id', type=str, required=True, + help='Repository to download from.') + @click.option('--idx_repository', 'idx_repo_id', type=str, default=None, + help='Index repository to download from.', show_default=True) + @click.option('-t', '--type', 'repo_type', type=click.Choice(REPO_TYPES), default='dataset', + help='Type of the HuggingFace repository.', show_default=True) + @click.option('-R', '--revision', 'revision', type=str, default='main', + help='Revision of repository.', show_default=True) + @click.option('-a', '--archive_file', 'archive_file', type=str, required=True, + help='Archive file in repository.', show_default=True) + @click.option('-i', '--idx_file', 'idx_file', type=str, default=None, + help='', show_default=True) + @click.option('-l', '--list', 'show_detailed', is_flag=True, type=bool, default=False, + help='Show detailed file information.', show_default=True) + @click.option('-s', '--sort_by', 'sort_by', type=click.Choice(['offset', 'name', 'size']), default='offset', + help='Sort order of files.', show_default=True) + @click.option('-o', '--order_by', 'order_by', type=click.Choice(['asc', 'desc']), default='asc', + help='Order of the mentioned sorting.', show_default=True) + @click.option('-I', '--information', 'show_information', type=bool, is_flag=True, default=False, + help='Show information of index file.', show_default=True) + def ls(repo_id: str, idx_repo_id: Optional[str], repo_type: RepoTypeTyping, revision: str, + show_detailed: bool, show_information: bool, + sort_by: Literal['offset', 'name', 'size'], order_by: Literal['asc', 'desc'], + archive_file: str, idx_file: Optional[str] = None): + configure_http_backend(get_requests_session) + + idx_info = hf_tar_get_index( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + archive_in_repo=archive_file, + + idx_repo_id=idx_repo_id or repo_id, + idx_repo_type=repo_type, + 
idx_revision=revision, + idx_file_in_repo=idx_file, + ) + if show_information: + print('Repo ID: ' + click.style(repo_id, underline=True)) + if idx_repo_id: + print('Index Repo ID: ' + click.style(idx_repo_id, underline=True)) + print('Repo Type: ' + click.style(repo_type, underline=True)) + print('Revision: ' + click.style(revision, underline=True)) + print('Archive File: ' + click.style(archive_file, underline=True)) + if idx_file: + print('Index File: ' + click.style(idx_file, underline=True)) + print() + + print('File Size: ' + click.style(size_to_bytes_str(idx_info['filesize'], precision=3)) + + ' (' + click.style(plural_word(idx_info['filesize'], "Byte"), underline=True) + ')') + print('Native Hash: ' + click.style(idx_info['hash'], underline=True)) + print('LFS Hash: ' + click.style(idx_info['hash_lfs'], underline=True)) + print('Files: ' + click.style(plural_word(len(idx_info['files']), 'file'), underline=True)) + if idx_info['files']: + d_files = {} + for file in idx_info['files'].keys(): + type_ = get_file_type(file) + d_files[type_] = d_files.get(type_, 0) + 1 + for type_, type_name in _FT_NAME_MAP.items(): + if d_files.get(type_, 0) > 0: + print(f' {titleize(type_name)} Files: ' + + click.style(plural_word(d_files[type_], "file"), underline=True)) + pass + + d_exts = {} + for file in idx_info['files'].keys(): + _, ext = os.path.splitext(file) + d_exts[ext] = d_exts.get(ext, 0) + 1 + print('File Extensions:') + for ext, count in sorted(d_exts.items(), key=lambda x: (-x[1], x[0])): + print(f' {ext or ""} : ' + click.style(plural_word(count, "file"), underline=True)) + + # Convert to numpy array for easier calculations + file_sizes = [file_info['size'] for file, file_info in idx_info['files'].items()] + sizes = np.array(file_sizes) + + # Basic statistics + total_files = len(sizes) + total_size = np.sum(sizes) + mean_size = np.mean(sizes) + median_size = np.median(sizes) + min_size = np.min(sizes) + max_size = np.max(sizes) + + # Quartiles + q1, q3 = 
np.percentile(sizes, [25, 75]) + iqr = q3 - q1 + std_dev = np.std(sizes) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + print(f"Total Size: {size_to_bytes_str(total_size.item(), precision=3)}") + print(f" Average File Size: {size_to_bytes_str(mean_size.item(), precision=3)}") + print(f" Median File Size: {size_to_bytes_str(median_size.item(), precision=3)}") + print(f" Smallest File Size: {size_to_bytes_str(min_size.item(), precision=3)}") + print(f" Largest File Size: {size_to_bytes_str(max_size.item(), precision=3)}") + print(f" Standard Deviation: {size_to_bytes_str(std_dev.item(), precision=3)}") + print("Quartiles:") + print(f" Q1 (25th Percentile): {size_to_bytes_str(q1.item(), precision=3)}") + print(f" Q2 (50th Percentile, Median): {size_to_bytes_str(median_size.item(), precision=3)}") + print(f" Q3 (75th Percentile): {size_to_bytes_str(q3.item(), precision=3)}") + print(f" Interquartile Range (IQR): {size_to_bytes_str(iqr.item(), precision=3)}") + + else: + rows = [] + for file, file_info in sorted(idx_info['files'].items(), key=lambda x: (x[1]['offset'], x[0])): + rows.append({ + 'file': hf_normpath(file), + 'offset': file_info['offset'], + 'size': file_info['size'], + + 't_file': str(file), + 't_offset': str(file_info['offset']), + 't_size': plural_word(file_info['size'], "Byte"), + 't_size_text': size_to_bytes_str(file_info['size'], precision=3), + 't_sha256': file_info['sha256'], + }) + df = pd.DataFrame(rows) + if sort_by == 'offset': + df = df.sort_values(by=['offset', 'file'], ascending=order_by == 'asc') + elif sort_by == 'name': + df = df.sort_values(by=['file', 'offset'], ascending=order_by == 'asc') + elif sort_by == 'size': + df = df.sort_values(by=['size', 'offset', 'file'], ascending=order_by == 'asc') + else: + raise ValueError(f'Unknown sort_by {sort_by!r}.') # pragma: no cover + + if len(df): + if show_detailed: + max_t_file_len = df['t_file'].map(len).max().item() + max_t_offset_len = 
df['t_offset'].map(len).max().item() + max_t_size_len = df['t_size'].map(len).max().item() + max_t_size_text_len = df['t_size_text'].map(len).max().item() + max_t_sha256_len = df['t_sha256'].map(len).max().item() + + for row in df.to_dict('records'): + print(' ' * (max_t_offset_len - len(row['t_offset'])) + row['t_offset'], end='') + print(' | ', end='') + + fc = get_file_type(row['t_file']) + print(' ' * (max_t_file_len - len(row['t_file'])) + + click.style(row['t_file'], fg=fc.render_color), end=' ') + + print(' ' * (max_t_size_text_len - len(row['t_size_text'])) + + click.style(row['t_size_text'], underline=True), end=' ') + print(' ' * (max_t_sha256_len - len(row['t_sha256'])) + + click.style(row['t_sha256'])) + + else: + for file in df['t_file']: + fc = get_file_type(file) + print(click.style(file, fg=fc.render_color)) + + return cli From cc2cfc628df80c078d7c1609cc6c3ccca1a99043 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Fri, 23 Aug 2024 14:44:15 +0800 Subject: [PATCH 2/7] dev(narugo): remove numpy usage --- hfutils/entry/ils.py | 45 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/hfutils/entry/ils.py b/hfutils/entry/ils.py index 80a614d1cd..538884f1dc 100644 --- a/hfutils/entry/ils.py +++ b/hfutils/entry/ils.py @@ -1,9 +1,9 @@ import os.path +import statistics import warnings from typing import Optional, Literal import click -import numpy as np import pandas as pd from hbutils.scale import size_to_bytes_str from hbutils.string import plural_word, titleize @@ -102,34 +102,35 @@ def ls(repo_id: str, idx_repo_id: Optional[str], repo_type: RepoTypeTyping, revi # Convert to numpy array for easier calculations file_sizes = [file_info['size'] for file, file_info in idx_info['files'].items()] - sizes = np.array(file_sizes) # Basic statistics - total_files = len(sizes) - total_size = np.sum(sizes) - mean_size = np.mean(sizes) - median_size = np.median(sizes) - min_size = np.min(sizes) - max_size = 
np.max(sizes) + total_size = sum(file_sizes) + mean_size = statistics.mean(file_sizes) + median_size = statistics.median(file_sizes) + min_size = min(file_sizes) + max_size = max(file_sizes) # Quartiles - q1, q3 = np.percentile(sizes, [25, 75]) + sorted_sizes = sorted(file_sizes) + n = len(sorted_sizes) + q1 = sorted_sizes[n // 4] + q3 = sorted_sizes[(3 * n) // 4] iqr = q3 - q1 - std_dev = np.std(sizes) + std_dev = statistics.stdev(file_sizes) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) - print(f"Total Size: {size_to_bytes_str(total_size.item(), precision=3)}") - print(f" Average File Size: {size_to_bytes_str(mean_size.item(), precision=3)}") - print(f" Median File Size: {size_to_bytes_str(median_size.item(), precision=3)}") - print(f" Smallest File Size: {size_to_bytes_str(min_size.item(), precision=3)}") - print(f" Largest File Size: {size_to_bytes_str(max_size.item(), precision=3)}") - print(f" Standard Deviation: {size_to_bytes_str(std_dev.item(), precision=3)}") + print(f"Total Size: {size_to_bytes_str(total_size, precision=3)}") + print(f" Average File Size: {size_to_bytes_str(mean_size, precision=3)}") + print(f" Median File Size: {size_to_bytes_str(median_size, precision=3)}") + print(f" Smallest File Size: {size_to_bytes_str(min_size, precision=3)}") + print(f" Largest File Size: {size_to_bytes_str(max_size, precision=3)}") + print(f" Standard Deviation: {size_to_bytes_str(std_dev, precision=3)}") print("Quartiles:") - print(f" Q1 (25th Percentile): {size_to_bytes_str(q1.item(), precision=3)}") - print(f" Q2 (50th Percentile, Median): {size_to_bytes_str(median_size.item(), precision=3)}") - print(f" Q3 (75th Percentile): {size_to_bytes_str(q3.item(), precision=3)}") - print(f" Interquartile Range (IQR): {size_to_bytes_str(iqr.item(), precision=3)}") + print(f" Q1 (25th Percentile): {size_to_bytes_str(q1, precision=3)}") + print(f" Q2 (50th Percentile, Median): {size_to_bytes_str(median_size, precision=3)}") + print(f" Q3 
(75th Percentile): {size_to_bytes_str(q3, precision=3)}") + print(f" Interquartile Range (IQR): {size_to_bytes_str(iqr, precision=3)}") else: rows = [] @@ -159,13 +160,11 @@ def ls(repo_id: str, idx_repo_id: Optional[str], repo_type: RepoTypeTyping, revi if show_detailed: max_t_file_len = df['t_file'].map(len).max().item() max_t_offset_len = df['t_offset'].map(len).max().item() - max_t_size_len = df['t_size'].map(len).max().item() max_t_size_text_len = df['t_size_text'].map(len).max().item() max_t_sha256_len = df['t_sha256'].map(len).max().item() for row in df.to_dict('records'): - print(' ' * (max_t_offset_len - len(row['t_offset'])) + row['t_offset'], end='') - print(' | ', end='') + print(' ' * (max_t_offset_len - len(row['t_offset'])) + row['t_offset'], end=' | ') fc = get_file_type(row['t_file']) print(' ' * (max_t_file_len - len(row['t_file'])) From f3b541e570031a58c3d8b2484eb0c2dbfea952e3 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Sat, 24 Aug 2024 00:46:15 +0800 Subject: [PATCH 3/7] dev(narugo): remove pandas --- hfutils/entry/ils.py | 81 ++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/hfutils/entry/ils.py b/hfutils/entry/ils.py index 538884f1dc..e36211d6b2 100644 --- a/hfutils/entry/ils.py +++ b/hfutils/entry/ils.py @@ -4,13 +4,12 @@ from typing import Optional, Literal import click -import pandas as pd from hbutils.scale import size_to_bytes_str from hbutils.string import plural_word, titleize from huggingface_hub import configure_http_backend from .base import CONTEXT_SETTINGS -from ..index import hf_tar_get_index +from ..index import hf_tar_get_index, hf_tar_validate from ..operate.base import REPO_TYPES from ..utils import get_requests_session, get_file_type, FileItemType from ..utils.path import RepoTypeTyping, hf_normpath @@ -66,21 +65,21 @@ def ls(repo_id: str, idx_repo_id: Optional[str], repo_type: RepoTypeTyping, revi idx_file_in_repo=idx_file, ) if show_information: - print('Repo ID: 
' + click.style(repo_id, underline=True)) + print('Repo ID: ' + click.style(repo_id, underline=True, fg='blue')) if idx_repo_id: - print('Index Repo ID: ' + click.style(idx_repo_id, underline=True)) - print('Repo Type: ' + click.style(repo_type, underline=True)) - print('Revision: ' + click.style(revision, underline=True)) - print('Archive File: ' + click.style(archive_file, underline=True)) + print('Index Repo ID: ' + click.style(idx_repo_id, underline=True, fg='blue')) + print('Repo Type: ' + click.style(repo_type, underline=True, fg='blue')) + print('Revision: ' + click.style(revision, underline=True, fg='blue')) + print('Archive File: ' + click.style(archive_file, underline=True, fg='blue')) if idx_file: - print('Index File: ' + click.style(idx_file, underline=True)) + print('Index File: ' + click.style(idx_file, underline=True, fg='blue')) print() - print('File Size: ' + click.style(size_to_bytes_str(idx_info['filesize'], precision=3)) + print('File Size: ' + click.style(size_to_bytes_str(idx_info['filesize'], precision=3), fg='blue') + ' (' + click.style(plural_word(idx_info['filesize'], "Byte"), underline=True) + ')') print('Native Hash: ' + click.style(idx_info['hash'], underline=True)) print('LFS Hash: ' + click.style(idx_info['hash_lfs'], underline=True)) - print('Files: ' + click.style(plural_word(len(idx_info['files']), 'file'), underline=True)) + print('Files: ' + click.style(plural_word(len(idx_info['files']), 'file'), underline=True, fg='blue')) if idx_info['files']: d_files = {} for file in idx_info['files'].keys(): @@ -120,17 +119,43 @@ def ls(repo_id: str, idx_repo_id: Optional[str], repo_type: RepoTypeTyping, revi with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) - print(f"Total Size: {size_to_bytes_str(total_size, precision=3)}") - print(f" Average File Size: {size_to_bytes_str(mean_size, precision=3)}") - print(f" Median File Size: {size_to_bytes_str(median_size, precision=3)}") - print(f" Smallest File Size: 
{size_to_bytes_str(min_size, precision=3)}") - print(f" Largest File Size: {size_to_bytes_str(max_size, precision=3)}") - print(f" Standard Deviation: {size_to_bytes_str(std_dev, precision=3)}") + print(f"Total Size: " + + click.style(size_to_bytes_str(total_size, precision=3), underline=True, fg='blue')) + print(f" Average File Size: " + + click.style(size_to_bytes_str(mean_size, precision=3), underline=True, fg='blue')) + print(f" Median File Size: " + + click.style(size_to_bytes_str(median_size, precision=3), underline=True, fg='blue')) + print(f" Smallest File Size: " + + click.style(size_to_bytes_str(min_size, precision=3), underline=True)) + print(f" Largest File Size: " + + click.style(size_to_bytes_str(max_size, precision=3), underline=True)) + print(f" Standard Deviation: " + + click.style(size_to_bytes_str(std_dev, precision=3), underline=True)) print("Quartiles:") print(f" Q1 (25th Percentile): {size_to_bytes_str(q1, precision=3)}") print(f" Q2 (50th Percentile, Median): {size_to_bytes_str(median_size, precision=3)}") print(f" Q3 (75th Percentile): {size_to_bytes_str(q3, precision=3)}") print(f" Interquartile Range (IQR): {size_to_bytes_str(iqr, precision=3)}") + print() + + is_ready = hf_tar_validate( + repo_id=repo_id, + repo_type=repo_type, + revision=revision, + archive_in_repo=archive_file, + + idx_repo_id=idx_repo_id or repo_id, + idx_repo_type=repo_type, + idx_revision=revision, + idx_file_in_repo=idx_file, + ) + + print('Status: ' + ( + click.style('Up-To-Date', fg='green', underline=True) if is_ready else + click.style('Outdated', fg='yellow', underline=True) + )) + if not is_ready: + print('Index file is recommended to get refreshed.') else: rows = [] @@ -146,37 +171,35 @@ def ls(repo_id: str, idx_repo_id: Optional[str], repo_type: RepoTypeTyping, revi 't_size_text': size_to_bytes_str(file_info['size'], precision=3), 't_sha256': file_info['sha256'], }) - df = pd.DataFrame(rows) if sort_by == 'offset': - df = df.sort_values(by=['offset', 
'file'], ascending=order_by == 'asc') + rows = sorted(rows, key=lambda x: (x['offset'], x['file']), reverse=(order_by != 'asc')) elif sort_by == 'name': - df = df.sort_values(by=['file', 'offset'], ascending=order_by == 'asc') + rows = sorted(rows, key=lambda x: (x['file'], x['offset']), reverse=(order_by != 'asc')) elif sort_by == 'size': - df = df.sort_values(by=['size', 'offset', 'file'], ascending=order_by == 'asc') + rows = sorted(rows, key=lambda x: (x['size'], x['offset'], x['file']), reverse=(order_by != 'asc')) else: raise ValueError(f'Unknown sort_by {sort_by!r}.') # pragma: no cover - if len(df): + if len(rows): if show_detailed: - max_t_file_len = df['t_file'].map(len).max().item() - max_t_offset_len = df['t_offset'].map(len).max().item() - max_t_size_text_len = df['t_size_text'].map(len).max().item() - max_t_sha256_len = df['t_sha256'].map(len).max().item() + max_t_file_len = max(len(row['t_file']) for row in rows) + max_t_offset_len = max(len(row['t_offset']) for row in rows) + max_t_size_text_len = max(len(row['t_size_text']) for row in rows) + max_t_sha256_len = max(len(row['t_sha256']) for row in rows) - for row in df.to_dict('records'): + for row in rows: print(' ' * (max_t_offset_len - len(row['t_offset'])) + row['t_offset'], end=' | ') - fc = get_file_type(row['t_file']) print(' ' * (max_t_file_len - len(row['t_file'])) + click.style(row['t_file'], fg=fc.render_color), end=' ') - print(' ' * (max_t_size_text_len - len(row['t_size_text'])) + click.style(row['t_size_text'], underline=True), end=' ') print(' ' * (max_t_sha256_len - len(row['t_sha256'])) + click.style(row['t_sha256'])) else: - for file in df['t_file']: + for row in rows: + file = row['file'] fc = get_file_type(file) print(click.style(file, fg=fc.render_color)) From b703a4186ba7d68db737088abfd141670169e8ed Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Sun, 25 Aug 2024 15:22:22 +0800 Subject: [PATCH 4/7] dev(narugo): add ils test --- test/entry/test_ils.py | 211 
+++++++++++++++++++++++++++++++++++++++++ 1 file changed, 211 insertions(+) create mode 100644 test/entry/test_ils.py diff --git a/test/entry/test_ils.py b/test/entry/test_ils.py new file mode 100644 index 0000000000..940edcb54c --- /dev/null +++ b/test/entry/test_ils.py @@ -0,0 +1,211 @@ +import click +import pytest +from hbutils.testing import simulate_entry + +from hfutils.entry import hfutilscli + + +@pytest.mark.unittest +class TestEntryIls: + def test_simple_ils_basic(self): + result = simulate_entry(hfutilscli, [ + 'hfutils', 'ils', + '-r', 'narugo/test_cos5t_tars', + '-a', 'mashu_skins.tar', + ]) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + '常夏的泳装Ver_02.png', + '愚人节.png', + 'Grail_League_1星.png', + '常夏的泳装.png', + 'Grail_League_3星.png', + '第4阶段.png', + 'Grail_League_5星.png', + '.meta.json', + '第3阶段.png', + 'Grail_League_4星.png', + '奥特瑙斯_改建型.png', + '愚人节_奥特瑙斯.png', + 'Grail_League_2星.png', + 'Bright_Voyager.png', + '第1阶段.png', + '第2阶段.png', + '奥特瑙斯.png' + ] + + def test_ils_desc(self): + result = simulate_entry(hfutilscli, [ + 'hfutils', 'ils', + '-r', 'narugo/test_cos5t_tars', + '-a', 'mashu_skins.tar', + '-o', 'desc', + ]) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + '奥特瑙斯.png', + '第2阶段.png', + '第1阶段.png', + 'Bright_Voyager.png', + 'Grail_League_2星.png', + '愚人节_奥特瑙斯.png', + '奥特瑙斯_改建型.png', + 'Grail_League_4星.png', + '第3阶段.png', + '.meta.json', + 'Grail_League_5星.png', + '第4阶段.png', + 'Grail_League_3星.png', + '常夏的泳装.png', + 'Grail_League_1星.png', + '愚人节.png', + '常夏的泳装Ver_02.png' + ] + + def test_ils_default(self): + result = simulate_entry(hfutilscli, ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "常夏的泳装Ver_02.png", + "愚人节.png", + "Grail_League_1星.png", + "常夏的泳装.png", + "Grail_League_3星.png", + "第4阶段.png", + 
"Grail_League_5星.png", + ".meta.json", + "第3阶段.png", + "Grail_League_4星.png", + "奥特瑙斯_改建型.png", + "愚人节_奥特瑙斯.png", + "Grail_League_2星.png", + "Bright_Voyager.png", + "第1阶段.png", + "第2阶段.png", + "奥特瑙斯.png" + ] + + def test_ils_desc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-o', + 'desc']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "奥特瑙斯.png", + "第2阶段.png", + "第1阶段.png", + "Bright_Voyager.png", + "Grail_League_2星.png", + "愚人节_奥特瑙斯.png", + "奥特瑙斯_改建型.png", + "Grail_League_4星.png", + "第3阶段.png", + ".meta.json", + "Grail_League_5星.png", + "第4阶段.png", + "Grail_League_3星.png", + "常夏的泳装.png", + "Grail_League_1星.png", + "愚人节.png", + "常夏的泳装Ver_02.png" + ] + + def test_ils_name_asc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-s', + 'name']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + ".meta.json", + "Bright_Voyager.png", + "Grail_League_1星.png", + "Grail_League_2星.png", + "Grail_League_3星.png", + "Grail_League_4星.png", + "Grail_League_5星.png", + "奥特瑙斯.png", + "奥特瑙斯_改建型.png", + "常夏的泳装.png", + "常夏的泳装Ver_02.png", + "愚人节.png", + "愚人节_奥特瑙斯.png", + "第1阶段.png", + "第2阶段.png", + "第3阶段.png", + "第4阶段.png" + ] + + def test_ils_name_desc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-s', + 'name', '-o', 'desc']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "第4阶段.png", + "第3阶段.png", + "第2阶段.png", + "第1阶段.png", + "愚人节_奥特瑙斯.png", + "愚人节.png", + "常夏的泳装Ver_02.png", + "常夏的泳装.png", + "奥特瑙斯_改建型.png", + "奥特瑙斯.png", + "Grail_League_5星.png", + "Grail_League_4星.png", + "Grail_League_3星.png", + "Grail_League_2星.png", + "Grail_League_1星.png", + "Bright_Voyager.png", + ".meta.json" + ] + + def 
test_ils_size_asc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-s', + 'size']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + ".meta.json", + "愚人节.png", + "第4阶段.png", + "常夏的泳装Ver_02.png", + "第1阶段.png", + "奥特瑙斯.png", + "第2阶段.png", + "第3阶段.png", + "愚人节_奥特瑙斯.png", + "奥特瑙斯_改建型.png", + "Grail_League_1星.png", + "Grail_League_2星.png", + "Bright_Voyager.png", + "常夏的泳装.png", + "Grail_League_3星.png", + "Grail_League_4星.png", + "Grail_League_5星.png" + ] + + def test_ils_size_desc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-s', + 'size', '-o', 'desc']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "Grail_League_5星.png", + "Grail_League_4星.png", + "Grail_League_3星.png", + "常夏的泳装.png", + "Bright_Voyager.png", + "Grail_League_2星.png", + "Grail_League_1星.png", + "奥特瑙斯_改建型.png", + "愚人节_奥特瑙斯.png", + "第3阶段.png", + "第2阶段.png", + "奥特瑙斯.png", + "第1阶段.png", + "常夏的泳装Ver_02.png", + "第4阶段.png", + "愚人节.png", + ".meta.json" + ] From 130f748f38b2b1a4145c72bc15a0a644d8c7964d Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Sun, 25 Aug 2024 15:28:54 +0800 Subject: [PATCH 5/7] dev(narugo): add more test cases for detailed ils --- test/entry/test_ils.py | 149 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/test/entry/test_ils.py b/test/entry/test_ils.py index 940edcb54c..b48f61ee0c 100644 --- a/test/entry/test_ils.py +++ b/test/entry/test_ils.py @@ -209,3 +209,152 @@ def test_ils_size_desc(self): "愚人节.png", ".meta.json" ] + + def test_ils_detailed_default(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-l']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + " 1536 | 
常夏的泳装Ver_02.png 217.118 KiB a5e55da02440901b249f215135fb6dc2745ed7872b310989ac2426408cd2b88d", + " 225792 | 愚人节.png 152.971 KiB 4e1539e93a82eace5f40293fb64befb85bed7b90174f54bec7e9bbbc98ce55dc", + " 384000 | Grail_League_1星.png 306.765 KiB 8ff32612cd2668ef0ec448a20dff7153a1a023607a91b9981ef713d587fbff4d", + " 699904 | 常夏的泳装.png 427.923 KiB e5934bbd6291dedf5fee9a954de537668d5c1080fb8300760f42e539b0c9f8a7", + "1139712 | Grail_League_3星.png 456.854 KiB c29456d26a1c064cd46a69e8f954f21f1bc6f25dbcca1ec1fad71957a7bb7236", + "1609216 | 第4阶段.png 214.529 KiB ce5f13bd4ed4ac9e5d3a9883e9b8c68dc7cdc109ec94b5c58816fac8bf4c3ad3", + "1830912 | Grail_League_5星.png 871.836 KiB a7491adfd729f0cff742ca46571a6093fed91f46a6f28051a18424e3d991daf1", + "2725376 | .meta.json 8.758 KiB 4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222", + "2736128 | 第3阶段.png 244.402 KiB 6157f95816f92e1815f9156b74efd876d4ebb026d1573da09d9e311de0bbd435", + "2988032 | Grail_League_4星.png 689.062 KiB c04f1c4e4eead7cb6da1c99fabf39d41890885071e2e9927546ba098d83116e0", + "3695616 | 奥特瑙斯_改建型.png 250.557 KiB 9ae16e275e4597f6c8e6f09ce0af3e7aa9837821ab2f08483fe8dce8317d8b05", + "3954176 | 愚人节_奥特瑙斯.png 249.293 KiB 991497fa586f6f4529827e0f8f1f228c20ec9fb507c314ee9d20d47c46f26e89", + "4211200 | Grail_League_2星.png 329.081 KiB b9ea4cd8340ab0abb926b6e666b3e61d73c44cd1dea2106468d364728704f38e", + "4550144 | Bright_Voyager.png 383.661 KiB bf8db943c474cd786b26eb1ec01341270aa5c6f49c9d922a76c153cfef00c9c8", + "4944896 | 第1阶段.png 227.161 KiB 5b31578f2cc0abf20f25ff35f974d86b67a802f7b931fb74e1e55d723ffe0cfe", + "5179392 | 第2阶段.png 240.042 KiB 3e22f16436fcfc37cd2c117d8878e592884e8b8f2e2b82c3cfa20c2c37bf7db2", + "5427200 | 奥特瑙斯.png 235.501 KiB 559f05829d7454054c0ee15baefed8dc48827a2411b2b4d15f1f287b48f62db2" + ] + + def test_ils_detailed_desc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-l', '-o', + 'desc']) + assert result.exitcode == 0 + 
assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "5427200 | 奥特瑙斯.png 235.501 KiB 559f05829d7454054c0ee15baefed8dc48827a2411b2b4d15f1f287b48f62db2", + "5179392 | 第2阶段.png 240.042 KiB 3e22f16436fcfc37cd2c117d8878e592884e8b8f2e2b82c3cfa20c2c37bf7db2", + "4944896 | 第1阶段.png 227.161 KiB 5b31578f2cc0abf20f25ff35f974d86b67a802f7b931fb74e1e55d723ffe0cfe", + "4550144 | Bright_Voyager.png 383.661 KiB bf8db943c474cd786b26eb1ec01341270aa5c6f49c9d922a76c153cfef00c9c8", + "4211200 | Grail_League_2星.png 329.081 KiB b9ea4cd8340ab0abb926b6e666b3e61d73c44cd1dea2106468d364728704f38e", + "3954176 | 愚人节_奥特瑙斯.png 249.293 KiB 991497fa586f6f4529827e0f8f1f228c20ec9fb507c314ee9d20d47c46f26e89", + "3695616 | 奥特瑙斯_改建型.png 250.557 KiB 9ae16e275e4597f6c8e6f09ce0af3e7aa9837821ab2f08483fe8dce8317d8b05", + "2988032 | Grail_League_4星.png 689.062 KiB c04f1c4e4eead7cb6da1c99fabf39d41890885071e2e9927546ba098d83116e0", + "2736128 | 第3阶段.png 244.402 KiB 6157f95816f92e1815f9156b74efd876d4ebb026d1573da09d9e311de0bbd435", + "2725376 | .meta.json 8.758 KiB 4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222", + "1830912 | Grail_League_5星.png 871.836 KiB a7491adfd729f0cff742ca46571a6093fed91f46a6f28051a18424e3d991daf1", + "1609216 | 第4阶段.png 214.529 KiB ce5f13bd4ed4ac9e5d3a9883e9b8c68dc7cdc109ec94b5c58816fac8bf4c3ad3", + "1139712 | Grail_League_3星.png 456.854 KiB c29456d26a1c064cd46a69e8f954f21f1bc6f25dbcca1ec1fad71957a7bb7236", + " 699904 | 常夏的泳装.png 427.923 KiB e5934bbd6291dedf5fee9a954de537668d5c1080fb8300760f42e539b0c9f8a7", + " 384000 | Grail_League_1星.png 306.765 KiB 8ff32612cd2668ef0ec448a20dff7153a1a023607a91b9981ef713d587fbff4d", + " 225792 | 愚人节.png 152.971 KiB 4e1539e93a82eace5f40293fb64befb85bed7b90174f54bec7e9bbbc98ce55dc", + " 1536 | 常夏的泳装Ver_02.png 217.118 KiB a5e55da02440901b249f215135fb6dc2745ed7872b310989ac2426408cd2b88d" + ] + + def test_ils_detailed_name_asc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', 
'-a', 'mashu_skins.tar', '-l', '-s', + 'name']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "2725376 | .meta.json 8.758 KiB 4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222", + "4550144 | Bright_Voyager.png 383.661 KiB bf8db943c474cd786b26eb1ec01341270aa5c6f49c9d922a76c153cfef00c9c8", + " 384000 | Grail_League_1星.png 306.765 KiB 8ff32612cd2668ef0ec448a20dff7153a1a023607a91b9981ef713d587fbff4d", + "4211200 | Grail_League_2星.png 329.081 KiB b9ea4cd8340ab0abb926b6e666b3e61d73c44cd1dea2106468d364728704f38e", + "1139712 | Grail_League_3星.png 456.854 KiB c29456d26a1c064cd46a69e8f954f21f1bc6f25dbcca1ec1fad71957a7bb7236", + "2988032 | Grail_League_4星.png 689.062 KiB c04f1c4e4eead7cb6da1c99fabf39d41890885071e2e9927546ba098d83116e0", + "1830912 | Grail_League_5星.png 871.836 KiB a7491adfd729f0cff742ca46571a6093fed91f46a6f28051a18424e3d991daf1", + "5427200 | 奥特瑙斯.png 235.501 KiB 559f05829d7454054c0ee15baefed8dc48827a2411b2b4d15f1f287b48f62db2", + "3695616 | 奥特瑙斯_改建型.png 250.557 KiB 9ae16e275e4597f6c8e6f09ce0af3e7aa9837821ab2f08483fe8dce8317d8b05", + " 699904 | 常夏的泳装.png 427.923 KiB e5934bbd6291dedf5fee9a954de537668d5c1080fb8300760f42e539b0c9f8a7", + " 1536 | 常夏的泳装Ver_02.png 217.118 KiB a5e55da02440901b249f215135fb6dc2745ed7872b310989ac2426408cd2b88d", + " 225792 | 愚人节.png 152.971 KiB 4e1539e93a82eace5f40293fb64befb85bed7b90174f54bec7e9bbbc98ce55dc", + "3954176 | 愚人节_奥特瑙斯.png 249.293 KiB 991497fa586f6f4529827e0f8f1f228c20ec9fb507c314ee9d20d47c46f26e89", + "4944896 | 第1阶段.png 227.161 KiB 5b31578f2cc0abf20f25ff35f974d86b67a802f7b931fb74e1e55d723ffe0cfe", + "5179392 | 第2阶段.png 240.042 KiB 3e22f16436fcfc37cd2c117d8878e592884e8b8f2e2b82c3cfa20c2c37bf7db2", + "2736128 | 第3阶段.png 244.402 KiB 6157f95816f92e1815f9156b74efd876d4ebb026d1573da09d9e311de0bbd435", + "1609216 | 第4阶段.png 214.529 KiB ce5f13bd4ed4ac9e5d3a9883e9b8c68dc7cdc109ec94b5c58816fac8bf4c3ad3" + ] + + def test_ils_detailed_name_desc(self): + result = 
simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-l', '-s', + 'name', '-o', 'desc']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "1609216 | 第4阶段.png 214.529 KiB ce5f13bd4ed4ac9e5d3a9883e9b8c68dc7cdc109ec94b5c58816fac8bf4c3ad3", + "2736128 | 第3阶段.png 244.402 KiB 6157f95816f92e1815f9156b74efd876d4ebb026d1573da09d9e311de0bbd435", + "5179392 | 第2阶段.png 240.042 KiB 3e22f16436fcfc37cd2c117d8878e592884e8b8f2e2b82c3cfa20c2c37bf7db2", + "4944896 | 第1阶段.png 227.161 KiB 5b31578f2cc0abf20f25ff35f974d86b67a802f7b931fb74e1e55d723ffe0cfe", + "3954176 | 愚人节_奥特瑙斯.png 249.293 KiB 991497fa586f6f4529827e0f8f1f228c20ec9fb507c314ee9d20d47c46f26e89", + " 225792 | 愚人节.png 152.971 KiB 4e1539e93a82eace5f40293fb64befb85bed7b90174f54bec7e9bbbc98ce55dc", + " 1536 | 常夏的泳装Ver_02.png 217.118 KiB a5e55da02440901b249f215135fb6dc2745ed7872b310989ac2426408cd2b88d", + " 699904 | 常夏的泳装.png 427.923 KiB e5934bbd6291dedf5fee9a954de537668d5c1080fb8300760f42e539b0c9f8a7", + "3695616 | 奥特瑙斯_改建型.png 250.557 KiB 9ae16e275e4597f6c8e6f09ce0af3e7aa9837821ab2f08483fe8dce8317d8b05", + "5427200 | 奥特瑙斯.png 235.501 KiB 559f05829d7454054c0ee15baefed8dc48827a2411b2b4d15f1f287b48f62db2", + "1830912 | Grail_League_5星.png 871.836 KiB a7491adfd729f0cff742ca46571a6093fed91f46a6f28051a18424e3d991daf1", + "2988032 | Grail_League_4星.png 689.062 KiB c04f1c4e4eead7cb6da1c99fabf39d41890885071e2e9927546ba098d83116e0", + "1139712 | Grail_League_3星.png 456.854 KiB c29456d26a1c064cd46a69e8f954f21f1bc6f25dbcca1ec1fad71957a7bb7236", + "4211200 | Grail_League_2星.png 329.081 KiB b9ea4cd8340ab0abb926b6e666b3e61d73c44cd1dea2106468d364728704f38e", + " 384000 | Grail_League_1星.png 306.765 KiB 8ff32612cd2668ef0ec448a20dff7153a1a023607a91b9981ef713d587fbff4d", + "4550144 | Bright_Voyager.png 383.661 KiB bf8db943c474cd786b26eb1ec01341270aa5c6f49c9d922a76c153cfef00c9c8", + "2725376 | .meta.json 8.758 KiB 
4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222" + ] + + def test_ils_detailed_size_asc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-l', '-s', + 'size']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "2725376 | .meta.json 8.758 KiB 4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222", + " 225792 | 愚人节.png 152.971 KiB 4e1539e93a82eace5f40293fb64befb85bed7b90174f54bec7e9bbbc98ce55dc", + "1609216 | 第4阶段.png 214.529 KiB ce5f13bd4ed4ac9e5d3a9883e9b8c68dc7cdc109ec94b5c58816fac8bf4c3ad3", + " 1536 | 常夏的泳装Ver_02.png 217.118 KiB a5e55da02440901b249f215135fb6dc2745ed7872b310989ac2426408cd2b88d", + "4944896 | 第1阶段.png 227.161 KiB 5b31578f2cc0abf20f25ff35f974d86b67a802f7b931fb74e1e55d723ffe0cfe", + "5427200 | 奥特瑙斯.png 235.501 KiB 559f05829d7454054c0ee15baefed8dc48827a2411b2b4d15f1f287b48f62db2", + "5179392 | 第2阶段.png 240.042 KiB 3e22f16436fcfc37cd2c117d8878e592884e8b8f2e2b82c3cfa20c2c37bf7db2", + "2736128 | 第3阶段.png 244.402 KiB 6157f95816f92e1815f9156b74efd876d4ebb026d1573da09d9e311de0bbd435", + "3954176 | 愚人节_奥特瑙斯.png 249.293 KiB 991497fa586f6f4529827e0f8f1f228c20ec9fb507c314ee9d20d47c46f26e89", + "3695616 | 奥特瑙斯_改建型.png 250.557 KiB 9ae16e275e4597f6c8e6f09ce0af3e7aa9837821ab2f08483fe8dce8317d8b05", + " 384000 | Grail_League_1星.png 306.765 KiB 8ff32612cd2668ef0ec448a20dff7153a1a023607a91b9981ef713d587fbff4d", + "4211200 | Grail_League_2星.png 329.081 KiB b9ea4cd8340ab0abb926b6e666b3e61d73c44cd1dea2106468d364728704f38e", + "4550144 | Bright_Voyager.png 383.661 KiB bf8db943c474cd786b26eb1ec01341270aa5c6f49c9d922a76c153cfef00c9c8", + " 699904 | 常夏的泳装.png 427.923 KiB e5934bbd6291dedf5fee9a954de537668d5c1080fb8300760f42e539b0c9f8a7", + "1139712 | Grail_League_3星.png 456.854 KiB c29456d26a1c064cd46a69e8f954f21f1bc6f25dbcca1ec1fad71957a7bb7236", + "2988032 | Grail_League_4星.png 689.062 KiB 
c04f1c4e4eead7cb6da1c99fabf39d41890885071e2e9927546ba098d83116e0", + "1830912 | Grail_League_5星.png 871.836 KiB a7491adfd729f0cff742ca46571a6093fed91f46a6f28051a18424e3d991daf1" + ] + + def test_ils_detailed_size_desc(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-l', '-s', + 'size', '-o', 'desc']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "1830912 | Grail_League_5星.png 871.836 KiB a7491adfd729f0cff742ca46571a6093fed91f46a6f28051a18424e3d991daf1", + "2988032 | Grail_League_4星.png 689.062 KiB c04f1c4e4eead7cb6da1c99fabf39d41890885071e2e9927546ba098d83116e0", + "1139712 | Grail_League_3星.png 456.854 KiB c29456d26a1c064cd46a69e8f954f21f1bc6f25dbcca1ec1fad71957a7bb7236", + " 699904 | 常夏的泳装.png 427.923 KiB e5934bbd6291dedf5fee9a954de537668d5c1080fb8300760f42e539b0c9f8a7", + "4550144 | Bright_Voyager.png 383.661 KiB bf8db943c474cd786b26eb1ec01341270aa5c6f49c9d922a76c153cfef00c9c8", + "4211200 | Grail_League_2星.png 329.081 KiB b9ea4cd8340ab0abb926b6e666b3e61d73c44cd1dea2106468d364728704f38e", + " 384000 | Grail_League_1星.png 306.765 KiB 8ff32612cd2668ef0ec448a20dff7153a1a023607a91b9981ef713d587fbff4d", + "3695616 | 奥特瑙斯_改建型.png 250.557 KiB 9ae16e275e4597f6c8e6f09ce0af3e7aa9837821ab2f08483fe8dce8317d8b05", + "3954176 | 愚人节_奥特瑙斯.png 249.293 KiB 991497fa586f6f4529827e0f8f1f228c20ec9fb507c314ee9d20d47c46f26e89", + "2736128 | 第3阶段.png 244.402 KiB 6157f95816f92e1815f9156b74efd876d4ebb026d1573da09d9e311de0bbd435", + "5179392 | 第2阶段.png 240.042 KiB 3e22f16436fcfc37cd2c117d8878e592884e8b8f2e2b82c3cfa20c2c37bf7db2", + "5427200 | 奥特瑙斯.png 235.501 KiB 559f05829d7454054c0ee15baefed8dc48827a2411b2b4d15f1f287b48f62db2", + "4944896 | 第1阶段.png 227.161 KiB 5b31578f2cc0abf20f25ff35f974d86b67a802f7b931fb74e1e55d723ffe0cfe", + " 1536 | 常夏的泳装Ver_02.png 217.118 KiB a5e55da02440901b249f215135fb6dc2745ed7872b310989ac2426408cd2b88d", + "1609216 | 第4阶段.png 
214.529 KiB ce5f13bd4ed4ac9e5d3a9883e9b8c68dc7cdc109ec94b5c58816fac8bf4c3ad3", + " 225792 | 愚人节.png 152.971 KiB 4e1539e93a82eace5f40293fb64befb85bed7b90174f54bec7e9bbbc98ce55dc", + "2725376 | .meta.json 8.758 KiB 4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222" + ] From cc88e3196b63191161862731b29549b5256faa70 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Sun, 25 Aug 2024 15:37:38 +0800 Subject: [PATCH 6/7] dev(narugo): add 2 more unit test cases for -I option --- test/entry/test_ils.py | 114 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/test/entry/test_ils.py b/test/entry/test_ils.py index b48f61ee0c..bbbb392df3 100644 --- a/test/entry/test_ils.py +++ b/test/entry/test_ils.py @@ -358,3 +358,117 @@ def test_ils_detailed_size_desc(self): " 225792 | 愚人节.png 152.971 KiB 4e1539e93a82eace5f40293fb64befb85bed7b90174f54bec7e9bbbc98ce55dc", "2725376 | .meta.json 8.758 KiB 4585b01c251a496b73cb231d29fc711cfb1d682a84334d95f6f5b6c1cc5b5222" ] + + def test_ils_information(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-I']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "Repo ID: narugo/test_cos5t_tars", + "Repo Type: dataset", + "Revision: main", + "Archive File: mashu_skins.tar", + "", + "File Size: 5.410 MiB (5672960 Bytes)", + "Native Hash: a55942a154b97580d4ffdae1219c95120ea94294", + "LFS Hash: 6e7d17af49c7502e8045575f214cd9229a5b0b842d68591a7a5d9aee1448f478", + "Files: 17 files", + " Image Files: 16 files", + " Data Files: 1 file", + "File Extensions:", + " .png : 16 files", + " .json : 1 file", + "Total Size: 5.376 MiB", + " Average File Size: 323.854 KiB", + " Median File Size: 249.293 KiB", + " Smallest File Size: 8.758 KiB", + " Largest File Size: 871.836 KiB", + " Standard Deviation: 202.998 KiB", + "Quartiles:", + " Q1 (25th Percentile): 227.161 KiB", + " Q2 (50th Percentile, 
Median): 249.293 KiB", + " Q3 (75th Percentile): 383.661 KiB", + " Interquartile Range (IQR): 156.500 KiB", + "", + "Status: Up-To-Date" + ] + + def test_ils_information_not_match(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'narugo/test_cos5t_tars', '-a', 'mashu_skins.tar', '-I', '-i', + 'ex3.json']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "Repo ID: narugo/test_cos5t_tars", + "Repo Type: dataset", + "Revision: main", + "Archive File: mashu_skins.tar", + "Index File: ex3.json", + "", + "File Size: 24.482 MiB (25671680 Bytes)", + "Native Hash: f7f756e3f5e744cc94197cc669c409fd8990deba", + "LFS Hash: abb016a5115ec2f18771b42128f50f2c96caace5cf9e84d1c342a9ab8da93e0e", + "Files: 295 files", + " Image Files: 295 files", + "File Extensions:", + " .jpg : 295 files", + "Total Size: 23.974 MiB", + " Average File Size: 83.218 KiB", + " Median File Size: 79.722 KiB", + " Smallest File Size: 32.238 KiB", + " Largest File Size: 197.258 KiB", + " Standard Deviation: 25.068 KiB", + "Quartiles:", + " Q1 (25th Percentile): 64.862 KiB", + " Q2 (50th Percentile, Median): 79.722 KiB", + " Q3 (75th Percentile): 98.960 KiB", + " Interquartile Range (IQR): 34.098 KiB", + "", + "Status: Outdated", + "Index file is recommended to get refreshed." 
+ ] + + def test_ils_information_sep(self): + result = simulate_entry(hfutilscli, + ['hfutils', 'ils', '-r', 'nyanko7/danbooru2023', '-a', 'original/data-0000.tar', '-I', + '--idx_repository', 'deepghs/danbooru2023_index', '-i', 'original/data-0000.json']) + assert result.exitcode == 0 + assert click.unstyle(result.stdout).splitlines(keepends=False) == [ + "Repo ID: nyanko7/danbooru2023", + "Index Repo ID: deepghs/danbooru2023_index", + "Repo Type: dataset", + "Revision: main", + "Archive File: original/data-0000.tar", + "Index File: original/data-0000.json", + "", + "File Size: 7.592 GiB (8151715840 Bytes)", + "Native Hash: 334e3b11928649d8d2b8fb79d6b281dd72d704d1", + "LFS Hash: f2b1d0650c36af4d20d933b4068d2a7b88f79e4d39bb8776d423fbb40f9e055e", + "Files: 6766 files", + " Image Files: 6729 files", + " Archive/Compressed Files: 9 files", + " Other Files: 28 files", + "File Extensions:", + " .jpg : 5040 files", + " .png : 1657 files", + " .gif : 28 files", + " .mp4 : 26 files", + " .zip : 9 files", + " .webp : 3 files", + " . 
: 1 file", + " .jpeg : 1 file", + " .swf : 1 file", + "Total Size: 7.587 GiB", + " Average File Size: 1.148 MiB", + " Median File Size: 504.214 KiB", + " Smallest File Size: 0.000 Bit", + " Largest File Size: 41.124 MiB", + " Standard Deviation: 2.147 MiB", + "Quartiles:", + " Q1 (25th Percentile): 229.165 KiB", + " Q2 (50th Percentile, Median): 504.214 KiB", + " Q3 (75th Percentile): 1.111 MiB", + " Interquartile Range (IQR): 908.403 KiB", + "", + "Status: Up-To-Date" + ] From 6482caceb5aaac9ab8725e5ca336fb6791f5eb64 Mon Sep 17 00:00:00 2001 From: narugo1992 Date: Sun, 25 Aug 2024 15:42:56 +0800 Subject: [PATCH 7/7] dev(narugo): add documentation for the new code --- docs/source/api_doc/entry/ils.rst | 10 ++++ docs/source/api_doc/entry/index.rst | 1 + hfutils/entry/ils.py | 82 +++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 docs/source/api_doc/entry/ils.rst diff --git a/docs/source/api_doc/entry/ils.rst b/docs/source/api_doc/entry/ils.rst new file mode 100644 index 0000000000..2c2b3ab5eb --- /dev/null +++ b/docs/source/api_doc/entry/ils.rst @@ -0,0 +1,10 @@ +hfutils.entry.ils +================================ + +.. currentmodule:: hfutils.entry.ils + +.. automodule:: hfutils.entry.ils + + + + diff --git a/docs/source/api_doc/entry/index.rst b/docs/source/api_doc/entry/index.rst index 00d33070f7..008a97a648 100644 --- a/docs/source/api_doc/entry/index.rst +++ b/docs/source/api_doc/entry/index.rst @@ -14,6 +14,7 @@ hfutils.entry clone dispatch download + ils index_ ls ls_repo diff --git a/hfutils/entry/ils.py b/hfutils/entry/ils.py index e36211d6b2..38d8b6fb06 100644 --- a/hfutils/entry/ils.py +++ b/hfutils/entry/ils.py @@ -1,3 +1,23 @@ +""" +This module provides functionality for listing files from a HuggingFace repository's index tar file. 
+ +It includes a command-line interface (CLI) for interacting with HuggingFace repositories, +specifically for listing and displaying information about files within a repository's +index tar file. The module offers various options for sorting and displaying +detailed information about the files and the repository itself. + +Key features: + +1. List files from a HuggingFace repository's index tar file +2. Display detailed file information +3. Show repository and index file statistics +4. Sort files by different criteria (offset, name, size) +5. Validate the index file's status (up-to-date or outdated) + +This module is part of a larger system for interacting with HuggingFace repositories +and provides a user-friendly interface for exploring the contents of index tar files. +""" + import os.path import statistics import warnings @@ -24,6 +44,29 @@ def _add_ils_subcommand(cli: click.Group) -> click.Group: + """ + Add the 'ils' subcommand to the given click Group. + + This function defines and adds the 'ils' (Index List) subcommand to the provided + click Group. The 'ils' command allows users to list files from a HuggingFace + repository's index tar file and display various information about the repository + and its contents. + + :param cli: The click Group to which the 'ils' subcommand will be added. + :type cli: click.Group + + :return: The modified click Group with the 'ils' subcommand added. + :rtype: click.Group + + Usage: + This function is typically called when setting up the CLI for the application. + It adds the 'ils' command with various options for customizing the output.
+ + Example: + cli = click.Group() + cli = _add_ils_subcommand(cli) + """ + @cli.command('ils', help='List files from HuggingFace repository\'s index tar file.\n\n' 'Set environment $HF_TOKEN to use your own access token.', context_settings=CONTEXT_SETTINGS) @@ -51,6 +94,45 @@ def ls(repo_id: str, idx_repo_id: Optional[str], repo_type: RepoTypeTyping, revi show_detailed: bool, show_information: bool, sort_by: Literal['offset', 'name', 'size'], order_by: Literal['asc', 'desc'], archive_file: str, idx_file: Optional[str] = None): + """ + List files from a HuggingFace repository's index tar file. + + This function retrieves and displays information about files in a HuggingFace + repository's index tar file. It can show detailed file information and repository + statistics, and allows the file list to be sorted. + + :param repo_id: The ID of the HuggingFace repository. + :type repo_id: str + :param idx_repo_id: The ID of the index repository (if different from repo_id). + :type idx_repo_id: Optional[str] + :param repo_type: The type of the HuggingFace repository (e.g., 'dataset', 'model'). + :type repo_type: RepoTypeTyping + :param revision: The revision of the repository to use. + :type revision: str + :param show_detailed: Flag to show detailed file information. + :type show_detailed: bool + :param show_information: Flag to show general information about the index file. + :type show_information: bool + :param sort_by: Criterion to sort the files by ('offset', 'name', or 'size'). + :type sort_by: Literal['offset', 'name', 'size'] + :param order_by: Order of sorting ('asc' or 'desc'). + :type order_by: Literal['asc', 'desc'] + :param archive_file: The name of the archive file in the repository. + :type archive_file: str + :param idx_file: The name of the index file (if different from default). + :type idx_file: Optional[str] + + :return: None + + This function performs the following steps: + + 1. Configures the HTTP backend for HuggingFace Hub. + 2.
Retrieves the index information for the specified repository and archive. + 3. If show_information is True, displays general statistics about the repository and files. + 4. If not showing information, lists the files according to the specified sort key (sort_by) and order (order_by). + + The function uses click styles to format the output for better readability in the terminal. + """ configure_http_backend(get_requests_session) idx_info = hf_tar_get_index(