diff --git a/.github/workflows/mdformat.yaml b/.github/workflows/mdformat.yaml index bad370e4..1638b76d 100644 --- a/.github/workflows/mdformat.yaml +++ b/.github/workflows/mdformat.yaml @@ -27,7 +27,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install mdformat>=0.7.0 mdformat-myst>=0.1.5 mdformat-tables>=0.4.0 + pip install mdformat>=0.7.0 mdformat-myst>=0.1.5 - name: Run mdformat (check only) run: | diff --git a/README.md b/README.md index 4d2a9d6b..8a36ddc7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8056010.svg)](https://doi.org/10.5281/zenodo.8056010) [![Maintainability](https://qlty.sh/gh/hed-standard/projects/hed-python/maintainability.svg)](https://qlty.sh/gh/hed-standard/projects/hed-python) [![Code Coverage](https://qlty.sh/gh/hed-standard/projects/hed-python/coverage.svg)](https://qlty.sh/gh/hed-standard/projects/hed-python) ![Python3](https://img.shields.io/badge/python-%3E=3.10-yellow.svg) ![PyPI - Status](https://img.shields.io/pypi/v/hedtools) [![Documentation](https://img.shields.io/badge/docs-hed--python-blue.svg)](https://www.hedtags.org/hed-python) +![PyPI - Status](https://img.shields.io/pypi/v/hedtools) ![Python3](https://img.shields.io/badge/python-%3E=3.10-yellow.svg) [![Maintainability](https://qlty.sh/gh/hed-standard/projects/hed-python/maintainability.svg)](https://qlty.sh/gh/hed-standard/projects/hed-python) [![Code Coverage](https://qlty.sh/gh/hed-standard/projects/hed-python/coverage.svg)](https://qlty.sh/gh/hed-standard/projects/hed-python) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8056010.svg)](https://doi.org/10.5281/zenodo.8056010) [![Docs](https://img.shields.io/badge/docs-hed--python-blue.svg)](https://www.hedtags.org/hed-python) # HEDTools - Python diff --git a/docs/user_guide.md b/docs/user_guide.md index 32df3837..1d18228e 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -279,16 +279,15 @@ HEDTools 
provides a unified command-line interface (CLI) using a **git-style com ### Available commands -| Command | Description | -| ------------------------------ | ----------------------------------------------------------- | -| **Annotation management** | | -| `hedpy validate-bids` | Validate HED annotations in BIDS datasets | -| `hedpy extract-sidecar` | Extract JSON sidecar template from tabular (`.tsv`) files | -| **Schema management** | | -| `hedpy schema validate` | Validate HED schema files | -| `hedpy schema convert` | Convert schemas between formats (XML, MEDIAWIKI, TSV, JSON) | -| `hedpy schema add-ids` | Add unique HED IDs to schema terms | -| `hedpy schema create-ontology` | Generate OWL ontology files from HED schemas | +| Command | Description | +| ----------------------------- | ----------------------------------------------------------- | +| **Annotation management** | | +| `hedpy validate bids-dataset` | Validate HED annotations in BIDS datasets | +| `hedpy extract bids-sidecar` | Extract JSON sidecar template from tabular (`.tsv`) files | +| **Schema management** | | +| `hedpy schema validate` | Validate HED schema files | +| `hedpy schema convert` | Convert schemas between formats (XML, MEDIAWIKI, TSV, JSON) | +| `hedpy schema add-ids` | Add unique HED IDs to schema terms | ### Installation and basic usage @@ -305,7 +304,7 @@ Get help on available commands: hedpy --help # Help for a specific command -hedpy validate-bids --help +hedpy validate bids-dataset --help # Help for command groups hedpy schema --help @@ -321,35 +320,35 @@ ______________________________________________________________________ ### BIDS validation -Validate HED annotations in BIDS datasets using `hedpy validate-bids`. +Validate HED annotations in BIDS datasets using `hedpy validate bids-dataset`. 
#### Basic validation ```bash # Validate a BIDS dataset -hedpy validate-bids /path/to/bids/dataset +hedpy validate bids-dataset /path/to/bids/dataset # Include warnings in addition to errors -hedpy validate-bids /path/to/bids/dataset -w +hedpy validate bids-dataset /path/to/bids/dataset -w # Enable verbose output -hedpy validate-bids /path/to/bids/dataset -v +hedpy validate bids-dataset /path/to/bids/dataset -v ``` #### Output options ```bash # Save results to a file -hedpy validate-bids /path/to/bids/dataset -o validation_results.txt +hedpy validate bids-dataset /path/to/bids/dataset -o validation_results.txt # Output in compact JSON format (array of issues only) -hedpy validate-bids /path/to/bids/dataset -f json -o results.json +hedpy validate bids-dataset /path/to/bids/dataset -f json -o results.json # Pretty-printed JSON with version metadata (recommended for saving) -hedpy validate-bids /path/to/bids/dataset -f json_pp -o results.json +hedpy validate bids-dataset /path/to/bids/dataset -f json_pp -o results.json # Print to stdout AND save to file -hedpy validate-bids /path/to/bids/dataset -o results.txt -p +hedpy validate bids-dataset /path/to/bids/dataset -o results.txt -p ``` **Output format differences:** @@ -362,43 +361,43 @@ hedpy validate-bids /path/to/bids/dataset -o results.txt -p ```bash # Validate specific file types (default: events, participants) -hedpy validate-bids /path/to/bids/dataset -s events -s participants -s sessions +hedpy validate bids-dataset /path/to/bids/dataset -s events -s participants -s sessions # Exclude certain directories (default: sourcedata, derivatives, code, stimuli) -hedpy validate-bids /path/to/bids/dataset -x derivatives -x sourcedata -x mydata +hedpy validate bids-dataset /path/to/bids/dataset -x derivatives -x sourcedata -x mydata # Limit number of errors reported per error type -hedpy validate-bids /path/to/bids/dataset -ec 5 +hedpy validate bids-dataset /path/to/bids/dataset -el 5 # Apply error limit per file instead 
of overall -hedpy validate-bids /path/to/bids/dataset -ec 5 -ef +hedpy validate bids-dataset /path/to/bids/dataset -el 5 -ef ``` #### Logging options ```bash # Set log level -hedpy validate-bids /path/to/bids/dataset -l DEBUG +hedpy validate bids-dataset /path/to/bids/dataset -l DEBUG # Save logs to file -hedpy validate-bids /path/to/bids/dataset -lf validation.log +hedpy validate bids-dataset /path/to/bids/dataset -lf validation.log # Save logs to file without stderr output -hedpy validate-bids /path/to/bids/dataset -lf validation.log -lq +hedpy validate bids-dataset /path/to/bids/dataset -lf validation.log -lq ``` #### Complete example ```bash # Comprehensive validation with all options -hedpy validate-bids /path/to/bids/dataset \ +hedpy validate bids-dataset /path/to/bids/dataset \ -w \ -v \ -f json_pp \ -o validation_results.json \ -s events \ -x derivatives \ - -ec 10 \ + -el 10 \ -lf validation.log ``` @@ -406,31 +405,31 @@ ______________________________________________________________________ ### Sidecar template extraction -Extract a JSON sidecar template from BIDS event files using `hedpy extract-sidecar`. +Extract a JSON sidecar template from BIDS event files using `hedpy extract bids-sidecar`. 
#### Basic extraction ```bash # Extract template for events files -hedpy extract-sidecar /path/to/bids/dataset -s events +hedpy extract bids-sidecar /path/to/bids/dataset -s events # Save to specific file -hedpy extract-sidecar /path/to/bids/dataset -s events -o task_events.json +hedpy extract bids-sidecar /path/to/bids/dataset -s events -o task_events.json ``` #### Column handling ```bash # Specify value columns (use single annotation for column with # placeholder) -hedpy extract-sidecar /path/to/bids/dataset -s events \ +hedpy extract bids-sidecar /path/to/bids/dataset -s events \ -vc response_time -vc accuracy -vc subject_id # Skip specific columns (default: onset, duration, sample) -hedpy extract-sidecar /path/to/bids/dataset -s events \ +hedpy extract bids-sidecar /path/to/bids/dataset -s events \ -sc onset -sc duration -sc trial_type # Exclude certain directories -hedpy extract-sidecar /path/to/bids/dataset -s events \ +hedpy extract bids-sidecar /path/to/bids/dataset -s events \ -x derivatives -x pilot_data ``` @@ -438,7 +437,7 @@ hedpy extract-sidecar /path/to/bids/dataset -s events \ ```bash # Extract events template with custom column handling -hedpy extract-sidecar /path/to/bids/dataset \ +hedpy extract bids-sidecar /path/to/bids/dataset \ -s events \ -vc response_time \ -vc reaction_time \ @@ -517,23 +516,6 @@ hedpy schema add-ids /path/to/hed-schemas score 2.2.0 - Modifies all schema formats (XML, MEDIAWIKI, TSV, JSON) in-place - Should be run after all schema content changes are finalized -#### Create ontology - -Generate OWL (Web Ontology Language) ontology files from HED schemas for semantic web applications and ontology-based tools. 
- -```bash -# Create ontology for a standard schema -hedpy schema create-ontology /path/to/hed-schemas standard 8.4.0 - -# Create ontology for a library schema with custom output location -hedpy schema create-ontology /path/to/hed-schemas score 2.1.0 \ - --dest /path/to/output - -# Create ontology with all outputs in specific directory -hedpy schema create-ontology /path/to/hed-schemas lang 1.1.0 \ - --dest ./ontologies -``` - **Best practices:** 1. Validate schema thoroughly before adding IDs @@ -575,7 +557,7 @@ Each command provides help at several levels hedpy --help # Command-specific help -hedpy validate-bids --help +hedpy validate bids-dataset --help hedpy schema validate --help # Command group help @@ -586,16 +568,16 @@ hedpy schema --help ```bash # Step 1: Extract sidecar template -hedpy extract-sidecar /path/to/dataset -s events -o events.json +hedpy extract bids-sidecar /path/to/dataset -s events -o events.json # Step 2: Edit events.json to add HED tags # (manual editing step) # Step 3: Validate with warnings -hedpy validate-bids /path/to/dataset -w -v -o validation.txt +hedpy validate bids-dataset /path/to/dataset -w -v -o validation.txt # Step 4: Fix issues and re-validate -hedpy validate-bids /path/to/dataset -w +hedpy validate bids-dataset /path/to/dataset -w ``` #### Workflow 3: Schema development and testing @@ -614,11 +596,8 @@ hedpy schema validate my_schema.xml --add-all-extensions #### Workflow 4: Preparing for schema release ```bash -# Step 1: Add HED IDs +# Add HED IDs hedpy schema add-ids /path/to/hed-schemas my_library 1.0.0 - -# Step 2: Generate ontology -hedpy schema create-ontology /path/to/hed-schemas my_library 1.0.0 ``` ______________________________________________________________________ diff --git a/hed/cli/cli.py b/hed/cli/cli.py index 876544b1..8d413d3b 100644 --- a/hed/cli/cli.py +++ b/hed/cli/cli.py @@ -1,14 +1,23 @@ #!/usr/bin/env python3 """ -HED Command Line Interface +HED Command Line Interface. 
A unified command-line interface for HED (Hierarchical Event Descriptors) tools. Provides a git-like interface with subcommands for validation and schema management. """ import click +from click_option_group import optgroup from hed import _version as vr +# Consistent metavar definitions used across all commands +METAVAR_PATH = "PATH" +METAVAR_FILE = "FILE" +METAVAR_NAME = "NAME" +METAVAR_STRING = "STRING" +METAVAR_PREFIX = "PREFIX" +METAVAR_N = "N" + @click.group(context_settings={"help_option_names": ["-h", "--help"]}) @click.version_option(version=str(vr.get_versions()["version"]), prog_name="hedpy") @@ -18,6 +27,8 @@ def cli(): This tool provides various commands for working with HED annotations, including validation and schema management. + Use 'hedpy --help' for a list of commands. + Use 'hedpy COMMAND --help' for more information on a specific command. """ pass @@ -25,39 +36,155 @@ def cli(): @cli.group() def schema(): - """HED schema management and validation tools. + """HED schema management and validation tools Commands for validating, updating, and managing HED schemas. """ pass +@cli.group() +def validate(): + """HED validation tools. + + Commands for validating HED annotations in datasets, files, and strings. + """ + pass + + # Import and register subcommands -@cli.command(name="validate-bids") +@validate.command( + name="bids-dataset", + epilog=""" +This command validates HED annotations in BIDS-formatted datasets, checking for +compliance with HED schema rules and proper annotation structure. It processes +TSV files with their associated JSON sidecars, following BIDS inheritance rules. 
+ +\b +Examples: + # Basic validation with minimal output + hedpy validate bids-dataset /path/to/dataset + + # Validation with progress messages + hedpy validate bids-dataset /path/to/dataset --verbose + + # Validate specific file types + hedpy validate bids-dataset /path/to/dataset -s events + + # Validate multiple file types + hedpy validate bids-dataset /path/to/dataset -s events -s participants + + # Check for warnings in addition to errors + hedpy validate bids-dataset /path/to/dataset --check-for-warnings + + # Save validation results to JSON file + hedpy validate bids-dataset /path/to/dataset -f json -o results.json + + # Detailed debugging with file logging + hedpy validate bids-dataset /path/to/dataset -l DEBUG --log-file validation.log --log-quiet + + # Limit error reporting for large datasets + hedpy validate bids-dataset /path/to/dataset --error-limit 10 +""", +) @click.argument("data_path", type=click.Path(exists=True)) -@click.option("-ec", "--error-count", "error_limit", type=int, default=None, help="Limit errors of each code type to report.") -@click.option("-ef", "--errors-by-file", is_flag=True, help="Apply error limit by file rather than overall.") -@click.option("-f", "--format", type=click.Choice(["text", "json", "json_pp"]), default="text", help="Output format.") -@click.option( - "-l", - "--log-level", - type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), - default="WARNING", - help="Log level.", -) -@click.option("-lf", "--log-file", "log_file", type=click.Path(), default=None, help="File path for saving log output.") -@click.option("-lq", "--log-quiet", is_flag=True, help="Suppress log output to stderr (only if --log-file is used).") -@click.option("-o", "--output-file", "output_file", type=click.Path(), default="", help="Output file for validation results.") -@click.option("-p", "--print-output", is_flag=True, help="Output results to stdout in addition to file.") -@click.option("-s", "--suffixes", multiple=True, 
default=["events", "participants"], help="Suffixes of tsv files to validate.") -@click.option("-v", "--verbose", is_flag=True, help="Output informational messages (equivalent to --log-level INFO).") -@click.option("-w", "--check-for-warnings", is_flag=True, help="Check for warnings as well as errors.") -@click.option( +# File selection options +@optgroup.group("File selection options") +@optgroup.option( + "-s", + "--suffixes", + multiple=True, + default=["events", "participants"], + show_default="events participants", + metavar=METAVAR_NAME, + help="Suffix(es) for base filename(s) to match (e.g., '-s events' matches files ending with 'events.tsv'); repeat to specify multiple suffixes (e.g., '-s events -s participants')", +) +@optgroup.option( "-x", "--exclude-dirs", multiple=True, default=["sourcedata", "derivatives", "code", "stimuli"], - help="Directory names to exclude from search.", + show_default="sourcedata derivatives code stimuli", + metavar=METAVAR_NAME, + help="Directory names (relative to root) to exclude (e.g.,'-x sourcedata -x derivatives' excludes data_root/sourcedata and data_root/derivatives)", +) +# Validation options +@optgroup.group("Validation options") +@optgroup.option( + "-w", + "--check-for-warnings", + is_flag=True, + help="Check for warnings as well as errors", +) +@optgroup.option( + "-el", + "--error-limit", + type=int, + default=None, + metavar=METAVAR_N, + help="Limit number of each error code to report", +) +@optgroup.option( + "-ef", + "--errors-by-file", + is_flag=True, + help="Apply error limit by file rather than overall", +) +# Output options +@optgroup.group("Output options") +@optgroup.option( + "-f", + "--format", + type=click.Choice(["text", "json", "json_pp"]), + default="text", + show_default="text", + help="Output format (e.g., '-f json_pp' outputs errors in pretty-printed JSON)", +) +@optgroup.option( + "-o", + "--output-file", + "output_file", + type=click.Path(), + default="", + metavar=METAVAR_FILE, + help="Output 
file for validation results; if neither this nor --print-output is specified, results are not printed", +) +@optgroup.option( + "-p", + "--print-output", + is_flag=True, + help="Print validation results to stdout; if --output-file is also specified, output to both", +) +# Logging options +@optgroup.group("Logging options") +@optgroup.option( + "-l", + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), + default="WARNING", + show_default="WARNING", + help="Log level gives the level of detail in the logging output (e.g., '-l INFO' outputs basic informational messages)", +) +@optgroup.option( + "-v", + "--verbose", + is_flag=True, + help="Output informational messages (equivalent to --log-level INFO)", +) +@optgroup.option( + "-lf", + "--log-file", + "log_file", + type=click.Path(), + default=None, + metavar=METAVAR_FILE, + help="File path for saving log output; logs still go to stderr unless --log-quiet is also used", +) +@optgroup.option( + "-lq", + "--log-quiet", + is_flag=True, + help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)", ) def validate_bids_cmd( data_path, @@ -83,7 +210,7 @@ def validate_bids_cmd( # Build argument list for the original script args = [data_path] if error_limit is not None: - args.extend(["-ec", str(error_limit)]) + args.extend(["-el", str(error_limit)]) if errors_by_file: args.append("-ef") if format: @@ -114,12 +241,31 @@ def validate_bids_cmd( @schema.command(name="validate") @click.argument("schema_path", type=click.Path(exists=True), nargs=-1, required=True) -@click.option("--add-all-extensions", is_flag=True, help="Always verify all versions of the same schema are equal.") -@click.option("-v", "--verbose", is_flag=True, help="Enable verbose output.") +@click.option("--add-all-extensions", is_flag=True, help="Always verify all versions of the same schema are equal") +@click.option("-v", "--verbose", is_flag=True, help="Enable verbose output") def 
schema_validate_cmd(schema_path, add_all_extensions, verbose): """Validate HED schema files. - SCHEMA_PATH: Path(s) to schema file(s) to validate. + This command validates HED schema files for correctness, checking structure, + syntax, and compliance with HED schema specification requirements. + + \b + Examples: + # Validate a single schema file + hedpy schema validate /path/to/HED8.3.0.xml + + # Validate multiple schema files + hedpy schema validate schema1.xml schema2.mediawiki + + # Validate with verbose output + hedpy schema validate /path/to/schema.xml --verbose + + # Verify all versions of the same schema are equal + hedpy schema validate schema.xml schema.tsv --add-all-extensions + + \b + Arguments: + SCHEMA_PATH: Path(s) to schema file(s) to validate. """ from hed.scripts.validate_schemas import main as validate_schemas_main @@ -134,11 +280,27 @@ def schema_validate_cmd(schema_path, add_all_extensions, verbose): @schema.command(name="convert") @click.argument("schema_path", type=click.Path(exists=True), nargs=-1, required=True) -@click.option("--set-ids", is_flag=True, help="Set/update HED IDs in the schema.") +@click.option("--set-ids", is_flag=True, help="Set/update HED IDs in the schema") def schema_convert_cmd(schema_path, set_ids): """Convert HED schema between formats (TSV, XML, MEDIAWIKI, JSON). - SCHEMA_PATH: Path(s) to schema file(s) to convert. + This command converts HED schema files between different formats while + maintaining semantic equivalence. Optionally updates HED IDs during conversion. 
+ + \b + Examples: + # Convert schema (format auto-detected) + hedpy schema convert /path/to/schema.xml + + # Convert and assign/update HED IDs + hedpy schema convert /path/to/schema.xml --set-ids + + # Convert multiple schemas + hedpy schema convert schema1.xml schema2.mediawiki + + \b + Arguments: + SCHEMA_PATH: Path(s) to schema file(s) to convert """ from hed.scripts.hed_convert_schema import main as convert_main @@ -156,9 +318,22 @@ def schema_convert_cmd(schema_path, set_ids): def schema_add_ids_cmd(repo_path, schema_name, schema_version): """Add HED IDs to a schema. - REPO_PATH: Path to hed-schemas repository. - SCHEMA_NAME: Schema name (e.g., 'standard'). - SCHEMA_VERSION: Schema version to process. + This command adds unique HED IDs to schema elements that don't have them, + typically used during schema development and maintenance. + + \b + Examples: + # Add IDs to standard schema version 8.3.0 + hedpy schema add-ids /path/to/hed-schemas standard 8.3.0 + + # Add IDs to a library schema + hedpy schema add-ids /path/to/hed-schemas SCORE 1.0.0 + + \b + Arguments: + REPO_PATH: Path to hed-schemas repository + SCHEMA_NAME: Schema name (e.g., 'standard', 'SCORE') + SCHEMA_VERSION: Schema version to process (e.g., '8.3.0') """ from hed.scripts.add_hed_ids import main as add_ids_main @@ -167,51 +342,119 @@ def schema_add_ids_cmd(repo_path, schema_name, schema_version): add_ids_main(args) -@schema.command(name="create-ontology") -@click.argument("repo_path", type=click.Path(exists=True)) -@click.argument("schema_name") -@click.argument("schema_version") -@click.option("--dest", type=click.Path(), help="Output directory for ontology files.") -def schema_create_ontology_cmd(repo_path, schema_name, schema_version, dest): - """Create an ontology from a HED schema. +@cli.group() +def extract(): + """HED extraction and analysis tools. - REPO_PATH: Path to hed-schemas repository. - SCHEMA_NAME: Schema name (e.g., 'standard'). - SCHEMA_VERSION: Schema version. 
+ Commands for extracting summaries and templates from tabular data. """ - from hed.scripts.create_ontology import main as create_ontology_main + pass - args = [repo_path, schema_name, schema_version] - if dest: - args.extend(["--dest", dest]) - create_ontology_main(args) +@extract.command( + name="bids-sidecar", + epilog=""" +This command extracts a JSON sidecar template from BIDS datasets by analyzing +TSV files and identifying unique values in categorical columns. The template +can be used as a starting point for adding HED annotations to the dataset. + +\b +Examples: + # Extract from event files (default suffix) + hedpy extract bids-sidecar /path/to/dataset + # Extract with verbose progress output + hedpy extract bids-sidecar /path/to/dataset --verbose -@cli.command(name="extract-sidecar") + # Extract from participant files instead of events + hedpy extract bids-sidecar /path/to/dataset -s participants + + # Save output to a file instead of stdout + hedpy extract bids-sidecar /path/to/dataset -o template.json + + # Exclude specific columns from the template + hedpy extract bids-sidecar /path/to/dataset -sc onset -sc duration -sc response_time + + # Save logs to file and suppress console output + hedpy extract bids-sidecar /path/to/dataset --log-file extraction.log --log-quiet +""", +) @click.argument("data_path", type=click.Path(exists=True)) -@click.option("-s", "--suffix", required=True, help="File suffix to process (e.g., 'events').") -@click.option("-vc", "--value-columns", multiple=True, help="Column names to treat as value columns.") -@click.option( - "-sc", - "--skip-columns", - multiple=True, - default=["onset", "duration", "sample"], - help="Column names to skip.", -) -@click.option("-l", "--log-level", type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), default="WARNING") -@click.option("-lf", "--log-file", type=click.Path(), help="Log file path.") -@click.option("-lq", "--log-quiet", is_flag=True, help="Suppress stderr output.") 
-@click.option("-o", "--output-file", type=click.Path(), help="Output file for sidecar template.") -@click.option("-v", "--verbose", is_flag=True, help="Enable verbose output.") -@click.option( +# File selection options +@optgroup.group("File selection options") +@optgroup.option( + "-s", + "--suffix", + default="events", + show_default="events", + metavar=METAVAR_NAME, + help="Suffix for base filename(s) (e.g., '-s participants' to match files ending with participants.tsv", +) +@optgroup.option( "-x", "--exclude-dirs", multiple=True, default=["sourcedata", "derivatives", "code", "stimuli"], - help="Directories to exclude.", + show_default="sourcedata derivatives code stimuli", + metavar=METAVAR_NAME, + help="Directory names (relative to root) to exclude (e.g., -x sourcedata -x derivatives)", +) +# Column processing options +@optgroup.group("Column processing options") +@optgroup.option( + "-vc", + "--value-columns", + multiple=True, + metavar=METAVAR_NAME, + help="Column names to treat as value columns (e.g., -vc response_time -vc accuracy)", +) +@optgroup.option( + "-sc", + "--skip-columns", + multiple=True, + default=["onset", "duration", "sample"], + show_default="onset duration sample", + metavar=METAVAR_NAME, + help="Column names to skip (e.g., -sc onset -sc duration)", +) +# Output options +@optgroup.group("Output options") +@optgroup.option( + "-o", + "--output-file", + type=click.Path(), + metavar=METAVAR_FILE, + help="Output file for sidecar template; if not specified, output written to stdout", +) +# Logging options +@optgroup.group("Logging options") +@optgroup.option( + "-l", + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), + default="WARNING", + show_default="WARNING", +) +@optgroup.option( + "-v", + "--verbose", + is_flag=True, + help="Enable verbose output", ) -def extract_sidecar_cmd( +@optgroup.option( + "-lf", + "--log-file", + type=click.Path(), + metavar=METAVAR_FILE, + help="File path for saving log 
output; logs still go to stderr unless --log-quiet is also used", +) +@optgroup.option( + "-lq", + "--log-quiet", + is_flag=True, + help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)", +) +def extract_bids_sidecar_cmd( data_path, suffix, value_columns, skip_columns, log_level, log_file, log_quiet, output_file, verbose, exclude_dirs ): """Extract a sidecar template from a BIDS dataset. @@ -244,6 +487,197 @@ def extract_sidecar_cmd( extract_main(args) +@extract.command( + name="tabular-summary", + epilog=""" +This command processes TSV (tab-separated values) files and generates summary +statistics about the columns and their values. Unlike extract bids-sidecar, +this command does not assume BIDS dataset organization and can process any +collection of TSV files matching specified criteria. + +\b +Examples: + # Extract summary from event TSV files (default suffix='events') + hedpy extract tabular-summary /path/to/data + + # Extract summary from all TSV files using wildcard + hedpy extract tabular-summary /path/to/data -s '*' + + # Extract summary with verbose output and save to file + hedpy extract tabular-summary /path/to/data --verbose -o summary.json + + # Extract summary with categorical value limit + hedpy extract tabular-summary /path/to/data --categorical-limit 50 + + # Process files with specific suffix and exclude certain directories + hedpy extract tabular-summary /path/to/data -s participants -x test -x backup + + # Filter to only process files containing 'sub-01' in filename + hedpy extract tabular-summary /path/to/data --filter 'sub-01' + + # Filter to only process files from task 'rest' with all TSV files + hedpy extract tabular-summary /path/to/data -s '*' --filter 'task-rest' +""", +) +@click.argument("data_path", type=click.Path(exists=True)) +# File selection options +@optgroup.group("File selection options") +@optgroup.option( + "-p", + "--prefix", + "name_prefix", + metavar=METAVAR_PREFIX, + 
help="Prefix for base filename (e.g., -p sub- to match 'sub-01_events.tsv')", +) +@optgroup.option( + "-s", + "--suffix", + "name_suffix", + default="events", + show_default="events", + metavar=METAVAR_NAME, + help="Suffix for base filename (e.g., -s events to match files ending with '_events.tsv'); use '*' to match all TSV files regardless of suffix", +) +@optgroup.option( + "-x", + "--exclude-dirs", + multiple=True, + default=[], + metavar=METAVAR_NAME, + help="Directory names (relative to root) to exclude (e.g., -x derivatives -x code)", +) +@optgroup.option( + "-fl", + "--filter", + "filename_filter", + metavar=METAVAR_STRING, + help="Filter files to keep only those whose basename contains the designated filter (e.g., -fl task-rest retains files with 'task-rest' in the filename)", +) +# Column processing options +@optgroup.group("Column processing options") +@optgroup.option( + "-vc", + "--value-columns", + multiple=True, + metavar=METAVAR_NAME, + help="Column names to treat as value columns (e.g., -vc response_time -vc accuracy)", +) +@optgroup.option( + "-sc", + "--skip-columns", + multiple=True, + metavar=METAVAR_NAME, + help="Column names to skip (e.g., -sc onset -sc duration)", +) +@optgroup.option( + "-cl", + "--categorical-limit", + type=int, + metavar=METAVAR_N, + help="Maximum unique values for categorical columns", +) +# Output options +@optgroup.group("Output options") +@optgroup.option( + "-o", + "--output-file", + type=click.Path(), + metavar=METAVAR_FILE, + help="Output file for summary; if not specified, output written to stdout", +) +@optgroup.option( + "-f", + "--format", + "output_format", + type=click.Choice(["json", "text"]), + default="json", + show_default="json", + help="Output format", +) +# Logging options +@optgroup.group("Logging options") +@optgroup.option( + "-l", + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), + default="WARNING", + show_default="WARNING", +) +@optgroup.option( + "-v", + 
"--verbose", + is_flag=True, + help="Enable verbose output", +) +@optgroup.option( + "-lf", + "--log-file", + type=click.Path(), + metavar=METAVAR_FILE, + help="File path for saving log output; logs still go to stderr unless --log-quiet is also used", +) +@optgroup.option( + "-lq", + "--log-quiet", + is_flag=True, + help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)", +) +def extract_tabular_summary_cmd( + data_path, + name_prefix, + name_suffix, + exclude_dirs, + filename_filter, + value_columns, + skip_columns, + categorical_limit, + output_file, + output_format, + log_level, + log_file, + log_quiet, + verbose, +): + """Extract tabular summary from TSV files. + + DATA_PATH: Root directory containing TSV files to process. + """ + from hed.scripts.extract_tabular_summary import main as extract_summary_main + + args = [data_path] + if name_prefix: + args.extend(["-p", name_prefix]) + if name_suffix: + args.extend(["-s", name_suffix]) + if exclude_dirs: + args.append("-x") + args.extend(exclude_dirs) + if filename_filter: + args.extend(["-fl", filename_filter]) + if value_columns: + args.append("-vc") + args.extend(value_columns) + if skip_columns: + args.append("-sc") + args.extend(skip_columns) + if categorical_limit is not None: + args.extend(["-cl", str(categorical_limit)]) + if output_file: + args.extend(["-o", output_file]) + if output_format: + args.extend(["-f", output_format]) + if log_level: + args.extend(["-l", log_level]) + if log_file: + args.extend(["-lf", log_file]) + if log_quiet: + args.append("-lq") + if verbose: + args.append("-v") + + extract_summary_main(args) + + def main(): """Main entry point for the HED CLI.""" cli() diff --git a/hed/scripts/extract_tabular_summary.py b/hed/scripts/extract_tabular_summary.py index b12ceb40..dcc5ff97 100644 --- a/hed/scripts/extract_tabular_summary.py +++ b/hed/scripts/extract_tabular_summary.py @@ -57,110 +57,128 @@ def get_parser(): description="Extract 
tabular summary from a collection of tabular files.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=__doc__, + add_help=False, + ) + + # Add custom help option with consistent formatting + parser.add_argument( + "-h", + "--help", + action="help", + help="Show this help message and exit", ) # Required arguments - parser.add_argument("data_path", help="Full path of root directory containing TSV files to process.") + parser.add_argument("data_path", help="Full path of root directory containing TSV files to process") - # File selection arguments - parser.add_argument( + # File selection options + file_group = parser.add_argument_group("File selection options") + file_group.add_argument( "-p", "--prefix", dest="name_prefix", default=None, - help="Optional prefix for base filename (e.g., 'sub-' to match 'sub-01_events.tsv').", + help="Prefix for base filename (e.g., 'sub-' to match 'sub-01_events.tsv')", ) - parser.add_argument( + file_group.add_argument( "-s", "--suffix", dest="name_suffix", default="events", - help="Suffix for base filename (e.g., 'events' to match files ending with '_events.tsv'). " - "Use '*' to match all TSV files regardless of suffix. Default: events", + help="Suffix for base filename (e.g., 'events' to match files ending with '_events.tsv'); " + "use '*' to match all TSV files regardless of suffix (default: %(default)s)", ) - parser.add_argument( - "-x", "--exclude-dirs", nargs="*", default=[], dest="exclude_dirs", help="Directory names to exclude from file search." + file_group.add_argument( + "-x", + "--exclude-dirs", + nargs="*", + default=[], + dest="exclude_dirs", + help="Directory names to exclude from file search (default: none)", ) - parser.add_argument( + file_group.add_argument( "-fl", "--filter", dest="filename_filter", default=None, - help="Optional string to filter filenames. 
Only files containing this string in their name will be processed.", + help="Optional string to filter filenames; only files containing this string in their name will be processed", ) - # Column processing arguments - parser.add_argument( + # Column processing options + column_group = parser.add_argument_group("Column processing options") + column_group.add_argument( "-vc", "--value-columns", dest="value_columns", nargs="*", default=None, - help="List of column names to treat as value columns (numeric/continuous data).", + help="List of column names to treat as value columns (numeric/continuous data)", ) - parser.add_argument( + column_group.add_argument( "-sc", "--skip-columns", dest="skip_columns", nargs="*", default=None, - help="List of column names to skip in the extraction.", + help="List of column names to skip in the extraction", ) - parser.add_argument( + column_group.add_argument( "-cl", "--categorical-limit", dest="categorical_limit", type=int, default=None, - help="Maximum number of unique values to store for a categorical column. " - "If a column has more unique values, it will be truncated. Default: None (no limit).", + help="Maximum number of unique values to store for a categorical column; " + "if a column has more unique values, it will be truncated (default: None, no limit)", ) - # Output arguments - parser.add_argument( + # Output options + output_group = parser.add_argument_group("Output options") + output_group.add_argument( "-o", "--output-file", dest="output_file", default="", - help="Full path of output file for the tabular summary (JSON format). 
" - "If not specified, output written to standard out.", + help="Full path of output file for the tabular summary (JSON format); " + "if not specified, output written to standard out", ) - parser.add_argument( + output_group.add_argument( "-f", "--format", dest="output_format", choices=["json", "text"], default="json", - help="Output format: 'json' for JSON structure or 'text' for human-readable summary. Default: json", + help="Output format: 'json' for JSON structure or 'text' for human-readable summary (default: %(default)s)", ) - # Logging arguments - parser.add_argument( + # Logging options + logging_group = parser.add_argument_group("Logging options") + logging_group.add_argument( "-l", "--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], default="WARNING", - help="Log level (case insensitive). Default: WARNING", + help="Log level (case insensitive, default: %(default)s)", ) - parser.add_argument( + logging_group.add_argument( + "-v", + "--verbose", + action="store_true", + help="Show progress messages during processing (equivalent to --log-level INFO)", + ) + logging_group.add_argument( "-lf", "--log-file", dest="log_file", default=None, - help="Full path to save log output to file. 
If not specified, logs go to stderr.", + help="Full path to save log output to file; if not specified, logs go to stderr", ) - parser.add_argument( + logging_group.add_argument( "-lq", "--log-quiet", action="store_true", dest="log_quiet", - help="If present, suppress log output to stderr (only applies if --log-file is used).", - ) - parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="If present, output informative messages as computation progresses (equivalent to --log-level INFO).", + help="Suppress log output to stderr (only applies if --log-file is used)", ) return parser diff --git a/hed/scripts/hed_extract_bids_sidecar.py b/hed/scripts/hed_extract_bids_sidecar.py index bca79a36..5f65504a 100644 --- a/hed/scripts/hed_extract_bids_sidecar.py +++ b/hed/scripts/hed_extract_bids_sidecar.py @@ -10,9 +10,23 @@ - --log-quiet: When using --log-file, suppress stderr output (file only) Examples: - extract_bids_sidecar /path/to/dataset --suffix events - extract_bids_sidecar /path/to/dataset --suffix events --verbose - extract_bids_sidecar /path/to/dataset --suffix events --log-file log.txt --log-quiet + # Extract from event files (default suffix='events') + hed_extract_bids_sidecar /path/to/dataset + + # Extract from event files with verbose progress output + hed_extract_bids_sidecar /path/to/dataset --verbose + + # Extract from participant files instead of events + hed_extract_bids_sidecar /path/to/dataset --suffix participants + + # Save output to a file instead of stdout + hed_extract_bids_sidecar /path/to/dataset --output_file template.json + + # Exclude specific columns from the template + hed_extract_bids_sidecar /path/to/dataset --skip-columns onset duration response_time + + # Save logs to file and suppress console output + hed_extract_bids_sidecar /path/to/dataset --log-file extraction.log --log-quiet """ import argparse @@ -25,72 +39,99 @@ def get_parser(): """Create the argument parser for extract_bids_sidecar.""" - parser = 
argparse.ArgumentParser(description="Extract sidecar template from a BIDS dataset.") - parser.add_argument("data_path", help="Full path of BIDS dataset root directory.") + parser = argparse.ArgumentParser( + description="Extract sidecar template from a BIDS dataset.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + add_help=False, + ) + + # Add custom help option with consistent formatting parser.add_argument( + "-h", + "--help", + action="help", + help="Show this help message and exit", + ) + + # Required arguments + parser.add_argument("data_path", help="Full path of BIDS dataset root directory") + + # File selection options + file_group = parser.add_argument_group("File selection options") + file_group.add_argument( "-s", "--suffix", dest="suffix", - required=True, - help="Suffix (without underscore) of tsv files to process (e.g., 'events', 'participants').", + default="events", + help="Suffix (without underscore) of filenames for TSV files to process (e.g., 'events', 'participants', default: %(default)s)", ) - parser.add_argument( + file_group.add_argument( + "-x", + "--exclude-dirs", + nargs="*", + default=["sourcedata", "derivatives", "code", "stimuli"], + dest="exclude_dirs", + help="Directory names (relative to data_path) to exclude in search for files to process (default: sourcedata derivatives code stimuli)", + ) + + # Column processing options + column_group = parser.add_argument_group("Column processing options") + column_group.add_argument( "-vc", "--value-columns", dest="value_columns", nargs="*", default=None, - help="List of column names to treat as value columns.", + help="List of column names to treat as value columns", ) - parser.add_argument( + column_group.add_argument( "-sc", "--skip-columns", dest="skip_columns", nargs="*", default=["onset", "duration", "sample"], - help="List of column names to skip in the extraction.", + help="List of column names to skip in the extraction (default: onset duration sample)", ) - 
parser.add_argument( + + # Output options + output_group = parser.add_argument_group("Output options") + output_group.add_argument( + "-o", + "--output_file", + dest="output_file", + default="", + help="Optional full path of output file for the sidecar template; otherwise output written to standard out", + ) + + # Logging options + logging_group = parser.add_argument_group("Logging options") + logging_group.add_argument( "-l", "--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], default="WARNING", - help="Log level (case insensitive). Default: WARNING", + help="Log level (case insensitive, default: %(default)s)", ) - parser.add_argument( + logging_group.add_argument( + "-v", + "--verbose", + action="store_true", + help="Show progress messages during processing (equivalent to --log-level INFO)", + ) + logging_group.add_argument( "-lf", "--log-file", dest="log_file", default=None, - help="Full path to save log output to file. If not specified, logs go to stderr.", + help="Full path to save log output to file; if not specified, logs go to stderr", ) - parser.add_argument( + logging_group.add_argument( "-lq", "--log-quiet", action="store_true", dest="log_quiet", - help="If present, suppress log output to stderr (only applies if --log-file is used).", - ) - parser.add_argument( - "-o", - "--output_file", - dest="output_file", - default="", - help="Full path of output file for the sidecar template -- otherwise output written to standard out.", - ) - parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="If present, output informative messages as computation progresses (equivalent to --log-level INFO).", - ) - parser.add_argument( - "-x", - "--exclude-dirs", - nargs="*", - default=["sourcedata", "derivatives", "code", "stimuli"], - dest="exclude_dirs", - help="Directories name to exclude in search for files to process.", + help="Suppress log output to stderr (only applies if --log-file is used)", ) return parser diff --git 
a/hed/scripts/validate_bids.py b/hed/scripts/validate_bids.py index f1eca2e4..5365d127 100644 --- a/hed/scripts/validate_bids.py +++ b/hed/scripts/validate_bids.py @@ -2,6 +2,10 @@ """ Command-line script for validating BIDS datasets with HED annotations. +This script validates HED annotations in BIDS-formatted datasets, checking for compliance +with HED schema rules and proper annotation structure. It processes TSV files with their +associated JSON sidecars, following BIDS inheritance rules. + Logging Options: - Default: WARNING level logs go to stderr (quiet unless there are issues) - --verbose or --log-level INFO: Show informational messages about progress @@ -10,10 +14,29 @@ - --log-quiet: When using --log-file, suppress stderr output (file only) Examples: - validate_bids /path/to/dataset # Quiet validation - validate_bids /path/to/dataset --verbose # Show progress - validate_bids /path/to/dataset --log-level DEBUG # Detailed debugging - validate_bids /path/to/dataset --log-file log.txt --log-quiet # Log to file only + # Basic validation with minimal output + validate_bids /path/to/dataset + + # Validation with progress messages + validate_bids /path/to/dataset --verbose + + # Validate specific file types + validate_bids /path/to/dataset --suffixes events + + # Validate multiple file types + validate_bids /path/to/dataset --suffixes events participants + + # Check for warnings in addition to errors + validate_bids /path/to/dataset --check_for_warnings + + # Save validation results to JSON file + validate_bids /path/to/dataset --format json --output_file results.json + + # Detailed debugging with file logging + validate_bids /path/to/dataset --log-level DEBUG --log-file validation.log --log-quiet + + # Limit error reporting for large datasets + validate_bids /path/to/dataset --error-limit 10 """ import argparse @@ -26,97 +49,125 @@ def get_parser(): - # Create the argument parser - parser = argparse.ArgumentParser(description="Validate a BIDS-formatted HED 
dataset.") - parser.add_argument("data_path", help="Full path of dataset root directory.") + """Create the argument parser for validate_bids. + + Returns: + argparse.ArgumentParser: Configured argument parser. + """ + parser = argparse.ArgumentParser( + description="Validate a BIDS-formatted HED dataset.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + add_help=False, + ) + + # Add custom help option parser.add_argument( - "-ec", - "--error_count", + "-h", + "--help", + action="help", + help="Show this help message and exit", + ) + + # Required arguments + parser.add_argument("data_path", help="Full path of dataset root directory") + + # File selection options + file_group = parser.add_argument_group("File selection options") + file_group.add_argument( + "-s", + "--suffixes", + dest="suffixes", + nargs="*", + default=["events", "participants"], + help="Suffixes (without underscore) of TSV files to validate; use '*' for all TSV files (default: events participants)", + ) + file_group.add_argument( + "-x", + "--exclude-dirs", + nargs="*", + default=["sourcedata", "derivatives", "code", "stimuli"], + dest="exclude_dirs", + help="Directory names to exclude in search for files to validate (default: sourcedata derivatives code stimuli)", + ) + + # Validation options + validation_group = parser.add_argument_group("Validation options") + validation_group.add_argument( + "-w", + "--check_for_warnings", + action="store_true", + dest="check_for_warnings", + help="Check for warnings in addition to errors", + ) + validation_group.add_argument( + "-el", + "--error-limit", dest="error_limit", type=int, default=None, - help="Limit the number of errors of each code type to report for text output.", + help="Limit the number of errors of each code type to report for text output", ) - parser.add_argument( + validation_group.add_argument( "-ef", "--errors-by-file", action="store_true", dest="errors_by_file", - help="Apply error limit by file rather than 
overall for text output.", + help="Apply error limit by file rather than overall for text output", ) - parser.add_argument( + + # Output options + output_group = parser.add_argument_group("Output options") + output_group.add_argument( "-f", "--format", choices=["text", "json", "json_pp"], default="text", - help="Output format: 'text' (human-readable with counts), " - "'json' (compact JSON array of issues), or " - "'json_pp' (pretty-printed JSON with version metadata and indented formatting)", + help="Output format: 'text' (human-readable with counts), 'json' (compact JSON array), or 'json_pp' (pretty-printed JSON with metadata, default: %(default)s)", ) - parser.add_argument( - "-l", - "--log-level", - choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], - default="WARNING", - help="Log level (case insensitive). Default: INFO", - ) - parser.add_argument( - "-lf", - "--log-file", - dest="log_file", - default=None, - help="Full path to save log output to file. If not specified, logs go to stderr.", - ) - parser.add_argument( - "-lq", - "--log-quiet", - action="store_true", - dest="log_quiet", - help="If present, suppress log output to stderr (only applies if --log-file is used).", - ) - parser.add_argument( + output_group.add_argument( "-o", "--output_file", dest="output_file", default="", - help="Full path of output of validator -- otherwise output written to standard error.", + help="Full path of output file for validation results; if neither this nor --print_output is specified, results are not printed", ) - parser.add_argument( + output_group.add_argument( "-p", "--print_output", action="store_true", dest="print_output", - help="If present, output the results to standard out in addition to any saving of the files.", + help="Print validation results to stdout; if --output_file is also specified, output to both", ) - parser.add_argument( - "-s", - "--suffixes", - dest="suffixes", - nargs="*", - default=["events", "participants"], - help="Optional list of 
suffixes (no under_bar) of tsv files to validate." - + " If -s with no values, will use all possible suffixes as with single argument '*'.", + + # Logging options + logging_group = parser.add_argument_group("Logging options") + logging_group.add_argument( + "-l", + "--log-level", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + default="WARNING", + help="Log level (case insensitive, default: %(default)s)", ) - parser.add_argument( + logging_group.add_argument( "-v", "--verbose", action="store_true", - help="If present, output informative messages as computation progresses (equivalent to --log-level INFO).", + help="Show progress messages during processing (equivalent to --log-level INFO)", ) - parser.add_argument( - "-w", - "--check_for_warnings", - action="store_true", - dest="check_for_warnings", - help="If present, check for warnings as well as errors.", + logging_group.add_argument( + "-lf", + "--log-file", + dest="log_file", + default=None, + help="Full path to save log output to file; logs still go to stderr unless --log-quiet is also used", ) - parser.add_argument( - "-x", - "--exclude-dirs", - nargs="*", - default=["sourcedata", "derivatives", "code", "stimuli"], - dest="exclude_dirs", - help="Directories name to exclude in search for files to validate.", + logging_group.add_argument( + "-lq", + "--log-quiet", + action="store_true", + dest="log_quiet", + help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)", ) return parser diff --git a/pyproject.toml b/pyproject.toml index 25ffea91..69c0c137 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ requires-python = ">=3.10" dependencies = [ "click>=8.0.0", + "click-option-group>=0.5.0", "defusedxml", "et-xmlfile", "inflect", diff --git a/requirements-dev.txt b/requirements-dev.txt index a5db4e31..0aa36ec5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,6 +11,8 @@ coverage>=7.0.0 ruff>=0.8.0 codespell>=2.2.0 
black[jupyter]>=24.0.0 +mdformat>=0.7.0 +mdformat-myst>=0.1.5 # Documentation requirements sphinx>=7.1.0,<8.2.0 diff --git a/requirements.txt b/requirements.txt index 1777ca04..dfd0a7f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ click>=8.0.0 +click-option-group>=0.5.0 defusedxml>=0.7.1 inflect>=7.5.0 jsonschema>=4.23.0 diff --git a/tests/test_cli_parameter_parity.py b/tests/test_cli_parameter_parity.py index 78d33e4a..a9f06f22 100644 --- a/tests/test_cli_parameter_parity.py +++ b/tests/test_cli_parameter_parity.py @@ -7,6 +7,7 @@ from hed.cli.cli import cli from hed.scripts.validate_bids import get_parser as get_validate_bids_parser from hed.scripts.hed_extract_bids_sidecar import get_parser as get_extract_sidecar_parser +from hed.scripts.extract_tabular_summary import get_parser as get_extract_summary_parser from hed.scripts.validate_schemas import get_parser as get_validate_schemas_parser @@ -86,19 +87,17 @@ def _get_click_options(self, command): return {"positional": positional, "optional": optional, "flags": flags} def test_validate_bids_parameters(self): - """Test validate-bids CLI parameters match validate_bids.py parser.""" + """Test validate bids-dataset CLI parameters match validate_bids.py parser.""" # Get original parser original_parser = get_validate_bids_parser() original_opts = self._get_parser_options(original_parser) - # Get CLI command - cli_command = None - for cmd_name, cmd in cli.commands.items(): - if cmd_name == "validate-bids": - cli_command = cmd - break + # Get CLI command - now it's validate bids-dataset + validate_group = cli.commands.get("validate") + self.assertIsNotNone(validate_group, "validate command group not found") + cli_command = validate_group.commands.get("bids-dataset") - self.assertIsNotNone(cli_command, "validate-bids command not found in CLI") + self.assertIsNotNone(cli_command, "validate bids-dataset command not found in CLI") cli_opts = self._get_click_options(cli_command) # Check positional 
arguments @@ -138,13 +137,48 @@ def test_validate_bids_parameters(self): if flag in original_flags: self.assertIn(flag, cli_flags, f"Flag '{flag}' from original parser not found in CLI") - def test_extract_sidecar_parameters(self): - """Test extract-sidecar CLI parameters match hed_extract_bids_sidecar.py parser.""" + def test_extract_bids_sidecar_parameters(self): + """Test extract bids-sidecar CLI parameters match hed_extract_bids_sidecar.py parser.""" original_parser = get_extract_sidecar_parser() original_opts = self._get_parser_options(original_parser) - extract_command = cli.commands.get("extract-sidecar") - self.assertIsNotNone(extract_command, "extract-sidecar command not found") + extract_group = cli.commands.get("extract") + self.assertIsNotNone(extract_group, "extract command group not found") + extract_command = extract_group.commands.get("bids-sidecar") + self.assertIsNotNone(extract_command, "extract bids-sidecar command not found") + + cli_opts = self._get_click_options(extract_command) + + # Check positional count matches + self.assertEqual( + len(cli_opts["positional"]), + len(original_opts["positional"]), + f"Positional argument count mismatch: CLI has {len(cli_opts['positional'])}, original has {len(original_opts['positional'])}", + ) + + # Check optional parameters from original parser exist in CLI + original_dests = set(original_opts["optional"].keys()) + cli_dests = set(cli_opts["optional"].keys()) + + for orig_dest in original_dests: + self.assertIn(orig_dest, cli_dests, f"Parameter '{orig_dest}' from original parser not found in CLI") + + # Check flags from original parser exist in CLI + original_flags = {flag[0] for flag in original_opts["flags"]} + cli_flags = {flag[0] for flag in cli_opts["flags"]} + + for orig_flag in original_flags: + self.assertIn(orig_flag, cli_flags, f"Flag '{orig_flag}' from original parser not found in CLI") + + def test_extract_tabular_summary_parameters(self): + """Test extract tabular-summary CLI parameters 
match extract_tabular_summary.py parser.""" + original_parser = get_extract_summary_parser() + original_opts = self._get_parser_options(original_parser) + + extract_group = cli.commands.get("extract") + self.assertIsNotNone(extract_group, "extract command group not found") + extract_command = extract_group.commands.get("tabular-summary") + self.assertIsNotNone(extract_command, "extract tabular-summary command not found") cli_opts = self._get_click_options(extract_command) @@ -210,35 +244,15 @@ def test_schema_add_ids_parameters(self): self.assertEqual(cli_opts["positional"][1], "schema_name", "Second positional should be schema_name") self.assertEqual(cli_opts["positional"][2], "schema_version", "Third positional should be schema_version") - def test_schema_create_ontology_parameters(self): - """Test schema create-ontology uses positional arguments.""" - schema_group = cli.commands.get("schema") - ontology_command = schema_group.commands.get("create-ontology") - self.assertIsNotNone(ontology_command, "create-ontology command not found") - - cli_opts = self._get_click_options(ontology_command) - - # Should have 3 positional arguments - self.assertEqual( - len(cli_opts["positional"]), 3, f"Should have 3 positional arguments, got {len(cli_opts['positional'])}" - ) - self.assertEqual(cli_opts["positional"][0], "repo_path", "First positional should be repo_path") - self.assertEqual(cli_opts["positional"][1], "schema_name", "Second positional should be schema_name") - self.assertEqual(cli_opts["positional"][2], "schema_version", "Third positional should be schema_version") - - # Check --dest option exists - self.assertIn("dest", cli_opts["optional"], "--dest option not found") - def test_all_legacy_commands_have_cli_equivalents(self): """Test that all legacy script entry points have CLI equivalents.""" # Legacy commands from pyproject.toml legacy_to_cli = { - "validate_bids": "validate-bids", - "hed_extract_bids_sidecar": "extract-sidecar", + "validate_bids": "validate 
bids-dataset", + "hed_extract_bids_sidecar": "extract bids-sidecar", "hed_validate_schemas": "schema validate", "hed_update_schemas": "schema convert", "hed_add_ids": "schema add-ids", - "hed_create_ontology": "schema create-ontology", } for legacy, cli_path in legacy_to_cli.items():