diff --git a/hed/cli/cli.py b/hed/cli/cli.py index 8d413d3b..d709715b 100644 --- a/hed/cli/cli.py +++ b/hed/cli/cli.py @@ -186,6 +186,11 @@ def validate(): is_flag=True, help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)", ) +@optgroup.option( + "--no-log", + is_flag=True, + help="Disable all logging output", +) def validate_bids_cmd( data_path, error_limit, @@ -194,6 +199,7 @@ def validate_bids_cmd( log_level, log_file, log_quiet, + no_log, output_file, print_output, suffixes, @@ -221,6 +227,8 @@ def validate_bids_cmd( args.extend(["-lf", log_file]) if log_quiet: args.append("-lq") + if no_log: + args.append("--no-log") if output_file: args.extend(["-o", output_file]) if print_output: @@ -239,6 +247,157 @@ def validate_bids_cmd( validate_bids_main(args) +@validate.command( + name="hed-string", + epilog=""" +This command validates a HED annotation string against a specified HED schema +version. It can optionally process definitions and check for warnings in addition +to errors. Multiple schema versions can be specified for validation with library schemas. + +\b +Examples: + # Basic validation of a HED string + hedpy validate hed-string "Event, (Sensory-event, (Visual-presentation, (Computer-screen, Face)))" -sv 8.3.0 + + # Validate with definitions + hedpy validate hed-string "Event, Def/MyDef" -sv 8.4.0 -d "(Definition/MyDef, (Action, Move))" + + # Validate with multiple schemas (base + library) + hedpy validate hed-string "Event, Action" -sv 8.3.0 -sv score_1.1.0 + + # Check for warnings as well as errors + hedpy validate hed-string "Event, Action/Button-press" -sv 8.4.0 --check-for-warnings + + # Save validation results to a file + hedpy validate hed-string "Event" -sv 8.4.0 -o validation_results.txt + + # Output results in JSON format + hedpy validate hed-string "Event, Action" -sv 8.4.0 -f json + + # Verbose output with informational messages + hedpy validate hed-string "Event, (Action, Move)" -sv 8.4.0 --verbose +""", +) +@click.argument("hed_string") +# Validation options +@optgroup.group("Validation options") +@optgroup.option( + "-sv", + "--schema-version", + required=True, + multiple=True, + metavar="VERSION", + help="HED schema version(s) to validate against (e.g., '8.3.0'). 
Can be specified multiple times for multiple schemas (e.g., -sv 8.3.0 -sv score_1.1.0)", +) +@optgroup.option( + "-d", + "--definitions", + default="", + metavar=METAVAR_STRING, + help="A string containing relevant HED definitions to use during validation (e.g., '(Definition/MyDef, (Action, Move))')", +) +@optgroup.option( + "-w", + "--check-for-warnings", + is_flag=True, + help="Check for warnings as well as errors", +) +# Output options +@optgroup.group("Output options") +@optgroup.option( + "-f", + "--format", + type=click.Choice(["text", "json"]), + default="text", + show_default="text", + help="Output format for validation results (text: human-readable; json: structured format for programmatic use)", +) +@optgroup.option( + "-o", + "--output-file", + type=click.Path(), + default="", + metavar=METAVAR_FILE, + help="Path for output file to hold validation results; if not specified, output to stdout", +) +# Logging options +@optgroup.group("Logging options") +@optgroup.option( + "-l", + "--log-level", + type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]), + default="WARNING", + show_default="WARNING", + help="Log level for diagnostic messages", +) +@optgroup.option( + "-v", + "--verbose", + is_flag=True, + help="Output informational messages (equivalent to --log-level INFO)", +) +@optgroup.option( + "-lf", + "--log-file", + type=click.Path(), + metavar=METAVAR_FILE, + help="File path for saving log output; logs still go to stderr unless --log-quiet is also used", +) +@optgroup.option( + "-lq", + "--log-quiet", + is_flag=True, + help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)", +) +@optgroup.option( + "--no-log", + is_flag=True, + help="Disable all logging output", +) +def validate_hed_string_cmd( + hed_string, + schema_version, + definitions, + check_for_warnings, + format, + output_file, + log_level, + log_file, + log_quiet, + no_log, + verbose, +): + """Validate a HED annotation string. + + HED_STRING: The HED annotation string to validate (use quotes for strings with spaces or special characters). 
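+
+    This command is a thin wrapper: it packs the options into an argument list
+    and delegates to hed.scripts.validate_hed_string.main.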
+    """
+    from hed.scripts.validate_hed_string import main as validate_string_main
+
+    # Pass all versions after a single -sv so argparse's nargs="+" collects them;
+    # repeating -sv would make argparse keep only the last value.
+    args = [hed_string, "-sv", *schema_version]
+    if definitions:
+        args.extend(["-d", definitions])
+    if check_for_warnings:
+        args.append("-w")
+    if format:
+        args.extend(["-f", format])
+    if output_file:
+        args.extend(["-o", output_file])
+    if log_level:
+        args.extend(["-l", log_level])
+    if log_file:
+        args.extend(["-lf", log_file])
+    if log_quiet:
+        args.append("-lq")
+    if no_log:
+        args.append("--no-log")
+    if verbose:
+        args.append("-v")
+
+    validate_string_main(args)
+
+
 @schema.command(name="validate")
 @click.argument("schema_path", type=click.Path(exists=True), nargs=-1, required=True)
 @click.option("--add-all-extensions", is_flag=True, help="Always verify all versions of the same schema are equal")
diff --git a/hed/errors/__init__.py b/hed/errors/__init__.py
index cca61386..ed213224 100644
--- a/hed/errors/__init__.py
+++ b/hed/errors/__init__.py
@@ -1,6 +1,6 @@
 """Error handling module for HED."""
 
-from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references
+from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references, iter_errors
 from .error_types import (
     DefinitionErrors,
     TemporalErrors,
diff --git a/hed/schema/hed_cache_lock.py b/hed/schema/hed_cache_lock.py
index 570eb8d8..655d2d5c 100644
--- a/hed/schema/hed_cache_lock.py
+++ b/hed/schema/hed_cache_lock.py
@@ -4,7 +4,6 @@
 import os
 
 import portalocker
 
-
 TIMESTAMP_FILENAME = "last_update.txt"
 CACHE_TIME_THRESHOLD = 300 * 6
diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py
index 75cf8ef0..5af65a7b 100644
--- a/hed/schema/schema_io/wiki2schema.py
+++ b/hed/schema/schema_io/wiki2schema.py
@@ -13,7 +13,6 @@
 from hed.schema.schema_io.wiki_constants import HedWikiSection, WIKI_EXTRA_DICT
 from hed.schema.schema_io import text_util
 
-
 extend_here_line = "extend here"
 invalid_characters_to_strip = ["​"]
 tag_name_expression = r"(\*+|\'{3})(.*?)(\'{3})?\s*([\[\{]|$)+"
diff --git a/hed/schema/schema_validation_util_deprecated.py b/hed/schema/schema_validation_util_deprecated.py
index c39db1fe..e5551bcd 100644
--- a/hed/schema/schema_validation_util_deprecated.py
+++ b/hed/schema/schema_validation_util_deprecated.py
@@ -3,7 +3,6 @@
 from hed.errors.error_reporter import ErrorHandler
 from hed.errors.error_types import SchemaWarnings
 
-
 ALLOWED_TAG_CHARS = "-"
 ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
 
diff --git a/hed/scripts/script_utils.py b/hed/scripts/script_utils.py
new file mode 100644
index 00000000..06b11829
--- /dev/null
+++ b/hed/scripts/script_utils.py
@@ -0,0 +1,109 @@
+"""
+Utility functions for HED command-line scripts.
+
+This module provides common functionality used across multiple HED scripts,
+including logging configuration and argument handling.
+"""
+
+import json
+import logging
+import sys
+from hed import _version as vr
+from hed.errors import get_printable_issue_string, ErrorHandler, iter_errors
+
+
+def setup_logging(log_level, log_file=None, log_quiet=False, verbose=False, no_log=False):
+    """Configure logging for HED scripts.
+
+    Sets up the root logger with appropriate handlers for console (stderr) and/or
+    file output based on the provided arguments.
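+    Because basicConfig is called with force=True, any handlers installed by a
+    previous call are removed first, so repeated calls reconfigure logging cleanly.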
+
+    Parameters:
+        log_level (str): Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+        log_file (str or None): Path to log file, or None for no file logging
+        log_quiet (bool): If True and log_file is specified, suppress stderr output
+        verbose (bool): If True, override log_level to INFO
+        no_log (bool): If True, disable all logging output
+
+    Returns:
+        logging.Logger: Configured logger instance
+    """
+    # Disable logging completely if requested
+    if no_log:
+        logging.basicConfig(level=logging.CRITICAL + 1, handlers=[logging.NullHandler()], force=True)
+        return logging.getLogger()
+
+    # Determine effective log level
+    level = logging.INFO if verbose else getattr(logging, log_level.upper())
+
+    # Configure handlers
+    handlers = []
+    if log_file:
+        handlers.append(logging.FileHandler(log_file, mode="w", encoding="utf-8"))
+    if not (log_file and log_quiet):
+        handlers.append(logging.StreamHandler(sys.stderr))
+
+    # Configure root logger
+    logging.basicConfig(level=level, format="%(levelname)s: %(message)s", handlers=handlers, force=True)
+
+    return logging.getLogger()
+
+
+def format_validation_results(
+    issue_list, output_format="text", title_message="Validation errors:", error_limit=None, errors_by_file=False
+):
+    """Format validation results in the requested output format.
+
+    This function provides a consistent way to format validation issues across
+    different HED validation scripts. It supports text, JSON, and pretty-printed
+    JSON formats, with optional error limiting for large result sets.
+
+    Parameters:
+        issue_list (list): List of validation issues (issue dictionaries)
+        output_format (str): Output format - 'text', 'json', or 'json_pp' (default: 'text')
+        title_message (str): Title/header for text output (default: 'Validation errors:')
+        error_limit (int or None): Maximum errors per code type to include in text output (default: None)
+        errors_by_file (bool): Apply error limit per file rather than globally (default: False)
+
+    Returns:
+        str: Formatted validation results as a string
+
+    Examples:
+        >>> issues = validator.validate(hed_string)
+        >>> output = format_validation_results(issues, "text", "HED string validation:")
+        >>> output = format_validation_results(issues, "json")
+        >>> output = format_validation_results(issues, "json_pp")
+    """
+    if output_format == "json_pp":
+        # Pretty-printed JSON with version metadata
+        # Convert issues to JSON-serializable format
+        serializable_issues = list(iter_errors(issue_list))
+        return json.dumps({"issues": serializable_issues, "hedtools_version": str(vr.get_versions())}, indent=4)
+
+    elif output_format == "json":
+        # Compact JSON array of issues
+        # Convert issues to JSON-serializable format
+        serializable_issues = list(iter_errors(issue_list))
+        return json.dumps(serializable_issues)
+
+    elif output_format == "text":
+        # Human-readable text format with counts and optional filtering
+        output = f"Using HEDTools version: {str(vr.get_versions())}\n"
+        output += f"Number of issues: {len(issue_list)}\n"
+
+        # Apply error limiting if requested
+        if error_limit:
+            filtered_issues, code_counts = ErrorHandler.filter_issues_by_count(issue_list, error_limit, by_file=errors_by_file)
+            output += "Error counts by code: "
+            output += " ".join(f"{code}:{count}" for code, count in code_counts.items()) + "\n"
+            output += f"Number of issues after filtering: {len(filtered_issues)}\n"
+            issue_list = filtered_issues
+
+        # Format the issues with title
+        if issue_list:
+            output += get_printable_issue_string(issue_list, title_message, skip_filename=False) + "\n"
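+        # Note: error_limit and errors_by_file shape only this text branch;
+        # the JSON formats above always serialize the complete issue list.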
+ return output + + else: + raise ValueError(f"Unknown output format: {output_format}") diff --git a/hed/scripts/validate_bids.py b/hed/scripts/validate_bids.py index 5365d127..bf6312d8 100644 --- a/hed/scripts/validate_bids.py +++ b/hed/scripts/validate_bids.py @@ -40,11 +40,11 @@ """ import argparse -import json import logging import sys from hed import _version as vr -from hed.errors import get_printable_issue_string, ErrorHandler +from hed.errors import ErrorHandler +from hed.scripts.script_utils import setup_logging, format_validation_results from hed.tools import BidsDataset @@ -169,41 +169,15 @@ def get_parser(): dest="log_quiet", help="Suppress log output to stderr; only applicable when --log-file is used (logs go only to file)", ) + logging_group.add_argument( + "--no-log", + action="store_true", + dest="no_log", + help="Disable all logging output", + ) return parser -def format_validation_results(issue_list, args, ErrorHandler): - """Generate and output validation results based on format and options. - - Parameters: - issue_list (list): List of validation issues found - args: Parsed command line arguments containing format and output options - ErrorHandler: Error handling class for filtering issues - - Returns: - str: Formatted validation results as a string in the requested format (text, json, or json_pp) - """ - # Output based on format - output = "" - if args.format == "json_pp": - output = json.dumps({"issues": issue_list, "hedtools_version": str(vr.get_versions())}, indent=4) - elif args.format == "json": - output = json.dumps(issue_list) - elif args.format == "text": - output = f"Using HEDTools version: {str(vr.get_versions())}\n" - output += f"Number of issues: {len(issue_list)}\n" - if args.error_limit: - [issue_list, code_counts] = ErrorHandler.filter_issues_by_count( - issue_list, args.error_limit, by_file=args.errors_by_file - ) - output += " ".join(f"{code}:{count}" for code, count in code_counts.items()) + "\n" - output += f"Number of issues after filtering: {len(issue_list)}\n" - if issue_list: - output += get_printable_issue_string(issue_list, "HED validation errors: ", skip_filename=False) - - return output - - def format_final_report(issue_list): """Generate a final summary report of the validation results. 
@@ -228,42 +202,12 @@ def main(arg_list=None): # Parse the arguments args = parser.parse_args(arg_list) print(f"{str(args)}") - # Setup logging configuration - log_level = args.log_level.upper() if args.log_level else "INFO" - if args.verbose: - log_level = "INFO" - - # Configure logging format - log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - date_format = "%Y-%m-%d %H:%M:%S" - - # Clear any existing handlers from root logger - root_logger = logging.getLogger() - for handler in root_logger.handlers[:]: - root_logger.removeHandler(handler) - # Set the root logger level - this is crucial for filtering - root_logger.setLevel(getattr(logging, log_level)) - - # Create and configure handlers - formatter = logging.Formatter(log_format, datefmt=date_format) - - # File handler if log file specified - if args.log_file: - file_handler = logging.FileHandler(args.log_file, mode="w", encoding="utf-8") - file_handler.setLevel(getattr(logging, log_level)) - file_handler.setFormatter(formatter) - root_logger.addHandler(file_handler) - - # Console handler (stderr) unless explicitly quieted and file logging is used - if not args.log_quiet or not args.log_file: - console_handler = logging.StreamHandler(sys.stderr) - console_handler.setLevel(getattr(logging, log_level)) - console_handler.setFormatter(formatter) - root_logger.addHandler(console_handler) + # Set up logging + setup_logging(args.log_level, args.log_file, args.log_quiet, args.verbose, args.no_log) logger = logging.getLogger("validate_bids") - logger.info(f"Starting BIDS validation with log level: {log_level}") + logger.info(f"Starting BIDS validation with log level: {args.log_level}") if args.log_file: logger.info(f"Log output will be saved to: {args.log_file}") @@ -310,7 +254,13 @@ def validate_dataset(args): # Generate and output the results if there is to be output if args.output_file or args.print_output: - output = format_validation_results(issue_list, args, ErrorHandler) + output = format_validation_results( + issue_list, + output_format=args.format, + title_message="HED validation errors:", + error_limit=args.error_limit, + errors_by_file=args.errors_by_file, + ) else: output = "" diff --git a/hed/scripts/validate_hed_string.py b/hed/scripts/validate_hed_string.py new file mode 100644 index 00000000..a0813ff5 --- /dev/null +++ b/hed/scripts/validate_hed_string.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +""" +Validates a HED string against a specified schema version. + +This script validates a HED annotation string, optionally with definitions, +against a specified HED schema version. +""" + +import argparse +import sys +from hed import HedString +from hed.errors import ErrorHandler +from hed.models import DefinitionDict +from hed.schema import load_schema_version +from hed.validator import HedValidator +from hed.scripts.script_utils import setup_logging, format_validation_results + + +def get_parser(): + """Create the argument parser for validate_hed_string. + + Returns: + argparse.ArgumentParser: Configured argument parser. 
+ """ + parser = argparse.ArgumentParser( + description="Validate a HED annotation string against a schema", formatter_class=argparse.RawDescriptionHelpFormatter + ) + + # Required arguments + parser.add_argument("hed_string", help="HED annotation string to validate") + parser.add_argument( + "-sv", + "--schema-version", + required=True, + nargs="+", + dest="schema_version", + help="HED schema version(s) to validate against (e.g., '8.4.0' or '8.3.0 score_1.1.0' for multiple schemas)", + ) + + # Optional arguments + parser.add_argument("-d", "--definitions", default="", help="HED definition(s) string to use during validation") + parser.add_argument( + "-w", + "--check-for-warnings", + action="store_true", + dest="check_for_warnings", + help="Check for warnings in addition to errors", + ) + + # Output options + output_group = parser.add_argument_group("Output options") + output_group.add_argument( + "-f", + "--format", + choices=["text", "json"], + default="text", + help="Output format for validation results (default: %(default)s)", + ) + output_group.add_argument( + "-o", + "--output-file", + default="", + dest="output_file", + help="Output file for validation results; if not specified, output to stdout", + ) + + # Logging options + logging_group = parser.add_argument_group("Logging options") + logging_group.add_argument( + "-l", + "--log-level", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + default="WARNING", + dest="log_level", + help="Logging level (default: %(default)s)", + ) + logging_group.add_argument("-lf", "--log-file", default="", dest="log_file", help="File path for saving log output") + logging_group.add_argument( + "-lq", "--log-quiet", action="store_true", dest="log_quiet", help="Suppress log output to stderr when using --log-file" + ) + logging_group.add_argument("--no-log", action="store_true", dest="no_log", help="Disable all logging output") + logging_group.add_argument("-v", "--verbose", action="store_true", help="Output informational messages") + + return parser + + +def main(arg_list=None): + """Main function for validating a HED string. + + Parameters: + arg_list (list or None): Command line arguments. 
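+
+    Returns:
+        int: 0 if the HED string is valid, 1 if validation issues were found
+            or an unexpected error occurred.
+
+    Examples:
+        >>> exit_code = main(["Event, (Action, Move)", "-sv", "8.3.0", "--no-log"])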
+ """ + parser = get_parser() + args = parser.parse_args(arg_list) + + # Set up logging + setup_logging(args.log_level, args.log_file, args.log_quiet, args.verbose, args.no_log) + + import logging + + try: + # Load schema (handle single version or list of versions) + schema_versions = args.schema_version[0] if len(args.schema_version) == 1 else args.schema_version + logging.info(f"Loading HED schema version(s) {schema_versions}") + schema = load_schema_version(schema_versions) + + # Parse HED string + logging.info("Parsing HED string") + hed_string = HedString(args.hed_string, schema) + + # Set up definitions if provided + def_dict = None + issues = [] + if args.definitions: + logging.info("Processing definitions") + def_dict = DefinitionDict(args.definitions, hed_schema=schema) + if def_dict.issues: + issues = def_dict.issues + logging.warning("Errors found in definitions, skipping HED string validation") + + # Validate HED string only if no definition errors + if not issues: + logging.info("Validating HED string") + error_handler = ErrorHandler(check_for_warnings=args.check_for_warnings) + validator = HedValidator(schema, def_dict) + issues = validator.validate(hed_string, True, error_handler=error_handler) + + # Handle output + if issues: + # Format validation errors + output = format_validation_results( + issues, output_format=args.format, title_message="HED string validation errors:" + ) + + # Write output + if args.output_file: + with open(args.output_file, "w") as f: + f.write(output) + logging.info(f"Validation errors written to {args.output_file}") + else: + print(output) + + return 1 # Exit with error code if validation failed + else: + # Success message + success_msg = "HED string is valid!" + if args.output_file: + with open(args.output_file, "w") as f: + f.write(success_msg + "\n") + logging.info(f"Validation results written to {args.output_file}") + else: + print(success_msg) + + return 0 + + except Exception as e: + logging.error(f"Validation failed: {str(e)}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index e666fec5..a01f298d 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -2,7 +2,6 @@ from hed.tools.remodeling.operations.base_op import BaseOp - # TODO: This class is under development diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 8fe5510d..6daca454 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -177,7 +177,9 @@ def check_tag_formatting(self, original_tag) -> list[dict]: return validation_issues - def validate_units(self, original_tag, validate_text=None, report_as=None, error_code=None, index_offset=0) -> list[dict]: + def validate_units( + self, original_tag, validate_text=None, report_as=None, error_code=None, index_offset=0, allow_placeholders=True + ) -> list[dict]: """Validate units and value classes Parameters: @@ -187,6 +189,7 @@ def validate_units(self, original_tag, validate_text=None, report_as=None, error Mostly for definitions that expand. error_code (str): The code to override the error as. Again mostly for def/def-expand tags. 
index_offset (int): Offset into the extension validate_text starts at + allow_placeholders (bool): Whether placeholders are allowed (affects value class validation for "#") Returns: list[dict]: Issues found from units @@ -194,11 +197,11 @@ def validate_units(self, original_tag, validate_text=None, report_as=None, error if validate_text is None: validate_text = original_tag.extension issues = [] - if validate_text == "#": + if validate_text == "#" and allow_placeholders: return [] if original_tag.is_unit_class_tag(): issues += self._unit_validator.check_tag_unit_class_units_are_valid( - original_tag, validate_text, report_as=report_as, error_code=error_code + original_tag, validate_text, report_as=report_as, error_code=error_code, allow_placeholders=allow_placeholders ) elif original_tag.is_value_class_tag(): issues += self._unit_validator.check_tag_value_class_valid(original_tag, validate_text, report_as=report_as) @@ -240,8 +243,13 @@ def _validate_individual_tags_in_hed_string(self, hed_string_obj, allow_placehol hed_tag, self, allow_placeholders=allow_placeholders ) elif (hed_tag.short_base_tag == DefTagNames.DEFINITION_KEY) and hed_tag.extension.endswith("/#"): - validation_issues += self.validate_units(hed_tag, hed_tag.extension[:-2]) + validation_issues += self.validate_units( + hed_tag, hed_tag.extension[:-2], allow_placeholders=allow_placeholders + ) + elif allow_placeholders and hed_tag.is_unit_class_tag() and hed_tag.extension.startswith("# "): + # If placeholder is followed by units (e.g., "# m-per-s^2"), validate the units + validation_issues += self.validate_units(hed_tag, allow_placeholders=allow_placeholders) elif not (allow_placeholders and "#" in hed_tag.extension): - validation_issues += self.validate_units(hed_tag) + validation_issues += self.validate_units(hed_tag, allow_placeholders=allow_placeholders) return validation_issues diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 6f63bce0..b4c674e0 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -16,7 +16,6 @@ from hed.models import df_util from hed.models.model_constants import DefTagNames - PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " diff --git a/hed/validator/util/class_util.py b/hed/validator/util/class_util.py index 7cdb44ca..fec43c0a 100644 --- a/hed/validator/util/class_util.py +++ b/hed/validator/util/class_util.py @@ -48,7 +48,9 @@ def _get_default_value_class_validators(self): return validator_dict - def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, report_as=None, error_code=None) -> list[dict]: + def check_tag_unit_class_units_are_valid( + self, original_tag, validate_text, report_as=None, error_code=None, allow_placeholders=True + ) -> list[dict]: """Report incorrect unit class or units. Parameters: @@ -56,29 +58,36 @@ def check_tag_unit_class_units_are_valid(self, original_tag, validate_text, repo validate_text (str): The text to validate. report_as (HedTag): Report errors as coming from this tag, rather than original_tag. error_code (str): Override error codes. + allow_placeholders (bool): Whether placeholders are allowed (affects value class validation for "#") Returns: list: Validation issues. Each issue is a dictionary. 
""" + if not original_tag.is_unit_class_tag(): + return [] + validation_issues = [] - if original_tag.is_unit_class_tag(): - - # Check the units first - stripped_value, units = original_tag.get_stripped_unit_value(validate_text) - if not stripped_value: - validation_issues += self._report_bad_units(original_tag, report_as) - return validation_issues - - # Check the value classes - validation_issues += self._check_value_class(original_tag, stripped_value, report_as) - if validation_issues: - return validation_issues - - # We don't want to give this overall error twice - if error_code and validation_issues and not any(error_code == issue["code"] for issue in validation_issues): - new_issue = validation_issues[0].copy() - new_issue["code"] = error_code - validation_issues += [new_issue] + # Check the units first + stripped_value, units = original_tag.get_stripped_unit_value(validate_text) + if not stripped_value: + # stripped_value is None only when invalid units are present + validation_issues += self._report_bad_units(original_tag, report_as) + return validation_issues + + # If value is a placeholder (#) and placeholders are allowed, it's valid + # Invalid units would have been caught above (stripped_value would be None) + if stripped_value == "#" and allow_placeholders: + return validation_issues + + # Check the value classes + # If placeholders are NOT allowed, "#" will fail value class validation (e.g., not a valid number) + validation_issues += self._check_value_class(original_tag, stripped_value, report_as) + + # Override error code if specified (for def/def-expand tags) + if error_code and validation_issues and not any(error_code == issue["code"] for issue in validation_issues): + new_issue = validation_issues[0].copy() + new_issue["code"] = error_code + validation_issues += [new_issue] return validation_issues diff --git a/pyproject.toml b/pyproject.toml index 69c0c137..e2e093e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ dependencies = [ dev = [ "ruff>=0.8.0", "codespell>=2.2.0", - "black>=24.0.0", + "black>=26.1.0", "mdformat>=0.7.0", "mdformat-myst>=0.1.5", ] @@ -99,6 +99,7 @@ hedpy = "hed.cli.cli:main" # Legacy commands (deprecated - use 'hedpy' instead) validate_bids = "hed.scripts.validate_bids:main" +validate_hed_string = "hed.scripts.validate_hed_string:main" hed_extract_bids_sidecar = "hed.scripts.hed_extract_bids_sidecar:main" hed_validate_schemas = "hed.scripts.validate_schemas:main" hed_update_schemas = "hed.scripts.hed_convert_schema:main" @@ -190,7 +191,7 @@ known-first-party = ["hed"] [tool.black] line-length = 127 -target-version = ["py310", "py311", "py312", "py313"] +target-version = ["py310", "py311", "py312", "py313", "py314"] exclude = ''' /( \.git @@ -204,6 +205,6 @@ exclude = ''' | hed/_version\.py | spec_tests/hed-examples | spec_tests/hed-specification - | status + | .status )/ ''' diff --git a/requirements-dev.txt b/requirements-dev.txt index 0aa36ec5..df5abf25 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,12 +10,12 @@ coverage>=7.0.0 # Code quality tools ruff>=0.8.0 codespell>=2.2.0 -black[jupyter]>=24.0.0 +black[jupyter]>=26.1.0 mdformat>=0.7.0 mdformat-myst>=0.1.5 # Documentation requirements -sphinx>=7.1.0,<8.2.0 +sphinx>=7.1.0,<10.0 furo>=2024.1.29 sphinx-copybutton>=0.5.2 myst-parser>=3.0.0 diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py index 9a03c6ca..6ec362c8 100644 --- a/spec_tests/test_errors.py +++ b/spec_tests/test_errors.py @@ -13,7 +13,6 @@ from hed import HedFileError from 
hed.errors import ErrorHandler, get_printable_issue_string, SchemaWarnings - skip_tests = { # "tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet", # "curly-braces-has-no-hed": "Need to fix issue #1006", @@ -30,6 +29,8 @@ def setUpClass(cls): ) cls.test_dir = test_dir cls.fail_count = [] + cls.current_test_file = None + cls.test_counter = {"total": 0, "passed": 0, "failed": 0, "skipped": 0} # Check if the required directory exists if not os.path.exists(test_dir): @@ -61,16 +62,19 @@ def run_single_test(self, test_file, test_name=None, test_type=None): test_info = json.load(fp) file_basename = os.path.basename(test_file) + self.current_test_file = file_basename - for info in test_info: + for test_index, info in enumerate(test_info, 1): error_code = info["error_code"] all_codes = [error_code] + info.get("alt_codes", []) if error_code in skip_tests: print(f" ⊘ Skipping {error_code} test: {skip_tests[error_code]}") + self.test_counter["skipped"] += 1 continue name = info.get("name", "") if name in skip_tests: print(f" ⊘ Skipping '{name}' test: {skip_tests[name]}") + self.test_counter["skipped"] += 1 continue if test_name is not None and name != test_name: continue @@ -87,7 +91,9 @@ def run_single_test(self, test_file, test_name=None, test_type=None): issues = e.issues if not issues: issues += [{"code": e.code, "message": e.message}] - self.report_result("fails", issues, error_code, all_codes, description, name, "dummy", "Schema") + self.report_result( + "fails", issues, error_code, all_codes, description, name, "dummy", "Schema", file_basename, test_index + ) continue except Exception as e: print(f"\n⚠️ Error loading schema for test '{name}' in {file_basename}") @@ -111,79 +117,153 @@ def run_single_test(self, test_file, test_name=None, test_type=None): continue if section_name == "string_tests": self._run_single_string_test( - section, schema, def_dict, error_code, all_codes, description, name, error_handler + section, + schema, + def_dict, + error_code, + all_codes, + description, + name, + error_handler, + file_basename, + test_index, ) if section_name == "sidecar_tests": self._run_single_sidecar_test( - section, schema, def_dict, error_code, all_codes, description, name, error_handler + section, + schema, + def_dict, + error_code, + all_codes, + description, + name, + error_handler, + file_basename, + test_index, ) if section_name == "event_tests": self._run_single_events_test( - section, schema, def_dict, error_code, all_codes, description, name, error_handler + section, + schema, + def_dict, + error_code, + all_codes, + description, + name, + error_handler, + file_basename, + test_index, ) if section_name == "combo_tests": self._run_single_combo_test( - section, schema, def_dict, error_code, all_codes, description, name, error_handler + section, + schema, + def_dict, + error_code, + all_codes, + description, + name, + error_handler, + file_basename, + test_index, ) if section_name == "schema_tests": - self._run_single_schema_test(section, error_code, all_codes, description, name, error_handler) + self._run_single_schema_test( + section, error_code, all_codes, description, name, error_handler, file_basename, test_index + ) - def report_result(self, expected_result, issues, error_code, all_codes, description, name, test, test_type): + def report_result( + self, expected_result, issues, error_code, all_codes, description, name, test, test_type, test_file, test_index + ): # Filter out pre-release warnings, we don't care about them. 
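+        # Issues are plain dicts; the "code" key identifies the error type being filtered on.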
issues = [issue for issue in issues if issue["code"] != SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED] + test_location = f"{test_file} [Test #{test_index}]" + if expected_result == "fails": if not issues: # Test should have failed but passed - this is a problem + self.test_counter["failed"] += 1 + failure_id = f"{test_file}::{error_code}::{name or 'unnamed'}::{test_type}" print("\n" + "=" * 80) print("❌ TEST FAILURE: Test passed but should have failed") print("=" * 80) + print(f"Location: {test_location}") + print(f"Test ID: {failure_id}") print(f"Error Code: {error_code}") - print(f"Test Name: {name}") + print(f"Test Name: {name or '(unnamed)'}") print(f"Test Type: {test_type}") print(f"Description: {description}") print(f"Expected Error Codes: {all_codes}") print(f"\nTest Data:\n{self._format_test_data(test)}") print(f"\nResult: Test produced NO errors (expected one of: {all_codes})") print("=" * 80 + "\n") - self.fail_count.append(name) + self.fail_count.append( + {"id": failure_id, "name": name, "location": test_location, "reason": "Should fail but passed"} + ) else: # Test failed as expected, check if it's the right error code if any(issue["code"] in all_codes for issue in issues): + self.test_counter["passed"] += 1 return # Correct error code found, test passes # Wrong error code + self.test_counter["failed"] += 1 + failure_id = f"{test_file}::{error_code}::{name or 'unnamed'}::{test_type}" + actual_codes = [issue["code"] for issue in issues] print("\n" + "=" * 80) print("❌ TEST FAILURE: Wrong error code returned") print("=" * 80) + print(f"Location: {test_location}") + print(f"Test ID: {failure_id}") print(f"Error Code: {error_code}") - print(f"Test Name: {name}") + print(f"Test Name: {name or '(unnamed)'}") print(f"Test Type: {test_type}") print(f"Description: {description}") print(f"Expected Error Codes: {all_codes}") + print(f"Actual Error Codes: {actual_codes}") print(f"\nTest Data:\n{self._format_test_data(test)}") - print("\nActual Error Codes Found:") - for issue in issues: - print(f" - {issue['code']}") print("\nDetailed Issues:") print(get_printable_issue_string(issues)) print("=" * 80 + "\n") - self.fail_count.append(name) + self.fail_count.append( + { + "id": failure_id, + "name": name, + "location": test_location, + "reason": f"Wrong code: expected {all_codes}, got {actual_codes}", + } + ) else: # Test should pass if issues: + self.test_counter["failed"] += 1 + failure_id = f"{test_file}::{error_code}::{name or 'unnamed'}::{test_type}" + actual_codes = [issue["code"] for issue in issues] print("\n" + "=" * 80) print("❌ TEST FAILURE: Test failed but should have passed") print("=" * 80) + print(f"Location: {test_location}") + print(f"Test ID: {failure_id}") print(f"Error Code: {error_code}") - print(f"Test Name: {name}") + print(f"Test Name: {name or '(unnamed)'}") print(f"Test Type: {test_type}") print(f"Description: {description}") print(f"\nTest Data:\n{self._format_test_data(test)}") print("\nUnexpected Issues Found:") print(get_printable_issue_string(issues)) + print(f"\nError Codes: {actual_codes}") print("=" * 80 + "\n") - self.fail_count.append(name) + self.fail_count.append( + { + "id": failure_id, + "name": name, + "location": test_location, + "reason": f"Should pass but failed with: {actual_codes}", + } + ) + else: + self.test_counter["passed"] += 1 def _format_test_data(self, test): """Format test data for readable output""" @@ -201,30 +281,60 @@ def _format_test_data(self, test): else: return str(test) - def _run_single_string_test(self, info, schema, 
def_dict, error_code, all_codes, description, name, error_handler): + def _run_single_string_test( + self, info, schema, def_dict, error_code, all_codes, description, name, error_handler, test_file, test_index + ): string_validator = HedValidator(hed_schema=schema, def_dicts=def_dict) for result, tests in info.items(): - for test in tests: + for sub_test_index, test in enumerate(tests, 1): test_string = HedString(test, schema) issues = string_validator.run_basic_checks(test_string, False) issues += string_validator.run_full_string_checks(test_string) error_handler.add_context_and_filter(issues) - self.report_result(result, issues, error_code, all_codes, description, name, test, "string_test") - - def _run_single_sidecar_test(self, info, schema, def_dict, error_code, all_codes, description, name, error_handler): + self.test_counter["total"] += 1 + self.report_result( + result, + issues, + error_code, + all_codes, + description, + name, + test, + f"string_test[{sub_test_index}]", + test_file, + test_index, + ) + + def _run_single_sidecar_test( + self, info, schema, def_dict, error_code, all_codes, description, name, error_handler, test_file, test_index + ): for result, tests in info.items(): - for test in tests: + for sub_test_index, test in enumerate(tests, 1): buffer = io.BytesIO(json.dumps(test).encode("utf-8")) sidecar = Sidecar(buffer) issues = sidecar.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) - self.report_result(result, issues, error_code, all_codes, description, name, test, "sidecar_test") - - def _run_single_events_test(self, info, schema, def_dict, error_code, all_codes, description, name, error_handler): + self.test_counter["total"] += 1 + self.report_result( + result, + issues, + error_code, + all_codes, + description, + name, + test, + f"sidecar_test[{sub_test_index}]", + test_file, + test_index, + ) + + def _run_single_events_test( + self, info, schema, def_dict, error_code, all_codes, description, name, error_handler, test_file, test_index + ): from hed import TabularInput for result, tests in info.items(): - for test in tests: + for sub_test_index, test in enumerate(tests, 1): string = "" for row in test: if not isinstance(row, list): @@ -240,13 +350,27 @@ def _run_single_events_test(self, info, schema, def_dict, error_code, all_codes, file = TabularInput(file_obj, sidecar=self.default_sidecar) issues = file.validate(hed_schema=schema, extra_def_dicts=def_dict, error_handler=error_handler) - self.report_result(result, issues, error_code, all_codes, description, name, test, "events_test") - - def _run_single_combo_test(self, info, schema, def_dict, error_code, all_codes, description, name, error_handler): + self.test_counter["total"] += 1 + self.report_result( + result, + issues, + error_code, + all_codes, + description, + name, + test, + f"events_test[{sub_test_index}]", + test_file, + test_index, + ) + + def _run_single_combo_test( + self, info, schema, def_dict, error_code, all_codes, description, name, error_handler, test_file, test_index + ): from hed import TabularInput for result, tests in info.items(): - for test in tests: + for sub_test_index, test in enumerate(tests, 1): sidecar_test = test["sidecar"] default_dict = self.default_sidecar.loaded_dict for key, value in default_dict.items(): @@ -277,11 +401,23 @@ def _run_single_combo_test(self, info, schema, def_dict, error_code, all_codes, print("This is probably a missing set of square brackets.") continue issues += file.validate(hed_schema=schema, extra_def_dicts=def_dict, 
error_handler=error_handler) - self.report_result(result, issues, error_code, all_codes, description, name, test, "combo_tests") - - def _run_single_schema_test(self, info, error_code, all_codes, description, name, error_handler): + self.test_counter["total"] += 1 + self.report_result( + result, + issues, + error_code, + all_codes, + description, + name, + test, + f"combo_tests[{sub_test_index}]", + test_file, + test_index, + ) + + def _run_single_schema_test(self, info, error_code, all_codes, description, name, error_handler, test_file, test_index): for result, tests in info.items(): - for test in tests: + for sub_test_index, test in enumerate(tests, 1): schema_string = "\n".join(test) issues = [] try: @@ -298,7 +434,19 @@ def _run_single_schema_test(self, info, error_code, all_codes, description, name "message": "HTTP error in testing, probably due to rate limiting for local testing.", } ] - self.report_result(result, issues, error_code, all_codes, description, name, test, "schema_tests") + self.test_counter["total"] += 1 + self.report_result( + result, + issues, + error_code, + all_codes, + description, + name, + test, + f"schema_tests[{sub_test_index}]", + test_file, + test_index, + ) def test_errors(self): if hasattr(self, "skip_tests") and self.skip_tests: @@ -318,15 +466,36 @@ def test_errors(self): print("\n" + "=" * 80) print("TEST SUMMARY") print("=" * 80) + print(f"Total Tests: {self.test_counter['total']}") + print( + f"Passed: {self.test_counter['passed']} ({100*self.test_counter['passed']//max(1,self.test_counter['total'])}%)" + ) + print(f"Failed: {self.test_counter['failed']}") + print(f"Skipped: {self.test_counter['skipped']}") + print("=" * 80) + if len(self.fail_count) == 0: print("✅ All tests passed!") else: - print(f"❌ {len(self.fail_count)} test(s) failed:") + print(f"\n❌ {len(self.fail_count)} test(s) failed:\n") for i, failed_test in enumerate(self.fail_count, 1): - print(f" {i}. {failed_test}") + if isinstance(failed_test, dict): + print(f" {i}. {failed_test['location']}") + print(f" ID: {failed_test['id']}") + print(f" Name: {failed_test.get('name', '(unnamed)')}") + print(f" Reason: {failed_test['reason']}") + print() + else: + # Fallback for old format + print(f" {i}. {failed_test}") print("=" * 80 + "\n") - self.assertEqual(len(self.fail_count), 0, f"\n{len(self.fail_count)} test(s) failed. See detailed output above.") + self.assertEqual( + len(self.fail_count), + 0, + f"\n{len(self.fail_count)} test(s) failed out of {self.test_counter['total']} total. " + f"See detailed output above.", + ) # def test_debug(self): # test_file = os.path.realpath('./temp7.json') diff --git a/tests/schema/test_schema_validator_hed_id.py b/tests/schema/test_schema_validator_hed_id.py index 9c160458..bbe6c260 100644 --- a/tests/schema/test_schema_validator_hed_id.py +++ b/tests/schema/test_schema_validator_hed_id.py @@ -6,7 +6,6 @@ from hed import load_schema_version from hed.schema import HedKey - # tests needed: # 1. Verify HED id(HARDEST, MAY SKIP) # 4. 
Json tests diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py index 55b0ea1f..54a31f92 100644 --- a/tests/schema/util_create_schemas.py +++ b/tests/schema/util_create_schemas.py @@ -1,6 +1,5 @@ from hed.schema import from_string - library_schema_start = """HED library="testcomparison" version="1.1.0" withStandard="8.3.0" unmerged="true" '''Prologue''' diff --git a/tests/scripts/test_validate_hed_string.py b/tests/scripts/test_validate_hed_string.py new file mode 100644 index 00000000..09318ee4 --- /dev/null +++ b/tests/scripts/test_validate_hed_string.py @@ -0,0 +1,143 @@ +"""Tests for validate_hed_string script.""" + +import os +import io +import unittest +from unittest.mock import patch +from hed.scripts.validate_hed_string import main + + +class TestValidateHedString(unittest.TestCase): + """Test validate_hed_string script functionality.""" + + def test_valid_string(self): + """Test validation of a valid HED string.""" + arg_list = ["Event, (Action, Move)", "-sv", "8.3.0", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()) as mock_stdout: + result = main(arg_list) + output = mock_stdout.getvalue() + + self.assertEqual(result, 0, "Valid string should return 0") + self.assertIn("valid", output.lower()) + + def test_invalid_string(self): + """Test validation of an invalid HED string.""" + arg_list = ["InvalidTag", "-sv", "8.3.0", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()) as mock_stdout: + result = main(arg_list) + output = mock_stdout.getvalue() + + self.assertEqual(result, 1, "Invalid string should return 1") + self.assertIn("error", output.lower()) + + def test_with_definitions(self): + """Test validation with definitions.""" + arg_list = ["Event, Def/MyDef", "-sv", "8.3.0", "-d", "(Definition/MyDef, (Action, Move))", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()) as mock_stdout: + result = main(arg_list) + output = mock_stdout.getvalue() + + self.assertEqual(result, 0, "Valid string with definitions should return 0") + self.assertIn("valid", output.lower()) + + def test_invalid_definitions(self): + """Test that invalid definitions are caught before HED string validation.""" + arg_list = ["Event, Def/MyDef", "-sv", "8.3.0", "-d", "(Definition/MyDef, InvalidTag)", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()) as mock_stdout: + result = main(arg_list) + output = mock_stdout.getvalue() + + self.assertEqual(result, 1, "Invalid definitions should return 1") + self.assertIn("error", output.lower()) + + def test_json_output(self): + """Test JSON output format.""" + arg_list = ["InvalidTag", "-sv", "8.3.0", "-f", "json", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()) as mock_stdout: + result = main(arg_list) + output = mock_stdout.getvalue() + + self.assertEqual(result, 1) + # Should be valid JSON + import json + + try: + json.loads(output) + except json.JSONDecodeError: + self.fail("Output should be valid JSON") + + def test_output_file(self): + """Test writing output to a file.""" + import tempfile + + with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as f: + output_file = f.name + + try: + arg_list = ["Event", "-sv", "8.3.0", "-o", output_file, "--no-log"] + result = main(arg_list) + + self.assertEqual(result, 0) + self.assertTrue(os.path.exists(output_file)) + + with open(output_file, "r") as f: + content = f.read() + self.assertIn("valid", content.lower()) + finally: + if os.path.exists(output_file): + os.remove(output_file) + + def test_check_for_warnings(self): + 
"""Test --check-for-warnings flag.""" + arg_list = ["Event", "-sv", "8.3.0", "-w", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()): + result = main(arg_list) + + # Should still validate successfully + self.assertEqual(result, 0) + + def test_verbose_output(self): + """Test verbose logging output.""" + arg_list = ["Event", "-sv", "8.3.0", "-v"] + + with patch("sys.stdout", new=io.StringIO()), patch("sys.stderr", new=io.StringIO()) as mock_stderr: + result = main(arg_list) + stderr_output = mock_stderr.getvalue() + + self.assertEqual(result, 0) + # Verbose mode should produce INFO level messages + self.assertIn("INFO", stderr_output) + + def test_no_log_option(self): + """Test --no-log option suppresses all logging.""" + arg_list = ["Event", "-sv", "8.3.0", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()), patch("sys.stderr", new=io.StringIO()) as mock_stderr: + result = main(arg_list) + stderr_output = mock_stderr.getvalue() + + self.assertEqual(result, 0) + # No log option should suppress all logging to stderr + self.assertEqual(stderr_output, "", "stderr should be empty with --no-log") + + def test_multiple_schemas(self): + """Test validation with multiple schema versions.""" + # Test with base + library schema + arg_list = ["Event, Action", "-sv", "8.3.0", "-sv", "score_1.1.0", "--no-log"] + + with patch("sys.stdout", new=io.StringIO()) as mock_stdout: + result = main(arg_list) + output = mock_stdout.getvalue() + + self.assertEqual(result, 0, "Valid string with multiple schemas should return 0") + self.assertIn("valid", output.lower()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cli_parameter_parity.py b/tests/test_cli_parameter_parity.py index a9f06f22..dcb723e7 100644 --- a/tests/test_cli_parameter_parity.py +++ b/tests/test_cli_parameter_parity.py @@ -6,6 +6,7 @@ from hed.cli.cli import cli from hed.scripts.validate_bids import get_parser as get_validate_bids_parser +from hed.scripts.validate_hed_string import get_parser as get_validate_hed_string_parser from hed.scripts.hed_extract_bids_sidecar import get_parser as get_extract_sidecar_parser from hed.scripts.extract_tabular_summary import get_parser as get_extract_summary_parser from hed.scripts.validate_schemas import get_parser as get_validate_schemas_parser @@ -228,6 +229,40 @@ def test_schema_validate_parameters(self): for orig_flag in original_flags: self.assertIn(orig_flag, cli_flags, f"Flag '{orig_flag}' from original parser not found in CLI") + def test_validate_hed_string_parameters(self): + """Test validate hed-string CLI parameters match validate_hed_string.py parser.""" + # Get original parser + original_parser = get_validate_hed_string_parser() + self._get_parser_options(original_parser) + + # Get CLI command + validate_group = cli.commands.get("validate") + self.assertIsNotNone(validate_group, "validate command group not found") + cli_command = validate_group.commands.get("hed-string") + + self.assertIsNotNone(cli_command, "validate hed-string command not found in CLI") + cli_opts = self._get_click_options(cli_command) + + # Check positional arguments (should have hed_string) + self.assertEqual( + len(cli_opts["positional"]), 1, f"Should have 1 positional argument, got {len(cli_opts['positional'])}" + ) + self.assertEqual(cli_opts["positional"][0], "hed_string", "Positional should be hed_string") + + # Check that key optional parameters exist + required_params = ["schema_version", "definitions", "format", "output_file", "log_level", "log_file"] + cli_dests = 
set(cli_opts["optional"].keys()) + + for param in required_params: + self.assertIn(param, cli_dests, f"Parameter '{param}' not found in CLI") + + # Check flags + required_flags = {"check_for_warnings", "log_quiet", "no_log", "verbose"} + cli_flags = {flag[0] for flag in cli_opts["flags"]} + + for flag in required_flags: + self.assertIn(flag, cli_flags, f"Flag '{flag}' not found in CLI") + def test_schema_add_ids_parameters(self): """Test schema add-ids uses positional arguments.""" schema_group = cli.commands.get("schema") diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index f0aa86ee..8cd75e10 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -1,6 +1,8 @@ import unittest from hed.errors.error_types import ValidationErrors, DefinitionErrors +from hed.errors import ErrorHandler, ErrorContext +from hed.models.hed_string import HedString from tests.validator.test_tag_validator_base import TestValidatorBase from hed.schema.hed_schema_io import load_schema_version from functools import partial @@ -270,7 +272,7 @@ def test_extension_warning(self): def test_invalid_placeholder_in_normal_string(self): test_strings = { - "invalidPlaceholder": "Duration/# ms", + "invalidPlaceholder": "Item/#", "invalidMiscPoundSign": "Du#ation/20 ms", "invalidAfterBaseTag": "Action/Invalid#/InvalidExtension", } @@ -283,17 +285,9 @@ def test_invalid_placeholder_in_normal_string(self): "invalidPlaceholder": self.format_error( ValidationErrors.INVALID_TAG_CHARACTER, tag=0, - index_in_tag=9, - index_in_tag_end=10, + index_in_tag=5, + index_in_tag_end=6, actual_error=ValidationErrors.PLACEHOLDER_INVALID, - ) - + self.format_error( - ValidationErrors.INVALID_VALUE_CLASS_VALUE, - tag=0, - index_in_tag=0, - index_in_tag_end=13, - value_class="numericClass", - actual_error=ValidationErrors.VALUE_INVALID, ), "invalidMiscPoundSign": self.format_error( ValidationErrors.NO_VALID_TAG_FOUND, tag=0, index_in_tag=0, index_in_tag_end=8 @@ -308,6 +302,108 @@ def test_invalid_placeholder_in_normal_string(self): } self.validator_semantic(test_strings, expected_results, expected_issues, False) + def test_valid_placeholder_with_units(self): + self.maxDiff = None # Show full diff for debugging + test_strings = { + "placeholderWithValidUnits": "Acceleration/# m-per-s^2", + "placeholderWithInvalidUnits": "Acceleration/# badUnits", + "placeholderAlone": "Acceleration/#", + } + + # Test with allow_placeholders=True + expected_results_allowed = { + "placeholderWithValidUnits": True, + "placeholderWithInvalidUnits": False, + "placeholderAlone": True, + } + legal_acceleration_units = ["m-per-s^2"] + expected_issues_allowed = { + "placeholderWithValidUnits": [], + "placeholderWithInvalidUnits": self.format_error( + ValidationErrors.UNITS_INVALID, tag=0, units=legal_acceleration_units + ), + "placeholderAlone": [], + } + + validator = self.semantic_hed_input_reader + for test_key in test_strings: + hed_string_obj = HedString(test_strings[test_key], self.hed_schema) + test_issues = hed_string_obj._calculate_to_canonical_forms(validator._hed_schema) + if not test_issues: + test_issues = validator._validate_individual_tags_in_hed_string(hed_string_obj, allow_placeholders=True) + + error_handler = ErrorHandler(check_for_warnings=False) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) + expected_params = expected_issues_allowed[test_key] + expected_issue = self.format_errors_fully(error_handler, hed_string=hed_string_obj, 
params=expected_params) + error_handler.add_context_and_filter(test_issues) + test_result = not test_issues + expected_result = expected_results_allowed[test_key] + self.assertEqual(test_result, expected_result, test_strings[test_key]) + self.assertCountEqual(test_issues, expected_issue, test_strings[test_key]) + + # Test with allow_placeholders=False - all should fail + expected_results_disallowed = { + "placeholderWithValidUnits": False, + "placeholderWithInvalidUnits": False, + "placeholderAlone": False, + } + legal_acceleration_units = ["m-per-s^2"] + expected_issues_disallowed = { + "placeholderWithValidUnits": self.format_error( + ValidationErrors.INVALID_TAG_CHARACTER, + tag=0, + index_in_tag=13, + index_in_tag_end=14, + actual_error=ValidationErrors.PLACEHOLDER_INVALID, + ) + + self.format_error( + ValidationErrors.INVALID_VALUE_CLASS_VALUE, + tag=0, + index_in_tag=0, + index_in_tag_end=24, + value_class="numericClass", + ), + "placeholderWithInvalidUnits": self.format_error( + ValidationErrors.INVALID_TAG_CHARACTER, + tag=0, + index_in_tag=13, + index_in_tag_end=14, + actual_error=ValidationErrors.PLACEHOLDER_INVALID, + ) + + self.format_error(ValidationErrors.UNITS_INVALID, tag=0, units=legal_acceleration_units), + "placeholderAlone": self.format_error( + ValidationErrors.INVALID_TAG_CHARACTER, + tag=0, + index_in_tag=13, + index_in_tag_end=14, + actual_error=ValidationErrors.PLACEHOLDER_INVALID, + ) + + self.format_error( + ValidationErrors.INVALID_VALUE_CLASS_VALUE, + tag=0, + index_in_tag=0, + index_in_tag_end=14, + value_class="numericClass", + ), + } + + for test_key in test_strings: + hed_string_obj = HedString(test_strings[test_key], self.hed_schema) + test_issues = hed_string_obj._calculate_to_canonical_forms(validator._hed_schema) + if not test_issues: + test_issues = validator._validate_individual_tags_in_hed_string(hed_string_obj, allow_placeholders=False) + + error_handler = ErrorHandler(check_for_warnings=False) + error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) + expected_params = expected_issues_disallowed[test_key] + expected_issue = self.format_errors_fully(error_handler, hed_string=hed_string_obj, params=expected_params) + error_handler.add_context_and_filter(test_issues) + test_result = not test_issues + expected_result = expected_results_disallowed[test_key] + self.assertEqual(test_result, expected_result, f"{test_strings[test_key]} (placeholders disallowed)") + self.assertCountEqual(test_issues, expected_issue, f"{test_strings[test_key]} (placeholders disallowed)") + def test_span_reporting(self): test_strings = { "orgTagDifferent": "Duration/23 hz", diff --git a/tests/validator/test_tag_validator_library.py b/tests/validator/test_tag_validator_library.py index 0fef85e8..0a58adb2 100644 --- a/tests/validator/test_tag_validator_library.py +++ b/tests/validator/test_tag_validator_library.py @@ -322,8 +322,7 @@ def test_no_duplicates(self): "tl:Purple-color/Purple,tl:Event/Sensory-event)", "noDuplicate": "tl:Event/Sensory-event," "tl:Item/Object/Man-made-object/VehicleTrain," "tl:Purple-color/Purple", "legalDuplicate": "tl:Item/Object/Man-made-object/VehicleTrain,\ - (tl:Item/Object/Man-made-object/VehicleTrain," - "tl:Event/Sensory-event)", + (tl:Item/Object/Man-made-object/VehicleTrain," "tl:Event/Sensory-event)", } expected_results = {"topLevelDuplicate": False, "groupDuplicate": False, "legalDuplicate": True, "noDuplicate": True} expected_issues = {