diff --git a/README.md b/README.md
index 0426fb82..cde5066a 100644
--- a/README.md
+++ b/README.md
@@ -18,12 +18,13 @@ HED (Hierarchical Event Descriptors) is a framework for systematically describin
 - Validate HED annotations against schema specifications
 - Analyze and summarize HED-tagged datasets
-- Transform and remodel event data
 - Full HED support in BIDS (Brain Imaging Data Structure)
 - HED support in NWB (Neurodata Without Borders) when used with the [ndx-hed](https://github.com/hed-standard/ndx-hed) extension.
 - Platform-independent and data-neutral
 - Command-line tools and Python API
 
+**Note:** Table remodeling tools have been moved to a separate package. See [table-remodeler](https://pypi.org/project/table-remodeler/) on PyPI or visit [https://www.hedtags.org/table-remodeler](https://www.hedtags.org/table-remodeler) for more information.
+
 ## Quick start
 
 ```{index} quick start, getting started, installation
 ```
@@ -59,6 +60,38 @@ Or install from GitHub (latest):
 pip install git+https://github.com/hed-standard/hed-python/@main
 ```
 
+### Development installation
+
+```{index} development installation, editable install, optional dependencies
+```
+
+For development work or to access optional features, install from the cloned repository:
+
+```bash
+# Clone the repository
+git clone https://github.com/hed-standard/hed-python.git
+cd hed-python
+
+# Install in editable mode with base dependencies
+pip install -e .
+
+# Install with optional dependency groups
+pip install -e ".[dev]"       # Development tools (ruff, black, codespell)
+pip install -e ".[docs]"      # Documentation tools (sphinx, furo)
+pip install -e ".[test]"      # Testing tools (coverage)
+pip install -e ".[examples]"  # Jupyter notebook support
+
+# Install all optional dependencies
+pip install -e ".[dev,docs,test,examples]"
+```
+
+**Optional dependency groups:**
+
+- `dev` - Code quality tools: ruff (linter), black (formatter), codespell, mdformat
+- `docs` - Documentation generation: sphinx, furo theme, myst-parser
+- `test` - Code coverage reporting: coverage
+- `examples` - Jupyter notebook support: jupyter, notebook, ipykernel
+
 ### Basic usage
 
 ```{index} usage examples, HedString, load_schema_version, validation example
 ```
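For orientation, the basic usage pattern referenced by the index entries above reduces to a few lines of Python. The sketch below is illustrative rather than part of the diff: the schema version and tags are placeholders, and it assumes the top-level `hed` exports (`HedString`, `load_schema_version`) and the `HedString.validate()` method available in recent hedtools releases.

```python
from hed import HedString, load_schema_version

# Load a released HED schema (version number is illustrative).
schema = load_schema_version("8.3.0")

# Build a HED string and validate it against the loaded schema.
hed_string = HedString("Sensory-event, Visual-presentation", schema)
issues = hed_string.validate()
print(f"{len(issues)} validation issue(s) found")
```

Validation returns a list of issue dictionaries, so an empty list indicates the string parsed cleanly against the loaded schema.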
@@ -111,9 +144,10 @@ hedpy schema convert /path/to/schema.xml
 ```bash
 validate_bids /path/to/dataset
 hed_validate_schemas /path/to/schema.xml
-run_remodel /path/to/data /path/to/config.json
 ```
 
+**Note:** The `run_remodel` command has been removed. Table remodeling functionality is now available in the separate [table-remodeler](https://pypi.org/project/table-remodeler/) package.
+
 For more examples, see the [user guide](https://www.hedtags.org/hed-python/user_guide.html).
 
 ### Jupyter notebook examples
@@ -172,14 +206,10 @@ pip install -e .[docs]
 # Build the documentation
 cd docs
 sphinx-build -b html . _build/html
-
-# Or use the make command (if available)
-make html
-
-# View the built documentation
-# Open docs/_build/html/index.html in your browser
 ```
 
+To view the built documentation, open `docs/_build/html/index.html` in your browser.
+
 ### Formatting with Black
 
 ```{index} Black, code formatting, style guide
 ```
@@ -229,6 +259,7 @@ The HED ecosystem consists of several interconnected repositories:
 | [hed-resources](https://github.com/hed-standard/hed-resources) | Tutorials and other HED resources |
 | [hed-specification](https://github.com/hed-standard/hed-specification) | Official HED specification documents |
 | [hed-schemas](https://github.com/hed-standard/hed-schemas) | Official HED schema repository |
+| [table-remodeler](https://github.com/hed-standard/table-remodeler) | Table transformation and remodeling tools |
 | [ndx-hed](https://github.com/hed-standard/ndx-hed) | HED support for NWB |
 | [hed-javascript](https://github.com/hed-standard/hed-javascript) | JavaScript HED validation tools |
 
diff --git a/docs/index.rst b/docs/index.rst
index b0d83d7e..2d7aa96c 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,3 +1,7 @@
+.. meta::
+   :description: Python tools for HED (Hierarchical Event Descriptors) validation, analysis, and BIDS integration in neuroscience
+   :keywords: HED, hierarchical event descriptors, Python, validation, BIDS, neuroscience, event annotation, metadata, analysis tools
+
 Python HEDTools
 ===============
 
diff --git a/docs/overview.md b/docs/overview.md
index 0d74013f..e29c21d4 100644
--- a/docs/overview.md
+++ b/docs/overview.md
@@ -1,3 +1,12 @@
+```{meta}
+---
+description: Overview of Python HEDTools - validation, analysis, and processing
+  tools for Hierarchical Event Descriptors (HED) in neuroscience
+keywords: HED overview, Python tools, event descriptors, BIDS, NWB,
+  neuroscience, validation, schema
+---
+```
+
 # Python HEDTools overview
 
 ```{index} single: HED; overview
diff --git a/docs/user_guide.md b/docs/user_guide.md
index b1f5bc8b..a2ffd9c8 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -1,3 +1,12 @@
+```{meta}
+---
+description: Complete user guide for Python HEDTools - validation, BIDS
+  integration, analysis, and command-line tools for HED annotations
+keywords: HED tutorial, Python guide, validation examples, BIDS datasets,
+  sidecar files, command-line interface, Jupyter notebooks
+---
+```
+
 # Python HEDTools guide
 
 ```{index} user guide, tutorial, getting started
diff --git a/hed/tools/__init__.py b/hed/tools/__init__.py
index 0f37ea58..03cbbb2e 100644
--- a/hed/tools/__init__.py
+++ b/hed/tools/__init__.py
@@ -1,4 +1,4 @@
-"""HED remodeling, analysis and summarization tools."""
+"""HED analysis and summarization tools."""
 
 from .analysis.event_manager import EventManager
 from .analysis.file_dictionary import FileDictionary
@@ -19,27 +19,6 @@ from .bids.bids_tabular_file import BidsTabularFile
 from .bids.bids_util import parse_bids_filename
 
-from .remodeling.dispatcher import Dispatcher
-from .remodeling.backup_manager import BackupManager
-from .remodeling.operations.base_summary import BaseSummary
-from .remodeling.operations.base_op import BaseOp
-from .remodeling.operations.factor_column_op import FactorColumnOp
-from .remodeling.operations.factor_hed_tags_op import FactorHedTagsOp
-from .remodeling.operations.factor_hed_type_op import FactorHedTypeOp
-from .remodeling.operations.merge_consecutive_op import MergeConsecutiveOp
-from .remodeling.operations.number_groups_op import NumberGroupsOp
-from 
.remodeling.operations.number_rows_op import NumberRowsOp -from .remodeling.operations import valid_operations -from .remodeling.operations.remap_columns_op import RemapColumnsOp -from .remodeling.operations.remove_columns_op import RemoveColumnsOp -from .remodeling.operations.remove_rows_op import RemoveRowsOp -from .remodeling.operations.rename_columns_op import RenameColumnsOp -from .remodeling.operations.reorder_columns_op import ReorderColumnsOp -from .remodeling.operations.split_rows_op import SplitRowsOp -from .remodeling.operations.summarize_column_names_op import SummarizeColumnNamesOp -from .remodeling.operations.summarize_column_values_op import SummarizeColumnValuesOp -from .remodeling.operations.summarize_hed_type_op import SummarizeHedTypeOp - from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns from .util.io_util import check_filename, clean_filename, extract_suffix_path, get_file_list, make_path from .util.io_util import get_path_components @@ -55,7 +34,3 @@ strs_to_sidecar, to_strlist, ) - -from .remodeling.cli import run_remodel -from .remodeling.cli import run_remodel_backup -from .remodeling.cli import run_remodel_restore diff --git a/hed/tools/remodeling/__init__.py b/hed/tools/remodeling/__init__.py deleted file mode 100644 index 4205bfe5..00000000 --- a/hed/tools/remodeling/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Remodeling tools for revising and summarizing tabular files.""" - -from .backup_manager import BackupManager -from .dispatcher import Dispatcher -from .remodeler_validator import RemodelerValidator diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py deleted file mode 100644 index 3b90373b..00000000 --- a/hed/tools/remodeling/backup_manager.py +++ /dev/null @@ -1,295 +0,0 @@ -"""Manager for file backups for remodeling tools.""" - -import os -import json -import shutil -from datetime import datetime -from typing import Union - -from hed.errors.exceptions import HedFileError -from hed.tools.util import io_util - - -class BackupManager: - """Manager for file backups for remodeling tools.""" - - DEFAULT_BACKUP_NAME = "default_back" - RELATIVE_BACKUP_LOCATION = "./derivatives/remodel/backups" - BACKUP_DICTIONARY = "backup_lock.json" - BACKUP_ROOT = "backup_root" - - def __init__(self, data_root, backups_root=None): - """Constructor for the backup manager. - - Parameters: - data_root (str): Full path of the root of the data directory. - backups_root (str or None): Full path to the root where backups subdirectory is located. - - Raises: - HedFileError: If the data_root does not correspond to a real directory. - - Notes: The backup_root will have remodeling/backups appended. - """ - if not os.path.isdir(data_root): - raise HedFileError("NonExistentData", f"{data_root} is not an existing directory", "") - self.data_root = data_root - if backups_root: - self.backups_path = backups_root - else: - self.backups_path = os.path.join(data_root, self.RELATIVE_BACKUP_LOCATION) - self.backups_path = os.path.realpath(self.backups_path) - os.makedirs(self.backups_path, exist_ok=True) - self.backups_dict = self._get_backups() - - def create_backup(self, file_list, backup_name=None, verbose=False) -> bool: - """Create a new backup from file_list. - - Parameters: - file_list (list): Full paths of the files to be in the backup. - backup_name (str or None): Name of the backup. If None, uses the default - verbose (bool): If True, print out the files that are being backed up. 
- - Returns: - bool: True if the backup was successful. False if a backup of that name already exists. - - Raises: - HedFileError: For missing or incorrect files. - OS-related error: OS-related error when file copying occurs. - - """ - if not backup_name: - backup_name = self.DEFAULT_BACKUP_NAME - if self.backups_dict and backup_name in self.backups_dict: - return False - backup = {} - time_stamp = f"{str(datetime.now())}" - if verbose: - print(f"Creating backup {backup_name}") - backup_dir_path = os.path.realpath(os.path.join(self.backups_path, backup_name, BackupManager.BACKUP_ROOT)) - os.makedirs(backup_dir_path, exist_ok=True) - for file in file_list: - backup_file = self.get_backup_path(backup_name, file) - os.makedirs(os.path.dirname(backup_file), exist_ok=True) - if verbose: - print(f"Copying {file} to {backup_file}") - shutil.copy2(file, backup_file) - backup[self.get_file_key(file)] = time_stamp - self.backups_dict[backup_name] = backup - backup_dict_path = os.path.realpath(os.path.join(self.backups_path, backup_name, self.BACKUP_DICTIONARY)) - with open(backup_dict_path, "w") as fp: - json.dump(backup, fp, indent=4) - return True - - def get_backup(self, backup_name) -> Union[dict, None]: - """Return the dictionary corresponding to backup_name. - - Parameters: - backup_name (str): Name of the backup to be retrieved. - - Returns: - Union[dict, None]: The dictionary with the backup info. - - Notes: - The dictionary with backup information has keys that are the paths of - the backed up files relative to the backup root. The values in this - dictionary are the dates on which the particular file was backed up. - - """ - if backup_name not in self.backups_dict: - return None - return self.backups_dict[backup_name] - - def get_backup_files(self, backup_name, original_paths=False) -> list: - """Returns a list of full paths of files contained in the backup. - - Parameters: - backup_name (str): Name of the backup. - original_paths (bool): If True return the original paths. - - Returns: - list: Full paths of the original files backed (original_paths=True) or the paths in the backup. - - Raises: - HedFileError: If not backup named backup_name exists. - - """ - - backup_dict = self.backups_dict.get(backup_name, None) - if not backup_dict: - raise HedFileError("NoBackup", f"{backup_name} is not a valid backup", "") - - if original_paths: - return [os.path.realpath(os.path.join(self.data_root, backup_key)) for backup_key in backup_dict.keys()] - else: - return [ - os.path.realpath(os.path.join(self.backups_path, backup_name, self.BACKUP_ROOT, backup_key)) - for backup_key in backup_dict.keys() - ] - - def get_backup_path(self, backup_name, file_name) -> str: - """Retrieve the file from the backup or throw an error. - - Parameters: - backup_name (str): Name of the backup. - file_name (str): Full path of the file to be retrieved. - - Returns: - str: Full path of the corresponding file in the backup. - - """ - return os.path.realpath(os.path.join(self.backups_path, backup_name, self.BACKUP_ROOT, self.get_file_key(file_name))) - - def get_file_key(self, file_name): - file_comp = io_util.get_path_components(self.data_root, file_name) + [os.path.basename(file_name)] - return "/".join(file_comp) - - def restore_backup(self, backup_name=DEFAULT_BACKUP_NAME, task_names=None, verbose=True): - """Restore the files from backup_name to the main directory. - - Parameters: - backup_name (str): Name of the backup to restore. - task_names (list or None): A list of task names to restore. 
If None, defaults to empty list (all tasks). - verbose (bool): If True, print out the file names being restored. - - """ - if task_names is None: - task_names = [] - if verbose: - print(f"Restoring from backup {backup_name}") - backup_files = self.get_backup_files(backup_name) - data_files = self.get_backup_files(backup_name, original_paths=True) - for index, file in enumerate(backup_files): - if task_names and not self.get_task(task_names, file): - continue - os.makedirs(os.path.dirname(data_files[index]), exist_ok=True) - if verbose: - print(f"Copying {file} to {data_files[index]}") - shutil.copy2(file, data_files[index]) - - def _get_backups(self): - """Set the manager's backup-dictionary based on backup directory contents. - - Returns: - dict: dictionary of dictionaries of the valid backups in the backups_path directory. - - Raises: - HedFileError: If a backup is inconsistent for any reason. - - """ - backups = {} - for backup in os.listdir(self.backups_path): - backup_root = os.path.realpath(os.path.join(self.backups_path, backup)) - if not os.path.isdir(backup_root): - raise HedFileError("BadBackupPath", f"{backup_root} is not a backup directory.", "") - if len(os.listdir(backup_root)) != 2: - raise HedFileError( - "BadBackupFormat", f"Backup {backup_root} must only contain backup_root and backup_lock.json file.", "" - ) - backup_dict, files_not_in_backup, backups_not_in_directory = self._check_backup_consistency(backup) - if files_not_in_backup: - raise HedFileError("MissingBackupFile", f"Backup {backup} has files not in backup_lock.json.", "") - if backups_not_in_directory: - raise HedFileError( - "ExtraFilesInBackup", f"Backup {backup} backup_lock.json entries not in backup directory.", "" - ) - backups[backup] = backup_dict - return backups - - def _check_backup_consistency(self, backup_name): - """Return the consistency of a backup. - - Parameters: - backup_name (str): Name of the backup. - - Returns: - tuple[dict, list, list]: - - Dictionary containing the backup info. - - Files in backup directory that are not in the backup dict. - - Files in backup dictionary not in backup directory. - - Notes: - If file_path is None, this checks against consistency in the backup dictionary. - - """ - - backup_dict_path = os.path.realpath(os.path.join(self.backups_path, backup_name, self.BACKUP_DICTIONARY)) - if not os.path.exists(backup_dict_path): - raise HedFileError( - "BadBackupDictionaryPath", - f"Backup dictionary path {backup_dict_path} for backup " f"{backup_name} does not exist so backup invalid", - "", - ) - backup_root_path = os.path.realpath(os.path.join(self.backups_path, backup_name, self.BACKUP_ROOT)) - if not os.path.isdir(backup_root_path): - raise HedFileError( - "BadBackupRootPath", - f"Backup root path {backup_root_path} for {backup_name} " f"does not exist so backup invalid", - "", - ) - with open(backup_dict_path, "r") as fp: - backup_dict = json.load(fp) - backup_paths = {os.path.realpath(os.path.join(backup_root_path, backup_key)) for backup_key in backup_dict.keys()} - file_paths = set(io_util.get_file_list(backup_root_path)) - files_not_in_backup = list(file_paths.difference(backup_paths)) - backups_not_in_directory = list(backup_paths.difference(file_paths)) - return backup_dict, files_not_in_backup, backups_not_in_directory - - @staticmethod - def get_task(task_names, file_path) -> str: - """Return the task if the file name contains a task_xxx where xxx is in task_names. - - Parameters: - task_names (list): List of task names (without the `task_` prefix). 
- file_path (str): Path of the filename to be tested. - - Returns: - str: the task name or '' if there is no task_xxx or xxx is not in task_names. - - """ - - base = os.path.basename(file_path) - for task in task_names: - if ("task_" + task) in base: - return task - else: - return "" - - def make_backup(self, task, backup_name=None, verbose=False) -> bool: - """Make a backup copy the files in the task file list. - - Parameters: - task (dict): Dictionary representing the remodeling task. - backup_name (str or None): Name of the backup. If None, uses the default - verbose (bool): If True, print out the files that are being backed up. - - Returns: - bool: True if the backup was successful. False if a backup of that name already exists. - - Raises: - HedFileError: For missing or incorrect files. - OS-related error: OS-related error when file copying occurs. - - """ - if not backup_name: - backup_name = self.DEFAULT_BACKUP_NAME - if self.backups_dict and backup_name in self.backups_dict: - return False - backup = {} - time_stamp = f"{str(datetime.now())}" - if verbose: - print(f"Creating backup {backup_name}") - backup_dir_path = os.path.realpath(os.path.join(self.backups_path, backup_name, BackupManager.BACKUP_ROOT)) - os.makedirs(backup_dir_path, exist_ok=True) - file_list = task.get("file_list", []) - for file in file_list: - backup_file = self.get_backup_path(backup_name, file) - os.makedirs(os.path.dirname(backup_file), exist_ok=True) - if verbose: - print(f"Copying {file} to {backup_file}") - shutil.copy2(file, backup_file) - backup[self.get_file_key(file)] = time_stamp - self.backups_dict[backup_name] = backup - backup_dict_path = os.path.realpath(os.path.join(self.backups_path, backup_name, self.BACKUP_DICTIONARY)) - with open(backup_dict_path, "w") as fp: - json.dump(backup, fp, indent=4) - return True diff --git a/hed/tools/remodeling/cli/__init__.py b/hed/tools/remodeling/cli/__init__.py deleted file mode 100644 index 080ad032..00000000 --- a/hed/tools/remodeling/cli/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Command-line interface for remodeling tools.""" diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py deleted file mode 100644 index 5b032296..00000000 --- a/hed/tools/remodeling/cli/run_remodel.py +++ /dev/null @@ -1,297 +0,0 @@ -"""Main command-line program for running the remodeling tools.""" - -import os -import io -import json -import argparse -import logging -from hed.errors.exceptions import HedFileError -from hed.models.sidecar import Sidecar -from hed.tools.remodeling.remodeler_validator import RemodelerValidator -from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.backup_manager import BackupManager -from hed.tools.util import io_util -from hed.tools.bids import bids_util - - -def get_parser(): - """Create a parser for the run_remodel command-line arguments. - - Returns: - argparse.ArgumentParser: A parser for parsing the command line arguments. 
- - """ - parser = argparse.ArgumentParser(description="Converts event files based on a json file specifying operations.") - parser.add_argument("data_dir", help="Full path of dataset root directory.") - parser.add_argument("model_path", help="Full path of the file with remodeling instructions.") - parser.add_argument( - "-bd", "--backup_dir", default="", dest="backup_dir", help="Directory for the backup that is being created" - ) - parser.add_argument( - "-bn", - "--backup_name", - default=BackupManager.DEFAULT_BACKUP_NAME, - dest="backup_name", - help="Name of the default backup for remodeling", - ) - parser.add_argument( - "-b", - "--bids-format", - action="store_true", - dest="use_bids", - help="If present, the dataset is in BIDS format with sidecars. HED analysis is available.", - ) - parser.add_argument( - "-fs", - "--file-suffix", - dest="suffixes", - nargs="*", - default=["events"], - help="Optional list of suffixes (no under_bar) of tsv files to validate. If -s with no values, will use all possible suffixes as with single argument '*'.", - ) - parser.add_argument( - "-i", - "--individual-summaries", - dest="individual_summaries", - default="separate", - choices=["separate", "consolidated", "none"], - help="Controls individual file summaries ('none', 'separate', 'consolidated')", - ) - parser.add_argument( - "-j", "--json-sidecar", dest="json_sidecar", nargs="?", help="Optional path to JSON sidecar with HED information" - ) - parser.add_argument("-ld", "--log_dir", dest="log_dir", default="", help="Directory for storing log entries for errors.") - parser.add_argument( - "-nb", - "--no-backup", - action="store_true", - dest="no_backup", - help="If present, the operations are run directly on the files with no backup.", - ) - parser.add_argument( - "-ns", - "--no-summaries", - action="store_true", - dest="no_summaries", - help="If present, the summaries are not saved, but rather discarded.", - ) - parser.add_argument( - "-nu", - "--no-update", - action="store_true", - dest="no_update", - help="If present, the files are not saved, but rather discarded.", - ) - parser.add_argument( - "-hv", - "--hed-versions", - dest="hed_versions", - nargs="*", - default=[], - help="Optional list of HED schema versions used for annotation, include prefixes.", - ) - parser.add_argument( - "-s", - "--save-formats", - nargs="*", - default=[".json", ".txt"], - dest="save_formats", - help="Format for saving any summaries, if any. If no summaries are to be written," + "use the -ns option.", - ) - parser.add_argument( - "-t", - "--task-names", - dest="task_names", - nargs="*", - default=[], - help="The names of the task. If an empty list is given, all tasks are lumped together." - + " If * is given, then tasks are found and reported individually.", - ) - parser.add_argument( - "-v", "--verbose", action="store_true", help="If present, output informative messages as computation progresses." - ) - parser.add_argument( - "-w", - "--work-dir", - default="", - dest="work_dir", - help="If given, is the path to directory for saving, otherwise derivatives/remodel is used.", - ) - parser.add_argument( - "-x", - "--exclude-dirs", - nargs="*", - default=[], - dest="exclude_dirs", - help="Directories names to exclude from search for files.", - ) - return parser - - -def handle_backup(args): - """Restore the backup if applicable. - - Parameters: - args (obj): Parsed arguments as an object. - - Returns: - Union[str, None]: Backup name if there was a backup done. 
- - """ - if args.no_backup: - backup_name = None - else: - backup_man = BackupManager(args.data_dir) - if not backup_man.get_backup(args.backup_name): - raise HedFileError( - "BackupDoesNotExist", f"Backup {args.backup_name} does not exist. " f"Please run_remodel_backup first", "" - ) - backup_man.restore_backup(args.backup_name, args.task_names, verbose=args.verbose) - backup_name = args.backup_name - return backup_name - - -def parse_arguments(arg_list=None): - """Parse the command line arguments or arg_list if given. - - Parameters: - arg_list (list): List of command line arguments as a list. - - Returns: - Tuple[Object, list]: - - Argument object. - - A list of parsed operations (each operation is a dictionary). - - Raises: - ValueError: If the operations were unable to be correctly parsed. - - """ - parser = get_parser() - args = parser.parse_args(arg_list) - if "*" in args.suffixes: - args.suffixes = None - args.data_dir = os.path.realpath(args.data_dir) - args.exclude_dirs = args.exclude_dirs + ["remodel"] - args.model_path = os.path.realpath(args.model_path) - if args.verbose: - print(f"Data directory: {args.data_dir}\nModel path: {args.model_path}") - with open(args.model_path, "r") as fp: - operations = json.load(fp) - validator = RemodelerValidator() - errors = validator.validate(operations) - if errors: - raise ValueError("UnableToFullyParseOperations", f"Fatal operation error, cannot continue:\n{errors}") - return args, operations - - -def parse_tasks(files, task_args): - """Parse the tasks argument to get a task list. - - Parameters: - files (list): List of full paths of files. - task_args (str or list): The argument values for the task parameter. - - """ - if not task_args: - return {"": files} - task_dict = io_util.get_task_dict(files) - if task_args == "*" or isinstance(task_args, list) and task_args[0] == "*": - return task_dict - task_dict = {key: task_dict[key] for key in task_args if key in task_dict} - return task_dict - - -def run_ops(dispatch, args, tabular_files): - """Run the remodeler on files of a specified form in a directory tree. - - Parameters: - dispatch (Dispatcher): Controls the application of the operations and backup. - args (argparse.Namespace): Dictionary of arguments and their values. - tabular_files (list): List of files to include in this run. - - """ - - if args.verbose: - print(f"Found {len(tabular_files)} files to process") - if hasattr(args, "json_sidecar"): - base_sidecar = Sidecar(args.json_sidecar, name=args.json_sidecar) - else: - base_sidecar = None - for file_path in tabular_files: - if not base_sidecar and args.use_bids: - sidecar = get_sidecar(file_path, args.data_dir) - else: - sidecar = base_sidecar - if args.verbose: - print(f"Tabular file {file_path} sidecar {str(sidecar)}") - df = dispatch.run_operations(file_path, verbose=args.verbose, sidecar=sidecar) - if not args.no_update: - df.to_csv(file_path, sep="\t", index=False, header=True) - - -def get_sidecar(data_dir, tsv_path): - """Get the sidecar for a file if it exists. - - Parameters: - data_dir (str): Full path of the data directory. - tsv_path (str): Full path of the file. - - Returns: - Union[Sidecar, None]: The Sidecar if it exists, otherwise None. - - """ - merged_dict = bids_util.get_merged_sidecar(data_dir, tsv_path) - if not merged_dict: - return None - name = "merged_" + io_util.get_basename(tsv_path)[0] + ".json" - return Sidecar(files=io.StringIO(json.dumps(merged_dict)), name=name) - - -def main(arg_list=None): - """The command-line program. 
- - Parameters: - arg_list (list or None): Called with value None when called from the command line. - Otherwise, called with the command-line parameters as an argument list. - - Raises: - HedFileError: - - if the data root directory does not exist. - - if the specified backup does not exist. - - """ - args, operations = parse_arguments(arg_list) - - if args.log_dir: - os.makedirs(args.log_dir, exist_ok=True) - timestamp = "_" + io_util.get_timestamp() - else: - timestamp = "" - try: - if not os.path.isdir(args.data_dir): - raise HedFileError("DataDirectoryDoesNotExist", f"The root data directory {args.data_dir} does not exist", "") - backup_name = handle_backup(args) - save_dir = None - if args.work_dir: - save_dir = os.path.realpath(os.path.join(args.work_dir, Dispatcher.REMODELING_SUMMARY_PATH)) - tsv_files = io_util.get_file_list( - args.data_dir, name_suffix=args.suffixes, extensions=[".tsv"], exclude_dirs=args.exclude_dirs - ) - task_dict = parse_tasks(tsv_files, args.task_names) - for task, files in task_dict.items(): - dispatch = Dispatcher(operations, data_root=args.data_dir, backup_name=backup_name, hed_versions=args.hed_versions) - run_ops(dispatch, args, files) - if not args.no_summaries: - dispatch.save_summaries( - args.save_formats, individual_summaries=args.individual_summaries, summary_dir=save_dir, task_name=task - ) - except Exception: - if args.log_dir: - log_name = io_util.get_alphanumeric_path(os.path.realpath(args.data_dir)) + timestamp + ".txt" - logging.basicConfig(filename=os.path.join(args.log_dir, log_name), level=logging.ERROR) - logging.exception(f"{args.data_dir}: {args.model_path}") - raise - - -if __name__ == "__main__": - main() diff --git a/hed/tools/remodeling/cli/run_remodel_backup.py b/hed/tools/remodeling/cli/run_remodel_backup.py deleted file mode 100644 index 25b2c9ef..00000000 --- a/hed/tools/remodeling/cli/run_remodel_backup.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Command-line program for creating a remodeler backup.""" - -import argparse -from hed.errors.exceptions import HedFileError -from hed.tools.util import io_util -from hed.tools.remodeling.backup_manager import BackupManager - - -def get_parser(): - """Create a parser for the run_remodel_backup command-line arguments. - - Returns: - argparse.ArgumentParser: A parser for parsing the command line arguments. - - """ - parser = argparse.ArgumentParser(description="Creates a backup for the remodeling process.") - parser.add_argument("data_dir", help="Full path of dataset root directory.") - parser.add_argument( - "-bd", "--backup_dir", default="", dest="backup_dir", help="Directory for the backup that is being created" - ) - parser.add_argument( - "-bn", - "--backup_name", - default=BackupManager.DEFAULT_BACKUP_NAME, - dest="backup_name", - help="Name of the default backup for remodeling", - ) - parser.add_argument( - "-fs", - "--file-suffix", - dest="suffixes", - nargs="*", - default=["events"], - help="Filename suffix of files to be backed up. A * indicates all files allowed.", - ) - - parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The name of the task.") - parser.add_argument( - "-v", "--verbose", action="store_true", help="If present, output informative messages as computation progresses." - ) - parser.add_argument( - "-x", - "--exclude-dirs", - nargs="*", - default=["derivatives"], - dest="exclude_dirs", - help="Directories names to exclude from search for files. " - + "If omitted, no directories except the backup directory will be excluded." 
- + "Note [data_root]/derivatives/remodel will always be excluded.", - ) - return parser - - -def main(arg_list=None): - """The command-line program for making a remodel backup. - - Parameters: - arg_list (list or None): Called with value None when called from the command line. - Otherwise, called with the command-line parameters as an argument list. - - Raises: - HedFileError: If the specified backup already exists. - - """ - - parser = get_parser() - args = parser.parse_args(arg_list) - if "*" in args.suffixes: - args.suffixes = None - exclude_dirs = args.exclude_dirs + ["remodeling"] - file_list = io_util.get_file_list(args.data_dir, name_suffix=args.suffixes, extensions=[".tsv"], exclude_dirs=exclude_dirs) - if args.task_names: - file_list = io_util.get_filtered_by_element(file_list, args.task_names) - if args.backup_dir: - backups_root = args.backup_dir - else: - backups_root = None - backup_man = BackupManager(args.data_dir, backups_root=backups_root) - if backup_man.get_backup(args.backup_name): - raise HedFileError("BackupExists", f"Backup {args.backup_name} already exists", "") - else: - backup_man.create_backup(file_list, backup_name=args.backup_name, verbose=args.verbose) - - -if __name__ == "__main__": - main() diff --git a/hed/tools/remodeling/cli/run_remodel_restore.py b/hed/tools/remodeling/cli/run_remodel_restore.py deleted file mode 100644 index 792af050..00000000 --- a/hed/tools/remodeling/cli/run_remodel_restore.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Command-line program for restoring files from remodeler backup.""" - -import argparse -from hed.errors.exceptions import HedFileError -from hed.tools.remodeling.backup_manager import BackupManager - - -def get_parser(): - """Create a parser for the run_remodel_restore command-line arguments. - - Returns: - argparse.ArgumentParser: A parser for parsing the command line arguments. - - """ - parser = argparse.ArgumentParser(description="Restores the backup files for the original data.") - parser.add_argument("data_dir", help="Full path of dataset root directory.") - parser.add_argument( - "-bd", "--backup_dir", default="", dest="backup_dir", help="Directory for the backup that is being created" - ) - parser.add_argument( - "-bn", - "--backup_name", - default=BackupManager.DEFAULT_BACKUP_NAME, - dest="backup_name", - help="Name of the default backup for remodeling", - ) - parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The names of the task.") - parser.add_argument( - "-v", "--verbose", action="store_true", help="If present, output informative messages as computation progresses." - ) - return parser - - -def main(arg_list=None): - """The command-line program for restoring a remodel backup. - - Parameters: - arg_list (list or None): Called with value None when called from the command line. - Otherwise, called with the command-line parameters as an argument list. - - Raises: - HedFileError: If the specified backup does not exist. 
- - """ - parser = get_parser() - args = parser.parse_args(arg_list) - if args.backup_dir: - backups_root = args.backup_dir - else: - backups_root = None - backup_man = BackupManager(args.data_dir, backups_root=backups_root) - if not backup_man.get_backup(args.backup_name): - raise HedFileError("BackupDoesNotExist", f"{args.backup_name}", "") - backup_man.restore_backup(args.backup_name, task_names=args.task_names, verbose=args.verbose) - - -if __name__ == "__main__": - main() diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py deleted file mode 100644 index 5f2423d6..00000000 --- a/hed/tools/remodeling/dispatcher.py +++ /dev/null @@ -1,291 +0,0 @@ -"""Controller for applying operations to tabular files and saving the results.""" - -from __future__ import annotations -import os -from typing import Union - -import numpy as np -import pandas as pd -import json -from hed.errors.exceptions import HedFileError -from hed.schema.hed_schema_io import load_schema_version -from hed.schema.hed_schema import HedSchema -from hed.schema.hed_schema_group import HedSchemaGroup -from hed.tools.remodeling.backup_manager import BackupManager -from hed.tools.remodeling.operations.valid_operations import valid_operations -from hed.tools.util import io_util - -# This isn't supported in all versions of pandas -try: - pd.set_option("future.no_silent_downcasting", True) -except pd.errors.OptionError: - pass - - -class Dispatcher: - """Controller for applying operations to tabular files and saving the results.""" - - REMODELING_SUMMARY_PATH = "remodel/summaries" - - def __init__(self, operation_list, data_root=None, backup_name=BackupManager.DEFAULT_BACKUP_NAME, hed_versions=None): - """Constructor for the dispatcher. - - Parameters: - operation_list (list): List of valid unparsed operations. - data_root (str or None): Root directory for the dataset. If none, then backups are not made. - hed_versions (str, list, HedSchema, or HedSchemaGroup): The HED schema. - - Raises: - HedFileError: If the specified backup does not exist. - ValueError: If any of the operations cannot be parsed correctly. - """ - - self.data_root = data_root - self.backup_name = backup_name - self.backup_man = None - if self.data_root and backup_name: - self.backup_man = BackupManager(data_root) - if not self.backup_man.get_backup(self.backup_name): - raise HedFileError( - "BackupDoesNotExist", - f"Remodeler cannot be run with a dataset without first creating the " - f"{self.backup_name} backup for {self.data_root}", - "", - ) - self.parsed_ops = self.parse_operations(operation_list) - self.hed_schema = self.get_schema(hed_versions) - self.summary_dicts = {} - - def get_summaries(self, file_formats=None) -> list[dict]: - """Return the summaries in a dictionary of strings suitable for saving or archiving. - - Parameters: - file_formats (list or None): List of formats for the context files ('.json' and '.txt' are allowed). - If None, defaults to ['.txt', '.json']. - - Returns: - list[dict]: A list of dictionaries of summaries keyed to filenames. 
- """ - if file_formats is None: - file_formats = [".txt", ".json"] - - summary_list = [] - time_stamp = "_" + io_util.get_timestamp() - for _context_name, context_item in self.summary_dicts.items(): - file_base = context_item.op.summary_filename - if self.data_root: - file_base = io_util.extract_suffix_path(self.data_root, file_base) - file_base = io_util.clean_filename(file_base) - for file_format in file_formats: - if file_format == ".txt": - summary = context_item.get_text_summary(individual_summaries="consolidated") - summary = summary["Dataset"] - elif file_format == ".json": - summary = json.dumps(context_item.get_summary(individual_summaries="consolidated"), indent=4) - - else: - continue - summary_list.append( - { - "file_name": file_base + time_stamp + file_format, - "file_format": file_format, - "file_type": "summary", - "content": summary, - } - ) - return summary_list - - def get_data_file(self, file_designator) -> "pd.DataFrame": - """Get the correct data file give the file designator. - - Parameters: - file_designator (str, DataFrame ): A dataFrame or the full path of the dataframe in the original dataset. - - Returns: - pd.DataFrame: DataFrame after reading the path. - - Raises - HedFileError: If a valid file cannot be found. - - Notes: - - If a string is passed and there is a backup manager, - the string must correspond to the full path of the file in the original dataset. - In this case, the corresponding backup file is read and returned. - - If a string is passed and there is no backup manager, - the data file corresponding to the file_designator is read and returned. - - If a Pandas DataFrame, return a copy. - """ - - if isinstance(file_designator, pd.DataFrame): - return file_designator.copy() - if self.backup_man: - actual_path = self.backup_man.get_backup_path(self.backup_name, file_designator) - else: - actual_path = file_designator - try: - df = pd.read_csv(actual_path, sep="\t", header=0, keep_default_na=False, na_values=",null") - except Exception as e: - raise HedFileError( - "BadDataFile", f"{str(actual_path)} (orig: {file_designator}) does not correspond to a valid tsv file", "" - ) from e - return df - - def get_summary_save_dir(self) -> str: - """Return the directory in which to save the summaries. - - Returns: - str: the data_root + remodeling summary path - - Raises - HedFileError: If this dispatcher does not have a data_root. - """ - - if self.data_root: - return os.path.realpath(os.path.join(self.data_root, "derivatives", Dispatcher.REMODELING_SUMMARY_PATH)) - raise HedFileError("NoDataRoot", "Dispatcher must have a data root to produce directories", "") - - def run_operations(self, file_path, sidecar=None, verbose=False) -> "pd.DataFrame": - """Run the dispatcher operations on a file. - - Parameters: - file_path (str or DataFrame): Full path of the file to be remodeled or a DataFrame. - sidecar (Sidecar or file-like): Only needed for HED operations. - verbose (bool): If True, print out progress reports. - - Returns: - pd.DataFrame: The processed dataframe. - """ - - # string to functions - if verbose: - print(f"Reading {file_path}...") - df = self.get_data_file(file_path) - for operation in self.parsed_ops: - df = self.prep_data(df) - df = operation.do_op(self, df, file_path, sidecar=sidecar) - df = self.post_proc_data(df) - return df - - def save_summaries(self, save_formats=None, individual_summaries="separate", summary_dir=None, task_name=""): - """Save the summary files in the specified formats. 
- - Parameters: - save_formats (list or None): A list of formats [".txt", ".json"]. If None, defaults to ['.json', '.txt']. - individual_summaries (str): "consolidated", "individual", or "none". - summary_dir (str or None): Directory for saving summaries. - task_name (str): Name of task if summaries separated by task or "" if not separated. - - Notes: - The summaries are saved in the dataset derivatives/remodeling folder if no save_dir is provided. - - Notes: - - "consolidated" means that the overall summary and summaries of individual files are in one summary file. - - "individual" means that the summaries of individual files are in separate files. - - "none" means that only the overall summary is produced. - """ - if save_formats is None: - save_formats = [".json", ".txt"] - - if not save_formats: - return - if not summary_dir: - summary_dir = self.get_summary_save_dir() - os.makedirs(summary_dir, exist_ok=True) - for _summary_name, summary_item in self.summary_dicts.items(): - summary_item.save(summary_dir, save_formats, individual_summaries=individual_summaries, task_name=task_name) - - @staticmethod - def parse_operations(operation_list) -> list: - """Return a parsed a list of remodeler operations. - - Parameters: - operation_list (list): List of JSON remodeler operations. - - Returns: - list: List of Python objects containing parsed remodeler operations. - """ - - operations = [] - for _index, item in enumerate(operation_list): - new_operation = valid_operations[item["operation"]](item["parameters"]) - operations.append(new_operation) - return operations - - @staticmethod - def prep_data(df) -> "pd.DataFrame": - """Make a copy and replace all n/a entries in the data frame by np.nan for processing. - - Parameters: - df (DataFrame): The DataFrame to be processed. - - Returns: - DataFrame: A copy of the DataFrame with n/a entries replaced by np.nan. - """ - - result = df.replace("n/a", np.nan) - # Comment in the next line if this behavior was actually needed, but I don't think it is. - # result = result.infer_objects(copy=False) - return result - - @staticmethod - def post_proc_data(df) -> "pd.DataFrame": - """Replace all nan entries with 'n/a' for BIDS compliance. - - Parameters: - df (DataFrame): The DataFrame to be processed. - - Returns: - pd.DataFrame: DataFrame with the 'np.nan replaced by 'n/a'. - """ - - dtypes = df.dtypes.to_dict() - for col_name, typ in dtypes.items(): - if typ == "category": - df[col_name] = df[col_name].astype(str) - return df.fillna("n/a") - - @staticmethod - def errors_to_str(messages, title="", sep="\n") -> str: - """Return an error string representing error messages in a list. - - Parameters: - messages (list of dict): List of error dictionaries each representing a single error. - title (str): If provided the title is concatenated at the top. - sep (str): Character used between lines in concatenation. - - Returns: - str: Single string representing the messages. - """ - - error_list = [0] * len(messages) - for index, message in enumerate(messages): - error_list[index] = ( - f"Operation[{message.get('index', None)}] " - + f"has error:{message.get('error_type', None)}" - + f" with error code:{message.get('error_code', None)} " - + f"\n\terror msg:{message.get('error_msg', None)}" - ) - errors = sep.join(error_list) - if title: - return title + sep + errors - return errors - - @staticmethod - def get_schema(hed_versions) -> Union["HedSchema", "HedSchemaGroup", None]: - """Return the schema objects represented by the hed_versions. 
- - Parameters: - hed_versions (str, list, HedSchema, HedSchemaGroup): If str, interpreted as a version number. - - Returns: - Union[HedSchema, HedSchemaGroup, None]: Objects loaded from the hed_versions specification. - """ - - if not hed_versions: - return None - elif isinstance(hed_versions, str) or isinstance(hed_versions, list): - return load_schema_version(hed_versions) - elif isinstance(hed_versions, HedSchema) or isinstance(hed_versions, HedSchemaGroup): - return hed_versions - else: - raise ValueError("InvalidHedSchemaOrSchemaVersion", "Expected schema or schema version") diff --git a/hed/tools/remodeling/operations/__init__.py b/hed/tools/remodeling/operations/__init__.py deleted file mode 100644 index 7010be5c..00000000 --- a/hed/tools/remodeling/operations/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Remodeling operations.""" diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py deleted file mode 100644 index c5d24f74..00000000 --- a/hed/tools/remodeling/operations/base_op.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Base class for remodeling operations.""" - -from abc import ABC, abstractmethod - - -class BaseOp(ABC): - """Base class for operations. All remodeling operations should extend this class.""" - - def __init__(self, parameters): - """Constructor for the BaseOp class. Should be extended by operations. - - Parameters: - parameters (dict): A dictionary specifying the appropriate parameters for the operation. - """ - self.parameters = parameters - - @property - @abstractmethod - def NAME(self): - pass - - @property - @abstractmethod - def PARAMS(self): - pass - - @abstractmethod - def do_op(self, dispatcher, df, name, sidecar=None): - """Base class method to be overridden by each operation. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The tabular file to be remodeled. - name (str): Unique identifier for the data -- often the original file path. - sidecar (Sidecar or file-like): A JSON sidecar needed for HED operations. - - """ - - return df.copy() - - @staticmethod - @abstractmethod - def validate_input_data(parameters): - """Validates whether operation parameters meet op-specific criteria beyond that captured in json schema. - - Example: A check to see whether two input arrays are the same length. - - Notes: The minimum implementation should return an empty list to indicate no errors were found. - If additional validation is necessary, method should perform the validation and - return a list with user-friendly error strings. - """ - return [] diff --git a/hed/tools/remodeling/operations/base_summary.py b/hed/tools/remodeling/operations/base_summary.py deleted file mode 100644 index c2011b8c..00000000 --- a/hed/tools/remodeling/operations/base_summary.py +++ /dev/null @@ -1,326 +0,0 @@ -"""Abstract base class for the contents of summary operations.""" - -import os -from abc import ABC, abstractmethod -import json -from hed.tools.util import io_util - - -class BaseSummary(ABC): - """Abstract base class for summary contents. Should not be instantiated. - - Parameters: - sum_op (BaseOp): Operation corresponding to this summary. - - """ - - DISPLAY_INDENT = " " - INDIVIDUAL_SUMMARIES_PATH = "individual_summaries" - - def __init__(self, sum_op): - self.op = sum_op - self.summary_dict = {} - - def get_summary_details(self, include_individual=True) -> dict: - """Return a dictionary with the details for individual files and the overall dataset. 
- - Parameters: - include_individual (bool): If True, summaries for individual files are included. - - Returns: - dict: A dictionary with 'Dataset' and 'Individual files' keys. - - Notes: - - The 'Dataset' value is either a string or a dictionary with the overall summary. - - The 'Individual files' value is dictionary whose keys are file names and values are - their corresponding summaries. - - Users are expected to provide merge_all_info and get_details_dict functions to support this. - - """ - merged_counts = self.merge_all_info() - if merged_counts: - details = self.get_details_dict(merged_counts) - else: - details = "Overall summary unavailable" - - summary_details = {"Dataset": details, "Individual files": {}} - if include_individual: - for name, count in self.summary_dict.items(): - summary_details["Individual files"][name] = self.get_details_dict(count) - return summary_details - - def get_summary(self, individual_summaries="separate"): - """Return a summary dictionary with the information. - - Parameters: - individual_summaries (str): "separate", "consolidated", or "none" - - Returns: - dict: Dictionary with "Dataset" and "Individual files" keys. - - Notes: The individual_summaries value is processed as follows: - - "separate" individual summaries are to be in separate files. - - "consolidated" means that the individual summaries are in same file as overall summary. - - "none" means that only the overall summary is produced. - - """ - include_individual = individual_summaries == "separate" or individual_summaries == "consolidated" - summary_details = self.get_summary_details(include_individual=include_individual) - dataset_summary = { - "Summary name": self.op.summary_name, - "Summary type": self.op.SUMMARY_TYPE, - "Summary filename": self.op.summary_filename, - "Overall summary": summary_details["Dataset"], - } - summary = {"Dataset": dataset_summary, "Individual files": {}} - if summary_details["Individual files"]: - summary["Individual files"] = self.get_individual( - summary_details["Individual files"], separately=individual_summaries == "separate" - ) - return summary - - def get_individual(self, summary_details, separately=True): - """Return a dictionary of the individual file summaries. - - Parameters: - summary_details (dict): Dictionary of the individual file summaries. - separately (bool): If True (the default), each individual summary has a header for separate output. - """ - individual_dict = {} - for name, name_summary in summary_details.items(): - if separately: - individual_dict[name] = { - "Summary name": self.op.summary_name, - "summary type": self.op.SUMMARY_TYPE, - "Summary filename": self.op.summary_filename, - "File summary": name_summary, - } - else: - individual_dict[name] = name_summary - return individual_dict - - def get_text_summary_details(self, include_individual=True) -> dict: - """Return a text summary of the information represented by this summary. - - Parameters: - include_individual (bool): If True (the default), individual summaries are in "Individual files". - - Returns: - dict: Dictionary with "Dataset" and "Individual files" keys. 
- - """ - result = self.get_summary_details(include_individual=include_individual) - summary_details = {"Dataset": self._get_result_string("Dataset", result.get("Dataset", "")), "Individual files": {}} - if include_individual: - for name, individual_result in result.get("Individual files", {}).items(): - summary_details["Individual files"][name] = self._get_result_string(name, individual_result) - return summary_details - - def get_text_summary(self, individual_summaries="separate") -> dict: - """Return a complete text summary by assembling the individual pieces. - - Parameters: - individual_summaries(str): One of the values "separate", "consolidated", or "none". - - Returns: - dict: Complete text summary. - - Notes: The options are: - - "none": Just has "Dataset" key. - - "consolidated" Has "Dataset" and "Individual files" keys with the values of each is a string. - - "separate" Has "Dataset" and "Individual files" keys. The values of "Individual files" is a dict. - - """ - include_individual = individual_summaries == "separate" or individual_summaries == "consolidated" - summary_details = self.get_text_summary_details(include_individual=include_individual) - summary = { - "Dataset": f"Summary name: {self.op.summary_name}\n" - + f"Summary type: {self.op.SUMMARY_TYPE}\n" - + f"Summary filename: {self.op.summary_filename}\n\n" - + f"Overall summary:\n{summary_details['Dataset']}" - } - if individual_summaries == "separate": - summary["Individual files"] = {} - for name, name_summary in summary_details["Individual files"].items(): - summary["Individual files"][name] = ( - f"Summary name: {self.op.summary_name}\n" - + f"Summary type: {self.op.SUMMARY_TYPE}\n" - + f"Summary filename: {self.op.summary_filename}\n\n" - + f"Summary for {name}:\n{name_summary}" - ) - elif include_individual: - ind_list = [] - for name, name_summary in summary_details["Individual files"].items(): - ind_list.append(f"{name}:\n{name_summary}\n") - ind_str = "\n\n".join(ind_list) - summary["Dataset"] = summary["Dataset"] + f"\n\nIndividual files:\n\n{ind_str}" - - return summary - - def save(self, save_dir, file_formats=None, individual_summaries="separate", task_name=""): - """Save the summaries using the format indicated. - - Parameters: - save_dir (str): Name of the directory to save the summaries in. - file_formats (list or None): List of file formats to use for saving. If None, defaults to ['.txt']. - individual_summaries (str): Save one file or multiple files based on setting. - task_name (str): If this summary corresponds to files from a task, the task_name is used in filename. - - """ - if file_formats is None: - file_formats = [".txt"] - for file_format in file_formats: - if file_format == ".txt": - summary = self.get_text_summary(individual_summaries=individual_summaries) - elif file_format == ".json": - summary = self.get_summary(individual_summaries=individual_summaries) - else: - continue - self._save_summary_files(save_dir, file_format, summary, individual_summaries, task_name=task_name) - - self.save_visualizations( - save_dir, file_formats=file_formats, individual_summaries=individual_summaries, task_name=task_name - ) - - def save_visualizations(self, save_dir, file_formats=None, individual_summaries="separate", task_name=""): - """Save summary visualizations, if any, using the format indicated. - - Parameters: - save_dir (str): Name of the directory to save the summaries in. - file_formats (list or None): List of file formats to use for saving. If None, defaults to ['.svg']. 
- individual_summaries (str): Save one file or multiple files based on setting. - task_name (str): If this summary corresponds to files from a task, the task_name is used in filename. - - """ - if file_formats is None: - file_formats = [".svg"] - pass - - def _save_summary_files(self, save_dir, file_format, summary, individual_summaries, task_name=""): - """Save the files in the appropriate format. - - Parameters: - save_dir (str): Path to the directory in which the summaries will be saved. - file_format (str): string representing the extension (including .), '.txt' or '.json'. - summary (dictionary): Dictionary of summaries (has "Dataset" and "Individual files" keys). - individual_summaries (str): "consolidated", "individual", or "none". - task_name (str): Name of task to be included in file name if multiple tasks. - - """ - if self.op.append_timecode: - time_stamp = "_" + io_util.get_timestamp() - else: - time_stamp = "" - if task_name: - task_name = "_" + task_name - this_save = os.path.join(save_dir, self.op.summary_name + "/") - os.makedirs(os.path.realpath(this_save), exist_ok=True) - filename = os.path.realpath(os.path.join(this_save, self.op.summary_filename + task_name + time_stamp + file_format)) - individual = summary.get("Individual files", {}) - if individual_summaries == "none" or not individual: - self.dump_summary(filename, summary["Dataset"]) - return - if individual_summaries == "consolidated": - self.dump_summary(filename, summary) - return - self.dump_summary(filename, summary["Dataset"]) - individual_dir = os.path.join(this_save, self.INDIVIDUAL_SUMMARIES_PATH + "/") - os.makedirs(os.path.realpath(individual_dir), exist_ok=True) - for name, sum_str in individual.items(): - filename = self._get_summary_filepath(individual_dir, name, task_name, time_stamp, file_format) - self.dump_summary(filename, sum_str) - - def _get_summary_filepath(self, individual_dir, name, task_name, time_stamp, file_format): - """Return the filepath for the summary including the timestamp - - Parameters: - individual_dir (str): path of the directory in which the summary should be stored. - name (str): Path of the original file from which the summary was extracted. - task_name (str): Task name if separate summaries for different tasks or the empty string if not separated. - time_stamp (str): Formatted date-time string to be included in the filename of the summary. - - Returns: - str: Full path name of the summary. - - """ - this_name = os.path.basename(name) - this_name = os.path.splitext(this_name)[0] - count = 1 - match = True - filename = None - while match: - filename = f"{self.op.summary_filename}_{this_name}{task_name}_{count}{time_stamp}{file_format}" - filename = os.path.realpath(os.path.join(individual_dir, filename)) - if not os.path.isfile(filename): - break - count = count + 1 - return filename - - def _get_result_string(self, name, result, indent=DISPLAY_INDENT): - """Return a formatted string with the summary for the indicated name. - - Parameters: - name (str): Identifier (usually the filename) of the individual file. - result (dict): The dictionary of the summary results indexed by name. - indent (str): A string containing spaces used for indentation (usually 3 spaces). - - Returns: - str: The results in a printable format ready to be saved to a text file. - - Notes: - This file should be overridden by each summary. 
- - """ - return f"\n{name}\n{indent}{str(result)}" - - @staticmethod - def dump_summary(filename, summary): - with open(filename, "w") as text_file: - if not isinstance(summary, str): - summary = json.dumps(summary, indent=4) - text_file.write(summary) - - @abstractmethod - def get_details_dict(self, summary_info): - """Return the summary-specific information. - - Parameters: - summary_info (object): Summary to return info from. - - Returns: - dict: dictionary with the results. - - Notes: - Abstract method be implemented by each individual summary. - - Notes: - The expected return value is a dictionary of the form: - - {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": {}}" - - """ - raise NotImplementedError - - @abstractmethod - def merge_all_info(self): - """Return merged information. - - Returns: - object: Consolidated summary of information. - - Notes: - Abstract method be implemented by each individual summary. - - """ - raise NotImplementedError - - @abstractmethod - def update_summary(self, summary_dict): - """Method to update summary for a given tabular input. - - Parameters: - summary_dict (dict) A summary specific dictionary with the update information. - - """ - raise NotImplementedError diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py deleted file mode 100644 index 1a962ae1..00000000 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Convert the type of the specified columns of a tabular file.""" - -# TODO finish implementation - -from hed.tools.remodeling.operations.base_op import BaseOp - - -class ConvertColumnsOp(BaseOp): - """Convert specified columns to have specified data type. - - Required remodeling parameters: - - **column_names** (*list*): The list of columns to convert. - - **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) - - Optional remodeling parameters: - - **decimal_places** (*int*): Number decimal places to keep (for fixed only). - - Notes: - - """ - - NAME = "convert_columns" - - PARAMS = { - "type": "object", - "properties": { - "column_names": { - "type": "array", - "description": "List of names of the columns whose types are to be converted to the specified type.", - "items": {"type": "string"}, - "minItems": 1, - "uniqueItems": True, - }, - "convert_to": { - "type": "string", - "description": "Data type to convert the columns to.", - "enum": ["str", "int", "float", "fixed"], - }, - "decimal_places": {"type": "integer", "description": "The number of decimal points if converted to fixed."}, - }, - "required": ["column_names", "convert_to"], - "additionalProperties": False, - "if": {"properties": {"convert_to": {"const": "fixed"}}}, - "then": {"required": ["decimal_places"]}, - } - - def __init__(self, parameters): - """Constructor for the convert columns operation. - - Parameters: - parameters (dict): Parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.column_names = parameters["column_names"] - self.convert_to = parameters["convert_to"] - self.decimal_places = parameters.get("decimal_places", None) - - def do_op(self, dispatcher, df, name, sidecar=None): - """Convert the specified column to a specified type. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. 
- sidecar (Sidecar or file-like): Only needed for HED operations.
-
- Returns:
- DataFrame: A new DataFrame with the specified columns converted.
-
- """
-
- # TODO: conversion is not yet implemented -- currently returns an unmodified copy.
- df_new = df.copy()
- return df_new
-
- @staticmethod
- def validate_input_data(operations):
- """Additional validation required of operation parameters not performed by JSON schema validator."""
- return []
diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py
deleted file mode 100644
index 1cab7e57..00000000
--- a/hed/tools/remodeling/operations/factor_column_op.py
+++ /dev/null
@@ -1,101 +0,0 @@
-"""Append to tabular file columns of factors based on column values."""
-
-import pandas as pd
-from hed.tools.remodeling.operations.base_op import BaseOp
-
-
-class FactorColumnOp(BaseOp):
- """Append to tabular file columns of factors based on column values.
-
- Required remodeling parameters:
- - **column_name** (*str*): The name of a column in the DataFrame to compute factors from.
-
- Optional remodeling parameters:
- - **factor_names** (*list*): Names to use as the factor columns.
- - **factor_values** (*list*): Values in the column column_name to create factors for.
-
- Notes:
- - If no factor_values are provided, factors are computed for each of the unique values in the column_name column.
- - If factor_names are provided, then factor_values must also be provided and the two lists must be the same size.
-
- """
-
- NAME = "factor_column"
-
- PARAMS = {
- "type": "object",
- "properties": {
- "column_name": {
- "type": "string",
- "description": "Name of the column for which to create one-hot factors for unique values.",
- },
- "factor_names": {
- "type": "array",
- "description": "Names of the resulting factor columns. If given, must be same length as factor_values.",
- "items": {"type": "string"},
- "minItems": 1,
- "uniqueItems": True,
- },
- "factor_values": {
- "type": "array",
- "description": "Specific unique column values to compute factors for (otherwise all unique values).",
- "items": {"type": "string"},
- "minItems": 1,
- "uniqueItems": True,
- },
- },
- "required": ["column_name"],
- "dependentRequired": {"factor_names": ["factor_values"]},
- "additionalProperties": False,
- }
-
- def __init__(self, parameters):
- """Constructor for the factor column operation.
-
- Parameters:
- parameters (dict): Parameter values for required and optional parameters.
-
- """
- super().__init__(parameters)
- self.column_name = parameters["column_name"]
- self.factor_values = parameters.get("factor_values", None)
- self.factor_names = parameters.get("factor_names", None)
-
- def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame:
- """Create factor columns based on values in a specified column.
-
- Parameters:
- dispatcher (Dispatcher): Manages the operation I/O.
- df (DataFrame): The DataFrame to be remodeled.
- name (str): Unique identifier for the dataframe -- often the original file path.
- sidecar (Sidecar or file-like): Not needed for this operation.
-
- Returns:
- DataFrame: A new DataFrame with the factor columns appended.
-
- """
-
- factor_values = self.factor_values
- factor_names = self.factor_names
- if not factor_values:
- factor_values = df[self.column_name].unique()
- if not factor_names:
- factor_names = [self.column_name + "." + str(column_value) for column_value in factor_values]
- df_new = df.copy()
- for index, factor_value in enumerate(factor_values):
- factor_index = df_new[self.column_name].map(str).isin([str(factor_value)])
- column = factor_names[index]
- df_new[column] = factor_index.astype(int)
- return df_new
-
- @staticmethod
- def validate_input_data(parameters):
- """Check that factor_names and factor_values have same length if given."""
- names = parameters.get("factor_names", None)
- values = parameters.get("factor_values", None)
- if names and not values:
- return ["factor_names cannot be given without factor_values"]
- elif names and values and len(names) != len(values):
- return ["factor_names must be same length as factor_values"]
- else:
- return []
diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py
deleted file mode 100644
index b1334ac2..00000000
--- a/hed/tools/remodeling/operations/factor_hed_tags_op.py
+++ /dev/null
@@ -1,134 +0,0 @@
-"""Append columns of factors based on column values to a columnar file."""
-
-import pandas as pd
-from hed.tools.remodeling.operations.base_op import BaseOp
-from hed.models.tabular_input import TabularInput
-from hed.models.sidecar import Sidecar
-from hed.models import query_service
-from hed.tools.analysis.event_manager import EventManager
-from hed.tools.analysis.hed_tag_manager import HedTagManager
-from hed.tools.util.data_util import replace_na
-
-
-class FactorHedTagsOp(BaseOp):
- """Append columns of factors based on column values to a columnar file.
-
- Required remodeling parameters:
- - **queries** (*list*): Queries to be applied successively as filters.
-
- Optional remodeling parameters:
- - **query_names** (*list*): Column names for the query factors.
- - **remove_types** (*list*): Structural HED tags to be removed (such as Condition-variable or Task).
- - **expand_context** (*bool*): If true, expand the context based on Onset, Offset, and Duration.
- - **replace_defs** (*bool*): If true, replace Def tags with the contents of their definitions.
-
- Notes:
- - If query names are not provided, *query1*, *query2*, ... are used.
- - If query names are provided, the list must have the same length as the number of queries.
- - When the context is expanded, the effect of events of temporal extent is accounted for.
-
- """
-
- NAME = "factor_hed_tags"
-
- PARAMS = {
- "type": "object",
- "properties": {
- "queries": {
- "type": "array",
- "description": "List of HED tag queries to compute one-hot factors for.",
- "items": {"type": "string"},
- "minItems": 1,
- "uniqueItems": True,
- },
- "query_names": {
- "type": "array",
- "description": "Optional column names for the queries.",
- "items": {"type": "string"},
- "minItems": 1,
- "uniqueItems": True,
- },
- "remove_types": {
- "type": "array",
- "description": "List of type tags to remove before querying (e.g., Condition-variable, Task).",
- "items": {"type": "string"},
- "minItems": 1,
- "uniqueItems": True,
- },
- "expand_context": {
- "type": "boolean",
- "description": "If true, the assembled HED tags include the effects of temporal extent (e.g., Onset).",
- },
- "replace_defs": {"type": "boolean", "description": "If true, Def tags are replaced with definition contents."},
- },
- "required": ["queries"],
- "additionalProperties": False,
- }
-
- def __init__(self, parameters):
- """Constructor for the factor HED tags operation.
-
- Parameters:
- parameters (dict): Actual values of the parameters for the operation.
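# Illustrative sketch (annotation, not part of the diff): FactorColumnOp.do_op
# above does not use the dispatcher or sidecar, so it can be exercised standalone.
# The column name and values are hypothetical.
import pandas as pd

df = pd.DataFrame({"event_type": ["go", "stop", "go"]})
op = FactorColumnOp({"column_name": "event_type"})
df_new = op.do_op(None, df, "example_events")
# df_new now has one-hot columns "event_type.go" -> [1, 0, 1] and
# "event_type.stop" -> [0, 1, 0] appended after the original column.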
- - """ - super().__init__(parameters) - self.queries = parameters["queries"] - self.remove_types = parameters.get("remove_types", []) - self.expand_context = parameters.get("expand_context", True) - self.replace_defs = parameters.get("replace_defs", True) - self.query_handlers, self.query_names, issues = query_service.get_query_handlers( - self.queries, parameters.get("query_names", None) - ) - if issues: - raise ValueError("FactorHedTagInvalidQueries", "\n".join(issues)) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Create factor columns based on HED tag queries. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Only needed for HED operations. - - Returns: - DataFrame: A new dataframe after processing. - - Raises: - ValueError: If a name for a new query factor column is already a column. - - """ - - if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar) - input_data = TabularInput(df.copy().fillna("n/a"), sidecar=sidecar, name=name) - column_names = list(df.columns) - for query_name in self.query_names: - if query_name in column_names: - raise ValueError("QueryNameAlreadyColumn", f"Query [{query_name}]: is already a column name of the data frame") - df_list = [input_data.dataframe] - tag_man = HedTagManager(EventManager(input_data, dispatcher.hed_schema), remove_types=self.remove_types) - hed_objs = tag_man.get_hed_objs(include_context=self.expand_context, replace_defs=self.replace_defs) - df_factors = query_service.search_hed_objs(hed_objs, self.query_handlers, query_names=self.query_names) - if len(df_factors.columns) > 0: - df_list.append(df_factors) - df_new = pd.concat(df_list, axis=1) - replace_na(df_new) - return df_new - - @staticmethod - def validate_input_data(parameters) -> list: - """Parse and valid the queries and return issues in parsing queries, if any. - - Parameters: - parameters (dict): Dictionary representing the actual operation values. - - Returns: - list: List of issues in parsing queries. - - """ - queries, names, issues = query_service.get_query_handlers( - parameters.get("queries", []), parameters.get("query_names", None) - ) - return issues diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py deleted file mode 100644 index b9df5beb..00000000 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Append to columnar file the factors computed from type variables.""" - -import pandas as pd -from hed.tools.remodeling.operations.base_op import BaseOp -from hed.models.tabular_input import TabularInput -from hed.tools.analysis.event_manager import EventManager -from hed.tools.analysis.hed_type_manager import HedTypeManager -from hed.tools.util.data_util import replace_na - - -class FactorHedTypeOp(BaseOp): - """Append to columnar file the factors computed from type variables. - - Required remodeling parameters: - - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`). - - Optional remodeling parameters: - - **type_values** (*list*): If provided, specifies which factor values to include. 
- - """ - - NAME = "factor_hed_type" - - PARAMS = { - "type": "object", - "properties": { - "type_tag": { - "type": "string", - "description": "Type tag to use for computing factor vectors (e.g., Condition-variable or Task).", - }, - "type_values": { - "type": "array", - "description": "If provided, only compute one-hot factors for these values of the type tag.", - "items": {"type": "string"}, - "minItems": 1, - "uniqueItems": True, - }, - }, - "required": ["type_tag"], - "additionalProperties": False, - } - - def __init__(self, parameters): - """Constructor for the factor HED type operation. - - Parameters: - parameters (dict): Actual values of the parameters for the operation. - - """ - super().__init__(parameters) - self.type_tag = parameters["type_tag"] - self.type_values = parameters.get("type_values", None) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Factor columns based on HED type and append to tabular data. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Only needed for HED operations. - - Returns: - DataFrame: A new DataFame with that includes the factors. - - Notes: - - If column_name is not a column in df, df is just returned. - - """ - - input_data = TabularInput(df.copy().fillna("n/a"), sidecar=sidecar, name=name) - df_list = [input_data.dataframe] - var_manager = HedTypeManager(EventManager(input_data, dispatcher.hed_schema)) - var_manager.add_type(self.type_tag.casefold()) - - df_factors = var_manager.get_factor_vectors(self.type_tag, self.type_values, factor_encoding="one-hot") - if len(df_factors.columns) > 0: - df_list.append(df_factors) - df_new = pd.concat(df_list, axis=1) - replace_na(df_new) - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py deleted file mode 100644 index c9ce431c..00000000 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ /dev/null @@ -1,174 +0,0 @@ -"""Merge consecutive rows of a columnar file with same column value.""" - -import pandas as pd -from hed.tools.remodeling.operations.base_op import BaseOp - - -class MergeConsecutiveOp(BaseOp): - """Merge consecutive rows of a columnar file with same column value. - - Required remodeling parameters: - - **column_name** (*str*): name of column whose consecutive values are to be compared (the merge column). - - **event_code** (*str* or *int* or *float*): the particular value in the match column to be merged. - - **set_durations** (*bool*): If true, set the duration of the merged event to the extent of the merged events. - - **ignore_missing** (*bool*): If true, missing match_columns are ignored. - - Optional remodeling parameters: - - **match_columns** (*list*): A list of columns whose values have to be matched for two events to be the same. - - Notes: - This operation is meant for time-based tabular files that have an onset column. 
- - """ - - NAME = "merge_consecutive" - - PARAMS = { - "type": "object", - "properties": { - "column_name": { - "type": "string", - "description": "The name of the column to check for repeated consecutive codes.", - }, - "event_code": {"type": ["string", "number"], "description": "The event code to match for duplicates."}, - "match_columns": { - "type": "array", - "description": "List of columns whose values must also match to be considered a repeat.", - "items": {"type": "string"}, - }, - "set_durations": { - "type": "boolean", - "description": "If true, then the duration should be computed based on start of first to end of last.", - }, - "ignore_missing": {"type": "boolean", "description": "If true, missing match columns are ignored."}, - }, - "required": ["column_name", "event_code", "set_durations", "ignore_missing"], - "additionalProperties": False, - } - - def __init__(self, parameters): - """Constructor for the merge consecutive operation. - - Parameters: - parameters (dict): Actual values of the parameters for the operation. - - """ - super().__init__(parameters) - self.column_name = parameters["column_name"] - self.event_code = parameters["event_code"] - self.set_durations = parameters["set_durations"] - self.ignore_missing = parameters["ignore_missing"] - self.match_columns = parameters.get("match_columns", None) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Merge consecutive rows with the same column value. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Not needed for this operation. - - Returns: - Dataframe: A new dataframe after processing. - - :raises ValueError: - - If dataframe does not have the anchor column and ignore_missing is False. - - If a match column is missing and ignore_missing is False. - - If the durations were to be set and the dataframe did not have an onset column. - - If the durations were to be set and the dataframe did not have a duration column. 
- - """ - - if not self.ignore_missing and self.column_name not in df.columns: - raise ValueError( - "ColumnMissing", - f"{name}: {self.column_name} is not in data columns [{str(df.columns)}] " - f"and missing columns are not ignored", - ) - if self.set_durations and "onset" not in df.columns: - raise ValueError("MissingOnsetColumn", f"{name}: Data must have an onset column in order to set durations") - if self.set_durations and "duration" not in df.columns: - raise ValueError("MissingDurationColumn", f"{name}: Data must have a duration column in order to set durations") - missing = set(self.match_columns).difference(set(df.columns)) - if self.match_columns and not self.ignore_missing and missing: - raise ValueError( - "MissingMatchColumns", - f"{name}: {str(missing)} columns are unmatched by data columns" f"[{str(df.columns)}] and not ignored", - ) - match_columns = list(set(self.match_columns).intersection(set(df.columns))) - - df_new = df.copy() - code_mask = df_new[self.column_name] == self.event_code - if not code_mask.any(): - return df_new - match_columns.append(self.column_name) - match_df = df_new.loc[:, match_columns] - remove_groups = self._get_remove_groups(match_df, code_mask) - if self.set_durations and max(remove_groups) > 0: - self._update_durations(df_new, remove_groups) - keep_mask = [remove_group == 0 for remove_group in remove_groups] - df_new = df_new.loc[keep_mask, :].reset_index(drop=True) - return df_new - - @staticmethod - def _get_remove_groups(match_df, code_mask): - """Return a list of same length as match_df with group numbers of consecutive items. - - Parameters: - match_df (DataFrame): DataFrame containing columns to be matched. - code_mask (DataSeries): Same length as match_df with the names. - - Returns: - list: Group numbers set (starting at 1). - - # TODO: Handle round off in rows for comparison. - """ - in_group = False - remove_groups = [0] * len(match_df) - group_count = 0 - for index, row in match_df.iterrows(): - if not code_mask.iloc[index]: - in_group = False - continue - elif not in_group: - in_group = True - group_count += 1 - continue - if in_group and row.equals(match_df.loc[index - 1, :]): - remove_groups[index] = group_count - else: - group_count += 1 - return remove_groups - - @staticmethod - def _update_durations(df_new, remove_groups): - """Update the durations for the columns based on merged columns. - - Parameters: - df_new (DataFrame): Tabular data to merge. - remove_groups (list): List of names of columns to remove. - - """ - remove_df = pd.DataFrame(remove_groups, columns=["remove"]) - max_groups = max(remove_groups) - for index in range(max_groups): - df_group = df_new.loc[remove_df["remove"] == index + 1, ["onset", "duration"]] - max_group = df_group.sum(axis=1, skipna=True).max() - anchor = df_group.index[0] - 1 - max_anchor = df_new.loc[anchor, ["onset", "duration"]].sum(skipna=True).max() - df_new.loc[anchor, "duration"] = max(max_group, max_anchor) - df_new.loc[anchor, "onset"] - - @staticmethod - def validate_input_data(parameters): - """Verify that the column name is not in match columns. - - Parameters: - parameters (dict): Dictionary of parameters of actual implementation. 
- - """ - match_columns = parameters.get("match_columns", None) - name = parameters.get("column_name", None) - if match_columns and name in match_columns: - return [f"column_name `{name}` cannot be a match_column."] - return [] diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py deleted file mode 100644 index a01f298d..00000000 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Implementation in progress.""" - -from hed.tools.remodeling.operations.base_op import BaseOp - -# TODO: This class is under development - - -class NumberGroupsOp(BaseOp): - """Implementation in progress.""" - - NAME = "number_groups" - - PARAMS = { - "type": "object", - "properties": { - "number_column_name": {"type": "string"}, - "source_column": {"type": "string"}, - "start": { - "type": "object", - "properties": {"values": {"type": "array"}, "inclusion": {"type": "string", "enum": ["include", "exclude"]}}, - "required": ["values", "inclusion"], - "additionalProperties": False, - }, - "stop": { - "type": "object", - "properties": {"values": {"type": "array"}, "inclusion": {"type": "string", "enum": ["include", "exclude"]}}, - "required": ["values", "inclusion"], - "additionalProperties": False, - }, - "overwrite": {"type": "boolean"}, - }, - "required": ["number_column_name", "source_column", "start", "stop"], - "additionalProperties": False, - } - - def __init__(self, parameters): - super().__init__(parameters) - self.number_column_name = parameters["number_column_name"] - self.source_column = parameters["source_column"] - self.start = parameters["start"] - self.stop = parameters["stop"] - self.start_stop_test = {"values": list, "inclusion": str} - self.inclusion_test = ["include", "exclude"] - self.overwrite = parameters.get("overwrite", False) - - def do_op(self, dispatcher, df, name, sidecar=None): - """Add numbers to groups of events in dataframe. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Only needed for HED operations. - - Returns: - Dataframe: A new dataframe after processing. 
- - """ - # check if number_column_name exists and if so, check overwrite setting - if self.number_column_name in df.columns: - if self.overwrite is False: - raise ValueError("ExistingNumberColumn", f"Column {self.number_column_name} already exists in event file.", "") - - # check if source_column exists - if self.source_column not in df.columns: - raise ValueError("MissingSourceColumn", f"Column {self.source_column} does not exist in event file {name}.", "") - - # check if all elements in value lists start and stop exist in the source_column - missing = [] - for element in self.start["values"]: - if element not in df[self.source_column].tolist(): - missing.append(element) - if len(missing) > 0: - raise ValueError( - "MissingValue", f"Start value(s) {missing} does not exist in {self.source_column} of event file {name}" - ) - - missing = [] - for element in self.stop["values"]: - if element not in df[self.source_column].tolist(): - missing.append(element) - if len(missing) > 0: - raise ValueError( - "MissingValue", f"Start value(s) {missing} does not exist in {self.source_column} of event file {name}" - ) - - df_new = df.copy() - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py deleted file mode 100644 index 1e48a1fc..00000000 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Implementation in progress.""" - -from hed.tools.remodeling.operations.base_op import BaseOp - -# TODO: This class is under development - - -class NumberRowsOp(BaseOp): - """Implementation in progress.""" - - NAME = "number_rows" - - PARAMS = { - "type": "object", - "properties": { - "number_column_name": {"type": "string"}, - "overwrite": {"type": "boolean"}, - "match_value": { - "type": "object", - "properties": {"column": {"type": "string"}, "value": {"type": ["string", "number"]}}, - "required": ["column", "value"], - "additionalProperties": False, - }, - }, - "required": ["number_column_name"], - "additionalProperties": False, - } - - def __init__(self, parameters): - super().__init__(parameters) - self.number_column_name = parameters["number_column_name"] - self.overwrite = parameters.get("overwrite", False) - self.match_value = parameters.get("match_value", False) - - def do_op(self, dispatcher, df, name, sidecar=None): - """Add numbers events dataframe. - - Parameters: - dispatcher (Dispatcher): Manages operation I/O. - df (DataFrame): - The DataFrame to be remodeled. - name (str): - Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Only needed for HED operations. - - Returns: - Dataframe: A new dataframe after processing. 
- - """ - if self.number_column_name in df.columns: - if self.overwrite is False: - raise ValueError("ExistingNumberColumn", f"Column {self.number_column_name} already exists in event file.", "") - - if self.match_value: - if self.match_value["column"] not in df.columns: - raise ValueError( - "MissingMatchColumn", f"Column {self.match_value['column']} does not exist in event file.", "" - ) - if self.match_value["value"] not in df[self.match_value["column"]].tolist(): - raise ValueError( - "MissingMatchValue", - f"Value {self.match_value['value']} does not exist in event file column" f"{self.match_value['column']}.", - "", - ) - - df_new = df.copy() - # df_new[self.number_column_name] = np.nan - # if self.match_value: - # filter = df[self.match_value['column']] == self.match_value['value'] - # numbers = [*range(1, sum(filter)+1)] - # df_new.loc[filter, self.number_column_name] = numbers - # else: - # df_new[self.number_column_name] = df_new.index + 1 - - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py deleted file mode 100644 index c8730c75..00000000 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Map values in m columns in a columnar file into a new combinations in n columns.""" - -import pandas as pd -import numpy as np -from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.analysis.key_map import KeyMap - - -class RemapColumnsOp(BaseOp): - """Map values in m columns in a columnar file into a new combinations in n columns. - - Required remodeling parameters: - - **source_columns** (*list*): The key columns to map (m key columns). - - **destination_columns** (*list*): The destination columns to have the mapped values (n destination columns). - - **map_list** (*list*): A list of lists with the mapping. - - **ignore_missing** (*bool*): If True, entries whose key column values are not in map_list are ignored. - - Optional remodeling parameters: - **integer_sources** (*list*): Source columns that should be treated as integers rather than strings. - - Notes: - Each list element list is of length m + n with the key columns followed by mapped columns. 
-
- TODO: Allow wildcards
-
- """
-
- NAME = "remap_columns"
-
- PARAMS = {
- "type": "object",
- "properties": {
- "source_columns": {
- "type": "array",
- "description": "The columns whose values are combined to provide the remap keys.",
- "items": {"type": "string"},
- "minItems": 1,
- },
- "destination_columns": {
- "type": "array",
- "description": "The columns to insert new values based on a key lookup of the source columns.",
- "items": {"type": "string"},
- "minItems": 1,
- },
- "map_list": {
- "type": "array",
- "description": "An array of k lists each with m+n entries corresponding to the k unique keys.",
- "items": {"type": "array", "items": {"type": ["string", "number"]}, "minItems": 1},
- "minItems": 1,
- "uniqueItems": True,
- },
- "ignore_missing": {
- "type": "boolean",
- "description": "If true, insert missing source columns in the result, filled with n/a, else error.",
- },
- "integer_sources": {
- "type": "array",
- "description": "A list of source column names whose values are to be treated as integers.",
- "items": {"type": "string"},
- "minItems": 1,
- "uniqueItems": True,
- },
- },
- "required": ["source_columns", "destination_columns", "map_list", "ignore_missing"],
- "additionalProperties": False,
- }
-
- def __init__(self, parameters):
- """Constructor for the remap columns operation.
-
- Parameters:
- parameters (dict): Parameter values for required and optional parameters.
-
- """
- super().__init__(parameters)
- self.source_columns = parameters["source_columns"]
- self.destination_columns = parameters["destination_columns"]
- self.map_list = parameters["map_list"]
- self.ignore_missing = parameters["ignore_missing"]
- self.integer_sources = parameters.get("integer_sources", [])
- self.string_sources = list(set(self.source_columns).difference(set(self.integer_sources)))
- self.key_map = self._make_key_map()
-
- def _make_key_map(self):
- """Create the KeyMap used for the remapping from map_list.
-
- Raises:
- ValueError: If a column designated as an integer source does not have valid integers.
-
- """
-
- key_df = pd.DataFrame(self.map_list, columns=self.source_columns + self.destination_columns)
- key_map = KeyMap(self.source_columns, target_cols=self.destination_columns, name="remap")
- key_map.update(key_df)
- return key_map
-
- def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame:
- """Remap new columns from combinations of others.
-
- Parameters:
- dispatcher (Dispatcher): Manages the operation I/O.
- df (DataFrame): The DataFrame to be remodeled.
- name (str): Unique identifier for the dataframe -- often the original file path.
- sidecar (Sidecar or file-like): Not needed for this operation.
-
- Returns:
- DataFrame: A new dataframe after processing.
-
- :raises ValueError:
- - If ignore_missing is False and source values from the data are not in the map.
- - """ - df1 = df.copy() - df1[self.source_columns] = df1[self.source_columns].replace(np.nan, "n/a") - for column in self.integer_sources: - int_mask = df1[column] != "n/a" - df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int) - df1[self.source_columns] = df1[self.source_columns].astype(str) - df_new, missing = self.key_map.remap(df1) - if missing and not self.ignore_missing: - raise ValueError( - "MapSourceValueMissing", - f"{name}: Ignore missing is False, but source values [{missing}] are in data but not map", - ) - return df_new - - @staticmethod - def validate_input_data(parameters): - map_list = parameters["map_list"] - required_len = len(parameters["source_columns"]) + len(parameters["destination_columns"]) - for x in map_list: - if len(x) != required_len: - return [f"all map_list arrays must be of length {str(required_len)}."] - missing = set(parameters.get("integer_sources", [])) - set(parameters["source_columns"]) - if missing: - return [f"the integer_sources {str(missing)} are missing from source_columns."] - return [] diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py deleted file mode 100644 index 3547066a..00000000 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Remove columns from a columnar file.""" - -from __future__ import annotations - -import pandas as pd - -from hed.tools.remodeling.operations.base_op import BaseOp - - -class RemoveColumnsOp(BaseOp): - """Remove columns from a columnar file. - - Required remodeling parameters: - - **column_names** (*list*): The names of the columns to be removed. - - **ignore_missing** (*boolean*): If True, names in column_names that are not columns in df should be ignored. - - """ - - NAME = "remove_columns" - - PARAMS = { - "type": "object", - "properties": { - "column_names": {"type": "array", "items": {"type": "string"}, "minItems": 1, "uniqueItems": True}, - "ignore_missing": {"type": "boolean"}, - }, - "required": ["column_names", "ignore_missing"], - "additionalProperties": False, - } - - def __init__(self, parameters): - """Constructor for remove columns operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.column_names = parameters["column_names"] - ignore_missing = parameters["ignore_missing"] - if ignore_missing: - self.error_handling = "ignore" - else: - self.error_handling = "raise" - - def do_op(self, dispatcher, df, name, sidecar=None) -> "pd.DataFrame": - """Remove indicated columns from a dataframe. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Not needed for this operation. - - Returns: - pd.DataFrame: A new dataframe after processing. - - :raises KeyError: - - If ignore_missing is False and a column not in the data is to be removed. 
- - """ - df_new = df.copy() - try: - return df_new.drop(self.column_names, axis=1, errors=self.error_handling) - except KeyError as e: - raise KeyError( - "MissingColumnCannotBeRemoved", - f"{name}: Ignore missing is False but a column in {str(self.column_names)} is " - f"not in the data columns [{str(df_new.columns)}]", - ) from e - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py deleted file mode 100644 index c2479a5d..00000000 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Remove rows from a columnar file based on column values.""" - -import pandas as pd -from hed.tools.remodeling.operations.base_op import BaseOp - - -class RemoveRowsOp(BaseOp): - """Remove rows from a columnar file based on the values in a specified row. - - Required remodeling parameters: - - **column_name** (*str*): The name of column to be tested. - - **remove_values** (*list*): The values to test for row removal. - - """ - - NAME = "remove_rows" - - PARAMS = { - "type": "object", - "properties": { - "column_name": {"type": "string", "description": "Name of the key column to determine which rows to remove."}, - "remove_values": { - "type": "array", - "description": "List of key values for rows to remove.", - "items": {"type": ["string", "number"]}, - "minItems": 1, - "uniqueItems": True, - }, - }, - "required": ["column_name", "remove_values"], - "additionalProperties": False, - } - - def __init__(self, parameters): - """Constructor for remove rows operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.column_name = parameters["column_name"] - self.remove_values = parameters["remove_values"] - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Remove rows with the values indicated in the column. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Not needed for this operation. - - Returns: - Dataframe: A new dataframe after processing. - - """ - df_new = df.copy() - if self.column_name not in df_new.columns: - return df_new - for value in self.remove_values: - df_new = df_new.loc[df_new[self.column_name] != value, :] - df_new = df_new.reset_index(drop=True) - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py deleted file mode 100644 index d5f519ea..00000000 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Rename columns in a columnar file.""" - -from __future__ import annotations -import pandas as pd -from hed.tools.remodeling.operations.base_op import BaseOp - - -class RenameColumnsOp(BaseOp): - """Rename columns in a tabular file. - - Required remodeling parameters: - - **column_mapping** (*dict*): The names of the columns to be renamed with values to be remapped to. 
- - **ignore_missing** (*bool*): If true, names in column_mapping that are not columns in the data are ignored.
-
- """
-
- NAME = "rename_columns"
-
- PARAMS = {
- "type": "object",
- "properties": {
- "column_mapping": {
- "type": "object",
- "description": "Mapping between original column names and their respective new names.",
- "patternProperties": {".*": {"type": "string"}},
- "minProperties": 1,
- },
- "ignore_missing": {
- "type": "boolean",
- "description": "If true, ignore column_mapping keys that don't correspond to columns, otherwise error.",
- },
- },
- "required": ["column_mapping", "ignore_missing"],
- "additionalProperties": False,
- }
-
- def __init__(self, parameters):
- """Constructor for rename columns operation.
-
- Parameters:
- parameters (dict): Dictionary with the parameter values for required and optional parameters.
-
- """
- super().__init__(parameters)
- self.column_mapping = parameters["column_mapping"]
- if parameters["ignore_missing"]:
- self.error_handling = "ignore"
- else:
- self.error_handling = "raise"
-
- def do_op(self, dispatcher, df, name, sidecar=None) -> "pd.DataFrame":
- """Rename columns as specified in the column_mapping dictionary.
-
- Parameters:
- dispatcher (Dispatcher): Manages the operation I/O.
- df (DataFrame): The DataFrame to be remodeled.
- name (str): Unique identifier for the dataframe -- often the original file path.
- sidecar (Sidecar or file-like): Not needed for this operation.
-
- Returns:
- pd.DataFrame: A new dataframe after processing.
-
- Raises:
- KeyError: When ignore_missing is False and column_mapping has columns not in the data.
-
- """
- df_new = df.copy()
- try:
- return df_new.rename(columns=self.column_mapping, errors=self.error_handling)
- except KeyError as e:
- raise KeyError(
- "MappedColumnsMissingFromData",
- f"{name}: ignore_missing is False, mapping columns [{self.column_mapping}]"
- f" but df columns are [{str(df.columns)}]",
- ) from e
-
- @staticmethod
- def validate_input_data(parameters):
- """Additional validation required of operation parameters not performed by JSON schema validator."""
- return []
diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py
deleted file mode 100644
index 93a25036..00000000
--- a/hed/tools/remodeling/operations/reorder_columns_op.py
+++ /dev/null
@@ -1,89 +0,0 @@
-"""Reorder columns in a columnar file."""
-
-import pandas as pd
-from hed.tools.remodeling.operations.base_op import BaseOp
-
-
-class ReorderColumnsOp(BaseOp):
- """Reorder columns in a columnar file.
-
- Required remodeling parameters:
- - column_order (*list*): The names of the columns to be reordered.
- - ignore_missing (*bool*): If True, skip columns in column_order that are not in df; if False, raise an error.
- - keep_others (*bool*): If True, columns not in column_order are placed at the end.
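# Illustrative sketch (annotation, not part of the diff): renaming a BIDS-style
# trial_type column (hypothetical example); the dispatcher is not used.
import pandas as pd

df = pd.DataFrame({"trial_type": ["go", "stop"], "onset": [0.0, 1.0]})
op = RenameColumnsOp({"column_mapping": {"trial_type": "event_type"}, "ignore_missing": True})
df_new = op.do_op(None, df, "example_events")
# df_new has columns ["event_type", "onset"].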
- - """ - - NAME = "reorder_columns" - - PARAMS = { - "type": "object", - "properties": { - "column_order": { - "type": "array", - "description": "A list of column names in the order you wish them to be.", - "items": {"type": "string"}, - "minItems": 1, - "uniqueItems": True, - }, - "ignore_missing": { - "type": "boolean", - "description": "If true, ignore column_order columns that aren't in file, otherwise error.", - }, - "keep_others": { - "type": "boolean", - "description": "If true columns not in column_order are placed at end, otherwise ignored.", - }, - }, - "required": ["column_order", "ignore_missing", "keep_others"], - "additionalProperties": False, - } - - def __init__(self, parameters): - """Constructor for reorder columns operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.column_order = parameters["column_order"] - self.ignore_missing = parameters["ignore_missing"] - self.keep_others = parameters["keep_others"] - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Reorder columns as specified in event dictionary. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Not needed for this operation. - - Returns: - Dataframe: A new dataframe after processing. - - Raises: - ValueError: When ignore_missing is false and column_order has columns not in the data. - - """ - df_new = df.copy() - current_columns = list(df_new.columns) - missing_columns = set(self.column_order).difference(set(df_new.columns)) - ordered = self.column_order - if missing_columns and not self.ignore_missing: - raise ValueError( - "MissingReorderedColumns", - f"{str(missing_columns)} are not in dataframe columns " f" [{str(df_new.columns)}] and not ignored.", - ) - elif missing_columns: - ordered = [elem for elem in self.column_order if elem not in list(missing_columns)] - if self.keep_others: - ordered += [elem for elem in current_columns if elem not in ordered] - df_new = df_new.loc[:, ordered] - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py deleted file mode 100644 index 14b8193e..00000000 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ /dev/null @@ -1,183 +0,0 @@ -"""Split rows in a columnar file with onset and duration columns into multiple rows based on a specified column.""" - -import numpy as np -import pandas as pd -from hed.tools.remodeling.operations.base_op import BaseOp - - -class SplitRowsOp(BaseOp): - """Split rows in a columnar file with onset and duration columns into multiple rows based on a specified column. - - Required remodeling parameters: - - **anchor_column** (*str*): The column in which the names of new items are stored. - - **new_events** (*dict*): Mapping of new values based on values in the original row. - - **remove_parent_row** (*bool*): If true, the original row that was split is removed. - - Notes: - - In specifying onset and duration for the new row, you can give values or the names of columns as strings. 
- - """ - - NAME = "split_rows" - - PARAMS = { - "type": "object", - "properties": { - "anchor_column": { - "type": "string", - "description": "The column containing the keys for the new rows. (Original rows will have own keys.)", - }, - "new_events": { - "type": "object", - "description": "A map describing how the rows for the new codes will be created.", - "patternProperties": { - ".*": { - "type": "object", - "properties": { - "onset_source": { - "type": "array", - "description": "List of items to add to compute the onset time of the new row.", - "items": {"type": ["string", "number"]}, - "minItems": 1, - }, - "duration": { - "type": "array", - "description": "List of items to add to compute the duration of the new row.", - "items": {"type": ["string", "number"]}, - "minItems": 1, - }, - "copy_columns": { - "type": "array", - "description": "List of columns whose values to copy for the new row.", - "items": {"type": "string"}, - "minItems": 1, - "uniqueItems": True, - }, - }, - "required": ["onset_source", "duration"], - "additionalProperties": False, - } - }, - "minProperties": 1, - }, - "remove_parent_row": { - "type": "boolean", - "description": "If true, the row from which these rows were split is removed, otherwise it stays.", - }, - }, - "required": ["anchor_column", "new_events", "remove_parent_row"], - "additionalProperties": False, - } - - def __init__(self, parameters): - """Constructor for the split rows operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.anchor_column = parameters["anchor_column"] - self.new_events = parameters["new_events"] - self.remove_parent_row = parameters["remove_parent_row"] - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Split a row representing a particular event into multiple rows. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Not needed for this operation. - - Returns: - Dataframe: A new dataframe after processing. - - Raises: - TypeError: If bad onset or duration. - - """ - if "onset" not in df.columns: - raise ValueError("MissingOnsetColumn", f"{name}: Data must have an onset column for split_rows_op") - elif "duration" not in df.columns: - raise ValueError("MissingDurationColumn", f"{name}: Data must have an duration column for split_rows_op") - df_new = df.copy() - - if self.anchor_column not in df_new.columns: - df_new[self.anchor_column] = np.nan - if self.remove_parent_row: - df_list = [] - else: - df_list = [df_new] - self._split_rows(df, df_list) - df_ret = pd.concat(df_list, axis=0, ignore_index=True) - df_ret["onset"] = df_ret["onset"].apply(pd.to_numeric) - df_ret = df_ret.sort_values("onset").reset_index(drop=True) - return df_ret - - def _split_rows(self, df, df_list): - """Split the rows based on an anchor and different columns. - - Parameters: - df (DataFrame): The DataFrame to be split. 
- df_list (list): The list of new event DataFrames and possibly the original data.
-
- """
- for event, event_params in self.new_events.items():
- add_events = pd.DataFrame([], columns=df.columns)
- add_events["onset"] = self._create_onsets(df, event_params["onset_source"])
- add_events[self.anchor_column] = event
- self._add_durations(df, add_events, event_params["duration"])
- for column in event_params.get("copy_columns", []):
- add_events[column] = df[column]
-
- add_events = add_events.dropna(axis="rows", subset=["onset"])
- df_list.append(add_events)
-
- @staticmethod
- def _add_durations(df, add_events, duration_sources):
- """Compute durations for the new rows by summing the indicated numbers and columns."""
- add_events["duration"] = 0
- for duration in duration_sources:
- if isinstance(duration, (float, int)):
- add_events["duration"] = add_events["duration"].add(duration)
- elif isinstance(duration, str) and duration in list(df.columns):
- add_events["duration"] = add_events["duration"].add(pd.to_numeric(df[duration], errors="coerce"))
- else:
- raise TypeError(
- "BadDurationInModel", f"Remodeling duration {str(duration)} must either be numeric or a column name", ""
- )
-
- @staticmethod
- def _create_onsets(df, onset_source):
- """Create a vector of onsets for the new events.
-
- Parameters:
- df (DataFrame): The dataframe to process.
- onset_source (list): List of numeric offsets and/or column names to add to the original onsets.
-
- Returns:
- Series: Same length as df, containing the onsets of the new events.
-
- :raises TypeError:
- - If one of the onset specifiers is neither numeric nor the name of a column.
-
- """
-
- onsets = pd.to_numeric(df["onset"], errors="coerce")
- for onset in onset_source:
- if isinstance(onset, (float, int)):
- onsets = onsets + onset
- elif isinstance(onset, str) and onset in list(df.columns):
- onsets = onsets.add(pd.to_numeric(df[onset], errors="coerce"))
- else:
- raise TypeError(
- "BadOnsetInModel", f"Remodeling onset {str(onset)} must either be numeric or a column name.", ""
- )
- return onsets
-
- @staticmethod
- def validate_input_data(parameters):
- """Additional validation required of operation parameters not performed by JSON schema validator."""
- return []
diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py
deleted file mode 100644
index a8300c3e..00000000
--- a/hed/tools/remodeling/operations/summarize_column_names_op.py
+++ /dev/null
@@ -1,185 +0,0 @@
-"""Summarize the column names in a collection of tabular files."""
-
-import pandas as pd
-from hed.tools.analysis.column_name_summary import ColumnNameSummary
-from hed.tools.remodeling.operations.base_op import BaseOp
-from hed.tools.remodeling.operations.base_summary import BaseSummary
-
-
-class SummarizeColumnNamesOp(BaseOp):
- """Summarize the column names in a collection of tabular files.
-
- Required remodeling parameters:
- - **summary_name** (*str*): The name of the summary.
- - **summary_filename** (*str*): Base filename of the summary.
-
- Optional remodeling parameters:
- - **append_timecode** (*bool*): If True, append the timecode to the summary filename (default False).
-
- The purpose is to check that all the tabular files have the same columns in the same order.
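# Illustrative sketch (annotation, not part of the diff): splitting a response
# event out of each trial row. The column names ("response_time", "trial") are
# hypothetical; onset_source entries are added to the parent row's onset.
params = {
    "anchor_column": "event_type",
    "new_events": {
        "response": {
            "onset_source": ["response_time"],  # new onset = onset + response_time
            "duration": [0.0],                  # point event
            "copy_columns": ["trial"],
        },
    },
    "remove_parent_row": False,
}
op = SplitRowsOp(params)
# op.do_op(None, df, "example_events") interleaves the new "response" rows with
# the original rows, sorted by onset.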
- - """ - - NAME = "summarize_column_names" - - PARAMS = { - "type": "object", - "properties": { - "summary_name": {"type": "string", "description": "Name to use for the summary in titles."}, - "summary_filename": {"type": "string", "description": "Name to use for the summary file name base."}, - "append_timecode": { - "type": "boolean", - "description": "If true, the timecode is appended to the base filename so each run has a unique name.", - }, - }, - "required": ["summary_name", "summary_filename"], - "additionalProperties": False, - } - - SUMMARY_TYPE = "column_names" - - def __init__(self, parameters): - """Constructor for summarize column names operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.summary_name = parameters["summary_name"] - self.summary_filename = parameters["summary_filename"] - self.append_timecode = parameters.get("append_timecode", False) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Create a column name summary for df. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Not needed for this operation. - - Returns: - DataFrame: A copy of df. - - Side effect: - Updates the relevant summary. - - """ - df_new = df.copy() - summary = dispatcher.summary_dicts.get(self.summary_name, None) - if not summary: - summary = ColumnNamesSummary(self) - dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary({"name": name, "column_names": list(df_new.columns)}) - return df_new - - @staticmethod - def validate_input_data(parameters) -> list: - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] - - -class ColumnNamesSummary(BaseSummary): - """Manager for summaries of column names for a dataset.""" - - def __init__(self, sum_op): - """Constructor for column name summary manager. - - Parameters: - sum_op (SummarizeColumnNamesOp): Operation associated with this summary. - - """ - super().__init__(sum_op) - - def update_summary(self, new_info): - """Update the summary for a given tabular input file. - - Parameters: - new_info (dict): A dictionary with the parameters needed to update a summary. - - Notes: - - The summary information is kept in separate ColumnNameSummary objects for each file. - - The summary needs a "name" str and a "column_names" list. - - The summary uses ColumnNameSummary as the summary object. - """ - name = new_info["name"] - if name not in self.summary_dict: - self.summary_dict[name] = ColumnNameSummary(name=name) - self.summary_dict[name].update(name, new_info["column_names"]) - - def get_details_dict(self, column_summary) -> dict: - """Return the summary dictionary extracted from a ColumnNameSummary. - - Parameters: - column_summary (ColumnNameSummary): A column name summary for the data file. - - Returns: - dict - a dictionary with the summary information for column names. 
- - """ - summary = column_summary.get_summary() - return { - "Name": summary["Summary name"], - "Total events": "n/a", - "Total files": summary["Number files"], - "Files": list(column_summary.file_dict.keys()), - "Specifics": {"Columns": summary["Columns"]}, - } - - def merge_all_info(self) -> "ColumnNameSummary": - """Create a ColumnNameSummary containing the overall dataset summary. - - Returns: - ColumnNameSummary - the overall summary object for column names. - - """ - all_sum = ColumnNameSummary(name="Dataset") - for _key, counts in self.summary_dict.items(): - for name, pos in counts.file_dict.items(): - all_sum.update(name, counts.unique_headers[pos]) - return all_sum - - def _get_result_string(self, name, summary, individual=False) -> str: - """Return a formatted string with the summary for the indicated name. - - Parameters: - name (str): Identifier (usually the filename) of the individual file. - summary (dict): The dictionary of the summary results indexed by name. - individual (bool): True if individual summary, False otherwise. - - Returns: - str - The results in a printable format ready to be saved to a text file. - - Notes: - This calls _get_dataset_string to get the overall summary string. - - """ - if name == "Dataset": - return self._get_dataset_string(summary, BaseSummary.DISPLAY_INDENT) - columns = summary.get("Specifics", {}).get("Columns", []) - if columns: - return f"{BaseSummary.DISPLAY_INDENT}{str(columns[0])}" - else: - return "" - - @staticmethod - def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): - """Return a string with the overall summary for all the tabular files. - - Parameters: - result (dict): Dictionary of merged summary information. - indent (str): String of blanks used as the amount to indent for readability. - - Returns: - str: Formatted string suitable for saving in a file or printing. - - """ - sum_list = [f"Dataset: Number of files={result.get('Total files', 0)}"] - specifics = result.get("Specifics", {}) - columns = specifics.get("Columns", {}) - for element in columns: - sum_list.append(f"{indent}Columns: {str(element['Column names'])}") - for file in element.get("Files", []): - sum_list.append(f"{indent}{indent}{file}") - return "\n".join(sum_list) diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py deleted file mode 100644 index 5ffcfe47..00000000 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ /dev/null @@ -1,359 +0,0 @@ -"""Summarize the values in the columns of a columnar file.""" - -import pandas as pd -from hed.tools.analysis.tabular_summary import TabularSummary -from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_summary import BaseSummary - - -class SummarizeColumnValuesOp(BaseOp): - """Summarize the values in the columns of a columnar file. - - Required remodeling parameters: - - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. - - Optional remodeling parameters: - - **append_timecode** (*bool*): (**Optional**: Default False) If True append timecodes to the summary filename. - - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column. - - **skip_columns** (*list*): Names of columns to skip in the summary. - - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. 
- - **values_per_line** (*int*): The number of values output per line in the summary. - - The purpose is to produce a summary of the values in a tabular file. - - """ - - NAME = "summarize_column_values" - - PARAMS = { - "type": "object", - "properties": { - "summary_name": {"type": "string", "description": "Name to use for the summary in titles."}, - "summary_filename": {"type": "string", "description": "Name to use for the summary file name base."}, - "append_timecode": { - "type": "boolean", - "description": "If true, the timecode is appended to the base filename so each run has a unique name.", - }, - "max_categorical": { - "type": "integer", - "description": "Maximum number of unique column values to show in text description.", - }, - "skip_columns": { - "type": "array", - "description": "List of columns to skip when creating the summary.", - "items": {"type": "string"}, - "minItems": 1, - "uniqueItems": True, - }, - "value_columns": { - "type": "array", - "description": "Columns to be annotated with a single HED annotation and placeholder.", - "items": {"type": "string"}, - "minItems": 1, - "uniqueItems": True, - }, - "values_per_line": {"type": "integer", "description": "Number of items per line to display in the text file."}, - }, - "required": ["summary_name", "summary_filename"], - "additionalProperties": False, - } - - SUMMARY_TYPE = "column_values" - VALUES_PER_LINE = 5 - MAX_CATEGORICAL = 50 - - def __init__(self, parameters): - """Constructor for the summarize column values operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - - super().__init__(parameters) - self.summary_name = parameters["summary_name"] - self.summary_filename = parameters["summary_filename"] - self.append_timecode = parameters.get("append_timecode", False) - self.max_categorical = parameters.get("max_categorical", float("inf")) - self.skip_columns = parameters.get("skip_columns", []) - self.value_columns = parameters.get("value_columns", []) - self.values_per_line = parameters.get("values_per_line", self.VALUES_PER_LINE) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Create a summary of the column values in df. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Not needed for this operation. - - Returns: - DataFrame: A copy of df. - - Side effect: - Updates the relevant summary. - - """ - - df_new = df.copy() - summary = dispatcher.summary_dicts.get(self.summary_name, None) - if not summary: - summary = ColumnValueSummary(self) - dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary({"df": dispatcher.post_proc_data(df_new), "name": name}) - return df_new - - @staticmethod - def validate_input_data(parameters) -> list: - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] - - -class ColumnValueSummary(BaseSummary): - """Manager for summaries of column contents for columnar files.""" - - def __init__(self, sum_op): - """Constructor for column value summary manager. - - Parameters: - sum_op (SummarizeColumnValuesOp): Operation associated with this summary. - - """ - super().__init__(sum_op) - - def update_summary(self, new_info): - """Update the summary for a given tabular input file. 
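The optional parameters in the `PARAMS` schema above control how categorical and value columns are reported. A hedged sketch of a parameter dictionary for this operation; the column names are illustrative BIDS-style event columns, not taken from the source:

```python
# Sketch (pre-removal API); the column names below are illustrative only.
parameters = {
    "summary_name": "column_values",
    "summary_filename": "column_values",
    "skip_columns": ["onset", "duration", "sample"],
    "value_columns": ["response_time", "stim_file"],
    "max_categorical": 10,  # cap on unique values listed per categorical column
    "values_per_line": 5,   # values shown per line in the text summary
}
op = SummarizeColumnValuesOp(parameters)
```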
- - Parameters: - new_info (dict): A dictionary with the parameters needed to update a summary. - - Notes: - - The summary information is kept in separate TabularSummary objects for each file. - - The summary needs a "name" str and a "df" . - - """ - name = new_info["name"] - if name not in self.summary_dict: - self.summary_dict[name] = TabularSummary( - value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name=name - ) - self.summary_dict[name].update(new_info["df"]) - - def get_details_dict(self, tabular_summary) -> dict: - """Return a dictionary with the summary contained in a TabularSummary. - - Parameters: - tabular_summary (TabularSummary): The tabular summary object. - - Returns: - dict: Dictionary with the information suitable for extracting printout. - - """ - this_summary = tabular_summary.get_summary(as_json=False) - unique_counts = [(key, len(count_dict)) for key, count_dict in this_summary["Categorical columns"].items()] - this_summary["Categorical counts"] = dict(unique_counts) - for key, dict_entry in this_summary["Categorical columns"].items(): - num_disp, sorted_tuples = ColumnValueSummary.sort_dict(dict_entry, reverse=True) - this_summary["Categorical columns"][key] = dict(sorted_tuples[: min(num_disp, self.op.max_categorical)]) - return { - "Name": this_summary["Name"], - "Total events": this_summary["Total events"], - "Total files": this_summary["Total files"], - "Files": list(this_summary["Files"].keys()), - "Specifics": { - "Value columns": list(this_summary["Value columns"]), - "Skip columns": this_summary["Skip columns"], - "Value column summaries": this_summary["Value columns"], - "Categorical column summaries": this_summary["Categorical columns"], - "Categorical counts": this_summary["Categorical counts"], - }, - } - - def merge_all_info(self) -> "TabularSummary": - """Create a TabularSummary containing the overall dataset summary. - - Returns: - TabularSummary - the summary object for column values. - - """ - all_sum = TabularSummary(value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name="Dataset") - for counts in self.summary_dict.values(): - all_sum.update_summary(counts) - return all_sum - - def _get_result_string(self, name, summary, individual=False) -> str: - """Return a formatted string with the summary for the indicated name. - - Parameters: - name (str): Identifier (usually the filename) of the individual file. - summary (dict): The dictionary of the summary results indexed by name. - individual (bool): Whether this is for an individual file summary. - - Returns: - str: The results in a printable format ready to be saved to a text file. - - Notes: - This calls _get_dataset_string to get the overall summary string and - _get_individual_string to get an individual summary string. - - """ - - if name == "Dataset": - sum_list = [ - f"Dataset: Total events={summary.get('Total events', 0)} " f"Total files={summary.get('Total files', 0)}" - ] - else: - sum_list = [f"Total events={summary.get('Total events', 0)}"] - sum_list = sum_list + self._get_detail_list(summary, indent=BaseSummary.DISPLAY_INDENT) - return "\n".join(sum_list) - - def _get_individual_string(self, result, indent=BaseSummary.DISPLAY_INDENT) -> str: - """Return a formatted string with the summary for an individual file. - - Parameters: - result (dict): The dictionary of the summary results indexed by name. - indent (str): A string containing spaces used for indentation (usually 3 spaces). 
- - Returns: - str: The results in a printable format ready to be saved to a text file. - - Notes: - This calls _get_categorical_string to get the categorical part of the summary, - and _get_value_string to get the value column part of the summary. - - """ - - return "\n".join( - [ - f"Summary for {result['Name']}", - f" Total events: {result.get('Total events', 0)}", - f" Total files: {result.get('Total files', 0)}", - f" Value columns: {result['Specifics']['Value columns']}", - f" Skip columns: {result['Specifics']['Skip columns']}", - self._get_categorical_string(result), - self._get_value_string(result["Specifics"]["Value column summaries"]), - ] - ) - - def _format_categorical_lists(self, specifics) -> list: - """Format the categorical column summaries for display. - - Parameters: - specifics (dict): The specifics dictionary from the summary. - - Returns: - list: A list of formatted strings for the categorical column summaries. - - """ - cat_dict = specifics.get("Categorical column summaries", {}) - if not cat_dict: - return [] - count_dict = specifics["Categorical counts"] - formatted_list = ["Categorical column values[Events, Files]:"] - sorted_tuples = sorted(cat_dict.items(), key=lambda x: x[0]) - for entry in sorted_tuples: - formatted_list = formatted_list + self._get_categorical_col(entry, count_dict, offset="", indent=" ") - return formatted_list - - def _get_categorical_string(self, summary, offset="", indent=" "): - """Return a string with the summary for a particular categorical dictionary. - - Parameters: - summary (dict): Dictionary of summary information for a particular tabular file. - offset (str): String of blanks used as offset for every item - indent (str): String of blanks used as the additional amount to indent an item's for readability. - - Returns: - str: Formatted string suitable for saving in a file or printing. - - """ - specifics = summary.get("Specifics", {}) - cat_dict = specifics.get("Categorical column summaries", {}) - if not cat_dict: - return "" - count_dict = specifics.get("Categorical counts", {}) - sum_list = [f"{offset}{indent}Categorical column values[Events, Files]:"] - sorted_tuples = sorted(cat_dict.items(), key=lambda x: x[0]) - for entry in sorted_tuples: - sum_list = sum_list + self._get_categorical_col(entry, count_dict, offset="", indent=" ") - return "\n".join(sum_list) - - def _get_detail_list(self, result, indent=BaseSummary.DISPLAY_INDENT): - """Return a list of strings with the details - - Parameters: - result (dict): Dictionary of merged summary information. - indent (str): String of blanks used as the amount to indent for readability. - - Returns: - list: list of formatted strings suitable for saving in a file or printing. - - """ - sum_list = [] - specifics = result["Specifics"] - cat_string = self._get_categorical_string(specifics, offset="", indent=indent) - if cat_string: - sum_list.append(cat_string) - val_dict = specifics.get("Value column summaries", {}) - if val_dict: - sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset="", indent=indent)) - return sum_list - - def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): - """Return a string with the summary for a particular categorical column. 
- - Parameters: - entry(tuple): (Name of the column, summary dict for that column) - count_dict (dict): Count of the total number of unique values indexed by the name - offset(str): String of blanks used as offset for all items - indent (str): String of blanks used as the additional amount to indent for this item's readability. - - Returns: - list: Formatted strings, each corresponding to a line in the output. - """ - num_unique = count_dict[entry[0]] - num_disp = min(self.op.max_categorical, num_unique) - col_list = [f"{offset}{indent * 2}{entry[0]}: {num_unique} unique values " f"(displaying top {num_disp} values)"] - # Create and partition the list of individual entries - value_list = [f"{item[0]}{str(item[1])}" for item in entry[1].items()] - value_list = value_list[:num_disp] - part_list = ColumnValueSummary.partition_list(value_list, self.op.values_per_line) - return col_list + [f"{offset}{indent * 3}{ColumnValueSummary.get_list_str(item)}" for item in part_list] - - @staticmethod - def get_list_str(lst) -> str: - """Return a str version of a list with items separated by a blank. - - Returns: - str: String version of list. - - """ - return f"{' '.join(str(item) for item in lst)}" - - @staticmethod - def partition_list(lst, n) -> list: - """Partition a list into lists of n items. - - Parameters: - lst (list): List to be partitioned. - n (int): Number of items in each sublist. - - Returns: - list: list of lists of n elements, the last might have fewer. - - """ - return [lst[i : i + n] for i in range(0, len(lst), n)] - - @staticmethod - def _get_value_string(val_dict, offset="", indent="") -> str: - sum_list = [f"{offset}{indent}Value columns[Events, Files]:"] - for col_name, val_counts in val_dict.items(): - sum_list.append(f"{offset}{indent*2}{col_name}{str(val_counts)}") - return "\n".join(sum_list) - - @staticmethod - def sort_dict(count_dict, reverse=False): - sorted_tuples = sorted(count_dict.items(), key=lambda x: x[1][0], reverse=reverse) - return len(sorted_tuples), sorted_tuples diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py deleted file mode 100644 index 3a5be12c..00000000 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ /dev/null @@ -1,257 +0,0 @@ -"""Summarize the type_defs in the dataset.""" - -import pandas as pd -from hed.models.tabular_input import TabularInput -from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_summary import BaseSummary -from hed.models.def_expand_gather import DefExpandGatherer - - -class SummarizeDefinitionsOp(BaseOp): - """Summarize the definitions used in the dataset based on Def and Def-expand. - - Required remodeling parameters: - - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. - - Optional remodeling parameters: - - **append_timecode** (*bool*): If False (default), the timecode is not appended to the summary filename. - - The purpose is to produce a summary of the definitions used in a dataset. 
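In remodeling files, each operation was specified as a JSON entry with `operation`, `description`, and `parameters` keys and dispatched by its `NAME`. A sketch of such an entry for the `summarize_definitions` operation described above, written as a Python dict; the surrounding remodel-file machinery is assumed from the pre-removal conventions:

```python
# Sketch of one entry in a remodel operation list (pre-removal conventions).
definitions_entry = {
    "operation": "summarize_definitions",
    "description": "Summarize Def and Def-expand usage in the dataset.",
    "parameters": {
        "summary_name": "definitions",
        "summary_filename": "definitions",
    },
}
```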
- - """ - - NAME = "summarize_definitions" - - PARAMS = { - "type": "object", - "properties": { - "summary_name": {"type": "string", "description": "Name to use for the summary in titles."}, - "summary_filename": {"type": "string", "description": "Name to use for the summary file name base."}, - "append_timecode": { - "type": "boolean", - "description": "If true, the timecode is appended to the base filename so each run has a unique name.", - }, - }, - "required": ["summary_name", "summary_filename"], - "additionalProperties": False, - } - - SUMMARY_TYPE = "type_defs" - - def __init__(self, parameters): - """Constructor for the summary of definitions used in the dataset. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.summary_name = parameters["summary_name"] - self.summary_filename = parameters["summary_filename"] - self.append_timecode = parameters.get("append_timecode", False) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Create summaries of definitions. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Only needed for HED operations. - - Returns: - DataFrame: a copy of df - - Side effect: - Updates the relevant summary. - - """ - df_new = df.copy() - summary = dispatcher.summary_dicts.setdefault(self.summary_name, DefinitionSummary(self, dispatcher.hed_schema)) - summary.update_summary( - {"df": dispatcher.post_proc_data(df_new), "name": name, "sidecar": sidecar, "schema": dispatcher.hed_schema} - ) - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] - - -class DefinitionSummary(BaseSummary): - """Manager for summaries of the definitions used in a dataset.""" - - def __init__(self, sum_op, hed_schema, known_defs=None): - """Constructor for the summary of definitions. - - Parameters: - sum_op (SummarizeDefinitionsOp): Summary operation class for gathering definitions. - hed_schema (HedSchema or HedSchemaGroup): Schema used for the dataset. - known_defs (str or list or DefinitionDict): Definitions already known to be used. - - - """ - super().__init__(sum_op) - self.def_gatherer = DefExpandGatherer(hed_schema, known_defs=known_defs) - - def update_summary(self, new_info): - """Update the summary for a given tabular input file. - - Parameters: - new_info (dict): A dictionary with the parameters needed to update a summary. - - Notes: - - The summary needs a "name" str, a "schema" and a "Sidecar". 
- - """ - data_input = TabularInput(new_info["df"], sidecar=new_info["sidecar"], name=new_info["name"]) - series, def_dict = data_input.series_a, data_input.get_def_dict(new_info["schema"]) - self.def_gatherer.process_def_expands(series, def_dict) - - @staticmethod - def _build_summary_dict(items_dict, title, process_func, display_description=False): - summary_dict = {} - items = {} - for key, value in items_dict.items(): - if process_func: - value = process_func(value) - if "#" in str(value): - key = key + "/#" - if display_description: - description, value = DefinitionSummary._remove_description(value) - items[key] = {"description": description, "contents": str(value)} - elif isinstance(value, list): - items[key] = [str(x) for x in value] - else: - items[key] = str(value) - summary_dict[title] = items - return summary_dict - - def get_details_dict(self, def_summary) -> dict: - """Return the summary-specific information in a dictionary. - - Parameters: - def_summary (DefExpandGatherer): Contains the resolved dictionaries. - - Returns: - dict: dictionary with the summary results. - - """ - known_defs_summary = self._build_summary_dict( - def_summary.def_dict, "Known Definitions", None, display_description=True - ) - # ambiguous_defs_summary = self._build_summary_dict(def_gatherer.ambiguous_defs, "Ambiguous Definitions", - # def_gatherer.get_ambiguous_group) - # ambiguous_defs_summary = {} - # TODO: Summary of ambiguous definitions is not implemented - errors_summary = self._build_summary_dict(def_summary.errors, "Errors", None) - - known_defs_summary.update(errors_summary) - return {"Name": "", "Total events": 0, "Total files": 0, "Files": [], "Specifics": known_defs_summary} - # return known_defs_summary - - def merge_all_info(self) -> object: - """Create an Object containing the definition summary. - - Returns: - Object: The overall summary object for type_defs. - - """ - return self.def_gatherer - - def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): - """Return a formatted string with the summary for the indicated name. - - Parameters: - name (str): Identifier (usually the filename) of the individual file. - result (dict): The dictionary of the summary results indexed by name. - indent (str): A string containing spaces used for indentation (usually 3 spaces). - - Returns: - str: The results in a printable format ready to be saved to a text file. - - Notes: - This calls _get_dataset_string to get the overall summary string and - _get_individual_string to get an individual summary string. - - """ - if name == "Dataset": - return self._get_dataset_string(result, indent=indent) - return self._get_individual_string(result, indent=indent) - - @staticmethod - def _nested_dict_to_string(data, indent, level=1): - """Return string summary of definitions used by recursively traversing the summary info. - - Parameters: - data (dict): Dictionary containing information. - indent (str): Spaces to indent the nested results. - level (int): (Default 1): Level indicator for recursive calls. 
- - """ - result = [] - for key, value in data.items(): - if isinstance(value, dict): - result.append(f"{indent * level}{key}: {len(value)} items") - result.append(DefinitionSummary._nested_dict_to_string(value, indent, level + 1)) - elif isinstance(value, list): - result.append(f"{indent * level}{key}:") - for item in value: - result.append(f"{indent * (level + 1)}{item}") - else: - result.append(f"{indent * level}{key}: {value}") - return "\n".join(result) - - @staticmethod - def _get_dataset_string(summary_dict, indent=BaseSummary.DISPLAY_INDENT): - """Return the string representing the summary of the definitions across the dataset. - - Parameters: - summary_dict (dict): Contains the merged summary information. - indent (str): Spaces to indent successively levels. - - Returns: - str: String summary of the definitions used in the dataset. - - """ - return DefinitionSummary._nested_dict_to_string(summary_dict, indent) - - @staticmethod - def _remove_description(def_entry): - """Remove description from a definition entry. - - Parameters: - def_entry (DefinitionEntry): Definition entry from which to remove its definition. - - Returns: - tuple[str, DefinitionEntry]: - - Description string. - - DefinitionEntry after description has been removed. - - - """ - def_group = def_entry.contents.copy() - description = "" - desc_tag = def_group.find_tags({"description"}, include_groups=False) - if desc_tag: - def_group.remove(desc_tag) - desc_tag = desc_tag[0] - description = desc_tag.extension - - return description, def_group - - @staticmethod - def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): - """Return a string with the summary for an individual tabular file. - - Parameters: - result (dict): Dictionary of summary information for a particular tabular file. - indent (str): String of blanks used as the amount to indent for readability. - - Returns: - str: Formatted string suitable for saving in a file or printing. - - """ - return "" diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py deleted file mode 100644 index ff390803..00000000 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ /dev/null @@ -1,448 +0,0 @@ -"""Summarize the HED tags in collection of tabular files.""" - -import os -import numpy as np -import pandas as pd -from hed.models.tabular_input import TabularInput -from hed.tools.analysis.hed_tag_counts import HedTagCounts -from hed.tools.analysis.event_manager import EventManager -from hed.tools.analysis.hed_tag_manager import HedTagManager -from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_summary import BaseSummary -from hed.tools.visualization import tag_word_cloud - - -class SummarizeHedTagsOp(BaseOp): - """Summarize the HED tags in collection of tabular files. - - Required remodeling parameters: - - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. - - **tags** (*dict*): Specifies how to organize the tag output. - - Optional remodeling parameters: - - **append_timecode** (*bool*): If True, the timecode is appended to the base filename when summary is saved. - - **include_context** (*bool*): If True, context of events is included in summary. - - **remove_types** (*list*): A list of type tags such as Condition-variable or Task to exclude from summary. - - **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. 
- - **word_cloud** (*bool*): If True, output a word cloud visualization. - - The purpose of this op is to produce a summary of the occurrences of HED tags organized in a specified manner. - - Notes: The tags template is a dictionary whose keys are the organization titles (not necessarily tags) for the - output and whose values are the tags, which if they or their children appear, they will be listed under that - title. - - """ - - NAME = "summarize_hed_tags" - - PARAMS = { - "type": "object", - "properties": { - "summary_name": {"type": "string", "description": "Name to use for the summary in titles."}, - "summary_filename": {"type": "string", "description": "Name to use for the summary file name base."}, - "tags": { - "type": "object", - "description": "A dictionary with the template for how output of tags should be organized.", - "patternProperties": { - ".*": {"type": "array", "items": {"type": "string"}, "minItems": 1, "uniqueItems": True}, - "minProperties": 1, - "additionalProperties": False, - }, - }, - "append_timecode": { - "type": "boolean", - "description": "If true, the timecode is appended to the base filename so each run has a unique name.", - }, - "include_context": { - "type": "boolean", - "description": "If true, tags for events that unfold over time are counted at each intermediate time.", - }, - "remove_types": { - "type": "array", - "description": "A list of special tags such as Condition-variable whose influence is to be removed.", - "items": {"type": "string"}, - "minItems": 1, - "uniqueItems": True, - }, - "replace_defs": { - "type": "boolean", - "description": "If true, then the Def tags are replaced with actual definitions for the count.", - }, - "word_cloud": { - "type": "object", - "properties": { - "height": {"type": "integer", "description": "Height of word cloud image in pixels."}, - "width": {"type": "integer", "description": "Width of word cloud image in pixels."}, - "prefer_horizontal": { - "type": "number", - "description": "Fraction of the words that are oriented horizontally.", - }, - "min_font_size": { - "type": "number", - "description": "Minimum font size in points for the word cloud words.", - }, - "max_font_size": {"type": "number", "description": "Maximum font size in point for the word cloud words."}, - "set_font": { - "type": "boolean", - "description": "If true, set the font to a system font (provided by font_path).", - }, - "font_path": { - "type": "string", - "description": "Path to system font to use for word cloud display (system-specific).", - }, - "scale_adjustment": { - "type": "number", - "description": "Constant to add to log-transformed frequencies of the words to get scale.", - }, - "contour_width": {"type": "number", "description": "Width in pixels of contour surrounding the words."}, - "contour_color": { - "type": "string", - "description": "Name of the contour color (uses MatPlotLib names for colors).", - }, - "background_color": { - "type": "string", - "description": "Name of the background color (uses MatPlotLib names for colors).", - }, - "use_mask": { - "type": "boolean", - "description": "If true then confine the word display to region within the provided mask.", - }, - "mask_path": {"type": "string", "description": "Path of the mask image used to surround the words."}, - }, - "additionalProperties": False, - }, - }, - "required": ["summary_name", "summary_filename", "tags"], - "additionalProperties": False, - } - - SUMMARY_TYPE = "hed_tag_summary" - - def __init__(self, parameters): - """Constructor for the summarize_hed_tags 
operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.summary_name = parameters["summary_name"] - self.summary_filename = parameters["summary_filename"] - self.tags = parameters["tags"] - self.append_timecode = parameters.get("append_timecode", False) - self.include_context = parameters.get("include_context", True) - self.replace_defs = parameters.get("replace_defs", True) - self.remove_types = parameters.get("remove_types", []) - if "word_cloud" not in parameters: - self.word_cloud = None - else: - wc_params = parameters["word_cloud"] - self.word_cloud = { - "height": wc_params.get("height", 300), - "width": wc_params.get("width", 400), - "prefer_horizontal": wc_params.get("prefer_horizontal", 0.75), - "min_font_size": wc_params.get("min_font_size", 8), - "max_font_size": wc_params.get("max_font_size", 15), - "font_path": wc_params.get("font_path", None), - "scale_adjustment": wc_params.get("scale_adjustment", 7), - "contour_width": wc_params.get("contour_width", 3), - "contour_color": wc_params.get("contour_color", "black"), - "background_color": wc_params.get("background_color", None), - "use_mask": wc_params.get("use_mask", False), - "mask_path": wc_params.get("mask_path", None), - } - if self.word_cloud["use_mask"] and not self.word_cloud["mask_path"]: - self.word_cloud["mask_path"] = os.path.realpath( - os.path.join(os.path.dirname(__file__), "../../../resources/word_cloud_brain_mask.png") - ) - if self.word_cloud["font_path"]: - self.word_cloud["font_path"] = os.path.realpath(self.word_cloud["font_path"]) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Summarize the HED tags present in the dataset. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be remodeled. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Only needed for HED operations. - - Returns: - DataFrame: A copy of df. - - Side effect: - Updates the context. - - """ - df_new = df.copy() - summary = dispatcher.summary_dicts.get(self.summary_name, None) - if not summary: - summary = HedTagSummary(self) - dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary( - {"df": dispatcher.post_proc_data(df_new), "name": name, "schema": dispatcher.hed_schema, "sidecar": sidecar} - ) - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] - - -class HedTagSummary(BaseSummary): - """Manager of the HED tag summaries.""" - - def __init__(self, sum_op): - """Constructor for HED tag summary manager. - - Parameters: - sum_op (SummarizeHedTagsOp): Operation associated with this summary. - - """ - - super().__init__(sum_op) - self.sum_op = sum_op - - def update_summary(self, new_info): - """Update the summary for a given tabular input file. - - Parameters: - new_info (dict): A dictionary with the parameters needed to update a summary. - - Notes: - - The summary needs a "name" str, a "schema", a "df, and a "Sidecar". 
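Beyond the summary name and filename, the `tags` template is the one required parameter: its keys are display titles and its values are lists of HED tags to gather under each title. A sketch with an illustrative grouping, not a prescribed one:

```python
# Sketch (pre-removal API); the tag grouping below is illustrative.
parameters = {
    "summary_name": "hed_tags",
    "summary_filename": "hed_tags",
    "tags": {
        "Sensory events": ["Sensory-event", "Sensory-presentation"],
        "Agent actions": ["Agent-action"],
    },
    "include_context": True,                        # count tags of ongoing events
    "replace_defs": True,                           # expand Def tags before counting
    "remove_types": ["Condition-variable", "Task"], # strip type tags from counts
}
op = SummarizeHedTagsOp(parameters)
```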
- - """ - counts = HedTagCounts(new_info["name"], total_events=len(new_info["df"])) - input_data = TabularInput(new_info["df"], sidecar=new_info["sidecar"], name=new_info["name"]) - tag_man = HedTagManager(EventManager(input_data, new_info["schema"]), remove_types=self.sum_op.remove_types) - hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context, replace_defs=self.sum_op.replace_defs) - for hed in hed_objs: - counts.update_tag_counts(hed, new_info["name"]) - self.summary_dict[new_info["name"]] = counts - - def get_details_dict(self, tag_counts) -> dict: - """Return the summary-specific information in a dictionary. - - Parameters: - tag_counts (HedTagCounts): Contains the counts of tags in the dataset. - - Returns: - dict: dictionary with the summary results. - - """ - template, unmatched = tag_counts.organize_tags(self.sum_op.tags) - details = {} - for key, key_list in self.sum_op.tags.items(): - details[key] = self._get_details(key_list, template, verbose=True) - leftovers = [value.get_info(verbose=True) for value in unmatched] - return { - "Name": tag_counts.name, - "Total events": tag_counts.total_events, - "Total files": len(tag_counts.files.keys()), - "Files": list(tag_counts.files.keys()), - "Specifics": {"Main tags": details, "Other tags": leftovers}, - } - - def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): - """Return a formatted string with the summary for the indicated name. - - Parameters: - name (str): Identifier (usually the filename) of the individual file. - result (dict): The dictionary of the summary results indexed by name. - indent (str): A string containing spaces used for indentation (usually 3 spaces). - - Returns: - str: The results in a printable format ready to be saved to a text file. - - Notes: - This calls _get_dataset_string to get the overall summary string and - _get_individual_string to get an individual summary string. - - """ - if name == "Dataset": - return self._get_dataset_string(result, indent=indent) - return self._get_individual_string(result, indent=indent) - - def merge_all_info(self) -> "HedTagCounts": - """Create a HedTagCounts containing the overall dataset HED tag summary. - - Returns: - HedTagCounts: The overall dataset summary object for HED tag counts. - - """ - - all_counts = HedTagCounts("Dataset") - for _key, counts in self.summary_dict.items(): - all_counts.merge_tag_dicts(counts.tag_dict) - for file_name in counts.files.keys(): - all_counts.files[file_name] = "" - all_counts.total_events = all_counts.total_events + counts.total_events - return all_counts - - def save_visualizations(self, save_dir, file_formats=None, individual_summaries="separate", task_name=""): - """Save the summary visualizations if any. - - Parameters: - save_dir (str): Path to directory in which visualizations should be saved. - file_formats (list or None): List of file formats to use in saving. If None, defaults to ['.svg']. - individual_summaries (str): One of "consolidated", "separate", or "none" indicating what to save. - task_name (str): Name of task if segregated by task. 
- - """ - if file_formats is None: - file_formats = [".svg"] - if not self.sum_op.word_cloud: - return - else: - wc = self.sum_op.word_cloud - # summary = self.get_summary(individual_summaries='none') - summary = self.get_summary(individual_summaries="none") - overall_summary = summary.get("Dataset", {}) - overall_summary = overall_summary.get("Overall summary", {}) - specifics = overall_summary.get("Specifics", {}) - word_dict = self.summary_to_dict(specifics, scale_adjustment=wc["scale_adjustment"]) - - tag_wc = tag_word_cloud.create_wordcloud( - word_dict, - mask_path=wc["mask_path"], - width=wc["width"], - height=wc["height"], - prefer_horizontal=wc["prefer_horizontal"], - background_color=wc["background_color"], - min_font_size=wc["min_font_size"], - max_font_size=wc["max_font_size"], - contour_width=wc["contour_width"], - contour_color=wc["contour_color"], - font_path=wc["font_path"], - ) - svg_data = tag_word_cloud.word_cloud_to_svg(tag_wc) - cloud_filename = os.path.realpath( - os.path.join(save_dir, self.sum_op.summary_name, self.sum_op.summary_name + "_word_cloud.svg") - ) - with open(cloud_filename, "w") as outfile: - outfile.writelines(svg_data) - - @staticmethod - def summary_to_dict(specifics, transform=np.log10, scale_adjustment=7) -> dict: - """Convert a HedTagSummary json specifics dict into the word cloud input format. - - Parameters: - specifics (dict): Dictionary with keys "Main tags" and "Other tags". - transform (func): The function to transform the number of found tags. - Default log10 - scale_adjustment (int): Value added after transform. - - Returns: - dict: A dict of the words and their occurrence count. - - Raises: - KeyError: A malformed dictionary was passed. - - """ - if transform is None: - - def transform(x): - return x - - word_dict = {} - tag_dict = specifics.get("Main tags", {}) - for tag, tag_sub_list in tag_dict.items(): - if tag == "Exclude tags": - continue - for tag_sub_dict in tag_sub_list: - word_dict[tag_sub_dict["tag"]] = transform(tag_sub_dict["events"]) + scale_adjustment - other_dict = specifics.get("Other tags", []) - for tag_sub_list in other_dict: - word_dict[tag_sub_list["tag"]] = transform(tag_sub_list["events"]) + scale_adjustment - return word_dict - - @staticmethod - def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): - """Return a string with the overall summary for all the tabular files. - - Parameters: - result (dict): Dictionary of merged summary information. - indent (str): String of blanks used as the amount to indent for readability. - - Returns: - str: Formatted string suitable for saving in a file or printing. - - """ - sum_list = [f"Dataset: Total events={result.get('Total events', 0)} " f"Total files={len(result.get('Files', []))}"] - sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) - return "\n".join(sum_list) - - @staticmethod - def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): - """Return a string with the summary for an individual tabular file. - - Parameters: - result (dict): Dictionary of summary information for a particular tabular file. - indent (str): String of blanks used as the amount to indent for readability. - - Returns: - str: Formatted string suitable for saving in a file or printing. 
- - """ - sum_list = [f"Total events={result.get('Total events', 0)}"] - sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) - return "\n".join(sum_list) - - @staticmethod - def _tag_details(tags): - """Return a list of strings with the tag details. - - Parameters: - tags (list): List of tags to summarize. - - Returns: - list: Each entry has the summary details for a tag. - - """ - tag_list = [] - for tag in tags: - tag_list.append(f"{tag['tag']}[{tag['events']},{len(tag['files'])}]") - return tag_list - - @staticmethod - def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT): - """Return a list lines to be output to summarize the tags as organized in the result. - - Parameters: - result (dict): Dictionary with the results organized under key "Specifics". - indent (str): Spaces to indent each line. - - Returns: - list: Each entry is a string representing a line to be printed. - - """ - tag_info = result["Specifics"] - sum_list = [f"\n{indent}Main tags[events,files]:"] - for category, tags in tag_info["Main tags"].items(): - sum_list.append(f"{indent}{indent}{category}:") - if tags: - sum_list.append(f"{indent}{indent}{indent}{' '.join(HedTagSummary._tag_details(tags))}") - if tag_info["Other tags"]: - sum_list.append(f"{indent}Other tags[events,files]:") - sum_list.append(f"{indent}{indent}{' '.join(HedTagSummary._tag_details(tag_info['Other tags']))}") - return sum_list - - @staticmethod - def _get_details(key_list, template, verbose=False): - """Organized a tag information from a list based on the template. - - Parameters: - key_list (list): List of information to be organized based on the template. - template (dict): An input template derived from the input parameters. - verbose (bool): If False (the default) output minimal information about the summary. - - """ - key_details = [] - for item in key_list: - for tag_cnt in template[item.casefold()]: - key_details.append(tag_cnt.get_info(verbose=verbose)) - return key_details diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py deleted file mode 100644 index 74718800..00000000 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ /dev/null @@ -1,266 +0,0 @@ -"""Summarize the HED type tags in collection of tabular files.""" - -import pandas as pd -from hed.models.tabular_input import TabularInput -from hed.models.sidecar import Sidecar -from hed.tools.analysis.hed_type import HedType -from hed.tools.analysis.hed_type_counts import HedTypeCounts -from hed.tools.analysis.event_manager import EventManager -from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_summary import BaseSummary - - -class SummarizeHedTypeOp(BaseOp): - """Summarize a HED type tag in a collection of tabular files. - - Required remodeling parameters: - - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. - - **type_tag** (*str*):Type tag to get_summary (e.g. `condition-variable` or `task` tags). - - Optional remodeling parameters: - - **append_timecode** (*bool*): If true, the timecode is appended to the base filename when summary is saved. - - The purpose of this op is to produce a summary of the occurrences of specified tag. This summary - is often used with `condition-variable` to produce a summary of the experimental design. 
- - """ - - NAME = "summarize_hed_type" - - PARAMS = { - "type": "object", - "properties": { - "summary_name": {"type": "string", "description": "Name to use for the summary in titles."}, - "summary_filename": {"type": "string", "description": "Name to use for the summary file name base."}, - "type_tag": { - "type": "string", - "description": "Type tag (such as Condition-variable or Task to design summaries for..", - }, - "append_timecode": { - "type": "boolean", - "description": "If true, the timecode is appended to the base filename so each run has a unique name.", - }, - }, - "required": ["summary_name", "summary_filename", "type_tag"], - "additionalProperties": False, - } - - SUMMARY_TYPE = "hed_type_summary" - - def __init__(self, parameters): - """Constructor for the summarize HED type operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.summary_name = parameters["summary_name"] - self.summary_filename = parameters["summary_filename"] - self.type_tag = parameters["type_tag"].casefold() - self.append_timecode = parameters.get("append_timecode", False) - - def do_op(self, dispatcher, df, name, sidecar=None) -> pd.DataFrame: - """Summarize a specified HED type variable such as Condition-variable. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be summarized. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Usually required unless event file has a HED column. - - Returns: - DataFrame: A copy of df - - Side effect: - Updates the relevant summary. - - """ - df_new = df.copy() - summary = dispatcher.summary_dicts.get(self.summary_name, None) - if not summary: - summary = HedTypeSummary(self) - dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary( - {"df": dispatcher.post_proc_data(df_new), "name": name, "schema": dispatcher.hed_schema, "sidecar": sidecar} - ) - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] - - -class HedTypeSummary(BaseSummary): - """Manager of the HED type summaries.""" - - def __init__(self, sum_op): - """Constructor for HED type summary manager. - - Parameters: - sum_op (SummarizeHedTypeOp): Operation associated with this summary. - - """ - super().__init__(sum_op) - self.type_tag = sum_op.type_tag - - def update_summary(self, new_info): - """Update the summary for a given tabular input file. - - Parameters: - new_info (dict): A dictionary with the parameters needed to update a summary. - - Notes: - - The summary needs a "name" str, a "schema", a "df, and a "Sidecar". - - """ - - sidecar = new_info["sidecar"] - if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(sidecar) - input_data = TabularInput(new_info["df"], sidecar=sidecar, name=new_info["name"]) - type_values = HedType(EventManager(input_data, new_info["schema"]), new_info["name"], type_tag=self.type_tag) - counts = HedTypeCounts(new_info["name"], self.type_tag) - counts.update_summary(type_values.get_summary(), type_values.total_events, new_info["name"]) - counts.add_descriptions(type_values.type_defs) - self.summary_dict[new_info["name"]] = counts - - def get_details_dict(self, hed_type_counts) -> dict: - """Return the summary-specific information in a dictionary. 
- - Parameters: - hed_type_counts (HedTypeCounts): Contains the counts of the events in which the type occurs. - - Returns: - dict: dictionary with the summary results. - - """ - summary = hed_type_counts.get_summary() - files = summary.get("files", []) - return { - "Name": summary.get("name", ""), - "Total events": summary.get("total_events", 0), - "Total files": len(files), - "Files": files, - "Specifics": {"Type tag": summary.get("type_tag", "condition-variable"), "Type info": summary.get("details", {})}, - } - - def merge_all_info(self) -> "HedTypeCounts": - """Create a HedTypeCounts containing the overall dataset HED type summary. - - Returns: - HedTypeCounts - the overall dataset summary object for HED type summary. - - """ - all_counts = HedTypeCounts("Dataset", self.type_tag) - for _key, counts in self.summary_dict.items(): - all_counts.update(counts) - return all_counts - - def _get_result_string(self, name, summary, individual=False) -> str: - """Return a formatted string with the summary for the indicated name. - - Parameters: - name (str): Identifier (usually the filename) of the individual file. - summary (dict): The dictionary of the summary results indexed by name. - individual (bool): Whether this is for an individual file summary. - - Returns: - str: The results in a printable format ready to be saved to a text file. - - Notes: - This calls _get_dataset_string to get the overall summary string and - _get_individual_string to get an individual summary string. - - """ - if name == "Dataset": - return self._get_dataset_string(summary, indent=BaseSummary.DISPLAY_INDENT) - return self._get_individual_string(summary, indent=BaseSummary.DISPLAY_INDENT) - - @staticmethod - def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): - """Return a string with the overall summary for all the tabular files. - - Parameters: - result (dict): Dictionary of merged summary information. - indent (str): String of blanks used as the amount to indent for readability. - - Returns: - str: Formatted string suitable for saving in a file or printing. - - """ - specifics = result.get("Specifics", {}) - type_info = specifics.get("Type info", {}) - sum_list = [ - f"Dataset: Type={specifics.get('Type tag', 'condition-variable')} Type values={len(type_info)} " - f"Total events={result.get('Total events', 0)} Total files={len(result.get('Files', []))}" - ] - - for key, item in type_info.items(): - str1 = ( - f"{item['events']} event(s) out of {item['total_events']} total events in " + f"{len(item['files'])} file(s)" - ) - if item["level_counts"]: - str1 = f"{len(item['level_counts'])} levels in " + str1 - if item["direct_references"]: - str1 = str1 + f" Direct references:{item['direct_references']}" - if item["events_with_multiple_refs"]: - str1 = str1 + f" Multiple references:{item['events_with_multiple_refs']})" - sum_list.append(f"{indent}{key}: {str1}") - if item["level_counts"]: - sum_list = sum_list + HedTypeSummary._level_details(item["level_counts"], indent=indent) - return "\n".join(sum_list) - - @staticmethod - def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): - """Return a string with the summary for an individual tabular file. - - Parameters: - result (dict): Dictionary of summary information for a particular tabular file. - indent (str): String of blanks used as the amount to indent for readability. - - Returns: - str: Formatted string suitable for saving in a file or printing. 
- - """ - specifics = result.get("Specifics", {}) - type_info = specifics.get("Type info", {}) - sum_list = [ - f"Type={specifics.get('Type tag', 'condition-variable')} Type values={len(type_info)} " - f"Total events={result.get('Total events', 0)}" - ] - - for key, item in type_info.items(): - sum_list.append(f"{indent*2}{key}: {item['levels']} levels in {item['events']} events") - str1 = "" - if item["direct_references"]: - str1 = str1 + f" Direct references:{item['direct_references']}" - if item["events_with_multiple_refs"]: - str1 = str1 + f" (Multiple references:{item['events_with_multiple_refs']})" - if str1: - sum_list.append(f"{indent*3}{str1}") - if item["level_counts"]: - sum_list = sum_list + HedTypeSummary._level_details(item["level_counts"], offset=indent, indent=indent) - return "\n".join(sum_list) - - @staticmethod - def _level_details(level_counts, offset="", indent=""): - """Return a list of tag type summary counts at different levels. - - Parameters: - level_counts (dict): Dictionary of tags with counts. - offset (str): Spaces to offset the entire entry. - indent (str): Additional spaces to indent each level. - - """ - level_list = [] - for key, details in level_counts.items(): - str1 = f"[{details['events']} events, {details['files']} files]:" - level_list.append(f"{offset}{indent*2}{key} {str1}") - if details["tags"]: - level_list.append(f"{offset}{indent*3}Tags: {str(details['tags'])}") - if details["description"]: - level_list.append(f"{offset}{indent*3}Description: {details['description']}") - return level_list diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py deleted file mode 100644 index 116d8ebc..00000000 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ /dev/null @@ -1,353 +0,0 @@ -"""Validate the HED tags in a dataset and report errors.""" - -import os -import pandas as pd -from hed.errors import error_reporter -from hed.errors import error_types -from hed.models.sidecar import Sidecar -from hed.models.tabular_input import TabularInput -from hed.tools.remodeling.operations.base_op import BaseOp -from hed.tools.remodeling.operations.base_summary import BaseSummary - - -class SummarizeHedValidationOp(BaseOp): - """Validate the HED tags in a dataset and report errors. - - Required remodeling parameters: - - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. - - **check_for_warnings** (*bool*): If true include warnings as well as errors. - - Optional remodeling parameters: - - **append_timecode** (*bool*): If true, the timecode is appended to the base filename when summary is saved. - - The purpose of this op is to produce a summary of the HED validation errors in a file. 
- - """ - - NAME = "summarize_hed_validation" - - PARAMS = { - "type": "object", - "properties": { - "summary_name": {"type": "string", "description": "Name to use for the summary in titles."}, - "summary_filename": {"type": "string", "description": "Name to use for the summary file name base."}, - "append_timecode": { - "type": "boolean", - "description": "If true, the timecode is appended to the base filename so each run has a unique name.", - }, - "check_for_warnings": {"type": "boolean", "description": "If true warnings as well as errors are reported."}, - }, - "required": ["summary_name", "summary_filename", "check_for_warnings"], - "additionalProperties": False, - } - - SUMMARY_TYPE = "hed_validation" - - def __init__(self, parameters): - """Constructor for the summarize HED validation operation. - - Parameters: - parameters (dict): Dictionary with the parameter values for required and optional parameters. - - """ - super().__init__(parameters) - self.summary_name = parameters["summary_name"] - self.summary_filename = parameters["summary_filename"] - self.append_timecode = parameters.get("append_timecode", False) - self.check_for_warnings = parameters["check_for_warnings"] - - def do_op(self, dispatcher, df, name, sidecar=None) -> "pd.DataFrame": - """Validate the dataframe with the accompanying sidecar, if any. - - Parameters: - dispatcher (Dispatcher): Manages the operation I/O. - df (DataFrame): The DataFrame to be validated. - name (str): Unique identifier for the dataframe -- often the original file path. - sidecar (Sidecar or file-like): Usually needed unless only HED tags in HED column of event file. - - Returns: - pd.DataFrame: A copy of df - - Side effect: - Updates the relevant summary. - - """ - df_new = df.copy() - summary = dispatcher.summary_dicts.get(self.summary_name, None) - if not summary: - summary = HedValidationSummary(self) - dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary( - {"df": dispatcher.post_proc_data(df_new), "name": name, "schema": dispatcher.hed_schema, "sidecar": sidecar} - ) - return df_new - - @staticmethod - def validate_input_data(parameters): - """Additional validation required of operation parameters not performed by JSON schema validator.""" - return [] - - -class HedValidationSummary(BaseSummary): - """Manager for summary of validation issues.""" - - def __init__(self, sum_op): - """Constructor for validation issue manager. - - Parameters: - sum_op (SummarizeHedValidationOp): Operation associated with this summary. - - """ - super().__init__(sum_op) - self.sum_op = sum_op - - def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): - """Return a formatted string with the summary for the indicated name. - - Parameters: - name (str): Identifier (usually the filename) of the individual file. - result (dict): The dictionary of the summary results indexed by name. - indent (str): A string containing spaces used for indentation (usually 3 spaces). - - Returns: - str: The results in a printable format ready to be saved to a text file. - - Notes: - This gets the error list from "sidecar_issues" and "event_issues". 
- - """ - specifics = result.get("Specifics", {}) - sum_list = [ - f"{name}: [{len(specifics['sidecar_files'])} sidecar files, " f"{len(specifics['event_files'])} event files]" - ] - if specifics.get("is_merged"): - sum_list = sum_list + self.get_error_list(specifics["sidecar_issues"], count_only=True) - sum_list = sum_list + self.get_error_list(specifics["event_issues"], count_only=True) - else: - sum_list = sum_list + self.get_error_list(specifics["sidecar_issues"]) - if specifics["sidecar_had_issues"]: - sum_list = sum_list + self.get_error_list(specifics["sidecar_issues"], count_only=False) - else: - sum_list = sum_list + self.get_error_list(specifics["event_issues"], count_only=False) - return "\n".join(sum_list) - - def update_summary(self, new_info): - """Update the summary for a given tabular input file. - - Parameters: - new_info (dict): A dictionary with the parameters needed to update a summary. - - Notes: - - The summary needs a "name" str, a schema, a "df", and a "Sidecar". - """ - - sidecar = new_info.get("sidecar", None) - if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(files=new_info["sidecar"], name=os.path.basename(sidecar)) - results = self._get_sidecar_results(sidecar, new_info, self.sum_op.check_for_warnings) - if not results["sidecar_had_issues"]: - input_data = TabularInput(new_info["df"], sidecar=sidecar) - issues = input_data.validate(new_info["schema"]) - if not self.sum_op.check_for_warnings: - issues = error_reporter.ErrorHandler.filter_issues_by_severity(issues, error_types.ErrorSeverity.ERROR) - issues = [error_reporter.get_printable_issue_string([issue], skip_filename=True) for issue in issues] - results["event_issues"][new_info["name"]] = issues - results["total_event_issues"] = len(issues) - self.summary_dict[new_info["name"]] = results - - def get_details_dict(self, summary_info) -> dict: - """Return the summary details from the summary_info. - - Parameters: - summary_info (dict): Dictionary of issues - - Returns: - dict: Same summary_info as was passed in. - - """ - - return { - "Name": "", - "Total events": "n/a", - "Total files": len(summary_info.get("event_files", [])), - "Files": summary_info.get("event_files", []), - "Specifics": summary_info, - } - - def merge_all_info(self) -> dict: - """Create a dictionary containing all the errors in the dataset. - - Returns: - dict: dictionary of issues organized into sidecar_issues and event_issues. - - """ - results = self.get_empty_results() - results["is_merged"] = True - for key, ind_results in self.summary_dict.items(): - HedValidationSummary._update_sidecar_results(results, ind_results) - results["event_files"].append(key) - HedValidationSummary._update_events_results(results, ind_results) - return results - - @staticmethod - def _update_events_results(results, ind_results): - """Update the issues counts in a results dictionary based on a dictionary of individual info. - - Parameters: - results (dict): Dictionary containing overall information. - ind_results (dict): Dictionary to be updated. 
- - """ - results["total_event_issues"] += ind_results["total_event_issues"] - for ikey, errors in ind_results["event_issues"].items(): - if ind_results["sidecar_had_issues"]: - results["event_issues"][ - ikey - ] = f"Validation incomplete due to {ind_results['total_sidecar_issues']} sidecar issues" - else: - results["event_issues"][ikey] = f"{len(errors)}" - - @staticmethod - def _update_sidecar_results(results, ind_results): - """Update the sidecar issue counts in a results dictionary based on dictionary of individual info. - - Parameters: - ind_results (dict): Info dictionary from another HedValidationSummary - - """ - results["total_sidecar_issues"] += ind_results["total_sidecar_issues"] - results["sidecar_files"] = results["sidecar_files"] + ind_results["sidecar_files"] - for ikey, errors in ind_results["sidecar_issues"].items(): - results["sidecar_issues"][ikey] = errors - - @staticmethod - def get_empty_results() -> dict: - """Return an empty results dictionary to use as a template. - - Returns: - dict: Dictionary template of results info for the validation summary to fill in - - """ - return { - "event_files": [], - "total_event_issues": 0, - "event_issues": {}, - "is_merged": False, - "sidecar_files": [], - "total_sidecar_issues": 0, - "sidecar_issues": {}, - "sidecar_had_issues": False, - } - - @staticmethod - def get_error_list(error_dict, count_only=False) -> list: - """Convert errors produced by the HED validation into a list which includes filenames. - - Parameters: - error_dict (dict): Dictionary {filename: error_list} from validation. - count_only (bool): If False (the default), a full list of errors is included otherwise only error counts. - - Returns: - list: Error list of form [filenameA, issueA1, issueA2, ..., filenameB, issueB1, ...]. - - """ - error_list = [] - for key, item in error_dict.items(): - if count_only and isinstance(item, list): - error_list.append(f"{key}: {len(item)} issues") - elif count_only: - error_list.append(f"{key}: {item} issues") - elif not len(item): - error_list.append(f"{key} has no issues") - else: - error_list.append(f"{key}:") - error_list = error_list + item - return error_list - - @staticmethod - def _format_errors(error_list, name, errors, indent): - """Reformat errors to have appropriate indentation for readability. - - Parameters: - error_list (list): Overall list of error to append these errors to. - name (str): Name of the file which generated these errors. - errors (list): List of error associated with filename. - indent (str): Spaces used to control indentation. - - """ - error_list.append(f"{indent}{name} issues:") - for this_item in errors: - error_list.append(f"{indent * 2}{HedValidationSummary._format_error(this_item)}") - - @staticmethod - def _format_error(error): - """Format a HED error in a string suitable for summary display. - - Parameters: - error (dict): Represents a single HED error with its standard keys. - - Returns: - str: String version of the error. 
-
-
-        """
-        if not error:
-            return ""
-        error_str = error["code"]
-        error_locations = []
-        HedValidationSummary.update_error_location(error_locations, "row", "ec_row", error)
-        HedValidationSummary.update_error_location(error_locations, "column", "ec_column", error)
-        HedValidationSummary.update_error_location(error_locations, "sidecar column", "ec_sidecarColumnName", error)
-        HedValidationSummary.update_error_location(error_locations, "sidecar key", "ec_sidecarKeyName", error)
-        location_str = ",".join(error_locations)
-        if location_str:
-            error_str = error_str + f"[{location_str}]"
-        error_str = error_str + f": {error['message']}"
-        return error_str
-
-    @staticmethod
-    def update_error_location(error_locations, location_name, location_key, error):
-        """Update error information about where an error occurred in a sidecar or columnar file.
-
-        Parameters:
-            error_locations (list): List of error locations detected so far in this error.
-            location_name (str): Error location name, for example 'row', 'column', or 'sidecar column'.
-            location_key (str): Standard key name for this location in the dictionary for an error.
-            error (dict): Dictionary containing the information about this error.
-
-        """
-        if location_key in error:
-            error_locations.append(f"{location_name}={error[location_key][0]}")
-
-    @staticmethod
-    def _get_sidecar_results(sidecar, new_info, check_for_warnings):
-        """Return a dictionary of errors detected in a sidecar.
-
-        Parameters:
-            sidecar (Sidecar): The Sidecar to validate.
-            new_info (dict): Dictionary with information such as the schema needed for validation.
-            check_for_warnings (bool): If False, filter out warning errors.
-
-        Returns:
-            dict: Results of the validation.
-
-        """
-        results = HedValidationSummary.get_empty_results()
-        results["event_files"].append(new_info["name"])
-        results["event_issues"][new_info["name"]] = []
-        if sidecar:
-            results["sidecar_files"].append(sidecar.name)
-            results["sidecar_issues"][sidecar.name] = []
-            sidecar_issues = sidecar.validate(new_info.get("schema", None))
-            filtered_issues = error_reporter.ErrorHandler.filter_issues_by_severity(
-                sidecar_issues, error_types.ErrorSeverity.ERROR
-            )
-            if filtered_issues:
-                results["sidecar_had_issues"] = True
-            if not check_for_warnings:
-                sidecar_issues = filtered_issues
-            str_issues = [error_reporter.get_printable_issue_string([issue], skip_filename=True) for issue in sidecar_issues]
-            results["sidecar_issues"][sidecar.name] = str_issues
-            results["total_sidecar_issues"] = len(sidecar_issues)
-        return results
diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py
deleted file mode 100644
index 138b1da3..00000000
--- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py
+++ /dev/null
@@ -1,234 +0,0 @@
-"""Create a JSON sidecar from column values in a collection of tabular files."""
-
-import json
-from hed.tools.analysis.tabular_summary import TabularSummary
-from hed.tools.remodeling.operations.base_op import BaseOp
-from hed.tools.remodeling.operations.base_summary import BaseSummary
-
-
-class SummarizeSidecarFromEventsOp(BaseOp):
-    """Create a JSON sidecar from column values in a collection of tabular files.
-
-    Required remodeling parameters:
-        - **summary_name** (*str*): The name of the summary.
-        - **summary_filename** (*str*): Base filename of the summary.
-
-    Optional remodeling parameters:
-        - **append_timecode** (*bool*): If True, append a timecode to the summary filename.
-        - **skip_columns** (*list*): Names of columns to skip in the summary.
-        - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns.
-
-    The purpose is to produce a JSON sidecar template for annotating a dataset with HED tags.
-
-    """
-
-    NAME = "summarize_sidecar_from_events"
-
-    PARAMS = {
-        "type": "object",
-        "properties": {
-            "summary_name": {"type": "string", "description": "Name to use for the summary in titles."},
-            "summary_filename": {"type": "string", "description": "Name to use for the summary file name base."},
-            "skip_columns": {
-                "type": "array",
-                "description": "List of columns to skip in generating the sidecar.",
-                "items": {"type": "string"},
-                "minItems": 1,
-                "uniqueItems": True,
-            },
-            "value_columns": {
-                "type": "array",
-                "description": "List of columns to provide a single annotation with placeholder for the values.",
-                "items": {"type": "string"},
-                "minItems": 1,
-                "uniqueItems": True,
-            },
-            "append_timecode": {"type": "boolean"},
-        },
-        "required": ["summary_name", "summary_filename"],
-        "additionalProperties": False,
-    }
-
-    SUMMARY_TYPE = "events_to_sidecar"
-
-    def __init__(self, parameters):
-        """Constructor for the summarize sidecar from events operation.
-
-        Parameters:
-            parameters (dict): Dictionary with the parameter values for required and optional parameters.
-
-        """
-
-        super().__init__(parameters)
-        self.summary_name = parameters["summary_name"]
-        self.summary_filename = parameters["summary_filename"]
-        self.skip_columns = parameters.get("skip_columns", None)
-        self.value_columns = parameters.get("value_columns", None)
-        self.append_timecode = parameters.get("append_timecode", False)
-
-    def do_op(self, dispatcher, df, name, sidecar=None):
-        """Extract a sidecar from an events file.
-
-        Parameters:
-            dispatcher (Dispatcher): The dispatcher object for managing the operations.
-            df (DataFrame): The tabular file to be remodeled.
-            name (str): Unique identifier for the dataframe -- often the original file path.
-            sidecar (Sidecar or file-like): Not needed for this operation.
-
-        Returns:
-            DataFrame: A copy of df.
-
-        Side effect:
-            Updates the associated summary if applicable.
-
-        """
-
-        df_new = df.copy()
-        summary = dispatcher.summary_dicts.get(self.summary_name, None)
-        if not summary:
-            summary = EventsToSidecarSummary(self)
-            dispatcher.summary_dicts[self.summary_name] = summary
-        summary.update_summary({"df": dispatcher.post_proc_data(df_new), "name": name})
-        return df_new
-
-    @staticmethod
-    def validate_input_data(parameters):
-        """Additional validation required of operation parameters not performed by the JSON schema validator."""
-        return []
-
-
-class EventsToSidecarSummary(BaseSummary):
-    """Manager for events to sidecar generation."""
-
-    def __init__(self, sum_op):
-        """Constructor for events to sidecar manager.
-
-        Parameters:
-            sum_op (BaseOp): Operation associated with this summary.
-
-        """
-        super().__init__(sum_op)
-        self.value_cols = sum_op.value_columns
-        self.skip_cols = sum_op.skip_columns
-
-    def update_summary(self, new_info):
-        """Update the summary for a given tabular input file.
-
-        Parameters:
-            new_info (dict): A dictionary with the parameters needed to update a summary.
-
-        Notes:
-            - The summary needs a "name" str and a "df".
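        Illustrative sketch (not from the original file; hypothetical name and
        DataFrame, assuming an existing EventsToSidecarSummary instance):

            >>> import pandas as pd
            >>> df = pd.DataFrame({"onset": [0.5, 1.5], "trial_type": ["go", "stop"]})
            >>> summary.update_summary({"name": "sub1_events.tsv", "df": df})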
-
-        """
-
-        tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"])
-        tab_sum.update(new_info["df"], new_info["name"])
-        self.summary_dict[new_info["name"]] = tab_sum
-
-    def get_details_dict(self, summary_info):
-        """Return the summary-specific information.
-
-        Parameters:
-            summary_info (TabularSummary): Summary to return info from.
-
-        Returns:
-            dict: Standardized details dictionary extracted from the summary information.
-
-        Notes:
-            Abstract method to be implemented by each individual context summary.
-
-        """
-
-        return {
-            "Name": summary_info.name,
-            "Total events": summary_info.total_events,
-            "Total files": summary_info.total_files,
-            "Files": list(summary_info.files.keys()),
-            "Specifics": {
-                "Categorical info": summary_info.categorical_info,
-                "Value info": summary_info.value_info,
-                "Skip columns": summary_info.skip_cols,
-                "Sidecar": summary_info.extract_sidecar_template(),
-            },
-        }
-
-    def merge_all_info(self):
-        """Merge summary information from all the files.
-
-        Returns:
-            TabularSummary: Consolidated summary of information.
-
-        """
-
-        all_sum = TabularSummary(name="Dataset")
-        for _key, tab_sum in self.summary_dict.items():
-            all_sum.update_summary(tab_sum)
-        return all_sum
-
-    def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT):
-        """Return a formatted string with the summary for the indicated name.
-
-        Parameters:
-            name (str): Identifier (usually the filename) of the individual file.
-            result (dict): The dictionary of the summary results indexed by name.
-            indent (str): A string containing spaces used for indentation (usually 3 spaces).
-
-        Returns:
-            str: The results in a printable format ready to be saved to a text file.
-
-        Notes:
-            This calls _get_dataset_string to get the overall summary string and
-            _get_individual_string to get an individual summary string.
-
-        """
-
-        if name == "Dataset":
-            return self._get_dataset_string(result, indent=indent)
-        return self._get_individual_string(result, indent=indent)
-
-    @staticmethod
-    def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT):
-        """Return a string with the overall summary for all the tabular files.
-
-        Parameters:
-            result (dict): Dictionary of merged summary information.
-            indent (str): String of blanks used as the amount to indent for readability.
-
-        Returns:
-            str: Formatted string suitable for saving in a file or printing.
-
-        """
-        specifics = result.get("Specifics", {})
-        sum_list = [
-            f"Dataset: Total events={result.get('Total events', 0)} " f"Total files={result.get('Total files', 0)}",
-            f"Skip columns: {str(specifics.get('Skip columns', []))}",
-            f"Value columns: {list(specifics.get('Value info', {}).keys())}",
-            f"Sidecar:\n{json.dumps(specifics.get('Sidecar', {}), indent=indent)}",
-        ]
-        return "\n".join(sum_list)
-
-    @staticmethod
-    def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT):
-        """Return a string with the summary for an individual tabular file.
-
-        Parameters:
-            result (dict): Dictionary of summary information for a particular tabular file.
-            indent (str): String of blanks used as the amount to indent for readability.
-
-        Returns:
-            str: Formatted string suitable for saving in a file or printing.
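        Illustrative sketch (not from the original file; a minimal hypothetical
        result dictionary):

            >>> result = {"Total events": 2,
            ...           "Specifics": {"Skip columns": ["onset"],
            ...                         "Value info": {"response_time": [2, 1]},
            ...                         "Sidecar": {}}}
            >>> print(EventsToSidecarSummary._get_individual_string(result, indent=" "))
            Total events=2
            Skip columns: ['onset']
            Value columns: ['response_time']
            Sidecar:
            {}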
-
-        """
-        specifics = result.get("Specifics", {})
-        sum_list = [
-            f"Total events={result.get('Total events', 0)}",
-            f"Skip columns: {str(specifics.get('Skip columns', []))}",
-            f"Value columns: {list(specifics.get('Value info', {}).keys())}",
-            f"Sidecar:\n{json.dumps(specifics['Sidecar'], indent=indent)}",
-        ]
-        return "\n".join(sum_list)
-
-    @staticmethod
-    def validate_input_data(parameters):
-        return []
diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py
deleted file mode 100644
index a0f4d0df..00000000
--- a/hed/tools/remodeling/operations/valid_operations.py
+++ /dev/null
@@ -1,44 +0,0 @@
-"""The valid operations for the remodeling tools."""
-
-from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp
-from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp
-from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp
-from hed.tools.remodeling.operations.merge_consecutive_op import MergeConsecutiveOp
-from hed.tools.remodeling.operations.number_rows_op import NumberRowsOp
-from hed.tools.remodeling.operations.number_groups_op import NumberGroupsOp
-from hed.tools.remodeling.operations.remove_columns_op import RemoveColumnsOp
-from hed.tools.remodeling.operations.reorder_columns_op import ReorderColumnsOp
-from hed.tools.remodeling.operations.remap_columns_op import RemapColumnsOp
-from hed.tools.remodeling.operations.remove_rows_op import RemoveRowsOp
-from hed.tools.remodeling.operations.rename_columns_op import RenameColumnsOp
-from hed.tools.remodeling.operations.split_rows_op import SplitRowsOp
-from hed.tools.remodeling.operations.summarize_column_names_op import SummarizeColumnNamesOp
-from hed.tools.remodeling.operations.summarize_column_values_op import SummarizeColumnValuesOp
-from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp
-from hed.tools.remodeling.operations.summarize_sidecar_from_events_op import SummarizeSidecarFromEventsOp
-from hed.tools.remodeling.operations.summarize_hed_type_op import SummarizeHedTypeOp
-from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp
-from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp
-
-valid_operations = {
-    # 'convert_columns': ConvertColumnsOp,
-    "factor_column": FactorColumnOp,
-    "factor_hed_tags": FactorHedTagsOp,
-    "factor_hed_type": FactorHedTypeOp,
-    "merge_consecutive": MergeConsecutiveOp,
-    "number_groups": NumberGroupsOp,
-    "number_rows": NumberRowsOp,
-    "remap_columns": RemapColumnsOp,
-    "remove_columns": RemoveColumnsOp,
-    "remove_rows": RemoveRowsOp,
-    "rename_columns": RenameColumnsOp,
-    "reorder_columns": ReorderColumnsOp,
-    "split_rows": SplitRowsOp,
-    "summarize_column_names": SummarizeColumnNamesOp,
-    "summarize_column_values": SummarizeColumnValuesOp,
-    "summarize_definitions": SummarizeDefinitionsOp,
-    "summarize_hed_tags": SummarizeHedTagsOp,
-    "summarize_hed_type": SummarizeHedTypeOp,
-    "summarize_hed_validation": SummarizeHedValidationOp,
-    "summarize_sidecar_from_events": SummarizeSidecarFromEventsOp,
-}
diff --git a/hed/tools/remodeling/remodeler_validator.py b/hed/tools/remodeling/remodeler_validator.py
deleted file mode 100644
index 4036c743..00000000
--- a/hed/tools/remodeling/remodeler_validator.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""Validator for remodeler input files."""
-
-import jsonschema
-from copy import deepcopy
-from 
hed.tools.remodeling.operations.valid_operations import valid_operations
-
-
-class RemodelerValidator:
-    """Validator for remodeler input files."""
-
-    MESSAGE_STRINGS = {
-        "0": {
-            "minItems": "There are no operations defined. Specify at least 1 operation for the remodeler to execute.",
-            "type": "Operations must be contained in a list or array. This is also true for a single operation.",
-        },
-        "1": {
-            "type": "Each operation must be defined in a dictionary: {instance} is not a dictionary object.",
-            "required": "Operation dictionary {operation_index} is missing '{missing_value}'. "
-            + "Every operation dictionary must specify the type of operation, "
-            + "a description, and the operation parameters.",
-            "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field "
-            + "'{added_property}'. Every operation dictionary must specify the type "
-            + "of operation, a description, and the operation parameters.",
-        },
-        "2": {
-            "type": "Operation {operation_index}: {instance} is not a {validator_value}. "
-            + "{operation_field} should be of type {validator_value}.",
-            "enum": "{instance} is not a known remodeler operation. See the documentation for valid operations.",
-            "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} "
-            + "is a required parameter of {operation_name}.",
-            "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} "
-            + "contain an unexpected field '{added_property}'.",
-            "dependentRequired": "Operation {operation_index}: The parameter {missing_value} is missing: "
-            + "{missing_value} is a required parameter of {operation_name} "
-            + "when {dependent_on} is specified.",
-        },
-        "more": {
-            "type": "Operation {operation_index}: The value of {parameter_path} in the {operation_name} operation "
-            + "should be {validator_value}. {instance} is not a {validator_value}.",
-            "minItems": "Operation {operation_index}: The list in {parameter_path} in the {operation_name} "
-            + "operation should have at least {validator_value} item(s).",
-            "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. "
-            + "{missing_value} is a required parameter of {parameter_path}.",
-            "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} "
-            + "contain an unexpected field '{added_property}'.",
-            "enum": "Operation {operation_index}: Operation parameter {parameter_path} in the {operation_name} "
-            + "operation contains an unexpected value. 
Value should be one of {validator_value}.",
-            "uniqueItems": "Operation {operation_index}: The list in {parameter_path} in the {operation_name} "
-            + "operation should only contain unique items.",
-            "minProperties": "Operation {operation_index}: The dictionary in {parameter_path} in the "
-            + "{operation_name} operation should have at least {validator_value} key(s).",
-        },
-    }
-
-    BASE_ARRAY = {"type": "array", "items": {}, "minItems": 1}
-
-    OPERATION_DICT = {
-        "type": "object",
-        "required": ["operation", "description", "parameters"],
-        "additionalProperties": False,
-        "properties": {
-            "operation": {"type": "string", "enum": [], "default": "convert_columns"},
-            "description": {"type": "string"},
-            "parameters": {"type": "object", "properties": {}},
-        },
-        "allOf": [],
-    }
-
-    PARAMETER_SPECIFICATION_TEMPLATE = {
-        "if": {"properties": {"operation": {"const": ""}}, "required": ["operation"]},
-        "then": {"properties": {"parameters": {}}},
-    }
-
-    def __init__(self):
-        """Constructor for the remodeler validator."""
-        self.schema = self._construct_schema()  # The compiled JSON schema against which remodeler files are validated.
-        self.validator = jsonschema.Draft202012Validator(self.schema)  # The instantiated JSON schema validator.
-
-    def validate(self, operations) -> list[str]:
-        """Validate remodeler operations against the JSON schema specification and specific op requirements.
-
-        Parameters:
-            operations (list[dict]): List of dictionaries with input operations to run through the remodeler.
-
-        Returns:
-            list[str]: List with the error messages for errors identified by the validator.
-        """
-
-        list_of_error_strings = []
-        for error in sorted(self.validator.iter_errors(operations), key=lambda e: e.path):
-            list_of_error_strings.append(self._parse_message(error, operations))
-        if list_of_error_strings:
-            return list_of_error_strings
-
-        operation_by_parameters = [(operation["operation"], operation["parameters"]) for operation in operations]
-
-        for index, operation in enumerate(operation_by_parameters):
-            error_strings = valid_operations[operation[0]].validate_input_data(operation[1])
-            for error_string in error_strings:
-                list_of_error_strings.append(f"Operation {index + 1} ({operation[0]}): {error_string}")
-
-        return list_of_error_strings
-
-    def _parse_message(self, error, operations):
-        """Return a user-friendly error message based on the jsonschema validation error.
-
-        Parameters:
-            error (ValidationError): A validation error from the jsonschema validator.
-            operations (list of dict): The operations that were validated.
-
-        Note:
-            - A jsonschema error does not contain all the information necessary to build a
-              proper error message, so some information is also taken directly from the operations
-              that led to the error.
-
-            - All the necessary information is gathered into an error dict; the message strings are
-              predefined in a dictionary and are formatted with this additional information.
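        Illustrative sketch (not from the original file; hypothetical input, with
        wording following the MESSAGE_STRINGS templates above):

            >>> validator = RemodelerValidator()
            >>> errors = validator.validate([{"operation": "remove_columns"}])
            >>> # errors now contains messages such as:
            >>> # "Operation dictionary 1 is missing 'description'. Every operation
            >>> # dictionary must specify the type of operation, a description, and
            >>> # the operation parameters."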
-        """
-        error_dict = vars(error)
-
-        level = len(error_dict["path"])
-        if level > 2:
-            level = "more"
-        # Some information is in the validation error but not directly in a field, so a few values
-        # must be adjusted before they can be parsed into the message template.
-        # Values needed by a given template are present; values that are absent are not needed.
-        try:
-            error_dict["operation_index"] = error_dict["path"][0] + 1
-            error_dict["operation_field"] = error_dict["path"][1].capitalize()
-            error_dict["operation_name"] = operations[int(error_dict["path"][0])]["operation"]
-            # Everything except the first two values, reversed.
-            parameter_path = [*error_dict["path"]][:1:-1]
-            for ind, value in enumerate(parameter_path):
-                if isinstance(value, int):
-                    parameter_path[ind] = f"item {value+1}"
-            error_dict["parameter_path"] = " ".join(parameter_path)
-        except (IndexError, TypeError, KeyError):
-            pass
-
-        attr_type = str(error_dict["validator"])
-
-        # The missing value for required elements, or the unexpected additional value, is not known
-        # to the validation error object.
-        # This is a known issue of jsonschema: https://github.com/python-jsonschema/jsonschema/issues/119
-        # For now the simplest approach is to extract it from the error message.
-        if attr_type == "required":
-            error_dict["missing_value"] = error_dict["message"].split("'")[1::2][0]
-        if attr_type == "additionalProperties":
-            error_dict["added_property"] = error_dict["message"].split("'")[1::2][0]
-
-        # For dependentRequired, the missing value and the reason it is required arrive in one
-        # dictionary; they are split into two entries for the error message.
-        if attr_type == "dependentRequired":
-            error_dict["missing_value"] = list(error_dict["validator_value"].keys())[0]
-            error_dict["dependent_on"] = list(error_dict["validator_value"].values())[0]
-
-        return self.MESSAGE_STRINGS[str(level)][attr_type].format(**error_dict)
-
-    def _construct_schema(self):
-        """Return a schema specialized to the operations.
-
-        Returns:
-            dict: JSON schema specialized to the valid operations.
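        Illustrative sketch of the result's shape (not from the original file;
        abbreviated, with "..." standing for the remaining entries):

            {"type": "array",
             "minItems": 1,
             "items": {"type": "object",
                       "required": ["operation", "description", "parameters"],
                       "properties": {"operation": {"enum": ["factor_column", ...]}, ...},
                       "allOf": [...]}}  # one if/then parameter block per operation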
- - """ - schema = deepcopy(self.BASE_ARRAY) - schema["items"] = deepcopy(self.OPERATION_DICT) - - for operation in valid_operations.items(): - schema["items"]["properties"]["operation"]["enum"].append(operation[0]) - - parameter_specification = deepcopy(self.PARAMETER_SPECIFICATION_TEMPLATE) - parameter_specification["if"]["properties"]["operation"]["const"] = operation[0] - parameter_specification["then"]["properties"]["parameters"] = operation[1].PARAMS - - schema["items"]["allOf"].append(deepcopy(parameter_specification)) - - return schema diff --git a/pyproject.toml b/pyproject.toml index 58ae3718..373327e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ dependencies = [ dev = [ "ruff>=0.8.0", "codespell>=2.2.0", - "black>=26.1.0", + "black[jupyter]>=26.1.0", "mdformat>=0.7.0", "mdformat-myst>=0.1.5", ] diff --git a/tests/data/remodel_tests/attention_shift_remap_event_template_filled.tsv b/tests/data/other_tests/attention_shift_remap_event_template_filled.tsv similarity index 100% rename from tests/data/remodel_tests/attention_shift_remap_event_template_filled.tsv rename to tests/data/other_tests/attention_shift_remap_event_template_filled.tsv diff --git a/tests/data/remodel_tests/bcit_baseline_driving_samplingRates.tsv b/tests/data/other_tests/bcit_baseline_driving_samplingRates.tsv similarity index 100% rename from tests/data/remodel_tests/bcit_baseline_driving_samplingRates.tsv rename to tests/data/other_tests/bcit_baseline_driving_samplingRates.tsv diff --git a/tests/data/remodel_tests/sternberg_map.tsv b/tests/data/other_tests/sternberg_map.tsv similarity index 100% rename from tests/data/remodel_tests/sternberg_map.tsv rename to tests/data/other_tests/sternberg_map.tsv diff --git a/tests/data/remodel_tests/sternberg_no_quotes_events.tsv b/tests/data/other_tests/sternberg_no_quotes_events.tsv similarity index 100% rename from tests/data/remodel_tests/sternberg_no_quotes_events.tsv rename to tests/data/other_tests/sternberg_no_quotes_events.tsv diff --git a/tests/data/remodel_tests/sternberg_test_events.tsv b/tests/data/other_tests/sternberg_test_events.tsv similarity index 100% rename from tests/data/remodel_tests/sternberg_test_events.tsv rename to tests/data/other_tests/sternberg_test_events.tsv diff --git a/tests/data/remodel_tests/sternberg_with_quotes_events.tsv b/tests/data/other_tests/sternberg_with_quotes_events.tsv similarity index 100% rename from tests/data/remodel_tests/sternberg_with_quotes_events.tsv rename to tests/data/other_tests/sternberg_with_quotes_events.tsv diff --git a/tests/data/remodel_tests/sub-001_task-AuditoryVisualShift_run-01_events.tsv b/tests/data/other_tests/sub-001_task-AuditoryVisualShift_run-01_events.tsv similarity index 100% rename from tests/data/remodel_tests/sub-001_task-AuditoryVisualShift_run-01_events.tsv rename to tests/data/other_tests/sub-001_task-AuditoryVisualShift_run-01_events.tsv diff --git a/tests/data/remodel_tests/sub-002withHed_task-FacePerception_run-1_events.tsv b/tests/data/other_tests/sub-002withHed_task-FacePerception_run-1_events.tsv similarity index 100% rename from tests/data/remodel_tests/sub-002withHed_task-FacePerception_run-1_events.tsv rename to tests/data/other_tests/sub-002withHed_task-FacePerception_run-1_events.tsv diff --git a/tests/data/remodel_tests/task-AuditoryVisualShift_events.json b/tests/data/other_tests/task-AuditoryVisualShift_events.json similarity index 100% rename from tests/data/remodel_tests/task-AuditoryVisualShift_events.json rename to 
tests/data/other_tests/task-AuditoryVisualShift_events.json diff --git a/tests/data/remodel_tests/task-FacePerceptionSmall_events.json b/tests/data/other_tests/task-FacePerceptionSmall_events.json similarity index 100% rename from tests/data/remodel_tests/task-FacePerceptionSmall_events.json rename to tests/data/other_tests/task-FacePerceptionSmall_events.json diff --git a/tests/data/remodel_tests/all_remodel_operations.json b/tests/data/remodel_tests/all_remodel_operations.json deleted file mode 100644 index 34e929f9..00000000 --- a/tests/data/remodel_tests/all_remodel_operations.json +++ /dev/null @@ -1,278 +0,0 @@ -[ - { - "operation": "remove_columns", - "description": "Remove unwanted columns prior to analysis", - "parameters": { - "column_names": [ - "value", - "sample" - ], - "ignore_missing": true - } - }, - { - "operation": "factor_column", - "description": "Create factors for the succesful_stop and unsuccesful_stop values.", - "parameters": { - "column_name": "trial_type", - "factor_values": [ - "succesful_stop", - "unsuccesful_stop" - ], - "factor_names": [ - "stopped", - "stop_failed" - ] - } - }, - { - "operation": "factor_hed_tags", - "description": "Create factors based on whether the event represented a correct or incorrect action.", - "parameters": { - "queries": [ - "correct-action", - "incorrect-action" - ], - "query_names": [ - "correct", - "incorrect" - ], - "expand_context": false - } - }, - { - "operation": "factor_hed_type", - "description": "Factor based on the sex of the images being presented.", - "parameters": { - "type_tag": "Condition-variable" - } - }, - { - "operation": "merge_consecutive", - "description": "Merge consecutive *succesful_stop* events that match the *match_columns.", - "parameters": { - "column_name": "trial_type", - "event_code": "succesful_stop", - "match_columns": [ - "stop_signal_delay", - "response_hand", - "sex" - ], - "set_durations": true, - "ignore_missing": true - } - }, - { - "operation": "remap_columns", - "description": "Map response_accuracy and response hand into a single column.", - "parameters": { - "source_columns": [ - "response_accuracy", - "response_hand" - ], - "destination_columns": [ - "response_type" - ], - "map_list": [ - [ - "correct", - "left", - "correct_left" - ], - [ - "correct", - "right", - "correct_right" - ], - [ - "incorrect", - "left", - "incorrect_left" - ], - [ - "incorrect", - "right", - "incorrect_left" - ], - [ - "n/a", - "n/a", - "n/a" - ] - ], - "ignore_missing": true - } - }, - { - "operation": "remove_columns", - "description": "Remove extra columns before the next step.", - "parameters": { - "column_names": [ - "stop_signal_delay", - "response_accuracy", - "face" - ], - "ignore_missing": true - } - }, - { - "operation": "remove_rows", - "description": "Remove rows where trial_type is either succesful_stop or unsuccesful_stop.", - "parameters": { - "column_name": "trial_type", - "remove_values": [ - "succesful_stop", - "unsuccesful_stop" - ] - } - }, - { - "operation": "rename_columns", - "description": "Rename columns to be more descriptive.", - "parameters": { - "column_mapping": { - "stop_signal_delay": "stop_delay", - "response_hand": "hand_used" - }, - "ignore_missing": true - } - }, - { - "operation": "reorder_columns", - "description": "Reorder columns.", - "parameters": { - "column_order": [ - "onset", - "duration", - "response_time", - "trial_type" - ], - "ignore_missing": true, - "keep_others": false - } - }, - { - "operation": "split_rows", - "description": "add response events to the 
trials.", - "parameters": { - "anchor_column": "trial_type", - "new_events": { - "response": { - "onset_source": [ - "response_time" - ], - "duration": [ - 0 - ], - "copy_columns": [ - "response_accuracy", - "response_hand", - "sex", - "trial_number" - ] - }, - "stop_signal": { - "onset_source": [ - "stop_signal_delay" - ], - "duration": [ - 0.5 - ], - "copy_columns": [ - "trial_number" - ] - } - }, - "remove_parent_row": false - } - }, - { - "operation": "summarize_column_names", - "description": "Summarize column names.", - "parameters": { - "summary_name": "AOMIC_column_names", - "summary_filename": "AOMIC_column_names" - } - }, - { - "operation": "summarize_column_values", - "description": "Summarize the column values in an excerpt.", - "parameters": { - "summary_name": "AOMIC_column_values", - "summary_filename": "AOMIC_column_values", - "skip_columns": [ - "onset", - "duration" - ], - "value_columns": [ - "response_time", - "stop_signal_delay" - ] - } - }, - { - "operation": "summarize_definitions", - "description": "Summarize the definitions used in this dataset.", - "parameters": { - "summary_name": "HED_column_definition_summary", - "summary_filename": "HED_column_definition_summary" - } - }, - { - "operation": "summarize_hed_tags", - "description": "Summarize the HED tags in the dataset.", - "parameters": { - "summary_name": "summarize_hed_tags", - "summary_filename": "summarize_hed_tags", - "tags": { - "Sensory events": [ - "Sensory-event", - "Sensory-presentation", - "Task-stimulus-role", - "Experimental-stimulus" - ], - "Agent actions": [ - "Agent-action", - "Agent", - "Action", - "Agent-task-role", - "Task-action-type", - "Participant-response" - ], - "Objects": [ - "Item" - ] - } - } - }, - { - "operation": "summarize_hed_type", - "description": "Summarize column names.", - "parameters": { - "summary_name": "AOMIC_condition_variables", - "summary_filename": "AOMIC_condition_variables", - "type_tag": "condition-variable" - } - }, - { - "operation": "summarize_hed_validation", - "description": "Summarize validation errors in the sample dataset.", - "parameters": { - "summary_name": "AOMIC_sample_validation", - "summary_filename": "AOMIC_sample_validation", - "check_for_warnings": true - } - }, - { - "operation": "summarize_sidecar_from_events", - "description": "Generate a sidecar from the excerpted events file.", - "parameters": { - "summary_name": "AOMIC_generate_sidecar", - "summary_filename": "AOMIC_generate_sidecar", - "value_columns": [ - "response_time", - "stop_signal_delay" - ] - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/aomic_sub-0013_before_after_reorder_rmdl.json b/tests/data/remodel_tests/aomic_sub-0013_before_after_reorder_rmdl.json deleted file mode 100644 index f51ccca9..00000000 --- a/tests/data/remodel_tests/aomic_sub-0013_before_after_reorder_rmdl.json +++ /dev/null @@ -1,31 +0,0 @@ -[ - { - "operation": "summarize_column_values", - "description": "Summarize the column values in an excerpt before reordering.", - "parameters": { - "summary_name": "AOMIC_column_values_before_reorder", - "summary_filename": "AOMIC_column_values_before_reorder", - "skip_columns": ["onset", "duration"], - "value_columns": ["response_time", "stop_signal_delay"] - } - }, - { - "operation": "reorder_columns", - "description": "Order columns so that response_time and trial_type come after onset and duration", - "parameters": { - "column_order": ["onset", "duration", "trial_type", "response_time"], - "ignore_missing": true, - "keep_others": false - } - 
}, - { - "operation": "summarize_column_values", - "description": "Summarize the column values in an excerpt after reordering.", - "parameters": { - "summary_name": "AOMIC_column_values_after_reorder", - "summary_filename": "AOMIC_column_values_after_reorder", - "skip_columns": ["onset", "duration"], - "value_columns": ["response_time", "stop_signal_delay"] - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/aomic_sub-0013_events.json b/tests/data/remodel_tests/aomic_sub-0013_events.json deleted file mode 100644 index d9d1a79a..00000000 --- a/tests/data/remodel_tests/aomic_sub-0013_events.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "trial_type": { - "HED": { - "succesful_stop": "Sensory-presentation, Visual-presentation, Correct-action, Image, Label/succesful_stop", - "unsuccesful_stop": "Sensory-presentation, Visual-presentation, Incorrect-action, Image, Label/unsuccesful_stop", - "go": "Sensory-presentation, Visual-presentation, Image, Label/go" - } - }, - "stop_signal_delay": { - "HED": "(Auditory-presentation, Delay/# s)" - }, - "sex": { - "HED": { - "male": "Def/Male-image-cond", - "female": "Def/Female-image-cond" - } - }, - "hed_defs": { - "HED": { - "def_male": "(Definition/Male-image-cond, (Condition-variable/Image-sex, (Male, (Image, Face))))", - "def_female": "(Definition/Female-image-cond, (Condition-variable/Image-sex, (Female, (Image, Face))))" - } - } -} \ No newline at end of file diff --git a/tests/data/remodel_tests/aomic_sub-0013_excerpt_events.tsv b/tests/data/remodel_tests/aomic_sub-0013_excerpt_events.tsv deleted file mode 100644 index 57d2131e..00000000 --- a/tests/data/remodel_tests/aomic_sub-0013_excerpt_events.tsv +++ /dev/null @@ -1,7 +0,0 @@ -onset duration trial_type stop_signal_delay response_time response_accuracy response_hand sex -0.0776 0.5083 go n/a 0.565 correct right female -5.5774 0.5083 unsuccesful_stop 0.2 0.49 correct right female -9.5856 0.5084 go n/a 0.45 correct right female -13.5939 0.5083 succesful_stop 0.2 n/a n/a right female -17.1021 0.5083 unsuccesful_stop 0.25 0.633 correct left male -21.6103 0.5083 go n/a 0.443 correct left male \ No newline at end of file diff --git a/tests/data/remodel_tests/aomic_sub-0013_generate_sidecar_rmdl.json b/tests/data/remodel_tests/aomic_sub-0013_generate_sidecar_rmdl.json deleted file mode 100644 index d80283df..00000000 --- a/tests/data/remodel_tests/aomic_sub-0013_generate_sidecar_rmdl.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "operation": "summarize_sidecar_from_events", - "description": "Generate a sidecar from the excerpted events file.", - "parameters": { - "summary_name": "AOMIC_generate_sidecar", - "summary_filename": "AOMIC_generate_sidecar", - "skip_columns": ["onset", "duration"], - "value_columns": ["response_time", "stop_signal_delay"] - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json b/tests/data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json deleted file mode 100644 index 7b34d237..00000000 --- a/tests/data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json +++ /dev/null @@ -1,29 +0,0 @@ -[ - { - "operation": "summarize_column_names", - "description": "Summarize column names.", - "parameters": { - "summary_name": "AOMIC_column_names", - "summary_filename": "AOMIC_column_names" - } - }, - { - "operation": "summarize_column_values", - "description": "Summarize the column values in an excerpt.", - "parameters": { - "summary_name": "AOMIC_column_values", - "summary_filename": "AOMIC_column_values", - 
"skip_columns": ["onset", "duration"], - "value_columns": ["response_time", "stop_signal_delay"] - } - }, - { - "operation": "summarize_hed_type", - "description": "Summarize experimental conditions.", - "parameters": { - "summary_name": "AOMIC_condition_variables", - "summary_filename": "AOMIC_condition_variables", - "type_tag": "condition-variable" - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/aomic_summary_rmdl.json b/tests/data/remodel_tests/aomic_summary_rmdl.json deleted file mode 100644 index a925f976..00000000 --- a/tests/data/remodel_tests/aomic_summary_rmdl.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "operation": "summarize_column_names", - "description": "Summarize column names.", - "parameters": { - "summary_name": "AOMIC_column_headers1", - "summary_filename": "AOMIC_column_headers1" - } - }, - { - "operation": "summarize_column_names", - "description": "Summarize column names.", - "parameters": { - "summary_name": "AOMIC_column_headers2", - "summary_filename": "AOMIC_column_headers2" - } - }, - { - "operation": "summarize_column_names", - "description": "Summarize column names.", - "parameters": { - "summary_name": "AOMIC_column_headers3", - "summary_filename": "AOMIC_column_headers3" - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/bad_rename_rmdl.json b/tests/data/remodel_tests/bad_rename_rmdl.json deleted file mode 100644 index 71cae467..00000000 --- a/tests/data/remodel_tests/bad_rename_rmdl.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "operation": "rename_columns", - "description": "Rename the columns to test bad file.", - "parameters": { - "column_mapping": { "sex": "face_sex"} - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/only_splitrow_rmdl.json b/tests/data/remodel_tests/only_splitrow_rmdl.json deleted file mode 100644 index be85e6dc..00000000 --- a/tests/data/remodel_tests/only_splitrow_rmdl.json +++ /dev/null @@ -1,16 +0,0 @@ -[ - { - "operation": "split_rows", - "description": "Create separate response event from response time column.", - "parameters": { - "anchor_column": "event_type", - "new_events": {"response": {"onset_source": ["response_time"], - "duration": [0], - "copy_columns": ["response_accuracy", - "response_hand", - "trial_type"]}}, - "remove_parent_row": false - } - } -] - diff --git a/tests/data/remodel_tests/responses_only_m250_rmdl.json b/tests/data/remodel_tests/responses_only_m250_rmdl.json deleted file mode 100644 index 868df633..00000000 --- a/tests/data/remodel_tests/responses_only_m250_rmdl.json +++ /dev/null @@ -1,43 +0,0 @@ -[ - { - "operation": "remove_rows", - "description": "Keep only go trials.", - "parameters": { - "column_name": "trial_type", - "remove_values": ["succesful_stop", "unsuccesful_stop"] - } - }, - { - "operation": "remove_rows", - "description": "Get rid of rows where response_time is n/a", - "parameters": { - "column_name": "response_time", - "remove_values": ["n/a"] - } - }, - { - "operation": "split_rows", - "description": "Create event files that only have response events", - "parameters": { - "anchor_column": "event_type", - "new_events": { - "response": { - "onset_source": ["response_time", -0.25], - "duration": [0.5], - "copy_columns": ["trial_type", "response_hand", "sex"] - } - }, - "add_trial_numbers": false, - "remove_parent_row": true - } - }, - { - "operation": "reorder_columns", - "description": "Order columns for the output", - "parameters": { - "column_order": ["onset", "duration", "trial_type", "event_type", 
"response_hand", "sex"], - "ignore_missing": true, - "keep_others": false - } - } -] diff --git a/tests/data/remodel_tests/simple_reorder_rmdl.json b/tests/data/remodel_tests/simple_reorder_rmdl.json deleted file mode 100644 index 2695f79b..00000000 --- a/tests/data/remodel_tests/simple_reorder_rmdl.json +++ /dev/null @@ -1,19 +0,0 @@ -[ - { - "operation": "remove_columns", - "description": "Get rid of the sample and the value columns", - "parameters": { - "column_names": ["sample", "value"], - "ignore_missing": true - } - }, - { - "operation": "reorder_columns", - "description": "Order columns so that response_time and trial_type come after onset and duration", - "parameters": { - "column_order": ["onset", "duration", "trial_type", "response_time"], - "ignore_missing": true, - "keep_others": false - } - } -] diff --git a/tests/data/remodel_tests/sub-0013_task-stopsignal_acq-seq_events.json b/tests/data/remodel_tests/sub-0013_task-stopsignal_acq-seq_events.json deleted file mode 100644 index 205666d0..00000000 --- a/tests/data/remodel_tests/sub-0013_task-stopsignal_acq-seq_events.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "trial_type": { - "Description": "Description for trial_type", - "HED": { - "succesful_stop": "Sensory-presentation, Visual-presentation, Image, Label/succesful_stop", - "unsuccesful_stop": "Sensory-presentation, Visual-presentation, Image, Label/unsuccesful_stop", - "go": "Sensory-presentation, Visual-presentation, Image, Label/go" - }, - "Levels": { - "succesful_stop": "Presentation of a face image in a trial with a stop signal in which participant inhibited response.", - "unsuccesful_stop": "Presentation of a face image in a trial with a stop signal in which participant did not inhibit response.", - "go": "Presentation of a face image in a trial with no stop signal" - } - }, - "stop_signal_delay": { - "Description": "Stop-signal cue delay from onset.", - "HED": "((Cue, Think/Inhibit), Delay/# s)" - }, - "response_time": { - "Description": "Response time delay from onset.", - "HED": "(Participant-response, Delay/# s)" - }, - "response_accuracy": { - "Description": "Indicates whether a response correctly indicated", - "HED": { - "incorrect": "(Incorrect-action, (Identify, (Image, Sex)))", - "correct": "(Correct-action, (Identify, (Image, Sex)))" - }, - "Levels": { - "incorrect": "Used the wrong hand to indicate the sex of the face image.", - "correct": "Used the correct hand to indicate the sex of the face image." - } - }, - "response_hand": { - "Description": "Description for response_hand", - "HED": { - "left": "(Hand, (Left-side-of, Body))", - "right": "(Hand, (Right-side-of, Body))" - }, - "Levels": { - "left": "A response using the left hand.", - "right": "A response using the right hand." - } - }, - "sex": { - "Description": "The sex of the image", - "HED": { - "male": "Def/Male-image-cond", - "female": "Def/Female-image-cond" - }, - "Levels": { - "male": "The image was the face of a male person.", - "female": "The image was the face of a female person." 
- } - }, - "hed_defs": { - "Description": "HED user-defined terms for the dataset.", - "HED": { - "def_male": "(Definition/Male-image-cond, (Condition-variable/Image-sex, (Male, (Image, Face))))", - "def_female": "(Definition/Female-image-cond, (Condition-variable/Image-sex, (Female, (Image, Face))))" - } - } -} \ No newline at end of file diff --git a/tests/data/remodel_tests/sub-0013_task-stopsignal_acq-seq_events.tsv b/tests/data/remodel_tests/sub-0013_task-stopsignal_acq-seq_events.tsv deleted file mode 100644 index 05bffac2..00000000 --- a/tests/data/remodel_tests/sub-0013_task-stopsignal_acq-seq_events.tsv +++ /dev/null @@ -1,101 +0,0 @@ -onset duration trial_type stop_signal_delay response_time response_accuracy response_hand sex -0.0776 0.5083 go n/a 0.565 correct right female -5.5774 0.5083 unsuccesful_stop 0.2 0.49 correct right female -9.5856 0.5084 go n/a 0.45 correct right female -13.5939 0.5083 succesful_stop 0.2 n/a n/a right female -17.1021 0.5083 unsuccesful_stop 0.25 0.633 correct left male -21.6103 0.5083 go n/a 0.443 correct left male -24.6186 0.5083 go n/a 0.439 correct left male -28.6268 0.5083 go n/a 0.667 correct left male -32.1434 0.5083 go n/a 0.55 correct right female -36.1516 0.5083 succesful_stop 0.25 n/a n/a right female -41.6514 0.5084 go n/a 0.59 correct right female -44.6597 0.5083 unsuccesful_stop 0.3 0.511 correct right female -49.6679 0.5083 go n/a 0.604 correct right female -52.1845 0.5083 go n/a 0.743 correct left male -56.1927 0.5084 succesful_stop 0.3 n/a n/a right female -60.6926 0.5083 unsuccesful_stop 0.35 0.555 correct left male -65.7008 0.5083 go n/a 0.584 correct right female -73.7173 0.5083 succesful_stop 0.35 n/a n/a right female -76.7255 0.5083 succesful_stop 0.4 n/a n/a right male -81.2337 0.5084 go n/a 0.615 correct left male -84.742 0.5083 go n/a 0.754 correct left male -89.2502 0.5083 go n/a 0.777 correct right female -92.2668 0.5083 go n/a 0.644 correct right female -97.2666 0.5084 unsuccesful_stop 0.45 0.629 correct right female -100.2832 0.5083 go n/a 0.714 correct right female -104.7831 0.5083 go n/a 0.627 correct left male -108.2997 0.5083 go n/a 0.668 correct left male -113.2995 0.5084 go n/a 0.558 correct left male -117.3078 0.5083 go n/a 1.038 incorrect left female -120.816 0.5083 go n/a 0.764 correct left male -125.8242 0.5083 go n/a 0.782 correct right female -129.3325 0.5083 unsuccesful_stop 0.5 0.722 correct left male -132.8407 0.5083 go n/a 0.716 correct right female -137.8489 0.5083 go n/a 0.741 correct right female -141.3571 0.5084 succesful_stop 0.5 n/a n/a right male -145.8653 0.5084 go n/a 1.027 correct right female -149.3736 0.5083 go n/a 0.881 correct left male -153.3818 0.5083 go n/a 0.801 correct right female -157.89 0.5084 go n/a 0.803 correct left male -160.8983 0.5083 go n/a 0.771 correct right female -164.4149 0.5083 succesful_stop 0.55 n/a n/a right female -169.4147 0.5083 go n/a 0.899 correct left male -172.923 0.5083 unsuccesful_stop 0.6 0.754 correct left male -176.9312 0.5083 go n/a 1.11 correct left male -180.4478 0.5083 succesful_stop 0.65 n/a n/a right male -188.9559 0.5083 unsuccesful_stop 0.7 0.867 correct right female -193.4641 0.5083 unsuccesful_stop 0.75 0.814 correct left male -197.4723 0.5083 go n/a 1.21 correct right female -201.4805 0.5084 go n/a 0.859 correct left male -204.9888 0.5083 unsuccesful_stop 0.75 0.973 correct right female -212.5136 0.5083 go n/a 1.02 correct left male -221.5217 0.5083 go n/a 0.817 correct left male -225.5299 0.5083 go n/a 1.038 correct right female -228.5465 0.5083 
go n/a 1.049 correct right female -234.0463 0.5084 go n/a 0.92 correct left male -237.0546 0.5083 succesful_stop 0.7 n/a n/a right female -241.0628 0.5083 go n/a 1.266 correct right female -245.071 0.5084 unsuccesful_stop 0.7 0.854 correct right female -248.5876 0.5083 go n/a 0.985 correct left male -254.0875 0.5083 go n/a 0.789 correct right female -260.6123 0.5083 go n/a 0.928 correct right female -266.1122 0.5083 go n/a 0.807 correct left male -269.6204 0.5083 go n/a 0.735 correct left male -273.6286 0.5083 succesful_stop 0.65 n/a n/a right male -277.6368 0.5084 go n/a 0.896 correct right female -281.6451 0.5083 succesful_stop 0.65 n/a n/a right female -289.6615 0.5083 unsuccesful_stop 0.7 0.831 correct right female -293.1698 0.5083 go n/a 0.876 correct left male -296.6863 0.5084 go n/a 1.021 correct right female -302.1862 0.5083 unsuccesful_stop 0.7 1.085 correct left male -306.1944 0.5083 succesful_stop 0.65 n/a n/a right female -309.2027 0.5083 go n/a 0.814 correct right female -313.2109 0.5083 go n/a 1.053 correct left male -318.2191 0.5083 go n/a 1.002 correct left male -322.2273 0.5083 go n/a 1.057 correct right female -326.2355 0.5084 succesful_stop 0.65 n/a n/a right male -330.2438 0.5083 succesful_stop 0.7 n/a n/a right male -334.252 0.5083 go n/a 0.962 correct left male -341.2685 0.5083 go n/a 0.817 correct right female -346.2767 0.5083 unsuccesful_stop 0.75 0.822 correct left male -350.2849 0.5083 go n/a 0.889 correct left male -353.2932 0.5083 go n/a 0.946 correct right female -358.3014 0.5083 go n/a 0.911 correct right female -360.818 0.5083 unsuccesful_stop 0.8 1.054 correct left male -364.8262 0.5083 go n/a 0.966 correct right female -368.8344 0.5083 unsuccesful_stop 0.8 0.99 correct right female -373.8343 0.5083 go n/a 1.004 correct right female -377.8425 0.5083 unsuccesful_stop 0.75 0.909 correct left male -381.8507 0.5084 go n/a 0.859 correct left male -385.859 0.5083 go n/a 1.186 correct right female -389.3672 0.5083 go n/a 1.288 correct right female -393.3754 0.5083 go n/a 0.979 correct left male -398.3836 0.5084 go n/a 1.067 correct left male -400.9002 0.5083 succesful_stop 0.7 n/a n/a right male -409.4083 0.5084 go n/a 0.901 correct left male -414.4165 0.5084 unsuccesful_stop 0.65 0.879 correct left male -418.4248 0.5083 go n/a 1.003 correct left male -422.433 0.5083 succesful_stop 0.6 n/a n/a right female -429.9495 0.5083 succesful_stop 0.55 n/a n/a right female -437.9659 0.5083 go n/a 0.866 correct left male diff --git a/tests/data/remodel_tests/sub-002_task-FacePerception_run-1_events.tsv b/tests/data/remodel_tests/sub-002_task-FacePerception_run-1_events.tsv deleted file mode 100644 index 25860c16..00000000 --- a/tests/data/remodel_tests/sub-002_task-FacePerception_run-1_events.tsv +++ /dev/null @@ -1,201 +0,0 @@ -onset duration sample event_type face_type rep_status trial rep_lag value stim_file -0.004 n/a 1.0 setup_right_sym n/a n/a n/a n/a 3 n/a -24.2098181818 n/a 6052.4545 show_face_initial unfamiliar_face first_show 1 n/a 13 u032.bmp -25.0352727273 n/a 6258.8182 show_circle n/a n/a 1 n/a 0 circle.bmp -25.158 n/a 6289.5 left_press n/a n/a 1 n/a 256 n/a -26.7352727273 n/a 6683.8182 show_cross n/a n/a 2 n/a 1 cross.bmp -27.2498181818 n/a 6812.4545 show_face unfamiliar_face immediate_repeat 2 1 14 u032.bmp -27.8970909091 n/a 6974.2727 left_press n/a n/a 2 n/a 256 n/a -28.0998181818 n/a 7024.9545 show_circle n/a n/a 2 n/a 0 circle.bmp -29.7998181818 n/a 7449.9545 show_cross n/a n/a 3 n/a 1 cross.bmp -30.3570909091 n/a 7589.2727 show_face unfamiliar_face 
first_show 3 n/a 13 u088.bmp -31.188 n/a 7797.0 show_circle n/a n/a 3 n/a 0 circle.bmp -32.888 n/a 8222.0 show_cross n/a n/a 4 n/a 1 cross.bmp -33.3643636364 n/a 8341.0909 show_face unfamiliar_face first_show 4 n/a 13 u084.bmp -34.368 n/a 8592.0 show_circle n/a n/a 4 n/a 0 circle.bmp -36.068 n/a 9017.0 show_cross n/a n/a 5 n/a 1 cross.bmp -36.5561818182 n/a 9139.0455 show_face famous_face first_show 5 n/a 5 f123.bmp -37.3161818182 n/a 9329.0455 right_press n/a n/a 5 n/a 4096 n/a -37.3825454545 n/a 9345.6364 show_circle n/a n/a 5 n/a 0 circle.bmp -39.0825454545 n/a 9770.6364 show_cross n/a n/a 6 n/a 1 cross.bmp -39.5789090909 n/a 9894.7273 show_face unfamiliar_face first_show 6 n/a 13 u022.bmp -40.581636363600005 n/a 10145.4091 show_circle n/a n/a 6 n/a 0 circle.bmp -42.2816363636 n/a 10570.4091 show_cross n/a n/a 7 n/a 1 cross.bmp -42.8025454545 n/a 10700.6364 show_face famous_face first_show 7 n/a 5 f094.bmp -43.5489090909 n/a 10887.2273 right_press n/a n/a 7 n/a 4096 n/a -43.7198181818 n/a 10929.9545 show_circle n/a n/a 7 n/a 0 circle.bmp -45.4198181818 n/a 11354.9545 show_cross n/a n/a 8 n/a 1 cross.bmp -46.043454545500005 n/a 11510.8636 show_face scrambled_face first_show 8 n/a 17 s150.bmp -46.95072727270001 n/a 11737.6818 show_circle n/a n/a 8 n/a 0 circle.bmp -48.6507272727 n/a 12162.6818 show_cross n/a n/a 9 n/a 1 cross.bmp -49.1343636364 n/a 12283.5909 show_face unfamiliar_face delayed_repeat 9 6 15 u088.bmp -50.1352727273 n/a 12533.8182 show_circle n/a n/a 9 n/a 0 circle.bmp -51.8352727273 n/a 12958.8182 show_cross n/a n/a 10 n/a 1 cross.bmp -52.3916363636 n/a 13097.9091 show_face famous_face first_show 10 n/a 5 f063.bmp -53.100727272700006 n/a 13275.1818 right_press n/a n/a 10 n/a 4096 n/a -53.2616363636 n/a 13315.4091 show_circle n/a n/a 10 n/a 0 circle.bmp -54.96163636359999 n/a 13740.4091 show_cross n/a n/a 11 n/a 1 cross.bmp -55.5489090909 n/a 13887.2273 show_face unfamiliar_face delayed_repeat 11 7 15 u084.bmp -56.5589090909 n/a 14139.7273 show_circle n/a n/a 11 n/a 0 circle.bmp -58.258909090900005 n/a 14564.7273 show_cross n/a n/a 12 n/a 1 cross.bmp -58.8061818182 n/a 14701.5455 show_face unfamiliar_face first_show 12 n/a 13 u004.bmp -59.5407272727 n/a 14885.1818 left_press n/a n/a 12 n/a 256 n/a -59.7270909091 n/a 14931.7727 show_circle n/a n/a 12 n/a 0 circle.bmp -61.4270909091 n/a 15356.7727 show_cross n/a n/a 13 n/a 1 cross.bmp -61.9134545455 n/a 15478.3636 show_face unfamiliar_face immediate_repeat 13 1 14 u004.bmp -62.850727272700006 n/a 15712.6818 left_press n/a n/a 13 n/a 256 n/a -62.8934545455 n/a 15723.3636 show_circle n/a n/a 13 n/a 0 circle.bmp -64.5934545455 n/a 16148.3636 show_cross n/a n/a 14 n/a 1 cross.bmp -65.10436363640001 n/a 16276.0909 show_face famous_face delayed_repeat 14 9 7 f123.bmp -65.7989090909 n/a 16449.7273 right_press n/a n/a 14 n/a 4096 n/a -66.10436363640001 n/a 16526.0909 show_circle n/a n/a 14 n/a 0 circle.bmp -67.8043636364 n/a 16951.0909 show_cross n/a n/a 15 n/a 1 cross.bmp -68.4289090909 n/a 17107.2273 show_face famous_face first_show 15 n/a 5 f006.bmp -69.3470909091 n/a 17336.7727 right_press n/a n/a 15 n/a 4096 n/a -69.3625454545 n/a 17340.6364 show_circle n/a n/a 15 n/a 0 circle.bmp -71.0625454545 n/a 17765.6364 show_cross n/a n/a 16 n/a 1 cross.bmp -71.6025454545 n/a 17900.6364 show_face unfamiliar_face delayed_repeat 16 10 15 u022.bmp -72.32981818180001 n/a 18082.4545 right_press n/a n/a 16 n/a 4096 n/a -72.5616363636 n/a 18140.4091 show_circle n/a n/a 16 n/a 0 circle.bmp -74.26163636359999 n/a 18565.4091 show_cross n/a n/a 17 
n/a 1 cross.bmp
[... remaining deleted rows of the events file (trials 17-52), flattened in extraction; columns, inferred from the sidecar below: onset, duration, sample, event_type, face_type, rep_status, trial, rep_lag, value, stim_file ...]
diff --git a/tests/data/remodel_tests/summarize_hed_tags_rmdl.json b/tests/data/remodel_tests/summarize_hed_tags_rmdl.json
deleted file mode 100644
index aa1d595d..00000000
--- a/tests/data/remodel_tests/summarize_hed_tags_rmdl.json
+++ /dev/null
@@ -1,36 +0,0 @@
-[
-    {
-        "operation": "summarize_hed_tags",
-        "description": "Produce a summary of HED tags.",
-        "parameters": {
-            "summary_name": "Summarize_hed tags_basic",
-            "summary_filename": "summarize_hed_tags_basic",
-            "tags": {
-                "Sensory events": [
-                    "Sensory-event",
-                    "Sensory-presentation",
-                    "Task-stimulus-role",
-                    "Experimental-stimulus"
-                ],
-                "Agent actions": [
-                    "Agent-action",
-                    "Agent",
-                    "Action",
-                    "Agent-task-role",
-                    "Task-action-type",
-                    "Participant-response"
-                ],
-                "Task properties": [
-                    "Task-property"
-                ],
-                "Objects": [
-                    "Item"
-                ],
-                "Properties": [
-                    "Property"
-                ]
-            },
-            "expand_context": false
-        }
-    }
-]
\ No newline at end of file
diff --git a/tests/data/remodel_tests/summarize_hed_types_rmdl.json b/tests/data/remodel_tests/summarize_hed_types_rmdl.json
deleted file mode 100644
index 7441555e..00000000
--- a/tests/data/remodel_tests/summarize_hed_types_rmdl.json
+++ /dev/null
@@ -1,11 +0,0 @@
-[
-    {
-        "operation": "summarize_hed_type",
-        "description": "Summarize a HED type tag such as condition-variable",
-        "parameters": {
-            "summary_name": "Hed type summary",
-            "summary_filename": "hed_type_summary",
-            "type_tag": "condition-variable"
-        }
-    }
-]
diff --git a/tests/data/remodel_tests/tag_summary_template.json5 b/tests/data/remodel_tests/tag_summary_template.json5
deleted file mode 100644
index 4a227b20..00000000
--- a/tests/data/remodel_tests/tag_summary_template.json5
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "Tag-categories": [
-        "Sensory-event", "Agent-action", "Event", "Action", "Task-event-role", "Task-action-type",
-        "Task-stimulus-role", "Agent-task-role", "Item", "Sensory-presentation", "Organizational-property",
-        "Informational-property", "Sensory-property", "Property", "Relation"
-    ]
-}
diff --git a/tests/data/remodel_tests/task-FacePerceptionMissingDefs_events.json b/tests/data/remodel_tests/task-FacePerceptionMissingDefs_events.json
deleted file mode 100644
index ec869966..00000000
--- a/tests/data/remodel_tests/task-FacePerceptionMissingDefs_events.json
+++ /dev/null
@@ -1,137 +0,0 @@
[... 137 deleted lines: BIDS events sidecar for the face-perception task, matching task-FacePerception_events.json below except that its hed_def_sensory dictionary omits the Cross-only definition (hence "MissingDefs") ...]
diff --git a/tests/data/remodel_tests/task-FacePerception_events.json b/tests/data/remodel_tests/task-FacePerception_events.json
deleted file mode 100644
index fa018c47..00000000
--- a/tests/data/remodel_tests/task-FacePerception_events.json
+++ /dev/null
@@ -1,138 +0,0 @@
[... 138 deleted lines: BIDS events sidecar for the face-perception task — onset/duration descriptions; HED-annotated levels for event_type (show_face, show_face_initial, show_circle, show_cross, left_press, right_press, setup_left_sym, setup_right_sym, double_press), face_type (famous/unfamiliar/scrambled), and rep_status (first_show, immediate_repeat, delayed_repeat); value columns trial, rep_lag, and stim_file; hed_def_* dictionaries holding the Cross-only, Face-image, Circle-only, Press-left-finger, Press-right-finger, condition-variable (Face-type, Repetition-type, Key-assignment), task (Face-symmetry-evaluation, Blink-inhibition, Fixation), and Initialize-recording definitions; and numeric value-marker levels ...]
diff --git a/tests/data/remodel_tests/task-stopsignal_acq-seq_events.json b/tests/data/remodel_tests/task-stopsignal_acq-seq_events.json
deleted file mode 100644
index 95a86c05..00000000
--- a/tests/data/remodel_tests/task-stopsignal_acq-seq_events.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "trial_type": {
-        "HED": {
-            "succesful_stop": "Sensory-presentation, Visual-presentation, Correct-action, Image, Label/succesful_stop",
-            "unsuccesful_stop": "Sensory-presentation, Visual-presentation, Incorrect-action, Image, Label/unsuccesful_stop",
-            "go": "Sensory-presentation, Visual-presentation, Image, Label/go"
-        }
-    },
-    "stop_signal_delay": {
-        "HED": "(Auditory-presentation, Delay/# s)"
-    },
-    "sex": {
-        "HED": {
-            "male": "Def/Male-image-cond",
-            "female": "Def/Female-image-cond"
-        }
-    },
"hed_defs": { - "HED": { - "def_male": "(Definition/Male-image-cond, (Condition-variable/Image-sex, (Male, (Image, Face))))", - "def_female": "(Definition/Female-image-cond, (Condition-variable/Image-sex, (Female, (Image, Face))))" - } - } -} \ No newline at end of file diff --git a/tests/data/remodel_tests/test_ds003645_summarize_condition_variable_rmdl.json b/tests/data/remodel_tests/test_ds003645_summarize_condition_variable_rmdl.json deleted file mode 100644 index 606e03ea..00000000 --- a/tests/data/remodel_tests/test_ds003645_summarize_condition_variable_rmdl.json +++ /dev/null @@ -1,12 +0,0 @@ -[ - { - "operation": "summarize_hed_type", - "description": "Summarize the experimental design.", - "parameters": { - "summary_name": "summarize conditions", - "summary_filename": "summarize_condition_variable_type", - "type_tag": "condition-variable", - "expand_context": false - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root.zip b/tests/data/remodel_tests/test_root.zip deleted file mode 100644 index c15f4a5d..00000000 Binary files a/tests/data/remodel_tests/test_root.zip and /dev/null differ diff --git a/tests/data/remodel_tests/test_root1_rename_rmdl.json b/tests/data/remodel_tests/test_root1_rename_rmdl.json deleted file mode 100644 index 1cf4f997..00000000 --- a/tests/data/remodel_tests/test_root1_rename_rmdl.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "operation": "rename_columns", - "description": "Rename the columns.", - "parameters": { - "column_mapping": { "stuff": "value"}, - "ignore_missing": true - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root1_summarize_column_value_rmdl.json b/tests/data/remodel_tests/test_root1_summarize_column_value_rmdl.json deleted file mode 100644 index c022eff4..00000000 --- a/tests/data/remodel_tests/test_root1_summarize_column_value_rmdl.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "operation": "summarize_column_values", - "description": "Summarize column values before renaming.", - "parameters": { - "summary_name": "test summary_values_before", - "summary_filename": "column_values_summary_before_rename", - "skip_columns": [], - "value_columns": [] - } - }, - { - "operation": "rename_columns", - "description": "Rename the columns.", - "parameters": { - "column_mapping": { - "stuff": "value" - }, - "ignore_missing": true - } - }, - { - "operation": "summarize_column_values", - "description": "Summarize column values after renaming.", - "parameters": { - "summary_name": "test summary_values_after", - "summary_filename": "column_values_summary_after_rename", - "skip_columns": [], - "value_columns": [] - } - } -] \ No newline at end of file diff --git a/tests/data/remodel_tests/test_root_back1.zip b/tests/data/remodel_tests/test_root_back1.zip deleted file mode 100644 index 03440ff8..00000000 Binary files a/tests/data/remodel_tests/test_root_back1.zip and /dev/null differ diff --git a/tests/data/remodel_tests/test_root_bad.zip b/tests/data/remodel_tests/test_root_bad.zip deleted file mode 100644 index 21f210c5..00000000 Binary files a/tests/data/remodel_tests/test_root_bad.zip and /dev/null differ diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py index 99b8f338..d24105b5 100644 --- a/tests/models/test_tabular_input.py +++ b/tests/models/test_tabular_input.py @@ -27,7 +27,7 @@ def setUpClass(cls): ) sidecar2_path = os.path.realpath( os.path.join( - os.path.dirname(os.path.realpath(__file__)), 
"../data/remodel_tests/task-FacePerceptionSmall_events.json" + os.path.dirname(os.path.realpath(__file__)), "../data/other_tests/task-FacePerceptionSmall_events.json" ) ) cls.hed_schema = schema.load_schema(schema_path) diff --git a/tests/tools/analysis/test_annotation_util.py b/tests/tools/analysis/test_annotation_util.py index 2f784e69..7b224783 100644 --- a/tests/tools/analysis/test_annotation_util.py +++ b/tests/tools/analysis/test_annotation_util.py @@ -20,7 +20,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests") + curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/other_tests") bids_root_path = os.path.realpath( os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/bids_tests/eeg_ds003645s_hed") ) diff --git a/tests/tools/analysis/test_key_map.py b/tests/tools/analysis/test_key_map.py index cc3088f3..2b80bdcd 100644 --- a/tests/tools/analysis/test_key_map.py +++ b/tests/tools/analysis/test_key_map.py @@ -10,7 +10,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests") + curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/other_tests") cls.curation_base_dir = curation_base_dir cls.stern_map_path = os.path.join(curation_base_dir, "sternberg_map.tsv") cls.stern_test1_path = os.path.join(curation_base_dir, "sternberg_test_events.tsv") diff --git a/tests/tools/analysis/test_sequence_map.py b/tests/tools/analysis/test_sequence_map.py index 13d40b99..771891a8 100644 --- a/tests/tools/analysis/test_sequence_map.py +++ b/tests/tools/analysis/test_sequence_map.py @@ -6,7 +6,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/remodel_tests') + # curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../data/other_tests') base_path = "" cls.events_path = os.path.realpath( base_path + "/sub-01/ses-01/eeg/sub-01_ses-01_task-DriveRandomSound_run-1_events.tsv" diff --git a/tests/tools/analysis/test_tabular_summary.py b/tests/tools/analysis/test_tabular_summary.py index 7afb9bb8..2fdfaca0 100644 --- a/tests/tools/analysis/test_tabular_summary.py +++ b/tests/tools/analysis/test_tabular_summary.py @@ -10,7 +10,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests") + curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/other_tests") bids_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/bids_tests/eeg_ds003645s_hed") cls.bids_base_dir = bids_base_dir cls.stern_map_path = os.path.join(curation_base_dir, "sternberg_map.tsv") diff --git a/tests/tools/bids/test_bids_tabular_file.py b/tests/tools/bids/test_bids_tabular_file.py index 0285f5a1..4e9bd3e4 100644 --- a/tests/tools/bids/test_bids_tabular_file.py +++ b/tests/tools/bids/test_bids_tabular_file.py @@ -11,13 +11,13 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - event_path = "../../data/remodel_tests/sub-001_task-AuditoryVisualShift_run-01_events.tsv" + event_path = "../../data/other_tests/sub-001_task-AuditoryVisualShift_run-01_events.tsv" cls.event_path = 
os.path.join(os.path.dirname(os.path.realpath(__file__)), event_path) - hed_col_path = "../../data/remodel_tests/sub-002withHed_task-FacePerception_run-1_events.tsv" + hed_col_path = "../../data/other_tests/sub-002withHed_task-FacePerception_run-1_events.tsv" cls.event_path_hed_col = os.path.join(os.path.dirname(os.path.realpath(__file__)), hed_col_path) - sidecar_path = "../../data/remodel_tests/task-AuditoryVisualShift_events.json" + sidecar_path = "../../data/other_tests/task-AuditoryVisualShift_events.json" cls.sidecar_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), sidecar_path) def test_constructor(self): diff --git a/tests/tools/remodeling/__init__.py b/tests/tools/remodeling/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/tools/remodeling/cli/__init__.py b/tests/tools/remodeling/cli/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py deleted file mode 100644 index 96edb7fa..00000000 --- a/tests/tools/remodeling/cli/test_run_remodel.py +++ /dev/null @@ -1,250 +0,0 @@ -import os -import io -import shutil -import unittest -from unittest.mock import patch -import zipfile -from hed.errors import HedFileError -from hed.tools.remodeling.cli.run_remodel import parse_arguments, parse_tasks, main - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.data_zip = os.path.realpath( - os.path.join(os.path.dirname(__file__), "../../../data/bids_tests/eeg_ds003645s_hed_remodel.zip") - ) - cls.extract_path = os.path.realpath(os.path.join(os.path.dirname(__file__), "../../../data/remodel_tests")) - cls.data_root = os.path.realpath( - os.path.join(os.path.dirname(__file__), "../../../data/remodel_tests/eeg_ds003645s_hed_remodel") - ) - cls.model_path = os.path.realpath( - os.path.join( - os.path.dirname(__file__), - "../../../data/remodel_tests/eeg_ds003645s_hed_remodel", - "derivatives/remodel/remodeling_files/remove_extra_rmdl.json", - ) - ) - cls.sidecar_path = os.path.realpath( - os.path.join( - os.path.dirname(__file__), - "../../../data/remodel_tests/eeg_ds003645s_hed_remodel", - "task-FacePerception_events.json", - ) - ) - cls.summary_model_path = os.path.realpath( - os.path.join( - os.path.dirname(__file__), - "../../../data/remodel_tests/eeg_ds003645s_hed_remodel", - "derivatives/remodel/remodeling_files", - "summarize_hed_types_rmdl.json", - ) - ) - cls.bad_model_path = os.path.realpath( - os.path.join(os.path.dirname(__file__), "../../../data/remodel_tests/bad_rename_rmdl.json") - ) - cls.files = [ - "/datasets/fmri_ds002790s_hed_aomic/sub-0001/func/sub-0001_task-stopsignal_acq-seq_events.tsv", - "/datasets/fmri_ds002790s_hed_aomic/sub-0001/func/sub-0001_task-workingmemory_acq-seq_events.tsv", - "/datasets/fmri_ds002790s_hed_aomic/sub-0002/func/sub-0002_task-emomatching_acq-seq_events.tsv", - "/datasets/fmri_ds002790s_hed_aomic/sub-0002/func/sub-0002_task-stopsignal_acq-seq_events.tsv", - "/datasets/fmri_ds002790s_hed_aomic/sub-0002/func/sub-0002_task-workingmemory_acq-seq_events.tsv", - ] - - def setUp(self): - with zipfile.ZipFile(self.data_zip, "r") as zip_ref: - zip_ref.extractall(self.extract_path) - - def tearDown(self): - shutil.rmtree(self.data_root) - work_path = os.path.realpath(os.path.join(self.extract_path, "temp")) - if os.path.exists(work_path): - shutil.rmtree(work_path) - - @classmethod - def tearDownClass(cls): - pass - - def test_parse_arguments(self): - # Test no verbose - 
arg_list1 = [self.data_root, self.model_path, "-x", "derivatives", "-bn", "back1"] - with patch("sys.stdout", new=io.StringIO()) as fp1: - args1, operations1 = parse_arguments(arg_list1) - self.assertFalse(fp1.getvalue()) - self.assertTrue(args1) - self.assertEqual(len(operations1), 1) - self.assertEqual(args1.suffixes, ["events"]) - - # Test * for extensions and suffix as well as verbose - arg_list2 = [self.data_root, self.model_path, "-x", "derivatives", "-bn", "back1", "-f", "*", "-v"] - with patch("sys.stdout", new=io.StringIO()) as fp2: - args2, operations2 = parse_arguments(arg_list2) - self.assertTrue(fp2.getvalue()) - self.assertTrue(args2) - self.assertEqual(len(operations2), 1) - self.assertIsNone(args2.suffixes) - - # Test not able to parse - arg_list3 = [self.data_root, self.bad_model_path, "-x", "derivatives"] - with self.assertRaises(ValueError) as context3: - parse_arguments(arg_list3) - self.assertEqual(context3.exception.args[0], "UnableToFullyParseOperations") - - def test_parse_tasks(self): - tasks1 = parse_tasks(self.files, "*") - self.assertIn("stopsignal", tasks1) - self.assertEqual(3, len(tasks1)) - self.assertEqual(2, len(tasks1["workingmemory"])) - tasks2 = parse_tasks(self.files, ["workingmemory"]) - self.assertEqual(1, len(tasks2)) - files2 = ["task-.tsv", "/base/"] - tasks3 = parse_tasks(files2, "*") - self.assertFalse(tasks3) - - def test_main_bids(self): - arg_list = [self.data_root, self.model_path, "-x", "derivatives", "stimuli", "-b", "-hv", "8.3.0"] - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - def test_main_bids_alt_path(self): - work_path = os.path.realpath(os.path.join(self.extract_path, "temp")) - arg_list = [ - self.data_root, - self.summary_model_path, - "-x", - "derivatives", - "stimuli", - "-hv", - "8.3.0", - "-j", - self.sidecar_path, - "-w", - work_path, - ] - - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - def test_main_bids_verbose_bad_task(self): - arg_list = [self.data_root, self.model_path, "-x", "derivatives", "stimuli", "-b", "-t", "junk", "-v"] - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertTrue(fp.getvalue()) - - def test_main_bids_verbose(self): - arg_list = [self.data_root, self.model_path, "-x", "derivatives", "stimuli", "-b", "-v"] - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertTrue(fp.getvalue()) - - def test_main_bids_no_sidecar(self): - arg_list = [self.data_root, self.model_path, "-x", "derivatives", "stimuli", "-b"] - os.remove(self.sidecar_path) - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - def test_main_bids_no_sidecar_with_hed(self): - arg_list = [self.data_root, self.summary_model_path, "-x", "derivatives", "stimuli", "-b"] - os.remove(self.sidecar_path) - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - def test_main_direct_no_sidecar(self): - arg_list = [self.data_root, self.model_path, "-x", "derivatives", "stimuli"] - os.remove(self.sidecar_path) - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - def test_main_direct_no_sidecar_with_hed(self): - arg_list = [self.data_root, self.summary_model_path, "-x", "derivatives", "stimuli", "-hv", "8.3.0"] - os.remove(self.sidecar_path) - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - 
self.assertFalse(fp.getvalue()) - - def test_main_direct_sidecar_with_hed_bad_task(self): - arg_list = [ - self.data_root, - self.summary_model_path, - "-x", - "derivatives", - "stimuli", - "-hv", - "8.3.0", - "-j", - self.sidecar_path, - "-t", - "junk", - ] - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - def test_main_direct_sidecar_with_hed(self): - arg_list = [ - self.data_root, - self.summary_model_path, - "-x", - "derivatives", - "stimuli", - "-hv", - "8.4.0", - "-j", - self.sidecar_path, - "-v", - ] - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertTrue(fp.getvalue()) - - def test_main_bids_no_sidecar_with_hed_task(self): - arg_list = [ - self.data_root, - self.summary_model_path, - "-x", - "derivatives", - "stimuli", - "-t", - "FacePerception", - "-hv", - "8.3.0", - ] - os.remove(self.sidecar_path) - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - def test_main_errors(self): - # Test bad data directory - arg_list = ["junk/junk", self.model_path, "-x", "derivatives", "-bn", "back1"] - with self.assertRaises(HedFileError) as context: - main(arg_list=arg_list) - self.assertEqual(context.exception.args[0], "DataDirectoryDoesNotExist") - - # Test no backup - arg_list = [self.data_root, self.model_path, "-x", "derivatives", "-bn", "back1"] - with self.assertRaises(HedFileError) as context: - main(arg_list=arg_list) - self.assertEqual(context.exception.args[0], "BackupDoesNotExist") - - def test_main_verbose(self): - arg_list = [self.data_root, self.model_path, "-x", "derivatives", "-v"] - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertTrue(fp.getvalue()) - - def test_run_bids_ops_verbose(self): - arg_list = [self.data_root, self.model_path, "-x", "derivatives"] - with patch("sys.stdout", new=io.StringIO()) as fp: - main(arg_list) - self.assertFalse(fp.getvalue()) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/cli/test_run_remodel_backup.py b/tests/tools/remodeling/cli/test_run_remodel_backup.py deleted file mode 100644 index 1222d9cf..00000000 --- a/tests/tools/remodeling/cli/test_run_remodel_backup.py +++ /dev/null @@ -1,198 +0,0 @@ -import os -import json -import shutil -import unittest -import zipfile -from hed.errors import HedFileError -from hed.tools.remodeling.backup_manager import BackupManager -from hed.tools.remodeling.cli.run_remodel_backup import main -from hed.tools.util.io_util import get_file_list - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - file_list = ["top_level.tsv", "sub1/sub1_events.tsv", "sub2/sub2_events.tsv", "sub2/sub2_next_events.tsv"] - # cls.file_list = file_list - extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests") - cls.alt_path = os.path.realpath(os.path.join(extract_path, "temp")) - cls.extract_path = extract_path - test_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/test_root") - cls.test_root = test_root - cls.test_paths = [os.path.join(test_root, file) for file in file_list] - cls.test_zip = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/test_root.zip") - cls.derv_path = os.path.realpath(os.path.join(test_root, BackupManager.RELATIVE_BACKUP_LOCATION)) - cls.data_zip = os.path.realpath( - os.path.join(os.path.dirname(__file__), 
"../../../data/bids_tests/eeg_ds003645s_hed_remodel.zip") - ) - cls.data_root = os.path.realpath( - os.path.join(os.path.dirname(__file__), "../../../data/remodel_tests/eeg_ds003645s_hed_remodel") - ) - - def setUp(self): - with zipfile.ZipFile(self.test_zip, "r") as zip_ref: - zip_ref.extractall(self.extract_path) - with zipfile.ZipFile(self.data_zip, "r") as zip_ref: - zip_ref.extractall(self.extract_path) - - def tearDown(self): - if os.path.exists(self.test_root): - shutil.rmtree(self.test_root) - if os.path.exists(self.data_root): - shutil.rmtree(self.data_root) - if os.path.exists(self.alt_path): - shutil.rmtree(self.alt_path) - - @classmethod - def tearDownClass(cls): - pass - - def test_main_events(self): - self.assertFalse(os.path.exists(self.derv_path), "backup directory does not exist before creation") - arg_list = [ - self.test_root, - "-bn", - BackupManager.DEFAULT_BACKUP_NAME, - "-bd", - self.derv_path, - "-x", - "derivatives", - "-fs", - "events", - ] - main(arg_list) - self.assertTrue(os.path.exists(self.derv_path), "backup directory exists before creation") - json_path = os.path.realpath( - os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME, BackupManager.BACKUP_DICTIONARY) - ) - with open(json_path, "r") as fp: - key_dict = json.load(fp) - self.assertEqual(len(key_dict), 3, "The backup of events.tsv does not include top_level.tsv") - file_list = get_file_list(self.derv_path, name_suffix="events") - self.assertEqual(len(file_list), 3, "The backup of events.tsv has the right number of files") - - def test_main_all(self): - arg_list = [ - self.test_root, - "-bn", - BackupManager.DEFAULT_BACKUP_NAME, - "-bd", - self.derv_path, - "-x", - "derivatives", - "-fs", - "*", - ] - - self.assertFalse(os.path.exists(self.derv_path), "backup directory does not exist before creation") - main(arg_list) - self.assertTrue(os.path.exists(self.derv_path), "backup directory exists before creation") - json_path = os.path.realpath( - os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME, BackupManager.BACKUP_DICTIONARY) - ) - with open(json_path, "r") as fp: - key_dict = json.load(fp) - self.assertEqual(len(key_dict), 4, "The backup of events.tsv does not include top_level.tsv") - back_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME, "backup_root")) - file_list1 = get_file_list(back_path) - self.assertIsInstance(file_list1, list) - self.assertEqual(len(file_list1), 4) - - def test_main_task(self): - der_path = os.path.realpath(os.path.join(self.data_root, "derivatives")) - self.assertTrue(os.path.exists(der_path)) - shutil.rmtree(der_path) - self.assertFalse(os.path.exists(der_path)) - arg_list = [ - self.data_root, - "-bn", - BackupManager.DEFAULT_BACKUP_NAME, - "-x", - "derivatives", - "-fs", - "events", - "-t", - "FacePerception", - ] - main(arg_list) - self.assertTrue(os.path.exists(der_path)) - back_path = os.path.realpath( - os.path.join( - self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION, BackupManager.DEFAULT_BACKUP_NAME, "backup_root" - ) - ) - self.assertTrue(os.path.exists(back_path)) - backed_files = get_file_list(back_path) - self.assertEqual(len(backed_files), 6) - - def test_main_bad_task(self): - der_path = os.path.realpath(os.path.join(self.data_root, "derivatives")) - self.assertTrue(os.path.exists(der_path)) - shutil.rmtree(der_path) - self.assertFalse(os.path.exists(der_path)) - arg_list = [ - self.data_root, - "-bn", - BackupManager.DEFAULT_BACKUP_NAME, - "-x", - "derivatives", - "-fs", - "events", - "-t", - 
"Baloney", - ] - main(arg_list) - self.assertTrue(os.path.exists(der_path)) - back_path = os.path.realpath( - os.path.join( - self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION, BackupManager.DEFAULT_BACKUP_NAME, "backup_root" - ) - ) - self.assertTrue(os.path.exists(back_path)) - backed_files = get_file_list(back_path) - self.assertEqual(len(backed_files), 0) - - def test_alt_loc(self): - if os.path.exists(self.alt_path): - shutil.rmtree(self.alt_path) - self.assertFalse(os.path.exists(self.alt_path)) - arg_list = [ - self.data_root, - "-bn", - BackupManager.DEFAULT_BACKUP_NAME, - "-x", - "derivatives", - "-bd", - self.alt_path, - "-fs", - "events", - ] - main(arg_list) - self.assertTrue(os.path.exists(self.alt_path)) - back_path = os.path.realpath(os.path.join(self.alt_path, "default_back/backup_root")) - self.assertTrue(os.path.exists(back_path)) - backed_files = get_file_list(back_path) - self.assertEqual(len(backed_files), 6) - - def test_main_backup_exists(self): - der_path = os.path.realpath(os.path.join(self.data_root, "derivatives")) - self.assertTrue(os.path.exists(der_path)) - arg_list = [ - self.data_root, - "-bn", - BackupManager.DEFAULT_BACKUP_NAME, - "-x", - "derivatives", - "-fs", - "events", - "-t", - "Baloney", - ] - with self.assertRaises(HedFileError) as context: - main(arg_list) - self.assertEqual(context.exception.args[0], "BackupExists") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/cli/test_run_remodel_restore.py b/tests/tools/remodeling/cli/test_run_remodel_restore.py deleted file mode 100644 index 8b15831d..00000000 --- a/tests/tools/remodeling/cli/test_run_remodel_restore.py +++ /dev/null @@ -1,76 +0,0 @@ -import os -import shutil -import unittest -import zipfile -from hed.errors import HedFileError -from hed.tools.remodeling.cli.run_remodel_backup import main as back_main -from hed.tools.remodeling.cli.run_remodel_restore import main -from hed.tools.util.io_util import get_file_list - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.file_list = ["top_level.tsv", "sub1/sub1_events.tsv", "sub2/sub2_events.tsv", "sub2/sub2_next_events.tsv"] - cls.test_root_back1 = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/test_root_back1" - ) - cls.test_zip_back1 = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/test_root_back1.zip" - ) - extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests") - cls.alt_path = os.path.realpath(os.path.join(extract_path, "temp")) - cls.extract_path = extract_path - - def setUp(self): - with zipfile.ZipFile(self.test_zip_back1, "r") as zip_ref: - zip_ref.extractall(self.extract_path) - - def tearDown(self): - if os.path.exists(self.test_root_back1): - shutil.rmtree(self.test_root_back1) - if os.path.exists(self.alt_path): - shutil.rmtree(self.alt_path) - - def test_main_restore(self): - files1 = get_file_list(self.test_root_back1, exclude_dirs=["derivatives"]) - self.assertEqual(len(files1), 4, "run_restore starts with the right number of files.") - shutil.rmtree(os.path.realpath(os.path.join(self.test_root_back1, "sub1"))) - shutil.rmtree(os.path.realpath(os.path.join(self.test_root_back1, "sub2"))) - os.remove(os.path.realpath(os.path.join(self.test_root_back1, "top_level.tsv"))) - files2 = get_file_list(self.test_root_back1, exclude_dirs=["derivatives"]) - self.assertFalse(files2, "run_restore starts with the right number of files.") - 
arg_list = [self.test_root_back1, "-bn", "back1"] - main(arg_list) - files3 = get_file_list(self.test_root_back1, exclude_dirs=["derivatives"]) - self.assertEqual(len(files3), len(files1), "run_restore restores all the files after") - - def test_no_backup(self): - # Test bad data directory - arg_list = [self.test_root_back1] - with self.assertRaises(HedFileError) as context: - main(arg_list=arg_list) - self.assertEqual(context.exception.args[0], "BackupDoesNotExist") - - def test_restore_alt_loc(self): - if os.path.exists(self.alt_path): - shutil.rmtree(self.alt_path) - self.assertFalse(os.path.exists(self.alt_path)) - arg_list = [self.test_root_back1, "-bn", "back1", "-x", "derivatives", "-bd", self.alt_path, "-fs", "events"] - back_main(arg_list) - files1 = get_file_list(self.test_root_back1, exclude_dirs=["derivatives"]) - self.assertEqual(len(files1), 4, "run_restore starts with the right number of files.") - shutil.rmtree(os.path.realpath(os.path.join(self.test_root_back1, "sub1"))) - shutil.rmtree(os.path.realpath(os.path.join(self.test_root_back1, "sub2"))) - os.remove(os.path.realpath(os.path.join(self.test_root_back1, "top_level.tsv"))) - files2 = get_file_list(self.test_root_back1, exclude_dirs=["derivatives"]) - self.assertFalse(files2, "run_restore starts with the right number of files.") - arg_list = [self.test_root_back1, "-bn", "back1", "-bd", self.alt_path] - main(arg_list) - files3 = get_file_list(self.test_root_back1, exclude_dirs=["derivatives"]) - self.assertEqual(len(files3) + 1, len(files1), "run_restore restores all the files after") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/__init__.py b/tests/tools/remodeling/operations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/tools/remodeling/operations/package.json b/tests/tools/remodeling/operations/package.json deleted file mode 100644 index c09b58ce..00000000 --- a/tests/tools/remodeling/operations/package.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "operations", - "version": "1.0.0", - "dependencies": { - } -} diff --git a/tests/tools/remodeling/operations/test_base_op.py b/tests/tools/remodeling/operations/test_base_op.py deleted file mode 100644 index 23fc16d3..00000000 --- a/tests/tools/remodeling/operations/test_base_op.py +++ /dev/null @@ -1,56 +0,0 @@ -import json -import unittest -from hed.tools.remodeling.operations.base_op import BaseOp - - -class TestOp(BaseOp): - NAME = "test" - PARAMS = { - "type": "object", - "properties": {"column_name": {"type": "string"}}, - "required": ["column_name"], - "additionalProperties": False, - } - - def do_op(self, dispatcher, df, name, sidecar=None): - return df - - @staticmethod - def validate_input_data(parameters): - return [] - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - base_parameters = {"column_name": "a_descriptive_name"} - cls.json_parameters = json.dumps(base_parameters) - - @classmethod - def tearDownClass(cls): - pass - - def test_constructor(self): - parameters = json.loads(self.json_parameters) - test_instantiate = TestOp(parameters) - self.assertDictEqual(test_instantiate.parameters, parameters) - - def test_constructor_no_name(self): - class TestOpNoName(BaseOp): - PARAMS = { - "type": "object", - "properties": {"column_name": {"type": "string"}}, - "required": ["column_name"], - "additionalProperties": False, - } - - def do_op(self, dispatcher, df, name, sidecar=None): - return df - - with self.assertRaises(TypeError): - 
TestOpNoName({}) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_base_summary.py b/tests/tools/remodeling/operations/test_base_summary.py deleted file mode 100644 index 62d2a775..00000000 --- a/tests/tools/remodeling/operations/test_base_summary.py +++ /dev/null @@ -1,142 +0,0 @@ -import os -import shutil -import unittest -from hed.tools.remodeling.operations.base_summary import BaseSummary -from hed.tools.remodeling.operations.base_op import BaseOp - - -class TestOp(BaseOp): - NAME = "test_op" - PARAMS = { - "operation": "test_summary_op", - "required_parameters": {"summary_name": str, "summary_filename": str}, - "optional_parameters": {"append_timecode": bool}, - } - - SUMMARY_TYPE = "test_sum" - - def __init__(self, parameters): - super().__init__(parameters) - self.summary_name = parameters["summary_name"] - self.summary_filename = parameters["summary_filename"] - self.append_timecode = parameters.get("append_timecode", False) - - def do_op(self, dispatcher, df, name, sidecar=None): - return df.copy() - - @staticmethod - def validate_input_data(parameters): - return [] - - -class TestSummary(BaseSummary): - - def __init__(self, op): - - super().__init__(op) - self.summary_dict["data1"] = "test data 1" - self.summary_dict["data2"] = "test data 2" - - def get_details_dict(self, include_individual=True): - summary = {"name": self.op.summary_name} - if include_individual: - summary["more"] = "more stuff" - return summary - - def merge_all_info(self): - return {"merged": self.op.summary_name} - - def update_summary(self, info_dict): - pass - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - summary_dir = os.path.realpath( - os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/temp") - ) - cls.summary_dir = summary_dir - - def test_constructor(self): - op2 = TestOp({"summary_name": "test", "summary_filename": "test_context"}) - test = TestSummary(op2) - self.assertIsInstance(test, TestSummary) - - def test_get_text_summary(self): - op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) - test = TestSummary(op) - out1 = test.get_text_summary(individual_summaries="none") - self.assertIsInstance(out1, dict) - self.assertTrue(out1["Dataset"]) - self.assertEqual(len(out1), 1) - out2 = test.get_text_summary(individual_summaries="consolidated") - self.assertIsInstance(out2, dict) - self.assertIn("Dataset", out2) - self.assertNotIn("Individual files", out2) - self.assertLess(len(out1["Dataset"]), len(out2["Dataset"])) - out3 = test.get_text_summary(individual_summaries="separate") - self.assertIsInstance(out3, dict) - self.assertIn("Dataset", out3) - self.assertIn("Individual files", out3) - self.assertEqual(out1["Dataset"], out3["Dataset"]) - self.assertIn("data1", out3["Individual files"]) - - def test_save_no_ind(self): - if os.path.isdir(self.summary_dir): - shutil.rmtree(self.summary_dir) - os.makedirs(self.summary_dir) - op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) - test1 = TestSummary(op) - file_list1 = os.listdir(self.summary_dir) - self.assertFalse(file_list1) - test1.save(self.summary_dir, individual_summaries="none") - dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + "/")) - file_list2 = os.listdir(dir_full) - self.assertEqual(len(file_list2), 1) - basename = os.path.basename(file_list2[0]) - self.assertTrue(basename.startswith("test_context")) - self.assertEqual(os.path.splitext(basename)[1], 
".txt") - shutil.rmtree(self.summary_dir) - - def test_save_consolidated(self): - if os.path.isdir(self.summary_dir): - shutil.rmtree(self.summary_dir) - os.makedirs(self.summary_dir) - op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) - test1 = TestSummary(op) - file_list1 = os.listdir(self.summary_dir) - self.assertFalse(file_list1) - dir_ind = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + "/", "individual_summaries/")) - self.assertFalse(os.path.isdir(dir_ind)) - test1.save(self.summary_dir, file_formats=[".json", ".tsv"], individual_summaries="consolidated") - dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + "/")) - file_list2 = os.listdir(dir_full) - self.assertEqual(len(file_list2), 1) - basename = os.path.basename(file_list2[0]) - self.assertTrue(basename.startswith("test_context")) - self.assertEqual(os.path.splitext(basename)[1], ".json") - shutil.rmtree(self.summary_dir) - - def test_save_separate(self): - if os.path.isdir(self.summary_dir): - shutil.rmtree(self.summary_dir) - os.makedirs(self.summary_dir) - op = TestOp({"summary_name": "test", "summary_filename": "test_context"}) - test1 = TestSummary(op) - file_list1 = os.listdir(self.summary_dir) - self.assertFalse(file_list1) - test1.save(self.summary_dir, file_formats=[".json", ".tsv"], individual_summaries="separate") - dir_ind = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + "/", "individual_summaries/")) - dir_full = os.path.realpath(os.path.join(self.summary_dir, test1.op.summary_name + "/")) - self.assertTrue(os.path.isdir(dir_ind)) - file_list4 = os.listdir(dir_full) - self.assertEqual(len(file_list4), 2) - file_list5 = os.listdir(dir_ind) - self.assertEqual(len(file_list5), 2) - shutil.rmtree(self.summary_dir) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py deleted file mode 100644 index 7e029326..00000000 --- a/tests/tools/remodeling/operations/test_convert_columns_op.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest - - -class Test(unittest.TestCase): - """ - - TODO: Test when no factor names and values are given. 
- - """ - - @classmethod - def setUpClass(cls): - cls.sample_data = [ - [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"], - [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"], - [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"], - [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"], - [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"], - [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"], - ] - cls.factored = [ - [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female", 0, 0], - [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female", 0, 1], - [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female", 0, 0], - [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female", 1, 0], - [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male", 0, 1], - [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male", 0, 0], - ] - cls.sample_columns = [ - "onset", - "duration", - "trial_type", - "stop_signal_delay", - "response_time", - "response_accuracy", - "response_hand", - "sex", - ] - cls.default_factor_columns = ["trial_type.succesful_stop", "trial_type.unsuccesful_stop"] - - def setUp(self): - self.base_parameters = {"column_names": ["onset", "duration", "response_time"], "convert_to": "int"} - - @classmethod - def tearDownClass(cls): - pass - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_factor_column_op.py b/tests/tools/remodeling/operations/test_factor_column_op.py deleted file mode 100644 index 8b5af9cb..00000000 --- a/tests/tools/remodeling/operations/test_factor_column_op.py +++ /dev/null @@ -1,226 +0,0 @@ -import pandas as pd -import numpy as np -import unittest -from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp -from hed.tools.remodeling.dispatcher import Dispatcher - - -class Test(unittest.TestCase): - """ - - TODO: Test when no factor names and values are given. 
- - """ - - @classmethod - def setUpClass(cls): - cls.sample_data = [ - [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"], - [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"], - [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"], - [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"], - [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"], - [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"], - ] - cls.factored = [ - [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female", 0, 0], - [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female", 0, 1], - [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female", 0, 0], - [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female", 1, 0], - [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male", 0, 1], - [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male", 0, 0], - ] - cls.sample_columns = [ - "onset", - "duration", - "trial_type", - "stop_signal_delay", - "response_time", - "response_accuracy", - "response_hand", - "sex", - ] - cls.default_factor_columns = ["trial_type.succesful_stop", "trial_type.unsuccesful_stop"] - - def setUp(self): - self.base_parameters = { - "column_name": "trial_type", - "factor_values": ["succesful_stop", "unsuccesful_stop"], - "factor_names": ["stopped", "stop_failed"], - } - - @classmethod - def tearDownClass(cls): - pass - - def test_no_names(self): - self.base_parameters["factor_names"] = [] - self.base_parameters["factor_values"] = [] - op = FactorColumnOp(self.base_parameters) - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df = Dispatcher.prep_data(df) - df_new = op.do_op(None, df, "sample_data") - self.assertEqual(len(df_new.columns), len(df.columns) + 3) - - def test_valid_factors_no_extras(self): - # Test correct when all valid and no unwanted information - op = FactorColumnOp(self.base_parameters) - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - - df_check = pd.DataFrame(self.factored, columns=self.sample_columns + self.base_parameters["factor_names"]) - df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_new = op.do_op(None, Dispatcher.prep_data(df_test), "sample_data") - df_new = Dispatcher.post_proc_data(df_new) - self.assertEqual(len(df_check), len(df_new), "factor_column should not change number of rows with ignore missing") - self.assertEqual( - len(df_check.columns), - len(df.columns) + len(self.base_parameters["factor_values"]), - "factor_column check should have extra columns with no extras and ignore missing", - ) - self.assertTrue( - list(df_new.columns) == list(df_check.columns), - "factor_column resulting df should have correct columns with no extras and ignore missing", - ) - self.assertTrue( - np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - "factor_column should have expected values when no extras and ignore missing", - ) - - # Test that df has not been changed by the op - self.assertTrue( - list(df.columns) == list(df_test.columns), - "factor_column should not change the input df columns when no extras and ignore missing", - ) - self.assertTrue( - np.array_equal(df.to_numpy(), df_test.to_numpy()), - "factor_column should not change the input df values when no extras and ignore missing", - ) - - def test_valid_factors_no_extras_no_ignore(self): - # Test when no extras and extras not ignored. 
- op = FactorColumnOp(self.base_parameters) - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_check = pd.DataFrame(self.factored, columns=self.sample_columns + self.base_parameters["factor_names"]) - df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_test = Dispatcher.prep_data(df_test) - df_new = op.do_op(None, df_test, "sample_data") - df_new = Dispatcher.post_proc_data(df_new) - df_test1 = Dispatcher.post_proc_data(df_test) - - self.assertEqual( - len(df_check), len(df_new), "factor_column should not change number of rows with no extras and no ignore" - ) - self.assertEqual( - len(df_check.columns), - len(df.columns) + len(self.base_parameters["factor_values"]), - "factor_column check should have extra columns with no extras and no ignore", - ) - self.assertTrue( - list(df_new.columns) == list(df_check.columns), - "factor_column resulting df should have correct columns with no extras and no ignore", - ) - self.assertTrue( - np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - "factor_column should have expected values when no extras and no ignore", - ) - - # Test that df has not been changed by the op - self.assertTrue( - list(df.columns) == list(df_test1.columns), - "factor_column should not change the input df columns when no extras and no ignore missing", - ) - self.assertTrue( - np.array_equal(df.to_numpy(), df_test1.to_numpy()), - "factor_column should not change the input df values when no extras and no ignore missing", - ) - - def test_valid_factors_extras_ignore(self): - # Test when extra factor values but ignored - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_check = pd.DataFrame(self.factored, columns=self.sample_columns + self.base_parameters["factor_names"]) - self.base_parameters["factor_values"] = ["succesful_stop", "unsuccesful_stop", "face"] - self.base_parameters["factor_names"] = ["stopped", "stop_failed", "baloney"] - op = FactorColumnOp(self.base_parameters) - df_check["baloney"] = [0, 0, 0, 0, 0, 0] - df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_new = Dispatcher.prep_data(df_test) - df_new = op.do_op(None, df_new, "sample_data") - df_new = Dispatcher.post_proc_data(df_new) - self.assertEqual( - len(df_check), len(df_new), "factor_column should not change number of rows with extras and ignore missing" - ) - self.assertEqual( - len(df_check.columns), - len(df.columns) + len(self.base_parameters["factor_values"]), - "factor_column check should have extra columns with extras and ignore missing", - ) - self.assertEqual( - len(df_check.columns), - len(df.columns) + len(self.base_parameters["factor_values"]), - "factor_column should have extra columns with extras and ignore missing", - ) - self.assertTrue( - list(df_new.columns) == list(df_check.columns), - "factor_column resulting df should have correct columns with extras and ignore missing", - ) - self.assertTrue( - np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - "factor_column should have expected values with extras and ignore missing", - ) - - # Test that df has not been changed by the op - self.assertTrue( - list(df.columns) == list(df_test.columns), - "factor_column should not change the input df columns when extras and no ignore missing", - ) - self.assertTrue( - np.array_equal(df.to_numpy(), df_test.to_numpy()), - "factor_column should not change the input df values when extras and no ignore missing", - ) - - def test_valid_factors_extras_no_ignore(self): - # Test when extra factors are included but 
not ignored. - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_check = pd.DataFrame(self.factored, columns=self.sample_columns + self.base_parameters["factor_names"]) - self.base_parameters["factor_values"] = ["succesful_stop", "unsuccesful_stop", "face"] - self.base_parameters["factor_names"] = ["stopped", "stop_failed", "baloney"] - op = FactorColumnOp(self.base_parameters) - df_check["baloney"] = [0, 0, 0, 0, 0, 0] - df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_new = op.do_op(None, Dispatcher.prep_data(df_test), "sample_data") - df_new = Dispatcher.post_proc_data(df_new) - self.assertEqual( - len(df_check), len(df_new), "factor_column should not change number of rows with extras and ignore missing" - ) - self.assertEqual( - len(df_check.columns), - len(df.columns) + len(self.base_parameters["factor_values"]), - "factor_column check should have extra columns with extras and ignore missing", - ) - self.assertEqual( - len(df_check.columns), - len(df.columns) + len(self.base_parameters["factor_values"]), - "factor_column should have extra columns with extras and ignore missing", - ) - self.assertTrue( - list(df_new.columns) == list(df_check.columns), - "factor_column resulting df should have correct columns with extras and ignore missing", - ) - self.assertTrue( - np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - "factor_column should have expected values with extras and ignore missing", - ) - - # Test that df has not been changed by the op - self.assertTrue( - list(df.columns) == list(df_test.columns), - "factor_column should not change the input df columns when extras and no ignore missing", - ) - self.assertTrue( - np.array_equal(df.to_numpy(), df_test.to_numpy()), - "factor_column should not change the input df values when extras and no ignore missing", - ) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_factor_hed_tags_op.py b/tests/tools/remodeling/operations/test_factor_hed_tags_op.py deleted file mode 100644 index 2c1422da..00000000 --- a/tests/tools/remodeling/operations/test_factor_hed_tags_op.py +++ /dev/null @@ -1,203 +0,0 @@ -import os -import json -import unittest -from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp -from hed.tools.remodeling.dispatcher import Dispatcher - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/")) - cls.data_path = os.path.realpath(os.path.join(path, "sub-002_task-FacePerception_run-1_events.tsv")) - cls.json_path = os.path.realpath(os.path.join(path, "task-FacePerception_events.json")) - base_parameters = { - "queries": ["sensory-event", "agent-action"], - "query_names": [], - "remove_types": [], - "expand_context": False, - "replace_defs": True, - } - cls.json_params = json.dumps(base_parameters) - cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - - @classmethod - def tearDownClass(cls): - pass - - def test_valid_no_query_names(self): - # Test correct when all valid and no unwanted information - params = json.loads(self.json_params) - op = FactorHedTagsOp(params) - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0") - df_new = dispatch.get_data_file(self.data_path) - pre_columns = len(list(df_new.columns)) - df_new = op.do_op(dispatch, dispatch.prep_data(df_new), "run-01", sidecar=self.json_path) - df_new 
= dispatch.post_proc_data(df_new) - self.assertEqual(len(df_new), 200, "factor_hed_tags_op length is correct") - self.assertEqual(len(df_new.columns), pre_columns + 2, "factor_hed_tags_op has correct number of columns") - self.assertIn("query_0", list(df_new.columns)) - self.assertIn("query_1", list(df_new.columns)) - - def test_valid_with_query_names(self): - # Test correct when all valid and no unwanted information - params = json.loads(self.json_params) - params["query_names"] = ["apple", "banana"] - op = FactorHedTagsOp(params) - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0") - df_new = dispatch.get_data_file(self.data_path) - pre_columns = len(list(df_new.columns)) - df_new = op.do_op(dispatch, dispatch.prep_data(df_new), "run-01", sidecar=self.json_path) - df_new = dispatch.post_proc_data(df_new) - self.assertEqual(len(df_new), 200, "factor_hed_tags_op length is correct") - self.assertEqual(len(df_new.columns), pre_columns + 2, "factor_hed_tags_op has correct number of columns") - self.assertIn("apple", list(df_new.columns)) - self.assertIn("banana", list(df_new.columns)) - - def test_invalid_query_names(self): - # Duplicate query names - params = json.loads(self.json_params) - params["query_names"] = ["apple", "apple"] - with self.assertRaises(ValueError) as context: - FactorHedTagsOp(params) - self.assertEqual(context.exception.args[0], "FactorHedTagInvalidQueries") - - # Query names have wrong length - params = json.loads(self.json_params) - params["query_names"] = ["apple", "banana", "pear"] - with self.assertRaises(ValueError) as context: - FactorHedTagsOp(params) - self.assertEqual(context.exception.args[0], "FactorHedTagInvalidQueries") - - # Query name already a column name - params = json.loads(self.json_params) - params["query_names"] = ["face_type", "bananas"] - op = FactorHedTagsOp(params) - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0") - df_new = dispatch.get_data_file(self.data_path) - with self.assertRaises(ValueError) as context: - op.do_op(dispatch, dispatch.prep_data(df_new), "run-01", sidecar=self.json_path) - self.assertEqual(context.exception.args[0], "QueryNameAlreadyColumn") - - def test_no_expand_context(self): - # Setup for testing remove types - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0") - params = json.loads(self.json_params) - params["expand_context"] = False - params["queries"] = ["Def/Famous-face-cond", "Def/Right-sym-cond", "Def/Initialize-recording"] - df = dispatch.get_data_file(self.data_path) - df = dispatch.prep_data(df) - df_columns = len(list(df.columns)) - total_famous = (df["face_type"] == "famous_face").sum() - - # If Defs are replaced and Condition-variable not removed, should not find Def/Famous-face-cond - params["replace_defs"] = True - params["remove_types"] = [] - op = FactorHedTagsOp(params) - df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path) - df_new = dispatch.post_proc_data(df_new) - self.assertEqual(len(df_new), len(df)) - self.assertEqual(len(df_new.columns), df_columns + 3) - self.assertFalse(df_new["query_0"].sum()) - self.assertFalse(df_new["query_1"].sum()) - self.assertFalse(df_new["query_2"].sum()) - - # If Defs are not replaced and Condition-variable not removed, should find Def/Famous-face-cond - params["replace_defs"] = False - params["remove_types"] = [] - op = FactorHedTagsOp(params) - df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path) - df_new = dispatch.post_proc_data(df_new) - 
self.assertEqual(len(df_new), len(df))
-        self.assertEqual(len(df_new.columns), df_columns + 3)
-        self.assertEqual(df_new["query_0"].sum(), total_famous)
-        self.assertEqual(df_new["query_1"].sum(), 1)
-        self.assertEqual(df_new["query_2"].sum(), 1)
-
-        # If Defs are not replaced and Condition-variable is removed, should not find Def/Famous-face-cond
-        params["replace_defs"] = False
-        params["remove_types"] = ["Condition-variable", "Task"]
-        op = FactorHedTagsOp(params)
-        df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path)
-        df_new = dispatch.post_proc_data(df_new)
-        self.assertEqual(len(df_new), len(df))
-        self.assertEqual(len(df_new.columns), df_columns + 3)
-        self.assertFalse(df_new["query_0"].sum())
-        self.assertFalse(df_new["query_1"].sum())
-        self.assertEqual(df_new["query_2"].sum(), 1)
-
-        # If Defs are replaced and Condition-variable is removed, should not find Def/Famous-face-cond
-        params["replace_defs"] = True
-        params["remove_types"] = ["Condition-variable", "Task"]
-        op = FactorHedTagsOp(params)
-        df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path)
-        df_new = dispatch.post_proc_data(df_new)
-        self.assertEqual(len(df_new), len(df))
-        self.assertEqual(len(df_new.columns), df_columns + 3)
-        self.assertFalse(df_new["query_0"].sum())
-        self.assertFalse(df_new["query_1"].sum())
-        self.assertFalse(df_new["query_2"].sum())
-
-    def test_expand_context(self):
-        # Setup for testing remove types
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-        params = json.loads(self.json_params)
-        params["expand_context"] = True
-        params["queries"] = ["Def/Famous-face-cond", "Def/Right-sym-cond", "Def/Initialize-recording"]
-        df = dispatch.get_data_file(self.data_path)
-        df = dispatch.prep_data(df)
-        df_columns = len(list(df.columns))
-        total_famous = (df["face_type"] == "famous_face").sum()
-
-        # If Defs are replaced and Condition-variable not removed, should not find Def/Famous-face-cond
-        params["replace_defs"] = True
-        params["remove_types"] = []
-        op = FactorHedTagsOp(params)
-        df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path)
-        df_new = dispatch.post_proc_data(df_new)
-        self.assertEqual(len(df_new), len(df))
-        self.assertEqual(len(df_new.columns), df_columns + 3)
-        self.assertFalse(df_new["query_0"].sum())
-        self.assertFalse(df_new["query_1"].sum())
-        self.assertFalse(df_new["query_2"].sum())
-
-        # If Defs are not replaced and Condition-variable not removed, should find Def/Famous-face-cond
-        params["replace_defs"] = False
-        params["remove_types"] = []
-        op = FactorHedTagsOp(params)
-        df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path)
-        df_new = dispatch.post_proc_data(df_new)
-        self.assertEqual(len(df_new), len(df))
-        self.assertEqual(len(df_new.columns), df_columns + 3)
-        self.assertEqual(df_new["query_0"].sum(), total_famous)
-        self.assertEqual(df_new["query_1"].sum(), len(df))
-        self.assertEqual(df_new["query_2"].sum(), len(df))
-
-        # If Defs are not replaced and Condition-variable is removed, should not find Def/Famous-face-cond
-        params["replace_defs"] = False
-        params["remove_types"] = ["Condition-variable", "Task"]
-        op = FactorHedTagsOp(params)
-        df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path)
-        df_new = dispatch.post_proc_data(df_new)
-        self.assertEqual(len(df_new), len(df))
-        self.assertEqual(len(df_new.columns), df_columns + 3)
-        self.assertFalse(df_new["query_0"].sum())
-        self.assertFalse(df_new["query_1"].sum())
-        self.assertEqual(df_new["query_2"].sum(), len(df))
-
-        # If Defs are replaced and Condition-variable is removed, should not find Def/Famous-face-cond
-        params["replace_defs"] = True
-        params["remove_types"] = ["Condition-variable", "Task"]
-        op = FactorHedTagsOp(params)
-        df_new = op.do_op(dispatch, df, "run-01", sidecar=self.json_path)
-        df_new = dispatch.post_proc_data(df_new)
-        self.assertEqual(len(df_new), len(df))
-        self.assertEqual(len(df_new.columns), df_columns + 3)
-        self.assertFalse(df_new["query_0"].sum())
-        self.assertFalse(df_new["query_1"].sum())
-        self.assertFalse(df_new["query_2"].sum())
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_factor_hed_type_op.py b/tests/tools/remodeling/operations/test_factor_hed_type_op.py
deleted file mode 100644
index 30b72f13..00000000
--- a/tests/tools/remodeling/operations/test_factor_hed_type_op.py
+++ /dev/null
@@ -1,50 +0,0 @@
-import os
-import unittest
-from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp
-from hed.tools.remodeling.dispatcher import Dispatcher
-
-
-class Test(unittest.TestCase):
-    """Tests for the factor_hed_type remodeling operation."""
-
-    @classmethod
-    def setUpClass(cls):
-        path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/"))
-        data_path = os.path.realpath(os.path.join(path, "sub-002_task-FacePerception_run-1_events.tsv"))
-        cls.json_path = os.path.realpath(os.path.join(path, "task-FacePerception_events.json"))
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        cls.df_test = dispatch.prep_data(dispatch.get_data_file(data_path))
-        cls.dispatch = dispatch
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def setUp(self):
-        self.base_parameters = {"type_tag": "Condition-variable", "type_values": []}
-
-    def test_valid(self):
-        # Test correct when all valid and no unwanted information
-        op = FactorHedTypeOp(self.base_parameters)
-        df_new = op.do_op(self.dispatch, self.df_test, "subj2_run1", sidecar=self.json_path)
-        self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct")
-        self.assertEqual(len(df_new.columns), 17, "factor_hed_type_op has correct number of columns")
-
-    def test_valid_specific_column(self):
-        parms = self.base_parameters
-        parms["type_values"] = ["key-assignment"]
-        op = FactorHedTypeOp(parms)
-        df_new = op.do_op(self.dispatch, self.df_test, "run-01", sidecar=self.json_path)
-        df_new = self.dispatch.post_proc_data(df_new)
-        self.assertEqual(len(df_new), 200, "factor_hed_type_op length is correct when type_values specified")
-        self.assertEqual(
-            len(df_new.columns), 11, "factor_hed_type_op has correct number of columns when type_values specified"
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_merge_consecutive_op.py b/tests/tools/remodeling/operations/test_merge_consecutive_op.py
deleted file mode 100644
index dca40a43..00000000
--- a/tests/tools/remodeling/operations/test_merge_consecutive_op.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import json
-import math
-import numpy as np
-import pandas as pd
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.merge_consecutive_op import MergeConsecutiveOp
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, "right", "female"],
-            [9.5856, 0.5083, "go", "n/a", "right", "female"],
"n/a", "right", "female"], - [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "female"], - [14.2, 0.5083, "succesful_stop", 0.2, "n/a", "female"], - [15.3, 0.7083, "succesful_stop", 0.2, "n/a", "female"], - [17.3, 0.5083, "succesful_stop", 0.25, "n/a", "female"], - [19.0, 0.5083, "succesful_stop", 0.25, "n/a", "female"], - [21.1021, 0.5083, "unsuccesful_stop", 0.25, "left", "male"], - [22.6103, 0.5083, "go", "n/a", "left", "male"], - ] - cls.sample_columns = ["onset", "duration", "trial_type", "stop_signal_delay", "response_hand", "sex"] - - cls.result_data = [ - [0.0776, 0.5083, "go", "n/a", "right", "female"], - [5.5774, 0.5083, "unsuccesful_stop", 0.2, "right", "female"], - [9.5856, 0.5083, "go", "n/a", "right", "female"], - [13.5939, 2.4144, "succesful_stop", 0.2, "n/a", "female"], - [17.3, 2.2083, "succesful_stop", 0.25, "n/a", "female"], - [21.1021, 0.5083, "unsuccesful_stop", 0.25, "left", "male"], - [22.6103, 0.5083, "go", "n/a", "left", "male"], - ] - - base_parameters = { - "column_name": "trial_type", - "event_code": "succesful_stop", - "match_columns": ["stop_signal_delay", "response_hand", "sex"], - "set_durations": True, - "ignore_missing": True, - } - cls.json_parms = json.dumps(base_parameters) - cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0") - - @classmethod - def tearDownClass(cls): - pass - - def get_dfs(self, op): - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), "run-01") - return df, self.dispatch.post_proc_data(df_new) - - def test_do_op_valid(self): - # Test when no extras but ignored. - parms = json.loads(self.json_parms) - op = MergeConsecutiveOp(parms) - df_test, df_new = self.get_dfs(op) - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - self.assertTrue(list(df_new.columns) == list(df.columns), "merge_consecutive should not change the number of columns") - for index, _row in df_new.iterrows(): - if not math.isclose(df_new.loc[index, "onset"], df_new.loc[index, "onset"]): - self.fail( - f"merge_consecutive result has wrong onset at {index}: {df_new.loc[index, 'onset']} " - + "instead of{df_results.loc[index, 'onset']}" - ) - if not math.isclose(df_new.loc[index, "duration"], df_new.loc[index, "duration"]): - self.fail( - f"merge_consecutive result has wrong duration at {index}: {df_new.loc[index, 'duration']} " - + f"instead of {df_new.loc[index, 'duration']}" - ) - - # Test that df has not been changed by the op - self.assertTrue( - list(df.columns) == list(df_test.columns), - "merge_consecutive should not change the input df columns when no extras and not ignored", - ) - for index, _row in df.iterrows(): - if not math.isclose(df.loc[index, "onset"], df_test.loc[index, "onset"]): - self.fail( - "merge_consecutive should not change onset after op, but onset does not agree at" - + f"at {index}: {df.loc[index, 'onset']} instead of {df_test.loc[index, 'onset']}" - ) - if not math.isclose(df.loc[index, "duration"], df_test.loc[index, "duration"]): - self.fail( - "merge_consecutive should not change duration after op, but duration does not agree at" - + f"at {index}: {df.loc[index, 'duration']} instead of {df_test.loc[index, 'duration']}" - ) - - def test_do_op_no_set_durations(self): - # Test when no set duration. 
-        parms1 = json.loads(self.json_parms)
-        parms1["set_durations"] = False
-        op1 = MergeConsecutiveOp(parms1)
-        df_test, df_new1 = self.get_dfs(op1)
-        parms2 = json.loads(self.json_parms)
-        parms2["set_durations"] = True
-        op2 = MergeConsecutiveOp(parms2)
-        df_test2, df_new2 = self.get_dfs(op2)
-        self.assertTrue(list(df_new1.columns) == list(df_new2.columns))
-        code_mask = df_new1["duration"] != df_new2["duration"]
-        self.assertEqual(sum(code_mask.astype(int)), 2)
-
-    def test_do_op_valid_no_change(self):
-        # Test that an event code absent from the column leaves the data unchanged.
-        parms = json.loads(self.json_parms)
-        parms["event_code"] = "baloney"
-        op = MergeConsecutiveOp(parms)
-        df, df_new = self.get_dfs(op)
-        self.assertEqual(len(df), len(df_new))
-
-    def test_get_remove_groups(self):
-        match_df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        match_df = match_df.replace("n/a", np.nan)
-        match_df1 = match_df.loc[:, ["duration", "stop_signal_delay", "response_hand", "sex"]]
-        code_mask1 = pd.Series([False, False, False, True, True, True, True, True, False, False])
-        remove_groups1 = MergeConsecutiveOp._get_remove_groups(match_df1, code_mask1)
-        self.assertEqual(max(remove_groups1), 3, "_get_remove_groups has three groups when duration is included")
-        self.assertEqual(remove_groups1[4], 1, "_get_remove_groups has correct first group")
-        self.assertEqual(remove_groups1[7], 3, "_get_remove_groups has correct second group")
-        match_df2 = match_df.loc[:, ["stop_signal_delay", "response_hand", "sex"]]
-        remove_groups2 = MergeConsecutiveOp._get_remove_groups(match_df2, code_mask1)
-        self.assertEqual(max(remove_groups2), 2, "_get_remove_groups has 2 groups when duration not included")
-        self.assertEqual(remove_groups2[4], 1, "_get_remove_groups has correct first group")
-        self.assertEqual(remove_groups2[5], 1, "_get_remove_groups has correct first group")
-        self.assertEqual(remove_groups2[7], 2, "_get_remove_groups has correct second group")
-        match_df3 = match_df.loc[:, ["trial_type"]]
-        remove_groups3 = MergeConsecutiveOp._get_remove_groups(match_df3, code_mask1)
-        self.assertEqual(max(remove_groups3), 1, "_get_remove_groups has a single group when only trial_type is matched")
-        self.assertEqual(remove_groups3[4], 1, "_get_remove_groups has correct first group")
-        self.assertEqual(remove_groups3[5], 1, "_get_remove_groups has correct first group")
-        self.assertEqual(remove_groups3[7], 1, "_get_remove_groups merges position 7 into the single group")
-
-    def test_invalid_missing_column(self):
-        parms = json.loads(self.json_parms)
-        parms["column_name"] = "baloney"
-        parms["ignore_missing"] = False
-        op = MergeConsecutiveOp(parms)
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df = df.replace("n/a", np.nan)
-        with self.assertRaises(ValueError) as context:
-            op.do_op(self.dispatch, df, "sample_data")
-        self.assertEqual(context.exception.args[0], "ColumnMissing")
-
-    def test_do_op_missing_onset(self):
-        parms = json.loads(self.json_parms)
-        parms["ignore_missing"] = False
-        op = MergeConsecutiveOp(parms)
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df = df.replace("n/a", np.nan)
-        df_new = df.drop("onset", axis=1)
-        self.assertEqual(len(df.columns), len(df_new.columns) + 1)
-        with self.assertRaises(ValueError) as context:
-            op.do_op(self.dispatch, df_new, "sample_data")
-        self.assertEqual(context.exception.args[0], "MissingOnsetColumn")
-
-    def test_do_op_missing_duration(self):
-        parms = json.loads(self.json_parms)
-        parms["set_durations"] = True
-        op = MergeConsecutiveOp(parms)
-        df =
pd.DataFrame(self.sample_data, columns=self.sample_columns) - df = df.replace("n/a", np.nan) - df_new = df.drop("duration", axis=1) - self.assertEqual(len(df.columns), len(df_new.columns) + 1) - with self.assertRaises(ValueError) as context: - op.do_op(self.dispatch, df_new, "sample_data") - self.assertEqual(context.exception.args[0], "MissingDurationColumn") - - def test_do_op_missing_match(self): - parms = json.loads(self.json_parms) - parms["match_columns"] = ["stop_signal_delay", "response_hand", "sex", "baloney"] - parms["ignore_missing"] = False - op = MergeConsecutiveOp(parms) - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df = df.replace("n/a", np.nan) - with self.assertRaises(ValueError) as context: - op.do_op(self.dispatch, df, "sample_data") - self.assertEqual(context.exception.args[0], "MissingMatchColumns") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_number_groups.py b/tests/tools/remodeling/operations/test_number_groups.py deleted file mode 100644 index ac1ed008..00000000 --- a/tests/tools/remodeling/operations/test_number_groups.py +++ /dev/null @@ -1,154 +0,0 @@ -from copy import deepcopy -import json -import unittest - - -class Test(unittest.TestCase): - """ """ - - @classmethod - def setUpClass(cls): - cls.sample_data = [ - [33.4228, 2.0084, "80"], - [36.9395, 0.5, "40"], - [37.4395, 0.25, "30"], - [37.6895, 0.4083, "12"], - [38.0936, 0.0, "2"], - [38.0979, 0.5, "40"], - [38.5979, 0.25, "30"], - [38.8479, 0.3, "11"], - [39.1435, 0.0, "1"], - [39.1479, 0.5, "40"], - [115.6238, 0.25, "30"], - [115.8738, 0.3083, "12"], - [116.1782, 0.0, "1"], - [116.18220000000001, 0.0167, "70"], - [134.1619, 0.0, "3"], - [134.16570000000002, 2.0084, "80"], - [151.7409, 0.5, "40"], - [152.241, 0.25, "30"], - [152.491, 0.2, "211"], - [152.691, 1.05, "221"], - [347.9184, 0.5, "40"], - [348.4184, 0.25, "30"], - [348.6684, 0.4667, "11"], - [349.1281, 0.0, "1"], - [349.1351, 0.0167, "70"], - [366.5138, 0.0, "3"], - [366.5186, 2.0084, "Stress_post"], - ] - - cls.sample_columns = ["onset", "duration", "code"] - cls.existing_sample_columns = ["onset", "duration", "number"] - - cls.numbered_data = [ - [33.4228, 2.0084, "80", "n/a"], - [36.9395, 0.5, "40", "n/a"], - [37.4395, 0.25, "30", 1], - [37.6895, 0.4083, "12", 1], - [38.0936, 0.0, "2", 1], - [38.0979, 0.5, "40", "n/a"], - [38.5979, 0.25, "30", 2], - [38.8479, 0.3, "11", 2], - [39.1435, 0.0, "1", 2], - [39.1479, 0.5, "40", "n/a"], - [115.6238, 0.25, "30", 3], - [115.8738, 0.3083, "12", 3], - [116.1782, 0.0, "1", 3], - [116.18220000000001, 0.0167, "70", "n/a"], - [134.1619, 0.0, "3", "n/a"], - [134.16570000000002, 2.0084, "80", "n/a"], - [151.7409, 0.5, "40", "n/a"], - [152.241, 0.25, "30", 4], - [152.491, 0.2, "211", 4], - [152.691, 1.05, "221", 4], - [347.9184, 0.5, "40", "n/a"], - [348.4184, 0.25, "30", 5], - [348.6684, 0.4667, "11", 5], - [349.1281, 0.0, "1", 5], - [349.1351, 0.0167, "70", "n/a"], - [366.5138, 0.0, "3", "n/a"], - [366.5186, 2.0084, "Stress_post", "n/a"], - ] - - cls.numbered_columns = ["onset", "duration", "code", "number"] - - cls.overwritten_data = [ - [33.4228, 2.0084, "n/a"], - [36.9395, 0.5, "n/a"], - [37.4395, 0.25, 1], - [37.6895, 0.4083, 1], - [38.0936, 0.0, 1], - [38.0979, 0.5, "n/a"], - [38.5979, 0.25, 2], - [38.8479, 0.3, 2], - [39.1435, 0.0, 2], - [39.1479, 0.5, "n/a"], - [115.6238, 0.25, 3], - [115.8738, 0.3083, 3], - [116.1782, 0.0, 3], - [116.18220000000001, 0.0167, "n/a"], - [134.1619, 0.0, "n/a"], - [134.16570000000002, 2.0084, 
"n/a"], - [151.7409, 0.5, "n/a"], - [152.241, 0.25, 4], - [152.491, 0.2, 4], - [152.691, 1.05, 4], - [347.9184, 0.5, "n/a"], - [348.4184, 0.25, 5], - [348.6684, 0.4667, 5], - [349.1281, 0.0, 5], - [349.1351, 0.0167, "n/a"], - [366.5138, 0.0, "n/a"], - [366.5186, 2.0084, "n/a"], - ] - - base_parameters = { - "number_column_name": "number", - "source_column": "code", - "start": {"values": ["40"], "inclusion": "exclude"}, - "stop": {"values": ["40", "70"], "inclusion": "exclude"}, - } - - overwrite_false_parameters = deepcopy(base_parameters) - overwrite_false_parameters["overwrite"] = False - overwrite_false_parameters["source_column"] = "number" - - overwrite_true_parms = deepcopy(base_parameters) - overwrite_true_parms["overwrite"] = True - overwrite_true_parms["source_column"] = "number" - - missing_startstop_parms = deepcopy(base_parameters) - missing_startstop_parms["start"] = {"values": ["40"]} - - wrong_startstop_parms = deepcopy(base_parameters) - wrong_startstop_parms["stop"]["column"] = "number" - - wrong_startstop_type_parms = deepcopy(base_parameters) - wrong_startstop_type_parms["start"]["values"] = "40" - - wrong_inclusion_parms = deepcopy(base_parameters) - wrong_inclusion_parms["stop"]["inclusion"] = "exclusive" - - missing_startstop_value_parms = deepcopy(base_parameters) - missing_startstop_value_parms["start"]["values"] = ["40", "20"] - - cls.json_parms = json.dumps(base_parameters) - cls.json_overwrite_false_parms = json.dumps(overwrite_false_parameters) - cls.json_overwrite_true_parms = json.dumps(overwrite_true_parms) - cls.json_missing_startstop_parms = json.dumps(missing_startstop_parms) - cls.json_wrong_startstop_parms = json.dumps(wrong_startstop_parms) - cls.json_wrong_startstop_type_parms = json.dumps(wrong_startstop_type_parms) - cls.json_wrong_inclusion_parms = json.dumps(wrong_inclusion_parms) - cls.json_missing_startstop_value_parms = json.dumps(missing_startstop_value_parms) - - cls.dispatcher = None - cls.file_name = None - - @classmethod - def tearDownClass(cls): - pass - - # test working - def test_number_groups_new_column(self): - pass diff --git a/tests/tools/remodeling/operations/test_number_rows_op.py b/tests/tools/remodeling/operations/test_number_rows_op.py deleted file mode 100644 index 38144f9a..00000000 --- a/tests/tools/remodeling/operations/test_number_rows_op.py +++ /dev/null @@ -1,205 +0,0 @@ -import json -import unittest -from hed.tools.remodeling.operations.number_rows_op import NumberRowsOp - - -class Test(unittest.TestCase): - """ """ - - @classmethod - def setUpClass(cls): - cls.sample_data = [ - [33.4228, 2.0084, "80"], - [36.9395, 0.5, "40"], - [37.4395, 0.25, "30"], - [37.6895, 0.4083, "12"], - [38.0936, 0.0, "2"], - [38.0979, 0.5, "40"], - [38.5979, 0.25, "30"], - [38.8479, 0.3, "11"], - [39.1435, 0.0, "1"], - [39.1479, 0.5, "40"], - [115.6238, 0.25, "30"], - [115.8738, 0.3083, "12"], - [116.1782, 0.0, "1"], - [116.18220000000001, 0.0167, "70"], - [134.1619, 0.0, "3"], - [134.16570000000002, 2.0084, "80"], - [151.7409, 0.5, "40"], - [152.241, 0.25, "30"], - [152.491, 0.2, "211"], - [152.691, 1.05, "221"], - [347.9184, 0.5, "40"], - [348.4184, 0.25, "30"], - [348.6684, 0.4667, "11"], - [349.1281, 0.0, "1"], - [349.1351, 0.0167, "70"], - [366.5138, 0.0, "3"], - [366.5186, 2.0084, "Stress_post"], - ] - - cls.sample_columns = ["onset", "duration", "code"] - cls.existing_sample_columns = ["onset", "duration", "number"] - - cls.numbered_data = [ - [33.4228, 2.0084, "80", 1], - [36.9395, 0.5, "40", 2], - [37.4395, 0.25, "30", 3], - [37.6895, 
0.4083, "12", 4], - [38.0936, 0.0, "2", 5], - [38.0979, 0.5, "40", 6], - [38.5979, 0.25, "30", 7], - [38.8479, 0.3, "11", 8], - [39.1435, 0.0, "1", 9], - [39.1479, 0.5, "40", 10], - [115.6238, 0.25, "30", 11], - [115.8738, 0.3083, "12", 12], - [116.1782, 0.0, "1", 13], - [116.18220000000001, 0.0167, "70", 14], - [134.1619, 0.0, "3", 15], - [134.16570000000002, 2.0084, "80", 16], - [151.7409, 0.5, "40", 17], - [152.241, 0.25, "30", 18], - [152.491, 0.2, "211", 19], - [152.691, 1.05, "221", 20], - [347.9184, 0.5, "40", 21], - [348.4184, 0.25, "30", 22], - [348.6684, 0.4667, "11", 23], - [349.1281, 0.0, "1", 24], - [349.1351, 0.0167, "70", 25], - [366.5138, 0.0, "3", 26], - [366.5186, 2.0084, "Stress_post", 27], - ] - - cls.numbered_columns = ["onset", "duration", "code", "number"] - - cls.overwritten_data = [ - [33.4228, 2.0084, 1], - [36.9395, 0.5, 2], - [37.4395, 0.25, 3], - [37.6895, 0.4083, 4], - [38.0936, 0.0, 5], - [38.0979, 0.5, 6], - [38.5979, 0.25, 7], - [38.8479, 0.3, 8], - [39.1435, 0.0, 9], - [39.1479, 0.5, 10], - [115.6238, 0.25, 11], - [115.8738, 0.3083, 12], - [116.1782, 0.0, 13], - [116.18220000000001, 0.0167, 14], - [134.1619, 0.0, 15], - [134.16570000000002, 2.0084, 16], - [151.7409, 0.5, 17], - [152.241, 0.25, 18], - [152.491, 0.2, 19], - [152.691, 1.05, 20], - [347.9184, 0.5, 21], - [348.4184, 0.25, 22], - [348.6684, 0.4667, 23], - [349.1281, 0.0, 24], - [349.1351, 0.0167, 25], - [366.5138, 0.0, 26], - [366.5186, 2.0084, 27], - ] - - cls.filter_numbered_data = [ - [33.4228, 2.0084, "80", "n/a"], - [36.9395, 0.5, "40", 1], - [37.4395, 0.25, "30", "n/a"], - [37.6895, 0.4083, "12", "n/a"], - [38.0936, 0.0, "2", "n/a"], - [38.0979, 0.5, "40", 2], - [38.5979, 0.25, "30", "n/a"], - [38.8479, 0.3, "11", "n/a"], - [39.1435, 0.0, "1", "n/a"], - [39.1479, 0.5, "40", 3], - [115.6238, 0.25, "30", "n/a"], - [115.8738, 0.3083, "12", "n/a"], - [116.1782, 0.0, "1", "n/a"], - [116.18220000000001, 0.0167, "70", "n/a"], - [134.1619, 0.0, "3", "n/a"], - [134.16570000000002, 2.0084, "80", "n/a"], - [151.7409, 0.5, "40", 4], - [152.241, 0.25, "30", "n/a"], - [152.491, 0.2, "211", "n/a"], - [152.691, 1.05, "221", "n/a"], - [347.9184, 0.5, "40", 5], - [348.4184, 0.25, "30", "n/a"], - [348.6684, 0.4667, "11", "n/a"], - [349.1281, 0.0, "1", "n/a"], - [349.1351, 0.0167, "70", "n/a"], - [366.5138, 0.0, "3", "n/a"], - [366.5186, 2.0084, "Stress_post", "n/a"], - ] - - cls.filter_overwritten_numbered_data = [ - [33.4228, 2.0084, "n/a"], - [36.9395, 0.5, 1], - [37.4395, 0.25, "n/a"], - [37.6895, 0.4083, "n/a"], - [38.0936, 0.0, "n/a"], - [38.0979, 0.5, 2], - [38.5979, 0.25, "n/a"], - [38.8479, 0.3, "n/a"], - [39.1435, 0.0, "n/a"], - [39.1479, 0.5, 3], - [115.6238, 0.25, "n/a"], - [115.8738, 0.3083, "n/a"], - [116.1782, 0.0, "n/a"], - [116.18220000000001, 0.0167, "n/a"], - [134.1619, 0.0, "n/a"], - [134.16570000000002, 2.0084, "n/a"], - [151.7409, 0.5, 4], - [152.241, 0.25, "n/a"], - [152.491, 0.2, "n/a"], - [152.691, 1.05, "n/a"], - [347.9184, 0.5, 5], - [348.4184, 0.25, "n/a"], - [348.6684, 0.4667, "n/a"], - [349.1281, 0.0, "n/a"], - [349.1351, 0.0167, "n/a"], - [366.5138, 0.0, "n/a"], - [366.5186, 2.0084, "n/a"], - ] - - base_parameters = {"number_column_name": "number"} - - cls.json_parms = json.dumps(base_parameters) - - cls.dispatcher = None - cls.file_name = None - - @classmethod - def tearDownClass(cls): - pass - - def test_number_rows_new_column(self): - # Test when new column name is given with overwrite unspecified (=False) - parms = json.loads(self.json_parms) - op = NumberRowsOp(parms) - 
self.assertIsInstance(op, NumberRowsOp) - # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(df_check.columns), - # "numbered_events should have the expected columns") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(all([i + 1 == value for (i, value) in enumerate(df_new[parms['number_column_name']])]), - # "event should be numbered consecutively from 1 to length of the dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "number_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "number_rows should not change the input df values") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_remap_columns_op.py b/tests/tools/remodeling/operations/test_remap_columns_op.py deleted file mode 100644 index 443a0936..00000000 --- a/tests/tools/remodeling/operations/test_remap_columns_op.py +++ /dev/null @@ -1,174 +0,0 @@ -import json -import pandas as pd -import unittest -from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.remap_columns_op import RemapColumnsOp - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.sample_data = [ - [0.0776, 0.5083, 1, "go", "n/a", 0.565, "correct", "right", "female"], - [5.5774, 0.5083, 2, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"], - [9.5856, 0.5084, "n/a", "go", "n/a", 0.45, "correct", "right", "female"], - [13.5939, 0.5083, 3, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"], - [17.1021, 0.5083, 4, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"], - [21.6103, 0.5083, 5, "go", "n/a", 0.443, "correct", "left", "male"], - ] - cls.sample_columns = [ - "onset", - "duration", - "test", - "trial_type", - "stop_signal_delay", - "response_time", - "response_accuracy", - "response_hand", - "sex", - ] - - base_parameters = { - "source_columns": ["response_accuracy", "response_hand"], - "destination_columns": ["response_type"], - "map_list": [ - ["correct", "left", "correct_left"], - ["correct", "right", "correct_right"], - ["incorrect", "left", "incorrect_left"], - ["incorrect", "right", "incorrect_left"], - ["n/a", "n/a", "n/a"], - ], - "ignore_missing": True, - } - cls.json_parms = json.dumps(base_parameters) - cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=None) - - base_parameters1 = { - "source_columns": ["test"], - "destination_columns": ["new_duration", "new_hand"], - "map_list": [[1, 1, "correct_left"], [2, 2, "correct_right"]], - "ignore_missing": True, - "integer_sources": ["test"], - } - cls.json_parms1 = json.dumps(base_parameters1) - - base_parameters2 = { - "source_columns": ["test", "response_accuracy", "response_hand"], - "destination_columns": ["response_type"], - "map_list": [ - [1, "correct", "left", "correct_left"], - [2, "correct", "right", "correct_right"], - [3, "incorrect", "left", "incorrect_left"], - [4, 
"incorrect", "right", "incorrect_left"], - [5, "n/a", "n/a", "n/a"], - ], - "ignore_missing": True, - "integer_sources": ["test"], - } - cls.json_parms2 = json.dumps(base_parameters2) - - @classmethod - def tearDownClass(cls): - pass - - def get_dfs(self, op, df=None): - if df is None: - df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), "run-01") - return df, self.dispatch.post_proc_data(df_new) - - def test_valid_missing(self): - # Test when no extras but ignored. - parms = json.loads(self.json_parms) - before_len = len(parms["map_list"]) - parms["map_list"] = parms["map_list"][:-1] - after_len = len(parms["map_list"]) - self.assertEqual(after_len + 1, before_len) - op = RemapColumnsOp(parms) - df, df_test = self.get_dfs(op) - self.assertNotIn("response_type", df.columns, "remap_columns before does not have response_type column") - self.assertIn("response_type", df_test.columns, "remap_columns after has response_type column") - - def test_invalid_missing(self): - # Test when no extras but ignored. - parms = json.loads(self.json_parms) - before_len = len(parms["map_list"]) - parms["map_list"] = parms["map_list"][:-1] - parms["ignore_missing"] = False - after_len = len(parms["map_list"]) - self.assertEqual(after_len + 1, before_len) - op = RemapColumnsOp(parms) - with self.assertRaises(ValueError) as context: - self.get_dfs(op) - self.assertEqual(context.exception.args[0], "MapSourceValueMissing") - - def test_numeric_keys(self): - parms = { - "source_columns": ["duration"], - "destination_columns": ["new_duration"], - "map_list": [[0.5083, 0.6], [0.5084, 0.7]], - "ignore_missing": True, - } - op = RemapColumnsOp(parms) - df, df_test = self.get_dfs(op) - self.assertNotIn("new_duration", df.columns.values) - self.assertIn("new_duration", df_test.columns.values) - - def test_numeric_keys_cascade(self): - # Test when no extras but ignored. 
-        op_list = [
-            {
-                "operation": "remap_columns",
-                "description": "This is the first operation in the sequence",
-                "parameters": {
-                    "source_columns": ["duration"],
-                    "destination_columns": ["new_duration"],
-                    "map_list": [[5, 6], [3, 2]],
-                    "ignore_missing": True,
-                    "integer_sources": ["duration"],
-                },
-            },
-            {
-                "operation": "remap_columns",
-                "description": "This is the second operation in the sequence",
-                "parameters": {
-                    "source_columns": ["new_duration"],
-                    "destination_columns": ["new_value"],
-                    "map_list": [[3, 0.5], [2, 0.4]],
-                    "ignore_missing": True,
-                    "integer_sources": ["new_duration"],
-                },
-            },
-        ]
-        dispatcher = Dispatcher(op_list, data_root=None, backup_name=None, hed_versions=[])
-
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_test = dispatcher.run_operations(df, verbose=False, sidecar=None)
-        self.assertIn("new_duration", df_test.columns.values)
-        self.assertIn("new_value", df_test.columns.values)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_remove_columns_op.py b/tests/tools/remodeling/operations/test_remove_columns_op.py
deleted file mode 100644
index c9427020..00000000
--- a/tests/tools/remodeling/operations/test_remove_columns_op.py
+++ /dev/null
@@ -1,128 +0,0 @@
-import json
-import numpy as np
-import pandas as pd
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.remove_columns_op import RemoveColumnsOp
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-
-        base_parameters = {"column_names": ["stop_signal_delay", "response_accuracy"], "ignore_missing": True}
-        cls.json_parms = json.dumps(base_parameters)
-        cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def get_dfs(self, op):
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), "run-01")
-        return df, self.dispatch.post_proc_data(df_new)
-
-    def test_valid_no_extras_ignore_missing(self):
-        # Test when no extras but ignored.
-        parms = json.loads(self.json_parms)
-        op = RemoveColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        remaining_columns = ["onset", "duration", "trial_type", "response_time", "response_hand", "sex"]
-        self.assertTrue(remaining_columns == list(df_new.columns), "remove_columns resulting df should have correct columns")
-        self.assertEqual(
-            len(df), len(df_new), "remove_columns should not change the number of events when no extras and ignored"
-        )
-
-        # Test that df has not been changed by the op
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "remove_columns should not change the input df columns when no extras and ignore missing",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "remove_columns should not change the input df values when no extras and ignore missing",
-        )
-
-    def test_valid_extras_ignore_missing(self):
-        # Test when extras, but ignored
-        parms = json.loads(self.json_parms)
-        parms["column_names"].append("face")
-        op = RemoveColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        remaining_columns = ["onset", "duration", "trial_type", "response_time", "response_hand", "sex"]
-        self.assertTrue(
-            remaining_columns == list(df_new.columns),
-            "remove_columns resulting df should have correct columns when extras ignored",
-        )
-        self.assertEqual(len(df), len(df_new), "remove_columns should not change the number of events when extras but ignored")
-
-        # Test that df has not been changed by the op
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        self.assertTrue(
-            list(df1.columns) == list(df.columns),
-            "remove_columns should not change the input df columns when extras and ignore missing",
-        )
-        self.assertTrue(
-            np.array_equal(df1.to_numpy(), df.to_numpy()),
-            "remove_columns should not change the input df values when extras and ignore missing",
-        )
-
-    def test_valid_no_extras_no_ignore(self):
-        # Test when no extras but not ignored.
-        parms = json.loads(self.json_parms)
-        parms["ignore_missing"] = False
-        op = RemoveColumnsOp(parms)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df, df_new = self.get_dfs(op)
-        remaining_columns = ["onset", "duration", "trial_type", "response_time", "response_hand", "sex"]
-        self.assertTrue(
-            remaining_columns == list(df_new.columns),
-            "remove_columns resulting df should have correct columns when no extras but not ignored",
-        )
-        self.assertEqual(
-            len(df), len(df_new), "remove_columns should not change the number of events when no extras but not ignored"
-        )
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "remove_columns should not change the input df columns when no extras and not ignored",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "remove_columns should not change the input df values when no extras and not ignored",
-        )
-
-    def test_invalid_extras_no_ignore(self):
-        # Test when extras are present and not ignored.
-        parms = json.loads(self.json_parms)
-        parms["column_names"].append("face")
-        parms["ignore_missing"] = False
-        op = RemoveColumnsOp(parms)
-        with self.assertRaises(KeyError) as context:
-            self.get_dfs(op)
-        self.assertEqual(context.exception.args[0], "MissingColumnCannotBeRemoved")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_remove_rows_op.py b/tests/tools/remodeling/operations/test_remove_rows_op.py
deleted file mode 100644
index 3b077903..00000000
--- a/tests/tools/remodeling/operations/test_remove_rows_op.py
+++ /dev/null
@@ -1,121 +0,0 @@
-import json
-import numpy as np
-import pandas as pd
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.remove_rows_op import RemoveRowsOp
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-
-        cls.result_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-
-        base_parameters = {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}
-        cls.json_parms = json.dumps(base_parameters)
-        cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def get_dfs(self, op):
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), "run-01")
-        return df, self.dispatch.post_proc_data(df_new)
-
-    def test_valid(self):
-        # Test that rows with the listed values are removed when all parameters are valid.
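-        # Assuming remove_rows filters on exact value matches, the operation is roughly
-        #     df[~df[column_name].isin(remove_values)]
-        # so with both stop codes listed, only the three "go" rows of cls.result_data
-        # should survive.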
-        parms = json.loads(self.json_parms)
-        op = RemoveRowsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        self.assertTrue(
-            list(df.columns) == list(df_new.columns), "remove_rows does not change the number of columns when all valid"
-        )
-        df_result = pd.DataFrame(self.result_data, columns=self.sample_columns)
-        self.assertTrue(
-            np.array_equal(df_result.to_numpy(), df_new.to_numpy()), "remove_rows should have the right values after removal"
-        )
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns), "remove_rows should not change the input df columns when all valid"
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()), "remove_rows should not change the input df values when all valid"
-        )
-
-    def test_bad_values(self):
-        # Test when bad values included
-        parms = json.loads(self.json_parms)
-        parms["remove_values"] = ["succesful_stop", "unsuccesful_stop", "baloney"]
-        op = RemoveRowsOp(parms)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df, df_new = self.get_dfs(op)
-        self.assertTrue(
-            list(df.columns) == list(df_new.columns),
-            "remove_rows does not change the number of columns when bad values included",
-        )
-        df_result = pd.DataFrame(self.result_data, columns=self.sample_columns)
-        self.assertTrue(
-            np.array_equal(df_result.to_numpy(), df_new.to_numpy()),
-            "remove_rows should have the right values after removal when bad values",
-        )
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns), "remove_rows should not change the input df columns when bad values"
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()), "remove_rows should not change the input df values when bad values"
-        )
-
-    def test_bad_column_name(self):
-        # A bad column name should result in no change to df.
-        parms = json.loads(self.json_parms)
-        parms["column_name"] = "baloney"
-        op = RemoveRowsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        self.assertTrue(
-            list(df.columns) == list(df_new.columns), "remove_rows does not change the number of columns when bad column"
-        )
-
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df_new.to_numpy()),
-            "remove_rows should have the right values after removal when bad column",
-        )
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns), "remove_rows should not change the input df columns when bad column"
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()), "remove_rows should not change the input df values when bad column"
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_rename_columns_op.py b/tests/tools/remodeling/operations/test_rename_columns_op.py
deleted file mode 100644
index a9eb1344..00000000
--- a/tests/tools/remodeling/operations/test_rename_columns_op.py
+++ /dev/null
@@ -1,167 +0,0 @@
-import json
-import pandas as pd
-import numpy as np
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.rename_columns_op import RenameColumnsOp
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        base_parameters = {
-            "column_mapping": {"stop_signal_delay": "stop_delay", "response_hand": "hand_used", "sex": "image_sex"},
-            "ignore_missing": True,
-        }
-        cls.json_parms = json.dumps(base_parameters)
-        cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def get_dfs(self, op):
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), "run-01")
-        return df, self.dispatch.post_proc_data(df_new)
-
-    def test_valid_no_extras_ignore_missing(self):
-        # Test when no extras and ignored.
-        parms = json.loads(self.json_parms)
-        op = RenameColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        renamed_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_delay",
-            "response_time",
-            "response_accuracy",
-            "hand_used",
-            "image_sex",
-        ]
-        self.assertTrue(
-            renamed_columns == list(df_new.columns), "rename_columns has correct columns when no extras and not ignored."
-        )
-        self.assertTrue(
-            np.array_equal(df1.to_numpy(), df_new.to_numpy()),
-            "rename_columns does not change the values when no extras and ignored",
-        )
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "rename_columns should not change the input df columns when no extras and ignore missing",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "rename_columns should not change the input df values when no extras and ignore missing",
-        )
-
-    def test_valid_extras_ignore_missing(self):
-        # Test when extras but ignored
-        parms = json.loads(self.json_parms)
-        parms["ignore_missing"] = True
-        parms["column_mapping"]["random_column"] = "new_random_column"
-        op = RenameColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        renamed_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_delay",
-            "response_time",
-            "response_accuracy",
-            "hand_used",
-            "image_sex",
-        ]
-        self.assertTrue(
-            renamed_columns == list(df_new.columns),
-            "rename_columns resulting df should have correct columns when extras but ignored",
-        )
-        self.assertTrue(
-            np.array_equal(df1.to_numpy(), df_new.to_numpy()),
-            "rename_columns does not change the values when extras but ignored",
-        )
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "rename_columns should not change the input df columns when extras and ignore missing",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "rename_columns should not change the input df values when extras and ignore missing",
-        )
-
-    def test_valid_no_extras(self):
-        # Test when no extras but not ignored.
-        parms = json.loads(self.json_parms)
-        parms["ignore_missing"] = False
-        op = RenameColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        renamed_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_delay",
-            "response_time",
-            "response_accuracy",
-            "hand_used",
-            "image_sex",
-        ]
-        self.assertTrue(
-            renamed_columns == list(df_new.columns),
-            "rename_columns resulting df should have correct columns when no extras not ignored",
-        )
-
-        self.assertTrue(
-            np.array_equal(df1.to_numpy(), df_new.to_numpy()),
-            "rename_columns does not change the values when no extras not ignored",
-        )
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "rename_columns should not change the input df columns when no extras not ignored",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "rename_columns should not change the input df values when no extras not ignored",
-        )
-
-    def test_invalid_extras(self):
-        # Test extras not ignored.
-        parms = json.loads(self.json_parms)
-        parms["ignore_missing"] = False
-        parms["column_mapping"]["random_column"] = "new_random_column"
-        op = RenameColumnsOp(parms)
-        with self.assertRaises(KeyError) as context:
-            self.get_dfs(op)
-        self.assertEqual(context.exception.args[0], "MappedColumnsMissingFromData")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_reorder_columns_op.py b/tests/tools/remodeling/operations/test_reorder_columns_op.py
deleted file mode 100644
index c097916a..00000000
--- a/tests/tools/remodeling/operations/test_reorder_columns_op.py
+++ /dev/null
@@ -1,179 +0,0 @@
-import json
-import numpy as np
-import pandas as pd
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.reorder_columns_op import ReorderColumnsOp
-
-
-class Test(unittest.TestCase):
-    """
-
-    TODO:
-    - extras, no keep, no ignore
-    - no extras, keep, ignore
-    - no extras, no keep, ignore
-    - no extras, keep, no ignore
-    - no extras, no keep, ignore
-
-    """
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        cls.reordered = [
-            [0.0776, 0.5083, 0.565, "go"],
-            [5.5774, 0.5083, 0.49, "unsuccesful_stop"],
-            [9.5856, 0.5084, 0.45, "go"],
-            [13.5939, 0.5083, "n/a", "succesful_stop"],
-            [17.1021, 0.5083, 0.633, "unsuccesful_stop"],
-            [21.6103, 0.5083, 0.443, "go"],
-        ]
-
-        base_parameters = {
-            "column_order": ["onset", "duration", "response_time", "trial_type"],
-            "ignore_missing": True,
-            "keep_others": False,
-        }
-        cls.reordered_columns = ["onset", "duration", "response_time", "trial_type"]
-        cls.reordered_keep_columns = [
-            "onset",
-            "duration",
-            "response_time",
-            "trial_type",
-            "stop_signal_delay",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        cls.json_parms = json.dumps(base_parameters)
-        cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def get_dfs(self, op):
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), "run-01")
-        return df, self.dispatch.post_proc_data(df_new)
-
-    def test_valid_no_keep_others_ignore_missing(self):
-        # Test no extras no keep and ignore missing
-        parms = json.loads(self.json_parms)
-        op = ReorderColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        self.assertTrue(
-            self.reordered_columns == list(df_new.columns),
-            "reorder_columns resulting df should have correct columns when no extras, no keep, and ignore",
-        )
-        self.assertEqual(
-            len(df), len(df_new), "reorder_columns should not change the number of events when no extras, no keep, and ignore"
-        )
-        df_reordered = pd.DataFrame(self.reordered, columns=self.reordered_columns)
-        self.assertTrue(
-            np.array_equal(df_new.to_numpy(), df_reordered.to_numpy()),
-            "reorder_column should have expected values when no extras, no keep, and ignore",
-        )
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "reorder_columns should not change the input df columns when no extras, no keep, and ignore",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "reorder_columns should not change the input df values when no extras, no keep, and ignore",
-        )
-
-    def test_valid_extras_no_keep_others_ignore_missing(self):
-        # Test when extras, no keep and ignore missing
-        parms = json.loads(self.json_parms)
-        parms["column_order"] = ["onset", "duration", "response_time", "apples", "trial_type"]
-        op = ReorderColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        num_test_rows = len(df1)
-        self.assertTrue(
-            self.reordered_columns == list(df_new.columns),
-            "reorder_columns resulting df should have correct columns when extras, no keep, and ignore",
-        )
-        self.assertEqual(
-            num_test_rows,
-            len(df_new),
-            "reorder_columns should not change the number of events when extras, no keep, and ignore",
-        )
-        df_reordered = pd.DataFrame(self.reordered, columns=self.reordered_columns)
-        self.assertTrue(
-            np.array_equal(df_new.to_numpy(), df_reordered.to_numpy()),
-            "reorder_columns should have expected values when extras, no keep, and ignore",
-        )
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "reorder_columns should not change the input df columns when extras, no keep, and ignore",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "reorder_columns should not change the input df values when extras, no keep, and ignore",
-        )
-
-    def test_invalid_extras_no_keep_others_no_ignore_missing(self):
-        # Test when extras, no keep and no ignore
-        parms = json.loads(self.json_parms)
-        parms["column_order"] = ["onset", "duration", "response_time", "apples", "trial_type"]
-        parms["ignore_missing"] = False
-        op = ReorderColumnsOp(parms)
-        with self.assertRaises(ValueError) as context:
-            self.get_dfs(op)
-        self.assertEqual(context.exception.args[0], "MissingReorderedColumns")
-
-    def test_valid_keep_others_ignore_missing(self):
-        # Test extras, keep, ignore
-        parms = json.loads(self.json_parms)
-        parms["column_order"] = ["onset", "duration", "response_time", "apples", "trial_type"]
-        parms["keep_others"] = True
-        op = ReorderColumnsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        self.assertTrue(
-            self.reordered_keep_columns == list(df_new.columns),
-            "reorder_columns resulting df should have correct columns when extras, keep, and ignore",
-        )
-        self.assertEqual(
-            len(df), len(df_new), "reorder_columns should not change the number of events when extras, keep, and ignore"
-        )
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "reorder_columns should not change the input df columns when extras, keep, and ignore",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "reorder_columns should not change the input df values when extras, keep, and ignore",
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_split_rows_op.py b/tests/tools/remodeling/operations/test_split_rows_op.py
deleted file mode 100644
index 8881ba90..00000000
--- a/tests/tools/remodeling/operations/test_split_rows_op.py
+++ /dev/null
@@ -1,212 +0,0 @@
-import os
-import json
-import pandas as pd
-import numpy as np
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.split_rows_op import SplitRowsOp
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests")
-        cls.events_path = os.path.realpath(os.path.join(base_dir, "sub-0013_task-stopsignal_acq-seq_events.tsv"))
-        cls.model1_path = os.path.realpath(os.path.join(base_dir, "only_splitrow_rmdl.json"))
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-
-        cls.split = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [0.6426, 0, "response", "n/a", "n/a", "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [5.7774, 0.5, "stop_signal", "n/a", "n/a", "n/a", "n/a", "n/a"],
-            [6.0674, 0, "response", "n/a", "n/a", "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [10.0356, 0, "response", "n/a", "n/a", "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [13.7939, 0.5, "stop_signal", "n/a", "n/a", "n/a", "n/a", "n/a"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [17.3521, 0.5, "stop_signal", "n/a", "n/a", "n/a", "n/a", "n/a"],
-            [17.7351, 0, "response", "n/a", "n/a", "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-            [22.0533, 0, "response", "n/a", "n/a", "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        cls.split_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        base_parameters = {
-            "anchor_column": "trial_type",
-            "new_events": {
-                "response": {
-                    "onset_source": ["response_time"],
-                    "duration": [0],
-                    "copy_columns": ["response_accuracy", "response_hand", "sex"],
-                },
-                "stop_signal": {"onset_source": ["stop_signal_delay"], "duration": [0.5], "copy_columns": []},
-            },
-            "remove_parent_row": False,
-        }
-        cls.json_parms = json.dumps(base_parameters)
-        cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def get_dfs(self, op):
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), "run-01")
-        return df, self.dispatch.post_proc_data(df_new)
-
-    def test_valid_existing_anchor_column(self):
-        # Test when existing column is used as anchor event
-        parms = json.loads(self.json_parms)
-        op = SplitRowsOp(parms)
-        df, df_new = self.get_dfs(op)
-        df_check = pd.DataFrame(self.split, columns=self.split_columns)
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-
-        # Test that df_new has the right values
-        self.assertEqual(
-            len(df_check), len(df_new), "split_rows should have expected number of rows when existing column anchor"
-        )
-        self.assertEqual(
-            len(df_new.columns),
-            len(self.split_columns),
-            "split_rows should have expected number of columns when existing column anchor",
-        )
-        self.assertTrue(
-            list(df_new.columns) == list(self.split_columns),
-            "split_rows should have the expected columns when existing column anchor",
-        )
-
-        # Must check individual columns because of round-off on the numeric columns
-        for col in list(df_new.columns):
-            new = df_new[col].to_numpy()
-            check = df_check[col].to_numpy()
-            if np.array_equal(new, check):
-                continue
-            self.assertTrue(np.allclose(new, check, equal_nan=True))
-
-        # Test that df has not been changed by the op
-        self.assertTrue(
-            list(df.columns) == list(df1.columns),
-            "split_rows should not change the input df columns when existing column anchor",
-        )
-        self.assertTrue(
-            np.array_equal(df.to_numpy(), df1.to_numpy()),
-            "split_rows should not change the input df values when existing column anchor",
-        )
-
-    def test_invalid_onset_duration(self):
-        # Test when existing column is used as anchor event
-        parms = json.loads(self.json_parms)
-        op = SplitRowsOp(parms)
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df1 = df.drop(columns=["onset"])
-        with self.assertRaises(ValueError) as ex:
-            op.do_op(self.dispatch, self.dispatch.prep_data(df1), "run-01")
-        self.assertEqual("MissingOnsetColumn", ex.exception.args[0])
-        df2 = df.drop(columns=["duration"])
-        with self.assertRaises(ValueError) as ex:
-            op.do_op(self.dispatch, self.dispatch.prep_data(df2), "run-01")
-        self.assertEqual("MissingDurationColumn", ex.exception.args[0])
-
-    def test_valid_new_anchor_column(self):
-        # Test when new column is used as anchor event
-        parms = json.loads(self.json_parms)
-        parms["anchor_column"] = "event_type"
-        op = SplitRowsOp(parms)
-        df_check = pd.DataFrame(self.split, columns=self.split_columns)
-        df, df_new = self.get_dfs(op)
-
-        # Test that df_new has the right values
-        self.assertEqual(len(df_check), len(df_new), "split_rows should have expected number of rows when new column anchor")
-        self.assertEqual(
-            len(df_new.columns),
-            len(self.split_columns) + 1,
-            "split_rows should have expected number of columns when new column anchor",
-        )
-        self.assertIn("event_type", list(df_new.columns), "split_rows should have the new column when new column anchor")
-
-    def test_remove_parent(self):
-        # Test when existing column is used as anchor event
-        parms = json.loads(self.json_parms)
-        parms["remove_parent_row"] = True
-        op = SplitRowsOp(parms)
-        df, df_new = self.get_dfs(op)
-        self.assertEqual(len(df), 6)
-        self.assertEqual(len(df_new), 8)
-
-    def test_onsets_and_durations(self):
-        # Onset
-        parms = json.loads(self.json_parms)
-        parms["new_events"]["response"]["onset_source"] = ["response_time", 0.35]
-        parms["new_events"]["response"]["duration"] = [0.3, "duration"]
-        op = SplitRowsOp(parms)
-        df, df_new = self.get_dfs(op)
-        self.assertEqual(len(df), 6)
-        self.assertEqual(len(df_new), len(self.split))
-
-    def test_bad_onset(self):
-        # Onset
-        parms = json.loads(self.json_parms)
-        parms["new_events"]["response"]["onset_source"] = ["baloney"]
-        op = SplitRowsOp(parms)
-        with self.assertRaises(TypeError) as context:
-            self.get_dfs(op)
-        self.assertEqual(context.exception.args[0], "BadOnsetInModel")
-
-    def test_bad_duration(self):
-        # Onset
-        parms = json.loads(self.json_parms)
-        parms["new_events"]["response"]["duration"] = ["baloney"]
-        op = SplitRowsOp(parms)
-        with self.assertRaises(TypeError) as context:
-            self.get_dfs(op)
-        self.assertEqual(context.exception.args[0], "BadDurationInModel")
-
-    def test_split_rows_from_files(self):
-        # Test when existing column is used as anchor event
-        df = pd.read_csv(self.events_path, delimiter="\t", header=0, dtype=str, keep_default_na=False, na_values=None)
-        with open(self.model1_path) as fp:
-            operation_list = json.load(fp)
-        operations = Dispatcher.parse_operations(operation_list)
-        dispatch = Dispatcher(operation_list)
-        df = dispatch.prep_data(df)
-        df_new = operations[0].do_op(dispatch, df, "Name")
-        self.assertIsInstance(df_new, pd.DataFrame)
-        df_check = pd.read_csv(self.events_path, delimiter="\t", header=0, dtype=str, keep_default_na=False, na_values=None)
-        self.assertEqual(len(df_check), len(df), "split_rows should not change the length of the original dataframe")
-        self.assertEqual(
-            len(df_check.columns), len(df.columns), "split_rows should change the number of columns of the original dataframe"
-        )
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_column_names_op.py b/tests/tools/remodeling/operations/test_summarize_column_names_op.py
deleted file mode 100644
index 0b0810b5..00000000
--- a/tests/tools/remodeling/operations/test_summarize_column_names_op.py
+++ /dev/null
@@ -1,152 +0,0 @@
-import json
-import os
-import pandas as pd
-import unittest
-
-# from hed.tools.analysis.column_name_summary import ColumnNameSummary
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.summarize_column_names_op import SummarizeColumnNamesOp
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.data_root = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../data"))
-        cls.sample_columns1 = ["onset", "duration", "trial_type", "stop_signal_delay", "response_time"]
-        cls.sample_columns2 = ["trial_type", "onset", "duration", "stop_signal_delay", "response_time"]
-        cls.data1 = [[3.0, 0.5, "go", 0.2, 1.3], [5.0, 0.5, "go", 0.2, 1.3]]
-        base_parameters = {"summary_name": "columns", "summary_filename": "column_name_summary"}
-        cls.json_parms = json.dumps(base_parameters)
-
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        cls.events_path = os.path.realpath(
-            os.path.join(
-                os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_excerpt_events.tsv"
-            )
-        )
-        cls.sidecar_path = os.path.realpath(
-            os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_events.json")
-        )
-        cls.model_path = os.path.realpath(
-            os.path.join(
-                os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json"
-            )
-        )
-        cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def get_dfs(self, op, name, dispatch):
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_new = op.do_op(dispatch, dispatch.prep_data(df), name)
-        return df, dispatch.post_proc_data(df_new)
-
-    def test_constructor(self):
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeColumnNamesOp(parms)
-        self.assertIsInstance(sum_op, SummarizeColumnNamesOp, "constructor creates an object of the correct type")
-
-    def test_summary_op(self):
-        with open(self.model_path, "r") as fp:
-            parms = json.load(fp)
-        parsed_commands = Dispatcher.parse_operations(parms)
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-        df = dispatch.get_data_file(self.events_path)
-        df = dispatch.prep_data(df)
-
-        old_len = len(df)
-        sum_op = parsed_commands[0]
-        df = sum_op.do_op(dispatch, df, "run-02")
-        df = sum_op.do_op(dispatch, df, "run-01")
-        self.assertEqual(len(df), old_len)
-        df1 = df.drop(labels="onset", axis=1)
-        sum_op.do_op(dispatch, df1, "run-03")
-        this_context = dispatch.summary_dicts[sum_op.summary_name]
-        for _key, item in this_context.summary_dict.items():
-            summary = item.get_summary()
-            self.assertIsInstance(summary, dict)
-            json_value = item.get_summary(as_json=True)
-            self.assertIsInstance(json_value, str)
-            new_summary = json.loads(json_value)
-            self.assertIsInstance(new_summary, dict)
-        merged1 = this_context.merge_all_info()
-        # self.assertIsInstance(merged1, ColumnNameSummary)
-        self.assertEqual(len(merged1.file_dict), 3)
-        self.assertEqual(len(merged1.unique_headers), 2)
-        with self.assertRaises(ValueError) as except_context:
-            sum_op.do_op(dispatch, df, "run-03")
-        self.assertEqual(except_context.exception.args[0], "FileHasChangedColumnNames")
-
-    def test_summary(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-        parms = json.loads(self.json_parms)
-        op = SummarizeColumnNamesOp(parms)
-        df, df_new = self.get_dfs(op, "run-01", dispatch)
-        self.assertEqual(len(df), len(df_new))
-        context_dict = dispatch.summary_dicts
-        self.assertIsInstance(context_dict, dict)
-        self.get_dfs(op, "run-02", dispatch)
-        context = dispatch.summary_dicts["columns"]
-        summary = context.get_summary()
-        dataset_sum = summary["Dataset"]
-        json_str = json.dumps(dataset_sum)
-        json_obj = json.loads(json_str)
-        columns = json_obj["Overall summary"]["Specifics"]["Columns"]
-        self.assertEqual(len(columns), 1)
-        self.assertEqual(len(columns[0]["Files"]), 2)
-        ind_sum = summary["Individual files"]
-        self.assertEqual(len(ind_sum), 2)
-
-    def test_text_summary(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-        parms = json.loads(self.json_parms)
-        op = SummarizeColumnNamesOp(parms)
-        self.get_dfs(op, "run-01", dispatch)
-        self.get_dfs(op, "run-02", dispatch)
-        context = dispatch.summary_dicts["columns"]
-        # self.assertIsInstance(context, ColumnNameSummary)
-        text_summary1 = context.get_text_summary()
-        self.assertIsInstance(text_summary1, dict)
-
-    def test_multiple(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-        parms = json.loads(self.json_parms)
-        op = SummarizeColumnNamesOp(parms)
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        op.do_op(dispatch, dispatch.prep_data(df), "run-01")
-        df1 = pd.DataFrame(self.data1, columns=self.sample_columns1)
-        op.do_op(dispatch, dispatch.prep_data(df1), "run-02")
-        op.do_op(dispatch, dispatch.prep_data(df1), "run-03")
-        df2 = pd.DataFrame(self.data1, columns=self.sample_columns2)
-        op.do_op(dispatch, dispatch.prep_data(df2), "run-05")
-        context = dispatch.summary_dicts["columns"]
-        summary = context.get_summary()
-        text_summary1 = context.get_text_summary()
-        self.assertEqual(len(summary), 2)
-        self.assertIsInstance(text_summary1, dict)
-        self.assertEqual(len(text_summary1), 2)
-        self.assertEqual(len(text_summary1["Individual files"]), 4)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_column_values_op.py b/tests/tools/remodeling/operations/test_summarize_column_values_op.py
deleted file mode 100644
index 37ccd0fc..00000000
--- a/tests/tools/remodeling/operations/test_summarize_column_values_op.py
+++ /dev/null
@@ -1,133 +0,0 @@
-import json
-import os
-import pandas as pd
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.summarize_column_values_op import ColumnValueSummary, SummarizeColumnValuesOp
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        base_parameters = {
-            "summary_name": "test summary",
-            "summary_filename": "column_values_summary",
-            "skip_columns": [],
-            "value_columns": ["onset", "response_time"],
-        }
-
-        cls.json_parms = json.dumps(base_parameters)
-        cls.data_root = os.path.realpath(
-            os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests")
-        )
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def get_dfs(self, op, name, dispatch):
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df_new = op.do_op(dispatch, dispatch.prep_data(df), name)
-        return df, dispatch.post_proc_data(df_new)
-
-    def test_constructor(self):
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeColumnValuesOp(parms)
-        self.assertIsInstance(sum_op, SummarizeColumnValuesOp, "constructor creates an object of the correct type")
-
-    def test_do_ops(self):
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeColumnValuesOp(parms)
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0")
-        self.get_dfs(sum_op, "name1", dispatch)
-        context1 = dispatch.summary_dicts.get(parms["summary_name"], None)
-        summary1 = context1.summary_dict["name1"]
-        cat_len = len(summary1.categorical_info)
-        self.assertEqual(
-            cat_len,
-            len(self.sample_columns) - 2,
-            "do_ops if all columns are categorical summary has same number of columns as df",
-        )
-        self.get_dfs(sum_op, "name2", dispatch)
-        self.assertEqual(
-            cat_len, len(self.sample_columns) - 2, "do_ops updating does not change number of categorical columns."
- ) - context = dispatch.summary_dicts["test summary"] - text_sum = context.get_text_summary() - self.assertIsInstance(text_sum, dict) - self.assertEqual(len(context.summary_dict), 2) - - def test_get_summary(self): - parms = json.loads(self.json_parms) - sum_op = SummarizeColumnValuesOp(parms) - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions="8.1.0") - self.get_dfs(sum_op, "name1", dispatch) - - cont = dispatch.summary_dicts - context = cont.get("test summary", None) - self.assertIsInstance(context, ColumnValueSummary, "get_summary testing ColumnValueSummary") - summary1a = context.get_summary() - self.assertIsInstance(summary1a, dict) - self.assertIsInstance(summary1a["Dataset"], dict) - text_summary1 = context.get_text_summary(individual_summaries=None) - self.assertIsInstance(text_summary1, dict) - self.assertIsInstance(text_summary1["Dataset"], str) - text_summary1a = context.get_text_summary(individual_summaries="separate") - self.assertIsInstance(text_summary1a, dict) - self.get_dfs(sum_op, "name2", dispatch) - self.get_dfs(sum_op, "name3", dispatch) - context2 = dispatch.summary_dicts.get(parms["summary_name"], None) - summary2 = context2.get_summary() - self.assertIsInstance(summary2, dict) - text_summary2 = context2.get_text_summary(individual_summaries="consolidated") - self.assertIsInstance(text_summary2, dict) - - def test_summary_op(self): - events = os.path.realpath( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_excerpt_events.tsv" - ) - ) - column_summary_path = os.path.realpath( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json" - ) - ) - with open(column_summary_path, "r") as fp: - parms = json.load(fp) - parsed_commands = Dispatcher.parse_operations(parms) - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - df = dispatch.get_data_file(events) - old_len = len(df) - sum_op = parsed_commands[1] - df = sum_op.do_op(dispatch, dispatch.prep_data(df), os.path.basename(events)) - self.assertEqual(len(df), old_len) - context_dict = dispatch.summary_dicts - for _key, item in context_dict.items(): - text_value = item.get_text_summary() - self.assertTrue(text_value) - json_value = item.get_summary() - self.assertTrue(json_value) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py deleted file mode 100644 index 2c14c873..00000000 --- a/tests/tools/remodeling/operations/test_summarize_definitions_op.py +++ /dev/null @@ -1,104 +0,0 @@ -import json -import os -import unittest -import pandas as pd -from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp, DefinitionSummary - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/")) - cls.data_path = os.path.realpath(os.path.join(path, "sub-002_task-FacePerception_run-1_events.tsv")) - cls.json_path = os.path.realpath(os.path.join(path, "task-FacePerception_events.json")) - base_parameters = {"summary_name": "get_definition_summary", "summary_filename": "summarize_definitions"} - cls.json_parms = json.dumps(base_parameters) - - @classmethod - def tearDownClass(cls): - pass - - 
def test_do_op(self): - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - parms = json.loads(self.json_parms) - sum_op = SummarizeDefinitionsOp(parms) - df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null") - df_new = sum_op.do_op(dispatch, df, "subj2_run1", sidecar=self.json_path) - self.assertEqual(200, len(df_new), " dataframe length is correct") - self.assertEqual(10, len(df_new.columns), " has correct number of columns") - self.assertIn(sum_op.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) - - def test_summary(self): - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - parms = json.loads(self.json_parms) - sum_op = SummarizeDefinitionsOp(parms) - df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null") - df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - self.assertEqual(200, len(df_new), " dataframe length is correct") - self.assertEqual(10, len(df_new.columns), " has correct number of columns") - self.assertIn(sum_op.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) - # print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) - - cont = dispatch.summary_dicts - context = cont.get("get_definition_summary", None) - self.assertIsInstance(context, DefinitionSummary, "get_summary testing DefinitionSummary") - summary1a = context.get_summary() - self.assertIsInstance(summary1a, dict) - self.assertIsInstance(summary1a["Dataset"], dict) - text_summary1 = context.get_text_summary(individual_summaries=None) - self.assertIsInstance(text_summary1, dict) - self.assertIsInstance(text_summary1["Dataset"], str) - - def test_summary_errors(self): - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - parms = json.loads(self.json_parms) - sum_op = SummarizeDefinitionsOp(parms) - df = pd.DataFrame( - { - "HED": [ - "(Def-expand/A1/1, (Action/1, Acceleration/5, Item-count/2))", - "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", - "(Def-expand/C3/5, (Action/5, Acceleration/5, Item-count/5))", - "(Def-expand/D4/7, (Action/7, Acceleration/7, Item-count/8))", - "(Def-expand/D5/7, (Action/7, Acceleration/7, Item-count/8, Event))", - "(Def-expand/A1/2, (Action/2, Age/5, Item-count/2))", - "(Def-expand/A1/3, (Action/3, Age/4, Item-count/3))", - # This could be identified, but fails due to the above raising errors - "(Def-expand/A1/4, (Action/4, Age/5, Item-count/2))", - ] - } - ) - df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - self.assertIsInstance(df_new, pd.DataFrame) - self.assertIn(sum_op.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) - # print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) - - def test_ambiguous_def_errors(self): - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - parms = json.loads(self.json_parms) - sum_op = SummarizeDefinitionsOp(parms) - df = pd.DataFrame( - { - "HED": [ - "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", - ] - } - ) - sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - 
self.assertIn(sum_op.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) - # print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) - cont = dispatch.summary_dicts - context = cont.get("get_definition_summary", None) - self.assertIsInstance(context, DefinitionSummary, "get_summary testing DefinitionSummary") - summary1a = context.get_summary() - self.assertIsInstance(summary1a, dict) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py deleted file mode 100644 index 227eca14..00000000 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ /dev/null @@ -1,304 +0,0 @@ -import json -import os -import unittest -import pandas as pd -from hed.models import HedString, TabularInput, Sidecar -from hed.schema import load_schema_version -from hed.tools.analysis.hed_tag_counts import HedTagCounts -from hed.tools.analysis.event_manager import EventManager -from hed.tools.analysis.hed_tag_manager import HedTagManager -from io import StringIO -from hed.models.df_util import expand_defs -from hed.tools.remodeling.dispatcher import Dispatcher -from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp, HedTagSummary - - -class Test(unittest.TestCase): - - @classmethod - def setUpClass(cls): - path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/")) - cls.data_path = os.path.realpath(os.path.join(path, "sub-002_task-FacePerception_run-1_events.tsv")) - cls.json_path = os.path.realpath(os.path.join(path, "task-FacePerception_events.json")) - base_parameters = { - "summary_name": "get_summary hed tags", - "summary_filename": "summarize_hed_tags", - "tags": { - "Sensory events": ["Sensory-event", "Sensory-presentation", "Task-stimulus-role", "Experimental-stimulus"], - "Agent actions": [ - "Agent-action", - "Agent", - "Action", - "Agent-task-role", - "Task-action-type", - "Participant-response", - ], - "Task properties": ["Task-property"], - "Objects": ["Item"], - "Properties": ["Property"], - }, - "include_context": False, - "replace_defs": False, - "remove_types": ["Condition-variable", "Task"], - } - cls.base_parameters = base_parameters - cls.json_parms = json.dumps(base_parameters) - - @classmethod - def tearDownClass(cls): - pass - - def test_do_op_no_replace_no_context_remove_on(self): - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - parms = json.loads(self.json_parms) - sum_op = SummarizeHedTagsOp(parms) - self.assertIsInstance(sum_op, SummarizeHedTagsOp, "constructor creates an object of the correct type") - df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null") - df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - self.assertEqual(200, len(df_new), "summarize_hed_type_op dataframe length is correct") - self.assertEqual(10, len(df_new.columns), "summarize_hed_type_op has correct number of columns") - self.assertIn(sum_op.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], HedTagSummary) - counts = dispatch.summary_dicts[sum_op.summary_name].summary_dict["subj2_run1"] - self.assertIsInstance(counts, HedTagCounts) - self.assertEqual(len(counts.tag_dict), 16) - self.assertIn("def", 
counts.tag_dict) - self.assertNotIn("task", counts.tag_dict) - self.assertNotIn("condition-variable", counts.tag_dict) - df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run2", sidecar=self.json_path) - self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict["subj2_run2"].tag_dict), 16) - - def test_do_op_options(self): - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.2.0"]) - df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null") - - # no replace, no context, types removed - parms1 = json.loads(self.json_parms) - parms1["summary_name"] = "tag summary 1" - sum_op1 = SummarizeHedTagsOp(parms1) - df_new1 = sum_op1.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") - self.assertEqual(200, len(df_new1), "summarize_hed_type_op dataframe length is correct") - self.assertEqual(10, len(df_new1.columns), "summarize_hed_type_op has correct number of columns") - self.assertIn(sum_op1.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op1.summary_name], HedTagSummary) - counts1 = dispatch.summary_dicts[sum_op1.summary_name].summary_dict["subj2_run1"] - self.assertIsInstance(counts1, HedTagCounts) - self.assertEqual(len(counts1.tag_dict), 16) - self.assertNotIn("event-context", counts1.tag_dict) - self.assertIn("def", counts1.tag_dict) - self.assertNotIn("task", counts1.tag_dict) - self.assertNotIn("condition-variable", counts1.tag_dict) - - # no replace, context, types removed - parms2 = json.loads(self.json_parms) - parms2["include_context"] = True - parms2["summary_name"] = "tag summary 2" - sum_op2 = SummarizeHedTagsOp(parms2) - df_new2 = sum_op2.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - self.assertIsInstance(sum_op2, SummarizeHedTagsOp, "constructor creates an object of the correct type") - self.assertEqual(200, len(df_new2), "summarize_hed_type_op dataframe length is correct") - self.assertEqual(10, len(df_new2.columns), "summarize_hed_type_op has correct number of columns") - self.assertIn(sum_op2.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op2.summary_name], HedTagSummary) - counts2 = dispatch.summary_dicts[sum_op2.summary_name].summary_dict["subj2_run1"] - self.assertIsInstance(counts2, HedTagCounts) - self.assertEqual(len(counts2.tag_dict), len(counts1.tag_dict) + 1) - self.assertIn("event-context", counts2.tag_dict) - self.assertIn("def", counts2.tag_dict) - self.assertNotIn("task", counts2.tag_dict) - self.assertNotIn("condition-variable", counts2.tag_dict) - - # no replace, context, types removed - parms3 = json.loads(self.json_parms) - parms3["include_context"] = True - parms3["replace_defs"] = True - parms3["summary_name"] = "tag summary 3" - sum_op3 = SummarizeHedTagsOp(parms3) - df_new3 = sum_op3.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - self.assertIsInstance(sum_op3, SummarizeHedTagsOp, "constructor creates an object of the correct type") - self.assertEqual(200, len(df_new3), "summarize_hed_type_op dataframe length is correct") - self.assertEqual(10, len(df_new3.columns), "summarize_hed_type_op has correct number of columns") - self.assertIn(sum_op3.summary_name, dispatch.summary_dicts) - self.assertIsInstance(dispatch.summary_dicts[sum_op3.summary_name], HedTagSummary) - counts3 = 
dispatch.summary_dicts[sum_op3.summary_name].summary_dict["subj2_run1"] - self.assertIsInstance(counts3, HedTagCounts) - self.assertEqual(33, len(counts3.tag_dict)) - self.assertIn("event-context", counts3.tag_dict) - self.assertNotIn("def", counts3.tag_dict) - self.assertNotIn("task", counts3.tag_dict) - self.assertNotIn("condition-variable", counts3.tag_dict) - - def test_quick3(self): - remove_types = [] - my_schema = load_schema_version("8.2.0") - my_json = { - "code": {"HED": {"code1": "((Def/Blech1, Green), Blue)", "code2": "((Def/Blech3, Description/Help me), Blue)"}}, - "defs": {"HED": {"def1": "(Definition/Blech1, (Condition-variable/Cat, Description/this is hard))"}}, - } - my_json_str = json.dumps(my_json) - my_sidecar = Sidecar(StringIO(my_json_str)) - data = [ - [0.5, 0, "code1", "Description/This is a test, Label/Temp, (Def/Blech1, Green)"], - [0.6, 0, "code2", "Sensory-event, ((Description/Animal, Condition-variable/Blech))"], - ] - df = pd.DataFrame(data, columns=["onset", "duration", "code", "HED"]) - input_data = TabularInput(df, sidecar=my_sidecar, name="myName") - tag_man = HedTagManager(EventManager(input_data, my_schema), remove_types=remove_types) - counts = HedTagCounts("myName", 2) - self.assertIsInstance(counts, HedTagCounts) - self.assertIsInstance(tag_man, HedTagManager) - # hed_objs = tag_man.get_hed_objs(include_context=include_context, replace_defs=replace_defs) - # for hed in hed_objs: - # counts.update_tag_counts(hed, 'myName') - # summary_dict['myName'] = counts - - def test_quick4(self): - path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/")) - data_path = os.path.realpath(os.path.join(path, "sub-002_task-FacePerception_run-1_events.tsv")) - json_path = os.path.realpath(os.path.join(path, "task-FacePerception_events.json")) - schema = load_schema_version("8.1.0") - sidecar = Sidecar( - json_path, - ) - input_data = TabularInput(data_path, sidecar=sidecar) - counts = HedTagCounts("myName", 2) - summary_dict = {} - definitions = input_data.get_def_dict(schema) - df = pd.DataFrame({"HED_assembled": input_data.series_a}) - expand_defs(df, schema, definitions) - - # type_defs = input_data.get_definitions().gathered_defs - for hed in df["HED_assembled"]: - counts.update_tag_counts(HedString(hed, schema), "myName") - summary_dict["myName"] = counts - - def test_get_summary_details(self): - dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"]) - parms = json.loads(self.json_parms) - sum_op = SummarizeHedTagsOp(parms) - self.assertIsInstance(sum_op, SummarizeHedTagsOp, "constructor creates an object of the correct type") - df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null") - sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path) - self.assertIn(sum_op.summary_name, dispatch.summary_dicts) - sum_context = dispatch.summary_dicts[sum_op.summary_name] - self.assertIsInstance(sum_context, HedTagSummary) - sum_obj1 = sum_context.get_summary_details() - self.assertIsInstance(sum_obj1, dict) - json_str1 = json.dumps(sum_obj1, indent=4) - self.assertIsInstance(json_str1, str) - json_obj1 = json.loads(json_str1) - self.assertIsInstance(json_obj1, dict) - sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run2", sidecar=self.json_path) - sum_context2 = dispatch.summary_dicts[sum_op.summary_name] - sum_obj2 = sum_context2.get_summary_details() - json_str2 = json.dumps(sum_obj2, indent=4) - 
self.assertIsInstance(json_str2, str)
-        sum_obj3 = sum_context2.get_summary_details(include_individual=False)
-        self.assertFalse(sum_obj3["Individual files"])
-
-    def test_get_summary_text_summary(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeHedTagsOp(parms)
-        df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null")
-        df = dispatch.prep_data(df)
-        sum_op.do_op(dispatch, df, "subj2_run1", sidecar=self.json_path)
-        sum_op.do_op(dispatch, df, "subj2_run2", sidecar=self.json_path)
-        sum_context1 = dispatch.summary_dicts[sum_op.summary_name]
-        text_sum_none = sum_context1.get_text_summary(individual_summaries="none")
-        self.assertIn("Dataset", text_sum_none)
-        self.assertIsInstance(text_sum_none["Dataset"], str)
-        self.assertFalse(text_sum_none.get("Individual files", {}))
-
-        text_sum_consolidated = sum_context1.get_text_summary(individual_summaries="consolidated")
-        self.assertIn("Dataset", text_sum_consolidated)
-        self.assertIsInstance(text_sum_consolidated["Dataset"], str)
-        self.assertFalse(text_sum_consolidated.get("Individual files", {}))
-        self.assertGreater(len(text_sum_consolidated["Dataset"]), len(text_sum_none["Dataset"]))
-
-        text_sum_separate = sum_context1.get_text_summary(individual_summaries="separate")
-        self.assertIn("Dataset", text_sum_separate)
-        self.assertIsInstance(text_sum_separate["Dataset"], str)
-        self.assertIn("Individual files", text_sum_separate)
-        self.assertIsInstance(text_sum_separate["Individual files"], dict)
-        self.assertEqual(len(text_sum_separate["Individual files"]), 2)
-
-    def test_sample_example(self):
-        remodel_list = [
-            {
-                "operation": "summarize_hed_tags",
-                "description": "Produce a summary of HED tags.",
-                "parameters": {
-                    "summary_name": "summarize_hed_tags",
-                    "summary_filename": "summarize_hed_tags",
-                    "tags": {
-                        "Sensory events": [
-                            "Sensory-event",
-                            "Sensory-presentation",
-                            "Task-stimulus-role",
-                            "Experimental-stimulus",
-                        ],
-                        "Agent actions": [
-                            "Agent-action",
-                            "Agent",
-                            "Action",
-                            "Agent-task-role",
-                            "Task-action-type",
-                            "Participant-response",
-                        ],
-                        "Objects": ["Item"],
-                    },
-                    "include_context": False,
-                },
-            }
-        ]
-
-        sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-
-        sidecar_path = os.path.realpath(
-            os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_events.json")
-        )
-
-        dispatch = Dispatcher(remodel_list, data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        df = pd.DataFrame(sample_data, columns=sample_columns)
-        df = dispatch.prep_data(df)
-        for operation in dispatch.parsed_ops:
-            df = operation.do_op(dispatch, df, "sample", sidecar=sidecar_path)
-        context_dict = dispatch.summary_dicts.get("summarize_hed_tags")
-        text_summary = context_dict.get_text_summary()
-        self.assertIsInstance(text_summary["Dataset"], str)
-
-    def test_convert_summary_to_word_dict(self):
-        # Assume we have a valid summary_json
-        summary_json = {
-            "Main tags": {
-                "tag_category_1": [{"tag": "tag1", "events": 5}, {"tag": "tag2", "events": 3}],
-                "tag_category_2": [{"tag": "tag3", "events": 7}],
-            }
-        }
-        expected_output = {"tag1": 5, "tag2": 3, "tag3": 7}
-
-        word_dict = HedTagSummary.summary_to_dict(summary_json, transform=None, scale_adjustment=0)
-        self.assertEqual(word_dict, expected_output)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py
deleted file mode 100644
index 82b10ade..00000000
--- a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import json
-import os
-import unittest
-import pandas as pd
-from hed.models import Sidecar
-from hed.schema import load_schema_version
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.summarize_hed_type_op import SummarizeHedTypeOp, HedTypeSummary
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/"))
-        cls.data_path = os.path.realpath(os.path.join(path, "sub-002_task-FacePerception_run-1_events.tsv"))
-        cls.json_path = os.path.realpath(os.path.join(path, "task-FacePerception_events.json"))
-        base_parameters = {
-            "summary_name": "get summary conditions",
-            "summary_filename": "summarize_condition_variable_type",
-            "type_tag": "condition-variable",
-        }
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-
-        cls.json_parms = json.dumps(base_parameters)
-        cls.dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        cls.events = os.path.realpath(
-            os.path.join(
-                os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_excerpt_events.tsv"
-            )
-        )
-        cls.sidecar_path = os.path.realpath(
-            os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_events.json")
-        )
-        cls.hed_schema = load_schema_version("8.1.0")
-        cls.summary_path = os.path.realpath(
-            os.path.join(
-                os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json"
-            )
-        )
-        rel_path = "../../../data/remodel_tests/sub-002_task-FacePerception_run-1_events.tsv"
-        cls.events_wh = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_path))
-        rel_side = "../../../data/remodel_tests/task-FacePerception_events.json"
-        cls.sidecar_path_wh = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), rel_side))
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def test_constructor(self):
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeHedTypeOp(parms)
-        self.assertIsInstance(sum_op, SummarizeHedTypeOp, "constructor creates an object of the correct type")
-        df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null")
-        df_new = sum_op.do_op(self.dispatch, df, "subj2_run1", sidecar=self.json_path)
-        self.assertEqual(200, len(df_new), "summarize_hed_type_op dataframe length is correct")
-        self.assertEqual(10, len(list(df_new.columns)), "summarize_hed_type_op has correct number of columns")
-
-    def test_summary(self):
-        with open(self.summary_path, "r") as fp:
-            parms = json.load(fp)
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        df = dispatch.get_data_file(self.events)
-        parsed_commands = Dispatcher.parse_operations(parms)
-        sum_op = parsed_commands[2]
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "run-01", sidecar=self.sidecar_path)
-        context1 = dispatch.summary_dicts["AOMIC_condition_variables"]
-        summary1 = context1.get_summary()
-        self.assertIn("run-01", summary1["Individual files"])
-        self.assertEqual(len(summary1["Individual files"]), 1)
-        summary1a = context1.get_summary()
-        self.assertIsInstance(summary1a["Dataset"], dict)
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "run-02", sidecar=self.sidecar_path)
-        context2 = dispatch.summary_dicts["AOMIC_condition_variables"]
-        summary2 = context2.get_summary(individual_summaries="separate")
-        self.assertEqual(summary2["Dataset"]["Overall summary"]["Files"][0], "run-01")
-        self.assertEqual(len(summary2["Dataset"]["Overall summary"]["Files"]), 2)
-        summary2a = context2.get_summary(individual_summaries="separate")
-        self.assertIsInstance(summary2a["Individual files"]["run-02"], dict)
-
-    def test_text_summary_with_levels(self):
-        with open(self.summary_path, "r") as fp:
-            parms = json.load(fp)
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        df = dispatch.get_data_file(self.events_wh)
-        parsed_commands = Dispatcher.parse_operations(parms)
-        sum_op = parsed_commands[2]
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "run-01", sidecar=self.sidecar_path_wh)
-        context1 = dispatch.summary_dicts["AOMIC_condition_variables"]
-        text_summary1 = context1.get_text_summary()
-        self.assertIsInstance(text_summary1, dict)
-
-    def test_text_summary(self):
-        sidecar = Sidecar(self.sidecar_path, name="aomic_sidecar")
-
-        with open(self.summary_path, "r") as fp:
-            parms = json.load(fp)
-        parsed_commands = Dispatcher.parse_operations(parms)
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        df = dispatch.get_data_file(self.events)
-        old_len = len(df)
-        sum_op = parsed_commands[2]
-        df = sum_op.do_op(dispatch, dispatch.prep_data(df), os.path.basename(self.events), sidecar=sidecar)
-        self.assertEqual(len(df), old_len)
-        context_dict = dispatch.summary_dicts
-        self.assertIsInstance(context_dict, dict)
-        context1 = dispatch.summary_dicts["AOMIC_condition_variables"]
-        self.assertIsInstance(context1, HedTypeSummary)
-        text_summary1 = context1.get_text_summary()
-        self.assertIsInstance(text_summary1, dict)
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "new_events", sidecar=sidecar)
-        context2 = dispatch.summary_dicts["AOMIC_condition_variables"]
-        text_summary2 = context2.get_text_summary()
-        self.assertIsInstance(text_summary2, dict)
-        self.assertEqual(len(text_summary1["Individual files"]), 1)
-        self.assertEqual(len(text_summary2["Individual files"]), 2)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
deleted file mode 100644
index f15bb023..00000000
--- a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
+++ /dev/null
@@ -1,154 +0,0 @@
-import json
-import os
-import unittest
-import pandas as pd
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.summarize_hed_validation_op import SummarizeHedValidationOp, HedValidationSummary
-from hed.errors import error_reporter
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests/"))
-        cls.data_path = os.path.realpath(os.path.join(path, "sub-002_task-FacePerception_run-1_events.tsv"))
-        cls.json_path = os.path.realpath(os.path.join(path, "task-FacePerception_events.json"))
-        cls.bad_json_path = os.path.realpath(os.path.join(path, "task-FacePerceptionMissingDefs_events.json"))
-        cls.sample_sidecar_path = os.path.realpath(os.path.join(path, "task-stopsignal_acq-seq_events.json"))
-        cls.sample_data = [
-            [0.0776, 0.5083, "baloney", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        base_parameters = {
-            "summary_name": "summarize_hed_validation_errors",
-            "summary_filename": "summarize_hed_validation_errors",
-            "check_for_warnings": True,
-        }
-        cls.json_parms = json.dumps(base_parameters)
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def test_constructor(self):
-        parms = json.loads(self.json_parms)
-        sum_op1 = SummarizeHedValidationOp(parms)
-        self.assertIsInstance(sum_op1, SummarizeHedValidationOp, "constructor creates an object of the correct type")
-
-    def test_do_op(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeHedValidationOp(parms)
-        self.assertIsInstance(sum_op, SummarizeHedValidationOp, "constructor creates an object of the correct type")
-        df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null")
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path)
-        self.assertIn(sum_op.summary_name, dispatch.summary_dicts)
-        self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], HedValidationSummary)
-        sub1 = dispatch.summary_dicts[sum_op.summary_name].summary_dict["subj2_run1"]
-        self.assertEqual(len(sub1["event_issues"]), 1)
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run2", sidecar=self.json_path)
-        self.assertEqual(len(sub1["event_issues"]), 1)
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run3", sidecar=self.bad_json_path)
-        self.assertEqual(len(dispatch.summary_dicts[sum_op.summary_name].summary_dict), 3)
-        run3 = dispatch.summary_dicts[sum_op.summary_name].summary_dict["subj2_run3"]
-        self.assertEqual(run3["total_sidecar_issues"], 4)
-
-    def test_get_summary_details(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeHedValidationOp(parms)
-        df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null")
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run1", sidecar=self.json_path)
-        sum_context = dispatch.summary_dicts[sum_op.summary_name]
-        sum_obj1 = sum_context.get_summary_details()
-        self.assertIsInstance(sum_obj1, dict)
-        error_reporter.replace_tag_references(sum_obj1)
-        json_str1 = json.dumps(sum_obj1, indent=4)
-        self.assertIsInstance(json_str1, str)
-        json_obj1 = json.loads(json_str1)
-        self.assertIsInstance(json_obj1, dict)
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run2", sidecar=self.json_path)
-        sum_context2 = dispatch.summary_dicts[sum_op.summary_name]
-        sum_obj2 = sum_context2.get_summary_details()
-        error_reporter.replace_tag_references(sum_obj2)
-        json_str2 = json.dumps(sum_obj2, indent=4)
-        self.assertIsInstance(json_str2, str)
-        sum_obj3 = sum_context2.get_summary_details(include_individual=False)
-        self.assertFalse(sum_obj3["Individual files"])
-        sum_op.do_op(dispatch, dispatch.prep_data(df), "subj2_run4", sidecar=self.bad_json_path)
-        sum_obj4 = sum_context2.get_summary_details(include_individual=True)
-        self.assertIsInstance(sum_obj4, dict)
-
-    def test_get_summary(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeHedValidationOp(parms)
-        df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null")
-        df = dispatch.prep_data(df)
-        sum_op.do_op(dispatch, df, "subj2_run1", sidecar=self.bad_json_path)
-
-        context = dispatch.summary_dicts[sum_op.summary_name]
-        sum1a = context.get_summary(individual_summaries="separate")
-        self.assertEqual(len(sum1a["Dataset"]["Overall summary"]["Files"]), 1)
-        self.assertEqual(sum1a["Dataset"]["Overall summary"]["Files"][0], "subj2_run1")
-        self.assertEqual(len(sum1a["Dataset"]["Overall summary"]), 5)
-        sum2a = context.get_summary(individual_summaries="separate")
-        self.assertIsInstance(sum2a["Individual files"]["subj2_run1"], dict)
-        sum_op.do_op(dispatch, df, "subj2_run2", sidecar=self.json_path)
-        sum_op.do_op(dispatch, df, "subj2_run3", sidecar=self.bad_json_path)
-        sum3a = context.get_summary(individual_summaries="none")
-        self.assertIsInstance(sum3a, dict)
-        self.assertFalse(sum3a["Individual files"])
-        self.assertEqual(len(sum3a["Dataset"]["Overall summary"]["Files"]), 3)
-        sum3b = context.get_summary(individual_summaries="consolidated")
-        self.assertEqual(len(sum3b["Individual files"]), 3)
-        self.assertEqual(sum3b["Dataset"]["Overall summary"]["Total files"], 3)
-        self.assertIsInstance(sum3b, dict)
-
-    def test_get_text_summary(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeHedValidationOp(parms)
-        df = pd.read_csv(self.data_path, delimiter="\t", header=0, keep_default_na=False, na_values=",null")
-        df = dispatch.prep_data(df)
-        sum_op.do_op(dispatch, df, "subj2_run1", sidecar=self.bad_json_path)
-        context = dispatch.summary_dicts[sum_op.summary_name]
-        text_sum1 = context.get_text_summary(individual_summaries="separate")
-        self.assertEqual(len(text_sum1), 2)
-        sum_op.do_op(dispatch, df, "subj2_run2", sidecar=self.json_path)
-        sum_op.do_op(dispatch, df, "subj2_run3", sidecar=self.bad_json_path)
-        text_sum2 = context.get_text_summary(individual_summaries="none")
-        text_sum3 = context.get_text_summary(individual_summaries="consolidated")
-        self.assertIsInstance(text_sum3, dict)
-        self.assertIsInstance(text_sum2, dict)
-        self.assertEqual(len(text_sum2), 1)
-        self.assertEqual(len(text_sum3), 1)
-
-    def test_with_sample_data(self):
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        parms = json.loads(self.json_parms)
-        sum_op = SummarizeHedValidationOp(parms)
-        sum_op.do_op(dispatch, df, "sub-0013_task-stopsignal_acq-seq_events.tsv", sidecar=self.sample_sidecar_path)
-        sum_context1 = dispatch.summary_dicts[sum_op.summary_name]
-        self.assertIsInstance(sum_context1, HedValidationSummary)
-        self.assertEqual(len(sum_context1.summary_dict), 1)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_sidecar_from_events_op.py b/tests/tools/remodeling/operations/test_summarize_sidecar_from_events_op.py
deleted file mode 100644
index 27ae3f57..00000000
--- a/tests/tools/remodeling/operations/test_summarize_sidecar_from_events_op.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import os
-import pandas as pd
-import unittest
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.summarize_sidecar_from_events_op import (
-    EventsToSidecarSummary,
-    SummarizeSidecarFromEventsOp,
-)
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        cls.base_parameters = {
-            "summary_name": "extracted_json",
-            "summary_filename": "extracted_json",
-            "skip_columns": ["onset", "duration"],
-            "value_columns": ["response_time", "stop_signal_delay"],
-        }
-
-        cls.data_root = os.path.realpath(
-            os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../data/remodel_tests")
-        )
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def test_constructor(self):
-        sum_op = SummarizeSidecarFromEventsOp(self.base_parameters)
-        self.assertIsInstance(sum_op, SummarizeSidecarFromEventsOp, "constructor creates an object of the correct type")
-
-    def test_do_ops(self):
-        sum_op = SummarizeSidecarFromEventsOp(self.base_parameters)
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        df1a = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        sum_op.do_op(dispatch, dispatch.prep_data(df1), "name1")
-        context1 = dispatch.summary_dicts.get(self.base_parameters["summary_name"], None)
-        summary = context1.summary_dict["name1"]
-        cat_len = len(summary.categorical_info)
-        cat_base = (
-            len(self.sample_columns) - len(self.base_parameters["skip_columns"]) - len(self.base_parameters["value_columns"])
-        )
-        self.assertEqual(cat_len, cat_base, "do_ops has right number of categorical columns")
-        sum_op.do_op(dispatch, dispatch.prep_data(df1a), "name1")
-        self.assertEqual(len(df1.columns), len(self.sample_columns), "do_ops updating does not change number columns.")
-        sum_op.do_op(dispatch, dispatch.prep_data(df1a), "name2")
-
-    def test_get_summary(self):
-        sum_op = SummarizeSidecarFromEventsOp(self.base_parameters)
-        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        df1 = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        sum_op.do_op(dispatch, dispatch.prep_data(df1), "name1")
-        context1 = dispatch.summary_dicts.get(self.base_parameters["summary_name"], None)
-        self.assertIsInstance(context1, EventsToSidecarSummary, "get_summary testing EventsToSidecarSummary")
-        summary1 = context1.get_summary()
-        self.assertIsInstance(summary1, dict, "get_summary returns a dictionary by default")
-        self.assertIsInstance(summary1["Dataset"], dict)
-        self.assertEqual(len(summary1["Individual files"]), 1)
-        summary2 = context1.get_summary()
-        self.assertIsInstance(summary2, dict, "get_summary returns a dictionary by default")
-        self.assertIsInstance(summary2["Dataset"], dict)
-        self.assertIsInstance(summary2["Individual files"]["name1"], dict)
-        summary_text3 = context1.get_text_summary(individual_summaries="none")
-        self.assertIsInstance(summary_text3, dict, "get_text_summary returns a str if verbose is False")
-        self.assertNotIn("Individual files", summary_text3)
-        summary_text4 = context1.get_text_summary(individual_summaries="consolidated")
-        self.assertIsInstance(summary_text4, dict)
-        summary_text5 = context1.get_text_summary(individual_summaries="separate")
-        self.assertIsInstance(summary_text5, dict)
-        self.assertGreater(len(summary_text4["Dataset"]), len(summary_text5["Dataset"]))
-        sum_op.do_op(dispatch, dispatch.prep_data(df1), "name2")
-        context2 = dispatch.summary_dicts.get(self.base_parameters["summary_name"], None)
-        self.assertIsInstance(context1, EventsToSidecarSummary, "get_summary testing EventsToSidecarSummary")
-        summary_text6 = context2.get_text_summary(individual_summaries="separate")
-        self.assertIsInstance(summary_text6, dict)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/test_backup_manager.py b/tests/tools/remodeling/test_backup_manager.py
deleted file mode 100644
index 393062b8..00000000
--- a/tests/tools/remodeling/test_backup_manager.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import os
-import io
-import shutil
-import unittest
-from unittest.mock import patch
-import zipfile
-from hed.errors import HedFileError
-from hed.tools.remodeling.backup_manager import BackupManager
-from hed.tools.util.io_util import get_file_list
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        file_list = ["top_level.tsv", "sub1/sub1_events.tsv", "sub2/sub2_events.tsv", "sub2/sub2_next_events.tsv"]
-        cls.file_list = file_list
-        cls.extract_path = os.path.realpath(os.path.join(os.path.dirname(__file__), "../../data/remodel_tests"))
-        test_root = os.path.realpath(os.path.join(os.path.dirname(__file__), "../../data/remodel_tests/test_root"))
-        cls.test_root = test_root
-        cls.test_paths = [os.path.join(test_root, file) for file in file_list]
-        cls.test_zip = os.path.realpath(os.path.join(os.path.dirname(__file__), "../../data/remodel_tests/test_root.zip"))
-
-        test_root_back1 = os.path.realpath(os.path.join(os.path.dirname(__file__), "../../data/remodel_tests/test_root_back1"))
-        cls.test_root_back1 = test_root_back1
-        cls.test_paths_back1 = [os.path.join(test_root_back1, file) for file in file_list]
-        cls.test_zip_back1 = os.path.realpath(
-            os.path.join(os.path.dirname(__file__), "../../data/remodel_tests/test_root_back1.zip")
-        )
-
-        test_root_bad = os.path.realpath(os.path.join(os.path.dirname(__file__), "../../data/remodel_tests/test_root_bad"))
-        cls.test_root_bad = test_root_bad
-        cls.test_root_bad_backups = os.path.join(test_root_bad, BackupManager.RELATIVE_BACKUP_LOCATION)
-        cls.test_paths_bad = [os.path.join(test_root_bad, file) for file in file_list]
-        cls.test_zip_bad = os.path.realpath(
-            os.path.join(os.path.dirname(__file__), "../../data/remodel_tests/test_root_bad.zip")
-        )
-
-    def setUp(self):
-        with zipfile.ZipFile(self.test_zip, "r") as zip_ref:
-            zip_ref.extractall(self.extract_path)
-        with zipfile.ZipFile(self.test_zip_back1, "r") as zip_ref:
-            zip_ref.extractall(self.extract_path)
-        with zipfile.ZipFile(self.test_zip_bad, "r") as zip_ref:
-            zip_ref.extractall(self.extract_path)
-
-    def tearDown(self):
-        shutil.rmtree(self.test_root)
-        shutil.rmtree(self.test_root_back1)
-        shutil.rmtree(self.test_root_bad)
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def test_constructor(self):
-        back1_man = BackupManager(self.test_root_back1)
-        self.assertIsInstance(back1_man, BackupManager, "constructor creates a BackupManager if no backups")
-        self.assertTrue(back1_man.backups_dict)
-
-    def test_constructor_alternative_location(self):
-        alt_path = os.path.realpath(os.path.join(self.extract_path, "temp_backs"))
-        back1_man = BackupManager(self.test_root_back1, backups_root=alt_path)
-        self.assertIsInstance(back1_man, BackupManager, "constructor creates a BackupManager if no backups")
-        self.assertFalse(back1_man.backups_dict)
-        file_list = get_file_list(
-            self.test_root_back1, name_suffix="events", exclude_dirs=["derivatives"], extensions=[".tsv"]
-        )
-        self.assertEqual(len(file_list), 3)
-        back1_man.create_backup(file_list, backup_name="my_back")
-        self.assertTrue(back1_man.backups_dict)
-        backup = back1_man.backups_dict["my_back"]
-        self.assertEqual(len(backup), len(file_list))
-        if os.path.exists(alt_path):
-            shutil.rmtree(alt_path)
-
-    def test_bad_data_root(self):
-        with self.assertRaises(HedFileError) as context:
-            BackupManager("/baloney/Junk")
-        self.assertEqual(context.exception.args[0], "NonExistentData")
-
-    def test_constructor_missing_backup(self):
-        remove_list = ["back2_miss_json", "back3_miss_back", "back4_miss_file"]
-        remove_dirs = [os.path.join(self.test_root_bad_backups, file) for file in remove_list]
-        for remove_dir in remove_dirs:
-            shutil.rmtree(remove_dir)
-        with self.assertRaises(HedFileError) as context:
-            BackupManager(self.test_root_bad)
-        self.assertEqual(context.exception.code, "MissingBackupFile")
-
-    def test_constructor_missing_json(self):
-        remove_list = ["back1_extra", "back3_miss_back", "back4_miss_file"]
-        remove_dirs = [os.path.realpath(os.path.join(self.test_root_bad_backups, file)) for file in remove_list]
-        for remove_dir in remove_dirs:
-            shutil.rmtree(remove_dir)
-        with self.assertRaises(HedFileError) as context:
-            BackupManager(self.test_root_bad)
-        self.assertEqual(context.exception.code, "BadBackupFormat")
-
-    def test_constructor_extra_backup_file(self):
-        remove_list = ["back1_extra", "back2_miss_json", "back4_miss_file"]
-        remove_dirs = [os.path.realpath(os.path.join(self.test_root_bad_backups, file)) for file in remove_list]
-        for remove_dir in remove_dirs:
-            shutil.rmtree(remove_dir)
-        with self.assertRaises(HedFileError) as context:
-            BackupManager(self.test_root_bad)
-        self.assertEqual(context.exception.code, "BadBackupFormat")
-
-    def test_create_backup(self):
-        test_man = BackupManager(self.test_root)
-        file_list = get_file_list(self.test_root)
-        self.assertFalse(test_man.get_backup("test_back1"), "create_backup doesn't have the backup before creation")
-        return_val1 = test_man.create_backup(file_list, backup_name="test_back1", verbose=False)
-        self.assertTrue(return_val1, "create_backup returns true when it has created a backup.")
-        backup1 = test_man.get_backup("test_back1")
-        self.assertIsInstance(backup1, dict, "create_backup creates a dictionary")
-        return_val2 = test_man.create_backup(file_list, backup_name="test_back1", verbose=False)
-        self.assertFalse(return_val2, "create_backup returns true when it has created a backup.")
-
-    def test_create_backup_no_name(self):
-        test_man = BackupManager(self.test_root)
-        self.assertFalse(test_man.backups_dict)
-        file_list = get_file_list(self.test_root)
-        with patch("sys.stdout", new=io.StringIO()) as fp1:
-            return_val1 = test_man.create_backup(file_list, verbose=False)
-            self.assertFalse(fp1.getvalue())
-        self.assertTrue(return_val1, "create_backup returns true when it has created a backup.")
-        backup1 = test_man.get_backup("test_back1")
-        self.assertFalse(backup1)
-        back = test_man.get_backup(BackupManager.DEFAULT_BACKUP_NAME)
-        self.assertIsInstance(back, dict, "create_backup creates a dictionary")
-        self.assertTrue(back)
-        return_val2 = test_man.create_backup(file_list, backup_name="test_back1", verbose=False)
-        self.assertTrue(return_val2, "create_backup returns true when it has created a backup.")
-
-    def test_get_task(self):
-        task1 = BackupManager.get_task(["abc", "def"], "temp/myabc.txt")
-        self.assertFalse(task1)
-        task2 = BackupManager.get_task([], "temp/myabc.txt")
-        self.assertFalse(task2)
-        task3 = BackupManager.get_task(["abc", "def"], "temp/alpha_key_task_abc.txt")
-        self.assertEqual(task3, "abc")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/test_dispatcher.py b/tests/tools/remodeling/test_dispatcher.py
deleted file mode 100644
index 84c94edc..00000000
--- a/tests/tools/remodeling/test_dispatcher.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import os
-import json
-import shutil
-import unittest
-import pandas as pd
-import numpy as np
-import zipfile
-from hed.errors.exceptions import HedFileError
-from hed.tools.remodeling.dispatcher import Dispatcher
-from hed.tools.remodeling.operations.base_op import BaseOp
-from hed.tools.util.io_util import get_file_list
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        data_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests"))
-        cls.sample_data = [
-            [0.0776, 0.5083, "go", "n/a", 0.565, "correct", "right", "female"],
-            [5.5774, 0.5083, "unsuccesful_stop", 0.2, 0.49, "correct", "right", "female"],
-            [9.5856, 0.5084, "go", "n/a", 0.45, "correct", "right", "female"],
-            [13.5939, 0.5083, "succesful_stop", 0.2, "n/a", "n/a", "n/a", "female"],
-            [17.1021, 0.5083, "unsuccesful_stop", 0.25, 0.633, "correct", "left", "male"],
-            [21.6103, 0.5083, "go", "n/a", 0.443, "correct", "left", "male"],
-        ]
-        cls.sample_columns = [
-            "onset",
-            "duration",
-            "trial_type",
-            "stop_signal_delay",
-            "response_time",
-            "response_accuracy",
-            "response_hand",
-            "sex",
-        ]
-        cls.data_path = data_path
-        cls.file_path = os.path.realpath(os.path.join(data_path, "aomic_sub-0013_excerpt_events.tsv"))
-        cls.test_zip_back1 = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests/test_root_back1.zip"
-        )
-        cls.test_root_back1 = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests/test_root_back1"
-        )
-        cls.summarize_model = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests/test_root1_summarize_column_value_rmdl.json"
-        )
-        cls.summarize_excerpt = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)),
-            "../../data/remodel_tests/aomic_sub-0013_before_after_reorder_rmdl.json",
-        )
-
-    def setUp(self):
-        with zipfile.ZipFile(self.test_zip_back1, "r") as zip_ref:
-            zip_ref.extractall(self.data_path)
-
-    def tearDown(self):
-        if os.path.exists(self.test_root_back1):
-            shutil.rmtree(self.test_root_back1)
-
-    def test_dispatcher_constructor(self):
-        model_path1 = os.path.join(self.data_path, "simple_reorder_rmdl.json")
-        with open(model_path1) as fp:
-            model1 = json.load(fp)
-        dispatch = Dispatcher(model1)
-        self.assertEqual(
-            len(dispatch.parsed_ops), len(model1), "dispatcher operation list should have one item for each operation"
-        )
-
-    def test_constructor_empty_operations(self):
-        disp = Dispatcher([], data_root=None, backup_name=None, hed_versions=["8.1.0"])
-        self.assertIsInstance(disp, Dispatcher, "")
-        self.assertFalse(disp.parsed_ops, "constructor empty operations list has empty parsed ops")
-
-    def test_get_data_file(self):
-        model_path1 = os.path.join(self.data_path, "simple_reorder_rmdl.json")
-        with open(model_path1) as fp:
-            model1 = json.load(fp)
-        sidecar_file = os.path.realpath(os.path.join(self.data_path, "task-FacePerception_events.json"))
-        dispatch = Dispatcher(model1)
-        with self.assertRaises(HedFileError) as context:
-            dispatch.get_data_file(sidecar_file)
-        self.assertEqual(context.exception.code, "BadDataFile")
-
-    def test_get_summary_save_dir(self):
-        model_path1 = os.path.join(self.data_path, "simple_reorder_rmdl.json")
-        with open(model_path1) as fp:
-            model1 = json.load(fp)
-        dispatch1 = Dispatcher(model1, data_root=self.test_root_back1, backup_name="back1")
-        summary_path = dispatch1.get_summary_save_dir()
-        self.assertEqual(
-            summary_path,
-            os.path.realpath(os.path.join(self.test_root_back1, "derivatives", Dispatcher.REMODELING_SUMMARY_PATH)),
-        )
-        dispatch2 = Dispatcher(model1)
-        with self.assertRaises(HedFileError) as context:
-            dispatch2.get_summary_save_dir()
-        self.assertEqual(context.exception.code, "NoDataRoot")
-
-    def test_parse_operation_list(self):
-        test = [
-            {
-                "operation": "remove_rows",
-                "parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]},
-            },
-            {"operation": "remove_rows", "parameters": {"column_name": "response_time", "remove_values": ["n/a"]}},
-        ]
-        dispatch = Dispatcher(test)
-        parsed_ops = dispatch.parsed_ops
-        self.assertEqual(len(parsed_ops), len(test), "dispatch has a operation for each item in operation list")
-        for item in parsed_ops:
-            self.assertIsInstance(item, BaseOp)
-
-    def test_run_operations(self):
-        model_path1 = os.path.join(self.data_path, "simple_reorder_rmdl.json")
-        with open(model_path1) as fp:
-            model1 = json.load(fp)
-        dispatch = Dispatcher(model1)
-        df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns)
-        num_test_rows = len(df_test)
-        df_test_values = df_test.to_numpy()
-        df_new = dispatch.run_operations(self.file_path)
-        reordered_columns = ["onset", "duration", "trial_type", "response_time"]
-        self.assertTrue(reordered_columns == list(df_new.columns), "run_operations resulting df should have correct columns")
-        self.assertTrue(list(df_test.columns) == self.sample_columns, "run_operations did not change the input df columns")
-        self.assertEqual(len(df_test), num_test_rows, "run_operations did not change the input df rows")
-        self.assertFalse(
-            np.array_equal(df_test_values, df_test.to_numpy), "run_operations does not change the values in the input df"
-        )
-        self.assertEqual(len(df_new), num_test_rows, "run_operations did not change the number of output rows")
-        self.assertEqual(
-            len(dispatch.parsed_ops), len(model1), "dispatcher operation list should have one item for each operation"
-        )
-
-    def test_run_operations_hed(self):
-        events_path = os.path.realpath(os.path.join(self.data_path, "sub-002_task-FacePerception_run-1_events.tsv"))
-        sidecar_path = os.path.realpath(os.path.join(self.data_path, "task-FacePerception_events.json"))
-        op_list = [
-            {
-                "operation": "factor_hed_type",
-                "description": "Test run",
-                "parameters": {"type_tag": "Condition-variable", "type_values": []},
-            }
-        ]
-        dispatch = Dispatcher(op_list, hed_versions=["8.1.0"])
-        df = dispatch.run_operations(events_path, sidecar=sidecar_path, verbose=False)
-        self.assertIsInstance(df, pd.DataFrame)
-        self.assertEqual(len(df), 200)
-        self.assertEqual(len(df.columns), 17)
-        self.assertIn("key-assignment.right-sym-cond", df.columns)
-
-    def test_save_summaries(self):
-        with open(self.summarize_model) as fp:
-            model1 = json.load(fp)
-        dispatch1 = Dispatcher(model1, data_root=self.test_root_back1, backup_name="back1")
-        file_list = get_file_list(
-            self.test_root_back1, name_suffix="events", extensions=[".tsv"], exclude_dirs=["derivatives"]
-        )
-        for file in file_list:
-            dispatch1.run_operations(file)
-        summary_path = dispatch1.get_summary_save_dir()
-        self.assertFalse(os.path.exists(summary_path))
-        dispatch1.save_summaries()
-        self.assertTrue(os.path.exists(summary_path))
-        file_list1 = os.listdir(summary_path)
-        self.assertEqual(2, len(file_list1), "save_summaries creates correct number of summary files when run.")
-        dispatch1.save_summaries(save_formats=[])
-        dir_list2 = os.listdir(summary_path)
-        self.assertEqual(2, len(dir_list2), "save both summaries")
-        path_before = os.path.realpath(os.path.join(summary_path, "test summary_values_before"))
-        file_list2 = [f for f in os.listdir(path_before) if os.path.isfile(os.path.join(path_before, f))]
-        self.assertEqual(2, len(file_list2))
-        dispatch1.save_summaries(task_name="task-blech")
-        file_list3 = [f for f in os.listdir(path_before) if os.path.isfile(os.path.join(path_before, f))]
-        self.assertEqual(4, len(file_list3), "saving with task has different name than without")
-
-    def test_get_summaries(self):
-        with open(self.summarize_excerpt) as fp:
-            model1 = json.load(fp)
-        dispatch = Dispatcher(model1)
-        df_new = dispatch.run_operations(self.file_path)
-        self.assertIsInstance(df_new, pd.DataFrame)
-        summaries = dispatch.get_summaries(file_formats=[".txt", ".json", ".tsv"])
-        self.assertIsInstance(summaries, list)
-        # self.assertEqual(len(summaries), 4)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py
deleted file mode 100644
index fca52023..00000000
--- a/tests/tools/remodeling/test_validator.py
+++ /dev/null
@@ -1,217 +0,0 @@
-import os
-import json
-import unittest
-from copy import deepcopy
-from hed.tools.remodeling.remodeler_validator import RemodelerValidator
-
-
-class Test(unittest.TestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        with open(
-            os.path.realpath(
-                os.path.join(
-                    os.path.dirname(os.path.realpath(__file__)), "..", "../data/remodel_tests/all_remodel_operations.json"
-                )
-            )
-        ) as f:
-            cls.remodel_file = json.load(f)
-        cls.validator = RemodelerValidator()
-
-    @classmethod
-    def tearDownClass(cls):
-        pass
-
-    def test_validator_build(self):
-        pass
-
-    def test_validate_valid(self):
-        error_strings = self.validator.validate(self.remodel_file)
-        self.assertFalse(error_strings)
-
-    def test_validate_array(self):
-        wrong_input_type = {"operation": "remove_columns"}
-        error_strings = self.validator.validate(wrong_input_type)
-        self.assertEqual(
-            error_strings[0], "Operations must be contained in a list or array. " + "This is also true for a single operation."
-        )
-
-        no_operations = []
-        error_strings = self.validator.validate(no_operations)
-        self.assertEqual(
-            error_strings[0], "There are no operations defined. Specify at least 1 operation for the remodeler to execute."
-        )
-
-    def test_validate_operations(self):
-        invalid_operation_type = ["string"]
-        error_strings = self.validator.validate(invalid_operation_type)
-        self.assertEqual(
-            error_strings[0], "Each operation must be defined in a dictionary: " + "string is not a dictionary object."
-        )
-
-        invalid_operation_missing = [self.remodel_file[0].copy()]
-        del invalid_operation_missing[0]["description"]
-        error_strings = self.validator.validate(invalid_operation_missing)
-        self.assertEqual(
-            error_strings[0],
-            "Operation dictionary 1 is missing 'description'. "
-            + "Every operation dictionary must specify the type of operation, a description, "
-            + "and the operation parameters.",
-        )
-
-        invalid_operation_name = [self.remodel_file[0].copy()]
-        invalid_operation_name[0]["operation"] = "unlisted_operation"
-        error_strings = self.validator.validate(invalid_operation_name)
-        self.assertEqual(
-            error_strings[0],
-            "unlisted_operation is not a known remodeler operation. " + "See the documentation for valid operations.",
-        )
-
-    def test_validate_parameters(self):
-        missing_parameter = [deepcopy(self.remodel_file[0])]
-        del missing_parameter[0]["parameters"]["column_names"]
-        error_strings = self.validator.validate(missing_parameter)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The parameter column_names is missing. " + "column_names is a required parameter of remove_columns.",
-        )
-
-        missing_parameter_nested = [deepcopy(self.remodel_file[10])]
-        del missing_parameter_nested[0]["parameters"]["new_events"]["response"]["onset_source"]
-        error_strings = self.validator.validate(missing_parameter_nested)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The field onset_source is missing in response new_events. "
-            + "onset_source is a required parameter of response new_events.",
-        )
-
-        invalid_parameter = [deepcopy(self.remodel_file[0])]
-        invalid_parameter[0]["parameters"]["invalid"] = "invalid_value"
-        error_strings = self.validator.validate(invalid_parameter)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: Operation parameters for remove_columns " + "contain an unexpected field 'invalid'.",
-        )
-
-        invalid_parameter_nested = [deepcopy(self.remodel_file[10])]
-        invalid_parameter_nested[0]["parameters"]["new_events"]["response"]["invalid"] = "invalid_value"
-        error_strings = self.validator.validate(invalid_parameter_nested)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: Operation parameters for response " + "new_events contain an unexpected field 'invalid'.",
-        )
-
-        invalid_type = [deepcopy(self.remodel_file[0])]
-        invalid_type[0]["parameters"]["column_names"] = 0
-        error_strings = self.validator.validate(invalid_type)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The value of column_names in the remove_columns operation " + "should be array. 0 is not a array.",
-        )
-
-        invalid_type_nested = [deepcopy(self.remodel_file[10])]
-        invalid_type_nested[0]["parameters"]["new_events"]["response"]["onset_source"] = {"key": "value"}
-        error_strings = self.validator.validate(invalid_type_nested)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The value of onset_source response new_events "
-            + "in the split_rows operation should be array. {'key': 'value'} is not a array.",
-        )
-
-        empty_array = [deepcopy(self.remodel_file[0])]
-        empty_array[0]["parameters"]["column_names"] = []
-        error_strings = self.validator.validate(empty_array)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The list in column_names in the remove_columns " + "operation should have at least 1 item(s).",
-        )
-
-        empty_array_nested = [deepcopy(self.remodel_file[5])]
-        empty_array_nested[0]["parameters"]["map_list"][0] = []
-        error_strings = self.validator.validate(empty_array_nested)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The list in item 1 map_list in the remap_columns " + "operation should have at least 1 item(s).",
-        )
-
-        # invalid_value = [deepcopy(self.remodel_file[18])]
-        # invalid_value[0]["parameters"]["convert_to"] = "invalid_value"
-        # error_strings = validator.validate(invalid_value)
-        # self.assertEqual(error_strings[0], "Operation 1: Operation parameter convert_to, in the " +
-        #                  "convert_columns operation, contains and unexpected value. " +
-        #                  "Value should be one of ['str', 'int', 'float', 'fixed'].")
-
-        # value_dependency = [deepcopy(self.remodel_file[18])]
-        # value_dependency[0]["parameters"]["convert_to"] = "fixed"
-        # error_strings = validator.validate(value_dependency)
-        # self.assertEqual(error_strings[0], "Operation 1: The parameter decimal_places is missing. " +
-        #                  " The decimal_places is a required parameter of convert_columns.")
-
-        property_dependency = [deepcopy(self.remodel_file[1])]
-        del property_dependency[0]["parameters"]["factor_values"]
-        error_strings = self.validator.validate(property_dependency)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The parameter factor_names is missing: "
-            + "factor_names is a required parameter of factor_column when ['factor_values'] is specified.",
-        )
-
-        double_item_in_array = [deepcopy(self.remodel_file[0])]
-        double_item_in_array[0]["parameters"]["column_names"] = ["response", "response"]
-        error_strings = self.validator.validate(double_item_in_array)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The list in column_names in the remove_columns " + "operation should only contain unique items.",
-        )
-
-        double_item_in_array_nested = [deepcopy(self.remodel_file[10])]
-        double_item_in_array_nested[0]["parameters"]["new_events"]["response"]["copy_columns"] = ["response", "response"]
-        error_strings = self.validator.validate(double_item_in_array_nested)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1: The list in copy_columns response new_events in the split_rows "
-            + "operation should only contain unique items.",
-        )
-
-    def test_validate_parameter_data(self):
-        factor_column_validate = [deepcopy(self.remodel_file)[1]]
-        factor_column_validate[0]["parameters"]["factor_names"] = ["stopped"]
-        error_strings = self.validator.validate(factor_column_validate)
-        self.assertEqual(
-            error_strings[0], "Operation 1 (factor_column): factor_names must be " + "same length as factor_values"
-        )
-
-        factor_hed_tags_validate = [deepcopy(self.remodel_file)[2]]
-        factor_hed_tags_validate[0]["parameters"]["query_names"] = ["correct"]
-        error_strings = self.validator.validate(factor_hed_tags_validate)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1 (factor_hed_tags): QueryNamesLengthBad: "
-            + "The query_names length 1 must be empty or equal to the queries length 2.",
-        )
-
-        merge_consecutive_validate = [deepcopy(self.remodel_file)[4]]
-        merge_consecutive_validate[0]["parameters"]["match_columns"].append("trial_type")
-        error_strings = self.validator.validate(merge_consecutive_validate)
-        self.assertEqual(
-            error_strings[0], "Operation 1 (merge_consecutive): column_name `trial_type` " + "cannot be a match_column."
-        )
-
-        remap_columns_validate_same_length = [deepcopy(self.remodel_file)[5]]
-        remap_columns_validate_same_length[0]["parameters"]["map_list"][0] = [""]
-        error_strings = self.validator.validate(remap_columns_validate_same_length)
-        self.assertEqual(error_strings[0], "Operation 1 (remap_columns): all map_list arrays must be of length 3.")
-
-        remap_columns_validate_right_length = [deepcopy(self.remodel_file[5])]
-        remap_columns_validate_right_length[0]["parameters"]["map_list"] = [["string1", "string2"], ["string3", "string4"]]
-        error_strings = self.validator.validate(remap_columns_validate_right_length)
-        self.assertEqual(error_strings[0], "Operation 1 (remap_columns): all map_list arrays must be of length 3.")
-
-        remap_columns_integer_sources = [deepcopy(self.remodel_file[5])]
-        remap_columns_integer_sources[0]["parameters"]["integer_sources"] = ["unknown_column"]
-        error_strings = self.validator.validate(remap_columns_integer_sources)
-        self.assertEqual(
-            error_strings[0],
-            "Operation 1 (remap_columns): the integer_sources {'unknown_column'} " + "are missing from source_columns.",
-        )
diff --git a/tests/tools/util/test_data_util.py b/tests/tools/util/test_data_util.py
index f4c2b7e9..8aee7d6a 100644
--- a/tests/tools/util/test_data_util.py
+++ b/tests/tools/util/test_data_util.py
@@ -25,7 +25,7 @@ class Test(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
-        curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/remodel_tests")
+        curation_base_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../data/other_tests")
         cls.stern_map_path = os.path.join(curation_base_dir, "sternberg_map.tsv")
         cls.stern_test1_path = os.path.join(curation_base_dir, "sternberg_test_events.tsv")
         cls.stern_test2_path = os.path.join(curation_base_dir, "sternberg_with_quotes_events.tsv")