From 228c945be418197378461c48c352b321a75c2dc4 Mon Sep 17 00:00:00 2001
From: aparnabg
Date: Wed, 24 Sep 2025 21:01:53 +0530
Subject: [PATCH 01/36] add bids code

---
 src/new_code/bids.py | 1004 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1004 insertions(+)
 create mode 100644 src/new_code/bids.py

diff --git a/src/new_code/bids.py b/src/new_code/bids.py
new file mode 100644
index 0000000..e0b05b7
--- /dev/null
+++ b/src/new_code/bids.py
@@ -0,0 +1,1004 @@
+# Standard library imports
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+# Third-party imports
+import pandas as pd
+import numpy as np
+import cv2
+
+def safe_print(message: str):
+    """Print with timestamps."""
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    print(f"{timestamp} [MAIN] {message}")
+
+# Helper functions
+def parse_duration(duration_str) -> float:
+    """Parse a duration string (HH:MM:SS, MM:SS, or plain seconds) to seconds."""
+    try:
+        if pd.isna(duration_str) or duration_str == '':
+            return 0.0
+        duration_str = str(duration_str)
+        if ':' in duration_str:
+            parts = duration_str.split(':')
+            if len(parts) == 3:
+                hours = int(parts[0])
+                minutes = int(parts[1])
+                seconds = float(parts[2])
+                return hours * 3600 + minutes * 60 + seconds
+            elif len(parts) == 2:
+                minutes = int(parts[0])
+                seconds = float(parts[1])
+                return minutes * 60 + seconds
+        return float(duration_str)
+    except Exception:  # a bare "except:" would also swallow KeyboardInterrupt
+        return 0.0
+
+def make_bids_task_label(task_name):
+    """Convert TaskName to BIDS-compatible task label for filenames."""
+    s = str(task_name).strip()
+    s = re.sub(r'[^0-9a-zA-Z+]', '', s)  # Keep only alphanumeric and +
+    return s
+
+def get_video_properties(video_path):
+    """Extract video properties using OpenCV."""
+    try:
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            return {"SamplingFrequency": None, "Resolution": None}
+
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        cap.release()
+
+        return {
+            "SamplingFrequency": fps,
+            "Resolution": f"{width}x{height}",
+        }
+    except Exception:
+        return {"SamplingFrequency": None, "Resolution": None}
+
+def determine_session_from_folder(folder_name: str) -> Optional[str]:
+    """Determine session ID from folder names with spaces."""
+    folder_lower = folder_name.lower()
+
+    # Check for 12-16 month patterns (including spaces and variations)
+    if any(pattern in folder_lower for pattern in [
+        '12-16 month', '12-14 month', '12_16', '12_14', '12-16month', '12-14month'
+    ]):
+        return "01"
+
+    # Check for 34-38 month patterns (including spaces, typos, and variations)
+    elif any(pattern in folder_lower for pattern in [
+        '34-38 month', '34-28 month', '34-48 month', '34_38', '34_28', '34_48',
+        '34-38month', '34-28month', '34-48month'
+    ]):
+        return "02"
+
+    return None
+
+def find_age_folder_session(current_path: str, participant_path: str) -> Optional[str]:
+    """Recursively check if current path or any parent path contains an age-related folder pattern."""
+    if not current_path.startswith(participant_path) or current_path == participant_path:
+        return None
+
+    current_folder = os.path.basename(current_path)
+    session_id = determine_session_from_folder(current_folder)
+    if session_id:
+        return session_id
+
+    parent_path = os.path.dirname(current_path)
+    return find_age_folder_session(parent_path, participant_path)
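# --- Editorial sketch (not part of this patch): expected behaviour of the
# session mapping above, shown doctest-style; the folder names here are
# hypothetical examples, not paths from the dataset. ---
#
#     >>> determine_session_from_folder("12-16 Month Videos")
#     '01'
#     >>> determine_session_from_folder("34-38month clips")
#     '02'
#     >>> determine_session_from_folder("misc footage") is None
#     True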
+def find_all_videos_recursive(directory: str, participant_path: str) -> List[Tuple[str, Optional[str]]]:
+    """Recursively find all video files in a directory and determine their session."""
+    videos = []
+
+    try:
+        for item in os.listdir(directory):
+            if item.startswith('.'):  # Skip hidden files
+                continue
+
+            item_path = os.path.join(directory, item)
+
+            if os.path.isfile(item_path):
+                if item.lower().endswith(('.mp4', '.mov', '.avi', '.mkv', '.m4v', '.3gp', '.mts')):
+                    session_id = find_age_folder_session(directory, participant_path)
+                    videos.append((item_path, session_id))
+
+            elif os.path.isdir(item_path):
+                videos.extend(find_all_videos_recursive(item_path, participant_path))
+
+    except PermissionError:
+        print(f"Permission denied: {directory}")
+    except Exception as e:
+        print(f"Error accessing {directory}: {e}")
+
+    return videos
+
+def extract_participant_id_from_folder(folder_name: str) -> str:
+    """Extract participant ID from folder names like 'A.A._Home_Videos_AMES_A2P7X9N8L7'."""
+    if 'AMES_' in folder_name:
+        parts = folder_name.split('AMES_')
+        if len(parts) > 1:
+            return parts[1].strip()
+
+    if '_' in folder_name:
+        return folder_name.split('_')[-1]
+
+    return folder_name
+
+def get_all_videos_from_age_folders(video_root):
+    """Find ALL videos in age folders regardless of Excel file."""
+    all_videos = []
+
+    try:
+        for participant_folder in os.listdir(video_root):
+            participant_path = os.path.join(video_root, participant_folder)
+            if not os.path.isdir(participant_path):
+                continue
+
+            participant_id = extract_participant_id_from_folder(participant_folder)
+            if not participant_id:
+                continue
+
+            participant_videos = find_all_videos_recursive(participant_path, participant_path)
+
+            for video_path, session_id in participant_videos:
+                if session_id in ['01', '02']:
+                    all_videos.append({
+                        'participant_id': participant_id,
+                        'filename': os.path.basename(video_path),
+                        'full_path': video_path,
+                        'session_id': session_id,
+                        'age_folder': os.path.basename(os.path.dirname(video_path))
+                    })
+
+    except Exception as e:
+        print(f"Error scanning video folders: {e}")
+
+    return all_videos
+
+def create_dummy_excel_data(video_path, participant_id, session_id, task_label="unknown"):
+    """Create dummy behavioral data for videos not in Excel file."""
+    video_filename = os.path.basename(video_path)
+
+    dummy_row_data = {
+        'ID': participant_id,
+        'FileName': video_filename,
+        'Context': task_label,
+        'Location': 'n/a',
+        'Activity': 'n/a',
+        'Child_of_interest_clear': 'n/a',
+        '#_adults': 'n/a',
+        '#_children': 'n/a',
+        '#_people_background': 'n/a',
+        'Interaction_with_child': 'n/a',
+        '#_people_interacting': 'n/a',
+        'Child_constrained': 'n/a',
+        'Constraint_type': 'n/a',
+        'Supports': 'n/a',
+        'Support_type': 'n/a',
+        'Example_support_type': 'n/a',
+        'Gestures': 'n/a',
+        'Gesture_type': 'n/a',
+        'Vocalizations': 'n/a',
+        'RMM': 'n/a',
+        'RMM_type': 'n/a',
+        'Response_to_name': 'n/a',
+        'Locomotion': 'n/a',
+        'Locomotion_type': 'n/a',
+        'Grasping': 'n/a',
+        'Grasp_type': 'n/a',
+        'Body_Parts_Visible': 'n/a',
+        'Angle_of_Body': 'n/a',
+        'time_point': 'n/a',
+        'DOB': 'n/a',
+        'Vid_date': 'n/a',
+        'Video_Quality_Child_Face_Visibility': 'n/a',
+        'Video_Quality_Child_Body_Visibility': 'n/a',
+        'Video_Quality_Child_Hand_Visibility': 'n/a',
+        'Video_Quality_Lighting': 'n/a',
+        'Video_Quality_Resolution': 'n/a',
+        'Video_Quality_Motion': 'n/a',
+        'Coder': 'n/a',
+        'SourceFile': 'n/a',
+        'Vid_duration': '00:00:00',
+        'Notes': 'Video not found in Excel file - behavioral data unavailable'
+    }
+
+    return dummy_row_data
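# --- Editorial sketch (not part of this patch): the ID extraction above
# applied to the folder pattern named in its own docstring. ---
#
#     >>> extract_participant_id_from_folder("A.A._Home_Videos_AMES_A2P7X9N8L7")
#     'A2P7X9N8L7'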
+ """Extract and create task label from Excel row data.""" + context = str(row.get('Context', '')).strip() + + if context and context.lower() not in ['nan', 'n/a', '']: + return make_bids_task_label(context) + else: + return "unknown" + +def get_next_run_number(participant_id: str, session_id: str, task_label: str, + final_bids_root: str) -> int: + """Find the next available run number for this participant/session/task.""" + beh_dir = os.path.join(final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh") + + if not os.path.exists(beh_dir): + return 1 + + # Look for existing files with this task + pattern = f"sub-{participant_id}_ses-{session_id}_task-{task_label}_" + existing_files = [f for f in os.listdir(beh_dir) if f.startswith(pattern)] + + if not existing_files: + return 1 + + # Extract run numbers from existing files + run_numbers = [] + for filename in existing_files: + if "_run-" in filename: + run_part = filename.split("_run-")[1].split("_")[0] + try: + run_numbers.append(int(run_part)) + except ValueError: + continue + else: + run_numbers.append(1) # Files without run numbers are considered run-1 + + return max(run_numbers) + 1 if run_numbers else 1 + +def create_bids_filename(participant_id: str, session_id: str, task_label: str, + suffix: str, extension: str, run_id: int = 1) -> str: + """Create BIDS-compliant filename with run identifier for multiple videos per task.""" + return f"sub-{participant_id}_ses-{session_id}_task-{task_label}_run-{run_id:02d}_{suffix}.{extension}" + +# Video processing functions +def extract_exif(video_path: str) -> Dict[str, Any]: + """Extract video metadata using ffprobe.""" + try: + cmd = [ + "ffprobe", + "-v", "quiet", + "-print_format", "json", + "-show_format", + "-show_streams", + video_path, + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + return {"ffprobe_error": result.stderr.strip()} + + metadata = json.loads(result.stdout) + extracted = {} + + format_info = metadata.get("format", {}) + extracted["filename"] = format_info.get("filename") + extracted["format"] = format_info.get("format_long_name") + extracted["duration_sec"] = float(format_info.get("duration", 0)) + extracted["bit_rate"] = int(format_info.get("bit_rate", 0)) + extracted["size_bytes"] = int(format_info.get("size", 0)) + + return extracted + except Exception as e: + return {"error": str(e)} + +def stabilize_video(input_path: str, stabilized_path: str, temp_dir: str) -> None: + """Stabilize video using ffmpeg vidstab.""" + transforms_file = os.path.join(temp_dir, "transforms.trf") + + detect_cmd = [ + "ffmpeg", "-i", input_path, + "-vf", f"vidstabdetect=shakiness=5:accuracy=15:result={transforms_file}", + "-f", "null", "-" + ] + subprocess.run(detect_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + transform_cmd = [ + "ffmpeg", "-y", "-i", input_path, + "-vf", f"vidstabtransform=smoothing=30:input={transforms_file}", + "-c:v", "libx264", "-preset", "slow", "-crf", "23", + "-c:a", "copy", stabilized_path + ] + subprocess.run(transform_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + if os.path.exists(transforms_file): + os.remove(transforms_file) + +def preprocess_video(input_path: str, output_path: str, temp_dir: str, target_framerate: int) -> None: + """Preprocess video with stabilization, denoising, and standardization.""" + if not os.path.exists(input_path): + raise ValueError(f"Input video not found: {input_path}") + + stabilized_tmp = os.path.join(temp_dir, 
f"stabilized_temp_{os.getpid()}.mp4") + + try: + stabilize_video(input_path, stabilized_tmp, temp_dir) + + # Verify stabilization succeeded + if not os.path.exists(stabilized_tmp): + raise ValueError("Video stabilization failed - no intermediate file created") + + vf_filters = ( + "yadif," + "hqdn3d," + "eq=contrast=1.0:brightness=0.0:saturation=1.0," + "scale=-2:720," + "pad=ceil(iw/2)*2:ceil(ih/2)*2," + f"fps={target_framerate}" + ) + + cmd = [ + "ffmpeg", "-y", "-i", stabilized_tmp, + "-vf", vf_filters, + "-c:v", "libx264", "-crf", "23", "-preset", "fast", + "-c:a", "aac", "-b:a", "128k", + "-movflags", "+faststart", + output_path, + ] + + # Capture and check stderr + result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True) + if result.returncode != 0: + raise ValueError(f"Video processing failed: {result.stderr}") + + # Verify output file was created and has content + if not os.path.exists(output_path): + raise ValueError(f"Video processing failed - no output file: {output_path}") + if os.path.getsize(output_path) == 0: + raise ValueError(f"Video processing failed - empty output file: {output_path}") + + finally: + # Clean up temp file + if os.path.exists(stabilized_tmp): + os.remove(stabilized_tmp) + +def extract_audio(input_path: str, output_audio_path: str) -> None: + """Extract audio from video file.""" + if not os.path.exists(input_path): + raise ValueError(f"Input video not found: {input_path}") + + cmd = [ + "ffmpeg", "-y", "-i", input_path, + "-vn", "-acodec", "pcm_s16le", + "-ar", "16000", "-ac", "1", + output_audio_path, + ] + + # Check return code and stderr + result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True) + if result.returncode != 0: + raise ValueError(f"Audio extraction failed: {result.stderr}") + + # Verify output file was created + if not os.path.exists(output_audio_path): + raise ValueError(f"Audio extraction failed - no output file: {output_audio_path}") + + +def safe_float_conversion(value, default='n/a'): + """Safely convert value to float, return default if conversion fails.""" + if pd.isna(value): + return default + + # Convert to string and check for common non-numeric indicators + str_val = str(value).strip().lower() + if str_val in ['', 'n/a', 'na', 'nan', 'none', 'null']: + return default + + try: + return float(value) + except (ValueError, TypeError): + return default + +# BIDS file creation functions +def create_events_file(group_df: pd.DataFrame, output_path: str) -> None: + """Create events.tsv file from Excel data with all columns.""" + events_data = [] + + for idx, row in group_df.iterrows(): + event = { + 'onset': 0.0, + 'duration': parse_duration(row.get('Vid_duration', '00:00:00')), + 'coder': str(row.get('Coder', 'n/a')), + 'source_file': str(row.get('SourceFile', 'n/a')), + 'context': str(row.get('Context', 'n/a')), + 'location': str(row.get('Location', 'n/a')), + 'activity': str(row.get('Activity', 'n/a')), + 'child_clear': str(row.get('Child_of_interest_clear', 'n/a')), + 'num_adults': str(row.get('#_adults', 'n/a')), + 'num_children': str(row.get('#_children', 'n/a')), + 'num_people_background': str(row.get('#_people_background', 'n/a')), + 'interaction_with_child': str(row.get('Interaction_with_child', 'n/a')), + 'num_people_interacting': str(row.get('#_people_interacting', 'n/a')), + 'child_constrained': str(row.get('Child_constrained', 'n/a')), + 'constraint_type': str(row.get('Constraint_type', 'n/a')), + 'supports': str(row.get('Supports', 'n/a')), + 'support_type': 
+def safe_float_conversion(value, default='n/a'):
+    """Safely convert value to float, return default if conversion fails."""
+    if pd.isna(value):
+        return default
+
+    # Convert to string and check for common non-numeric indicators
+    str_val = str(value).strip().lower()
+    if str_val in ['', 'n/a', 'na', 'nan', 'none', 'null']:
+        return default
+
+    try:
+        return float(value)
+    except (ValueError, TypeError):
+        return default
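# --- Editorial sketch (not part of this patch): boundary behaviour of the
# converter above, doctest-style. ---
#
#     >>> safe_float_conversion("3.5")
#     3.5
#     >>> safe_float_conversion("N/A")
#     'n/a'
#     >>> safe_float_conversion(None)
#     'n/a'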
+# BIDS file creation functions
+def create_events_file(group_df: pd.DataFrame, output_path: str) -> None:
+    """Create events.tsv file from Excel data with all columns."""
+    events_data = []
+
+    for idx, row in group_df.iterrows():
+        event = {
+            'onset': 0.0,
+            'duration': parse_duration(row.get('Vid_duration', '00:00:00')),
+            'coder': str(row.get('Coder', 'n/a')),
+            'source_file': str(row.get('SourceFile', 'n/a')),
+            'context': str(row.get('Context', 'n/a')),
+            'location': str(row.get('Location', 'n/a')),
+            'activity': str(row.get('Activity', 'n/a')),
+            'child_clear': str(row.get('Child_of_interest_clear', 'n/a')),
+            'num_adults': str(row.get('#_adults', 'n/a')),
+            'num_children': str(row.get('#_children', 'n/a')),
+            'num_people_background': str(row.get('#_people_background', 'n/a')),
+            'interaction_with_child': str(row.get('Interaction_with_child', 'n/a')),
+            'num_people_interacting': str(row.get('#_people_interacting', 'n/a')),
+            'child_constrained': str(row.get('Child_constrained', 'n/a')),
+            'constraint_type': str(row.get('Constraint_type', 'n/a')),
+            'supports': str(row.get('Supports', 'n/a')),
+            'support_type': str(row.get('Support_type', 'n/a')),
+            'example_support_type': str(row.get('Example_support_type', 'n/a')),
+            'gestures': str(row.get('Gestures', 'n/a')),
+            'gesture_type': str(row.get('Gesture_type', 'n/a')),
+            'vocalizations': str(row.get('Vocalizations', 'n/a')),
+            'rmm': str(row.get('RMM', 'n/a')),
+            'rmm_type': str(row.get('RMM_type', 'n/a')),
+            'response_to_name': str(row.get('Response_to_name', 'n/a')),
+            'locomotion': str(row.get('Locomotion', 'n/a')),
+            'locomotion_type': str(row.get('Locomotion_type', 'n/a')),
+            'grasping': str(row.get('Grasping', 'n/a')),
+            'grasp_type': str(row.get('Grasp_type', 'n/a')),
+            'body_parts_visible': str(row.get('Body_Parts_Visible', 'n/a')),
+            'angle_of_body': str(row.get('Angle_of_Body', 'n/a')),
+            'timepoint': str(row.get('time_point', 'n/a')),
+            'dob': str(row.get('DOB', 'n/a')),
+            'vid_date': str(row.get('Vid_date', 'n/a')),
+            'video_quality_face': safe_float_conversion(row.get('Video_Quality_Child_Face_Visibility')),
+            'video_quality_body': safe_float_conversion(row.get('Video_Quality_Child_Body_Visibility')),
+            'video_quality_hand': safe_float_conversion(row.get('Video_Quality_Child_Hand_Visibility')),
+            'video_quality_lighting': safe_float_conversion(row.get('Video_Quality_Lighting')),
+            'video_quality_resolution': safe_float_conversion(row.get('Video_Quality_Resolution')),
+            'video_quality_motion': safe_float_conversion(row.get('Video_Quality_Motion')),
+            'notes': str(row.get('Notes', 'n/a'))
+        }
+        events_data.append(event)
+
+    events_df = pd.DataFrame(events_data)
+    events_df.to_csv(output_path, sep='\t', index=False, na_rep='n/a')
+
+def create_video_metadata_json(metadata: Dict[str, Any], processing_info: Dict[str, Any],
+                               task_info: Dict[str, Any], output_path: str,
+                               target_framerate: int, target_resolution: str) -> None:
+    """Create JSON metadata file for processed video with dynamic task info."""
+    video_json = {
+        "TaskName": task_info.get("task_name", "unknown"),
+        "TaskDescription": task_info.get("task_description", "Video recorded during behavioral session"),
+        "Instructions": task_info.get("instructions", "Natural behavior in home environment"),
+        "Context": task_info.get("context", "n/a"),
+        "Activity": task_info.get("activity", "n/a"),
+        "SamplingFrequency": target_framerate,
+        "Resolution": target_resolution,
+        "ProcessingPipeline": {
+            "Stabilization": processing_info.get("has_stabilization", False),
+            "Denoising": processing_info.get("has_denoising", False),
+            "Equalization": processing_info.get("has_equalization", False),
+            "StandardizedFPS": target_framerate,
+            "StandardizedResolution": target_resolution,
+        },
+        "OriginalMetadata": metadata,
+    }
+
+    with open(output_path, "w") as f:
+        json.dump(video_json, f, indent=4)
+
+def create_audio_metadata_json(duration_sec: float, task_info: Dict[str, Any], output_path: str) -> None:
+    """Create JSON metadata file for extracted audio with dynamic task info."""
+    audio_json = {
+        "SamplingFrequency": 16000,
+        "Channels": 1,
+        "SampleEncoding": "16bit",
+        "Duration": duration_sec,
+        "TaskName": task_info.get("task_name", "unknown"),
+        "TaskDescription": task_info.get("task_description", "Audio extracted from behavioral session"),
+        "Context": task_info.get("context", "n/a"),
+        "Activity": task_info.get("activity", "n/a"),
+    }
+
+    with open(output_path, "w") as f:
+        json.dump(audio_json, f, indent=4)
+
+def create_raw_video_json(row, task_info: Dict[str, Any], video_path: str, output_path: str) -> None:
+    """Create JSON metadata for raw video."""
+    video_props = get_video_properties(video_path)
+
+    video_json = {
+        "TaskName": task_info.get("task_name", "unknown"),
+        "TaskDescription": task_info.get("task_description", "Raw video from behavioral session"),
+        "SamplingFrequency": video_props.get("SamplingFrequency", "n/a"),
+        "Resolution": video_props.get("Resolution", "n/a"),
+        "OriginalFilename": str(row.get('FileName', '')),
+        "Duration": parse_duration(row.get('Vid_duration', '00:00:00')),
+        "RecordingDate": str(row.get('Vid_date', 'n/a')),
+        "Context": task_info.get("context", "n/a"),
+        "Activity": task_info.get("activity", "n/a"),
+        "TimePoint": str(row.get('time_point', 'n/a')),
+        "SourceFile": str(row.get('SourceFile', 'n/a'))
+    }
+
+    with open(output_path, 'w') as f:
+        json.dump(video_json, f, indent=4)
+
+def process_single_video(video_info: Dict, excel_df: pd.DataFrame,
+                         final_bids_root: str, final_derivatives_dir: str,
+                         final_sourcedata_dir: str, temp_dir: str,
+                         target_framerate: int,
+                         target_resolution: str) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]:
+    """Process a single video with all BIDS structures."""
+
+    participant_id = video_info['participant_id']
+    filename = video_info['filename']
+    session_id = video_info['session_id']
+    input_video_path = video_info['full_path']
+
+    safe_print(f"Processing: {participant_id}/{filename}")
+
+    try:
+        # Check if video exists in Excel or create dummy data
+        participant_excel = excel_df[excel_df['ID'].astype(str) == str(participant_id)]
+        video_excel = participant_excel[participant_excel['FileName'].astype(str) == filename]
+
+        if video_excel.empty:
+            # Create dummy data for missing Excel entries
+            dummy_data = create_dummy_excel_data(input_video_path, participant_id, session_id)
+            video_excel = pd.DataFrame([dummy_data])
+            has_excel_data = False
+            safe_print("  No Excel data found - using dummy data")
+        else:
+            has_excel_data = True
+
+        excel_row = video_excel.iloc[0]
+        task_label = get_task_from_excel_row(excel_row)
+
+        # Create task information
+        task_info = {
+            "task_name": task_label,
+            "task_description": f"Behavioral session: {excel_row.get('Activity', 'unknown activity')}",
+            "instructions": "Natural behavior observation",
+            "context": str(excel_row.get('Context', 'n/a')),
+            "activity": str(excel_row.get('Activity', 'n/a'))
+        }
+
+        # Create BIDS directory structure
+        raw_subj_dir = os.path.join(final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh")
+        deriv_subj_dir = os.path.join(final_derivatives_dir, f"sub-{participant_id}", f"ses-{session_id}", "beh")
+        source_subj_dir = os.path.join(final_sourcedata_dir, f"sub-{participant_id}", f"ses-{session_id}", "video")
+
+        os.makedirs(raw_subj_dir, exist_ok=True)
+        os.makedirs(deriv_subj_dir, exist_ok=True)
+        os.makedirs(source_subj_dir, exist_ok=True)
+
+        # Create BIDS filenames with run number.
+        # Strip the leading dot from the extension so names come out as
+        # "..._video.3gp" rather than "..._video..3gp" (create_bids_filename
+        # adds its own dot before the extension).
+        ext = os.path.splitext(filename)[1].lstrip('.')
+        run_number = get_next_run_number(participant_id, session_id, task_label, final_bids_root)
+
+        raw_video_name = create_bids_filename(participant_id, session_id, task_label, "beh", "mp4", run_number)
+        processed_video_name = create_bids_filename(participant_id, session_id, task_label, "desc-processed_beh", "mp4", run_number)
+        audio_name = create_bids_filename(participant_id, session_id, task_label, "audio", "wav", run_number)
+        events_name = create_bids_filename(participant_id, session_id, task_label, "events", "tsv", run_number)
+        source_video_name = create_bids_filename(participant_id, session_id, task_label, "video", ext, run_number)
+
+        # File paths
+        raw_video_path = os.path.join(raw_subj_dir, raw_video_name)
+        processed_video_path = os.path.join(deriv_subj_dir, processed_video_name)
+        audio_path = os.path.join(deriv_subj_dir, audio_name)
+        events_path = os.path.join(raw_subj_dir, events_name)
+        source_video_path = os.path.join(source_subj_dir, source_video_name)
+
+        # Copy to sourcedata (original, unmodified)
+        if not os.path.exists(source_video_path):
+            shutil.copy2(input_video_path, source_video_path)
+            if not os.path.exists(source_video_path):
+                raise ValueError(f"Failed to copy to sourcedata: {source_video_path}")
+            safe_print("  Copied to sourcedata")
+
+        if not os.path.exists(raw_video_path):
+            if ext.lower() != 'mp4':
+                # Convert to mp4 without processing
+                cmd = ["ffmpeg", "-y", "-i", source_video_path, "-c", "copy", raw_video_path]
+                result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
+                # Check return code and verify output file
+                if result.returncode != 0:
+                    raise ValueError(f"FFmpeg conversion failed: {result.stderr}")
+                if not os.path.exists(raw_video_path):
+                    raise ValueError(f"FFmpeg did not create output file: {raw_video_path}")
+                safe_print("  Converted to raw BIDS format")
+            else:
+                shutil.copy2(source_video_path, raw_video_path)
+                # Verify copy succeeded
+                if not os.path.exists(raw_video_path):
+                    raise ValueError(f"Failed to copy to raw BIDS: {raw_video_path}")
+                safe_print("  Copied to raw BIDS")
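#         --- Editorial sketch (not part of this patch): the PATCH 02 log shows
#         this "-c copy" remux failing for a .3gp source whose amr_nb audio has
#         no MP4 mapping ("Could not find tag for codec amr_nb ... not currently
#         supported in container"). A hedged fallback is to retry with the audio
#         re-encoded to AAC while still stream-copying the video:
#
#             fallback_cmd = ["ffmpeg", "-y", "-i", source_video_path,
#                             "-c:v", "copy", "-c:a", "aac", "-b:a", "128k",
#                             raw_video_path]
#             subprocess.run(fallback_cmd, check=True)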
ValueError(f"Failed to create processed video JSON: {processed_video_json_path}") + + # Audio JSON + audio_json_path = audio_path.replace(".wav", ".json") + create_audio_metadata_json(exif_data.get("duration_sec", 0), task_info, audio_json_path) + if not os.path.exists(audio_json_path): + raise ValueError(f"Failed to create audio JSON: {audio_json_path}") + + # Store processing information + entry = { + "participant_id": participant_id, + "session_id": session_id, + "task_label": task_label, + "original_video": input_video_path, + "source_video_bids": source_video_path, + "raw_video_bids": raw_video_path, + "processed_video_bids": processed_video_path, + "audio_file_bids": audio_path, + "events_file_bids": events_path, + "filename": filename, + "age_folder": video_info['age_folder'], + "duration_sec": exif_data.get("duration_sec", 0), + "has_excel_data": has_excel_data, + "excel_metadata": excel_row.to_dict(), + "task_info": task_info, + "processing_info": processing_info, + } + + safe_print(f" Successfully processed: {participant_id}/{filename}") + return entry, None + + except Exception as e: + safe_print(f" ERROR processing {input_video_path}: {str(e)}") + return None, {"video": input_video_path, "error": str(e)} + +def create_dataset_description(final_bids_root: str) -> None: + """Create dataset_description.json for main BIDS dataset.""" + os.makedirs(final_bids_root, exist_ok=True) + + dataset_desc = { + "Name": "SAILS Phase III Home Videos", + "BIDSVersion": "1.9.0", + "DatasetType": "raw", + "License": "na", + "Authors": ["Research Team"], + "Acknowledgements": "participants and families", + "HowToAcknowledge": "na", + "Funding": ["na"], + "EthicsApprovals": ["na"], + "ReferencesAndLinks": ["na"], + "DatasetDOI": "doi:", + } + + filepath = os.path.join(final_bids_root, "dataset_description.json") + with open(filepath, "w") as f: + json.dump(dataset_desc, f, indent=4) + + if not os.path.exists(filepath): + raise ValueError(f"Failed to create dataset_description.json at {filepath}") + + + +def create_derivatives_dataset_description(final_derivatives_dir: str) -> None: + """Create dataset_description.json for derivatives.""" + os.makedirs(final_derivatives_dir, exist_ok=True) + + derivatives_desc = { + "Name": "SAILS Phase III Home Videos - Preprocessed", + "BIDSVersion": "1.9.0", + "DatasetType": "derivative", + "GeneratedBy": [ + { + "Name": "Video Preprocessing Pipeline", + "Version": "1.0.0", + "Description": ( + "FFmpeg-based video stabilization, denoising, " + "and standardization pipeline with audio extraction" + ), + "CodeURL": "local", + } + ], + "SourceDatasets": [{"URL": "", "Version": "1.0.0"}], + "HowToAcknowledge": "Please cite the original study", + } + + filepath = os.path.join(final_derivatives_dir, "dataset_description.json") + with open(filepath, "w") as f: + json.dump(derivatives_desc, f, indent=4) + + if not os.path.exists(filepath): + raise ValueError(f"Failed to create derivatives dataset_description.json at {filepath}") + + +def create_readme(final_bids_root: str) -> None: + """Create README file for the BIDS dataset.""" + + os.makedirs(final_bids_root, exist_ok=True) + + readme_content = """# SAILS Phase III Home Videos BIDS Dataset + +## Overview +This dataset contains home videos from the SAILS Phase III study, +organized according to the Brain Imaging Data Structure (BIDS) specification. + +## Data Collection +Videos were collected from home environments during various activities. 
+def create_participants_files(processed_data: List[Dict[str, Any]], final_bids_root: str) -> None:
+    """Create participants.tsv and participants.json files."""
+    processed_participants = set(entry["participant_id"] for entry in processed_data)
+
+    participants_data = []
+    for participant_id in sorted(processed_participants):
+        participants_data.append({
+            'participant_id': f'sub-{participant_id}',
+            'age': 'n/a',
+            'validity': 'n/a'
+        })
+
+    participants_df = pd.DataFrame(participants_data)
+    participants_df.to_csv(os.path.join(final_bids_root, "participants.tsv"), sep='\t', index=False, na_rep='n/a')
+
+    participants_json = {
+        "participant_id": {"Description": "Unique participant identifier"},
+        "age": {"Description": "Age information", "Units": "months"},
+        "validity": {"Description": "Data validity information"},
+    }
+
+    with open(os.path.join(final_bids_root, "participants.json"), "w") as f:
+        json.dump(participants_json, f, indent=4)
+
+def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None:
+    """Print processing summary statistics."""
+    print("PROCESSING SUMMARY")
+    print(f"Successfully processed: {len(all_processed)} videos")
+    print(f"Failed to process: {len(all_failed)} videos")
+    print(f"Total videos attempted: {len(all_processed) + len(all_failed)}")
+
+    if all_processed:
+        # Excel data availability
+        with_excel = sum(1 for entry in all_processed if entry.get('has_excel_data', False))
+        without_excel = len(all_processed) - with_excel
+        print("\nData sources:")
+        print(f"  With Excel behavioral data: {with_excel} videos")
+        print(f"  With dummy behavioral data: {without_excel} videos")
+
+        # Task distribution
+        task_counts = {}
+        participant_counts = {}
+        session_counts = {}
+
+        for entry in all_processed:
+            task = entry['task_label']
+            participant = entry['participant_id']
+            session = entry['session_id']
+
+            task_counts[task] = task_counts.get(task, 0) + 1
+            participant_counts[participant] = participant_counts.get(participant, 0) + 1
+            session_counts[session] = session_counts.get(session, 0) + 1
+
+        print("\nTask distribution:")
+        for task, count in sorted(task_counts.items()):
+            print(f"  {task}: {count} videos")
+
+        print("\nSession distribution:")
+        for session, count in sorted(session_counts.items()):
+            print(f"  Session {session}: {count} videos")
+
+        print(f"\nUnique participants processed: {len(participant_counts)}")
+
+        # Duration statistics
+        durations = [entry.get('duration_sec', 0) for entry in all_processed]
+        total_duration = sum(durations)
+        avg_duration = total_duration / len(durations) if durations else 0
+
+        print("\nDuration statistics:")
+        print(f"  Total video duration: {total_duration/3600:.1f} hours")
+        print(f"  Average video duration: {avg_duration/60:.1f} minutes")
+
+    if all_failed:
+        print("\nFailed videos breakdown:")
+        error_types = {}
+        for entry in all_failed:
+            error = entry.get('error', 'Unknown error')
+            error_types[error] = error_types.get(error, 0) + 1
+
+        for error, count in sorted(error_types.items()):
+            print(f"  {error}: {count} videos")
{count} videos") + + print(f"\nUnique participants processed: {len(participant_counts)}") + + # Duration statistics + durations = [entry.get('duration_sec', 0) for entry in all_processed] + total_duration = sum(durations) + avg_duration = total_duration / len(durations) if durations else 0 + + print(f"\nDuration statistics:") + print(f" Total video duration: {total_duration/3600:.1f} hours") + print(f" Average video duration: {avg_duration/60:.1f} minutes") + + if all_failed: + print(f"\nFailed videos breakdown:") + error_types = {} + for entry in all_failed: + error = entry.get('error', 'Unknown error') + error_types[error] = error_types.get(error, 0) + 1 + + for error, count in sorted(error_types.items()): + print(f" {error}: {count} videos") + +def main(): + """Main function.""" + + if len(sys.argv) != 3: + print("Usage: python bids.py ") + sys.exit(1) + + # Configuration + EXCEL_FILE = "/orcd/data/satra/002/datasets/SAILS/data4analysis/Video Rating Data/SAILS_RATINGS_ALL_8.8.25.xlsx" + VIDEO_ROOT = "/orcd/data/satra/002/datasets/SAILS/Phase_III_Videos/Videos_from_external/" + OUTPUT_DIR = "/home/aparnabg/orcd/scratch/bidsdata" + TARGET_RESOLUTION = "1280x720" + TARGET_FRAMERATE = 30 + + FINAL_BIDS_ROOT = os.path.join(OUTPUT_DIR, "final_bids-dataset") + FINAL_DERIVATIVES_DIR = os.path.join(FINAL_BIDS_ROOT, "derivatives", "preprocessed") + FINAL_SOURCEDATA_DIR = os.path.join(FINAL_BIDS_ROOT, "sourcedata") + + # Parse command line arguments + my_task_id = int(sys.argv[1]) + num_tasks = int(sys.argv[2]) + + # Create task-specific temp directory + TEMP_DIR = os.path.join(OUTPUT_DIR, str(my_task_id), "temp") + os.makedirs(TEMP_DIR, exist_ok=True) + + # Start timing + start_time = time.time() + + # Check if paths exist + if not os.path.exists(VIDEO_ROOT): + print(f"ERROR: Video root directory not found: {VIDEO_ROOT}") + sys.exit(1) + + if not os.path.exists(EXCEL_FILE): + print(f"ERROR: Excel file not found: {EXCEL_FILE}") + sys.exit(1) + + # Load Excel file + try: + excel_df = pd.read_excel(EXCEL_FILE) + excel_df.columns = excel_df.columns.str.strip() + safe_print(f"Loaded {len(excel_df)} rows from Excel file") + except Exception as e: + safe_print(f"ERROR: Failed to load Excel file: {e}") + sys.exit(1) + + # Discover videos + print("Discovering all video files from age folders") + all_videos = get_all_videos_from_age_folders(VIDEO_ROOT) + print(f"Found {len(all_videos)} video files in age-specific folders") + + if not all_videos: + print("ERROR: No video files found") + sys.exit(1) + + # Create BIDS structure files + if my_task_id == 0: + try: + safe_print("Creating BIDS structure files...") + create_dataset_description(FINAL_BIDS_ROOT) + create_derivatives_dataset_description(FINAL_DERIVATIVES_DIR) + create_readme(FINAL_BIDS_ROOT) + safe_print("Successfully created BIDS structure files") + except Exception as e: + safe_print(f"CRITICAL ERROR: Failed to create BIDS structure files: {e}") + sys.exit(1) + + # Divide videos among tasks + video_chunks = all_videos[my_task_id::num_tasks] + safe_print(f"Task {my_task_id}: Processing {len(video_chunks)} videos") + + # Process videos + all_processed = [] + all_failed = [] + + for i, video_info in enumerate(video_chunks, 1): + safe_print(f"Video {i}/{len(video_chunks)}") + + processed_entry, failed_entry = process_single_video( + video_info, excel_df, FINAL_BIDS_ROOT, FINAL_DERIVATIVES_DIR, + FINAL_SOURCEDATA_DIR, TEMP_DIR, TARGET_FRAMERATE, TARGET_RESOLUTION + ) + + if processed_entry: + all_processed.append(processed_entry) + if failed_entry: + 
+    # Save processing logs
+    task_output_dir = os.path.join(OUTPUT_DIR, str(my_task_id))
+    os.makedirs(task_output_dir, exist_ok=True)
+
+    log_path = os.path.join(task_output_dir, "processing_log.json")
+    failed_path = os.path.join(task_output_dir, "not_processed.json")
+
+    try:
+        with open(log_path, "w") as f:
+            json.dump(all_processed, f, indent=4, default=str)
+
+        with open(failed_path, "w") as f:
+            json.dump(all_failed, f, indent=4, default=str)
+    except Exception as e:
+        safe_print(f"ERROR: Failed to save processing logs: {e}")
+
+    # Clean up temp directory
+    if os.path.exists(TEMP_DIR):
+        shutil.rmtree(TEMP_DIR)
+
+    # Summary
+    end_time = time.time()
+    total_time = end_time - start_time
+    print_summary(all_processed, all_failed)
+    safe_print(f"Total processing time: {total_time/3600:.1f} hours ({total_time/60:.1f} minutes)")
+
+    if all_processed:
+        avg_time_per_video = total_time / len(all_processed)
+        safe_print(f"Average time per video: {avg_time_per_video:.1f} seconds")
+
+    safe_print("Processing complete")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 739d3527ee7587fc8bec05f66074d97004fb91c4 Mon Sep 17 00:00:00 2001
From: aparnabg
Date: Wed, 24 Sep 2025 21:43:20 +0530
Subject: [PATCH 02/36] add log_file.ipynb

---
 src/new_code/log_file.ipynb | 597 ++++++++++++++++++++++++++++++++++++
 1 file changed, 597 insertions(+)
 create mode 100644 src/new_code/log_file.ipynb

diff --git a/src/new_code/log_file.ipynb b/src/new_code/log_file.ipynb
new file mode 100644
index 0000000..b753187
--- /dev/null
+++ b/src/new_code/log_file.ipynb
@@ -0,0 +1,597 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total processed: 3259\n",
+      "Total failed: 20\n",
+      "\n",
+      "Tasks processed:\n",
+      "  bookshare: 56\n",
+      "  dailyroutine: 252\n",
+      "  generalsocialcommunicationinteraction: 1010\n",
+      "  generalsocialinteraction: 1\n",
+      "  motorplay: 497\n",
+      "  other: 283\n",
+      "  socialroutine: 158\n",
+      "  specialoccasion: 135\n",
+      "  toyplay: 564\n",
+      "  unknown: 303\n",
+      "\n",
+      "Sessions:\n",
+      "  Session 01: 1677\n",
+      "  Session 02: 1582\n",
+      "\n",
+      "Failed videos by error type:\n",
+      "  Audio extraction failed: ffmpeg version N-121159-g0bd5a7d371-20250921 Copyright (c) 2000-2025 the FFmpeg developers\n",
+      "  built with gcc 15.2.0 (crosstool-NG 1.28.0.1_403899e)\n",
+      "  configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-ffbuild-linux-gnu- --arch=x86_64 --target-os=linux --enable-gpl --enable-version3 --disable-debug --enable-iconv --enable-zlib --enable-libxml2 --enable-libsoxr --enable-openssl --enable-libvmaf --enable-fontconfig --enable-libharfbuzz --enable-libfreetype --enable-libfribidi --enable-vulkan --enable-libshaderc --enable-libvorbis --enable-libxcb --enable-xlib --enable-libpulse --enable-opencl --enable-gmp --enable-lzma --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth --enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enable-frei0r --enable-libgme --enable-libkvazaar --enable-libaribcaption --enable-libass --enable-libbluray --enable-libjxl --enable-libmp3lame --enable-libopus --enable-libplacebo --enable-librist --enable-libssh --enable-libtheora --enable-libvpx --enable-libwebp --enable-libzmq --enable-lv2 --enable-libvpl --enable-openal
--enable-liboapv --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 --enable-libopenjpeg --enable-libopenmpt --enable-librav1e --enable-librubberband --disable-schannel --enable-sdl2 --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtwolame --enable-libuavs3d --enable-libdrm --enable-vaapi --enable-libvidstab --enable-libvvenc --enable-whisper --enable-libx264 --enable-libx265 --enable-libxavs2 --enable-libxvid --enable-libzimg --enable-libzvbi --extra-cflags=-DLIBTWOLAME_STATIC --extra-cxxflags= --extra-libs='-lgomp -ldl' --extra-ldflags=-pthread --extra-ldexeflags=-pie --cc=x86_64-ffbuild-linux-gnu-gcc --cxx=x86_64-ffbuild-linux-gnu-g++ --ar=x86_64-ffbuild-linux-gnu-gcc-ar --ranlib=x86_64-ffbuild-linux-gnu-gcc-ranlib --nm=x86_64-ffbuild-linux-gnu-gcc-nm --extra-version=20250921\n", + " libavutil 60. 13.100 / 60. 13.100\n", + " libavcodec 62. 15.100 / 62. 15.100\n", + " libavformat 62. 6.100 / 62. 6.100\n", + " libavdevice 62. 2.100 / 62. 2.100\n", + " libavfilter 11. 9.100 / 11. 9.100\n", + " libswscale 9. 3.100 / 9. 3.100\n", + " libswresample 6. 2.100 / 6. 2.100\n", + "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-B1S3L8Q7Y6/ses-02/beh/sub-B1S3L8Q7Y6_ses-02_task-motorplay_run-03_desc-processed_beh.mp4':\n", + " Metadata:\n", + " major_brand : isom\n", + " minor_version : 512\n", + " compatible_brands: isomiso2avc1mp41\n", + " encoder : Lavf62.6.100\n", + " Duration: 00:00:01.87, start: 0.000000, bitrate: 2765 kb/s\n", + " Stream #0:0[0x1](und): Video: h264 (High) (avc1 / 0x31637661), yuvj420p(pc, smpte170m/smpte432/bt709, progressive), 540x720, 2758 kb/s, 30 fps, 30 tbr, 15360 tbn (default)\n", + " Metadata:\n", + " handler_name : Core Media Video\n", + " vendor_id : [0][0][0][0]\n", + " encoder : Lavc62.15.100 libx264\n", + "Output #0, wav, to '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-B1S3L8Q7Y6/ses-02/beh/sub-B1S3L8Q7Y6_ses-02_task-motorplay_run-03_audio.wav':\n", + "[out#0/wav @ 0x5629c3cb82c0] Output file does not contain any stream\n", + "Error opening output file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-B1S3L8Q7Y6/ses-02/beh/sub-B1S3L8Q7Y6_ses-02_task-motorplay_run-03_audio.wav.\n", + "Error opening output files: Invalid argument\n", + ": 1\n", + " Audio extraction failed: ffmpeg version N-121159-g0bd5a7d371-20250921 Copyright (c) 2000-2025 the FFmpeg developers\n", + " built with gcc 15.2.0 (crosstool-NG 1.28.0.1_403899e)\n", + " configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-ffbuild-linux-gnu- --arch=x86_64 --target-os=linux --enable-gpl --enable-version3 --disable-debug --enable-iconv --enable-zlib --enable-libxml2 --enable-libsoxr --enable-openssl --enable-libvmaf --enable-fontconfig --enable-libharfbuzz --enable-libfreetype --enable-libfribidi --enable-vulkan --enable-libshaderc --enable-libvorbis --enable-libxcb --enable-xlib --enable-libpulse --enable-opencl --enable-gmp --enable-lzma --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth --enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enable-frei0r --enable-libgme --enable-libkvazaar --enable-libaribcaption --enable-libass --enable-libbluray --enable-libjxl --enable-libmp3lame --enable-libopus --enable-libplacebo --enable-librist 
--enable-libssh --enable-libtheora --enable-libvpx --enable-libwebp --enable-libzmq --enable-lv2 --enable-libvpl --enable-openal --enable-liboapv --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 --enable-libopenjpeg --enable-libopenmpt --enable-librav1e --enable-librubberband --disable-schannel --enable-sdl2 --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtwolame --enable-libuavs3d --enable-libdrm --enable-vaapi --enable-libvidstab --enable-libvvenc --enable-whisper --enable-libx264 --enable-libx265 --enable-libxavs2 --enable-libxvid --enable-libzimg --enable-libzvbi --extra-cflags=-DLIBTWOLAME_STATIC --extra-cxxflags= --extra-libs='-lgomp -ldl' --extra-ldflags=-pthread --extra-ldexeflags=-pie --cc=x86_64-ffbuild-linux-gnu-gcc --cxx=x86_64-ffbuild-linux-gnu-g++ --ar=x86_64-ffbuild-linux-gnu-gcc-ar --ranlib=x86_64-ffbuild-linux-gnu-gcc-ranlib --nm=x86_64-ffbuild-linux-gnu-gcc-nm --extra-version=20250921\n", + " libavutil 60. 13.100 / 60. 13.100\n", + " libavcodec 62. 15.100 / 62. 15.100\n", + " libavformat 62. 6.100 / 62. 6.100\n", + " libavdevice 62. 2.100 / 62. 2.100\n", + " libavfilter 11. 9.100 / 11. 9.100\n", + " libswscale 9. 3.100 / 9. 3.100\n", + " libswresample 6. 2.100 / 6. 2.100\n", + "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-I8T4E2J3M6/ses-01/beh/sub-I8T4E2J3M6_ses-01_task-unknown_run-02_desc-processed_beh.mp4':\n", + " Metadata:\n", + " major_brand : isom\n", + " minor_version : 512\n", + " compatible_brands: isomiso2avc1mp41\n", + " encoder : Lavf62.6.100\n", + " Duration: 00:00:04.23, start: 0.000000, bitrate: 1096 kb/s\n", + " Stream #0:0[0x1](und): Video: h264 (High) (avc1 / 0x31637661), yuvj420p(pc, progressive), 406x720, 1092 kb/s, 30 fps, 30 tbr, 15360 tbn (default)\n", + " Metadata:\n", + " handler_name : Core Media Video\n", + " vendor_id : [0][0][0][0]\n", + " encoder : Lavc62.15.100 libx264\n", + "Output #0, wav, to '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-I8T4E2J3M6/ses-01/beh/sub-I8T4E2J3M6_ses-01_task-unknown_run-02_audio.wav':\n", + "[out#0/wav @ 0x55dac7e00900] Output file does not contain any stream\n", + "Error opening output file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-I8T4E2J3M6/ses-01/beh/sub-I8T4E2J3M6_ses-01_task-unknown_run-02_audio.wav.\n", + "Error opening output files: Invalid argument\n", + ": 1\n", + " Audio extraction failed: ffmpeg version N-121159-g0bd5a7d371-20250921 Copyright (c) 2000-2025 the FFmpeg developers\n", + " built with gcc 15.2.0 (crosstool-NG 1.28.0.1_403899e)\n", + " configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-ffbuild-linux-gnu- --arch=x86_64 --target-os=linux --enable-gpl --enable-version3 --disable-debug --enable-iconv --enable-zlib --enable-libxml2 --enable-libsoxr --enable-openssl --enable-libvmaf --enable-fontconfig --enable-libharfbuzz --enable-libfreetype --enable-libfribidi --enable-vulkan --enable-libshaderc --enable-libvorbis --enable-libxcb --enable-xlib --enable-libpulse --enable-opencl --enable-gmp --enable-lzma --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth --enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enable-frei0r --enable-libgme --enable-libkvazaar --enable-libaribcaption --enable-libass 
--enable-libbluray --enable-libjxl --enable-libmp3lame --enable-libopus --enable-libplacebo --enable-librist --enable-libssh --enable-libtheora --enable-libvpx --enable-libwebp --enable-libzmq --enable-lv2 --enable-libvpl --enable-openal --enable-liboapv --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 --enable-libopenjpeg --enable-libopenmpt --enable-librav1e --enable-librubberband --disable-schannel --enable-sdl2 --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtwolame --enable-libuavs3d --enable-libdrm --enable-vaapi --enable-libvidstab --enable-libvvenc --enable-whisper --enable-libx264 --enable-libx265 --enable-libxavs2 --enable-libxvid --enable-libzimg --enable-libzvbi --extra-cflags=-DLIBTWOLAME_STATIC --extra-cxxflags= --extra-libs='-lgomp -ldl' --extra-ldflags=-pthread --extra-ldexeflags=-pie --cc=x86_64-ffbuild-linux-gnu-gcc --cxx=x86_64-ffbuild-linux-gnu-g++ --ar=x86_64-ffbuild-linux-gnu-gcc-ar --ranlib=x86_64-ffbuild-linux-gnu-gcc-ranlib --nm=x86_64-ffbuild-linux-gnu-gcc-nm --extra-version=20250921\n", + " libavutil 60. 13.100 / 60. 13.100\n", + " libavcodec 62. 15.100 / 62. 15.100\n", + " libavformat 62. 6.100 / 62. 6.100\n", + " libavdevice 62. 2.100 / 62. 2.100\n", + " libavfilter 11. 9.100 / 11. 9.100\n", + " libswscale 9. 3.100 / 9. 3.100\n", + " libswresample 6. 2.100 / 6. 2.100\n", + "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-L5Q0Z1W8S2/ses-01/beh/sub-L5Q0Z1W8S2_ses-01_task-generalsocialcommunicationinteraction_run-01_desc-processed_beh.mp4':\n", + " Metadata:\n", + " major_brand : isom\n", + " minor_version : 512\n", + " compatible_brands: isomiso2avc1mp41\n", + " title : 10158604828739603\n", + " encoder : Lavf62.6.100\n", + " Duration: 00:00:07.90, start: 0.000000, bitrate: 984 kb/s\n", + " Stream #0:0[0x1](und): Video: h264 (High) (avc1 / 0x31637661), yuv420p(tv, smpte170m/bt470bg/smpte170m, progressive), 720x720, 981 kb/s, 30 fps, 30 tbr, 15360 tbn (default)\n", + " Metadata:\n", + " handler_name : VideoHandler\n", + " vendor_id : [0][0][0][0]\n", + " encoder : Lavc62.15.100 libx264\n", + "Output #0, wav, to '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-L5Q0Z1W8S2/ses-01/beh/sub-L5Q0Z1W8S2_ses-01_task-generalsocialcommunicationinteraction_run-01_audio.wav':\n", + "[out#0/wav @ 0x555e1335df40] Output file does not contain any stream\n", + "Error opening output file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-L5Q0Z1W8S2/ses-01/beh/sub-L5Q0Z1W8S2_ses-01_task-generalsocialcommunicationinteraction_run-01_audio.wav.\n", + "Error opening output files: Invalid argument\n", + ": 1\n", + " Audio extraction failed: ffmpeg version N-121159-g0bd5a7d371-20250921 Copyright (c) 2000-2025 the FFmpeg developers\n", + " built with gcc 15.2.0 (crosstool-NG 1.28.0.1_403899e)\n", + " configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-ffbuild-linux-gnu- --arch=x86_64 --target-os=linux --enable-gpl --enable-version3 --disable-debug --enable-iconv --enable-zlib --enable-libxml2 --enable-libsoxr --enable-openssl --enable-libvmaf --enable-fontconfig --enable-libharfbuzz --enable-libfreetype --enable-libfribidi --enable-vulkan --enable-libshaderc --enable-libvorbis --enable-libxcb --enable-xlib --enable-libpulse --enable-opencl --enable-gmp --enable-lzma --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth 
--enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enable-frei0r --enable-libgme --enable-libkvazaar --enable-libaribcaption --enable-libass --enable-libbluray --enable-libjxl --enable-libmp3lame --enable-libopus --enable-libplacebo --enable-librist --enable-libssh --enable-libtheora --enable-libvpx --enable-libwebp --enable-libzmq --enable-lv2 --enable-libvpl --enable-openal --enable-liboapv --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 --enable-libopenjpeg --enable-libopenmpt --enable-librav1e --enable-librubberband --disable-schannel --enable-sdl2 --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtwolame --enable-libuavs3d --enable-libdrm --enable-vaapi --enable-libvidstab --enable-libvvenc --enable-whisper --enable-libx264 --enable-libx265 --enable-libxavs2 --enable-libxvid --enable-libzimg --enable-libzvbi --extra-cflags=-DLIBTWOLAME_STATIC --extra-cxxflags= --extra-libs='-lgomp -ldl' --extra-ldflags=-pthread --extra-ldexeflags=-pie --cc=x86_64-ffbuild-linux-gnu-gcc --cxx=x86_64-ffbuild-linux-gnu-g++ --ar=x86_64-ffbuild-linux-gnu-gcc-ar --ranlib=x86_64-ffbuild-linux-gnu-gcc-ranlib --nm=x86_64-ffbuild-linux-gnu-gcc-nm --extra-version=20250921\n", + " libavutil 60. 13.100 / 60. 13.100\n", + " libavcodec 62. 15.100 / 62. 15.100\n", + " libavformat 62. 6.100 / 62. 6.100\n", + " libavdevice 62. 2.100 / 62. 2.100\n", + " libavfilter 11. 9.100 / 11. 9.100\n", + " libswscale 9. 3.100 / 9. 3.100\n", + " libswresample 6. 2.100 / 6. 2.100\n", + "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-T8P2Z1M9O9/ses-01/beh/sub-T8P2Z1M9O9_ses-01_task-motorplay_run-09_desc-processed_beh.mp4':\n", + " Metadata:\n", + " major_brand : isom\n", + " minor_version : 512\n", + " compatible_brands: isomiso2avc1mp41\n", + " encoder : Lavf62.6.100\n", + " Duration: 00:00:04.30, start: 0.000000, bitrate: 1678 kb/s\n", + " Stream #0:0[0x1](eng): Video: h264 (High) (avc1 / 0x31637661), yuv420p(tv, bt470bg/bt470bg/smpte170m, progressive), 408x720 [SAR 1:1 DAR 17:30], 1674 kb/s, 30 fps, 30 tbr, 15360 tbn (default)\n", + " Metadata:\n", + " handler_name : Snap Video\n", + " vendor_id : [0][0][0][0]\n", + " encoder : Lavc62.15.100 libx264\n", + "Output #0, wav, to '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-T8P2Z1M9O9/ses-01/beh/sub-T8P2Z1M9O9_ses-01_task-motorplay_run-09_audio.wav':\n", + "[out#0/wav @ 0x560d6c5794c0] Output file does not contain any stream\n", + "Error opening output file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-T8P2Z1M9O9/ses-01/beh/sub-T8P2Z1M9O9_ses-01_task-motorplay_run-09_audio.wav.\n", + "Error opening output files: Invalid argument\n", + ": 1\n", + " Audio extraction failed: ffmpeg version N-121159-g0bd5a7d371-20250921 Copyright (c) 2000-2025 the FFmpeg developers\n", + " built with gcc 15.2.0 (crosstool-NG 1.28.0.1_403899e)\n", + " configuration: --prefix=/ffbuild/prefix --pkg-config-flags=--static --pkg-config=pkg-config --cross-prefix=x86_64-ffbuild-linux-gnu- --arch=x86_64 --target-os=linux --enable-gpl --enable-version3 --disable-debug --enable-iconv --enable-zlib --enable-libxml2 --enable-libsoxr --enable-openssl --enable-libvmaf --enable-fontconfig --enable-libharfbuzz --enable-libfreetype --enable-libfribidi --enable-vulkan --enable-libshaderc --enable-libvorbis --enable-libxcb 
--enable-xlib --enable-libpulse --enable-opencl --enable-gmp --enable-lzma --enable-amf --enable-libaom --enable-libaribb24 --enable-avisynth --enable-chromaprint --enable-libdav1d --enable-libdavs2 --enable-libdvdread --enable-libdvdnav --disable-libfdk-aac --enable-ffnvcodec --enable-cuda-llvm --enable-frei0r --enable-libgme --enable-libkvazaar --enable-libaribcaption --enable-libass --enable-libbluray --enable-libjxl --enable-libmp3lame --enable-libopus --enable-libplacebo --enable-librist --enable-libssh --enable-libtheora --enable-libvpx --enable-libwebp --enable-libzmq --enable-lv2 --enable-libvpl --enable-openal --enable-liboapv --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenh264 --enable-libopenjpeg --enable-libopenmpt --enable-librav1e --enable-librubberband --disable-schannel --enable-sdl2 --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtwolame --enable-libuavs3d --enable-libdrm --enable-vaapi --enable-libvidstab --enable-libvvenc --enable-whisper --enable-libx264 --enable-libx265 --enable-libxavs2 --enable-libxvid --enable-libzimg --enable-libzvbi --extra-cflags=-DLIBTWOLAME_STATIC --extra-cxxflags= --extra-libs='-lgomp -ldl' --extra-ldflags=-pthread --extra-ldexeflags=-pie --cc=x86_64-ffbuild-linux-gnu-gcc --cxx=x86_64-ffbuild-linux-gnu-g++ --ar=x86_64-ffbuild-linux-gnu-gcc-ar --ranlib=x86_64-ffbuild-linux-gnu-gcc-ranlib --nm=x86_64-ffbuild-linux-gnu-gcc-nm --extra-version=20250921\n",
+ " libavutil 60. 13.100 / 60. 13.100\n",
+ " libavcodec 62. 15.100 / 62. 15.100\n",
+ " libavformat 62. 6.100 / 62. 6.100\n",
+ " libavdevice 62. 2.100 / 62. 2.100\n",
+ " libavfilter 11. 9.100 / 11. 9.100\n",
+ " libswscale 9. 3.100 / 9. 3.100\n",
+ " libswresample 6. 2.100 / 6. 2.100\n",
+ "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-W3O7N1N8U2/ses-01/beh/sub-W3O7N1N8U2_ses-01_task-toyplay_run-01_desc-processed_beh.mp4':\n",
+ " Metadata:\n",
+ " major_brand : isom\n",
+ " minor_version : 512\n",
+ " compatible_brands: isomiso2avc1mp41\n",
+ " encoder : Lavf62.6.100\n",
+ " Duration: 00:01:43.23, start: 0.000000, bitrate: 895 kb/s\n",
+ " Stream #0:0[0x1](und): Video: h264 (High) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 406x720, 892 kb/s, 30 fps, 30 tbr, 15360 tbn (default)\n",
+ " Metadata:\n",
+ " handler_name : Core Media Video\n",
+ " vendor_id : [0][0][0][0]\n",
+ " encoder : Lavc62.15.100 libx264\n",
+ "Output #0, wav, to '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-W3O7N1N8U2/ses-01/beh/sub-W3O7N1N8U2_ses-01_task-toyplay_run-01_audio.wav':\n",
+ "[out#0/wav @ 0x55de6b4854c0] Output file does not contain any stream\n",
+ "Error opening output file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-W3O7N1N8U2/ses-01/beh/sub-W3O7N1N8U2_ses-01_task-toyplay_run-01_audio.wav.\n",
+ "Error opening output files: Invalid argument\n",
+ ": 1\n",
+ " Audio extraction failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x563f29669880] Format mov,mp4,m4a,3gp,3g2,mj2 detected only with low score of 1, misdetection possible!\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x563f29669880] moov atom not found\n",
+ "[in#0 @ 0x563f29669500] Error opening input: Invalid data found when processing input\n",
+ "Error opening input file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/derivatives/preprocessed/sub-D9N0U7M9X3/ses-02/beh/sub-D9N0U7M9X3_ses-02_task-specialoccasion_run-01_desc-processed_beh.mp4.\n",
+ "Error opening input files: Invalid data found when processing input\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-A2P7X9N8L7/ses-01/video/sub-A2P7X9N8L7_ses-01_task-generalsocialcommunicationinteraction_run-04_video..3gp':\n",
+ " Metadata:\n",
+ " major_brand : 3gp6\n",
+ " minor_version : 256\n",
+ " compatible_brands: 3gp6isom\n",
+ " creation_time : 2020-01-01T21:21:42.000000Z\n",
+ " Duration: 00:00:36.60, start: 0.000000, bitrate: 150 kb/s\n",
+ " Stream #0:0[0x1](und): Audio: amr_nb (amrnb) (samr / 0x726D6173), 8000 Hz, mono, fltp, 12 kb/s (default)\n",
+ " Metadata:\n",
+ " creation_time : 2020-01-01T21:21:42.000000Z\n",
+ " handler_name : Core Media Audio\n",
+ " vendor_id : [0][0][0][0]\n",
+ " Stream #0:1[0x2](und): Video: h264 (Baseline) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 320x240, 135 kb/s, 15 fps, 15 tbr, 600 tbn (default)\n",
+ " Metadata:\n",
+ " creation_time : 2020-01-01T21:21:42.000000Z\n",
+ " handler_name : Core Media Video\n",
+ " vendor_id : [0][0][0][0]\n",
+ "Stream mapping:\n",
+ " Stream #0:1 -> #0:0 (copy)\n",
+ " Stream #0:0 -> #0:1 (copy)\n",
+ "[mp4 @ 0x5632294e44c0] Could not find tag for codec amr_nb in stream #1, codec not currently supported in container\n",
+ "[out#0/mp4 @ 0x56322959b0c0] Could not write header (incorrect codec parameters ?): Invalid argument\n",
+ "Conversion failed!\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-H9T5Y8D3A1/ses-01/video/sub-H9T5Y8D3A1_ses-01_task-unknown_run-01_video..3gp':\n",
+ " Metadata:\n",
+ " major_brand : mp42\n",
+ " minor_version : 0\n",
+ " compatible_brands: isommp42\n",
+ " creation_time : 2018-02-18T18:45:45.000000Z\n",
+ " Duration: 00:00:29.46, start: 0.000000, bitrate: 148 kb/s\n",
+ " Stream #0:0[0x1](eng): Video: h263 (s263 / 0x33363273), yuv420p, 176x144 [SAR 12:11 DAR 4:3], 80 kb/s, SAR 1:1 DAR 11:9, 20.22 fps, 20.25 tbr, 90k tbn (default)\n",
+ " Metadata:\n",
+ " creation_time : 2018-02-18T18:45:45.000000Z\n",
+ " handler_name : VideoHandle\n",
+ " vendor_id : [0][0][0][0]\n",
+ " Side data:\n",
+ " Display Matrix: rotation of -90.00 degrees\n",
+ " Stream #0:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, stereo, fltp, 64 kb/s (default)\n",
+ " Metadata:\n",
+ " creation_time : 2018-02-18T18:45:45.000000Z\n",
+ " handler_name : SoundHandle\n",
+ " vendor_id : [0][0][0][0]\n",
+ "Stream mapping:\n",
+ " Stream #0:0 -> #0:0 (copy)\n",
+ " Stream #0:1 -> #0:1 (copy)\n",
+ "[mp4 @ 0x55d965e0eac0] Could not find tag for codec h263 in stream #0, codec not currently supported in container\n",
+ "[out#0/mp4 @ 0x55d965e4b480] Could not write header (incorrect codec parameters ?): Invalid argument\n",
+ "Conversion failed!\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-N3L7A1I2B9/ses-02/video/sub-N3L7A1I2B9_ses-02_task-generalsocialcommunicationinteraction_run-09_video..3gp':\n",
+ " Metadata:\n",
+ " major_brand : 3gp4\n",
+ " minor_version : 0\n",
+ " compatible_brands: isom3gp4\n",
+ " creation_time : 2022-01-14T18:19:46.000000Z\n",
+ " com.android.version: 11\n",
+ " com.android.capture.fps: 15.000000\n",
+ " Duration: 00:01:00.11, start: 0.000000, bitrate: 141 kb/s\n",
+ " Stream #0:0[0x1](eng): Video: h264 (Baseline) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 320x240, 127 kb/s, SAR 1:1 DAR 4:3, 14.93 fps, 15 tbr, 90k tbn, start 0.013200 (default)\n",
+ " Metadata:\n",
+ " creation_time : 2022-01-14T18:19:46.000000Z\n",
+ " handler_name : VideoHandle\n",
+ " vendor_id : [0][0][0][0]\n",
+ " Side data:\n",
+ " Display Matrix: rotation of -90.00 degrees\n",
+ " Stream #0:1[0x2](eng): Audio: amr_nb (amrnb) (samr / 0x726D6173), 8000 Hz, mono, fltp, 12 kb/s (default)\n",
+ " Metadata:\n",
+ " creation_time : 2022-01-14T18:19:46.000000Z\n",
+ " handler_name : SoundHandle\n",
+ " vendor_id : [0][0][0][0]\n",
+ "Stream mapping:\n",
+ " Stream #0:0 -> #0:0 (copy)\n",
+ " Stream #0:1 -> #0:1 (copy)\n",
+ "[mp4 @ 0x557146320880] Could not find tag for codec amr_nb in stream #1, codec not currently supported in container\n",
+ "[out#0/mp4 @ 0x55714639c580] Could not write header (incorrect codec parameters ?): Invalid argument\n",
+ "Conversion failed!\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[aist#0:1/pcm_u8 @ 0x556affb3acc0] Guessed Channel Layout: mono\n",
+ "Input #0, avi, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-Y5C5R8P9Z5/ses-01/video/sub-Y5C5R8P9Z5_ses-01_task-unknown_run-01_video..AVI':\n",
+ " Metadata:\n",
+ " creation_time : 2015-05-30 05:09:17\n",
+ " software : CanonMVI06\n",
+ " Duration: 00:00:27.20, start: 0.000000, bitrate: 9899 kb/s\n",
+ " Stream #0:0: Video: mjpeg (Baseline) (MJPG / 0x47504A4D), yuvj422p(pc, bt470bg/unknown/unknown), 640x480, 9826 kb/s, 20 fps, 20 tbr, 20 tbn\n",
+ " Stream #0:1: Audio: pcm_u8 ([1][0][0][0] / 0x0001), 11024 Hz, mono, u8, 88 kb/s\n",
+ "Stream mapping:\n",
+ " Stream #0:0 -> #0:0 (copy)\n",
+ " Stream #0:1 -> #0:1 (copy)\n",
+ "[mp4 @ 0x556affb44d80] Could not find tag for codec pcm_u8 in stream #1, codec not currently supported in container\n",
+ "[out#0/mp4 @ 0x556affb44c40] Could not write header (incorrect codec parameters ?): Invalid argument\n",
+ "Conversion failed!\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[aist#0:1/pcm_u8 @ 0x556fb8d06f00] Guessed Channel Layout: mono\n",
+ "Input #0, avi, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-Y5C5R8P9Z5/ses-01/video/sub-Y5C5R8P9Z5_ses-01_task-unknown_run-03_video..AVI':\n",
+ " Metadata:\n",
+ " creation_time : 2015-05-31 22:51:06\n",
+ " software : CanonMVI06\n",
+ " Duration: 00:01:35.40, start: 0.000000, bitrate: 10003 kb/s\n",
+ " Stream #0:0: Video: mjpeg (Baseline) (MJPG / 0x47504A4D), yuvj422p(pc, bt470bg/unknown/unknown), 640x480, 9917 kb/s, 20 fps, 20 tbr, 20 tbn\n",
+ " Stream #0:1: Audio: pcm_u8 ([1][0][0][0] / 0x0001), 11024 Hz, mono, u8, 88 kb/s\n",
+ "Stream mapping:\n",
+ " Stream #0:0 -> #0:0 (copy)\n",
+ " Stream #0:1 -> #0:1 (copy)\n",
+ "[mp4 @ 0x556fb8d08d00] Could not find tag for codec pcm_u8 in stream #1, codec not currently supported in container\n",
+ "[out#0/mp4 @ 0x556fb8d08bc0] Could not write header (incorrect codec parameters ?): Invalid argument\n",
+ "Conversion failed!\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[aist#0:1/pcm_u8 @ 0x559330724100] Guessed Channel Layout: mono\n",
+ "Input #0, avi, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-Y5C5R8P9Z5/ses-01/video/sub-Y5C5R8P9Z5_ses-01_task-unknown_run-02_video..AVI':\n",
+ " Metadata:\n",
+ " creation_time : 2015-05-31 07:14:18\n",
+ " software : CanonMVI06\n",
+ " Duration: 00:01:58.10, start: 0.000000, bitrate: 10032 kb/s\n",
+ " Stream #0:0: Video: mjpeg (Baseline) (MJPG / 0x47504A4D), yuvj422p(pc, bt470bg/unknown/unknown), 640x480, 9945 kb/s, 20 fps, 20 tbr, 20 tbn\n",
+ " Stream #0:1: Audio: pcm_u8 ([1][0][0][0] / 0x0001), 11024 Hz, mono, u8, 88 kb/s\n",
+ "Stream mapping:\n",
+ " Stream #0:0 -> #0:0 (copy)\n",
+ " Stream #0:1 -> #0:1 (copy)\n",
+ "[mp4 @ 0x559330725f40] Could not find tag for codec pcm_u8 in stream #1, codec not currently supported in container\n",
+ "[out#0/mp4 @ 0x559330725e00] Could not write header (incorrect codec parameters ?): Invalid argument\n",
+ "Conversion failed!\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[aist#0:1/pcm_u8 @ 0x55e250233100] Guessed Channel Layout: mono\n",
+ "Input #0, avi, from '/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-Y5C5R8P9Z5/ses-01/video/sub-Y5C5R8P9Z5_ses-01_task-unknown_run-04_video..AVI':\n",
+ " Metadata:\n",
+ " creation_time : 2015-05-31 12:26:23\n",
+ " software : CanonMVI06\n",
+ " Duration: 00:02:01.90, start: 0.000000, bitrate: 9803 kb/s\n",
+ " Stream #0:0: Video: mjpeg (Baseline) (MJPG / 0x47504A4D), yuvj422p(pc, bt470bg/unknown/unknown), 640x480, 9716 kb/s, 20 fps, 20 tbr, 20 tbn\n",
+ " Stream #0:1: Audio: pcm_u8 ([1][0][0][0] / 0x0001), 11024 Hz, mono, u8, 88 kb/s\n",
+ "Stream mapping:\n",
+ " Stream #0:0 -> #0:0 (copy)\n",
+ " Stream #0:1 -> #0:1 (copy)\n",
+ "[mp4 @ 0x55e250234f40] Could not find tag for codec pcm_u8 in stream #1, codec not currently supported in container\n",
+ "[out#0/mp4 @ 0x55e250234e00] Could not write header (incorrect codec parameters ?): Invalid argument\n",
+ "Conversion failed!\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x5564177205c0] Format mov,mp4,m4a,3gp,3g2,mj2 detected only with low score of 1, misdetection possible!\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x5564177205c0] moov atom not found\n",
+ "[in#0 @ 0x556417716f40] Error opening input: Invalid data found when processing input\n",
+ "Error opening input file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-S7F0B6H3A3/ses-02/video/sub-S7F0B6H3A3_ses-02_task-unknown_run-01_video..MOV.\n",
+ "Error opening input files: Invalid data found when processing input\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x55bf4343e5c0] Format mov,mp4,m4a,3gp,3g2,mj2 detected only with low score of 1, misdetection possible!\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x55bf4343e5c0] moov atom not found\n",
+ "[in#0 @ 0x55bf43434f40] Error opening input: Invalid data found when processing input\n",
+ "Error opening input file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-S7F0B6H3A3/ses-02/video/sub-S7F0B6H3A3_ses-02_task-unknown_run-01_video..MOV.\n",
+ "Error opening input files: Invalid data found when processing input\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x562269a195c0] Format mov,mp4,m4a,3gp,3g2,mj2 detected only with low score of 1, misdetection possible!\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x562269a195c0] moov atom not found\n",
+ "[in#0 @ 0x562269a0ff40] Error opening input: Invalid data found when processing input\n",
+ "Error opening input file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-G3D3S2N7W8/ses-02/video/sub-G3D3S2N7W8_ses-02_task-unknown_run-01_video..MOV.\n",
+ "Error opening input files: Invalid data found when processing input\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner and library versions omitted]\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x5624281205c0] Format mov,mp4,m4a,3gp,3g2,mj2 detected only with low score of 1, misdetection possible!\n",
+ "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x5624281205c0] moov atom not found\n",
+ "[in#0 @ 0x562428116f40] Error opening input: Invalid data found when processing input\n",
+ "Error opening input file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-S7F0B6H3A3/ses-02/video/sub-S7F0B6H3A3_ses-02_task-unknown_run-01_video..MOV.\n",
+ "Error opening input files: Invalid data found when processing input\n",
+ ": 1\n",
+ " FFmpeg conversion failed: ffmpeg version N-121159-g0bd5a7d371-20250921 [identical ffmpeg build banner omitted]\n",
+ " libswresample 6. 2.100 / 6.
2.100\n", + "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x564ce80415c0] Format mov,mp4,m4a,3gp,3g2,mj2 detected only with low score of 1, misdetection possible!\n", + "[mov,mp4,m4a,3gp,3g2,mj2 @ 0x564ce80415c0] moov atom not found\n", + "[in#0 @ 0x564ce8037f40] Error opening input: Invalid data found when processing input\n", + "Error opening input file /home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset/sourcedata/sub-K6O1J5K7Z0/ses-01/video/sub-K6O1J5K7Z0_ses-01_task-unknown_run-01_video..mov.\n", + "Error opening input files: Invalid data found when processing input\n", + ": 1\n", + " Unreadable or unsupported video format: 2\n" + ] + } + ], + "source": [ + "import json\n", + "import os\n", + "import pandas as pd\n", + "\n", + "# paths\n", + "BASE_DIR = \"/home/aparnabg/orcd/scratch/bidsdata\"\n", + "BIDS_ROOT = \"/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset\"\n", + "\n", + "def merge_logs():\n", + " all_processed, all_failed = [], []\n", + " task_dirs = sorted(\n", + " int(item) for item in os.listdir(BASE_DIR)\n", + " if item.isdigit() and os.path.isdir(os.path.join(BASE_DIR, item))\n", + " )\n", + " \n", + " for task_id in task_dirs:\n", + " task_dir = os.path.join(BASE_DIR, str(task_id))\n", + " processed_file = os.path.join(task_dir, \"processing_log.json\")\n", + " failed_file = os.path.join(task_dir, \"not_processed.json\")\n", + "\n", + " if os.path.exists(processed_file):\n", + " with open(processed_file, 'r') as f:\n", + " all_processed.extend(json.load(f))\n", + " \n", + " if os.path.exists(failed_file):\n", + " with open(failed_file, 'r') as f:\n", + " all_failed.extend(json.load(f))\n", + " \n", + " return all_processed, all_failed\n", + "\n", + "processed_data, failed_data = merge_logs()\n", + "\n", + "# Save all logs\n", + "with open(os.path.join(BASE_DIR, \"all_processing_log.json\"), 'w') as f:\n", + " json.dump(processed_data, f, indent=2, default=str)\n", + "\n", + "with open(os.path.join(BASE_DIR, \"all_not_processed.json\"), 'w') as f:\n", + " json.dump(failed_data, f, indent=2, default=str)\n", + "\n", + "# Create participants files\n", + "if processed_data:\n", + " participant_ids = {\n", + " entry['participant_id'] for entry in processed_data if 'participant_id' in entry\n", + " }\n", + "\n", + " participants_data = [\n", + " {\"participant_id\": f\"sub-{pid}\", \"age\": \"n/a\", \"validity\": \"n/a\"}\n", + " for pid in sorted(participant_ids)\n", + " ]\n", + " \n", + " pd.DataFrame(participants_data).to_csv(\n", + " os.path.join(BIDS_ROOT, \"participants.tsv\"), \n", + " sep='\\t', index=False, na_rep='n/a'\n", + " )\n", + " \n", + " participants_json = {\n", + " \"participant_id\": {\"Description\": \"Unique participant identifier\"},\n", + " \"age\": {\"Description\": \"Age information\", \"Units\": \"months\"}, \n", + " \"validity\": {\"Description\": \"Data validity information\"}\n", + " }\n", + " \n", + " with open(os.path.join(BIDS_ROOT, \"participants.json\"), 'w') as f:\n", + " json.dump(participants_json, f, indent=2)\n", + "\n", + "# Summary\n", + "print(f\"Total processed: {len(processed_data)}\")\n", + "print(f\"Total failed: {len(failed_data)}\")\n", + "\n", + "if processed_data:\n", + " task_counts = {}\n", + " for entry in processed_data:\n", + " task = entry.get('task_label', 'unknown')\n", + " task_counts[task] = task_counts.get(task, 0) + 1\n", + " \n", + " print(\"\\nTasks processed:\")\n", + " for task, count in sorted(task_counts.items()):\n", + " print(f\" {task}: {count}\")\n", + " \n", + " session_counts = {}\n", + " for entry in 
processed_data:\n", + " session = entry.get('session_id', 'unknown')\n", + " session_counts[session] = session_counts.get(session, 0) + 1\n", + " \n", + " print(\"\\nSessions:\")\n", + " for session, count in sorted(session_counts.items()):\n", + " print(f\" Session {session}: {count}\")\n", + "\n", + "if failed_data:\n", + " error_counts = {}\n", + " for entry in failed_data:\n", + " error = entry.get('error', 'Unknown')\n", + " error_counts[error] = error_counts.get(error, 0) + 1\n", + " \n", + " print(\"\\nFailed videos by error type:\")\n", + " for error, count in sorted(error_counts.items()):\n", + " print(f\" {error}: {count}\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1564f165e29112a652db11b3dace82498e8e5495 Mon Sep 17 00:00:00 2001 From: aparnabg Date: Thu, 25 Sep 2025 23:07:33 +0530 Subject: [PATCH 03/36] add .sh --- src/new_code/submit_bids.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 src/new_code/submit_bids.sh diff --git a/src/new_code/submit_bids.sh b/src/new_code/submit_bids.sh new file mode 100644 index 0000000..58fd973 --- /dev/null +++ b/src/new_code/submit_bids.sh @@ -0,0 +1,28 @@ +#!/bin/bash +#SBATCH --job-name=bids_processing +#SBATCH --partition=mit_normal +#SBATCH --array=0-50 +#SBATCH --output=logs/bids_%A_%a.out +#SBATCH --error=logs/bids_%A_%a.err +#SBATCH --mem=10G +#SBATCH --time=10:00:00 +#SBATCH --cpus-per-task=10 + +mkdir -p logs + +module load miniforge + + +source $(conda info --base)/etc/profile.d/conda.sh + +eval "$(conda shell.bash hook)" + +conda activate data_env + +echo "Python executable: $(which python)" +echo "Python version: $(python --version)" + +echo "Starting video processing for task $SLURM_ARRAY_TASK_ID" +python /home/aparnabg/bids.py $SLURM_ARRAY_TASK_ID $SLURM_ARRAY_TASK_COUNT + +echo "Job completed at: $(date)" From c124e0cd2b36b9358c3819f1e7dc016903c98c22 Mon Sep 17 00:00:00 2001 From: aparnabg Date: Wed, 1 Oct 2025 20:41:10 +0530 Subject: [PATCH 04/36] add bids code --- {src/new_code => new_code}/bids.py | 14 +++++++------- {src/new_code => new_code}/log_file.ipynb | 0 {src/new_code => new_code}/submit_bids.sh | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) rename {src/new_code => new_code}/bids.py (99%) rename {src/new_code => new_code}/log_file.ipynb (100%) rename {src/new_code => new_code}/submit_bids.sh (89%) diff --git a/src/new_code/bids.py b/new_code/bids.py similarity index 99% rename from src/new_code/bids.py rename to new_code/bids.py index e0b05b7..1807e91 100644 --- a/src/new_code/bids.py +++ b/new_code/bids.py @@ -73,14 +73,14 @@ def determine_session_from_folder(folder_name: str) -> Optional[str]: # Check for 12-16 month patterns (including spaces and variations) if any(pattern in folder_lower for pattern in [ - '12-16 month', '12-14 month', '12_16', '12_14', '12-16month', '12-14month' + '12-16 month', '12-14 month', '12_16', '12_14', '12-16month', '12-14month', '12-16_month_videos' ]): return "01" # Check for 34-38 month patterns (including spaces, typos, and variations) elif any(pattern in folder_lower for pattern in [ '34-38 month', '34-28 month', '34-48 month', 
'34_38', '34_28', '34_48', - '34-38month', '34-28month', '34-48month' + '34-38month', '34-28month', '34-48month','34-38_month_videos' ]): return "02" @@ -560,7 +560,7 @@ def process_single_video(video_info: Dict, excel_df: pd.DataFrame, os.makedirs(source_subj_dir, exist_ok=True) # Create BIDS filenames with run number - ext = os.path.splitext(filename)[1] + ext = os.path.splitext(filename)[1][1:] run_number = get_next_run_number(participant_id, session_id, task_label, final_bids_root) raw_video_name = create_bids_filename(participant_id, session_id, task_label, "beh", "mp4", run_number) @@ -889,7 +889,7 @@ def main(): # Configuration EXCEL_FILE = "/orcd/data/satra/002/datasets/SAILS/data4analysis/Video Rating Data/SAILS_RATINGS_ALL_8.8.25.xlsx" VIDEO_ROOT = "/orcd/data/satra/002/datasets/SAILS/Phase_III_Videos/Videos_from_external/" - OUTPUT_DIR = "/home/aparnabg/orcd/scratch/bidsdata" + OUTPUT_DIR = "/home/aparnabg/orcd/scratch/BIDS" TARGET_RESOLUTION = "1280x720" TARGET_FRAMERATE = 30 @@ -935,7 +935,7 @@ def main(): print("ERROR: No video files found") sys.exit(1) - # Create BIDS structure files + # Create BIDS structure files (only for task 0 to avoid conflicts) if my_task_id == 0: try: safe_print("Creating BIDS structure files...") @@ -984,11 +984,11 @@ def main(): except Exception as e: safe_print(f"ERROR: Failed to save processing logs: {e}") - # clean up temp directory + # Clean up temp directory if os.path.exists(TEMP_DIR): shutil.rmtree(TEMP_DIR) - # summary + # Print summary end_time = time.time() total_time = end_time - start_time print_summary(all_processed, all_failed) diff --git a/src/new_code/log_file.ipynb b/new_code/log_file.ipynb similarity index 100% rename from src/new_code/log_file.ipynb rename to new_code/log_file.ipynb diff --git a/src/new_code/submit_bids.sh b/new_code/submit_bids.sh similarity index 89% rename from src/new_code/submit_bids.sh rename to new_code/submit_bids.sh index 58fd973..872c1ce 100644 --- a/src/new_code/submit_bids.sh +++ b/new_code/submit_bids.sh @@ -1,12 +1,12 @@ #!/bin/bash #SBATCH --job-name=bids_processing #SBATCH --partition=mit_normal -#SBATCH --array=0-50 +#SBATCH --array=0-90 #SBATCH --output=logs/bids_%A_%a.out #SBATCH --error=logs/bids_%A_%a.err -#SBATCH --mem=10G +#SBATCH --mem=5G #SBATCH --time=10:00:00 -#SBATCH --cpus-per-task=10 +#SBATCH --cpus-per-task=5 mkdir -p logs From 725305a6688f66829e03f6284e324a46b633ffd9 Mon Sep 17 00:00:00 2001 From: aparnabg Date: Wed, 1 Oct 2025 20:42:22 +0530 Subject: [PATCH 05/36] add bids code --- {new_code => src/new_code}/bids.py | 0 {new_code => src/new_code}/log_file.ipynb | 0 {new_code => src/new_code}/submit_bids.sh | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {new_code => src/new_code}/bids.py (100%) rename {new_code => src/new_code}/log_file.ipynb (100%) rename {new_code => src/new_code}/submit_bids.sh (100%) diff --git a/new_code/bids.py b/src/new_code/bids.py similarity index 100% rename from new_code/bids.py rename to src/new_code/bids.py diff --git a/new_code/log_file.ipynb b/src/new_code/log_file.ipynb similarity index 100% rename from new_code/log_file.ipynb rename to src/new_code/log_file.ipynb diff --git a/new_code/submit_bids.sh b/src/new_code/submit_bids.sh similarity index 100% rename from new_code/submit_bids.sh rename to src/new_code/submit_bids.sh From cb634ceeda8cf2d80f4ecab3a89fc1b7c6e34760 Mon Sep 17 00:00:00 2001 From: manaalm <90986243+manaalm@users.noreply.github.com> Date: Tue, 14 Oct 2025 16:29:55 -0400 Subject: [PATCH 06/36] Add participants.tsv 
population file --- src/new_code/data_copy_to_bids.ipynb | 261 +++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100644 src/new_code/data_copy_to_bids.ipynb diff --git a/src/new_code/data_copy_to_bids.ipynb b/src/new_code/data_copy_to_bids.ipynb new file mode 100644 index 0000000..565bbab --- /dev/null +++ b/src/new_code/data_copy_to_bids.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "23ebe559-c77e-4d3e-adab-ded275ce6cb5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "import pandas as pd\n", + "import numpy as np\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1b027235-ead2-4209-bf05-cf3993310394", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def parse_duration_to_seconds(s):\n", + " if pd.isna(s):\n", + " return np.nan\n", + " s = str(s).strip()\n", + " if s == \"\":\n", + " return np.nan\n", + " parts = s.split(\":\")\n", + " try:\n", + " parts = [float(p) for p in parts]\n", + " except Exception:\n", + " # maybe already numeric string (seconds)\n", + " try:\n", + " return float(s)\n", + " except Exception:\n", + " return np.nan\n", + " # seconds only\n", + " if len(parts) == 1:\n", + " return parts[0]\n", + " # mm:ss(.fraction)\n", + " if len(parts) == 2:\n", + " m, sec = parts\n", + " return m * 60.0 + sec\n", + " # hh:mm:ss\n", + " if len(parts) == 3:\n", + " h, m, sec = parts\n", + " return h * 3600.0 + m * 60.0 + sec\n", + " return np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f9c6c300-2cf1-4bee-b99b-b92b42a77e2b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def parse_date(v):\n", + " if pd.isna(v):\n", + " return None\n", + " try:\n", + " return pd.to_datetime(v, utc=True, errors=\"coerce\")\n", + " except Exception:\n", + " return pd.to_datetime(v, errors=\"coerce\")\n", + "def compute_age_months(dob, vid_date):\n", + " if dob is None or pd.isna(dob) or vid_date is None or pd.isna(vid_date):\n", + " return np.nan\n", + " dob_ts = pd.to_datetime(dob, utc=True, errors=\"coerce\")\n", + " vd_ts = pd.to_datetime(vid_date, utc=True, errors=\"coerce\")\n", + " if pd.isna(dob_ts) or pd.isna(vd_ts):\n", + " return np.nan\n", + " # use total seconds to avoid .days/.seconds mix and convert to months (~30.4375 days)\n", + " seconds = (vd_ts - dob_ts).total_seconds()\n", + " months = seconds / (30.4375 * 86400.0)\n", + " return float(months)\n", + "def session_from_age_months(m):\n", + " if pd.isna(m):\n", + " return \"session_unknown\"\n", + " m = float(m)\n", + " if 12 <= m <= 16:\n", + " return \"12-16 months\"\n", + " if 34 <= m <= 38:\n", + " return \"34-38 months\"\n", + " # older mapping used in checkpoints: 14_month etc. 
keep human labels by default\n",
+ " if 10 <= m < 24:\n",
+ " return \"12-16 months\"\n",
+ " if 30 <= m < 40:\n",
+ " return \"34-38 months\"\n",
+ " return \"session_unknown\"" ] }, { "cell_type": "code", "execution_count": null, "id": "531a9948-98e0-40a5-9bda-c93bd1e23200", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "c1029c4d-d577-43b3-9bb9-8c5999656cc3", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[OK] wrote: bids-dataset/participants.tsv\n" ] } ], "source": [
"\n",
"out_dir = \".\"\n",
"p = Path(\"/orcd/data/satra/002/datasets/SAILS/data4analysis/Video Rating Data/SAILS_RATINGS_ALL_DEDUPLICATED_NotForFinalAnalyses_2025.10.csv\")\n",
"df = pd.read_csv(p, dtype=str)\n",
"\n",
"# normalize common column name variants\n",
"df.columns = [c.strip() for c in df.columns]\n",
"def col_first(df, candidates):\n",
" for c in candidates:\n",
" if c in df.columns:\n",
" return c\n",
" return None\n",
"\n",
"src_col = col_first(df, [\"SourceFile\", \"sourcefile\", \"Source_File\"])\n",
"id_col = col_first(df, [\"ID\", \"Id\", \"id\"])\n",
"fn_col = col_first(df, [\"FileName\", \"filename\", \"File_Name\"])\n",
"dur_col = col_first(df, [\"Vid_duration\", \"Vid_Duration\", \"duration\"])\n",
"dob_col = col_first(df, [\"DOB\", \"Dob\", \"dob\"])\n",
"vd_col = col_first(df, [\"VideoDate\", \"Video Date\", \"Video_Date\"])\n",
"age_col = col_first(df, [\"Age\", \"age\", \"age_in_months\", \"Age_in_months\"])\n",
"timepoint_col = col_first(df, [\"timepoint\", \"Timepoint\", \"TimePoint\"])\n",
"date_valid_col = col_first(df, [\"DateValidityScore\", \"Date Validity Score\", \"Date_Validity_Score\"])\n",
"\n",
"required = [src_col, id_col, fn_col]\n",
"if not all(required):\n",
" raise SystemExit(\"CSV missing one of required columns: SourceFile, ID, FileName\")\n",
"\n",
"out = pd.DataFrame()\n",
"out[\"sourcefile\"] = df[src_col].astype(str).str.strip()\n",
"out[\"video_filename\"] = df[fn_col].astype(str).apply(lambda s: Path(s).name)\n",
"out[\"participant_id\"] = df[id_col].astype(str).str.strip().str.upper()\n",
"\n",
"# duration parse\n",
"if dur_col:\n",
" out[\"duration\"] = df[dur_col].apply(parse_duration_to_seconds)\n",
"else:\n",
" out[\"duration\"] = pd.NA\n",
"\n",
"# video_date parse\n",
"if vd_col:\n",
" out[\"video_date\"] = df[vd_col].apply(parse_date).dt.tz_convert(None).dt.strftime(\"%Y-%m-%d\")\n",
"else:\n",
" out[\"video_date\"] = pd.NA\n",
"\n",
"out[\"date_validity_score\"] = df[date_valid_col] if date_valid_col else pd.NA\n",
"\n",
"ages = []\n",
"for i, row in df.iterrows():\n",
" dob = row.get(dob_col) if dob_col else None\n",
" vd = row.get(vd_col) if vd_col else None\n",
" age_m = compute_age_months(dob, vd) if dob or vd else np.nan\n",
" if np.isnan(age_m):\n",
" raw_age = row.get(age_col) if age_col else None\n",
" if raw_age is None or (isinstance(raw_age, float) and np.isnan(raw_age)) or str(raw_age).strip()==\"\":\n",
" age_m = np.nan\n",
" else:\n",
" try:\n",
" a = float(str(raw_age))\n",
" # heuristic: if a looks like years (<=6) convert to months\n",
" if a <= 6:\n",
" age_m = a * 12.0\n",
" else:\n",
" # assume already months\n",
" age_m = a\n",
" except Exception:\n",
" age_m = np.nan\n",
" ages.append(age_m)\n",
"out[\"age\"] = ages\n",
"\n", + "if timepoint_col and timepoint_col in df.columns:\n", + " out[\"session_id\"] = df[timepoint_col].fillna(\"\").astype(str).replace(\"\", \"session_unknown\")\n", + "else:\n", + " out[\"session_id\"] = out[\"age\"].apply(lambda m: session_from_age_months(m))\n", + "\n", + "out = out.drop_duplicates(subset=[\"participant_id\", \"video_filename\"])\n", + "bids_root = Path(out_dir) / \"bids-dataset\"\n", + "bids_root.mkdir(parents=True, exist_ok=True)\n", + "\n", + "participants_cols = [\"participant_id\", \"session_id\", \"video_filename\", \"duration\", \"age\", \"video_date\", \"date_validity_score\", \"sourcefile\"]\n", + "out.to_csv(bids_root / \"participants.tsv\", sep=\"\\t\", index=False, columns=participants_cols)\n", + "\n", + "participants_json = {\n", + " \"participant_id\": {\"Description\": \"Original alphanumeric participant identifier (without 'sub-').\"},\n", + " \"session_id\": {\"Description\": \"Session label (e.g., '12-16 months', '34-38 months').\"},\n", + " \"video_filename\": {\"Description\": \"Source video file name (basename).\"},\n", + " \"duration\": {\"Description\": \"Video duration (seconds).\", \"Units\": \"s\"},\n", + " \"age\": {\"Description\": \"Age at video time (months).\", \"Units\": \"months\"},\n", + " \"video_date\": {\"Description\": \"Date of the video (ISO YYYY-MM-DD).\"},\n", + " \"date_validity_score\": {\"Description\": \"Date validity score from SAILS ratings CSV.\"},\n", + " \"sourcefile\": {\"Description\": \"Original full source file path from SAILS CSV.\"},\n", + "}\n", + "import json\n", + "with open(bids_root / \"participants.json\", \"w\") as f:\n", + " json.dump(participants_json, f, indent=4)\n", + "\n", + "print(f\"[OK] wrote: {bids_root / 'participants.tsv'}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84c649ea-d321-4389-ae95-d86a011c5e40", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e35c5b8-05a1-4445-819c-7d6e4be2106a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From ea61c29017eacd1f07bda2f262e6681b16b947da Mon Sep 17 00:00:00 2001 From: manaalm <90986243+manaalm@users.noreply.github.com> Date: Tue, 14 Oct 2025 17:50:32 -0400 Subject: [PATCH 07/36] Create README.md --- src/new_code/README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/new_code/README.md diff --git a/src/new_code/README.md b/src/new_code/README.md new file mode 100644 index 0000000..631a234 --- /dev/null +++ b/src/new_code/README.md @@ -0,0 +1,25 @@ +# BIDS Conversion and Preprocessing + +These files convert the SAILS home videos collection into a standardized BIDS-compliant dataset. + +## `bids.py` + +This is the main executable that performs the conversion and processing for a single video; it's called with task IDs from the SLURM schedule to process +a specific chunk of the total video dataset. 
+It discovers video files and assigns participant IDs to them, integrates behavioral metadata, creates a BIDS directory structure
+with `/sourcedata` and `/derivatives`, performs video preprocessing with ffmpeg (stabilization, denoising, standardization to 720p), extracts audio, and generates
+the required BIDS metadata in .json and .tsv files.
+
+
+## `log_file.ipynb`
+
+After the `bids.py` job array is complete, this notebook merges the individual log files from each job (`processing_log.json` and `not_processed.json`) into a single summary
+and reports statistics on the numbers of processed and failed videos, along with a breakdown of the errors encountered.
+
+## `submit_bids.sh`
+
+This is a SLURM batch script that submits `bids.py` as a job array and manages parallel execution of the pipeline; run it with `sbatch submit_bids.sh`.
+
+## `data_copy_to_bids.ipynb`
+
+This notebook populates a `participants.tsv` file with participant IDs from the SAILS ratings data;
+age, session ID, video date, duration, date validity score, etc. are used to fill in the remaining columns.

From 74bb9f401b64a0a1c1bc9a356bbe8631f045c448 Mon Sep 17 00:00:00 2001
From: lucie271
Date: Thu, 30 Oct 2025 17:00:22 -0400
Subject: [PATCH 08/36] Added configuration file for BIDS conversion

---
 .pre-commit-config.yaml            |  2 ++
 configs/config_bids_convertor.yaml | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 configs/config_bids_convertor.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d0bed6b..d45aa79 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,6 +17,8 @@ repos:
     hooks:
       - id: mypy
         args: [--ignore-missing-imports]
+        additional_dependencies:
+          - types-PyYAML
 - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
   rev: v2.12.0
   hooks:
diff --git a/configs/config_bids_convertor.yaml b/configs/config_bids_convertor.yaml
new file mode 100644
index 0000000..ab38518
--- /dev/null
+++ b/configs/config_bids_convertor.yaml
@@ -0,0 +1,17 @@
+# Video Processing Configuration
+
+# Input data
+annotation_file: /orcd/data/satra/002/datasets/SAILS/data4analysis/Video Rating Data/SAILS_RATINGS_ALL_DEDUPLICATED_NotForFinalAnalyses_2025.10.csv
+video_root: /orcd/data/satra/002/datasets/SAILS/Phase_III_Videos/Videos_from_external_standardized
+asd_status: /orcd/data/satra/002/datasets/SAILS/data4analysis/ASD_Status.xlsx
+
+# Output data
+output_dir: /orcd/scratch/bcs/001/sensein/sails/BIDS_data
+
+# Video processing parameters
+target_resolution: 1280x720
+target_framerate: 30
+
+# Derived directory names (optional — can be built dynamically)
+final_bids_root: final_bids-dataset
+derivatives_subdir: derivatives/preprocessed

From bb918b922c9add8ee954dcef8479df98a167540b Mon Sep 17 00:00:00 2001
From: lucie271
Date: Thu, 30 Oct 2025 17:02:05 -0400
Subject: [PATCH 09/36] Cleaned src folder

---
 src/new_code/README.md               |   25 -
 src/new_code/bids.py                 | 1004 --------------------
 src/new_code/data_copy_to_bids.ipynb |  261 -------
 src/new_code/log_file.ipynb          |  597 ---------------
 src/new_code/submit_bids.sh          |   28 -
 5 files changed, 1915 deletions(-)
 delete mode 100644 src/new_code/README.md
 delete mode 100644 src/new_code/bids.py
 delete mode 100644 src/new_code/data_copy_to_bids.ipynb
 delete mode 100644 src/new_code/log_file.ipynb
 delete mode 100644 src/new_code/submit_bids.sh

diff --git a/src/new_code/README.md b/src/new_code/README.md
deleted file mode 100644
index 631a234..0000000
--- a/src/new_code/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# BIDS Conversion and Preprocessing
-
-These files convert the SAILS home videos collection into a standardized BIDS-compliant dataset. - -## `bids.py` - -This is the main executable that performs the conversion and processing for a single video; it's called with task IDs from the SLURM schedule to process -a specific chunk of the total video dataset. It discovers video files and attaches IDs to them, integrates behavioral metadata, creates a BIDS directory structure - with `/sourcedata` and `/derivatives`, performs video preprocessing with ffmpeg (stabilization, denoising, standardization to 720p), extracts audio, and generates - required BIDS metadata in .json and .tsv files. - - -## `log_file.ipynb` - -After the `bids.py` job array is complete, this file mergers the individual log files from each job (`processing_log.json` and `not_processed.json`) into a summary -and provides statistics on the number of processed and failed videos and other information and summaries of errors. - -## `submit_bids.sh` - -This is a SLURM batch script to submit `bids.py` as a job and manages parallel execution of the pipeline - run with `sbatch submit_bids.sh`. - -## `data_copy_to_bids.ipynb` - -This file populates a `participants.tsv` file with participant IDs using information from the SAILS xlsx data; -age, session ID, date, duration, data validity score, etc. are used to populate `participants.tsv`. diff --git a/src/new_code/bids.py b/src/new_code/bids.py deleted file mode 100644 index 1807e91..0000000 --- a/src/new_code/bids.py +++ /dev/null @@ -1,1004 +0,0 @@ -# Standard library imports -import json -import os -import re -import shutil -import subprocess -import sys -import time -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union - -# Third-party imports -import pandas as pd -import numpy as np -import cv2 - -def safe_print(message: str): - """Print with timestamps.""" - timestamp = datetime.now().strftime("%H:%M:%S") - print(f"{timestamp} [MAIN] {message}") - -# Helper functions -def parse_duration(duration_str) -> float: - """Parse duration string to seconds""" - try: - if pd.isna(duration_str) or duration_str == '': - return 0.0 - duration_str = str(duration_str) - if ':' in duration_str: - parts = duration_str.split(':') - if len(parts) == 3: - hours = int(parts[0]) - minutes = int(parts[1]) - seconds = float(parts[2]) - return hours * 3600 + minutes * 60 + seconds - elif len(parts) == 2: - minutes = int(parts[0]) - seconds = float(parts[1]) - return minutes * 60 + seconds - return float(duration_str) - except: - return 0.0 - -def make_bids_task_label(task_name): - """Convert TaskName to BIDS-compatible task label for filenames.""" - s = str(task_name).strip() - s = re.sub(r'[^0-9a-zA-Z+]', '', s) # Keep only alphanumeric and + - return s - -def get_video_properties(video_path): - """Extract video properties using OpenCV""" - try: - cap = cv2.VideoCapture(video_path) - if not cap.isOpened(): - return {"SamplingFrequency": None, "Resolution": None} - - fps = cap.get(cv2.CAP_PROP_FPS) - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - cap.release() - - return { - "SamplingFrequency": fps, - "Resolution": f"{width}x{height}", - } - except: - return {"SamplingFrequency": None, "Resolution": None} - -def determine_session_from_folder(folder_name: str) -> Optional[str]: - """Determine session ID from folder names with spaces.""" - folder_lower = folder_name.lower() - - # Check for 12-16 month patterns (including spaces and 
variations) - if any(pattern in folder_lower for pattern in [ - '12-16 month', '12-14 month', '12_16', '12_14', '12-16month', '12-14month', '12-16_month_videos' - ]): - return "01" - - # Check for 34-38 month patterns (including spaces, typos, and variations) - elif any(pattern in folder_lower for pattern in [ - '34-38 month', '34-28 month', '34-48 month', '34_38', '34_28', '34_48', - '34-38month', '34-28month', '34-48month','34-38_month_videos' - ]): - return "02" - - return None - -def find_age_folder_session(current_path: str, participant_path: str) -> Optional[str]: - """Recursively check if current path or any parent path contains age-related folder pattern.""" - if not current_path.startswith(participant_path) or current_path == participant_path: - return None - - current_folder = os.path.basename(current_path) - session_id = determine_session_from_folder(current_folder) - if session_id: - return session_id - - parent_path = os.path.dirname(current_path) - return find_age_folder_session(parent_path, participant_path) - -def find_all_videos_recursive(directory: str, participant_path: str) -> List[Tuple[str, Optional[str]]]: - """Recursively find all video files in a directory and determine their session.""" - videos = [] - - try: - for item in os.listdir(directory): - if item.startswith('.'): # Skip hidden files - continue - - item_path = os.path.join(directory, item) - - if os.path.isfile(item_path): - if item.lower().endswith(('.mp4', '.mov', '.avi', '.mkv', '.m4v', '.3gp', '.mts')): - session_id = find_age_folder_session(directory, participant_path) - videos.append((item_path, session_id)) - - elif os.path.isdir(item_path): - videos.extend(find_all_videos_recursive(item_path, participant_path)) - - except PermissionError: - print(f"Permission denied: {directory}") - except Exception as e: - print(f"Error accessing {directory}: {e}") - - return videos - -def extract_participant_id_from_folder(folder_name: str) -> str: - """Extract participant ID from folder names like 'A.A._Home_Videos_AMES_A2P7X9N8L7'.""" - if 'AMES_' in folder_name: - parts = folder_name.split('AMES_') - if len(parts) > 1: - return parts[1].strip() - - if '_' in folder_name: - return folder_name.split('_')[-1] - - return folder_name - -def get_all_videos_from_age_folders(video_root): - """Find ALL videos in age folders regardless of Excel file.""" - all_videos = [] - - try: - for participant_folder in os.listdir(video_root): - participant_path = os.path.join(video_root, participant_folder) - if not os.path.isdir(participant_path): - continue - - participant_id = extract_participant_id_from_folder(participant_folder) - if not participant_id: - continue - - participant_videos = find_all_videos_recursive(participant_path, participant_path) - - for video_path, session_id in participant_videos: - if session_id in ['01', '02']: - all_videos.append({ - 'participant_id': participant_id, - 'filename': os.path.basename(video_path), - 'full_path': video_path, - 'session_id': session_id, - 'age_folder': os.path.basename(os.path.dirname(video_path)) - }) - - except Exception as e: - print(f"Error scanning video folders: {e}") - - return all_videos - -def create_dummy_excel_data(video_path, participant_id, session_id, task_label="unknown"): - """Create dummy behavioral data for videos not in Excel file.""" - video_filename = os.path.basename(video_path) - - dummy_row_data = { - 'ID': participant_id, - 'FileName': video_filename, - 'Context': task_label, - 'Location': 'n/a', - 'Activity': 'n/a', - 'Child_of_interest_clear': 
'n/a', - '#_adults': 'n/a', - '#_children': 'n/a', - '#_people_background': 'n/a', - 'Interaction_with_child': 'n/a', - '#_people_interacting': 'n/a', - 'Child_constrained': 'n/a', - 'Constraint_type': 'n/a', - 'Supports': 'n/a', - 'Support_type': 'n/a', - 'Example_support_type': 'n/a', - 'Gestures': 'n/a', - 'Gesture_type': 'n/a', - 'Vocalizations': 'n/a', - 'RMM': 'n/a', - 'RMM_type': 'n/a', - 'Response_to_name': 'n/a', - 'Locomotion': 'n/a', - 'Locomotion_type': 'n/a', - 'Grasping': 'n/a', - 'Grasp_type': 'n/a', - 'Body_Parts_Visible': 'n/a', - 'Angle_of_Body': 'n/a', - 'time_point': 'n/a', - 'DOB': 'n/a', - 'Vid_date': 'n/a', - 'Video_Quality_Child_Face_Visibility': 'n/a', - 'Video_Quality_Child_Body_Visibility': 'n/a', - 'Video_Quality_Child_Hand_Visibility': 'n/a', - 'Video_Quality_Lighting': 'n/a', - 'Video_Quality_Resolution': 'n/a', - 'Video_Quality_Motion': 'n/a', - 'Coder': 'n/a', - 'SourceFile': 'n/a', - 'Vid_duration': '00:00:00', - 'Notes': 'Video not found in Excel file - behavioral data unavailable' - } - - return dummy_row_data - -def get_task_from_excel_row(row: pd.Series) -> str: - """Extract and create task label from Excel row data.""" - context = str(row.get('Context', '')).strip() - - if context and context.lower() not in ['nan', 'n/a', '']: - return make_bids_task_label(context) - else: - return "unknown" - -def get_next_run_number(participant_id: str, session_id: str, task_label: str, - final_bids_root: str) -> int: - """Find the next available run number for this participant/session/task.""" - beh_dir = os.path.join(final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh") - - if not os.path.exists(beh_dir): - return 1 - - # Look for existing files with this task - pattern = f"sub-{participant_id}_ses-{session_id}_task-{task_label}_" - existing_files = [f for f in os.listdir(beh_dir) if f.startswith(pattern)] - - if not existing_files: - return 1 - - # Extract run numbers from existing files - run_numbers = [] - for filename in existing_files: - if "_run-" in filename: - run_part = filename.split("_run-")[1].split("_")[0] - try: - run_numbers.append(int(run_part)) - except ValueError: - continue - else: - run_numbers.append(1) # Files without run numbers are considered run-1 - - return max(run_numbers) + 1 if run_numbers else 1 - -def create_bids_filename(participant_id: str, session_id: str, task_label: str, - suffix: str, extension: str, run_id: int = 1) -> str: - """Create BIDS-compliant filename with run identifier for multiple videos per task.""" - return f"sub-{participant_id}_ses-{session_id}_task-{task_label}_run-{run_id:02d}_{suffix}.{extension}" - -# Video processing functions -def extract_exif(video_path: str) -> Dict[str, Any]: - """Extract video metadata using ffprobe.""" - try: - cmd = [ - "ffprobe", - "-v", "quiet", - "-print_format", "json", - "-show_format", - "-show_streams", - video_path, - ] - result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode != 0: - return {"ffprobe_error": result.stderr.strip()} - - metadata = json.loads(result.stdout) - extracted = {} - - format_info = metadata.get("format", {}) - extracted["filename"] = format_info.get("filename") - extracted["format"] = format_info.get("format_long_name") - extracted["duration_sec"] = float(format_info.get("duration", 0)) - extracted["bit_rate"] = int(format_info.get("bit_rate", 0)) - extracted["size_bytes"] = int(format_info.get("size", 0)) - - return extracted - except Exception as e: - return {"error": str(e)} - -def 
stabilize_video(input_path: str, stabilized_path: str, temp_dir: str) -> None: - """Stabilize video using ffmpeg vidstab.""" - transforms_file = os.path.join(temp_dir, "transforms.trf") - - detect_cmd = [ - "ffmpeg", "-i", input_path, - "-vf", f"vidstabdetect=shakiness=5:accuracy=15:result={transforms_file}", - "-f", "null", "-" - ] - subprocess.run(detect_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - - transform_cmd = [ - "ffmpeg", "-y", "-i", input_path, - "-vf", f"vidstabtransform=smoothing=30:input={transforms_file}", - "-c:v", "libx264", "-preset", "slow", "-crf", "23", - "-c:a", "copy", stabilized_path - ] - subprocess.run(transform_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - - if os.path.exists(transforms_file): - os.remove(transforms_file) - -def preprocess_video(input_path: str, output_path: str, temp_dir: str, target_framerate: int) -> None: - """Preprocess video with stabilization, denoising, and standardization.""" - if not os.path.exists(input_path): - raise ValueError(f"Input video not found: {input_path}") - - stabilized_tmp = os.path.join(temp_dir, f"stabilized_temp_{os.getpid()}.mp4") - - try: - stabilize_video(input_path, stabilized_tmp, temp_dir) - - # Verify stabilization succeeded - if not os.path.exists(stabilized_tmp): - raise ValueError("Video stabilization failed - no intermediate file created") - - vf_filters = ( - "yadif," - "hqdn3d," - "eq=contrast=1.0:brightness=0.0:saturation=1.0," - "scale=-2:720," - "pad=ceil(iw/2)*2:ceil(ih/2)*2," - f"fps={target_framerate}" - ) - - cmd = [ - "ffmpeg", "-y", "-i", stabilized_tmp, - "-vf", vf_filters, - "-c:v", "libx264", "-crf", "23", "-preset", "fast", - "-c:a", "aac", "-b:a", "128k", - "-movflags", "+faststart", - output_path, - ] - - # Capture and check stderr - result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True) - if result.returncode != 0: - raise ValueError(f"Video processing failed: {result.stderr}") - - # Verify output file was created and has content - if not os.path.exists(output_path): - raise ValueError(f"Video processing failed - no output file: {output_path}") - if os.path.getsize(output_path) == 0: - raise ValueError(f"Video processing failed - empty output file: {output_path}") - - finally: - # Clean up temp file - if os.path.exists(stabilized_tmp): - os.remove(stabilized_tmp) - -def extract_audio(input_path: str, output_audio_path: str) -> None: - """Extract audio from video file.""" - if not os.path.exists(input_path): - raise ValueError(f"Input video not found: {input_path}") - - cmd = [ - "ffmpeg", "-y", "-i", input_path, - "-vn", "-acodec", "pcm_s16le", - "-ar", "16000", "-ac", "1", - output_audio_path, - ] - - # Check return code and stderr - result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True) - if result.returncode != 0: - raise ValueError(f"Audio extraction failed: {result.stderr}") - - # Verify output file was created - if not os.path.exists(output_audio_path): - raise ValueError(f"Audio extraction failed - no output file: {output_audio_path}") - - -def safe_float_conversion(value, default='n/a'): - """Safely convert value to float, return default if conversion fails.""" - if pd.isna(value): - return default - - # Convert to string and check for common non-numeric indicators - str_val = str(value).strip().lower() - if str_val in ['', 'n/a', 'na', 'nan', 'none', 'null']: - return default - - try: - return float(value) - except (ValueError, TypeError): - return default - -# BIDS file creation 
functions -def create_events_file(group_df: pd.DataFrame, output_path: str) -> None: - """Create events.tsv file from Excel data with all columns.""" - events_data = [] - - for idx, row in group_df.iterrows(): - event = { - 'onset': 0.0, - 'duration': parse_duration(row.get('Vid_duration', '00:00:00')), - 'coder': str(row.get('Coder', 'n/a')), - 'source_file': str(row.get('SourceFile', 'n/a')), - 'context': str(row.get('Context', 'n/a')), - 'location': str(row.get('Location', 'n/a')), - 'activity': str(row.get('Activity', 'n/a')), - 'child_clear': str(row.get('Child_of_interest_clear', 'n/a')), - 'num_adults': str(row.get('#_adults', 'n/a')), - 'num_children': str(row.get('#_children', 'n/a')), - 'num_people_background': str(row.get('#_people_background', 'n/a')), - 'interaction_with_child': str(row.get('Interaction_with_child', 'n/a')), - 'num_people_interacting': str(row.get('#_people_interacting', 'n/a')), - 'child_constrained': str(row.get('Child_constrained', 'n/a')), - 'constraint_type': str(row.get('Constraint_type', 'n/a')), - 'supports': str(row.get('Supports', 'n/a')), - 'support_type': str(row.get('Support_type', 'n/a')), - 'example_support_type': str(row.get('Example_support_type', 'n/a')), - 'gestures': str(row.get('Gestures', 'n/a')), - 'gesture_type': str(row.get('Gesture_type', 'n/a')), - 'vocalizations': str(row.get('Vocalizations', 'n/a')), - 'rmm': str(row.get('RMM', 'n/a')), - 'rmm_type': str(row.get('RMM_type', 'n/a')), - 'response_to_name': str(row.get('Response_to_name', 'n/a')), - 'locomotion': str(row.get('Locomotion', 'n/a')), - 'locomotion_type': str(row.get('Locomotion_type', 'n/a')), - 'grasping': str(row.get('Grasping', 'n/a')), - 'grasp_type': str(row.get('Grasp_type', 'n/a')), - 'body_parts_visible': str(row.get('Body_Parts_Visible', 'n/a')), - 'angle_of_body': str(row.get('Angle_of_Body', 'n/a')), - 'timepoint': str(row.get('time_point', 'n/a')), - 'dob': str(row.get('DOB', 'n/a')), - 'vid_date': str(row.get('Vid_date', 'n/a')), - 'video_quality_face': safe_float_conversion(row.get('Video_Quality_Child_Face_Visibility')), - 'video_quality_body': safe_float_conversion(row.get('Video_Quality_Child_Body_Visibility')), - 'video_quality_hand': safe_float_conversion(row.get('Video_Quality_Child_Hand_Visibility')), - 'video_quality_lighting': safe_float_conversion(row.get('Video_Quality_Lighting')), - 'video_quality_resolution': safe_float_conversion(row.get('Video_Quality_Resolution')), - 'video_quality_motion': safe_float_conversion(row.get('Video_Quality_Motion')), - 'notes': str(row.get('Notes', 'n/a')) - } - events_data.append(event) - - events_df = pd.DataFrame(events_data) - events_df.to_csv(output_path, sep='\t', index=False, na_rep='n/a') - -def create_video_metadata_json(metadata: Dict[str, Any], processing_info: Dict[str, Any], task_info: Dict[str, Any], output_path: str, target_framerate: int, target_resolution: str) -> None: - """Create JSON metadata file for processed video with dynamic task info.""" - video_json = { - "TaskName": task_info.get("task_name", "unknown"), - "TaskDescription": task_info.get("task_description", "Video recorded during behavioral session"), - "Instructions": task_info.get("instructions", "Natural behavior in home environment"), - "Context": task_info.get("context", "n/a"), - "Activity": task_info.get("activity", "n/a"), - "SamplingFrequency": target_framerate, - "Resolution": target_resolution, - "ProcessingPipeline": { - "Stabilization": processing_info.get("has_stabilization", False), - "Denoising": 
processing_info.get("has_denoising", False), - "Equalization": processing_info.get("has_equalization", False), - "StandardizedFPS": target_framerate, - "StandardizedResolution": target_resolution, - }, - "OriginalMetadata": metadata, - } - - with open(output_path, "w") as f: - json.dump(video_json, f, indent=4) - -def create_audio_metadata_json(duration_sec: float, task_info: Dict[str, Any], output_path: str) -> None: - """Create JSON metadata file for extracted audio with dynamic task info.""" - audio_json = { - "SamplingFrequency": 16000, - "Channels": 1, - "SampleEncoding": "16bit", - "Duration": duration_sec, - "TaskName": task_info.get("task_name", "unknown"), - "TaskDescription": task_info.get("task_description", "Audio extracted from behavioral session"), - "Context": task_info.get("context", "n/a"), - "Activity": task_info.get("activity", "n/a"), - } - - with open(output_path, "w") as f: - json.dump(audio_json, f, indent=4) - -def create_raw_video_json(row, task_info: Dict[str, Any], video_path: str, output_path: str) -> None: - """Create JSON metadata for raw video.""" - video_props = get_video_properties(video_path) - - video_json = { - "TaskName": task_info.get("task_name", "unknown"), - "TaskDescription": task_info.get("task_description", "Raw video from behavioral session"), - "SamplingFrequency": video_props.get("SamplingFrequency", "n/a"), - "Resolution": video_props.get("Resolution", "n/a"), - "OriginalFilename": str(row.get('FileName', '')), - "Duration": parse_duration(row.get('Vid_duration', '00:00:00')), - "RecordingDate": str(row.get('Vid_date', 'n/a')), - "Context": task_info.get("context", "n/a"), - "Activity": task_info.get("activity", "n/a"), - "TimePoint": str(row.get('time_point', 'n/a')), - "SourceFile": str(row.get('SourceFile', 'n/a')) - } - - with open(output_path, 'w') as f: - json.dump(video_json, f, indent=4) - -def process_single_video(video_info: Dict, excel_df: pd.DataFrame, - final_bids_root: str, final_derivatives_dir: str, - final_sourcedata_dir: str, temp_dir: str, - target_framerate: int, target_resolution: str) -> Tuple[Dict[str, Any], Optional[Dict[str, Any]]]: - """Process a single video with all BIDS structures.""" - - participant_id = video_info['participant_id'] - filename = video_info['filename'] - session_id = video_info['session_id'] - input_video_path = video_info['full_path'] - - safe_print(f"Processing: {participant_id}/{filename}") - - try: - # Check if video exists in Excel or create dummy data - participant_excel = excel_df[excel_df['ID'].astype(str) == str(participant_id)] - video_excel = participant_excel[participant_excel['FileName'].astype(str) == filename] - - if video_excel.empty: - # Create dummy data for missing Excel entries - dummy_data = create_dummy_excel_data(input_video_path, participant_id, session_id) - video_excel = pd.DataFrame([dummy_data]) - has_excel_data = False - safe_print(f" No Excel data found - using dummy data") - else: - has_excel_data = True - - excel_row = video_excel.iloc[0] - task_label = get_task_from_excel_row(excel_row) - - # Create task information - task_info = { - "task_name": task_label, - "task_description": f"Behavioral session: {excel_row.get('Activity', 'unknown activity')}", - "instructions": "Natural behavior observation", - "context": str(excel_row.get('Context', 'n/a')), - "activity": str(excel_row.get('Activity', 'n/a')) - } - - # Create BIDS directory structure - raw_subj_dir = os.path.join(final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh") - deriv_subj_dir = 
os.path.join(final_derivatives_dir, f"sub-{participant_id}", f"ses-{session_id}", "beh") - source_subj_dir = os.path.join(final_sourcedata_dir, f"sub-{participant_id}", f"ses-{session_id}", "video") - - os.makedirs(raw_subj_dir, exist_ok=True) - os.makedirs(deriv_subj_dir, exist_ok=True) - os.makedirs(source_subj_dir, exist_ok=True) - - # Create BIDS filenames with run number - ext = os.path.splitext(filename)[1][1:] - run_number = get_next_run_number(participant_id, session_id, task_label, final_bids_root) - - raw_video_name = create_bids_filename(participant_id, session_id, task_label, "beh", "mp4", run_number) - processed_video_name = create_bids_filename(participant_id, session_id, task_label, "desc-processed_beh", "mp4", run_number) - audio_name = create_bids_filename(participant_id, session_id, task_label, "audio", "wav", run_number) - events_name = create_bids_filename(participant_id, session_id, task_label, "events", "tsv", run_number) - source_video_name = create_bids_filename(participant_id, session_id, task_label, "video", ext, run_number) - - # File paths - raw_video_path = os.path.join(raw_subj_dir, raw_video_name) - processed_video_path = os.path.join(deriv_subj_dir, processed_video_name) - audio_path = os.path.join(deriv_subj_dir, audio_name) - events_path = os.path.join(raw_subj_dir, events_name) - source_video_path = os.path.join(source_subj_dir, source_video_name) - - # Copy to sourcedata (original, unmodified) - if not os.path.exists(source_video_path): - shutil.copy2(input_video_path, source_video_path) - if not os.path.exists(source_video_path): - raise ValueError(f"Failed to copy to sourcedata: {source_video_path}") - safe_print(f" Copied to sourcedata") - - if not os.path.exists(raw_video_path): - if ext.lower() != '.mp4': - # Convert to mp4 without processing - cmd = ["ffmpeg", "-y", "-i", source_video_path, "-c", "copy", raw_video_path] - result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True) - # Check return code and verify output file - if result.returncode != 0: - raise ValueError(f"FFmpeg conversion failed: {result.stderr}") - if not os.path.exists(raw_video_path): - raise ValueError(f"FFmpeg did not create output file: {raw_video_path}") - safe_print(f" Converted to raw BIDS format") - else: - shutil.copy2(source_video_path, raw_video_path) - # FIX: Verify copy succeeded - if not os.path.exists(raw_video_path): - raise ValueError(f"Failed to copy to raw BIDS: {raw_video_path}") - safe_print(f" Copied to raw BIDS") - - # Extract metadata from raw video - exif_data = extract_exif(raw_video_path) - if "error" in exif_data or "ffprobe_error" in exif_data: - raise ValueError("Unreadable or unsupported video format") - - - # Process video for derivatives - if not os.path.exists(processed_video_path): - safe_print(f" Starting video processing...") - preprocess_video(raw_video_path, processed_video_path, temp_dir, target_framerate) - # Verify processing succeeded - if not os.path.exists(processed_video_path): - raise ValueError(f"Video processing failed - no output file: {processed_video_path}") - if os.path.getsize(processed_video_path) == 0: - raise ValueError(f"Video processing failed - empty output file: {processed_video_path}") - safe_print(f" Video processing complete") - - - if not os.path.exists(audio_path): - safe_print(f" Extracting audio...") - extract_audio(processed_video_path, audio_path) - # Verify audio extraction succeeded - if not os.path.exists(audio_path): - raise ValueError(f"Audio extraction failed - no output 
file: {audio_path}") - if os.path.getsize(audio_path) == 0: - raise ValueError(f"Audio extraction failed - empty output file: {audio_path}") - safe_print(f" Audio extraction complete") - - # Create events files - create_events_file(video_excel, events_path) - if not os.path.exists(events_path): - raise ValueError(f"Failed to create events file: {events_path}") - - # Create metadata JSON files - processing_info = { - "has_stabilization": True, - "has_denoising": True, - "has_equalization": True, - } - - # Raw video JSON - raw_video_json_path = raw_video_path.replace(".mp4", ".json") - create_raw_video_json(excel_row, task_info, raw_video_path, raw_video_json_path) - if not os.path.exists(raw_video_json_path): - raise ValueError(f"Failed to create raw video JSON: {raw_video_json_path}") - - # Processed video JSON - processed_video_json_path = processed_video_path.replace(".mp4", ".json") - create_video_metadata_json(exif_data, processing_info, task_info, processed_video_json_path, target_framerate, target_resolution) - if not os.path.exists(processed_video_json_path): - raise ValueError(f"Failed to create processed video JSON: {processed_video_json_path}") - - # Audio JSON - audio_json_path = audio_path.replace(".wav", ".json") - create_audio_metadata_json(exif_data.get("duration_sec", 0), task_info, audio_json_path) - if not os.path.exists(audio_json_path): - raise ValueError(f"Failed to create audio JSON: {audio_json_path}") - - # Store processing information - entry = { - "participant_id": participant_id, - "session_id": session_id, - "task_label": task_label, - "original_video": input_video_path, - "source_video_bids": source_video_path, - "raw_video_bids": raw_video_path, - "processed_video_bids": processed_video_path, - "audio_file_bids": audio_path, - "events_file_bids": events_path, - "filename": filename, - "age_folder": video_info['age_folder'], - "duration_sec": exif_data.get("duration_sec", 0), - "has_excel_data": has_excel_data, - "excel_metadata": excel_row.to_dict(), - "task_info": task_info, - "processing_info": processing_info, - } - - safe_print(f" Successfully processed: {participant_id}/{filename}") - return entry, None - - except Exception as e: - safe_print(f" ERROR processing {input_video_path}: {str(e)}") - return None, {"video": input_video_path, "error": str(e)} - -def create_dataset_description(final_bids_root: str) -> None: - """Create dataset_description.json for main BIDS dataset.""" - os.makedirs(final_bids_root, exist_ok=True) - - dataset_desc = { - "Name": "SAILS Phase III Home Videos", - "BIDSVersion": "1.9.0", - "DatasetType": "raw", - "License": "na", - "Authors": ["Research Team"], - "Acknowledgements": "participants and families", - "HowToAcknowledge": "na", - "Funding": ["na"], - "EthicsApprovals": ["na"], - "ReferencesAndLinks": ["na"], - "DatasetDOI": "doi:", - } - - filepath = os.path.join(final_bids_root, "dataset_description.json") - with open(filepath, "w") as f: - json.dump(dataset_desc, f, indent=4) - - if not os.path.exists(filepath): - raise ValueError(f"Failed to create dataset_description.json at {filepath}") - - - -def create_derivatives_dataset_description(final_derivatives_dir: str) -> None: - """Create dataset_description.json for derivatives.""" - os.makedirs(final_derivatives_dir, exist_ok=True) - - derivatives_desc = { - "Name": "SAILS Phase III Home Videos - Preprocessed", - "BIDSVersion": "1.9.0", - "DatasetType": "derivative", - "GeneratedBy": [ - { - "Name": "Video Preprocessing Pipeline", - "Version": "1.0.0", - "Description": ( 
- "FFmpeg-based video stabilization, denoising, " - "and standardization pipeline with audio extraction" - ), - "CodeURL": "local", - } - ], - "SourceDatasets": [{"URL": "", "Version": "1.0.0"}], - "HowToAcknowledge": "Please cite the original study", - } - - filepath = os.path.join(final_derivatives_dir, "dataset_description.json") - with open(filepath, "w") as f: - json.dump(derivatives_desc, f, indent=4) - - if not os.path.exists(filepath): - raise ValueError(f"Failed to create derivatives dataset_description.json at {filepath}") - - -def create_readme(final_bids_root: str) -> None: - """Create README file for the BIDS dataset.""" - - os.makedirs(final_bids_root, exist_ok=True) - - readme_content = """# SAILS Phase III Home Videos BIDS Dataset - -## Overview -This dataset contains home videos from the SAILS Phase III study, -organized according to the Brain Imaging Data Structure (BIDS) specification. - -## Data Collection -Videos were collected from home environments during various activities. -Two main age groups were included: -- Session 01: 12-16 month old children -- Session 02: 34-38 month old children - -## Dataset Structure -### Raw Data -- sub-*/ses-*/beh/: Raw behavioral videos (converted to mp4) and event annotations -- sourcedata/: Original unmodified video files in their native formats - -### Derivatives -- derivatives/preprocessed/sub-*/ses-*/beh/: Processed videos and extracted audio - - Videos: Stabilized, denoised, standardized to 720p/30fps - - Audio: Extracted to 16kHz mono WAV format - -## Data Processing -All videos underwent standardized preprocessing including: -- Video stabilization using vidstab -- Denoising and quality enhancement -- Standardization to 720p resolution and 30fps -- Audio extraction for speech analysis - -## Behavioral Coding -Events files include annotations from csv file. - -## Task Labels -Task labels are derived from the Context column in the csv. -Videos without behavioral coding data use "unknown" task label. 
-""" - - filepath = os.path.join(final_bids_root, "README") - with open(filepath, "w") as f: - f.write(readme_content) - - # FIX: Verify file was created - if not os.path.exists(filepath): - raise ValueError(f"Failed to create README at {filepath}") - -def create_participants_files(processed_data: List[Dict[str, Any]], final_bids_root: str) -> None: - """Create participants.tsv and participants.json files.""" - processed_participants = set(entry["participant_id"] for entry in processed_data) - - participants_data = [] - for participant_id in sorted(processed_participants): - participants_data.append({ - 'participant_id': f'sub-{participant_id}', - 'age': 'n/a', - 'validity': 'n/a' - }) - - participants_df = pd.DataFrame(participants_data) - participants_df.to_csv(os.path.join(final_bids_root, "participants.tsv"), sep='\t', index=False, na_rep='n/a') - - participants_json = { - "participant_id": {"Description": "Unique participant identifier"}, - "age": {"Description": "Age information", "Units": "months"}, - "validity": {"Description": "data validity information"}, - } - - with open(os.path.join(final_bids_root, "participants.json"), "w") as f: - json.dump(participants_json, f, indent=4) - -def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None: - """Print processing summary statistics.""" - - print("PROCESSING SUMMARY") - - - print(f"Successfully processed: {len(all_processed)} videos") - print(f"Failed to process: {len(all_failed)} videos") - print(f"Total videos attempted: {len(all_processed) + len(all_failed)}") - - if all_processed: - # Excel data availability - with_excel = sum(1 for entry in all_processed if entry.get('has_excel_data', False)) - without_excel = len(all_processed) - with_excel - print(f"\nData sources:") - print(f" With Excel behavioral data: {with_excel} videos") - print(f" With dummy behavioral data: {without_excel} videos") - - # Task distribution - task_counts = {} - participant_counts = {} - session_counts = {} - - for entry in all_processed: - task = entry['task_label'] - participant = entry['participant_id'] - session = entry['session_id'] - - task_counts[task] = task_counts.get(task, 0) + 1 - participant_counts[participant] = participant_counts.get(participant, 0) + 1 - session_counts[session] = session_counts.get(session, 0) + 1 - - print(f"\nTask distribution:") - for task, count in sorted(task_counts.items()): - print(f" {task}: {count} videos") - - print(f"\nSession distribution:") - for session, count in sorted(session_counts.items()): - print(f" Session {session}: {count} videos") - - print(f"\nUnique participants processed: {len(participant_counts)}") - - # Duration statistics - durations = [entry.get('duration_sec', 0) for entry in all_processed] - total_duration = sum(durations) - avg_duration = total_duration / len(durations) if durations else 0 - - print(f"\nDuration statistics:") - print(f" Total video duration: {total_duration/3600:.1f} hours") - print(f" Average video duration: {avg_duration/60:.1f} minutes") - - if all_failed: - print(f"\nFailed videos breakdown:") - error_types = {} - for entry in all_failed: - error = entry.get('error', 'Unknown error') - error_types[error] = error_types.get(error, 0) + 1 - - for error, count in sorted(error_types.items()): - print(f" {error}: {count} videos") - -def main(): - """Main function.""" - - if len(sys.argv) != 3: - print("Usage: python bids.py ") - sys.exit(1) - - # Configuration - EXCEL_FILE = "/orcd/data/satra/002/datasets/SAILS/data4analysis/Video Rating 
Data/SAILS_RATINGS_ALL_8.8.25.xlsx" - VIDEO_ROOT = "/orcd/data/satra/002/datasets/SAILS/Phase_III_Videos/Videos_from_external/" - OUTPUT_DIR = "/home/aparnabg/orcd/scratch/BIDS" - TARGET_RESOLUTION = "1280x720" - TARGET_FRAMERATE = 30 - - FINAL_BIDS_ROOT = os.path.join(OUTPUT_DIR, "final_bids-dataset") - FINAL_DERIVATIVES_DIR = os.path.join(FINAL_BIDS_ROOT, "derivatives", "preprocessed") - FINAL_SOURCEDATA_DIR = os.path.join(FINAL_BIDS_ROOT, "sourcedata") - - # Parse command line arguments - my_task_id = int(sys.argv[1]) - num_tasks = int(sys.argv[2]) - - # Create task-specific temp directory - TEMP_DIR = os.path.join(OUTPUT_DIR, str(my_task_id), "temp") - os.makedirs(TEMP_DIR, exist_ok=True) - - # Start timing - start_time = time.time() - - # Check if paths exist - if not os.path.exists(VIDEO_ROOT): - print(f"ERROR: Video root directory not found: {VIDEO_ROOT}") - sys.exit(1) - - if not os.path.exists(EXCEL_FILE): - print(f"ERROR: Excel file not found: {EXCEL_FILE}") - sys.exit(1) - - # Load Excel file - try: - excel_df = pd.read_excel(EXCEL_FILE) - excel_df.columns = excel_df.columns.str.strip() - safe_print(f"Loaded {len(excel_df)} rows from Excel file") - except Exception as e: - safe_print(f"ERROR: Failed to load Excel file: {e}") - sys.exit(1) - - # Discover videos - print("Discovering all video files from age folders") - all_videos = get_all_videos_from_age_folders(VIDEO_ROOT) - print(f"Found {len(all_videos)} video files in age-specific folders") - - if not all_videos: - print("ERROR: No video files found") - sys.exit(1) - - # Create BIDS structure files (only for task 0 to avoid conflicts) - if my_task_id == 0: - try: - safe_print("Creating BIDS structure files...") - create_dataset_description(FINAL_BIDS_ROOT) - create_derivatives_dataset_description(FINAL_DERIVATIVES_DIR) - create_readme(FINAL_BIDS_ROOT) - safe_print("Successfully created BIDS structure files") - except Exception as e: - safe_print(f"CRITICAL ERROR: Failed to create BIDS structure files: {e}") - sys.exit(1) - - # Divide videos among tasks - video_chunks = all_videos[my_task_id::num_tasks] - safe_print(f"Task {my_task_id}: Processing {len(video_chunks)} videos") - - # Process videos - all_processed = [] - all_failed = [] - - for i, video_info in enumerate(video_chunks, 1): - safe_print(f"Video {i}/{len(video_chunks)}") - - processed_entry, failed_entry = process_single_video( - video_info, excel_df, FINAL_BIDS_ROOT, FINAL_DERIVATIVES_DIR, - FINAL_SOURCEDATA_DIR, TEMP_DIR, TARGET_FRAMERATE, TARGET_RESOLUTION - ) - - if processed_entry: - all_processed.append(processed_entry) - if failed_entry: - all_failed.append(failed_entry) - - # Save processing logs - task_output_dir = os.path.join(OUTPUT_DIR, str(my_task_id)) - os.makedirs(task_output_dir, exist_ok=True) - - log_path = os.path.join(task_output_dir, "processing_log.json") - failed_path = os.path.join(task_output_dir, "not_processed.json") - - try: - with open(log_path, "w") as f: - json.dump(all_processed, f, indent=4, default=str) - - with open(failed_path, "w") as f: - json.dump(all_failed, f, indent=4, default=str) - except Exception as e: - safe_print(f"ERROR: Failed to save processing logs: {e}") - - # Clean up temp directory - if os.path.exists(TEMP_DIR): - shutil.rmtree(TEMP_DIR) - - # Print summary - end_time = time.time() - total_time = end_time - start_time - print_summary(all_processed, all_failed) - safe_print(f"Total processing time: {total_time/3600:.1f} hours ({total_time/60:.1f} minutes)") - - if all_processed: - avg_time_per_video = total_time 
/ len(all_processed) - safe_print(f"Average time per video: {avg_time_per_video:.1f} seconds") - - safe_print("Processing complete") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/src/new_code/data_copy_to_bids.ipynb b/src/new_code/data_copy_to_bids.ipynb deleted file mode 100644 index 565bbab..0000000 --- a/src/new_code/data_copy_to_bids.ipynb +++ /dev/null @@ -1,261 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "23ebe559-c77e-4d3e-adab-ded275ce6cb5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import sys\n", - "from pathlib import Path\n", - "import pandas as pd\n", - "import numpy as np\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1b027235-ead2-4209-bf05-cf3993310394", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def parse_duration_to_seconds(s):\n", - " if pd.isna(s):\n", - " return np.nan\n", - " s = str(s).strip()\n", - " if s == \"\":\n", - " return np.nan\n", - " parts = s.split(\":\")\n", - " try:\n", - " parts = [float(p) for p in parts]\n", - " except Exception:\n", - " # maybe already numeric string (seconds)\n", - " try:\n", - " return float(s)\n", - " except Exception:\n", - " return np.nan\n", - " # seconds only\n", - " if len(parts) == 1:\n", - " return parts[0]\n", - " # mm:ss(.fraction)\n", - " if len(parts) == 2:\n", - " m, sec = parts\n", - " return m * 60.0 + sec\n", - " # hh:mm:ss\n", - " if len(parts) == 3:\n", - " h, m, sec = parts\n", - " return h * 3600.0 + m * 60.0 + sec\n", - " return np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "f9c6c300-2cf1-4bee-b99b-b92b42a77e2b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "def parse_date(v):\n", - " if pd.isna(v):\n", - " return None\n", - " try:\n", - " return pd.to_datetime(v, utc=True, errors=\"coerce\")\n", - " except Exception:\n", - " return pd.to_datetime(v, errors=\"coerce\")\n", - "def compute_age_months(dob, vid_date):\n", - " if dob is None or pd.isna(dob) or vid_date is None or pd.isna(vid_date):\n", - " return np.nan\n", - " dob_ts = pd.to_datetime(dob, utc=True, errors=\"coerce\")\n", - " vd_ts = pd.to_datetime(vid_date, utc=True, errors=\"coerce\")\n", - " if pd.isna(dob_ts) or pd.isna(vd_ts):\n", - " return np.nan\n", - " # use total seconds to avoid .days/.seconds mix and convert to months (~30.4375 days)\n", - " seconds = (vd_ts - dob_ts).total_seconds()\n", - " months = seconds / (30.4375 * 86400.0)\n", - " return float(months)\n", - "def session_from_age_months(m):\n", - " if pd.isna(m):\n", - " return \"session_unknown\"\n", - " m = float(m)\n", - " if 12 <= m <= 16:\n", - " return \"12-16 months\"\n", - " if 34 <= m <= 38:\n", - " return \"34-38 months\"\n", - " # older mapping used in checkpoints: 14_month etc. 
keep human labels by default\n", - " if 10 <= m < 24:\n", - " return \"12-16 months\"\n", - " if 30 <= m < 40:\n", - " return \"34-38 months\"\n", - " return \"session_unknown\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "531a9948-98e0-40a5-9bda-c93bd1e23200", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1029c4d-d577-43b3-9bb9-8c5999656cc3", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[OK] wrote: bids-dataset/participants.tsv\n" - ] - } - ], - "source": [ - "\n", - "out_dir = \".\"\n", - "p = Path(\"/orcd/data/satra/002/datasets/SAILS/data4analysis/Video Rating Data/SAILS_RATINGS_ALL_DEDUPLICATED_NotForFinalAnalyses_2025.10.csv\")\n", - "df = pd.read_csv(p, dtype=str)\n", - "\n", - "# normalize common column name variants\n", - "df.columns = [c.strip() for c in df.columns]\n", - "def col_first(df, candidates):\n", - " for c in candidates:\n", - " if c in df.columns:\n", - " return c\n", - " return None\n", - "\n", - "src_col = col_first(df, [\"SourceFile\", \"sourcefile\", \"Source_File\"])\n", - "id_col = col_first(df, [\"ID\", \"Id\", \"id\"])\n", - "fn_col = col_first(df, [\"FileName\", \"FileName\", \"filename\", \"File_Name\"])\n", - "dur_col = col_first(df, [\"Vid_duration\", \"Vid_Duration\", \"duration\"])\n", - "dob_col = col_first(df, [\"DOB\", \"Dob\", \"dob\"])\n", - "vd_col = col_first(df, [\"VideoDate\", \"VideoDate\", \"Video Date\", \"Video_Date\"])\n", - "age_col = col_first(df, [\"Age\", \"age\", \"age_in_months\", \"Age_in_months\"])\n", - "timepoint_col = col_first(df, [\"timepoint\", \"Timepoint\", \"TimePoint\"])\n", - "date_valid_col = col_first(df, [\"DateValidityScore\", \"Date Validity Score\", \"Date_Validity_Score\"])\n", - "\n", - "required = [src_col, id_col, fn_col]\n", - "if not all(required):\n", - " raise SystemExit(\"CSV missing one of required columns: SourceFile, ID, FileName\")\n", - "\n", - "out = pd.DataFrame()\n", - "out[\"sourcefile\"] = df[src_col].astype(str).str.strip()\n", - "out[\"video_filename\"] = df[fn_col].astype(str).apply(lambda s: Path(s).name)\n", - "out[\"participant_id\"] = df[id_col].astype(str).str.strip().str.upper()\n", - "\n", - "# duration parse\n", - "if dur_col:\n", - " out[\"duration\"] = df[dur_col].apply(parse_duration_to_seconds)\n", - "else:\n", - " out[\"duration\"] = pd.NA\n", - "\n", - "# video_date parse\n", - "if vd_col:\n", - " out[\"video_date\"] = df[vd_col].apply(parse_date).dt.tz_convert(None).dt.strftime(\"%Y-%m-%d\")\n", - "else:\n", - " out[\"video_date\"] = pd.NA\n", - "\n", - "out[\"date_validity_score\"] = df[date_valid_col] if date_valid_col else pd.NA\n", - "\n", - "ages = []\n", - "for i, row in df.iterrows():\n", - " dob = row.get(dob_col) if dob_col else None\n", - " vd = row.get(vd_col) if vd_col else None\n", - " age_m = compute_age_months(dob, vd) if dob or vd else np.nan\n", - " if np.isnan(age_m):\n", - " raw_age = row.get(age_col) if age_col else None\n", - " if raw_age is None or (isinstance(raw_age, float) and np.isnan(raw_age)) or str(raw_age).strip()==\"\":\n", - " age_m = np.nan\n", - " else:\n", - " try:\n", - " a = float(str(raw_age))\n", - " # heuristic: if a looks like years (<=6) convert to months\n", - " if a <= 6:\n", - " age_m = a * 12.0\n", - " else:\n", - " # assume already months\n", - " age_m = a\n", - " except Exception:\n", - " age_m = np.nan\n", - " ages.append(age_m)\n", - "out[\"age\"] = ages\n", - 
"\n", - "if timepoint_col and timepoint_col in df.columns:\n", - " out[\"session_id\"] = df[timepoint_col].fillna(\"\").astype(str).replace(\"\", \"session_unknown\")\n", - "else:\n", - " out[\"session_id\"] = out[\"age\"].apply(lambda m: session_from_age_months(m))\n", - "\n", - "out = out.drop_duplicates(subset=[\"participant_id\", \"video_filename\"])\n", - "bids_root = Path(out_dir) / \"bids-dataset\"\n", - "bids_root.mkdir(parents=True, exist_ok=True)\n", - "\n", - "participants_cols = [\"participant_id\", \"session_id\", \"video_filename\", \"duration\", \"age\", \"video_date\", \"date_validity_score\", \"sourcefile\"]\n", - "out.to_csv(bids_root / \"participants.tsv\", sep=\"\\t\", index=False, columns=participants_cols)\n", - "\n", - "participants_json = {\n", - " \"participant_id\": {\"Description\": \"Original alphanumeric participant identifier (without 'sub-').\"},\n", - " \"session_id\": {\"Description\": \"Session label (e.g., '12-16 months', '34-38 months').\"},\n", - " \"video_filename\": {\"Description\": \"Source video file name (basename).\"},\n", - " \"duration\": {\"Description\": \"Video duration (seconds).\", \"Units\": \"s\"},\n", - " \"age\": {\"Description\": \"Age at video time (months).\", \"Units\": \"months\"},\n", - " \"video_date\": {\"Description\": \"Date of the video (ISO YYYY-MM-DD).\"},\n", - " \"date_validity_score\": {\"Description\": \"Date validity score from SAILS ratings CSV.\"},\n", - " \"sourcefile\": {\"Description\": \"Original full source file path from SAILS CSV.\"},\n", - "}\n", - "import json\n", - "with open(bids_root / \"participants.json\", \"w\") as f:\n", - " json.dump(participants_json, f, indent=4)\n", - "\n", - "print(f\"[OK] wrote: {bids_root / 'participants.tsv'}\")\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "84c649ea-d321-4389-ae95-d86a011c5e40", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5e35c5b8-05a1-4445-819c-7d6e4be2106a", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/src/new_code/log_file.ipynb b/src/new_code/log_file.ipynb deleted file mode 100644 index b753187..0000000 --- a/src/new_code/log_file.ipynb +++ /dev/null @@ -1,597 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total processed: 3259\n", - "Total failed: 20\n", - "\n", - "Tasks processed:\n", - " bookshare: 56\n", - " dailyroutine: 252\n", - " generalsocialcommunicationinteraction: 1010\n", - " generalsocialinteraction: 1\n", - " motorplay: 497\n", - " other: 283\n", - " socialroutine: 158\n", - " specialoccasion: 135\n", - " toyplay: 564\n", - " unknown: 303\n", - "\n", - "Sessions:\n", - " Session 01: 1677\n", - " Session 02: 1582\n", - "\n", - "Failed videos by error type:\n", - " Audio extraction failed: ffmpeg version N-121159-g0bd5a7d371-20250921 Copyright (c) 2000-2025 the FFmpeg developers\n", - " built with gcc 15.2.0 (crosstool-NG 1.28.0.1_403899e)\n", - " configuration: 
--prefix=/ffbuild/prefix [... identical ffmpeg N-121159-g0bd5a7d371-20250921 build flags and libav*/libsw* version lines omitted; the same banner repeats verbatim before every message below ...]\n",
-      "  Audio extraction failed ('Output file does not contain any stream'; the processed clip has no audio track):\n",
-      "    sub-B1S3L8Q7Y6_ses-02_task-motorplay_run-03_desc-processed_beh.mp4\n",
-      "    sub-I8T4E2J3M6_ses-01_task-unknown_run-02_desc-processed_beh.mp4\n",
-      "    sub-L5Q0Z1W8S2_ses-01_task-generalsocialcommunicationinteraction_run-01_desc-processed_beh.mp4\n",
-      "    sub-T8P2Z1M9O9_ses-01_task-motorplay_run-09_desc-processed_beh.mp4\n",
-      "    sub-W3O7N1N8U2_ses-01_task-toyplay_run-01_desc-processed_beh.mp4\n",
-      "  FFmpeg conversion failed ('moov atom not found'; truncated or corrupt container, cannot be opened):\n",
-      "    sub-D9N0U7M9X3_ses-02_task-specialoccasion_run-01_desc-processed_beh.mp4\n",
-      "    sub-S7F0B6H3A3_ses-02_task-unknown_run-01_video..MOV (reported three times)\n",
-      "    sub-G3D3S2N7W8_ses-02_task-unknown_run-01_video..MOV\n",
-      "    sub-K6O1J5K7Z0_ses-01_task-unknown_run-01_video..mov\n",
-      "  FFmpeg conversion failed ('Could not find tag for codec ..., codec not currently supported in container'; source codec cannot be stream-copied into MP4):\n",
-      "    amr_nb: sub-A2P7X9N8L7_ses-01_task-generalsocialcommunicationinteraction_run-04_video..3gp\n",
-      "    amr_nb: sub-N3L7A1I2B9_ses-02_task-generalsocialcommunicationinteraction_run-09_video..3gp\n",
-      "    h263:   sub-H9T5Y8D3A1_ses-01_task-unknown_run-01_video..3gp\n",
-      "    pcm_u8: sub-Y5C5R8P9Z5_ses-01_task-unknown_run-01..run-04_video..AVI (four CanonMVI06 MJPEG/PCM files)\n",
-      "  Unreadable or unsupported video format: 2\n"
-     ]
-    }
-   ],
-   "source": [
-    "import json\n",
-    "import os\n",
-    "import pandas as pd\n",
-    "\n",
-    "# paths\n",
-    "BASE_DIR = \"/home/aparnabg/orcd/scratch/bidsdata\"\n",
-    "BIDS_ROOT = \"/home/aparnabg/orcd/scratch/bidsdata/final_bids-dataset\"\n",
-    "\n",
-    "def merge_logs():\n",
-    "    all_processed, all_failed = [], []\n",
-    "    task_dirs = sorted(\n",
-    "        int(item) for item in os.listdir(BASE_DIR)\n",
-    "        if item.isdigit() and os.path.isdir(os.path.join(BASE_DIR, item))\n",
-    "    )\n",
-    "\n",
-    "    for task_id in task_dirs:\n",
-    "        task_dir = os.path.join(BASE_DIR, str(task_id))\n",
-    "        processed_file = os.path.join(task_dir, \"processing_log.json\")\n",
-    "        failed_file = os.path.join(task_dir, \"not_processed.json\")\n",
-    "\n",
-    "        if os.path.exists(processed_file):\n",
-    "            with open(processed_file, 'r') as f:\n",
-    "                all_processed.extend(json.load(f))\n",
-    "\n",
-    "        if os.path.exists(failed_file):\n",
-    "            with open(failed_file, 'r') as f:\n",
-    "                all_failed.extend(json.load(f))\n",
-    "\n",
-    "    return all_processed, all_failed\n",
-    "\n",
-    "processed_data, failed_data = merge_logs()\n",
-    "\n",
-    "# Save all logs\n",
-    "with open(os.path.join(BASE_DIR, \"all_processing_log.json\"), 'w') as f:\n",
-    "    json.dump(processed_data, f, indent=2, default=str)\n",
-    "\n",
-    "with open(os.path.join(BASE_DIR, \"all_not_processed.json\"), 'w') as f:\n",
-    "    json.dump(failed_data, f, indent=2, default=str)\n",
-    "\n",
-    "# Create participants files\n",
-    "if processed_data:\n",
-    "    participant_ids = {\n",
-    "        entry['participant_id'] for entry in processed_data if 'participant_id' in entry\n",
-    "    }\n",
-    "\n",
-    "    participants_data = [\n",
-    "        {\"participant_id\": f\"sub-{pid}\", \"age\": \"n/a\", \"validity\": \"n/a\"}\n",
-    "        for pid in sorted(participant_ids)\n",
-    "    ]\n",
-    "\n",
-    "    pd.DataFrame(participants_data).to_csv(\n",
-    "        os.path.join(BIDS_ROOT, \"participants.tsv\"),\n",
-    "        sep='\\t', index=False, na_rep='n/a'\n",
-    "    )\n",
-    "\n",
-    "    participants_json = {\n",
-    "        \"participant_id\": {\"Description\": \"Unique participant identifier\"},\n",
-    "        \"age\": {\"Description\": \"Age information\", \"Units\": \"months\"},\n",
-    "        \"validity\": {\"Description\": \"Data validity information\"}\n",
-    "    }\n",
-    "\n",
-    "    with open(os.path.join(BIDS_ROOT, \"participants.json\"), 'w') as f:\n",
-    "        json.dump(participants_json, f, indent=2)\n",
-    "\n",
-    "# Summary\n",
-    "print(f\"Total processed: {len(processed_data)}\")\n",
-    "print(f\"Total failed: {len(failed_data)}\")\n",
-    "\n",
-    "if processed_data:\n",
-    "    task_counts = {}\n",
-    "    for entry in processed_data:\n",
-    "        task = entry.get('task_label', 'unknown')\n",
-    "        task_counts[task] = task_counts.get(task, 0) + 1\n",
-    "\n",
-    "    print(\"\\nTasks processed:\")\n",
-    "    for task, count in sorted(task_counts.items()):\n",
-    "        print(f\"  {task}: {count}\")\n",
-    "\n",
-    "    session_counts = {}\n",
-    "    for entry in processed_data:\n",
-    "        session = entry.get('session_id', 'unknown')\n",
-    "        session_counts[session] = session_counts.get(session, 0) + 1\n",
-    "\n",
-    "    print(\"\\nSessions:\")\n",
-    "    for session, count in sorted(session_counts.items()):\n",
-    "        print(f\"  Session {session}: {count}\")\n",
-    "\n",
-    "if failed_data:\n",
-    "    error_counts = {}\n",
-    "    for entry in failed_data:\n",
-    "        error = entry.get('error', 'Unknown')\n",
-    "        error_counts[error] = error_counts.get(error, 0) + 1\n",
-    "\n",
-    "    print(\"\\nFailed videos by error type:\")\n",
-    "    for error, count in sorted(error_counts.items()):\n",
-    "        print(f\"  {error}: {count}\")\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
processed_data:\n", - " session = entry.get('session_id', 'unknown')\n", - " session_counts[session] = session_counts.get(session, 0) + 1\n", - " \n", - " print(\"\\nSessions:\")\n", - " for session, count in sorted(session_counts.items()):\n", - " print(f\" Session {session}: {count}\")\n", - "\n", - "if failed_data:\n", - " error_counts = {}\n", - " for entry in failed_data:\n", - " error = entry.get('error', 'Unknown')\n", - " error_counts[error] = error_counts.get(error, 0) + 1\n", - " \n", - " print(\"\\nFailed videos by error type:\")\n", - " for error, count in sorted(error_counts.items()):\n", - " print(f\" {error}: {count}\")\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/src/new_code/submit_bids.sh b/src/new_code/submit_bids.sh deleted file mode 100644 index 872c1ce..0000000 --- a/src/new_code/submit_bids.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=bids_processing -#SBATCH --partition=mit_normal -#SBATCH --array=0-90 -#SBATCH --output=logs/bids_%A_%a.out -#SBATCH --error=logs/bids_%A_%a.err -#SBATCH --mem=5G -#SBATCH --time=10:00:00 -#SBATCH --cpus-per-task=5 - -mkdir -p logs - -module load miniforge - - -source $(conda info --base)/etc/profile.d/conda.sh - -eval "$(conda shell.bash hook)" - -conda activate data_env - -echo "Python executable: $(which python)" -echo "Python version: $(python --version)" - -echo "Starting video processing for task $SLURM_ARRAY_TASK_ID" -python /home/aparnabg/bids.py $SLURM_ARRAY_TASK_ID $SLURM_ARRAY_TASK_COUNT - -echo "Job completed at: $(date)" From 59b66e306f82b3bf2db946525cffb9744cd469ce Mon Sep 17 00:00:00 2001 From: lucie271 Date: Thu, 30 Oct 2025 17:08:16 -0400 Subject: [PATCH 10/36] Final script for BIDS conversion added --- src/BIDS_convertor.py | 2142 +++++++++++++++++++++++++---------------- 1 file changed, 1294 insertions(+), 848 deletions(-) diff --git a/src/BIDS_convertor.py b/src/BIDS_convertor.py index 635b163..957ab32 100644 --- a/src/BIDS_convertor.py +++ b/src/BIDS_convertor.py @@ -14,22 +14,20 @@ * check with actual data """ -# Standard library imports import json import os -import plistlib import re import shutil -import struct import subprocess +import sys +import time from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union -# Third-party imports +import cv2 import pandas as pd import yaml -from dateutil import parser def load_configuration(config_path: str = "config.yaml") -> Dict[str, Any]: @@ -51,17 +49,24 @@ def load_configuration(config_path: str = "config.yaml") -> Dict[str, Any]: # Load configuration -config = load_configuration() +config_path = ( + Path(__file__).resolve().parents[1] / "configs" / "config_bids_convertor.yaml" +) +config = load_configuration(str(config_path)) +# Unpack configuration +ANNOTATION_FILE = config["annotation_file"] VIDEO_ROOT = config["video_root"] -ASD_CSV = config["asd_csv"] -NONASD_CSV = config["nonasd_csv"] OUTPUT_DIR = config["output_dir"] -TARGET_RESOLUTION = config.get("target_resolution", "1280x720") -TARGET_FRAMERATE = config.get("target_fps", 30) - +TARGET_RESOLUTION = config["target_resolution"] 
+TARGET_FRAMERATE = config["target_framerate"] +ASD_STATUS_FILE = config["asd_status"] # BIDS directory structure -BIDS_ROOT = os.path.join(OUTPUT_DIR, "bids-dataset") -DERIVATIVES_DIR = os.path.join(BIDS_ROOT, "derivatives", "preprocessed") +FINAL_BIDS_ROOT = os.path.join( + OUTPUT_DIR, config.get("final_bids_root", "final_bids-dataset") +) +FINAL_DERIVATIVES_DIR = os.path.join( + FINAL_BIDS_ROOT, config.get("derivatives_subdir", "derivatives/preprocessed") +) def create_bids_structure() -> None: @@ -74,325 +79,508 @@ def create_bids_structure() -> None: This function creates directories with exist_ok=True to prevent errors if directories already exist. """ - os.makedirs(BIDS_ROOT, exist_ok=True) - os.makedirs(DERIVATIVES_DIR, exist_ok=True) + os.makedirs(FINAL_BIDS_ROOT, exist_ok=True) + os.makedirs(FINAL_DERIVATIVES_DIR, exist_ok=True) -def create_dataset_description() -> None: - """Create dataset_description.json for main BIDS dataset. +def save_json(data: Union[List[Any], Dict[str, Any]], path: str) -> None: + """Save data to JSON file. - Generates the required dataset description file according to BIDS - specification, containing metadata about the dataset including name, - version, authors, and description. + Utility function to save Python data structures to JSON files with + proper formatting and error handling. + + Args: + data (list or dict): Data structure to save as JSON. + path (str): Output file path for JSON file. Raises: - IOError: If unable to write the dataset description file. - """ - dataset_desc = { - "Name": "Home Videos", - "BIDSVersion": "1.10.0", - "HEDVersion": "8.2.0", - "DatasetType": "raw", - "License": "", - "Authors": ["Research Team"], - "Acknowledgements": "participants and families", - "HowToAcknowledge": "", - "Funding": ["", "", ""], - "EthicsApprovals": [""], - "ReferencesAndLinks": ["", "", ""], - "DatasetDOI": "doi:", - } + IOError: If unable to write to the specified path. + TypeError: If data contains non-serializable objects. - with open(os.path.join(BIDS_ROOT, "dataset_description.json"), "w") as f: - json.dump(dataset_desc, f, indent=4) + Note: + Uses 4-space indentation for readable JSON output. + """ + with open(path, "w") as f: + json.dump(data, f, indent=4) -def create_derivatives_dataset_description() -> None: - """Create dataset_description.json for derivatives. +def safe_print(message: str) -> None: + """Print with timestamps.""" + timestamp = datetime.now().strftime("%H:%M:%S") + print(f"{timestamp} [MAIN] {message}") - Generates the dataset description file for the derivatives directory, - documenting the preprocessing pipeline and source datasets. - Raises: - IOError: If unable to write the derivatives dataset description file. 
- """ - derivatives_desc = { - "Name": "Home Videos", - "BIDSVersion": "1.10.0", - "DatasetType": "derivative", - "GeneratedBy": [ - { - "Name": "Video Preprocessing Pipeline", - "Version": "1.0.0", - "Description": ( - "FFmpeg-based video stabilization, denoising, " - "and standardization pipeline" - ), - "CodeURL": "local", - } - ], - "SourceDatasets": [{"DOI": "", "URL": "", "Version": "1.0.0"}], - "HowToAcknowledge": "Please cite the original study", - } +# Helper functions +def parse_duration(duration_str: str) -> float: + """Parse duration string to seconds.""" + try: + if pd.isna(duration_str) or duration_str == "": + return 0.0 + duration_str = str(duration_str) + if ":" in duration_str: + parts = duration_str.split(":") + if len(parts) == 3: + hours = int(parts[0]) + minutes = int(parts[1]) + seconds = float(parts[2]) + return hours * 3600 + minutes * 60 + seconds + elif len(parts) == 2: + minutes = int(parts[0]) + seconds = float(parts[1]) + return minutes * 60 + seconds + return float(duration_str) + except (ValueError, TypeError): + return 0.0 + + +def make_bids_task_label(task_name: str) -> str: + """Convert TaskName to BIDS-compatible task label for filenames.""" + s = str(task_name).strip() + s = re.sub(r"[^0-9a-zA-Z+]", "", s) # Keep only alphanumeric and + + return s + + +def get_video_properties(video_path: str) -> dict: + """Extract video properties using OpenCV.""" + try: + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + return {"SamplingFrequency": None, "Resolution": None} + + fps = cap.get(cv2.CAP_PROP_FPS) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap.release() + + return { + "SamplingFrequency": fps, + "Resolution": f"{width}x{height}", + } - derivatives_path = os.path.join(DERIVATIVES_DIR, "dataset_description.json") - with open(derivatives_path, "w") as f: - json.dump(derivatives_desc, f, indent=4) + except Exception as e: + print(f"Error reading video {video_path}: {e}") + return {"SamplingFrequency": None, "Resolution": None} -def create_readme() -> None: - """Create README file for the BIDS dataset. +def determine_session_from_folder(folder_name: str) -> Optional[str]: + """Determine the session ID from a folder name based on known age-related patterns. - Generates a comprehensive README file documenting the dataset structure, - organization, processing pipeline, and usage instructions following - BIDS best practices. + Args: + folder_name (str): The name of the folder to check. - Raises: - IOError: If unable to write the README file. + Returns: + Optional[str]: "01" for 12–16 month sessions, "02" for 34–38 month sessions, + or None if no match. """ - readme_content = """# README - -This README serves as the primary guide for researchers using this BIDS-format dataset. 
- -## Details Related to Access to the Data + folder_lower = folder_name.lower() + + # Session 01 patterns + if any( + pattern in folder_lower + for pattern in [ + "12-16 month", + "12-14 month", + "12_16", + "12_14", + "12-16month", + "12-14month", + "12-16_month_videos", + ] + ): + return "01" -### Data User Agreement + # Session 02 patterns (typos and variants included) + if any( + pattern in folder_lower + for pattern in [ + "34-38 month", + "34-28 month", + "34-48 month", + "34_38", + "34_28", + "34_48", + "34-38month", + "34-28month", + "34-48month", + "34-38_month_videos", + ] + ): + return "02" -### Contact Person -- Name: -- Email: -- ORCID: + return None -### Practical Information to Access the Data -## Overview +def find_age_folder_session(current_path: str, participant_path: str) -> Optional[str]: + """Recursively seek the timepoint folder. -### Project Information -- Project Name: [If applicable] -- Years: [YYYY-YYYY] + Args: + current_path (str): Current directory path to inspect. + participant_path (str): Root path of the participant. -### Dataset Description -This dataset contains [brief description of data types and sample size]. + Returns: + Optional[str]: Session ID ("01" or "02") if detected, else None. + """ + if ( + not current_path.startswith(participant_path) + or current_path == participant_path + ): + return None -### Experimental Design + current_folder = os.path.basename(current_path) + session_id = determine_session_from_folder(current_folder) + if session_id: + return session_id + parent_path = os.path.dirname(current_path) + return find_age_folder_session(parent_path, participant_path) -### Quality Assessment -[Summary statistics or QC metrics] -## Methods +def extract_participant_id_from_folder(folder_name: str) -> str: + """Extract the participant ID from folder names. -### Subjects -[Description of participant pool] + Args: + folder_name (str): Folder name containing participant info. -#### Recruitment -[Recruitment procedures] + Returns: + str: Extracted participant ID. + """ + if "AMES_" in folder_name: + parts = folder_name.split("AMES_") + if len(parts) > 1: + return parts[1].strip() -#### Inclusion Criteria -1. [Criterion 1] -2. [Criterion 2] + if "_" in folder_name: + return folder_name.split("_")[-1] -#### Exclusion Criteria -1. [Criterion 1] -2. [Criterion 2] + return folder_name -### Apparatus -[Equipment and environment details] -### Initial Setup -[Pre-session procedures] +def determine_session_from_excel( + current_path: str, annotation_df: pd.DataFrame, participant_id: str +) -> Optional[str]: + """Determine the session ID for a video based on the annotation file. -### Task Organization -- Counterbalancing: [Yes/No] -- Session Structure: - 1. [Activity 1] - 2. [Activity 2] + Args: + current_path (str): Path to the video file. + annotation_df (pd.DataFrame): Excel data containing 'ID', + 'FileName', 'timepoint', and 'Age' columns. + participant_id (str): Participant identifier. -### Task Details + Returns: + Optional[str]: Session ID ("01" or "02"), or None if not found. + """ + filename = os.path.splitext(os.path.basename(current_path))[0] + # Filter for the participant + participant_excel = annotation_df[ + annotation_df["ID"].astype(str) == str(participant_id) + ] + if participant_excel.empty: + raise ValueError( + f"Participant ID '{participant_id}' not found in Excel metadata" + f" for file '{filename}'." 
+        )
 
-### Additional Data Acquired
 
+    # Match the video filename (without extension)
+    mask = participant_excel["FileName"].str.split(".").str[0] == filename
+    video_entry = participant_excel[mask]
+    if video_entry.empty:
+        raise ValueError(
+            f"No matching Excel entry found for video '{filename}'"
+            f" (participant {participant_id})."
+        )
 
-### Experimental Location
-[Facility/geographic details]
+    timepoint = video_entry["timepoint"].iloc[0]
+    age = video_entry["Age"].iloc[0]
 
-### Missing Data
-- Participant [ID]: [Issue description]
-- Participant [ID]: [Issue description]
+    # Normalize timepoint to string for pattern matching
+    timepoint_str = str(timepoint)
 
-### Notes
-[Any additional relevant information]
 
+    if "14" in timepoint_str:
+        return "01"
+    elif "36" in timepoint_str:
+        return "02"
+    elif pd.notna(age):
+        return "01" if age < 2 else "02"
+    else:
+        raise ValueError(
+            f"Unable to determine session ID: timepoint={timepoint}, age={age}"
+        )
 
-"""
-    with open(os.path.join(BIDS_ROOT, "README"), "w") as f:
-        f.write(readme_content)
 
+def find_session_id(
+    directory: str,
+    current_path: str,
+    participant_path: str,
+    annotation_df: pd.DataFrame,
+    participant_id: str,
+    excel: bool = True,
+) -> Optional[str]:
+    """Determine session ID by checking folder names first, then Excel data if needed.
 
+    Args:
+        directory (str): Current directory being scanned.
+        current_path (str): Full path to the file.
+        participant_path (str): Root participant directory.
+        annotation_df (pd.DataFrame): Excel metadata.
+        participant_id (str): Participant identifier.
+        excel (bool): Whether to use Excel data for session determination.
 
-def get_session_from_path(video_path: Union[str, Path]) -> str:
-    """Determine session ID based on video path.
 
+    Returns:
+        Optional[str]: Session ID ("01" or "02"), or None.
+    """
+    if (
+        not current_path.startswith(participant_path)
+        or current_path == participant_path
+    ):
+        return None
 
-    Analyzes the video file path to determine which session (age group)
-    the video belongs to based on folder naming conventions.
+    try:
+        folder_name = os.path.basename(directory)
+        session_id = determine_session_from_folder(folder_name)
 
-    Args:
-        video_path (str or Path): Path to the video file.
+        if not session_id and excel:
+            session_id = determine_session_from_excel(
+                current_path, annotation_df, participant_id
+            )
 
-    Returns:
-        str: Session ID ('01' for 12-16 months, '02' for 34-38 months).
+        if session_id:
+            return session_id
+
+        # Recurse upward if not found
+        parent_path = os.path.dirname(directory)
+        if parent_path != directory:
+            return find_session_id(
+                parent_path,
+                current_path,
+                participant_path,
+                annotation_df,
+                participant_id,
+                False,
+            )
 
-    Note:
-        Defaults to session '01' if no clear age group indicator is found.
-    """
-    path_str = str(video_path).lower()
-    if "12-16 month" in path_str:
-        return "01"
-    elif "34-38 month" in path_str:
-        return "02"
-    else:
-        # Fallback - try to infer from folder structure
-        return "01"  # Default to session 01
+    except PermissionError:
+        print(f"Permission denied: {current_path}")
+    except Exception as e:
+        print(f"Error accessing {current_path}: {e}")
 
+    return None
 
-def create_bids_filename(
-    participant_id: int, session_id: str, suffix: str, extension: str
-) -> str:
-    """Create BIDS-compliant filename.
 
-    Generates standardized filenames following BIDS naming conventions
-    for participant data files.
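+
+# Illustrative example of the session lookup order (hypothetical path): for a
+# clip at ".../AMES_X/12-16 month videos/clip.mov", find_session_id() returns
+# "01" from the folder name alone. Only when no folder pattern matches does
+# determine_session_from_excel() run, mapping timepoint 14 -> "01" and
+# timepoint 36 -> "02", otherwise falling back to Age ("01" if age < 2).
+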
+def find_videos_recursive( + directory: str, + participant_path: str, + annotation_df: pd.DataFrame, + participant_id: str, +) -> List[Tuple[str, Optional[str]]]: + """Recursively find video files and determine their session IDs. Args: - participant_id (int): Numeric participant identifier. - session_id (str): Session identifier (e.g., '01', '02'). - suffix (str): File type suffix (e.g., 'beh', 'events'). - extension (str): File extension without dot (e.g., 'mp4', 'tsv'). + directory (str): Directory to search in. + participant_path (str): Root path of the participant. + annotation_df (pd.DataFrame): Excel data for metadata lookup. + participant_id (str): Participant identifier. Returns: - str: BIDS-compliant filename. - - Example: - >>> create_bids_filename(123, '01', 'beh', 'mp4') - 'sub-123_ses-01_task-play_beh.mp4' + List[Tuple[str, Optional[str]]]: List of (video_path, session_id) pairs. """ - return f"sub-{participant_id:02d}_ses-{session_id}_task-play_{suffix}.{extension}" + videos = [] + try: + for item in os.listdir(directory): + if item.startswith("."): + continue # Skip hidden files + + item_path = os.path.join(directory, item) + if os.path.isfile(item_path) and item.lower().endswith( + (".mp4", ".mov", ".avi", ".mkv", ".m4v", ".3gp", ".mts") + ): + session_id = find_session_id( + directory, + item_path, + participant_path, + annotation_df, + participant_id, + ) + videos.append((item_path, session_id)) + + elif os.path.isdir(item_path): + videos.extend( + find_videos_recursive( + item_path, participant_path, annotation_df, participant_id + ) + ) + + except PermissionError: + print(f"Permission denied: {directory}") + except Exception as e: + print(f"Error accessing {directory}: {e}") + + return videos -def read_demographics(asd_csv: str, nonasd_csv: str) -> pd.DataFrame: - """Read and combine demographics data from CSV files. - Loads participant demographics from separate ASD and non-ASD CSV files, - combines them, and standardizes column names. +def get_all_videos(video_root: str, annotation_df: pd.DataFrame) -> List[dict]: + """Find and label all participant videos with their corresponding session IDs. Args: - asd_csv (str): Path to ASD participants CSV file. - nonasd_csv (str): Path to non-ASD participants CSV file. + video_root (str): Root directory containing all participant folders. + annotation_df (pd.DataFrame): Excel data with metadata. Returns: - pd.DataFrame: Combined demographics dataframe with standardized column names. - - Raises: - FileNotFoundError: If either CSV file is not found. - pd.errors.EmptyDataError: If CSV files are empty. + List[dict]: List of video metadata dictionaries. """ - df_asd = pd.read_csv(asd_csv) - df_nonasd = pd.read_csv(nonasd_csv) - df = pd.concat([df_asd, df_nonasd], ignore_index=True) - df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_") - return df + all_videos = [] + try: + for participant_folder in os.listdir(video_root): + participant_path = os.path.join(video_root, participant_folder) + if not os.path.isdir(participant_path): + continue -def create_participants_files( - demographics_df: pd.DataFrame, processed_data: List[Dict[str, Any]] -) -> None: - """Create participants.tsv and participants.json files. + participant_id = extract_participant_id_from_folder(participant_folder) + if not participant_id: + continue - Generates BIDS-compliant participant information files including - a TSV file with participant data and a JSON data dictionary. 
+ videos = find_videos_recursive( + participant_path, participant_path, annotation_df, participant_id + ) - Args: - demographics_df (pd.DataFrame): Demographics dataframe. - processed_data (list): List of processed video data dictionaries. + for video_path, session_id in videos: + if session_id in {"01", "02"}: + all_videos.append( + { + "participant_id": participant_id, + "filename": os.path.basename(video_path), + "full_path": video_path, + "session_id": session_id, + "age_folder": os.path.basename(os.path.dirname(video_path)), + } + ) - Raises: - IOError: If unable to write participant files. - """ - # Get unique participants from processed data - processed_participants = set() - for entry in processed_data: - processed_participants.add(entry["bids_participant_id"]) + except Exception as e: + print(f"Error scanning video folders: {e}") + + return all_videos + + +def create_dummy_excel_data( + video_path: str, participant_id: str, session_id: str, task_label: str = "unknown" +) -> dict[str, str]: + """Create dummy behavioral data for videos not in Excel file.""" + video_filename = os.path.basename(video_path) + + dummy_row_data = { + "ID": participant_id, + "FileName": video_filename, + "Context": task_label, + "Location": "n/a", + "Activity": "n/a", + "Child_of_interest_clear": "n/a", + "#_adults": "n/a", + "#_children": "n/a", + "#_people_background": "n/a", + "Interaction_with_child": "n/a", + "#_people_interacting": "n/a", + "Child_constrained": "n/a", + "Constraint_type": "n/a", + "Supports": "n/a", + "Support_type": "n/a", + "Example_support_type": "n/a", + "Gestures": "n/a", + "Gesture_type": "n/a", + "Vocalizations": "n/a", + "RMM": "n/a", + "RMM_type": "n/a", + "Response_to_name": "n/a", + "Locomotion": "n/a", + "Locomotion_type": "n/a", + "Grasping": "n/a", + "Grasp_type": "n/a", + "Body_Parts_Visible": "n/a", + "Angle_of_Body": "n/a", + "time_point": "n/a", + "DOB": "n/a", + "Vid_date": "n/a", + "Video_Quality_Child_Face_Visibility": "n/a", + "Video_Quality_Child_Body_Visibility": "n/a", + "Video_Quality_Child_Hand_Visibility": "n/a", + "Video_Quality_Lighting": "n/a", + "Video_Quality_Resolution": "n/a", + "Video_Quality_Motion": "n/a", + "Coder": "n/a", + "SourceFile": "n/a", + "Vid_duration": "00:00:00", + "Notes": "Video not found in Excel file - behavioral data unavailable", + } - # Filter demographics for only processed participants - participants_data = [] - for _, row in demographics_df.iterrows(): - participant_id = str(row["dependent_temporary_id"]).upper() - # Create consistent numeric ID - bids_id = f"sub-{hash(participant_id) % 10000:04d}" - - if bids_id in processed_participants: - participants_data.append( - { - "participant_id": bids_id, - "age": row.get("dependent_dob", "n/a"), - "sex": row.get("sex", "n/a"), - "group": ( - "ASD" - if "asd" in str(row.get("diagnosis", "")).lower() - else "NonASD" - ), - } - ) + return dummy_row_data - # Create participants.tsv - participants_df = pd.DataFrame(participants_data) - participants_df.to_csv( - os.path.join(BIDS_ROOT, "participants.tsv"), sep="\t", index=False + +def get_task_from_excel_row(row: pd.Series) -> str: + """Extract and create task label from Excel row data.""" + context = str(row.get("Context", "")).strip() + + if context and context.lower() not in ["nan", "n/a", ""]: + return make_bids_task_label(context) + else: + return "unknown" + + +def get_next_run_number( + participant_id: str, session_id: str, task_label: str, final_bids_root: str +) -> int: + """Find the next available run number for this 
participant/session/task."""
+    beh_dir = os.path.join(
+        final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh"
+    )
 
-    # Create participants.json (data dictionary)
-    participants_json = {
-        "participant_id": {"Description": "Unique participant identifier"},
-        "age": {"Description": "Date of birth", "Units": "YYYY-MM-DD"},
-        "sex": {
-            "Description": "Biological sex of participant",
-            "Levels": {"M": "male", "F": "female"},
-        },
-        "group": {
-            "Description": "Participant group classification",
-            "Levels": {
-                "ASD": "Autism Spectrum Disorder",
-                "NonASD": "Not Autism Spectrum Disorder",
-            },
-        },
-    }
+    if not os.path.exists(beh_dir):
+        return 1
 
-    with open(os.path.join(BIDS_ROOT, "participants.json"), "w") as f:
-        json.dump(participants_json, f, indent=4)
+    # Look for existing files with this task
+    pattern = f"sub-{participant_id}_ses-{session_id}_task-{task_label}_"
+    existing_files = [f for f in os.listdir(beh_dir) if f.startswith(pattern)]
 
+    if not existing_files:
+        return 1
 
-def extract_exif(video_path: str) -> Dict[str, Any]:
-    """Extract video metadata using ffprobe.
+    # Extract run numbers from existing files
+    run_numbers = []
+    for filename in existing_files:
+        if "_run-" in filename:
+            run_part = filename.split("_run-")[1].split("_")[0]
+            try:
+                run_numbers.append(int(run_part))
+            except ValueError:
+                continue
+        else:
+            run_numbers.append(1)  # Files without run numbers are considered run-1
 
-    Uses FFmpeg's ffprobe tool to extract comprehensive metadata from video files
-    including format information, stream details, and embedded timestamps.
+    return max(run_numbers) + 1 if run_numbers else 1
 
-    Args:
-        video_path (str): Path to the video file.
 
-    Returns:
-        dict: Dictionary containing extracted metadata including duration,
-            bit rate, format information, and date/time tags.
+def create_bids_filename(
+    participant_id: str,
+    session_id: str,
+    task_label: str,
+    suffix: str,
+    extension: str,
+    run_id: int = 1,
+) -> str:
+    """Create BIDS-compliant filename with run identifier for multiple videos per task."""
+    return (
+        f"sub-{participant_id}_"
+        f"ses-{session_id}_"
+        f"task-{task_label}_"
+        f"run-{run_id:02d}_"
+        f"{suffix}.{extension}"
+    )
 
-    Note:
-        Returns error information in the dictionary if ffprobe fails
-        or if the video format is unsupported.
 
-    Example:
-        >>> metadata = extract_exif('/path/to/video.mp4')
-        >>> print(metadata['duration_sec'])
-        120.5
-    """
+# Video processing functions
+def extract_exif(video_path: str) -> Dict[str, Any]:
+    """Extract video metadata using ffprobe."""
     try:
         cmd = [
             "ffprobe",
@@ -407,177 +595,57 @@ def extract_exif(video_path: str) -> Dict[str, Any]:
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode != 0:
             return {"ffprobe_error": result.stderr.strip()}
+
         metadata = json.loads(result.stdout)
         extracted = {}
-        # Format-level metadata
+
         format_info = metadata.get("format", {})
         extracted["filename"] = format_info.get("filename")
         extracted["format"] = format_info.get("format_long_name")
         extracted["duration_sec"] = float(format_info.get("duration", 0))
         extracted["bit_rate"] = int(format_info.get("bit_rate", 0))
         extracted["size_bytes"] = int(format_info.get("size", 0))
-        # Date/time-related tags from format
-        extracted["format_dates"] = {}
-        if "tags" in format_info:
-            for k, v in format_info["tags"].items():
-                if "date" in k.lower() or "time" in k.lower():
-                    extracted["format_dates"][k] = v
-        # Loop through all streams (video, audio, etc.)
- extracted["stream_dates"] = [] - for stream in metadata.get("streams", []): - stream_entry = {} - if "tags" in stream: - for k, v in stream["tags"].items(): - if "date" in k.lower() or "time" in k.lower(): - stream_entry[k] = v - if stream_entry: - extracted["stream_dates"].append(stream_entry) + return extracted except Exception as e: return {"error": str(e)} -def extract_date_from_filename(filename: str) -> Optional[str]: - """Extract date from filename using various patterns. - - Attempts to parse dates from video filenames using multiple common - date formats and patterns, including Facebook/Instagram formats - and standard date conventions. - - Args: - filename (str): Video filename to parse. - - Returns: - str or None: Formatted date string in "YYYY:MM:DD HH:MM:SS" format, - or None if no valid date pattern is found. - - Note: - This function tries multiple date formats and patterns to maximize - compatibility with various naming conventions used by different - devices and platforms. - - Example: - >>> extract_date_from_filename('video_2023-12-25.mp4') - '2023:12:25 00:00:00' - """ - try: - name = os.path.splitext(os.path.basename(filename))[0] - # Try direct known formats - known_formats = [ - "%m-%d-%Y", - "%m-%d-%y", - "%m_%d_%Y", - "%m_%d_%y", - "%Y-%m-%d", - "%Y%m%d", - "%m%d%Y", - ] - for fmt in known_formats: - try: - return datetime.strptime(name, fmt).strftime("%Y:%m:%d %H:%M:%S") - except ValueError: - continue - # Try extracting from YYYYMMDD_HHMMSS or FB_/IMG_ formats - match = re.search(r"(20\d{6})[_\-]?(?:([01]\d{3,4}))?", name) - if match: - date_str = match.group(1) - time_str = match.group(2) if match.group(2) else "000000" - if len(time_str) == 4: # HHMM - time_str += "00" - dt = datetime.strptime(date_str + time_str, "%Y%m%d%H%M%S") - return dt.strftime("%Y:%m:%d %H:%M:%S") - # Try M-D-YYYY, D-M-YYYY fallback - fallback = re.match(r"(\d{1,2})[\-_](\d{1,2})[\-_](\d{2,4})", name) - if fallback: - m, d, y = fallback.groups() - if len(y) == 2: - y = "20" + y # assume 20xx - try: - dt = datetime.strptime(f"{m}-{d}-{y}", "%m-%d-%Y") - return dt.strftime("%Y:%m:%d %H:%M:%S") - except ValueError: - pass - try: - dt = datetime.strptime(f"{d}-{m}-{y}", "%d-%m-%Y") - return dt.strftime("%Y:%m:%d %H:%M:%S") - except ValueError: - pass - raise ValueError("No valid date format found in filename.") - except Exception as e: - print(f"Could not extract date from filename {filename}: {e}") - return None - - -def calculate_age(dob_str: str, video_date: datetime) -> Optional[float]: - """Calculate age in months at time of video. - - Computes the participant's age in months at the time the video was recorded - based on their date of birth and the video recording date. - - Args: - dob_str (str): Date of birth string in parseable format. - video_date (datetime): Date when the video was recorded. - - Returns: - float or None: Age in months (rounded to 1 decimal place), - or None if calculation fails. - - Note: - Uses 30.44 days per month for calculation to account for - varying month lengths. - - Example: - >>> from datetime import datetime - >>> dob = "2022-01-15" - >>> video_dt = datetime(2023, 1, 15) - >>> calculate_age(dob, video_dt) - 12.0 - """ - try: - dob = parser.parse(dob_str) - delta = video_date - dob - age_months = round(delta.days / 30.44, 1) - return age_months - except Exception: - return None - - -def stabilize_video(input_path: str, stabilized_path: str) -> None: - """Stabilize video using ffmpeg vidstab. 
- - Applies video stabilization using FFmpeg's vidstab filter to reduce - camera shake and improve video quality for analysis. - - Args: - input_path (str): Path to input video file. - stabilized_path (str): Path for output stabilized video file. - - Note: - This function uses a two-pass approach: first detecting motion - vectors, then applying stabilization transforms. Temporary - transform files are automatically cleaned up. +def stabilize_video(input_path: str, stabilized_path: str, temp_dir: str) -> None: + """Stabilize video using FFmpeg vidstab filters, with error checks.""" + os.makedirs(temp_dir, exist_ok=True) + transforms_file = os.path.join(temp_dir, "transforms.trf") - Todo: - Add error handling for FFmpeg execution failures. - """ + # Step 1: Detect transforms detect_cmd = [ "ffmpeg", + "-y", "-i", input_path, "-vf", - "vidstabdetect=shakiness=5:accuracy=15", + f"vidstabdetect=shakiness=5:accuracy=15:result={transforms_file}", "-f", "null", "-", ] - subprocess.run(detect_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + print(f"[DEBUG] Running: {' '.join(detect_cmd)}") + detect_proc = subprocess.run(detect_cmd, capture_output=True, text=True) + + if detect_proc.returncode != 0: + print(f"[ERROR] vidstabdetect failed for {input_path}:\n{detect_proc.stderr}") + raise RuntimeError(f"FFmpeg vidstabdetect failed for {input_path}") + + if not os.path.exists(transforms_file): + raise FileNotFoundError(f"Transform file not created: {transforms_file}") + + # Step 2: Apply transforms transform_cmd = [ "ffmpeg", "-y", "-i", input_path, "-vf", - "vidstabtransform=smoothing=30:input=transforms.trf", + f"vidstabtransform=smoothing=30:input={transforms_file}", "-c:v", "libx264", "-preset", @@ -588,89 +656,96 @@ def stabilize_video(input_path: str, stabilized_path: str) -> None: "copy", stabilized_path, ] - subprocess.run(transform_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - if os.path.exists("transforms.trf"): - os.remove("transforms.trf") + print(f"[DEBUG] Running: {' '.join(transform_cmd)}") + transform_proc = subprocess.run(transform_cmd, capture_output=True, text=True) + if transform_proc.returncode != 0: + print( + f"[ERROR] vidstabtransform failed for {input_path}:" + f"\n{transform_proc.stderr}" + ) + raise RuntimeError(f"FFmpeg vidstabtransform failed for {input_path}") -def preprocess_video(input_path: str, output_path: str) -> None: - """Preprocess video with stabilization, denoising, and standardization. + if not os.path.exists(stabilized_path): + raise FileNotFoundError(f"Stabilized video not created: {stabilized_path}") - Applies a comprehensive video processing pipeline including stabilization, - denoising, color equalization, and format standardization to prepare - videos for behavioral analysis. + # Cleanup + os.remove(transforms_file) - Args: - input_path (str): Path to input video file. - output_path (str): Path for output processed video file. - Note: - The processing pipeline includes: - - Video stabilization using vidstab - - Deinterlacing using yadif - - Noise reduction using hqdn3d - - Color equalization - - Resolution scaling to 720p - - Frame rate standardization - - H.264 encoding with optimized settings - - Todo: - Add progress reporting for long video processing tasks. 
- """ - stabilized_tmp = input_path.replace(".mp4", "_stab.mp4").replace( - ".mov", "_stab.mov" - ) - stabilize_video(input_path, stabilized_tmp) - vf_filters = ( - "yadif," - "hqdn3d," - "eq=contrast=1.0:brightness=0.0:saturation=1.0," - "scale=-2:720," - "pad=ceil(iw/2)*2:ceil(ih/2)*2," - f"fps={TARGET_FRAMERATE}" - ) - cmd = [ - "ffmpeg", - "-y", - "-i", - stabilized_tmp, - "-vf", - vf_filters, - "-c:v", - "libx264", - "-crf", - "23", - "-preset", - "fast", - "-c:a", - "aac", - "-b:a", - "128k", - "-movflags", - "+faststart", - output_path, - ] - subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - os.remove(stabilized_tmp) +def preprocess_video(input_path: str, output_path: str, temp_dir: str) -> None: + """Preprocess video with stabilization, denoising, and standardization.""" + if not os.path.exists(input_path): + raise ValueError(f"Input video not found: {input_path}") + stabilized_tmp = os.path.join(temp_dir, f"stabilized_temp_{os.getpid()}.mp4") -def extract_audio(input_path: str, output_audio_path: str) -> None: - """Extract audio from video file. + try: + stabilize_video(input_path, stabilized_tmp, temp_dir) - Extracts audio track from processed video and converts it to standardized - format suitable for speech and audio analysis. + # Verify stabilization succeeded + if not os.path.exists(stabilized_tmp): + raise ValueError( + "Video stabilization failed - no intermediate file created" + ) - Args: - input_path (str): Path to input video file. - output_audio_path (str): Path for output audio file. + vf_filters = ( + "yadif," + "hqdn3d," + "eq=contrast=1.0:brightness=0.0:saturation=1.0," + "scale=-2:720," + "pad=ceil(iw/2)*2:ceil(ih/2)*2," + f"fps={TARGET_FRAMERATE}" + ) + + cmd = [ + "ffmpeg", + "-y", + "-i", + stabilized_tmp, + "-vf", + vf_filters, + "-c:v", + "libx264", + "-crf", + "23", + "-preset", + "fast", + "-c:a", + "aac", + "-b:a", + "128k", + "-movflags", + "+faststart", + output_path, + ] + + # Capture and check stderr + result = subprocess.run( + cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True + ) + if result.returncode != 0: + raise ValueError(f"Video processing failed: {result.stderr}") + + # Verify output file was created and has content + if not os.path.exists(output_path): + raise ValueError(f"Video processing failed - no output file: {output_path}") + if os.path.getsize(output_path) == 0: + raise ValueError( + f"Video processing failed - empty output file: {output_path}" + ) + + finally: + # Clean up temp file + if os.path.exists(stabilized_tmp): + os.remove(stabilized_tmp) + + +def extract_audio(input_path: str, output_audio_path: str) -> None: + """Extract audio from video file.""" + if not os.path.exists(input_path): + raise ValueError(f"Input video not found: {input_path}") - Note: - Audio is extracted with the following specifications: - - Sample rate: 16 kHz - - Channels: Mono (1 channel) - - Encoding: 16-bit PCM WAV - These settings are optimized for speech analysis applications. 
- """ cmd = [ "ffmpeg", "-y", @@ -685,129 +760,126 @@ def extract_audio(input_path: str, output_audio_path: str) -> None: "1", output_audio_path, ] - subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + # Check return code and stderr + result = subprocess.run( + cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True + ) + if result.returncode != 0: + raise ValueError(f"Audio extraction failed: {result.stderr}") -def parse_appledouble_metadata(metafile_path: str) -> Dict[str, Any]: - """Parse AppleDouble metadata files. - - Extracts metadata from macOS AppleDouble files (._filename) which contain - extended attributes, resource forks, and other file system metadata. + # Verify output file was created + if not os.path.exists(output_audio_path): + raise ValueError( + f"Audio extraction failed - no output file: {output_audio_path}" + ) - Args: - metafile_path (str): Path to AppleDouble metadata file. - Returns: - dict: Dictionary containing parsed metadata including extended attributes, - resource fork information, and Finder info when available. +def safe_float_conversion( + value: float | int | str | None, default: str = "n/a" +) -> float | str: + """Convert value to float, return default if conversion fails.""" + if value is None or pd.isna(value): + return default - Note: - AppleDouble files are created by macOS when files are copied to - non-HFS+ filesystems. They preserve metadata that would otherwise - be lost, including creation dates and extended attributes. + # Convert to string and check for common non-numeric indicators + str_val = str(value).strip().lower() + if str_val in ["", "n/a", "na", "nan", "none", "null"]: + return default - Example: - >>> metadata = parse_appledouble_metadata('._video.mp4') - >>> print(metadata.get('extended_attributes', {})) - """ try: - with open(metafile_path, "rb") as f: - content = f.read() - if not content.startswith(b"\x00\x05\x16\x07"): - return {"info": "Not AppleDouble format"} - entries = {} - entry_count = struct.unpack(">H", content[24:26])[0] - for i in range(entry_count): - entry_offset = 26 + (i * 12) - entry_id, offset, length = struct.unpack( - ">III", content[entry_offset : entry_offset + 12] - ) - entry_data = content[offset : offset + length] - # Extended attributes - if entry_id == 9: - if b"bplist" in entry_data: - try: - plist_start = entry_data.index(b"bplist") - plist_data = entry_data[plist_start:] - xattrs = plistlib.loads(plist_data) - for key, val in xattrs.items(): - if isinstance(val, bytes): - try: - val = plistlib.loads(val) - except Exception: - val = val.decode(errors="ignore") - key_str = key.decode() if isinstance(key, bytes) else key - entries[key_str] = val - except Exception as e: - entries["extended_attributes_error"] = str(e) - elif entry_id == 2: - entries["resource_fork_bytes"] = len(entry_data) - elif entry_id == 1: - entries["finder_info_present"] = True - if not entries: - return { - "info": "AppleDouble metadata detected", - "hex_preview": content[:64].hex(), - } - return entries - except Exception as e: - return {"error": f"Failed to parse AppleDouble: {e}"} + return float(value) + except (ValueError, TypeError): + return default -def create_events_tsv(video_metadata: Dict[str, Any], output_path: str) -> None: - """Create events.tsv file for video. - - Generates a BIDS-compliant events file documenting the timing and nature - of events in the video session. - - Args: - video_metadata (dict): Video metadata containing duration information. 
-        output_path (str): Path for output events TSV file.
 
-    Note:
-        For free play sessions, creates a single event spanning the entire
-        video duration with trial_type 'free_play'.
 
+# BIDS file creation functions
+def create_events_file(
+    group_df: pd.DataFrame, output_path: str, full_filepath: str
+) -> None:
+    """Create events.tsv file from Excel data with all columns."""
+    events_data = []
 
-    Raises:
-        IOError: If unable to write the events file.
-    """
-    events_data = [
-        {
+    for idx, row in group_df.iterrows():
+        event = {
+            "filepath_engaging": str(full_filepath),
             "onset": 0.0,
-            "duration": video_metadata.get("duration_sec", 0),
-            "trial_type": "free_play",
-            "response_time": "n/a",
+            "duration": parse_duration(row.get("Vid_duration", "00:00:00")),
+            "coder": str(row.get("Coder", "n/a")),
+            "source_file": str(row.get("SourceFile", "n/a")),
+            "context": str(row.get("Context", "n/a")),
+            "location": str(row.get("Location", "n/a")),
+            "activity": str(row.get("Activity", "n/a")),
+            "child_clear": str(row.get("Child_of_interest_clear", "n/a")),
+            "num_adults": str(row.get("#_adults", "n/a")),
+            "num_children": str(row.get("#_children", "n/a")),
+            "num_people_background": str(row.get("#_people_background", "n/a")),
+            "interaction_with_child": str(row.get("Interaction_with_child", "n/a")),
+            "num_people_interacting": str(row.get("#_people_interacting", "n/a")),
+            "child_constrained": str(row.get("Child_constrained", "n/a")),
+            "constraint_type": str(row.get("Constraint_type", "n/a")),
+            "supports": str(row.get("Supports", "n/a")),
+            "support_type": str(row.get("Support_type", "n/a")),
+            "example_support_type": str(row.get("Example_support_type", "n/a")),
+            "gestures": str(row.get("Gestures", "n/a")),
+            "gesture_type": str(row.get("Gesture_type", "n/a")),
+            "vocalizations": str(row.get("Vocalizations", "n/a")),
+            "rmm": str(row.get("RMM", "n/a")),
+            "rmm_type": str(row.get("RMM_type", "n/a")),
+            "response_to_name": str(row.get("Response_to_name", "n/a")),
+            "locomotion": str(row.get("Locomotion", "n/a")),
+            "locomotion_type": str(row.get("Locomotion_type", "n/a")),
+            "grasping": str(row.get("Grasping", "n/a")),
+            "grasp_type": str(row.get("Grasp_type", "n/a")),
+            "body_parts_visible": str(row.get("Body_Parts_Visible", "n/a")),
+            "angle_of_body": str(row.get("Angle_of_Body", "n/a")),
+            "timepoint": str(row.get("time_point", "n/a")),
+            "dob": str(row.get("DOB", "n/a")),
+            "vid_date": str(row.get("Vid_date", "n/a")),
+            "video_quality_face": safe_float_conversion(
+                row.get("Video_Quality_Child_Face_Visibility")
+            ),
+            "video_quality_body": safe_float_conversion(
+                row.get("Video_Quality_Child_Body_Visibility")
+            ),
+            "video_quality_hand": safe_float_conversion(
+                row.get("Video_Quality_Child_Hand_Visibility")
+            ),
+            "video_quality_lighting": safe_float_conversion(
+                row.get("Video_Quality_Lighting")
+            ),
+            "video_quality_resolution": safe_float_conversion(
+                row.get("Video_Quality_Resolution")
+            ),
+            "video_quality_motion": safe_float_conversion(
+                row.get("Video_Quality_Motion")
+            ),
+            "notes": str(row.get("Notes", "n/a")),
         }
-    ]
+        events_data.append(event)
 
     events_df = pd.DataFrame(events_data)
-    events_df.to_csv(output_path, sep="\t", index=False)
+    events_df.to_csv(output_path, sep="\t", index=False, na_rep="n/a")
 
 
 def create_video_metadata_json(
-    metadata: Dict[str, Any], processing_info: Dict[str, Any], output_path: str
+    metadata: Dict[str, Any],
+    processing_info: Dict[str, Any],
+    task_info: Dict[str, Any],
+    output_path: str,
 ) -> None:
-    """Create JSON metadata file
for processed video. - - Generates a BIDS-compliant JSON sidecar file containing video metadata, - processing parameters, and task information. - - Args: - metadata (dict): Original video metadata from ffprobe. - processing_info (dict): Information about processing steps applied. - output_path (str): Path for output JSON metadata file. - - Raises: - IOError: If unable to write the metadata file. - - Note: - The JSON file includes both technical specifications and processing - pipeline information required for reproducible analysis. - """ + """Create JSON metadata file for processed video with dynamic task info.""" video_json = { - "TaskName": "free_play", - "TaskDescription": "Free play session recorded at home", - "Instructions": "Natural play behavior in home environment", + "TaskName": task_info.get("task_name", "unknown"), + "TaskDescription": task_info.get( + "task_description", "Video recorded during behavioral session" + ), + "Instructions": task_info.get( + "instructions", "Natural behavior in home environment" + ), + "Context": task_info.get("context", "n/a"), + "Activity": task_info.get("activity", "n/a"), "SamplingFrequency": TARGET_FRAMERATE, "Resolution": TARGET_RESOLUTION, "ProcessingPipeline": { @@ -819,326 +891,700 @@ def create_video_metadata_json( }, "OriginalMetadata": metadata, } - - with open(output_path, "w") as f: - json.dump(video_json, f, indent=4) + save_json(video_json, output_path) -def create_audio_metadata_json(duration_sec: float, output_path: str) -> None: - """Create JSON metadata file for extracted audio. - - Generates a BIDS-compliant JSON sidecar file for audio files extracted - from video sessions, documenting technical specifications and task context. - - Args: - duration_sec (float): Duration of audio file in seconds. - output_path (str): Path for output JSON metadata file. - - Raises: - IOError: If unable to write the metadata file. - - Note: - Audio specifications are standardized for speech analysis: - 16kHz sampling rate, mono channel, 16-bit encoding. - """ +def create_audio_metadata_json( + duration_sec: float, task_info: Dict[str, Any], output_path: str +) -> None: + """Create JSON metadata file for extracted audio with dynamic task info.""" audio_json = { "SamplingFrequency": 16000, "Channels": 1, "SampleEncoding": "16bit", "Duration": duration_sec, - "TaskName": "free_play", - "TaskDescription": "Audio extracted from free play session", + "TaskName": task_info.get("task_name", "unknown"), + "TaskDescription": task_info.get( + "task_description", "Audio extracted from behavioral session" + ), + "Context": task_info.get("context", "n/a"), + "Activity": task_info.get("activity", "n/a"), } + save_json(audio_json, output_path) - with open(output_path, "w") as f: - json.dump(audio_json, f, indent=4) +def create_raw_video_json( + row: pd.Series, task_info: Dict[str, Any], video_path: str, output_path: str +) -> None: + """Create JSON metadata for raw video.""" + video_props = get_video_properties(video_path) -def process_videos( - video_root: str, demographics_df: pd.DataFrame -) -> Tuple[List[Dict[str, Any]], List[Union[str, Dict[str, Any]]]]: - """Process videos and organize in BIDS format. 
+ video_json = { + "TaskName": task_info.get("task_name", "unknown"), + "TaskDescription": task_info.get( + "task_description", "Raw video from behavioral session" + ), + "SamplingFrequency": video_props.get("SamplingFrequency", "n/a"), + "Resolution": video_props.get("Resolution", "n/a"), + "OriginalFilename": str(row.get("FileName", "")), + "Duration": parse_duration(row.get("Vid_duration", "00:00:00")), + "RecordingDate": str(row.get("Vid_date", "n/a")), + "Context": task_info.get("context", "n/a"), + "Activity": task_info.get("activity", "n/a"), + "TimePoint": str(row.get("timepoint", "n/a")), + "SourceFile": str(row.get("SourceFile", "n/a")), + } + save_json(video_json, output_path) + + +def process_single_video( + video_info: Dict, + annotation_df: pd.DataFrame, + final_bids_root: str, + final_derivatives_dir: str, + temp_dir: str, +) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: + """Process a single video with all BIDS structures.""" + participant_id = video_info["participant_id"] + filename = video_info["filename"] + session_id = video_info["session_id"] + input_video_path = video_info["full_path"] + safe_print(f"Processing: {participant_id}/{filename}") + filename_without_extension = os.path.splitext(filename)[0] + # Check if video exists in Excel or create dummy data - Main processing function that walks through video directories, processes - each video file, and organizes the results according to BIDS specification. + try: + # Check if video exists in Excel or create dummy data + participant_excel = annotation_df[ + annotation_df["ID"].astype(str) == str(participant_id) + ] + mask = ( + participant_excel["FileName"].str.split(".").str[0] + == filename_without_extension + ) + video_excel = participant_excel[mask] + if video_excel.empty: + # Create dummy data for missing Excel entries + dummy_data = create_dummy_excel_data( + input_video_path, participant_id, session_id + ) + video_excel = pd.DataFrame([dummy_data]) + has_excel_data = False + safe_print("No Excel data found - using dummy data") + else: + has_excel_data = True + + excel_row = video_excel.iloc[0] + task_label = get_task_from_excel_row(excel_row) + activity = excel_row.get("Activity", "unknown activity") + # Create task information + task_info = { + "task_name": task_label, + "task_description": f"Behavioral session: {activity}", + "instructions": "Natural behavior observation", + "context": str(excel_row.get("Context", "n/a")), + "activity": str(excel_row.get("Activity", "n/a")), + } - Args: - video_root (str): Root directory containing video files. - demographics_df (pd.DataFrame): DataFrame containing participant demographics. 
+
+        # Create BIDS directory structure
+        raw_subj_dir = os.path.join(
+            final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh"
+        )
+        deriv_subj_dir = os.path.join(
+            final_derivatives_dir, f"sub-{participant_id}", f"ses-{session_id}", "beh"
+        )
+
+        os.makedirs(raw_subj_dir, exist_ok=True)
+        os.makedirs(deriv_subj_dir, exist_ok=True)
+
+        # Create BIDS filenames with run number
+        ext = os.path.splitext(filename)[1][1:]
+        run_number = get_next_run_number(
+            participant_id, session_id, task_label, final_bids_root
+        )
+
+        raw_video_name = create_bids_filename(
+            participant_id, session_id, task_label, "beh", "mp4", run_number
+        )
+        processed_video_name = create_bids_filename(
+            participant_id,
+            session_id,
+            task_label,
+            "desc-processed_beh",
+            "mp4",
+            run_number,
+        )
+        audio_name = create_bids_filename(
+            participant_id, session_id, task_label, "audio", "wav", run_number
+        )
+        events_name = create_bids_filename(
+            participant_id, session_id, task_label, "events", "tsv", run_number
+        )
+
+        # File paths
+        raw_video_path = os.path.join(raw_subj_dir, raw_video_name)
+        processed_video_path = os.path.join(deriv_subj_dir, processed_video_name)
+        audio_path = os.path.join(deriv_subj_dir, audio_name)
+        events_path = os.path.join(raw_subj_dir, events_name)
+
+        if not os.path.exists(raw_video_path):
+            # Note: ext has no leading dot, so compare against "mp4"
+            if ext.lower() != "mp4":
+                # Convert to mp4 without processing
+                cmd = [
+                    "ffmpeg",
+                    "-y",
+                    "-i",
+                    input_video_path,
+                    "-c",
+                    "copy",
+                    raw_video_path,
+                ]
+                result = subprocess.run(
+                    cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True
+                )
+                # Check return code and verify output file
+                if result.returncode != 0:
+                    raise ValueError(f"FFmpeg conversion failed: {result.stderr}")
+                if not os.path.exists(raw_video_path):
+                    raise ValueError(
+                        f"FFmpeg did not create output file: {raw_video_path}"
+                    )
+                safe_print("  Converted to raw BIDS format")
+            else:
+                shutil.copy2(input_video_path, raw_video_path)
+                # Verify copy succeeded
+                if not os.path.exists(raw_video_path):
+                    raise ValueError(f"Failed to copy to raw BIDS: {raw_video_path}")
+                safe_print("  Copied to raw BIDS")
+
+        # Extract metadata from raw video
+        exif_data = extract_exif(raw_video_path)
+        if "error" in exif_data or "ffprobe_error" in exif_data:
+            raise ValueError("Unreadable or unsupported video format")
+
+        # Process video for derivatives
+        if not os.path.exists(processed_video_path):
+            safe_print("  Starting video processing...")
+            preprocess_video(raw_video_path, processed_video_path, temp_dir)
+            # Verify processing succeeded
+            if not os.path.exists(processed_video_path):
+                raise ValueError(
+                    f"Video processing failed - no output file: {processed_video_path}"
+                )
+            if os.path.getsize(processed_video_path) == 0:
+                raise ValueError(
+                    "Video processing failed - empty output file:"
+                    f" {processed_video_path}"
+                )
+            safe_print("  Video processing complete")
+
+        if not os.path.exists(audio_path):
+            safe_print("  Extracting audio...")
+            extract_audio(processed_video_path, audio_path)
+            # Verify audio extraction succeeded
+            if not os.path.exists(audio_path):
+                raise ValueError(
+                    f"Audio extraction failed - no output file: {audio_path}"
+                )
+            if os.path.getsize(audio_path) == 0:
+                raise ValueError(
+                    f"Audio extraction failed - empty output file: {audio_path}"
+                )
+            safe_print("  Audio extraction complete")
+
+        # Create events files
+        create_events_file(video_excel, events_path, input_video_path)
+        if not os.path.exists(events_path):
+            raise ValueError(f"Failed to create events file: {events_path}")
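+
+        # Illustrative result for one video (hypothetical ID; task label and
+        # run number come from the annotation row and any existing files):
+        #   sub-<ID>/ses-01/beh/sub-<ID>_ses-01_task-unknown_run-01_beh.mp4
+        #   sub-<ID>/ses-01/beh/sub-<ID>_ses-01_task-unknown_run-01_events.tsv
+        #   derivatives/preprocessed/sub-<ID>/ses-01/beh/
+        #     sub-<ID>_ses-01_task-unknown_run-01_desc-processed_beh.mp4
+        #     sub-<ID>_ses-01_task-unknown_run-01_audio.wav
+
+        # Create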
metadata JSON files + processing_info = { + "has_stabilization": True, + "has_denoising": True, + "has_equalization": True, + } - Returns: - tuple: A tuple containing: - - list: Successfully processed video entries with metadata - - list: Videos that failed processing with error information - (strings for simple failures, dicts for detailed errors) + # Raw video JSON + raw_video_json_path = raw_video_path.replace(".mp4", ".json") + create_raw_video_json( + excel_row, + task_info, + raw_video_path, + raw_video_json_path, + ) + if not os.path.exists(raw_video_json_path): + raise ValueError(f"Failed to create raw video JSON: {raw_video_json_path}") + + # Processed video JSON + processed_video_json_path = processed_video_path.replace(".mp4", ".json") + create_video_metadata_json( + exif_data, + processing_info, + task_info, + processed_video_json_path, + ) + if not os.path.exists(processed_video_json_path): + raise ValueError( + f"Failed to create processed video JSON: {processed_video_json_path}" + ) - Note: - This function performs the complete processing pipeline: - 1. Video discovery and metadata extraction - 2. Participant identification and matching - 3. BIDS directory structure creation - 4. Video processing (stabilization, denoising, standardization) - 5. Audio extraction - 6. Metadata file generation - - Todo: - Add parallel processing support for large video collections. - Implement progress reporting with estimated completion times. - """ - all_data = [] - not_processed: List[Union[str, Dict[str, Any]]] = [] - processed_files = set() - demographics_df["dependent_temporary_id"] = ( - demographics_df["dependent_temporary_id"].astype(str).str.upper() - ) + # Audio JSON + audio_json_path = audio_path.replace(".wav", ".json") + create_audio_metadata_json( + exif_data.get("duration_sec", 0), task_info, audio_json_path + ) + if not os.path.exists(audio_json_path): + raise ValueError(f"Failed to create audio JSON: {audio_json_path}") + + # Store processing information + entry = { + "participant_id": participant_id, + "session_id": session_id, + "task_label": task_label, + "original_video": input_video_path, + "raw_video_bids": raw_video_path, + "processed_video_bids": processed_video_path, + "audio_file_bids": audio_path, + "events_file_bids": events_path, + "filename": filename, + "age_folder": video_info["age_folder"], + "duration_sec": exif_data.get("duration_sec", 0), + "has_excel_data": has_excel_data, + "excel_metadata": excel_row.to_dict(), + "task_info": task_info, + "processing_info": processing_info, + } - for root, dirs, files in os.walk(video_root): - for file in files: - if file.startswith("._"): - real_name = file[2:] - real_path = os.path.join(root, real_name) - if os.path.exists(real_path): - metadata_path = os.path.join(root, file) - metadata_info = parse_appledouble_metadata(metadata_path) - print(f"[AppleDouble] Metadata for {real_name}: {metadata_info}") - continue # Skip ._ file itself - - # Skip unsupported formats - if not file.lower().endswith((".mov", ".mp4")): - print(f"[SKIP] Unsupported file type: {file}") - continue + safe_print(f" Successfully processed: {participant_id}/{filename}") + return entry, None - if file.lower().endswith((".mov", ".mp4")) and not file.startswith( - ".DS_Store" - ): - if file in processed_files: - continue - processed_files.add(file) - video_path = os.path.join(root, file) - - try: - print(f"[PROCESS] Processing file: {file}") - exif_data = extract_exif(video_path) - if "error" in exif_data or "ffprobe_error" in exif_data: - raise 
ValueError("Unreadable or unsupported video format") - - # Extract participant ID from folder structure - folder_parts = Path(video_path).parts - matching_folder = next( - ( - part - for part in folder_parts - if "_" in part - and part.upper().endswith( - tuple(demographics_df["dependent_temporary_id"].values) - ) - ), - None, - ) - if not matching_folder: - not_processed.append(video_path) - continue - - participant_id_str = matching_folder.split("_")[-1].upper() - demo_row = demographics_df[ - demographics_df["dependent_temporary_id"] == participant_id_str - ] - if demo_row.empty: - not_processed.append(video_path) - continue - - # Create consistent numeric participant ID for BIDS - bids_participant_id = f"sub-{hash(participant_id_str) % 10000:04d}" - bids_participant_num = hash(participant_id_str) % 10000 - - # Determine session from path - session_id = get_session_from_path(video_path) - - # Extract video date and calculate age - video_date_str = extract_date_from_filename(file) - if not video_date_str: - raise ValueError("Could not extract date from filename") - video_date = datetime.strptime(video_date_str, "%Y:%m:%d %H:%M:%S") - age = calculate_age(demo_row.iloc[0]["dependent_dob"], video_date) - - # Create BIDS directory structure for this participant/session - raw_subj_dir = os.path.join( - BIDS_ROOT, bids_participant_id, f"ses-{session_id}", "beh" - ) - deriv_subj_dir = os.path.join( - DERIVATIVES_DIR, bids_participant_id, f"ses-{session_id}", "beh" - ) - os.makedirs(raw_subj_dir, exist_ok=True) - os.makedirs(deriv_subj_dir, exist_ok=True) + except Exception as e: + safe_print(f" ERROR processing {input_video_path}: {str(e)}") + return None, {"video": input_video_path, "error": str(e)} - # Create BIDS filenames - raw_video_name = create_bids_filename( - bids_participant_num, session_id, "beh", "mp4" - ) - processed_video_name = create_bids_filename( - bids_participant_num, session_id, "desc-processed_beh", "mp4" - ) - audio_name = create_bids_filename( - bids_participant_num, session_id, "audio", "wav" - ) - events_name = create_bids_filename( - bids_participant_num, session_id, "events", "tsv" - ) - processed_events_name = create_bids_filename( - bids_participant_num, session_id, "desc-processed_events", "tsv" - ) - # File paths - raw_video_path = os.path.join(raw_subj_dir, raw_video_name) - processed_video_path = os.path.join( - deriv_subj_dir, processed_video_name - ) - audio_path = os.path.join(deriv_subj_dir, audio_name) - events_path = os.path.join(raw_subj_dir, events_name) - processed_events_path = os.path.join( - deriv_subj_dir, processed_events_name - ) +def create_dataset_description() -> None: + """Create dataset_description.json for main BIDS dataset.""" + dataset_desc = { + "Name": "SAILS Phase III Home Videos", + "BIDSVersion": "1.9.0", + "DatasetType": "raw", + "License": "na", + "Authors": ["Research Team"], + "Acknowledgements": "participants and families", + "HowToAcknowledge": "na", + "Funding": ["na"], + "EthicsApprovals": ["na"], + "ReferencesAndLinks": ["na"], + "DatasetDOI": "doi:", + } + try: + filepath = os.path.join(FINAL_BIDS_ROOT, "dataset_description.json") + save_json(dataset_desc, filepath) - # Copy raw video to BIDS structure - if not os.path.exists(raw_video_path): - shutil.copy2(video_path, raw_video_path) - - # Process video - if not os.path.exists(processed_video_path): - preprocess_video(video_path, processed_video_path) - - # Extract audio - if not os.path.exists(audio_path): - extract_audio(processed_video_path, audio_path) - - # Create 
events files - create_events_tsv(exif_data, events_path) - # Copy for derivatives - create_events_tsv(exif_data, processed_events_path) - - # Create metadata JSON files - processing_info = { - "has_stabilization": True, - "has_denoising": True, - "has_equalization": True, - } - - video_json_path = processed_video_path.replace(".mp4", ".json") - create_video_metadata_json( - exif_data, processing_info, video_json_path - ) + except Exception as e: + raise ValueError( + f"Failed to create dataset_description.json at {filepath}: {e}" + ) - audio_json_path = audio_path.replace(".wav", ".json") - create_audio_metadata_json( - exif_data.get("duration_sec", 0), audio_json_path - ) - # Look for associated AppleDouble metadata - apple_metadata = None - apple_file = os.path.join(os.path.dirname(video_path), f"._{file}") - if os.path.exists(apple_file): - apple_metadata = parse_appledouble_metadata(apple_file) - - entry = { - "original_participant_id": participant_id_str, - "bids_participant_id": bids_participant_id, - "session_id": session_id, - "original_video": video_path, - "raw_video_bids": raw_video_path, - "processed_video_bids": processed_video_path, - "audio_file_bids": audio_path, - "events_file_bids": events_path, - "video_date": video_date.isoformat(), - "age_months": age, - "duration_sec": exif_data.get("duration_sec", 0), - "metadata": exif_data, - "apple_metadata": apple_metadata, - "processing_info": processing_info, - } - all_data.append(entry) - - except Exception as e: - print(f"[ERROR] Failed to process {video_path}: {str(e)}") - not_processed.append({"video": video_path, "error": str(e)}) - - return all_data, not_processed +def create_derivatives_dataset_description() -> None: + """Create dataset_description.json for derivatives.""" + os.makedirs(FINAL_DERIVATIVES_DIR, exist_ok=True) + derivatives_desc = { + "Name": "SAILS Phase III Home Videos - Preprocessed", + "BIDSVersion": "1.9.0", + "DatasetType": "derivative", + "GeneratedBy": [ + { + "Name": "Video Preprocessing Pipeline", + "Version": "1.0.0", + "Description": ( + "FFmpeg-based video stabilization, denoising, " + "and standardization pipeline with audio extraction" + ), + "CodeURL": "local", + } + ], + "SourceDatasets": [{"URL": "", "Version": "1.0.0"}], + "HowToAcknowledge": "Please cite the original study", + } -def save_json(data: Union[List[Any], Dict[str, Any]], path: str) -> None: - """Save data to JSON file. + filepath = os.path.join(FINAL_DERIVATIVES_DIR, "dataset_description.json") + save_json(derivatives_desc, filepath) + if not os.path.exists(filepath): + raise ValueError( + f"Failed to create derivatives dataset_description.json at {filepath}" + ) - Utility function to save Python data structures to JSON files with - proper formatting and error handling. - Args: - data (list or dict): Data structure to save as JSON. - path (str): Output file path for JSON file. +def create_readme() -> None: + """Create README file for the BIDS dataset.""" + readme_content = """# SAILS Phase III Home Videos BIDS Dataset - Raises: - IOError: If unable to write to the specified path. - TypeError: If data contains non-serializable objects. +## Overview +This dataset contains home videos from the SAILS Phase III study, +organized according to the Brain Imaging Data Structure (BIDS) specification. - Note: - Uses 4-space indentation for readable JSON output. 
- """ - with open(path, "w") as f: - json.dump(data, f, indent=4) +## Requirements +The BIDS conversion and preprocessing pipeline can be run using Poetry +for dependency management. +However, note that the pipeline requires FFmpeg ≥ 6.0 compiled with the +vidstab library. -def main() -> None: - """Main processing function. +Because FFmpeg is not a Python package, it must be installed separately. +If you don’t have administrator privileges (e.g., on a cluster), you +can install the static binary locally as follows: - Orchestrates the complete BIDS video processing pipeline including - directory structure creation, dataset description generation, video - processing, and metadata file creation. +``` +cd ~ +wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz +tar -xJf ffmpeg-release-amd64-static.tar.xz +mv ffmpeg-*-static ffmpeg_static +export PATH="$HOME/ffmpeg_static:$PATH" +``` - This function serves as the entry point for the processing pipeline - and handles the overall workflow coordination. +To make this permanent, add the last line to your ~/.bashrc or ~/.bash_profile. - Raises: - Exception: Various exceptions may be raised during processing, - which are caught and reported appropriately. +You can verify that FFmpeg is correctly installed and supports video stabilization: - Note: - Processing progress and statistics are printed to stdout for - monitoring large batch operations. - - Example: - >>> main() - Starting BIDS format video processing... - [PROCESS] Processing file: video001.mp4 - ... - Processing complete! - Successfully processed: 45 videos - Failed to process: 2 videos +ffmpeg -version +ffmpeg -filters | grep vidstab + + +✅ Expected output: + +T.. vidstabdetect V->V Video stabilization analysis +T.. vidstabtransform V->V Video stabilization transform filter + +📦 Poetry Environment + +Once FFmpeg is installed and available in your PATH, install the Python +dependencies using Poetry (at the location of the root of the project): + +poetry install + +Verify that Poetry can access FFmpeg: + +which ffmpeg + +It should point to your local binary (e.g. $HOME/ffmpeg_static/ffmpeg). + +You might want to submit the script on Engaging using sbatch. We've +provided the sumbission files so you'll simply need to cd to the folder where +you can find this README and run : + +jid=$(sbatch --parsable submit_bids_updated.sh) +sbatch --dependency=afterok:$jid merge_cleanup.sh + +This will convert the raw video into BIDS format in a clean fashion. + +## Data Collection +Videos were collected from home environments during various activities. +Two main age groups were included: +- Session 01: 12-16 month old children +- Session 02: 34-38 month old children + +## Dataset Structure +### Raw Data +- sub-*/ses-*/beh/: Raw behavioral videos (converted to mp4) and event +annotations (contains also the original filepath of the video processed) + +### Derivatives +- derivatives/preprocessed/sub-*/ses-*/beh/: Processed videos and extracted audio + - Videos: Stabilized, denoised, standardized to 720p/30fps + - Audio: Extracted to 16kHz mono WAV format + +## Data Processing +All videos underwent standardized preprocessing including: +- Video stabilization using vidstab +- Denoising and quality enhancement +- Standardization to 720p resolution and 30fps +- Audio extraction for speech analysis + +## Behavioral Coding +Events files include annotations from csv file. + +## Task Labels +Task labels are derived from the Context column in the csv. 
+Videos without behavioral coding data use the "unknown" task label.
+"""
+
+    filepath = os.path.join(FINAL_BIDS_ROOT, "README")
+    try:
+        with open(filepath, "w") as f:
+            f.write(readme_content)
+    except Exception as e:
+        raise ValueError(f"Failed to create README at {filepath}: {e}")
+
+
+def create_participants_file(
+    processed_data: List[Dict[str, Any]], asd_status: pd.DataFrame, final_bids_root: str
+) -> None:
+    """Create participants.tsv and participants.json files."""
+    processed_participants = set(entry["participant_id"] for entry in processed_data)
+
+    participants_data = []
+    for participant_id in sorted(processed_participants):
+        asd_info = asd_status[asd_status["ID"].astype(str) == str(participant_id)]
+        participants_data.append(
+            {
+                "participant_id": f"sub-{participant_id}",
+                "group": asd_info["Group"].values[0] if not asd_info.empty else "n/a",
+            }
+        )
+
+    participants_df = pd.DataFrame(participants_data)
+    participants_df.to_csv(
+        os.path.join(final_bids_root, "participants.tsv"),
+        sep="\t",
+        index=False,
+        na_rep="n/a",
+    )
+
+    participants_json = {
+        "participant_id": {"Description": "Unique BIDS participant identifier"},
+        "group": {"Description": "ASD status"},
+    }
+
+    save_json(participants_json, os.path.join(final_bids_root, "participants.json"))
+
+
+def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None:
+    """Print processing summary statistics."""
+    print("PROCESSING SUMMARY")
+
+    print(f"Successfully processed: {len(all_processed)} videos")
+    print(f"Failed to process: {len(all_failed)} videos")
+    print(f"Total videos attempted: {len(all_processed) + len(all_failed)}")
+
+    if all_processed:
+        # Excel data availability
+        with_excel = sum(
+            1 for entry in all_processed if entry.get("has_excel_data", False)
+        )
+        without_excel = len(all_processed) - with_excel
+        print("\nData sources:")
+        print(f"  With Excel behavioral data: {with_excel} videos")
+        print(f"  With dummy behavioral data: {without_excel} videos")
+
+        # Task distribution
+        task_counts: dict[str, int] = {}
+        participant_counts: dict[str, int] = {}
+        session_counts: dict[str, int] = {}
+
+        for entry in all_processed:
+            task = entry["task_label"]
+            participant = entry["participant_id"]
+            session = entry["session_id"]
+            task_counts[task] = task_counts.get(task, 0) + 1
+            participant_counts[participant] = participant_counts.get(participant, 0) + 1
+            session_counts[session] = session_counts.get(session, 0) + 1
+
+        print("\nTask distribution:")
+        for task, count in sorted(task_counts.items()):
+            print(f"  {task}: {count} videos")
+
+        print("\nSession distribution:")
+        for session, count in sorted(session_counts.items()):
+            print(f"  Session {session}: {count} videos")
+
+        print(f"\nUnique participants processed: {len(participant_counts)}")
+
+        # Duration statistics
+        durations = [entry.get("duration_sec", 0) for entry in all_processed]
+        total_duration = sum(durations)
+        avg_duration = total_duration / len(durations) if durations else 0
+
+        print("\nDuration statistics:")
+        print(f"  Total video duration: {total_duration/3600:.1f} hours")
+        print(f"  Average video duration: {avg_duration/60:.1f} minutes")
+
+    if all_failed:
+        print("\nFailed videos breakdown:")
+        error_types: dict[str, int] = {}
+        for entry in all_failed:
+            error = entry.get("error", "Unknown error")
+            error_types[error] = error_types.get(error, 0) + 1
+
+        for error, count in sorted(error_types.items()):
+            print(f"  {error}: {count} videos")
+
+
+def merge_subjects(base_dir: str) -> None:
+    """Merge duplicated subject 
folders.""" + paths_to_check = [ + Path(base_dir), + Path(base_dir) / "derivatives" / "preprocessed", + ] + + for folder in paths_to_check: + if not folder.exists(): + continue + + subs = [d for d in folder.iterdir() if d.is_dir() and d.name.startswith("sub-")] + sub_names = {d.name for d in subs} + + for sub in subs: + if sub.name.endswith(" 2"): + original_name = sub.name[:-1] # remove the '2' + original_path = folder / original_name + + if original_name in sub_names and original_path.exists(): + print(f"Merging {sub} → {original_path}") + + for item in sub.iterdir(): + dest = original_path / item.name + if item.is_dir(): + if not dest.exists(): + shutil.copytree(item, dest) + else: + # merge recursively if same session already exists + for subitem in item.iterdir(): + dest_sub = dest / subitem.name + if not dest_sub.exists(): + if subitem.is_dir(): + shutil.copytree(subitem, dest_sub) + else: + shutil.copy2(subitem, dest_sub) + else: + if not dest.exists(): + shutil.copy2(item, dest) + shutil.rmtree(sub) + else: + print(f"No base subject found for {sub}, skipping.") + + +def process_videos( + task_id: int, + num_tasks: int, + annotation_df: pd.DataFrame, + all_videos: list, + final_bids_root: str, + final_derivatives_dir: str, + output_dir: str, +) -> tuple[list, list]: + """Process the subset of videos assigned to this task. + + Returns: + (all_processed, all_failed) """ - print("Starting BIDS format video processing...") + safe_print(f"Task {task_id}: Processing videos...") + video_chunks = all_videos[task_id::num_tasks] + + if not video_chunks: + safe_print(f"No videos assigned to task {task_id}") + return [], [] + + temp_dir = os.path.join(output_dir, str(task_id), "temp") + os.makedirs(temp_dir, exist_ok=True) + + all_processed, all_failed = [], [] - # Create BIDS directory structure - create_bids_structure() + for i, video_info in enumerate(video_chunks, 1): + safe_print(f"[Task {task_id}] Video {i}/{len(video_chunks)}") + processed_entry, failed_entry = process_single_video( + video_info, + annotation_df, + final_bids_root, + final_derivatives_dir, + temp_dir, + ) + if processed_entry: + all_processed.append(processed_entry) + if failed_entry: + all_failed.append(failed_entry) - # Create dataset description files - create_dataset_description() - create_derivatives_dataset_description() + # Save per-task logs + task_dir = os.path.join(output_dir, str(task_id)) + os.makedirs(task_dir, exist_ok=True) + save_json(all_processed, os.path.join(task_dir, "processing_log.json")) + save_json(all_failed, os.path.join(task_dir, "not_processed.json")) - # Create README file - create_readme() + # Cleanup temp dir + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir) - # Read demographics and process videos - demographics_df = read_demographics(ASD_CSV, NONASD_CSV) - all_data, not_processed = process_videos(VIDEO_ROOT, demographics_df) + return all_processed, all_failed + + +def main() -> None: + """Main entry point for multi-task BIDS video processing.""" + if len(sys.argv) != 3: + print("Usage: python updated_bids.py ") + sys.exit(1) + + my_task_id = int(sys.argv[1]) + num_tasks = int(sys.argv[2]) + + start_time = time.time() + + # --- Validate paths --- + for path, label in [(VIDEO_ROOT, "Video root"), (ANNOTATION_FILE, "Excel file")]: + if not os.path.exists(path): + print(f"ERROR: {label} not found at {path}") + sys.exit(1) + + # --- Load metadata --- + try: + annotation_df = pd.read_csv(ANNOTATION_FILE) + annotation_df.columns = annotation_df.columns.str.strip() + safe_print(f"Loaded 
{len(annotation_df)} rows from Excel file") + except Exception as e: + safe_print(f"ERROR: Failed to load Excel file: {e}") + sys.exit(1) + + # --- Discover videos --- + safe_print("Discovering videos...") + all_videos = get_all_videos(VIDEO_ROOT, annotation_df) + if not all_videos: + safe_print("ERROR: No videos found.") + sys.exit(1) + safe_print(f"Found {len(all_videos)} video files.") + + # --- Create BIDS structure (only once) --- + if my_task_id == 0: + try: + safe_print("Creating BIDS structure files...") + create_bids_structure() + create_dataset_description() + create_derivatives_dataset_description() + create_readme() + except Exception as e: + safe_print(f"CRITICAL ERROR: Failed to create BIDS structure files: {e}") + sys.exit(1) + + # --- Process this task’s subset --- + all_processed, all_failed = process_videos( + my_task_id, + num_tasks, + annotation_df, + all_videos, + FINAL_BIDS_ROOT, + FINAL_DERIVATIVES_DIR, + OUTPUT_DIR, + ) + + # --- Final summary --- + total_time = time.time() - start_time + print_summary(all_processed, all_failed) + safe_print( + f"Total processing time: {total_time / 3600:.1f}" + f" hours ({total_time / 60:.1f} minutes)" + ) - # Create participants files - create_participants_files(demographics_df, all_data) + if all_processed: + avg_time = total_time / len(all_processed) + safe_print(f"Average time per video: {avg_time:.1f} seconds") - # Save processing logs - save_json(all_data, os.path.join(OUTPUT_DIR, "bids_processing_log.json")) - save_json(not_processed, os.path.join(OUTPUT_DIR, "bids_not_processed.json")) + merge_subjects(FINAL_BIDS_ROOT) - print("Processing complete!") - print(f"Successfully processed: {len(all_data)} videos") - print(f"Failed to process: {len(not_processed)} videos") - print(f"BIDS dataset created at: {BIDS_ROOT}") + # -- Load ASD status file + asd_status = pd.read_excel(ASD_STATUS_FILE) + create_participants_file(all_processed, asd_status, FINAL_BIDS_ROOT) + safe_print("Processing complete ✅") if __name__ == "__main__": From 27cb82684d8eb07e1b5816f75b036b995074946f Mon Sep 17 00:00:00 2001 From: lucie271 Date: Thu, 30 Oct 2025 17:11:31 -0400 Subject: [PATCH 11/36] Added poetry dependencies --- poetry.lock | 246 ++++++++++++++++++++++--------------------------- pyproject.toml | 4 + 2 files changed, 114 insertions(+), 136 deletions(-) diff --git a/poetry.lock b/poetry.lock index f853564..182a534 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. 
[[package]] name = "cfgv" @@ -135,6 +135,18 @@ files = [ {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, ] +[[package]] +name = "et-xmlfile" +version = "2.0.0" +description = "An implementation of lxml.xmlfile for the standard library" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, + {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -387,8 +399,7 @@ version = "2.2.6" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" -groups = ["dev"] -markers = "python_version == \"3.10\"" +groups = ["main", "dev"] files = [ {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, @@ -448,90 +459,40 @@ files = [ ] [[package]] -name = "numpy" -version = "2.3.2" -description = "Fundamental package for array computing in Python" +name = "opencv-python" +version = "4.12.0.88" +description = "Wrapper package for OpenCV python bindings." optional = false -python-versions = ">=3.11" -groups = ["dev"] -markers = "python_version >= \"3.11\"" +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "opencv-python-4.12.0.88.tar.gz", hash = "sha256:8b738389cede219405f6f3880b851efa3415ccd674752219377353f017d2994d"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:f9a1f08883257b95a5764bf517a32d75aec325319c8ed0f89739a57fae9e92a5"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:812eb116ad2b4de43ee116fcd8991c3a687f099ada0b04e68f64899c09448e81"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:51fd981c7df6af3e8f70b1556696b05224c4e6b6777bdd2a46b3d4fb09de1a92"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:092c16da4c5a163a818f120c22c5e4a2f96e0db4f24e659c701f1fe629a690f9"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:ff554d3f725b39878ac6a2e1fa232ec509c36130927afc18a1719ebf4fbf4357"}, + {file = "opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:d98edb20aa932fd8ebd276a72627dad9dc097695b3d435a4257557bbb49a79d2"}, +] + +[package.dependencies] +numpy = {version = ">=2,<2.3.0", markers = "python_version >= \"3.9\""} + +[[package]] +name = "openpyxl" +version = "3.1.5" +description = "A Python library to read/write Excel 2010 xlsx/xlsm files" +optional = false +python-versions = ">=3.8" +groups = ["main"] files = [ - {file = "numpy-2.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:852ae5bed3478b92f093e30f785c98e0cb62fa0a939ed057c31716e18a7a22b9"}, - {file = "numpy-2.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7a0e27186e781a69959d0230dd9909b5e26024f8da10683bd6344baea1885168"}, - {file = "numpy-2.3.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f0a1a8476ad77a228e41619af2fa9505cf69df928e9aaa165746584ea17fed2b"}, - {file = "numpy-2.3.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = 
"sha256:cbc95b3813920145032412f7e33d12080f11dc776262df1712e1638207dde9e8"}, - {file = "numpy-2.3.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f75018be4980a7324edc5930fe39aa391d5734531b1926968605416ff58c332d"}, - {file = "numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20b8200721840f5621b7bd03f8dcd78de33ec522fc40dc2641aa09537df010c3"}, - {file = "numpy-2.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1f91e5c028504660d606340a084db4b216567ded1056ea2b4be4f9d10b67197f"}, - {file = "numpy-2.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fb1752a3bb9a3ad2d6b090b88a9a0ae1cd6f004ef95f75825e2f382c183b2097"}, - {file = "numpy-2.3.2-cp311-cp311-win32.whl", hash = "sha256:4ae6863868aaee2f57503c7a5052b3a2807cf7a3914475e637a0ecd366ced220"}, - {file = "numpy-2.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:240259d6564f1c65424bcd10f435145a7644a65a6811cfc3201c4a429ba79170"}, - {file = "numpy-2.3.2-cp311-cp311-win_arm64.whl", hash = "sha256:4209f874d45f921bde2cff1ffcd8a3695f545ad2ffbef6d3d3c6768162efab89"}, - {file = "numpy-2.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bc3186bea41fae9d8e90c2b4fb5f0a1f5a690682da79b92574d63f56b529080b"}, - {file = "numpy-2.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f4f0215edb189048a3c03bd5b19345bdfa7b45a7a6f72ae5945d2a28272727f"}, - {file = "numpy-2.3.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8b1224a734cd509f70816455c3cffe13a4f599b1bf7130f913ba0e2c0b2006c0"}, - {file = "numpy-2.3.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:3dcf02866b977a38ba3ec10215220609ab9667378a9e2150615673f3ffd6c73b"}, - {file = "numpy-2.3.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:572d5512df5470f50ada8d1972c5f1082d9a0b7aa5944db8084077570cf98370"}, - {file = "numpy-2.3.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8145dd6d10df13c559d1e4314df29695613575183fa2e2d11fac4c208c8a1f73"}, - {file = "numpy-2.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:103ea7063fa624af04a791c39f97070bf93b96d7af7eb23530cd087dc8dbe9dc"}, - {file = "numpy-2.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc927d7f289d14f5e037be917539620603294454130b6de200091e23d27dc9be"}, - {file = "numpy-2.3.2-cp312-cp312-win32.whl", hash = "sha256:d95f59afe7f808c103be692175008bab926b59309ade3e6d25009e9a171f7036"}, - {file = "numpy-2.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:9e196ade2400c0c737d93465327d1ae7c06c7cb8a1756121ebf54b06ca183c7f"}, - {file = "numpy-2.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:ee807923782faaf60d0d7331f5e86da7d5e3079e28b291973c545476c2b00d07"}, - {file = "numpy-2.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c8d9727f5316a256425892b043736d63e89ed15bbfe6556c5ff4d9d4448ff3b3"}, - {file = "numpy-2.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:efc81393f25f14d11c9d161e46e6ee348637c0a1e8a54bf9dedc472a3fae993b"}, - {file = "numpy-2.3.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:dd937f088a2df683cbb79dda9a772b62a3e5a8a7e76690612c2737f38c6ef1b6"}, - {file = "numpy-2.3.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:11e58218c0c46c80509186e460d79fbdc9ca1eb8d8aee39d8f2dc768eb781089"}, - {file = "numpy-2.3.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5ad4ebcb683a1f99f4f392cc522ee20a18b2bb12a2c1c42c3d48d5a1adc9d3d2"}, - {file = "numpy-2.3.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:938065908d1d869c7d75d8ec45f735a034771c6ea07088867f713d1cd3bbbe4f"}, - {file = "numpy-2.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:66459dccc65d8ec98cc7df61307b64bf9e08101f9598755d42d8ae65d9a7a6ee"}, - {file = "numpy-2.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a7af9ed2aa9ec5950daf05bb11abc4076a108bd3c7db9aa7251d5f107079b6a6"}, - {file = "numpy-2.3.2-cp313-cp313-win32.whl", hash = "sha256:906a30249315f9c8e17b085cc5f87d3f369b35fedd0051d4a84686967bdbbd0b"}, - {file = "numpy-2.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:c63d95dc9d67b676e9108fe0d2182987ccb0f11933c1e8959f42fa0da8d4fa56"}, - {file = "numpy-2.3.2-cp313-cp313-win_arm64.whl", hash = "sha256:b05a89f2fb84d21235f93de47129dd4f11c16f64c87c33f5e284e6a3a54e43f2"}, - {file = "numpy-2.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4e6ecfeddfa83b02318f4d84acf15fbdbf9ded18e46989a15a8b6995dfbf85ab"}, - {file = "numpy-2.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:508b0eada3eded10a3b55725b40806a4b855961040180028f52580c4729916a2"}, - {file = "numpy-2.3.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:754d6755d9a7588bdc6ac47dc4ee97867271b17cee39cb87aef079574366db0a"}, - {file = "numpy-2.3.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:a9f66e7d2b2d7712410d3bc5684149040ef5f19856f20277cd17ea83e5006286"}, - {file = "numpy-2.3.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de6ea4e5a65d5a90c7d286ddff2b87f3f4ad61faa3db8dabe936b34c2275b6f8"}, - {file = "numpy-2.3.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3ef07ec8cbc8fc9e369c8dcd52019510c12da4de81367d8b20bc692aa07573a"}, - {file = "numpy-2.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:27c9f90e7481275c7800dc9c24b7cc40ace3fdb970ae4d21eaff983a32f70c91"}, - {file = "numpy-2.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:07b62978075b67eee4065b166d000d457c82a1efe726cce608b9db9dd66a73a5"}, - {file = "numpy-2.3.2-cp313-cp313t-win32.whl", hash = "sha256:c771cfac34a4f2c0de8e8c97312d07d64fd8f8ed45bc9f5726a7e947270152b5"}, - {file = "numpy-2.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:72dbebb2dcc8305c431b2836bcc66af967df91be793d63a24e3d9b741374c450"}, - {file = "numpy-2.3.2-cp313-cp313t-win_arm64.whl", hash = "sha256:72c6df2267e926a6d5286b0a6d556ebe49eae261062059317837fda12ddf0c1a"}, - {file = "numpy-2.3.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:448a66d052d0cf14ce9865d159bfc403282c9bc7bb2a31b03cc18b651eca8b1a"}, - {file = "numpy-2.3.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:546aaf78e81b4081b2eba1d105c3b34064783027a06b3ab20b6eba21fb64132b"}, - {file = "numpy-2.3.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:87c930d52f45df092f7578889711a0768094debf73cfcde105e2d66954358125"}, - {file = "numpy-2.3.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:8dc082ea901a62edb8f59713c6a7e28a85daddcb67454c839de57656478f5b19"}, - {file = "numpy-2.3.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af58de8745f7fa9ca1c0c7c943616c6fe28e75d0c81f5c295810e3c83b5be92f"}, - {file = "numpy-2.3.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed5527c4cf10f16c6d0b6bee1f89958bccb0ad2522c8cadc2efd318bcd545f5"}, - {file = "numpy-2.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:095737ed986e00393ec18ec0b21b47c22889ae4b0cd2d5e88342e08b01141f58"}, - {file = "numpy-2.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:b5e40e80299607f597e1a8a247ff8d71d79c5b52baa11cc1cce30aa92d2da6e0"}, - {file = "numpy-2.3.2-cp314-cp314-win32.whl", hash = "sha256:7d6e390423cc1f76e1b8108c9b6889d20a7a1f59d9a60cac4a050fa734d6c1e2"}, - {file = "numpy-2.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:b9d0878b21e3918d76d2209c924ebb272340da1fb51abc00f986c258cd5e957b"}, - {file = "numpy-2.3.2-cp314-cp314-win_arm64.whl", hash = "sha256:2738534837c6a1d0c39340a190177d7d66fdf432894f469728da901f8f6dc910"}, - {file = "numpy-2.3.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:4d002ecf7c9b53240be3bb69d80f86ddbd34078bae04d87be81c1f58466f264e"}, - {file = "numpy-2.3.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:293b2192c6bcce487dbc6326de5853787f870aeb6c43f8f9c6496db5b1781e45"}, - {file = "numpy-2.3.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:0a4f2021a6da53a0d580d6ef5db29947025ae8b35b3250141805ea9a32bbe86b"}, - {file = "numpy-2.3.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9c144440db4bf3bb6372d2c3e49834cc0ff7bb4c24975ab33e01199e645416f2"}, - {file = "numpy-2.3.2-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f92d6c2a8535dc4fe4419562294ff957f83a16ebdec66df0805e473ffaad8bd0"}, - {file = "numpy-2.3.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cefc2219baa48e468e3db7e706305fcd0c095534a192a08f31e98d83a7d45fb0"}, - {file = "numpy-2.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:76c3e9501ceb50b2ff3824c3589d5d1ab4ac857b0ee3f8f49629d0de55ecf7c2"}, - {file = "numpy-2.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:122bf5ed9a0221b3419672493878ba4967121514b1d7d4656a7580cd11dddcbf"}, - {file = "numpy-2.3.2-cp314-cp314t-win32.whl", hash = "sha256:6f1ae3dcb840edccc45af496f312528c15b1f79ac318169d094e85e4bb35fdf1"}, - {file = "numpy-2.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:087ffc25890d89a43536f75c5fe8770922008758e8eeeef61733957041ed2f9b"}, - {file = "numpy-2.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:092aeb3449833ea9c0bf0089d70c29ae480685dd2377ec9cdbbb620257f84631"}, - {file = "numpy-2.3.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:14a91ebac98813a49bc6aa1a0dfc09513dcec1d97eaf31ca21a87221a1cdcb15"}, - {file = "numpy-2.3.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:71669b5daae692189540cffc4c439468d35a3f84f0c88b078ecd94337f6cb0ec"}, - {file = "numpy-2.3.2-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:69779198d9caee6e547adb933941ed7520f896fd9656834c300bdf4dd8642712"}, - {file = "numpy-2.3.2-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:2c3271cc4097beb5a60f010bcc1cc204b300bb3eafb4399376418a83a1c6373c"}, - {file = "numpy-2.3.2-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8446acd11fe3dc1830568c941d44449fd5cb83068e5c70bd5a470d323d448296"}, - {file = "numpy-2.3.2-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa098a5ab53fa407fded5870865c6275a5cd4101cfdef8d6fafc48286a96e981"}, - {file = "numpy-2.3.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:6936aff90dda378c09bea075af0d9c675fe3a977a9d2402f95a87f440f59f619"}, - {file = "numpy-2.3.2.tar.gz", hash = "sha256:e0486a11ec30cdecb53f184d496d1c6a20786c81e55e41640270130056f8ee48"}, + {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, + {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, ] 
+[package.dependencies] +et-xmlfile = "*" + [[package]] name = "packaging" version = "25.0" @@ -546,54 +507,67 @@ files = [ [[package]] name = "pandas" -version = "2.3.1" +version = "2.3.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ - {file = "pandas-2.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22c2e866f7209ebc3a8f08d75766566aae02bcc91d196935a1d9e59c7b990ac9"}, - {file = "pandas-2.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3583d348546201aff730c8c47e49bc159833f971c2899d6097bce68b9112a4f1"}, - {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f951fbb702dacd390561e0ea45cdd8ecfa7fb56935eb3dd78e306c19104b9b0"}, - {file = "pandas-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd05b72ec02ebfb993569b4931b2e16fbb4d6ad6ce80224a3ee838387d83a191"}, - {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1b916a627919a247d865aed068eb65eb91a344b13f5b57ab9f610b7716c92de1"}, - {file = "pandas-2.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fe67dc676818c186d5a3d5425250e40f179c2a89145df477dd82945eaea89e97"}, - {file = "pandas-2.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:2eb789ae0274672acbd3c575b0598d213345660120a257b47b5dafdc618aec83"}, - {file = "pandas-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2b0540963d83431f5ce8870ea02a7430adca100cec8a050f0811f8e31035541b"}, - {file = "pandas-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fe7317f578c6a153912bd2292f02e40c1d8f253e93c599e82620c7f69755c74f"}, - {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6723a27ad7b244c0c79d8e7007092d7c8f0f11305770e2f4cd778b3ad5f9f85"}, - {file = "pandas-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3462c3735fe19f2638f2c3a40bd94ec2dc5ba13abbb032dd2fa1f540a075509d"}, - {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:98bcc8b5bf7afed22cc753a28bc4d9e26e078e777066bc53fac7904ddef9a678"}, - {file = "pandas-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d544806b485ddf29e52d75b1f559142514e60ef58a832f74fb38e48d757b299"}, - {file = "pandas-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b3cd4273d3cb3707b6fffd217204c52ed92859533e31dc03b7c5008aa933aaab"}, - {file = "pandas-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:689968e841136f9e542020698ee1c4fbe9caa2ed2213ae2388dc7b81721510d3"}, - {file = "pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:025e92411c16cbe5bb2a4abc99732a6b132f439b8aab23a59fa593eb00704232"}, - {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b7ff55f31c4fcb3e316e8f7fa194566b286d6ac430afec0d461163312c5841e"}, - {file = "pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dcb79bf373a47d2a40cf7232928eb7540155abbc460925c2c96d2d30b006eb4"}, - {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:56a342b231e8862c96bdb6ab97170e203ce511f4d0429589c8ede1ee8ece48b8"}, - {file = "pandas-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ca7ed14832bce68baef331f4d7f294411bed8efd032f8109d690df45e00c4679"}, - {file = "pandas-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ac942bfd0aca577bef61f2bc8da8147c4ef6879965ef883d8e8d5d2dc3e744b8"}, - {file = 
"pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22"}, - {file = "pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a"}, - {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928"}, - {file = "pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9"}, - {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12"}, - {file = "pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb"}, - {file = "pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956"}, - {file = "pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a"}, - {file = "pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9"}, - {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275"}, - {file = "pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab"}, - {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96"}, - {file = "pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444"}, - {file = "pandas-2.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4645f770f98d656f11c69e81aeb21c6fca076a44bed3dcbb9396a4311bc7f6d8"}, - {file = "pandas-2.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:342e59589cc454aaff7484d75b816a433350b3d7964d7847327edda4d532a2e3"}, - {file = "pandas-2.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d12f618d80379fde6af007f65f0c25bd3e40251dbd1636480dfffce2cf1e6da"}, - {file = "pandas-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd71c47a911da120d72ef173aeac0bf5241423f9bfea57320110a978457e069e"}, - {file = "pandas-2.3.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:09e3b1587f0f3b0913e21e8b32c3119174551deb4a4eba4a89bc7377947977e7"}, - {file = "pandas-2.3.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2323294c73ed50f612f67e2bf3ae45aea04dce5690778e08a09391897f35ff88"}, - {file = "pandas-2.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:b4b0de34dc8499c2db34000ef8baad684cfa4cbd836ecee05f323ebfba348c7d"}, - {file = "pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2"}, + {file = "pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c"}, + {file = "pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a"}, + {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1"}, + {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838"}, + {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250"}, + {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4"}, + {file = "pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826"}, + {file = "pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523"}, + {file = "pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151"}, + {file = "pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"}, + {file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"}, + {file = "pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"}, + {file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"}, + {file = "pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2"}, + {file = "pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8"}, + 
{file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff"}, + {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29"}, + {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73"}, + {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9"}, + {file = "pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa"}, + {file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"}, ] [package.dependencies] @@ -823,7 +797,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -838,7 +812,7 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -941,7 +915,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -1004,14 +978,14 @@ files = [ [[package]] name = "types-pyyaml" -version = "6.0.12.20250516" +version = "6.0.12.20250915" description = "Typing stubs for PyYAML" optional = false python-versions = ">=3.9" -groups = ["dev"] +groups = ["main", "dev"] files = [ - {file = "types_pyyaml-6.0.12.20250516-py3-none-any.whl", hash = "sha256:8478208feaeb53a34cb5d970c56a7cd76b72659442e733e268a94dc72b2d0530"}, - {file = "types_pyyaml-6.0.12.20250516.tar.gz", hash = "sha256:9f21a70216fc0fa1b216a8176db5f9e0af6eb35d2f2932acb87689d03a5bf6ba"}, + {file = "types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6"}, + {file = "types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3"}, ] [[package]] @@ -1032,7 +1006,7 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -1062,4 
+1036,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "127b43f8085cdb7fd1849b6c9e630249a7871628645f91f61e1e98c5f5392191" +content-hash = "f7624bd40c213cd8c0b13fd2043b69ede77f586bfedba2c33ee97beb23fba5fa" diff --git a/pyproject.toml b/pyproject.toml index 7d4292b..ae3a591 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,10 @@ version = "0.0.0" [tool.poetry.dependencies] click = "~=8.2" +pandas = "^2.3.3" +opencv-python = "^4.12.0.88" +openpyxl = "^3.1.5" +types-pyyaml = "^6.0.12.20250915" [tool.poetry.group.dev] optional = true From 47227b131e15bbb0d067e03b5434cf50ea066ae0 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Thu, 30 Oct 2025 17:12:32 -0400 Subject: [PATCH 12/36] Modified test for new BIDS convertor script --- src/tests/test_BIDS_convertor.py | 323 ++++++++++++++----------------- 1 file changed, 146 insertions(+), 177 deletions(-) diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py index e291e63..26ec34c 100644 --- a/src/tests/test_BIDS_convertor.py +++ b/src/tests/test_BIDS_convertor.py @@ -3,7 +3,6 @@ import json import os import sys -from datetime import datetime from types import ModuleType from typing import Generator from unittest.mock import MagicMock, mock_open, patch @@ -18,56 +17,59 @@ def setup_mock_config() -> Generator[None, None, None]: """Create a temporary config.yaml file for testing.""" mock_config = { - 'video_root': '/mock/videos', - 'asd_csv': 'mock_asd.csv', - 'nonasd_csv': 'mock_nonasd.csv', - 'output_dir': '/mock/output', - 'target_resolution': '1280x720', - 'target_fps': 30 + "video_root": "/mock/videos", + "asd_csv": "mock_asd.csv", + "nonasd_csv": "mock_nonasd.csv", + "output_dir": "/mock/output", + "target_resolution": "1280x720", + "target_fps": 30, } # Create temporary config file - with open('config.yaml', 'w') as f: + with open("config.yaml", "w") as f: yaml.dump(mock_config, f) yield # Cleanup - if os.path.exists('config.yaml'): - os.remove('config.yaml') + if os.path.exists("config.yaml"): + os.remove("config.yaml") + # Import the module after config is created @pytest.fixture(scope="session") def bvp_module(setup_mock_config: Generator[None, None, None]) -> ModuleType: """Import the BIDS converter module.""" - sys.path.insert(0, 'src') + sys.path.insert(0, "src") import BIDS_convertor as bvp + return bvp + class TestConfiguration: """Test configuration loading and validation.""" def test_load_configuration_success(self, bvp_module: ModuleType) -> None: """Test successful configuration loading.""" mock_config = { - 'video_root': '/path/to/videos', - 'asd_csv': 'asd.csv', - 'nonasd_csv': 'nonasd.csv', - 'output_dir': '/output', - 'target_resolution': '1280x720', - 'target_fps': 30 + "video_root": "/path/to/videos", + "asd_csv": "asd.csv", + "nonasd_csv": "nonasd.csv", + "output_dir": "/output", + "target_resolution": "1280x720", + "target_fps": 30, } - with patch('builtins.open', mock_open(read_data=yaml.dump(mock_config))): - with patch('yaml.safe_load', return_value=mock_config): - config = bvp_module.load_configuration('config.yaml') + with patch("builtins.open", mock_open(read_data=yaml.dump(mock_config))): + with patch("yaml.safe_load", return_value=mock_config): + config = bvp_module.load_configuration("config.yaml") assert config == mock_config def test_load_configuration_file_not_found(self, bvp_module: ModuleType) -> None: """Test configuration loading with missing file.""" - with 
patch('builtins.open', side_effect=FileNotFoundError()): + with patch("builtins.open", side_effect=FileNotFoundError()): with pytest.raises(FileNotFoundError): - bvp_module.load_configuration('nonexistent.yaml') + bvp_module.load_configuration("nonexistent.yaml") class TestBIDSStructure: @@ -75,7 +77,7 @@ class TestBIDSStructure: def test_create_bids_structure(self, bvp_module: ModuleType) -> None: """Test BIDS directory structure creation.""" - with patch('os.makedirs') as mock_makedirs: + with patch("os.makedirs") as mock_makedirs: bvp_module.create_bids_structure() # Check that directories are created with exist_ok=True assert mock_makedirs.call_count == 2 @@ -83,22 +85,22 @@ def test_create_bids_structure(self, bvp_module: ModuleType) -> None: def test_create_dataset_description(self, bvp_module: ModuleType) -> None: """Test dataset description file creation.""" mock_file = mock_open() - with patch('builtins.open', mock_file): - with patch('json.dump') as mock_json_dump: + with patch("builtins.open", mock_file): + with patch("json.dump") as mock_json_dump: bvp_module.create_dataset_description() mock_file.assert_called_once() mock_json_dump.assert_called_once() # Check that the dataset description contains required fields args, kwargs = mock_json_dump.call_args dataset_desc = args[0] - assert 'Name' in dataset_desc - assert 'BIDSVersion' in dataset_desc - assert 'DatasetType' in dataset_desc + assert "Name" in dataset_desc + assert "BIDSVersion" in dataset_desc + assert "DatasetType" in dataset_desc def test_create_readme(self, bvp_module: ModuleType) -> None: """Test README file creation.""" mock_file = mock_open() - with patch('builtins.open', mock_file): + with patch("builtins.open", mock_file): bvp_module.create_readme() mock_file.assert_called_once() # Check that content was written @@ -111,46 +113,21 @@ class TestBIDSNaming: def test_create_bids_filename(self, bvp_module: ModuleType) -> None: """Test BIDS filename creation.""" - filename = bvp_module.create_bids_filename(123, '01', 'beh', 'mp4') - expected = 'sub-123_ses-01_task-play_beh.mp4' + filename = bvp_module.create_bids_filename(123, "01", "mealtime", "beh", "mp4") + expected = "sub-123_ses-01_task-mealtime_run-01_beh.mp4" assert filename == expected def test_get_session_from_path_12_16_months(self, bvp_module: ModuleType) -> None: """Test session determination for 12-16 month videos.""" - path = '/data/videos/12-16 month/participant_video.mp4' - session = bvp_module.get_session_from_path(path) - assert session == '01' + path = "/data/videos/12-16 month/participant_video.mp4" + session = bvp_module.determine_session_from_folder(path) + assert session == "01" def test_get_session_from_path_34_38_months(self, bvp_module: ModuleType) -> None: """Test session determination for 34-38 month videos.""" - path = '/data/videos/34-38 month/participant_video.mp4' - session = bvp_module.get_session_from_path(path) - assert session == '02' - - -class TestDemographicsHandling: - """Test demographics data processing.""" - - def test_read_demographics(self, bvp_module: ModuleType) -> None: - """Test demographics CSV reading and combining.""" - asd_data = pd.DataFrame({ - 'dependent_temporary_id': ['A001', 'A002'], - 'dependent_dob': ['2022-01-01', '2022-02-01'], - 'sex': ['M', 'F'], - 'diagnosis': ['ASD', 'ASD'] - }) - - nonasd_data = pd.DataFrame({ - 'dependent_temporary_id': ['N001', 'N002'], - 'dependent_dob': ['2022-03-01', '2022-04-01'], - 'sex': ['F', 'M'], - 'diagnosis': ['TD', 'TD'] - }) - - with patch('pandas.read_csv', 
side_effect=[asd_data, nonasd_data]): - df = bvp_module.read_demographics('asd.csv', 'nonasd.csv') - assert len(df) == 4 - assert 'dependent_temporary_id' in df.columns + path = "/data/videos/34-38 month/participant_video.mp4" + session = bvp_module.determine_session_from_folder(path) + assert session == "02" class TestVideoMetadataExtraction: @@ -165,112 +142,79 @@ def test_extract_exif_success(self, bvp_module: ModuleType) -> None: "duration": "120.5", "bit_rate": "1000000", "size": "15000000", - "tags": {"creation_time": "2023-01-01T12:00:00.000000Z"} + "tags": {"creation_time": "2023-01-01T12:00:00.000000Z"}, }, - "streams": [ - { - "tags": {"creation_time": "2023-01-01T12:00:00.000000Z"} - } - ] + "streams": [{"tags": {"creation_time": "2023-01-01T12:00:00.000000Z"}}], } - with patch('subprocess.run') as mock_run: + with patch("subprocess.run") as mock_run: mock_run.return_value.returncode = 0 mock_run.return_value.stdout = json.dumps(mock_metadata) - result = bvp_module.extract_exif('test.mp4') - assert 'duration_sec' in result - assert result['duration_sec'] == 120.5 - assert result['format'] == "QuickTime / MOV" + result = bvp_module.extract_exif("test.mp4") + assert "duration_sec" in result + assert result["duration_sec"] == 120.5 + assert result["format"] == "QuickTime / MOV" def test_extract_exif_ffprobe_error(self, bvp_module: ModuleType) -> None: """Test video metadata extraction with ffprobe error.""" - with patch('subprocess.run') as mock_run: + with patch("subprocess.run") as mock_run: mock_run.return_value.returncode = 1 mock_run.return_value.stderr = "Error message" - result = bvp_module.extract_exif('test.mp4') - assert 'ffprobe_error' in result - assert result['ffprobe_error'] == "Error message" - - -class TestDateExtraction: - """Test date extraction from filenames.""" - - def test_extract_date_from_filename_standard_format( - self, bvp_module: ModuleType - ) -> None: - """Test date extraction from standard format.""" - # Test a format that should work based on the actual implementation - filename = "2023-12-25.mp4" # Remove 'video_' prefix - result = bvp_module.extract_date_from_filename(filename) - assert result == "2023:12:25 00:00:00" - - def test_extract_date_from_filename_mmddyyyy_format( - self, bvp_module: ModuleType - ) -> None: - """Test date extraction from MM-DD-YYYY format.""" - filename = "12-25-2023.mp4" - result = bvp_module.extract_date_from_filename(filename) - assert result == "2023:12:25 00:00:00" - - def test_extract_date_from_filename_yyyymmdd_format( - self, bvp_module: ModuleType - ) -> None: - """Test date extraction from YYYYMMDD format.""" - filename = "20231225.mp4" - result = bvp_module.extract_date_from_filename(filename) - assert result == "2023:12:25 00:00:00" - - def test_extract_date_from_filename_invalid(self, bvp_module: ModuleType) -> None: - """Test date extraction from invalid filename.""" - filename = "invalid_filename.mp4" - result = bvp_module.extract_date_from_filename(filename) - assert result is None - - def test_calculate_age(self, bvp_module: ModuleType) -> None: - """Test age calculation in months.""" - dob_str = "2022-01-15" - video_date = datetime(2023, 1, 15) - age = bvp_module.calculate_age(dob_str, video_date) - assert age == 12.0 + result = bvp_module.extract_exif("test.mp4") + assert "ffprobe_error" in result + assert result["ffprobe_error"] == "Error message" class TestVideoProcessing: """Test video processing functions.""" - @patch('subprocess.run') - @patch('os.remove') - @patch('os.path.exists') + 
@patch("subprocess.run") + @patch("os.remove") + @patch("os.path.exists") + @patch("os.makedirs") def test_stabilize_video( self, + mock_makedirs: MagicMock, mock_exists: MagicMock, mock_remove: MagicMock, mock_run: MagicMock, - bvp_module: ModuleType + bvp_module: ModuleType, ) -> None: """Test video stabilization.""" mock_exists.return_value = True - bvp_module.stabilize_video('input.mp4', 'output.mp4') + mock_run.return_value.returncode = 0 # success + mock_run.return_value.stderr = "" + bvp_module.stabilize_video("input.mp4", "output.mp4", "output/TEMP/task-01") # Should call subprocess.run twice (detect and transform) assert mock_run.call_count == 2 - mock_remove.assert_called_once_with("transforms.trf") + mock_remove.assert_called_once_with( + os.path.join("output/TEMP/task-01", "transforms.trf") + ) - @patch('subprocess.run') + @patch("subprocess.run") + @patch("os.path.exists") def test_extract_audio( - self, mock_run: MagicMock, bvp_module: ModuleType + self, mock_exists: MagicMock, mock_run: MagicMock, bvp_module: ModuleType ) -> None: """Test audio extraction from video.""" - bvp_module.extract_audio('input.mp4', 'output.wav') + # Pretend both input and output exist + mock_exists.return_value = True + mock_run.return_value.returncode = 0 # Simulate success + mock_run.return_value.stderr = "" + + bvp_module.extract_audio("input.mp4", "output.wav") + mock_run.assert_called_once() # Check that the command includes correct audio parameters args = mock_run.call_args[0][0] - assert '-ar' in args - assert '16000' in args - assert '-ac' in args - assert '1' in args + assert "-ar" in args + assert "16000" in args + assert "-ac" in args + assert "1" in args class TestMetadataFileCreation: @@ -278,30 +222,46 @@ class TestMetadataFileCreation: def test_create_events_tsv(self, bvp_module: ModuleType) -> None: """Test events TSV file creation.""" - video_metadata = {'duration_sec': 120.5} + video_metadata = pd.DataFrame( + [ + {"duration": 120.5, "filename": "video1.mp4"}, + {"duration": 43.5, "filename": "video2.mp4"}, + ] + ) - with patch('pandas.DataFrame.to_csv') as mock_to_csv: - bvp_module.create_events_tsv(video_metadata, 'output.tsv') + with patch("pandas.DataFrame.to_csv") as mock_to_csv: + bvp_module.create_events_file( + video_metadata, "output.tsv", "filepath/on/Engaging.mp4" + ) mock_to_csv.assert_called_once() def test_create_video_metadata_json(self, bvp_module: ModuleType) -> None: """Test video metadata JSON creation.""" - metadata = {'duration_sec': 120.5, 'format': 'MP4'} - processing_info = {'has_stabilization': True} - - with patch('builtins.open', mock_open()): - with patch('json.dump') as mock_json_dump: + metadata = {"duration_sec": 120.5, "format": "MP4"} + processing_info = {"has_stabilization": True} + task_info = { + "task_name": "unknown", + "task_description": "Behavioral session:", + "instructions": "Natural behavior observation", + "context": "mealtime", + "activity": "eating", + } + with patch("builtins.open", mock_open()): + with patch("json.dump") as mock_json_dump: bvp_module.create_video_metadata_json( - metadata, processing_info, 'output.json' + metadata, + processing_info, + task_info, + "output.json", ) mock_json_dump.assert_called_once() # Check JSON content structure args = mock_json_dump.call_args[0] json_content = args[0] - assert 'TaskName' in json_content - assert 'ProcessingPipeline' in json_content - assert 'OriginalMetadata' in json_content + assert "TaskName" in json_content + assert "ProcessingPipeline" in json_content + assert 
"OriginalMetadata" in json_content class TestUtilityFunctions: @@ -309,30 +269,29 @@ class TestUtilityFunctions: def test_save_json(self, bvp_module: ModuleType) -> None: """Test JSON file saving utility.""" - test_data = {'test': 'data', 'number': 123} + test_data = {"test": "data", "number": 123} mock_file = mock_open() - with patch('builtins.open', mock_file): - with patch('json.dump') as mock_json_dump: - bvp_module.save_json(test_data, 'output.json') + with patch("builtins.open", mock_file): + with patch("json.dump") as mock_json_dump: + bvp_module.save_json(test_data, "output.json") # Check that json.dump was called with the test data and the file handle mock_json_dump.assert_called_once() args, kwargs = mock_json_dump.call_args assert args[0] == test_data - assert kwargs.get('indent') == 4 + assert kwargs.get("indent") == 4 class TestMainWorkflow: """Test the main processing workflow.""" - @patch('BIDS_convertor.create_participants_files') - @patch('BIDS_convertor.process_videos') - @patch('BIDS_convertor.read_demographics') - @patch('BIDS_convertor.create_readme') - @patch('BIDS_convertor.create_derivatives_dataset_description') - @patch('BIDS_convertor.create_dataset_description') - @patch('BIDS_convertor.create_bids_structure') - @patch('BIDS_convertor.save_json') + @patch("BIDS_convertor.create_participants_file") + @patch("BIDS_convertor.process_videos") + @patch("BIDS_convertor.create_readme") + @patch("BIDS_convertor.create_derivatives_dataset_description") + @patch("BIDS_convertor.create_dataset_description") + @patch("BIDS_convertor.create_bids_structure") + @patch("BIDS_convertor.save_json") def test_main_workflow( self, mock_save_json: MagicMock, @@ -340,26 +299,34 @@ def test_main_workflow( mock_create_dataset: MagicMock, mock_create_derivatives: MagicMock, mock_create_readme: MagicMock, - mock_read_demographics: MagicMock, mock_process_videos: MagicMock, mock_create_participants: MagicMock, - bvp_module: ModuleType + bvp_module: ModuleType, ) -> None: """Test the main processing workflow.""" # Setup mocks - mock_demographics = pd.DataFrame({'id': [1, 2]}) - mock_read_demographics.return_value = mock_demographics - mock_process_videos.return_value = ([{'test': 'data'}], ['error1']) - # Run main function - bvp_module.main() + mock_process_videos.return_value = ( + [ + { + "task_label": "task-rest", + "participant_id": "sub-001", + "session_id": "ses-01", + } + ], + [{"error": None}], + ) + # Mock sys.argv to simulate CLI arguments + with patch.object(sys, "argv", ["BIDS_convertor.py", "0", "4"]): + with patch("sys.exit") as mock_exit: + bvp_module.main() + mock_exit.assert_not_called() # Verify all steps were called mock_create_structure.assert_called_once() mock_create_dataset.assert_called_once() mock_create_derivatives.assert_called_once() mock_create_readme.assert_called_once() - mock_read_demographics.assert_called_once() mock_process_videos.assert_called_once() mock_create_participants.assert_called_once() assert mock_save_json.call_count == 2 @@ -369,24 +336,26 @@ def test_main_workflow( @pytest.fixture def sample_demographics() -> pd.DataFrame: """Sample demographics DataFrame for testing.""" - return pd.DataFrame({ - 'dependent_temporary_id': ['A001', 'A002', 'N001'], - 'dependent_dob': ['2022-01-01', '2022-02-01', '2022-03-01'], - 'sex': ['M', 'F', 'M'], - 'diagnosis': ['ASD', 'ASD', 'TD'] - }) + return pd.DataFrame( + { + "dependent_temporary_id": ["A001", "A002", "N001"], + "dependent_dob": ["2022-01-01", "2022-02-01", "2022-03-01"], + "sex": ["M", "F", "M"], 
+ "diagnosis": ["ASD", "ASD", "TD"], + } + ) @pytest.fixture def sample_video_metadata() -> dict[str, float | str | int]: """Sample video metadata for testing.""" return { - 'duration_sec': 120.5, - 'format': 'QuickTime / MOV', - 'bit_rate': 1000000, - 'size_bytes': 15000000 + "duration_sec": 120.5, + "format": "QuickTime / MOV", + "bit_rate": 1000000, + "size_bytes": 15000000, } -if __name__ == '__main__': - pytest.main([__file__]) \ No newline at end of file +if __name__ == "__main__": + pytest.main([__file__]) From acf2e66a07b6d672cbb90422da15cefab64a68cf Mon Sep 17 00:00:00 2001 From: lucie271 Date: Fri, 31 Oct 2025 14:51:35 -0400 Subject: [PATCH 13/36] updated README with BIDS-conversion pipeline --- README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/README.md b/README.md index 8c8affa..335aa1a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,49 @@ Welcome to the ```sailsprep``` repo! This is a Python repo for doing incredible video-based human pose estimation analyses. **STAY TUNED!** **Caution:**: this package is still under development and may change rapidly over the next few weeks. +## General information +To manage dependencies, this project uses Poetry. Make sure you've got poetry installed. +On Engaging, you need to first run +``` +module load miniforge +``` +Then run +``` +pip install poetry +``` +Then go to the root of this repo and run +``` +poetry install +``` + +## Preprocessing +### BIDS-conversion +The conversion pipeline requires FFmpeg ≥ 6.0 compiled with the vidstab library. +Because FFmpeg compiled with vidstab is not a Python package, it must be installed separately. +You'll need to run (outside any environment): + +``` +cd ~ +wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz +tar -xJf ffmpeg-release-amd64-static.tar.xz +mv ffmpeg-*-static ffmpeg_static +export PATH="$HOME/ffmpeg_static:$PATH" + +``` + +To make this permanent, add the last line to your ~/.bashrc or ~/.bash_profile. +You can verify that FFmpeg has the right version (≥ 6.0): +``` +ffmpeg -version +``` +You'll need to submit the script on Engaging using sbatch. We've +provided the sumbission files so you'll simply need to run (with module miniforge deactivated) : +``` +jid=$(sbatch --parsable jobs/submit_bids_updated.sh) +sbatch --dependency=afterok:$jid jobs/merge_cleanup.sh +``` +This will convert the raw video into BIDS format in a clean fashion. ## Features - A few - Cool From b1d262c40dca9cdfb33b94486ee472918551cd07 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Fri, 31 Oct 2025 15:12:04 -0400 Subject: [PATCH 14/36] final cleaning and merge after script execution --- jobs/merge_cleanup.sh | 59 +++++++++++++++++ jobs/run_bids_convertor.sh | 33 ++++++++++ src/BIDS_convertor.py | 109 +++++++------------------------ src/tests/test_BIDS_convertor.py | 4 -- 4 files changed, 116 insertions(+), 89 deletions(-) create mode 100644 jobs/merge_cleanup.sh create mode 100644 jobs/run_bids_convertor.sh diff --git a/jobs/merge_cleanup.sh b/jobs/merge_cleanup.sh new file mode 100644 index 0000000..2b367ef --- /dev/null +++ b/jobs/merge_cleanup.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH --job-name=merge_cleanup +#SBATCH --output=logs/merge_cleanup_%j.out +#SBATCH --error=logs/merge_cleanup_%j.err +#SBATCH --time=01:00:00 +#SBATCH --mem=2G + +# Clean up old logs before running +echo "Cleaning up old logs..." 
+rm -rf logs
+mkdir -p logs
+
+OUTPUT_DIR=$(poetry run python -c "import yaml; f=open('configs/config_bids_convertor.yaml'); print(yaml.safe_load(f)['output_dir'])")
+MERGED_DIR="$OUTPUT_DIR"
+
+mkdir -p "$MERGED_DIR"
+
+echo "Merging logs from numbered folders under $OUTPUT_DIR"
+echo "Started at $(date)"
+
+merged_processed="$MERGED_DIR/all_processed.json"
+merged_failed="$MERGED_DIR/all_failed.json"
+
+# Create empty lists if they don't exist yet
+echo "[]" > "$merged_processed"
+echo "[]" > "$merged_failed"
+
+# Load jq (if not already available)
+module load jq 2>/dev/null || true
+
+for folder in "$OUTPUT_DIR"/*/; do
+    foldername=$(basename "$folder")
+
+    if [[ "$foldername" =~ ^[0-9]+$ ]]; then
+        echo "Merging from folder: $foldername"
+        if [[ -f "$folder/processing_log.json" ]]; then
+            jq -s 'add' "$merged_processed" "$folder/processing_log.json" > tmp.json && mv tmp.json "$merged_processed"
+        fi
+        if [[ -f "$folder/not_processed.json" ]]; then
+            jq -s 'add' "$merged_failed" "$folder/not_processed.json" > tmp.json && mv tmp.json "$merged_failed"
+        fi
+    fi
+done
+
+echo "Merged logs saved in: $MERGED_DIR"
+echo "Now cleaning up numbered folders..."
+
+# Delete only folders with numeric names (avoid final_bids-dataset)
+for folder in "$OUTPUT_DIR"/*/; do
+    foldername=$(basename "$folder")
+    if [[ "$foldername" =~ ^[0-9]+$ ]]; then
+        echo "Deleting temporary folder: $foldername"
+        rm -rf "$folder"
+    else
+        echo "Skipping non-numeric folder: $foldername"
+    fi
+done
+
+echo "Cleanup complete at $(date)"

diff --git a/jobs/run_bids_convertor.sh b/jobs/run_bids_convertor.sh
new file mode 100644
index 0000000..08f7227
--- /dev/null
+++ b/jobs/run_bids_convertor.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#SBATCH --job-name=bids_processing
+#SBATCH --partition=mit_normal
+#SBATCH --array=0-90
+#SBATCH --output=logs/bids_%A_%a.out
+#SBATCH --error=logs/bids_%A_%a.err
+#SBATCH --mem=5G
+#SBATCH --time=10:00:00
+#SBATCH --cpus-per-task=5
+
+# --- Environment setup ---
+cd ..
+mkdir -p logs
+export PYTHONUNBUFFERED=1
+
+echo "Job started at $(date) on node $(hostname)"
+echo "Task ID: $SLURM_ARRAY_TASK_ID of $SLURM_ARRAY_TASK_COUNT"
+
+echo "FFmpeg version:"
+ffmpeg -version
+
+# Move to project and activate poetry env
+cd /orcd/data/satra/001/users/lucie271/sailsprep
+source $(poetry env info --path)/bin/activate
+
+cd src
+echo "Using Python from: $(which python)"
+echo "Starting BIDS conversion at $(date)"
+
+# Run your script
+python BIDS_convertor.py $SLURM_ARRAY_TASK_ID $SLURM_ARRAY_TASK_COUNT
+
+echo "Finished at $(date)"

diff --git a/src/BIDS_convertor.py b/src/BIDS_convertor.py
index 957ab32..8f57da7 100644
--- a/src/BIDS_convertor.py
+++ b/src/BIDS_convertor.py
@@ -270,7 +270,8 @@ def determine_session_from_excel(
         Optional[str]: Session ID ("01" or "02"), or None if not found.
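+        Note: participant folders duplicated with a trailing " 2" suffix are
+        normalized back to the base ID before filtering.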
""" filename = os.path.splitext(os.path.basename(current_path))[0] - + if participant_id.endswith(" 2"): + participant_id = participant_id[:-2].strip() # Filter for the participant participant_excel = annotation_df[ annotation_df["ID"].astype(str) == str(participant_id) @@ -802,10 +803,10 @@ def create_events_file( for idx, row in group_df.iterrows(): event = { - "filepath_engaging": str(full_filepath), "onset": 0.0, "duration": parse_duration(row.get("Vid_duration", "00:00:00")), "coder": str(row.get("Coder", "n/a")), + "filepath_engaging": str(full_filepath), "source_file": str(row.get("SourceFile", "n/a")), "context": str(row.get("Context", "n/a")), "location": str(row.get("Location", "n/a")), @@ -1168,15 +1169,7 @@ def create_dataset_description() -> None: dataset_desc = { "Name": "SAILS Phase III Home Videos", "BIDSVersion": "1.9.0", - "DatasetType": "raw", - "License": "na", - "Authors": ["Research Team"], - "Acknowledgements": "participants and families", - "HowToAcknowledge": "na", - "Funding": ["na"], - "EthicsApprovals": ["na"], - "ReferencesAndLinks": ["na"], - "DatasetDOI": "doi:", + "DatasetType": "domestic videos with audio", } try: filepath = os.path.join(FINAL_BIDS_ROOT, "dataset_description.json") @@ -1227,60 +1220,6 @@ def create_readme() -> None: This dataset contains home videos from the SAILS Phase III study, organized according to the Brain Imaging Data Structure (BIDS) specification. -## Requirements -The BIDS conversion and preprocessing pipeline can be run using Poetry -for dependency management. - -However, note that the pipeline requires FFmpeg ≥ 6.0 compiled with the -vidstab library. - -Because FFmpeg is not a Python package, it must be installed separately. -If you don’t have administrator privileges (e.g., on a cluster), you -can install the static binary locally as follows: - -``` -cd ~ -wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz -tar -xJf ffmpeg-release-amd64-static.tar.xz -mv ffmpeg-*-static ffmpeg_static -export PATH="$HOME/ffmpeg_static:$PATH" -``` - -To make this permanent, add the last line to your ~/.bashrc or ~/.bash_profile. - -You can verify that FFmpeg is correctly installed and supports video stabilization: - -ffmpeg -version -ffmpeg -filters | grep vidstab - - -✅ Expected output: - -T.. vidstabdetect V->V Video stabilization analysis -T.. vidstabtransform V->V Video stabilization transform filter - -📦 Poetry Environment - -Once FFmpeg is installed and available in your PATH, install the Python -dependencies using Poetry (at the location of the root of the project): - -poetry install - -Verify that Poetry can access FFmpeg: - -which ffmpeg - -It should point to your local binary (e.g. $HOME/ffmpeg_static/ffmpeg). - -You might want to submit the script on Engaging using sbatch. We've -provided the sumbission files so you'll simply need to cd to the folder where -you can find this README and run : - -jid=$(sbatch --parsable submit_bids_updated.sh) -sbatch --dependency=afterok:$jid merge_cleanup.sh - -This will convert the raw video into BIDS format in a clean fashion. - ## Data Collection Videos were collected from home environments during various activities. Two main age groups were included: @@ -1303,16 +1242,20 @@ def create_readme() -> None: - Denoising and quality enhancement - Standardization to 720p resolution and 30fps - Audio extraction for speech analysis +- Filename modication according to subject ID and task label +- Extraction of ASD status for every subject stored in the participants.tsv file. 

 ## Behavioral Coding
-Events files include annotations from csv file.
+Events files include manual annotations from csv file and Engaging
+location of the raw video.

 ## Task Labels
 Task labels are derived from the Context column in the csv.
+It captures what kind of interaction was happening in the video.
 Videos without behavioral coding data use "unknown" task label.
 """
-    filepath = os.path.join(FINAL_BIDS_ROOT, "README")
+    filepath = os.path.join(OUTPUT_DIR, "README")
     try:
         with open(filepath, "w") as f:
             f.write(readme_content)
@@ -1320,14 +1263,16 @@ def create_readme() -> None:
         raise ValueError(f"Failed to create README at {filepath}: {e}")


-def create_participants_file(
-    processed_data: List[Dict[str, Any]], asd_status: pd.DataFrame, final_bids_root: str
-) -> None:
+def create_participants_file() -> None:
     """Create participants.tsv and participants.json files."""
-    processed_participants = set(entry["participant_id"] for entry in processed_data)
-
+    asd_status = pd.read_excel(ASD_STATUS_FILE)
+    ids_processed_participants = []
+    for name in os.listdir(FINAL_BIDS_ROOT):
+        full_path = os.path.join(FINAL_BIDS_ROOT, name)
+        if os.path.isdir(full_path) and name.startswith("sub-"):
+            ids_processed_participants.append(name.split("sub-")[1])
     participants_data = []
-    for participant_id in sorted(processed_participants):
+    for participant_id in sorted(ids_processed_participants):
         asd_info = asd_status[asd_status["ID"].astype(str) == str(participant_id)]
         participants_data.append(
             {
@@ -1338,7 +1283,7 @@ def create_participants_file(

     participants_df = pd.DataFrame(participants_data)
     participants_df.to_csv(
-        os.path.join(final_bids_root, "participants.tsv"),
+        os.path.join(FINAL_BIDS_ROOT, "participants.tsv"),
         sep="\t",
         index=False,
         na_rep="n/a",
@@ -1349,7 +1294,7 @@ def create_participants_file(
         "Group": {"Description": "ASD status"},
     }

-    save_json(participants_json, os.path.join(final_bids_root, "participants.json"))
+    save_json(participants_json, os.path.join(FINAL_BIDS_ROOT, "participants.json"))


 def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None:
@@ -1413,11 +1358,11 @@ def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None:
             print(f"  {error}: {count} videos")


-def merge_subjects(base_dir: str) -> None:
+def merge_subjects() -> None:
     """Merge duplicated subject folders."""
     paths_to_check = [
-        Path(base_dir),
-        Path(base_dir) / "derivatives" / "preprocessed",
+        Path(FINAL_BIDS_ROOT),
+        Path(FINAL_BIDS_ROOT) / "derivatives" / "preprocessed",
     ]

     for folder in paths_to_check:
@@ -1429,9 +1374,8 @@ def merge_subjects(base_dir: str) -> None:

         for sub in subs:
             if sub.name.endswith(" 2"):
-                original_name = sub.name[:-1]  # remove the '2'
+                original_name = sub.name[:-2]
                 original_path = folder / original_name
-
                 if original_name in sub_names and original_path.exists():
                     print(f"Merging {sub} → {original_path}")

@@ -1579,11 +1523,6 @@ def main() -> None:
         avg_time = total_time / len(all_processed)
         safe_print(f"Average time per video: {avg_time:.1f} seconds")

-    merge_subjects(FINAL_BIDS_ROOT)
-
-    # -- Load ASD status file
-    asd_status = pd.read_excel(ASD_STATUS_FILE)
-    create_participants_file(all_processed, asd_status, FINAL_BIDS_ROOT)

     safe_print("Processing complete ✅")

diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py
index 26ec34c..60d0567 100644
--- a/src/tests/test_BIDS_convertor.py
+++ b/src/tests/test_BIDS_convertor.py
@@ -285,7 +285,6 @@ def test_save_json(self, bvp_module: ModuleType) -> None:


 class TestMainWorkflow:
     """Test the main processing workflow."""

-    @patch("BIDS_convertor.create_participants_file")
     @patch("BIDS_convertor.process_videos")
     @patch("BIDS_convertor.create_readme")
     @patch("BIDS_convertor.create_derivatives_dataset_description")
@@ -300,7 +299,6 @@ def test_main_workflow(
         mock_create_derivatives: MagicMock,
         mock_create_readme: MagicMock,
         mock_process_videos: MagicMock,
-        mock_create_participants: MagicMock,
         bvp_module: ModuleType,
     ) -> None:
         """Test the main processing workflow."""
@@ -328,8 +326,6 @@ def test_main_workflow(
         mock_create_derivatives.assert_called_once()
         mock_create_readme.assert_called_once()
         mock_process_videos.assert_called_once()
-        mock_create_participants.assert_called_once()
-        assert mock_save_json.call_count == 2


 # Test fixtures for reusable data

From 348973b7e69f83101be7325055d17299ecb999fd Mon Sep 17 00:00:00 2001
From: lucie271
Date: Fri, 31 Oct 2025 15:22:24 -0400
Subject: [PATCH 15/36] changed number of jobs in submission file

---
 jobs/run_bids_convertor.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jobs/run_bids_convertor.sh b/jobs/run_bids_convertor.sh
index 08f7227..a11a3c1 100644
--- a/jobs/run_bids_convertor.sh
+++ b/jobs/run_bids_convertor.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #SBATCH --job-name=bids_processing
 #SBATCH --partition=mit_normal
-#SBATCH --array=0-90
+#SBATCH --array=0-19
 #SBATCH --output=logs/bids_%A_%a.out
 #SBATCH --error=logs/bids_%A_%a.err
 #SBATCH --mem=5G

From e57bcaec20c971043c30ef3da9f4ab37572bd253 Mon Sep 17 00:00:00 2001
From: lucie271
Date: Fri, 31 Oct 2025 16:54:10 -0400
Subject: [PATCH 16/36] Added logs to .gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index ad616ab..cf3fd6d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,3 +164,6 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 .idea/
+
+#logs
+logs/

From 84f2e35c8aeb1c3e16740887ba404036a09124de Mon Sep 17 00:00:00 2001
From: lucie271
Date: Mon, 3 Nov 2025 09:49:39 -0500
Subject: [PATCH 17/36] fixed last shell scripts

---
 README.md             | 10 ++--------
 jobs/merge_cleanup.sh |  5 +++++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 335aa1a..a546feb 100644
--- a/README.md
+++ b/README.md
@@ -12,16 +12,10 @@ Welcome to the ```sailsprep``` repo! This is a Python repo for doing incredible

 ## General information
 To manage dependencies, this project uses Poetry. Make sure you've got poetry installed.
-On Engaging, you need to first run
+On Engaging, you need to first run at the root of the repo:
 ```
 module load miniforge
-```
-Then run
-```
 pip install poetry
-```
-Then go to the root of this repo and run
-```
 poetry install
 ```

@@ -48,7 +42,7 @@ ffmpeg -version
 You'll need to submit the script on Engaging using sbatch. We've
 provided the submission files, so you'll simply need to run (with module miniforge deactivated):
 ```
-jid=$(sbatch --parsable jobs/submit_bids_updated.sh)
+jid=$(sbatch --parsable jobs/run_bids_convertor.sh)
 sbatch --dependency=afterok:$jid jobs/merge_cleanup.sh
 ```
 This will convert the raw video into BIDS format in a clean fashion.
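The two-step submission above rests on a simple contract between the scripts: each array task launched by jobs/run_bids_convertor.sh receives $SLURM_ARRAY_TASK_ID and $SLURM_ARRAY_TASK_COUNT, processes a disjoint slice of the videos, and writes its own processing_log.json / not_processed.json into a numbered folder that jobs/merge_cleanup.sh later jq-merges and deletes. The slicing code inside BIDS_convertor.py is not shown in these patches, so the sketch below only illustrates that contract, assuming round-robin striding; the names run_task_slice, all_videos, and output_dir are hypothetical.

```
# Hypothetical sketch of the per-task split; not the actual BIDS_convertor.py code.
import json
import os
import sys


def run_task_slice(all_videos: list[str], output_dir: str) -> None:
    """Process the slice of videos belonging to this SLURM array task."""
    task_id = int(sys.argv[1])     # $SLURM_ARRAY_TASK_ID, e.g. 0..19
    task_count = int(sys.argv[2])  # $SLURM_ARRAY_TASK_COUNT, e.g. 20

    # Round-robin striding (assumption): task k takes videos k, k+N, k+2N, ...
    # so the slices are disjoint and together cover every video.
    my_videos = all_videos[task_id::task_count]

    # Each task writes into its own numbered folder; merge_cleanup.sh later
    # jq-merges these per-task logs and removes the numbered folders.
    task_dir = os.path.join(output_dir, str(task_id))
    os.makedirs(task_dir, exist_ok=True)
    with open(os.path.join(task_dir, "processing_log.json"), "w") as f:
        json.dump([{"video": v} for v in my_videos], f, indent=4)
```

Chaining merge_cleanup.sh with --dependency=afterok:$jid means the merge step runs only once every array task has exited successfully, so the jq merge never sees a half-written per-task log.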
diff --git a/jobs/merge_cleanup.sh b/jobs/merge_cleanup.sh index 2b367ef..e986551 100644 --- a/jobs/merge_cleanup.sh +++ b/jobs/merge_cleanup.sh @@ -57,3 +57,8 @@ for folder in "$OUTPUT_DIR"/*/; do done echo "Cleanup complete at $(date)" + +# --- Run final Python merge --- +echo "Running final Python merge and participant file creation..." +poetry run python -c "from src.BIDS_convertor import merge_subjects, create_participants_file; merge_subjects(); create_participants_file()" +echo "Final BIDS merge and participant file creation complete ✅" From d6ac5b897499d135466c4818622ee39e8dadd883 Mon Sep 17 00:00:00 2001 From: Lucie Bierent Date: Mon, 3 Nov 2025 10:41:09 -0500 Subject: [PATCH 18/36] Update jobs/run_bids_convertor.sh Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- jobs/run_bids_convertor.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/jobs/run_bids_convertor.sh b/jobs/run_bids_convertor.sh index a11a3c1..28a8003 100644 --- a/jobs/run_bids_convertor.sh +++ b/jobs/run_bids_convertor.sh @@ -9,7 +9,11 @@ #SBATCH --cpus-per-task=5 # --- Environment setup --- -cd .. +# Determine project root dynamically +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) +PROJECT_ROOT=$(cd -- "$SCRIPT_DIR/.." &> /dev/null && pwd) + +cd "$PROJECT_ROOT" mkdir -p logs export PYTHONUNBUFFERED=1 @@ -19,8 +23,7 @@ echo "Task ID: $SLURM_ARRAY_TASK_ID of $SLURM_ARRAY_TASK_COUNT" echo "FFmpeg version:" ffmpeg -version -# Move to project and activate poetry env -cd /orcd/data/satra/001/users/lucie271/sailsprep +# Activate poetry env from project root source $(poetry env info --path)/bin/activate cd src From 770e600822da22c23de98fa6e19d023b05dc2e62 Mon Sep 17 00:00:00 2001 From: Lucie Bierent Date: Mon, 3 Nov 2025 10:43:31 -0500 Subject: [PATCH 19/36] Update jobs/merge_cleanup.sh Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- jobs/merge_cleanup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobs/merge_cleanup.sh b/jobs/merge_cleanup.sh index e986551..4cb38c1 100644 --- a/jobs/merge_cleanup.sh +++ b/jobs/merge_cleanup.sh @@ -10,7 +10,7 @@ echo "Cleaning up old logs..." 
 rm -rf logs
 mkdir -p logs

-OUTPUT_DIR=$(poetry run python -c "import yaml; f=open('configs/config_bids_convertor.yaml'); print(yaml.safe_load(f)['output_dir'])")
+OUTPUT_DIR=$(poetry run python -c $'import yaml\nwith open("configs/config_bids_convertor.yaml") as f:\n    print(yaml.safe_load(f)["output_dir"])')
 MERGED_DIR="$OUTPUT_DIR"

 mkdir -p "$MERGED_DIR"

From 316f227004fc7797da3764b6751f024b946e2bd5 Mon Sep 17 00:00:00 2001
From: Lucie Bierent
Date: Mon, 3 Nov 2025 10:44:29 -0500
Subject: [PATCH 20/36] Update src/tests/test_BIDS_convertor.py

Co-authored-by: gemini-code-assist[bot]
 <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 src/tests/test_BIDS_convertor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py
index 60d0567..7ba6fc2 100644
--- a/src/tests/test_BIDS_convertor.py
+++ b/src/tests/test_BIDS_convertor.py
@@ -113,7 +113,7 @@ class TestBIDSNaming:

     def test_create_bids_filename(self, bvp_module: ModuleType) -> None:
         """Test BIDS filename creation."""
-        filename = bvp_module.create_bids_filename(123, "01", "mealtime", "beh", "mp4")
+        filename = bvp_module.create_bids_filename("123", "01", "mealtime", "beh", "mp4")
         expected = "sub-123_ses-01_task-mealtime_run-01_beh.mp4"
         assert filename == expected

From 491a431ce54b275709b58237fd0dca1a9fffd12a Mon Sep 17 00:00:00 2001
From: Lucie Bierent
Date: Mon, 3 Nov 2025 10:45:15 -0500
Subject: [PATCH 21/36] Update jobs/merge_cleanup.sh

Co-authored-by: gemini-code-assist[bot]
 <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 jobs/merge_cleanup.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/jobs/merge_cleanup.sh b/jobs/merge_cleanup.sh
index 4cb38c1..5c32c16 100644
--- a/jobs/merge_cleanup.sh
+++ b/jobs/merge_cleanup.sh
@@ -34,10 +34,12 @@ for folder in "$OUTPUT_DIR"/*/; do
     if [[ "$foldername" =~ ^[0-9]+$ ]]; then
         echo "Merging from folder: $foldername"
         if [[ -f "$folder/processing_log.json" ]]; then
-            jq -s 'add' "$merged_processed" "$folder/processing_log.json" > tmp.json && mv tmp.json "$merged_processed"
+            tmpfile=$(mktemp)
+            jq -s 'add' "$merged_processed" "$folder/processing_log.json" > "$tmpfile" && mv "$tmpfile" "$merged_processed"
         fi
         if [[ -f "$folder/not_processed.json" ]]; then
-            jq -s 'add' "$merged_failed" "$folder/not_processed.json" > tmp.json && mv tmp.json "$merged_failed"
+            tmpfile=$(mktemp)
+            jq -s 'add' "$merged_failed" "$folder/not_processed.json" > "$tmpfile" && mv "$tmpfile" "$merged_failed"
         fi
     fi
 done

From f8ed634fc4d06a29aaffa77f446e6575985406be Mon Sep 17 00:00:00 2001
From: lucie271
Date: Mon, 3 Nov 2025 14:55:41 -0500
Subject: [PATCH 22/36] untrack poetry.lock and add logs folder

---
 .gitignore    |    3 +
 logs/.gitkeep |    0
 poetry.lock   | 1039 ------------------------------------------------
 3 files changed, 3 insertions(+), 1039 deletions(-)
 create mode 100644 logs/.gitkeep
 delete mode 100644 poetry.lock

diff --git a/.gitignore b/.gitignore
index cf3fd6d..418c6e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,3 +167,6 @@ cython_debug/

 #logs
 logs/
+
+#ignore poetry.lock
+poetry.lock

diff --git a/logs/.gitkeep b/logs/.gitkeep
new file mode 100644
index 0000000..e69de29

diff --git a/poetry.lock b/poetry.lock
deleted file mode 100644
index 182a534..0000000
--- a/poetry.lock
+++ /dev/null
@@ -1,1039 +0,0 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
- -[[package]] -name = "cfgv" -version = "3.4.0" -description = "Validate configuration and produce human readable error messages." -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, - {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, -] - -[[package]] -name = "click" -version = "8.2.1" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.10" -groups = ["main"] -files = [ - {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, - {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main", "dev"] -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] -markers = {main = "platform_system == \"Windows\"", dev = "sys_platform == \"win32\""} - -[[package]] -name = "coverage" -version = "7.9.2" -description = "Code coverage measurement for Python" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "coverage-7.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:66283a192a14a3854b2e7f3418d7db05cdf411012ab7ff5db98ff3b181e1f912"}, - {file = "coverage-7.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4e01d138540ef34fcf35c1aa24d06c3de2a4cffa349e29a10056544f35cca15f"}, - {file = "coverage-7.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f22627c1fe2745ee98d3ab87679ca73a97e75ca75eb5faee48660d060875465f"}, - {file = "coverage-7.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b1c2d8363247b46bd51f393f86c94096e64a1cf6906803fa8d5a9d03784bdbf"}, - {file = "coverage-7.9.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c10c882b114faf82dbd33e876d0cbd5e1d1ebc0d2a74ceef642c6152f3f4d547"}, - {file = "coverage-7.9.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:de3c0378bdf7066c3988d66cd5232d161e933b87103b014ab1b0b4676098fa45"}, - {file = "coverage-7.9.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1e2f097eae0e5991e7623958a24ced3282676c93c013dde41399ff63e230fcf2"}, - {file = "coverage-7.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28dc1f67e83a14e7079b6cea4d314bc8b24d1aed42d3582ff89c0295f09b181e"}, - {file = "coverage-7.9.2-cp310-cp310-win32.whl", hash = "sha256:bf7d773da6af9e10dbddacbf4e5cab13d06d0ed93561d44dae0188a42c65be7e"}, - {file = "coverage-7.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:0c0378ba787681ab1897f7c89b415bd56b0b2d9a47e5a3d8dc0ea55aac118d6c"}, - {file = "coverage-7.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a7a56a2964a9687b6aba5b5ced6971af308ef6f79a91043c05dd4ee3ebc3e9ba"}, - {file = "coverage-7.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:123d589f32c11d9be7fe2e66d823a236fe759b0096f5db3fb1b75b2fa414a4fa"}, - {file = "coverage-7.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:333b2e0ca576a7dbd66e85ab402e35c03b0b22f525eed82681c4b866e2e2653a"}, - {file = "coverage-7.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:326802760da234baf9f2f85a39e4a4b5861b94f6c8d95251f699e4f73b1835dc"}, - {file = "coverage-7.9.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19e7be4cfec248df38ce40968c95d3952fbffd57b400d4b9bb580f28179556d2"}, - {file = "coverage-7.9.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0b4a4cb73b9f2b891c1788711408ef9707666501ba23684387277ededab1097c"}, - {file = "coverage-7.9.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2c8937fa16c8c9fbbd9f118588756e7bcdc7e16a470766a9aef912dd3f117dbd"}, - {file = "coverage-7.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:42da2280c4d30c57a9b578bafd1d4494fa6c056d4c419d9689e66d775539be74"}, - {file = "coverage-7.9.2-cp311-cp311-win32.whl", hash = "sha256:14fa8d3da147f5fdf9d298cacc18791818f3f1a9f542c8958b80c228320e90c6"}, - {file = "coverage-7.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:549cab4892fc82004f9739963163fd3aac7a7b0df430669b75b86d293d2df2a7"}, - {file = "coverage-7.9.2-cp311-cp311-win_arm64.whl", hash = "sha256:c2667a2b913e307f06aa4e5677f01a9746cd08e4b35e14ebcde6420a9ebb4c62"}, - {file = "coverage-7.9.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae9eb07f1cfacd9cfe8eaee6f4ff4b8a289a668c39c165cd0c8548484920ffc0"}, - {file = "coverage-7.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9ce85551f9a1119f02adc46d3014b5ee3f765deac166acf20dbb851ceb79b6f3"}, - {file = "coverage-7.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8f6389ac977c5fb322e0e38885fbbf901743f79d47f50db706e7644dcdcb6e1"}, - {file = "coverage-7.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff0d9eae8cdfcd58fe7893b88993723583a6ce4dfbfd9f29e001922544f95615"}, - {file = "coverage-7.9.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fae939811e14e53ed8a9818dad51d434a41ee09df9305663735f2e2d2d7d959b"}, - {file = "coverage-7.9.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:31991156251ec202c798501e0a42bbdf2169dcb0f137b1f5c0f4267f3fc68ef9"}, - {file = "coverage-7.9.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d0d67963f9cbfc7c7f96d4ac74ed60ecbebd2ea6eeb51887af0f8dce205e545f"}, - {file = "coverage-7.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49b752a2858b10580969ec6af6f090a9a440a64a301ac1528d7ca5f7ed497f4d"}, - {file = "coverage-7.9.2-cp312-cp312-win32.whl", hash = "sha256:88d7598b8ee130f32f8a43198ee02edd16d7f77692fa056cb779616bbea1b355"}, - {file = "coverage-7.9.2-cp312-cp312-win_amd64.whl", hash = "sha256:9dfb070f830739ee49d7c83e4941cc767e503e4394fdecb3b54bfdac1d7662c0"}, - {file = "coverage-7.9.2-cp312-cp312-win_arm64.whl", hash = "sha256:4e2c058aef613e79df00e86b6d42a641c877211384ce5bd07585ed7ba71ab31b"}, - {file = "coverage-7.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:985abe7f242e0d7bba228ab01070fde1d6c8fa12f142e43debe9ed1dde686038"}, - {file = "coverage-7.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c3939264a76d44fde7f213924021ed31f55ef28111a19649fec90c0f109e6d"}, - {file = 
"coverage-7.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae5d563e970dbe04382f736ec214ef48103d1b875967c89d83c6e3f21706d5b3"}, - {file = "coverage-7.9.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdd612e59baed2a93c8843c9a7cb902260f181370f1d772f4842987535071d14"}, - {file = "coverage-7.9.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:256ea87cb2a1ed992bcdfc349d8042dcea1b80436f4ddf6e246d6bee4b5d73b6"}, - {file = "coverage-7.9.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f44ae036b63c8ea432f610534a2668b0c3aee810e7037ab9d8ff6883de480f5b"}, - {file = "coverage-7.9.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:82d76ad87c932935417a19b10cfe7abb15fd3f923cfe47dbdaa74ef4e503752d"}, - {file = "coverage-7.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:619317bb86de4193debc712b9e59d5cffd91dc1d178627ab2a77b9870deb2868"}, - {file = "coverage-7.9.2-cp313-cp313-win32.whl", hash = "sha256:0a07757de9feb1dfafd16ab651e0f628fd7ce551604d1bf23e47e1ddca93f08a"}, - {file = "coverage-7.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:115db3d1f4d3f35f5bb021e270edd85011934ff97c8797216b62f461dd69374b"}, - {file = "coverage-7.9.2-cp313-cp313-win_arm64.whl", hash = "sha256:48f82f889c80af8b2a7bb6e158d95a3fbec6a3453a1004d04e4f3b5945a02694"}, - {file = "coverage-7.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:55a28954545f9d2f96870b40f6c3386a59ba8ed50caf2d949676dac3ecab99f5"}, - {file = "coverage-7.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cdef6504637731a63c133bb2e6f0f0214e2748495ec15fe42d1e219d1b133f0b"}, - {file = "coverage-7.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcd5ebe66c7a97273d5d2ddd4ad0ed2e706b39630ed4b53e713d360626c3dbb3"}, - {file = "coverage-7.9.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9303aed20872d7a3c9cb39c5d2b9bdbe44e3a9a1aecb52920f7e7495410dfab8"}, - {file = "coverage-7.9.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc18ea9e417a04d1920a9a76fe9ebd2f43ca505b81994598482f938d5c315f46"}, - {file = "coverage-7.9.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6406cff19880aaaadc932152242523e892faff224da29e241ce2fca329866584"}, - {file = "coverage-7.9.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d0d4f6ecdf37fcc19c88fec3e2277d5dee740fb51ffdd69b9579b8c31e4232e"}, - {file = "coverage-7.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c33624f50cf8de418ab2b4d6ca9eda96dc45b2c4231336bac91454520e8d1fac"}, - {file = "coverage-7.9.2-cp313-cp313t-win32.whl", hash = "sha256:1df6b76e737c6a92210eebcb2390af59a141f9e9430210595251fbaf02d46926"}, - {file = "coverage-7.9.2-cp313-cp313t-win_amd64.whl", hash = "sha256:f5fd54310b92741ebe00d9c0d1d7b2b27463952c022da6d47c175d246a98d1bd"}, - {file = "coverage-7.9.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c48c2375287108c887ee87d13b4070a381c6537d30e8487b24ec721bf2a781cb"}, - {file = "coverage-7.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddc39510ac922a5c4c27849b739f875d3e1d9e590d1e7b64c98dadf037a16cce"}, - {file = "coverage-7.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a535c0c7364acd55229749c2b3e5eebf141865de3a8f697076a3291985f02d30"}, - {file = "coverage-7.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:df0f9ef28e0f20c767ccdccfc5ae5f83a6f4a2fbdfbcbcc8487a8a78771168c8"}, - {file = "coverage-7.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2f3da12e0ccbcb348969221d29441ac714bbddc4d74e13923d3d5a7a0bebef7a"}, - {file = "coverage-7.9.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a17eaf46f56ae0f870f14a3cbc2e4632fe3771eab7f687eda1ee59b73d09fe4"}, - {file = "coverage-7.9.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:669135a9d25df55d1ed56a11bf555f37c922cf08d80799d4f65d77d7d6123fcf"}, - {file = "coverage-7.9.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:9d3a700304d01a627df9db4322dc082a0ce1e8fc74ac238e2af39ced4c083193"}, - {file = "coverage-7.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:71ae8b53855644a0b1579d4041304ddc9995c7b21c8a1f16753c4d8903b4dfed"}, - {file = "coverage-7.9.2-cp39-cp39-win32.whl", hash = "sha256:dd7a57b33b5cf27acb491e890720af45db05589a80c1ffc798462a765be6d4d7"}, - {file = "coverage-7.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:f65bb452e579d5540c8b37ec105dd54d8b9307b07bcaa186818c104ffda22441"}, - {file = "coverage-7.9.2-pp39.pp310.pp311-none-any.whl", hash = "sha256:8a1166db2fb62473285bcb092f586e081e92656c7dfa8e9f62b4d39d7e6b5050"}, - {file = "coverage-7.9.2-py3-none-any.whl", hash = "sha256:e425cd5b00f6fc0ed7cdbd766c70be8baab4b7839e4d4fe5fac48581dd968ea4"}, - {file = "coverage-7.9.2.tar.gz", hash = "sha256:997024fa51e3290264ffd7492ec97d0690293ccd2b45a6cd7d82d945a4a80c8b"}, -] - -[package.dependencies] -tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} - -[package.extras] -toml = ["tomli ; python_full_version <= \"3.11.0a6\""] - -[[package]] -name = "distlib" -version = "0.4.0" -description = "Distribution utilities" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, - {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, -] - -[[package]] -name = "et-xmlfile" -version = "2.0.0" -description = "An implementation of lxml.xmlfile for the standard library" -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa"}, - {file = "et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54"}, -] - -[[package]] -name = "exceptiongroup" -version = "1.3.0" -description = "Backport of PEP 654 (exception groups)" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -markers = "python_version == \"3.10\"" -files = [ - {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, - {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, -] - -[package.dependencies] -typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} - -[package.extras] -test = ["pytest (>=6)"] - -[[package]] -name = "execnet" -version = "2.1.1" -description = "execnet: rapid multi-Python deployment" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "execnet-2.1.1-py3-none-any.whl", hash = 
"sha256:26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc"}, - {file = "execnet-2.1.1.tar.gz", hash = "sha256:5189b52c6121c24feae288166ab41b32549c7e2348652736540b9e6e7d4e72e3"}, -] - -[package.extras] -testing = ["hatch", "pre-commit", "pytest", "tox"] - -[[package]] -name = "filelock" -version = "3.18.0" -description = "A platform independent file lock." -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"}, - {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"}, -] - -[package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] - -[[package]] -name = "identify" -version = "2.6.12" -description = "File identification library for Python" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2"}, - {file = "identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6"}, -] - -[package.extras] -license = ["ukkonen"] - -[[package]] -name = "iniconfig" -version = "2.1.0" -description = "brain-dead simple config-ini parsing" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, - {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, -] - -[[package]] -name = "jinja2" -version = "3.1.6" -description = "A very fast and expressive template engine." -optional = false -python-versions = ">=3.7" -groups = ["docs"] -files = [ - {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, - {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "markupsafe" -version = "3.0.2" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.9" -groups = ["docs"] -files = [ - {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50"}, - {file = "MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d"}, - {file = "MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, - {file = 
"MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, - {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1"}, - {file = "MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d"}, - {file = 
"MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6"}, - {file = "MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f"}, - {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, - {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, -] - -[[package]] -name = "mypy" -version = "1.17.0" -description = "Optional static typing for Python" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "mypy-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8e08de6138043108b3b18f09d3f817a4783912e48828ab397ecf183135d84d6"}, - {file = "mypy-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce4a17920ec144647d448fc43725b5873548b1aae6c603225626747ededf582d"}, - {file = "mypy-1.17.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6ff25d151cc057fdddb1cb1881ef36e9c41fa2a5e78d8dd71bee6e4dcd2bc05b"}, - {file = "mypy-1.17.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93468cf29aa9a132bceb103bd8475f78cacde2b1b9a94fd978d50d4bdf616c9a"}, - {file = "mypy-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:98189382b310f16343151f65dd7e6867386d3e35f7878c45cfa11383d175d91f"}, - {file = "mypy-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:c004135a300ab06a045c1c0d8e3f10215e71d7b4f5bb9a42ab80236364429937"}, - {file = "mypy-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9d4fe5c72fd262d9c2c91c1117d16aac555e05f5beb2bae6a755274c6eec42be"}, - {file = "mypy-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d96b196e5c16f41b4f7736840e8455958e832871990c7ba26bf58175e357ed61"}, - {file = "mypy-1.17.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:73a0ff2dd10337ceb521c080d4147755ee302dcde6e1a913babd59473904615f"}, - {file = "mypy-1.17.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24cfcc1179c4447854e9e406d3af0f77736d631ec87d31c6281ecd5025df625d"}, - {file = "mypy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3c56f180ff6430e6373db7a1d569317675b0a451caf5fef6ce4ab365f5f2f6c3"}, - {file = "mypy-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:eafaf8b9252734400f9b77df98b4eee3d2eecab16104680d51341c75702cad70"}, - {file = "mypy-1.17.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f986f1cab8dbec39ba6e0eaa42d4d3ac6686516a5d3dccd64be095db05ebc6bb"}, - {file = "mypy-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:51e455a54d199dd6e931cd7ea987d061c2afbaf0960f7f66deef47c90d1b304d"}, - {file = "mypy-1.17.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3204d773bab5ff4ebbd1f8efa11b498027cd57017c003ae970f310e5b96be8d8"}, - {file = "mypy-1.17.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1051df7ec0886fa246a530ae917c473491e9a0ba6938cfd0ec2abc1076495c3e"}, - {file = "mypy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f773c6d14dcc108a5b141b4456b0871df638eb411a89cd1c0c001fc4a9d08fc8"}, - {file = "mypy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:1619a485fd0e9c959b943c7b519ed26b712de3002d7de43154a489a2d0fd817d"}, - {file = "mypy-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c41aa59211e49d717d92b3bb1238c06d387c9325d3122085113c79118bebb06"}, - {file = "mypy-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e69db1fb65b3114f98c753e3930a00514f5b68794ba80590eb02090d54a5d4a"}, - {file = "mypy-1.17.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03ba330b76710f83d6ac500053f7727270b6b8553b0423348ffb3af6f2f7b889"}, - {file = "mypy-1.17.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:037bc0f0b124ce46bfde955c647f3e395c6174476a968c0f22c95a8d2f589bba"}, - {file = "mypy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c38876106cb6132259683632b287238858bd58de267d80defb6f418e9ee50658"}, - {file = "mypy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:d30ba01c0f151998f367506fab31c2ac4527e6a7b2690107c7a7f9e3cb419a9c"}, - {file = "mypy-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:63e751f1b5ab51d6f3d219fe3a2fe4523eaa387d854ad06906c63883fde5b1ab"}, - {file = "mypy-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f7fb09d05e0f1c329a36dcd30e27564a3555717cde87301fae4fb542402ddfad"}, - {file = "mypy-1.17.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b72c34ce05ac3a1361ae2ebb50757fb6e3624032d91488d93544e9f82db0ed6c"}, - {file = 
"mypy-1.17.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:434ad499ad8dde8b2f6391ddfa982f41cb07ccda8e3c67781b1bfd4e5f9450a8"}, - {file = "mypy-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f105f61a5eff52e137fd73bee32958b2add9d9f0a856f17314018646af838e97"}, - {file = "mypy-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:ba06254a5a22729853209550d80f94e28690d5530c661f9416a68ac097b13fc4"}, - {file = "mypy-1.17.0-py3-none-any.whl", hash = "sha256:15d9d0018237ab058e5de3d8fce61b6fa72cc59cc78fd91f1b474bce12abf496"}, - {file = "mypy-1.17.0.tar.gz", hash = "sha256:e5d7ccc08ba089c06e2f5629c660388ef1fee708444f1dee0b9203fa031dee03"}, -] - -[package.dependencies] -mypy_extensions = ">=1.0.0" -pathspec = ">=0.9.0" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing_extensions = ">=4.6.0" - -[package.extras] -dmypy = ["psutil (>=4.0)"] -faster-cache = ["orjson"] -install-types = ["pip"] -mypyc = ["setuptools (>=50)"] -reports = ["lxml"] - -[[package]] -name = "mypy-extensions" -version = "1.1.0" -description = "Type system extensions for programs checked with the mypy type checker." -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, - {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, -] - -[[package]] -name = "nodeenv" -version = "1.9.1" -description = "Node.js virtual environment builder" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["dev"] -files = [ - {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, - {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, -] - -[[package]] -name = "numpy" -version = "2.2.6" -description = "Fundamental package for array computing in Python" -optional = false -python-versions = ">=3.10" -groups = ["main", "dev"] -files = [ - {file = "numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb"}, - {file = "numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90"}, - {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163"}, - {file = "numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf"}, - {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83"}, - {file = "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915"}, - {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680"}, - {file = "numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289"}, - {file = "numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d"}, - {file = 
"numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3"}, - {file = "numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae"}, - {file = "numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a"}, - {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42"}, - {file = "numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491"}, - {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a"}, - {file = "numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf"}, - {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1"}, - {file = "numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab"}, - {file = "numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47"}, - {file = "numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303"}, - {file = "numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff"}, - {file = "numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c"}, - {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3"}, - {file = "numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282"}, - {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87"}, - {file = "numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249"}, - {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49"}, - {file = "numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de"}, - {file = "numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4"}, - {file = "numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2"}, - {file = "numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84"}, - {file = "numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b"}, - {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d"}, - {file = "numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = 
"sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566"}, - {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f"}, - {file = "numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f"}, - {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868"}, - {file = "numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d"}, - {file = "numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd"}, - {file = "numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c"}, - {file = "numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6"}, - {file = "numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda"}, - {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40"}, - {file = "numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8"}, - {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f"}, - {file = "numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa"}, - {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571"}, - {file = "numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1"}, - {file = "numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff"}, - {file = "numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06"}, - {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d"}, - {file = "numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db"}, - {file = "numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543"}, - {file = "numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00"}, - {file = "numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd"}, -] - -[[package]] -name = "opencv-python" -version = "4.12.0.88" -description = "Wrapper package for OpenCV python bindings." 
-optional = false -python-versions = ">=3.6" -groups = ["main"] -files = [ - {file = "opencv-python-4.12.0.88.tar.gz", hash = "sha256:8b738389cede219405f6f3880b851efa3415ccd674752219377353f017d2994d"}, - {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:f9a1f08883257b95a5764bf517a32d75aec325319c8ed0f89739a57fae9e92a5"}, - {file = "opencv_python-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:812eb116ad2b4de43ee116fcd8991c3a687f099ada0b04e68f64899c09448e81"}, - {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:51fd981c7df6af3e8f70b1556696b05224c4e6b6777bdd2a46b3d4fb09de1a92"}, - {file = "opencv_python-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:092c16da4c5a163a818f120c22c5e4a2f96e0db4f24e659c701f1fe629a690f9"}, - {file = "opencv_python-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:ff554d3f725b39878ac6a2e1fa232ec509c36130927afc18a1719ebf4fbf4357"}, - {file = "opencv_python-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:d98edb20aa932fd8ebd276a72627dad9dc097695b3d435a4257557bbb49a79d2"}, -] - -[package.dependencies] -numpy = {version = ">=2,<2.3.0", markers = "python_version >= \"3.9\""} - -[[package]] -name = "openpyxl" -version = "3.1.5" -description = "A Python library to read/write Excel 2010 xlsx/xlsm files" -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2"}, - {file = "openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050"}, -] - -[package.dependencies] -et-xmlfile = "*" - -[[package]] -name = "packaging" -version = "25.0" -description = "Core utilities for Python packages" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, - {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, -] - -[[package]] -name = "pandas" -version = "2.3.3" -description = "Powerful data structures for data analysis, time series, and statistics" -optional = false -python-versions = ">=3.9" -groups = ["main", "dev"] -files = [ - {file = "pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c"}, - {file = "pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a"}, - {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1"}, - {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838"}, - {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250"}, - {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4"}, - {file = "pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826"}, - {file = "pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523"}, - {file = "pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45"}, - {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66"}, - {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b"}, - {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791"}, - {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151"}, - {file = "pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c"}, - {file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"}, - {file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"}, - {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"}, - {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"}, - {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"}, - {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"}, - {file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"}, - {file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"}, - {file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"}, - {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"}, - {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"}, - {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"}, - {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"}, - {file = "pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"}, - {file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"}, - {file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"}, - {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"}, - {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"}, - {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"}, - {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"}, - {file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"}, - {file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"}, - {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"}, - {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"}, - {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"}, - {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"}, - {file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"}, - {file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"}, - {file = "pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"}, - {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"}, - {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"}, - {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"}, - {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"}, - {file = "pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2"}, - {file = "pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8"}, - {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff"}, - {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29"}, - {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73"}, - {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9"}, - {file = "pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = 
"sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa"}, - {file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"}, -] - -[package.dependencies] -numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata = ">=2022.7" - -[package.extras] -all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] -aws = ["s3fs (>=2022.11.0)"] -clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] -compression = ["zstandard (>=0.19.0)"] -computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] -consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] -feather = ["pyarrow (>=10.0.1)"] -fss = ["fsspec (>=2022.11.0)"] -gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] -hdf5 = ["tables (>=3.8.0)"] -html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] -mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] -parquet = ["pyarrow (>=10.0.1)"] -performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] -plot = ["matplotlib (>=3.6.3)"] -postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] -pyarrow = ["pyarrow (>=10.0.1)"] -spss = ["pyreadstat (>=1.2.0)"] -sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] -test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.9.2)"] - -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." 
-optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - -[[package]] -name = "pdoc" -version = "15.0.4" -description = "API Documentation for Python Projects" -optional = false -python-versions = ">=3.9" -groups = ["docs"] -files = [ - {file = "pdoc-15.0.4-py3-none-any.whl", hash = "sha256:f9028e85e7bb8475b054e69bde1f6d26fc4693d25d9fa1b1ce9009bec7f7a5c4"}, - {file = "pdoc-15.0.4.tar.gz", hash = "sha256:cf9680f10f5b4863381f44ef084b1903f8f356acb0d4cc6b64576ba9fb712c82"}, -] - -[package.dependencies] -Jinja2 = ">=2.11.0" -MarkupSafe = ">=1.1.1" -pygments = ">=2.12.0" - -[[package]] -name = "platformdirs" -version = "4.3.8" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, - {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, -] - -[package.extras] -docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] -type = ["mypy (>=1.14.1)"] - -[[package]] -name = "pluggy" -version = "1.6.0" -description = "plugin and hook calling mechanisms for python" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, - {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, -] - -[package.extras] -dev = ["pre-commit", "tox"] -testing = ["coverage", "pytest", "pytest-benchmark"] - -[[package]] -name = "pre-commit" -version = "4.2.0" -description = "A framework for managing and maintaining multi-language pre-commit hooks." -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd"}, - {file = "pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146"}, -] - -[package.dependencies] -cfgv = ">=2.0.0" -identify = ">=1.0.0" -nodeenv = ">=0.11.1" -pyyaml = ">=5.1" -virtualenv = ">=20.10.0" - -[[package]] -name = "psutil" -version = "7.0.0" -description = "Cross-platform lib for process and system monitoring in Python. NOTE: the syntax of this script MUST be kept compatible with Python 2.7." 
-optional = false -python-versions = ">=3.6" -groups = ["dev"] -files = [ - {file = "psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25"}, - {file = "psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34"}, - {file = "psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993"}, - {file = "psutil-7.0.0-cp36-cp36m-win32.whl", hash = "sha256:84df4eb63e16849689f76b1ffcb36db7b8de703d1bc1fe41773db487621b6c17"}, - {file = "psutil-7.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1e744154a6580bc968a0195fd25e80432d3afec619daf145b9e5ba16cc1d688e"}, - {file = "psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99"}, - {file = "psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553"}, - {file = "psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456"}, -] - -[package.extras] -dev = ["abi3audit", "black (==24.10.0)", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest", "pytest-cov", "pytest-xdist", "requests", "rstcheck", "ruff", "setuptools", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "vulture", "wheel"] -test = ["pytest", "pytest-xdist", "setuptools"] - -[[package]] -name = "pygments" -version = "2.19.2" -description = "Pygments is a syntax highlighting package written in Python." -optional = false -python-versions = ">=3.8" -groups = ["dev", "docs"] -files = [ - {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, - {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, -] - -[package.extras] -windows-terminal = ["colorama (>=0.4.6)"] - -[[package]] -name = "pytest" -version = "8.4.1" -description = "pytest: simple powerful testing with Python" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7"}, - {file = "pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c"}, -] - -[package.dependencies] -colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} -exceptiongroup = {version = ">=1", markers = "python_version < \"3.11\""} -iniconfig = ">=1" -packaging = ">=20" -pluggy = ">=1.5,<2" -pygments = ">=2.7.2" -tomli = {version = ">=1", markers = "python_version < \"3.11\""} - -[package.extras] -dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] - -[[package]] -name = "pytest-cov" -version = "6.2.1" -description = "Pytest plugin for measuring coverage." 
-optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "pytest_cov-6.2.1-py3-none-any.whl", hash = "sha256:f5bc4c23f42f1cdd23c70b1dab1bbaef4fc505ba950d53e0081d0730dd7e86d5"}, - {file = "pytest_cov-6.2.1.tar.gz", hash = "sha256:25cc6cc0a5358204b8108ecedc51a9b57b34cc6b8c967cc2c01a4e00d8a67da2"}, -] - -[package.dependencies] -coverage = {version = ">=7.5", extras = ["toml"]} -pluggy = ">=1.2" -pytest = ">=6.2.5" - -[package.extras] -testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] - -[[package]] -name = "pytest-xdist" -version = "3.6.1" -description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = "sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, - {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, -] - -[package.dependencies] -execnet = ">=2.1" -psutil = {version = ">=3.0", optional = true, markers = "extra == \"psutil\""} -pytest = ">=7.0.0" - -[package.extras] -psutil = ["psutil (>=3.0)"] -setproctitle = ["setproctitle"] -testing = ["filelock"] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "dev"] -files = [ - {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, - {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "pytz" -version = "2025.2" -description = "World timezone definitions, modern and historical" -optional = false -python-versions = "*" -groups = ["main", "dev"] -files = [ - {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, - {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, -] - -[[package]] -name = "pyyaml" -version = "6.0.2" -description = "YAML parser and emitter for Python" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, - {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, - {file = 
"PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, - {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, - {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, - {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, - {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, - {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, - {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, - {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, - {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, - {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, - {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, - {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, - {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, - {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, - {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, -] - -[[package]] -name = "ruff" -version = "0.12.4" -description = "An extremely fast Python linter and code formatter, written in Rust." 
-optional = false -python-versions = ">=3.7" -groups = ["dev"] -files = [ - {file = "ruff-0.12.4-py3-none-linux_armv6l.whl", hash = "sha256:cb0d261dac457ab939aeb247e804125a5d521b21adf27e721895b0d3f83a0d0a"}, - {file = "ruff-0.12.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:55c0f4ca9769408d9b9bac530c30d3e66490bd2beb2d3dae3e4128a1f05c7442"}, - {file = "ruff-0.12.4-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a8224cc3722c9ad9044da7f89c4c1ec452aef2cfe3904365025dd2f51daeae0e"}, - {file = "ruff-0.12.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9949d01d64fa3672449a51ddb5d7548b33e130240ad418884ee6efa7a229586"}, - {file = "ruff-0.12.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:be0593c69df9ad1465e8a2d10e3defd111fdb62dcd5be23ae2c06da77e8fcffb"}, - {file = "ruff-0.12.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7dea966bcb55d4ecc4cc3270bccb6f87a337326c9dcd3c07d5b97000dbff41c"}, - {file = "ruff-0.12.4-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:afcfa3ab5ab5dd0e1c39bf286d829e042a15e966b3726eea79528e2e24d8371a"}, - {file = "ruff-0.12.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c057ce464b1413c926cdb203a0f858cd52f3e73dcb3270a3318d1630f6395bb3"}, - {file = "ruff-0.12.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e64b90d1122dc2713330350626b10d60818930819623abbb56535c6466cce045"}, - {file = "ruff-0.12.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2abc48f3d9667fdc74022380b5c745873499ff827393a636f7a59da1515e7c57"}, - {file = "ruff-0.12.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:2b2449dc0c138d877d629bea151bee8c0ae3b8e9c43f5fcaafcd0c0d0726b184"}, - {file = "ruff-0.12.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:56e45bb11f625db55f9b70477062e6a1a04d53628eda7784dce6e0f55fd549eb"}, - {file = "ruff-0.12.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:478fccdb82ca148a98a9ff43658944f7ab5ec41c3c49d77cd99d44da019371a1"}, - {file = "ruff-0.12.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:0fc426bec2e4e5f4c4f182b9d2ce6a75c85ba9bcdbe5c6f2a74fcb8df437df4b"}, - {file = "ruff-0.12.4-py3-none-win32.whl", hash = "sha256:4de27977827893cdfb1211d42d84bc180fceb7b72471104671c59be37041cf93"}, - {file = "ruff-0.12.4-py3-none-win_amd64.whl", hash = "sha256:fe0b9e9eb23736b453143d72d2ceca5db323963330d5b7859d60d101147d461a"}, - {file = "ruff-0.12.4-py3-none-win_arm64.whl", hash = "sha256:0618ec4442a83ab545e5b71202a5c0ed7791e8471435b94e655b570a5031a98e"}, - {file = "ruff-0.12.4.tar.gz", hash = "sha256:13efa16df6c6eeb7d0f091abae50f58e9522f3843edb40d56ad52a5a4a4b6873"}, -] - -[[package]] -name = "six" -version = "1.17.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "dev"] -files = [ - {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, - {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, -] - -[[package]] -name = "tomli" -version = "2.2.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version == \"3.10\"" -files = [ - {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, - {file = 
"tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, - {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, - {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, - {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, - {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, - {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, - {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, - {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, - {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, - {file = 
"tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, - {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, - {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, - {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, - {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, -] - -[[package]] -name = "types-python-dateutil" -version = "2.9.0.20250708" -description = "Typing stubs for python-dateutil" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "types_python_dateutil-2.9.0.20250708-py3-none-any.whl", hash = "sha256:4d6d0cc1cc4d24a2dc3816024e502564094497b713f7befda4d5bc7a8e3fd21f"}, - {file = "types_python_dateutil-2.9.0.20250708.tar.gz", hash = "sha256:ccdbd75dab2d6c9696c350579f34cffe2c281e4c5f27a585b2a2438dd1d5c8ab"}, -] - -[[package]] -name = "types-pyyaml" -version = "6.0.12.20250915" -description = "Typing stubs for PyYAML" -optional = false -python-versions = ">=3.9" -groups = ["main", "dev"] -files = [ - {file = "types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6"}, - {file = "types_pyyaml-6.0.12.20250915.tar.gz", hash = "sha256:0f8b54a528c303f0e6f7165687dd33fafa81c807fcac23f632b63aa624ced1d3"}, -] - -[[package]] -name = "typing-extensions" -version = "4.14.1" -description = "Backported and Experimental Type Hints for Python 3.9+" -optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"}, - {file = "typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36"}, -] - -[[package]] -name = "tzdata" -version = "2025.2" -description = "Provider of IANA time zone data" -optional = false -python-versions = ">=2" -groups = ["main", "dev"] -files = [ - {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, - {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, -] - -[[package]] -name = "virtualenv" -version = "20.32.0" -description = "Virtual Python Environment builder" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "virtualenv-20.32.0-py3-none-any.whl", hash = "sha256:2c310aecb62e5aa1b06103ed7c2977b81e042695de2697d01017ff0f1034af56"}, - {file = "virtualenv-20.32.0.tar.gz", hash = "sha256:886bf75cadfdc964674e6e33eb74d787dff31ca314ceace03ca5810620f4ecf0"}, -] - -[package.dependencies] -distlib = ">=0.3.7,<1" -filelock = ">=3.12.2,<4" -platformdirs = ">=3.9.1,<5" - 
-[package.extras]
-docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
-test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"GraalVM\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""]
-
-[metadata]
-lock-version = "2.1"
-python-versions = ">=3.10,<3.13"
-content-hash = "f7624bd40c213cd8c0b13fd2043b69ede77f586bfedba2c33ee97beb23fba5fa"

From b50ba765e4baea0c320f9e1ae2c289f7df1897ba Mon Sep 17 00:00:00 2001
From: lucie271
Date: Mon, 3 Nov 2025 15:36:38 -0500
Subject: [PATCH 23/36] BIDS_convertor.py in sailsprep

---
 jobs/merge_cleanup.sh | 2 +-
 jobs/run_bids_convertor.sh | 3 +-
 src/BIDS_convertor.py | 1530 ------------------------------------
 3 files changed, 2 insertions(+), 1533 deletions(-)
 delete mode 100644 src/BIDS_convertor.py

diff --git a/jobs/merge_cleanup.sh b/jobs/merge_cleanup.sh
index 5c32c16..5323999 100644
--- a/jobs/merge_cleanup.sh
+++ b/jobs/merge_cleanup.sh
@@ -62,5 +62,5 @@ echo "Cleanup complete at $(date)"
 
 # --- Run final Python merge ---
 echo "Running final Python merge and participant file creation..."
-poetry run python -c "from src.BIDS_convertor import merge_subjects, create_participants_file; merge_subjects(); create_participants_file()"
+poetry run python -c "from sailsprep.BIDS_convertor import merge_subjects, create_participants_file; merge_subjects(); create_participants_file()"
 echo "Final BIDS merge and participant file creation complete ✅"
diff --git a/jobs/run_bids_convertor.sh b/jobs/run_bids_convertor.sh
index 28a8003..da8d629 100644
--- a/jobs/run_bids_convertor.sh
+++ b/jobs/run_bids_convertor.sh
@@ -26,11 +26,10 @@ ffmpeg -version
 
 # Activate poetry env from project root
 source $(poetry env info --path)/bin/activate
-cd src
 
 echo "Using Python from: $(which python)"
 echo "Starting BIDS conversion at $(date)"
 
 # Run your script
-python BIDS_convertor.py $SLURM_ARRAY_TASK_ID $SLURM_ARRAY_TASK_COUNT
+python -m sailsprep.BIDS_convertor $SLURM_ARRAY_TASK_ID $SLURM_ARRAY_TASK_COUNT
 echo "Finished at $(date)"
diff --git a/src/BIDS_convertor.py b/src/BIDS_convertor.py
deleted file mode 100644
index 8f57da7..0000000
--- a/src/BIDS_convertor.py
+++ /dev/null
@@ -1,1530 +0,0 @@
-"""BIDS Video Processing Pipeline.
-
-This module processes home videos from ASD screening studies and organizes them
-according to the Brain Imaging Data Structure (BIDS) specification version 1.8.0.
-
-The pipeline includes video stabilization, denoising, standardization, and audio
-extraction for behavioral analysis research.
-
-Example:
-    Basic usage:
-        $ python bids_video_processor.py
-
-Todo:
-    * check with actual data
-"""
-
-import json
-import os
-import re
-import shutil
-import subprocess
-import sys
-import time
-from datetime import datetime
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-import cv2
-import pandas as pd
-import yaml
-
-
-def load_configuration(config_path: str = "config.yaml") -> Dict[str, Any]:
-    """Load configuration from YAML file.
-
-    Args:
-        config_path (str): Path to the configuration YAML file.
-
-    Returns:
-        dict: Configuration dictionary containing video processing parameters.
-
-    Raises:
-        FileNotFoundError: If the configuration file is not found.
-        yaml.YAMLError: If the YAML file is malformed.
-    """
-    with open(config_path, "r") as f:
-        config = yaml.safe_load(f)
-    return config
-
-
-# Load configuration
-config_path = (
-    Path(__file__).resolve().parents[1] / "configs" / "config_bids_convertor.yaml"
-)
-config = load_configuration(str(config_path))
-# Unpack configuration
-ANNOTATION_FILE = config["annotation_file"]
-VIDEO_ROOT = config["video_root"]
-OUTPUT_DIR = config["output_dir"]
-TARGET_RESOLUTION = config["target_resolution"]
-TARGET_FRAMERATE = config["target_framerate"]
-ASD_STATUS_FILE = config["asd_status"]
-# BIDS directory structure
-FINAL_BIDS_ROOT = os.path.join(
-    OUTPUT_DIR, config.get("final_bids_root", "final_bids-dataset")
-)
-FINAL_DERIVATIVES_DIR = os.path.join(
-    FINAL_BIDS_ROOT, config.get("derivatives_subdir", "derivatives/preprocessed")
-)
-
-
-def create_bids_structure() -> None:
-    """Create the BIDS directory structure.
-
-    Creates the main BIDS dataset directory and derivatives subdirectory
-    following BIDS specification requirements.
-
-    Note:
-        This function creates directories with exist_ok=True to prevent
-        errors if directories already exist.
-    """
-    os.makedirs(FINAL_BIDS_ROOT, exist_ok=True)
-    os.makedirs(FINAL_DERIVATIVES_DIR, exist_ok=True)
-
-
-def save_json(data: Union[List[Any], Dict[str, Any]], path: str) -> None:
-    """Save data to JSON file.
-
-    Utility function to save Python data structures to JSON files with
-    proper formatting and error handling.
-
-    Args:
-        data (list or dict): Data structure to save as JSON.
-        path (str): Output file path for JSON file.
-
-    Raises:
-        IOError: If unable to write to the specified path.
-        TypeError: If data contains non-serializable objects.
-
-    Note:
-        Uses 4-space indentation for readable JSON output.
- """ - with open(path, "w") as f: - json.dump(data, f, indent=4) - - -def safe_print(message: str) -> None: - """Print with timestamps.""" - timestamp = datetime.now().strftime("%H:%M:%S") - print(f"{timestamp} [MAIN] {message}") - - -# Helper functions -def parse_duration(duration_str: str) -> float: - """Parse duration string to seconds.""" - try: - if pd.isna(duration_str) or duration_str == "": - return 0.0 - duration_str = str(duration_str) - if ":" in duration_str: - parts = duration_str.split(":") - if len(parts) == 3: - hours = int(parts[0]) - minutes = int(parts[1]) - seconds = float(parts[2]) - return hours * 3600 + minutes * 60 + seconds - elif len(parts) == 2: - minutes = int(parts[0]) - seconds = float(parts[1]) - return minutes * 60 + seconds - return float(duration_str) - except (ValueError, TypeError): - return 0.0 - - -def make_bids_task_label(task_name: str) -> str: - """Convert TaskName to BIDS-compatible task label for filenames.""" - s = str(task_name).strip() - s = re.sub(r"[^0-9a-zA-Z+]", "", s) # Keep only alphanumeric and + - return s - - -def get_video_properties(video_path: str) -> dict: - """Extract video properties using OpenCV.""" - try: - cap = cv2.VideoCapture(video_path) - if not cap.isOpened(): - return {"SamplingFrequency": None, "Resolution": None} - - fps = cap.get(cv2.CAP_PROP_FPS) - width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) - height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) - cap.release() - - return { - "SamplingFrequency": fps, - "Resolution": f"{width}x{height}", - } - - except Exception as e: - print(f"Error reading video {video_path}: {e}") - return {"SamplingFrequency": None, "Resolution": None} - - -def determine_session_from_folder(folder_name: str) -> Optional[str]: - """Determine the session ID from a folder name based on known age-related patterns. - - Args: - folder_name (str): The name of the folder to check. - - Returns: - Optional[str]: "01" for 12–16 month sessions, "02" for 34–38 month sessions, - or None if no match. - """ - folder_lower = folder_name.lower() - - # Session 01 patterns - if any( - pattern in folder_lower - for pattern in [ - "12-16 month", - "12-14 month", - "12_16", - "12_14", - "12-16month", - "12-14month", - "12-16_month_videos", - ] - ): - return "01" - - # Session 02 patterns (typos and variants included) - if any( - pattern in folder_lower - for pattern in [ - "34-38 month", - "34-28 month", - "34-48 month", - "34_38", - "34_28", - "34_48", - "34-38month", - "34-28month", - "34-48month", - "34-38_month_videos", - ] - ): - return "02" - - return None - - -def find_age_folder_session(current_path: str, participant_path: str) -> Optional[str]: - """Recursively seek the timepoint folder. - - Args: - current_path (str): Current directory path to inspect. - participant_path (str): Root path of the participant. - - Returns: - Optional[str]: Session ID ("01" or "02") if detected, else None. - """ - if ( - not current_path.startswith(participant_path) - or current_path == participant_path - ): - return None - - current_folder = os.path.basename(current_path) - session_id = determine_session_from_folder(current_folder) - if session_id: - return session_id - - parent_path = os.path.dirname(current_path) - return find_age_folder_session(parent_path, participant_path) - - -def extract_participant_id_from_folder(folder_name: str) -> str: - """Extract the participant ID from folder names. - - Args: - folder_name (str): Folder name containing participant info. - - Returns: - str: Extracted participant ID. 
- """ - if "AMES_" in folder_name: - parts = folder_name.split("AMES_") - if len(parts) > 1: - return parts[1].strip() - - if "_" in folder_name: - return folder_name.split("_")[-1] - - return folder_name - - -def determine_session_from_excel( - current_path: str, annotation_df: pd.DataFrame, participant_id: str -) -> Optional[str]: - """Determine the session ID for a video based on the annotation file. - - Args: - current_path (str): Path to the video file. - annotation_df (pd.DataFrame): Excel data containing 'ID', - 'FileName', 'timepoint', and 'Age' columns. - participant_id (str): Participant identifier. - - Returns: - Optional[str]: Session ID ("01" or "02"), or None if not found. - """ - filename = os.path.splitext(os.path.basename(current_path))[0] - if participant_id.endswith(" 2"): - participant_id = participant_id[:-2].strip() - # Filter for the participant - participant_excel = annotation_df[ - annotation_df["ID"].astype(str) == str(participant_id) - ] - if participant_excel.empty: - raise ValueError( - f"Participant ID '{participant_id}' not found in Excel metadata" - f" for file '{filename}'." - ) - - # Match the video filename (without extension) - mask = participant_excel["FileName"].str.split(".").str[0] == filename - video_entry = participant_excel[mask] - - if video_entry.empty: - raise ValueError( - f"No matching Excel entry found for video '{filename}'" - f"(participant {participant_id})." - ) - - timepoint = video_entry["timepoint"].iloc[0] - age = video_entry["Age"].iloc[0] - - # Normalize timepoint to string for pattern matching - timepoint_str = str(timepoint) - - if "14" in timepoint_str: - return "01" - elif "36" in timepoint_str: - return "02" - elif pd.notna(age): - return "01" if age < 2 else "02" - else: - raise ValueError( - f"Unable to determine session ID: timepoint={timepoint}, age={age}" - ) - - -def find_session_id( - directory: str, - current_path: str, - participant_path: str, - annotation_df: pd.DataFrame, - participant_id: str, - excel: bool = True, -) -> Optional[str]: - """Determine session ID by checking folder names first, then Excel data if needed. - - Args: - directory (str): Current directory being scanned. - current_path (str): Full path to the file. - participant_path (str): Root participant directory. - annotation_df (pd.DataFrame): Excel metadata. - participant_id (str): Participant identifier. - excel (bool) : Whether to use Excel data for session determination. - - Returns: - Optional[str]: Session ID ("01" or "02"), or None. - """ - if ( - not current_path.startswith(participant_path) - or current_path == participant_path - ): - return None - - try: - folder_name = os.path.basename(directory) - session_id = determine_session_from_folder(folder_name) - - if not session_id and excel: - session_id = determine_session_from_excel( - current_path, annotation_df, participant_id - ) - - if session_id: - return session_id - - # Recurse upward if not found - parent_path = os.path.dirname(directory) - if parent_path != directory: - return find_session_id( - parent_path, - current_path, - participant_path, - annotation_df, - participant_id, - False, - ) - - except PermissionError: - print(f"Permission denied: {current_path}") - except Exception as e: - print(f"Error accessing {current_path}: {e}") - - return None - - -def find_videos_recursive( - directory: str, - participant_path: str, - annotation_df: pd.DataFrame, - participant_id: str, -) -> List[Tuple[str, Optional[str]]]: - """Recursively find video files and determine their session IDs. 
- - Args: - directory (str): Directory to search in. - participant_path (str): Root path of the participant. - annotation_df (pd.DataFrame): Excel data for metadata lookup. - participant_id (str): Participant identifier. - - Returns: - List[Tuple[str, Optional[str]]]: List of (video_path, session_id) pairs. - """ - videos = [] - try: - for item in os.listdir(directory): - if item.startswith("."): - continue # Skip hidden files - - item_path = os.path.join(directory, item) - - if os.path.isfile(item_path) and item.lower().endswith( - (".mp4", ".mov", ".avi", ".mkv", ".m4v", ".3gp", ".mts") - ): - session_id = find_session_id( - directory, - item_path, - participant_path, - annotation_df, - participant_id, - ) - videos.append((item_path, session_id)) - - elif os.path.isdir(item_path): - videos.extend( - find_videos_recursive( - item_path, participant_path, annotation_df, participant_id - ) - ) - - except PermissionError: - print(f"Permission denied: {directory}") - except Exception as e: - print(f"Error accessing {directory}: {e}") - - return videos - - -def get_all_videos(video_root: str, annotation_df: pd.DataFrame) -> List[dict]: - """Find and label all participant videos with their corresponding session IDs. - - Args: - video_root (str): Root directory containing all participant folders. - annotation_df (pd.DataFrame): Excel data with metadata. - - Returns: - List[dict]: List of video metadata dictionaries. - """ - all_videos = [] - - try: - for participant_folder in os.listdir(video_root): - participant_path = os.path.join(video_root, participant_folder) - if not os.path.isdir(participant_path): - continue - - participant_id = extract_participant_id_from_folder(participant_folder) - if not participant_id: - continue - - videos = find_videos_recursive( - participant_path, participant_path, annotation_df, participant_id - ) - - for video_path, session_id in videos: - if session_id in {"01", "02"}: - all_videos.append( - { - "participant_id": participant_id, - "filename": os.path.basename(video_path), - "full_path": video_path, - "session_id": session_id, - "age_folder": os.path.basename(os.path.dirname(video_path)), - } - ) - - except Exception as e: - print(f"Error scanning video folders: {e}") - - return all_videos - - -def create_dummy_excel_data( - video_path: str, participant_id: str, session_id: str, task_label: str = "unknown" -) -> dict[str, str]: - """Create dummy behavioral data for videos not in Excel file.""" - video_filename = os.path.basename(video_path) - - dummy_row_data = { - "ID": participant_id, - "FileName": video_filename, - "Context": task_label, - "Location": "n/a", - "Activity": "n/a", - "Child_of_interest_clear": "n/a", - "#_adults": "n/a", - "#_children": "n/a", - "#_people_background": "n/a", - "Interaction_with_child": "n/a", - "#_people_interacting": "n/a", - "Child_constrained": "n/a", - "Constraint_type": "n/a", - "Supports": "n/a", - "Support_type": "n/a", - "Example_support_type": "n/a", - "Gestures": "n/a", - "Gesture_type": "n/a", - "Vocalizations": "n/a", - "RMM": "n/a", - "RMM_type": "n/a", - "Response_to_name": "n/a", - "Locomotion": "n/a", - "Locomotion_type": "n/a", - "Grasping": "n/a", - "Grasp_type": "n/a", - "Body_Parts_Visible": "n/a", - "Angle_of_Body": "n/a", - "time_point": "n/a", - "DOB": "n/a", - "Vid_date": "n/a", - "Video_Quality_Child_Face_Visibility": "n/a", - "Video_Quality_Child_Body_Visibility": "n/a", - "Video_Quality_Child_Hand_Visibility": "n/a", - "Video_Quality_Lighting": "n/a", - "Video_Quality_Resolution": "n/a", - 
"Video_Quality_Motion": "n/a", - "Coder": "n/a", - "SourceFile": "n/a", - "Vid_duration": "00:00:00", - "Notes": "Video not found in Excel file - behavioral data unavailable", - } - - return dummy_row_data - - -def get_task_from_excel_row(row: pd.Series) -> str: - """Extract and create task label from Excel row data.""" - context = str(row.get("Context", "")).strip() - - if context and context.lower() not in ["nan", "n/a", ""]: - return make_bids_task_label(context) - else: - return "unknown" - - -def get_next_run_number( - participant_id: str, session_id: str, task_label: str, final_bids_root: str -) -> int: - """Find the next available run number for this participant/session/task.""" - beh_dir = os.path.join( - final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh" - ) - - if not os.path.exists(beh_dir): - return 1 - - # Look for existing files with this task - pattern = f"sub-{participant_id}_ses-{session_id}_task-{task_label}_" - existing_files = [f for f in os.listdir(beh_dir) if f.startswith(pattern)] - - if not existing_files: - return 1 - - # Extract run numbers from existing files - run_numbers = [] - for filename in existing_files: - if "_run-" in filename: - run_part = filename.split("_run-")[1].split("_")[0] - try: - run_numbers.append(int(run_part)) - except ValueError: - continue - else: - run_numbers.append(1) # Files without run numbers are considered run-1 - - return max(run_numbers) + 1 if run_numbers else 1 - - -def create_bids_filename( - participant_id: str, - session_id: str, - task_label: str, - suffix: str, - extension: str, - run_id: int = 1, -) -> str: - """Create BIDS-compliant filename w run identifier for multiple videos per task.""" - return ( - f"sub-{participant_id}_" - f"ses-{session_id}_" - f"task-{task_label}_" - f"run-{run_id:02d}_" - f"{suffix}.{extension}" - ) - - -# Video processing functions -def extract_exif(video_path: str) -> Dict[str, Any]: - """Extract video metadata using ffprobe.""" - try: - cmd = [ - "ffprobe", - "-v", - "quiet", - "-print_format", - "json", - "-show_format", - "-show_streams", - video_path, - ] - result = subprocess.run(cmd, capture_output=True, text=True) - if result.returncode != 0: - return {"ffprobe_error": result.stderr.strip()} - - metadata = json.loads(result.stdout) - extracted = {} - - format_info = metadata.get("format", {}) - extracted["filename"] = format_info.get("filename") - extracted["format"] = format_info.get("format_long_name") - extracted["duration_sec"] = float(format_info.get("duration", 0)) - extracted["bit_rate"] = int(format_info.get("bit_rate", 0)) - extracted["size_bytes"] = int(format_info.get("size", 0)) - - return extracted - except Exception as e: - return {"error": str(e)} - - -def stabilize_video(input_path: str, stabilized_path: str, temp_dir: str) -> None: - """Stabilize video using FFmpeg vidstab filters, with error checks.""" - os.makedirs(temp_dir, exist_ok=True) - transforms_file = os.path.join(temp_dir, "transforms.trf") - - # Step 1: Detect transforms - detect_cmd = [ - "ffmpeg", - "-y", - "-i", - input_path, - "-vf", - f"vidstabdetect=shakiness=5:accuracy=15:result={transforms_file}", - "-f", - "null", - "-", - ] - print(f"[DEBUG] Running: {' '.join(detect_cmd)}") - detect_proc = subprocess.run(detect_cmd, capture_output=True, text=True) - - if detect_proc.returncode != 0: - print(f"[ERROR] vidstabdetect failed for {input_path}:\n{detect_proc.stderr}") - raise RuntimeError(f"FFmpeg vidstabdetect failed for {input_path}") - - if not os.path.exists(transforms_file): - 
raise FileNotFoundError(f"Transform file not created: {transforms_file}") - - # Step 2: Apply transforms - transform_cmd = [ - "ffmpeg", - "-y", - "-i", - input_path, - "-vf", - f"vidstabtransform=smoothing=30:input={transforms_file}", - "-c:v", - "libx264", - "-preset", - "slow", - "-crf", - "23", - "-c:a", - "copy", - stabilized_path, - ] - print(f"[DEBUG] Running: {' '.join(transform_cmd)}") - transform_proc = subprocess.run(transform_cmd, capture_output=True, text=True) - - if transform_proc.returncode != 0: - print( - f"[ERROR] vidstabtransform failed for {input_path}:" - f"\n{transform_proc.stderr}" - ) - raise RuntimeError(f"FFmpeg vidstabtransform failed for {input_path}") - - if not os.path.exists(stabilized_path): - raise FileNotFoundError(f"Stabilized video not created: {stabilized_path}") - - # Cleanup - os.remove(transforms_file) - - -def preprocess_video(input_path: str, output_path: str, temp_dir: str) -> None: - """Preprocess video with stabilization, denoising, and standardization.""" - if not os.path.exists(input_path): - raise ValueError(f"Input video not found: {input_path}") - - stabilized_tmp = os.path.join(temp_dir, f"stabilized_temp_{os.getpid()}.mp4") - - try: - stabilize_video(input_path, stabilized_tmp, temp_dir) - - # Verify stabilization succeeded - if not os.path.exists(stabilized_tmp): - raise ValueError( - "Video stabilization failed - no intermediate file created" - ) - - vf_filters = ( - "yadif," - "hqdn3d," - "eq=contrast=1.0:brightness=0.0:saturation=1.0," - "scale=-2:720," - "pad=ceil(iw/2)*2:ceil(ih/2)*2," - f"fps={TARGET_FRAMERATE}" - ) - - cmd = [ - "ffmpeg", - "-y", - "-i", - stabilized_tmp, - "-vf", - vf_filters, - "-c:v", - "libx264", - "-crf", - "23", - "-preset", - "fast", - "-c:a", - "aac", - "-b:a", - "128k", - "-movflags", - "+faststart", - output_path, - ] - - # Capture and check stderr - result = subprocess.run( - cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True - ) - if result.returncode != 0: - raise ValueError(f"Video processing failed: {result.stderr}") - - # Verify output file was created and has content - if not os.path.exists(output_path): - raise ValueError(f"Video processing failed - no output file: {output_path}") - if os.path.getsize(output_path) == 0: - raise ValueError( - f"Video processing failed - empty output file: {output_path}" - ) - - finally: - # Clean up temp file - if os.path.exists(stabilized_tmp): - os.remove(stabilized_tmp) - - -def extract_audio(input_path: str, output_audio_path: str) -> None: - """Extract audio from video file.""" - if not os.path.exists(input_path): - raise ValueError(f"Input video not found: {input_path}") - - cmd = [ - "ffmpeg", - "-y", - "-i", - input_path, - "-vn", - "-acodec", - "pcm_s16le", - "-ar", - "16000", - "-ac", - "1", - output_audio_path, - ] - - # Check return code and stderr - result = subprocess.run( - cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True - ) - if result.returncode != 0: - raise ValueError(f"Audio extraction failed: {result.stderr}") - - # Verify output file was created - if not os.path.exists(output_audio_path): - raise ValueError( - f"Audio extraction failed - no output file: {output_audio_path}" - ) - - -def safe_float_conversion( - value: float | int | str | None, default: str = "n/a" -) -> float | str: - """Convert value to float, return default if conversion fails.""" - if value is None or pd.isna(value): - return default - - # Convert to string and check for common non-numeric indicators - str_val = str(value).strip().lower() 
- if str_val in ["", "n/a", "na", "nan", "none", "null"]: - return default - - try: - return float(value) - except (ValueError, TypeError): - return default - - -# BIDS file creation functions -def create_events_file( - group_df: pd.DataFrame, output_path: str, full_filepath: str -) -> None: - """Create events.tsv file from Excel data with all columns.""" - events_data = [] - - for idx, row in group_df.iterrows(): - event = { - "onset": 0.0, - "duration": parse_duration(row.get("Vid_duration", "00:00:00")), - "coder": str(row.get("Coder", "n/a")), - "filepath_engaging": str(full_filepath), - "source_file": str(row.get("SourceFile", "n/a")), - "context": str(row.get("Context", "n/a")), - "location": str(row.get("Location", "n/a")), - "activity": str(row.get("Activity", "n/a")), - "child_clear": str(row.get("Child_of_interest_clear", "n/a")), - "num_adults": str(row.get("#_adults", "n/a")), - "num_children": str(row.get("#_children", "n/a")), - "num_people_background": str(row.get("#_people_background", "n/a")), - "interaction_with_child": str(row.get("Interaction_with_child", "n/a")), - "num_people_interacting": str(row.get("#_people_interacting", "n/a")), - "child_constrained": str(row.get("Child_constrained", "n/a")), - "constraint_type": str(row.get("Constraint_type", "n/a")), - "supports": str(row.get("Supports", "n/a")), - "support_type": str(row.get("Support_type", "n/a")), - "example_support_type": str(row.get("Example_support_type", "n/a")), - "gestures": str(row.get("Gestures", "n/a")), - "gesture_type": str(row.get("Gesture_type", "n/a")), - "vocalizations": str(row.get("Vocalizations", "n/a")), - "rmm": str(row.get("RMM", "n/a")), - "rmm_type": str(row.get("RMM_type", "n/a")), - "response_to_name": str(row.get("Response_to_name", "n/a")), - "locomotion": str(row.get("Locomotion", "n/a")), - "locomotion_type": str(row.get("Locomotion_type", "n/a")), - "grasping": str(row.get("Grasping", "n/a")), - "grasp_type": str(row.get("Grasp_type", "n/a")), - "body_parts_visible": str(row.get("Body_Parts_Visible", "n/a")), - "angle_of_body": str(row.get("Angle_of_Body", "n/a")), - "timepoint": str(row.get("time_point", "n/a")), - "dob": str(row.get("DOB", "n/a")), - "vid_date": str(row.get("Vid_date", "n/a")), - "video_quality_face": safe_float_conversion( - row.get("Video_Quality_Child_Face_Visibility") - ), - "video_quality_body": safe_float_conversion( - row.get("Video_Quality_Child_Body_Visibility") - ), - "video_quality_hand": safe_float_conversion( - row.get("Video_Quality_Child_Hand_Visibility") - ), - "video_quality_lighting": safe_float_conversion( - row.get("Video_Quality_Lighting") - ), - "video_quality_resolution": safe_float_conversion( - row.get("Video_Quality_Resolution") - ), - "video_quality_motion": safe_float_conversion( - row.get("Video_Quality_Motion") - ), - "notes": str(row.get("Notes", "n/a")), - } - events_data.append(event) - - events_df = pd.DataFrame(events_data) - print(events_df) - events_df.to_csv(output_path, sep="\t", index=False, na_rep="n/a") - - -def create_video_metadata_json( - metadata: Dict[str, Any], - processing_info: Dict[str, Any], - task_info: Dict[str, Any], - output_path: str, -) -> None: - """Create JSON metadata file for processed video with dynamic task info.""" - video_json = { - "TaskName": task_info.get("task_name", "unknown"), - "TaskDescription": task_info.get( - "task_description", "Video recorded during behavioral session" - ), - "Instructions": task_info.get( - "instructions", "Natural behavior in home environment" - ), - "Context": 
task_info.get("context", "n/a"), - "Activity": task_info.get("activity", "n/a"), - "SamplingFrequency": TARGET_FRAMERATE, - "Resolution": TARGET_RESOLUTION, - "ProcessingPipeline": { - "Stabilization": processing_info.get("has_stabilization", False), - "Denoising": processing_info.get("has_denoising", False), - "Equalization": processing_info.get("has_equalization", False), - "StandardizedFPS": TARGET_FRAMERATE, - "StandardizedResolution": TARGET_RESOLUTION, - }, - "OriginalMetadata": metadata, - } - save_json(video_json, output_path) - - -def create_audio_metadata_json( - duration_sec: float, task_info: Dict[str, Any], output_path: str -) -> None: - """Create JSON metadata file for extracted audio with dynamic task info.""" - audio_json = { - "SamplingFrequency": 16000, - "Channels": 1, - "SampleEncoding": "16bit", - "Duration": duration_sec, - "TaskName": task_info.get("task_name", "unknown"), - "TaskDescription": task_info.get( - "task_description", "Audio extracted from behavioral session" - ), - "Context": task_info.get("context", "n/a"), - "Activity": task_info.get("activity", "n/a"), - } - save_json(audio_json, output_path) - - -def create_raw_video_json( - row: pd.Series, task_info: Dict[str, Any], video_path: str, output_path: str -) -> None: - """Create JSON metadata for raw video.""" - video_props = get_video_properties(video_path) - - video_json = { - "TaskName": task_info.get("task_name", "unknown"), - "TaskDescription": task_info.get( - "task_description", "Raw video from behavioral session" - ), - "SamplingFrequency": video_props.get("SamplingFrequency", "n/a"), - "Resolution": video_props.get("Resolution", "n/a"), - "OriginalFilename": str(row.get("FileName", "")), - "Duration": parse_duration(row.get("Vid_duration", "00:00:00")), - "RecordingDate": str(row.get("Vid_date", "n/a")), - "Context": task_info.get("context", "n/a"), - "Activity": task_info.get("activity", "n/a"), - "TimePoint": str(row.get("timepoint", "n/a")), - "SourceFile": str(row.get("SourceFile", "n/a")), - } - save_json(video_json, output_path) - - -def process_single_video( - video_info: Dict, - annotation_df: pd.DataFrame, - final_bids_root: str, - final_derivatives_dir: str, - temp_dir: str, -) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: - """Process a single video with all BIDS structures.""" - participant_id = video_info["participant_id"] - filename = video_info["filename"] - session_id = video_info["session_id"] - input_video_path = video_info["full_path"] - safe_print(f"Processing: {participant_id}/{filename}") - filename_without_extension = os.path.splitext(filename)[0] - # Check if video exists in Excel or create dummy data - - try: - # Check if video exists in Excel or create dummy data - participant_excel = annotation_df[ - annotation_df["ID"].astype(str) == str(participant_id) - ] - mask = ( - participant_excel["FileName"].str.split(".").str[0] - == filename_without_extension - ) - video_excel = participant_excel[mask] - if video_excel.empty: - # Create dummy data for missing Excel entries - dummy_data = create_dummy_excel_data( - input_video_path, participant_id, session_id - ) - video_excel = pd.DataFrame([dummy_data]) - has_excel_data = False - safe_print("No Excel data found - using dummy data") - else: - has_excel_data = True - - excel_row = video_excel.iloc[0] - task_label = get_task_from_excel_row(excel_row) - activity = excel_row.get("Activity", "unknown activity") - # Create task information - task_info = { - "task_name": task_label, - "task_description": f"Behavioral 
session: {activity}", - "instructions": "Natural behavior observation", - "context": str(excel_row.get("Context", "n/a")), - "activity": str(excel_row.get("Activity", "n/a")), - } - - # Create BIDS directory structure - raw_subj_dir = os.path.join( - final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh" - ) - deriv_subj_dir = os.path.join( - final_derivatives_dir, f"sub-{participant_id}", f"ses-{session_id}", "beh" - ) - - os.makedirs(raw_subj_dir, exist_ok=True) - os.makedirs(deriv_subj_dir, exist_ok=True) - - # Create BIDS filenames with run number - ext = os.path.splitext(filename)[1][1:] - run_number = get_next_run_number( - participant_id, session_id, task_label, final_bids_root - ) - - raw_video_name = create_bids_filename( - participant_id, session_id, task_label, "beh", "mp4", run_number - ) - processed_video_name = create_bids_filename( - participant_id, - session_id, - task_label, - "desc-processed_beh", - "mp4", - run_number, - ) - audio_name = create_bids_filename( - participant_id, session_id, task_label, "audio", "wav", run_number - ) - events_name = create_bids_filename( - participant_id, session_id, task_label, "events", "tsv", run_number - ) - - # File paths - raw_video_path = os.path.join(raw_subj_dir, raw_video_name) - processed_video_path = os.path.join(deriv_subj_dir, processed_video_name) - audio_path = os.path.join(deriv_subj_dir, audio_name) - events_path = os.path.join(raw_subj_dir, events_name) - - if not os.path.exists(raw_video_path): - if ext.lower() != ".mp4": - # Convert to mp4 without processing - cmd = [ - "ffmpeg", - "-y", - "-i", - input_video_path, - "-c", - "copy", - raw_video_path, - ] - result = subprocess.run( - cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True - ) - # Check return code and verify output file - if result.returncode != 0: - raise ValueError(f"FFmpeg conversion failed: {result.stderr}") - if not os.path.exists(raw_video_path): - raise ValueError( - f"FFmpeg did not create output file: {raw_video_path}" - ) - safe_print(" Converted to raw BIDS format") - else: - shutil.copy2(input_video_path, raw_video_path) - # FIX: Verify copy succeeded - if not os.path.exists(raw_video_path): - raise ValueError(f"Failed to copy to raw BIDS: {raw_video_path}") - safe_print(" Copied to raw BIDS") - - # Extract metadata from raw video - exif_data = extract_exif(raw_video_path) - if "error" in exif_data or "ffprobe_error" in exif_data: - raise ValueError("Unreadable or unsupported video format") - - # Process video for derivatives - if not os.path.exists(processed_video_path): - safe_print(" Starting video processing...") - preprocess_video(raw_video_path, processed_video_path, temp_dir) - # Verify processing succeeded - if not os.path.exists(processed_video_path): - raise ValueError( - f"Video processing failed - no output file: {processed_video_path}" - ) - if os.path.getsize(processed_video_path) == 0: - raise ValueError( - "Video processing failed- empty output file:" - f" {processed_video_path}" - ) - safe_print(" Video processing complete") - - if not os.path.exists(audio_path): - safe_print(" Extracting audio...") - extract_audio(processed_video_path, audio_path) - # Verify audio extraction succeeded - if not os.path.exists(audio_path): - raise ValueError( - f"Audio extraction failed - no output file: {audio_path}" - ) - if os.path.getsize(audio_path) == 0: - raise ValueError( - f"Audio extraction failed - empty output file: {audio_path}" - ) - safe_print(" Audio extraction complete") - - # Create events files - 
create_events_file(video_excel, events_path, input_video_path) - if not os.path.exists(events_path): - raise ValueError(f"Failed to create events file: {events_path}") - - # Create metadata JSON files - processing_info = { - "has_stabilization": True, - "has_denoising": True, - "has_equalization": True, - } - - # Raw video JSON - raw_video_json_path = raw_video_path.replace(".mp4", ".json") - create_raw_video_json( - excel_row, - task_info, - raw_video_path, - raw_video_json_path, - ) - if not os.path.exists(raw_video_json_path): - raise ValueError(f"Failed to create raw video JSON: {raw_video_json_path}") - - # Processed video JSON - processed_video_json_path = processed_video_path.replace(".mp4", ".json") - create_video_metadata_json( - exif_data, - processing_info, - task_info, - processed_video_json_path, - ) - if not os.path.exists(processed_video_json_path): - raise ValueError( - f"Failed to create processed video JSON: {processed_video_json_path}" - ) - - # Audio JSON - audio_json_path = audio_path.replace(".wav", ".json") - create_audio_metadata_json( - exif_data.get("duration_sec", 0), task_info, audio_json_path - ) - if not os.path.exists(audio_json_path): - raise ValueError(f"Failed to create audio JSON: {audio_json_path}") - - # Store processing information - entry = { - "participant_id": participant_id, - "session_id": session_id, - "task_label": task_label, - "original_video": input_video_path, - "raw_video_bids": raw_video_path, - "processed_video_bids": processed_video_path, - "audio_file_bids": audio_path, - "events_file_bids": events_path, - "filename": filename, - "age_folder": video_info["age_folder"], - "duration_sec": exif_data.get("duration_sec", 0), - "has_excel_data": has_excel_data, - "excel_metadata": excel_row.to_dict(), - "task_info": task_info, - "processing_info": processing_info, - } - - safe_print(f" Successfully processed: {participant_id}/{filename}") - return entry, None - - except Exception as e: - safe_print(f" ERROR processing {input_video_path}: {str(e)}") - return None, {"video": input_video_path, "error": str(e)} - - -def create_dataset_description() -> None: - """Create dataset_description.json for main BIDS dataset.""" - dataset_desc = { - "Name": "SAILS Phase III Home Videos", - "BIDSVersion": "1.9.0", - "DatasetType": "domestic videos with audio", - } - try: - filepath = os.path.join(FINAL_BIDS_ROOT, "dataset_description.json") - save_json(dataset_desc, filepath) - - except Exception as e: - raise ValueError( - f"Failed to create dataset_description.json at {filepath}: {e}" - ) - - -def create_derivatives_dataset_description() -> None: - """Create dataset_description.json for derivatives.""" - os.makedirs(FINAL_DERIVATIVES_DIR, exist_ok=True) - - derivatives_desc = { - "Name": "SAILS Phase III Home Videos - Preprocessed", - "BIDSVersion": "1.9.0", - "DatasetType": "derivative", - "GeneratedBy": [ - { - "Name": "Video Preprocessing Pipeline", - "Version": "1.0.0", - "Description": ( - "FFmpeg-based video stabilization, denoising, " - "and standardization pipeline with audio extraction" - ), - "CodeURL": "local", - } - ], - "SourceDatasets": [{"URL": "", "Version": "1.0.0"}], - "HowToAcknowledge": "Please cite the original study", - } - - filepath = os.path.join(FINAL_DERIVATIVES_DIR, "dataset_description.json") - save_json(derivatives_desc, filepath) - if not os.path.exists(filepath): - raise ValueError( - f"Failed to create derivatives dataset_description.json at {filepath}" - ) - - -def create_readme() -> None: - """Create README file for the 
BIDS dataset.""" - readme_content = """# SAILS Phase III Home Videos BIDS Dataset - -## Overview -This dataset contains home videos from the SAILS Phase III study, -organized according to the Brain Imaging Data Structure (BIDS) specification. - -## Data Collection -Videos were collected from home environments during various activities. -Two main age groups were included: -- Session 01: 12-16 month old children -- Session 02: 34-38 month old children - -## Dataset Structure -### Raw Data -- sub-*/ses-*/beh/: Raw behavioral videos (converted to mp4) and event -annotations (contains also the original filepath of the video processed) - -### Derivatives -- derivatives/preprocessed/sub-*/ses-*/beh/: Processed videos and extracted audio - - Videos: Stabilized, denoised, standardized to 720p/30fps - - Audio: Extracted to 16kHz mono WAV format - -## Data Processing -All videos underwent standardized preprocessing including: -- Video stabilization using vidstab -- Denoising and quality enhancement -- Standardization to 720p resolution and 30fps -- Audio extraction for speech analysis -- Filename modication according to subject ID and task label -- Extraction of ASD status for every subject stored in the participants.tsv file. - -## Behavioral Coding -Events files include manual annotations from csv file and Engaging -location of the raw video. - -## Task Labels -Task labels are derived from the Context column in the csv. -It allows to capture what kind of interaction was happening in the video. -Videos without behavioral coding data use "unknown" task label. -""" - - filepath = os.path.join(OUTPUT_DIR, "README") - try: - with open(filepath, "w") as f: - f.write(readme_content) - except Exception as e: - raise ValueError(f"Failed to create README at {filepath}: {e}") - - -def create_participants_file() -> None: - """Create participants.tsv and participants.json files.""" - asd_status = pd.read_excel(ASD_STATUS_FILE) - ids_processed_participants = [] - for name in os.listdir(FINAL_BIDS_ROOT): - full_path = os.path.join(FINAL_BIDS_ROOT, name) - if os.path.isdir(full_path) and name.startswith("sub-"): - ids_processed_participants.append(name.split("sub-")[1]) - participants_data = [] - for participant_id in sorted(ids_processed_participants): - asd_info = asd_status[asd_status["ID"].astype(str) == str(participant_id)] - participants_data.append( - { - "participant_id": f"sub-{participant_id}", - "group": asd_info["Group"].values[0] if not asd_info.empty else "n/a", - } - ) - - participants_df = pd.DataFrame(participants_data) - participants_df.to_csv( - os.path.join(FINAL_BIDS_ROOT, "participants.tsv"), - sep="\t", - index=False, - na_rep="n/a", - ) - - participants_json = { - "participant_id": {"Description": "Unique BIDS participant identifier"}, - "Group": {"Description": "ASD status"}, - } - - save_json(participants_json, os.path.join(FINAL_BIDS_ROOT, "participants.json")) - - -def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None: - """Print processing summary statistics.""" - print("PROCESSING SUMMARY") - - print(f"Successfully processed: {len(all_processed)} videos") - print(f"Failed to process: {len(all_failed)} videos") - print(f"Total videos attempted: {len(all_processed) + len(all_failed)}") - - if all_processed: - # Excel data availability - with_excel = sum( - 1 for entry in all_processed if entry.get("has_excel_data", False) - ) - without_excel = len(all_processed) - with_excel - print("\nData sources:") - print(f" With Excel behavioral data: {with_excel} videos") - 
print(f" With dummy behavioral data: {without_excel} videos") - - # Task distribution - task_counts: dict[str, int] = {} - participant_counts: dict[str, int] = {} - session_counts: dict[str, int] = {} - - for entry in all_processed: - task = entry["task_label"] - participant = entry["participant_id"] - session = entry["session_id"] - task_counts[task] = task_counts.get(task, 0) + 1 - participant_counts[participant] = participant_counts.get(participant, 0) + 1 - session_counts[session] = session_counts.get(session, 0) + 1 - - print("\nTask distribution:") - for task, count in sorted(task_counts.items()): - print(f" {task}: {count} videos") - - print("\nSession distribution:") - for session, count in sorted(session_counts.items()): - print(f" Session {session}: {count} videos") - - print(f"\nUnique participants processed: {len(participant_counts)}") - - # Duration statistics - durations = [entry.get("duration_sec", 0) for entry in all_processed] - total_duration = sum(durations) - avg_duration = total_duration / len(durations) if durations else 0 - - print("\nDuration statistics:") - print(f" Total video duration: {total_duration/3600:.1f} hours") - print(f" Average video duration: {avg_duration/60:.1f} minutes") - - if all_failed: - print("\nFailed videos breakdown:") - error_types: dict[str, int] = {} - for entry in all_failed: - error = entry.get("error", "Unknown error") - error_types[error] = error_types.get(error, 0) + 1 - - for error, count in sorted(error_types.items()): - print(f" {error}: {count} videos") - - -def merge_subjects() -> None: - """Merge duplicated subjects folders.""" - paths_to_check = [ - Path(FINAL_BIDS_ROOT), - Path(FINAL_BIDS_ROOT) / "derivatives" / "preprocessed", - ] - - for folder in paths_to_check: - if not folder.exists(): - continue - - subs = [d for d in folder.iterdir() if d.is_dir() and d.name.startswith("sub-")] - sub_names = {d.name for d in subs} - - for sub in subs: - if sub.name.endswith(" 2"): - original_name = sub.name[:-2] - original_path = folder / original_name - if original_name in sub_names and original_path.exists(): - print(f"Merging {sub} → {original_path}") - - for item in sub.iterdir(): - dest = original_path / item.name - if item.is_dir(): - if not dest.exists(): - shutil.copytree(item, dest) - else: - # merge recursively if same session already exists - for subitem in item.iterdir(): - dest_sub = dest / subitem.name - if not dest_sub.exists(): - if subitem.is_dir(): - shutil.copytree(subitem, dest_sub) - else: - shutil.copy2(subitem, dest_sub) - else: - if not dest.exists(): - shutil.copy2(item, dest) - shutil.rmtree(sub) - else: - print(f"No base subject found for {sub}, skipping.") - - -def process_videos( - task_id: int, - num_tasks: int, - annotation_df: pd.DataFrame, - all_videos: list, - final_bids_root: str, - final_derivatives_dir: str, - output_dir: str, -) -> tuple[list, list]: - """Process the subset of videos assigned to this task. 
- - Returns: - (all_processed, all_failed) - """ - safe_print(f"Task {task_id}: Processing videos...") - video_chunks = all_videos[task_id::num_tasks] - - if not video_chunks: - safe_print(f"No videos assigned to task {task_id}") - return [], [] - - temp_dir = os.path.join(output_dir, str(task_id), "temp") - os.makedirs(temp_dir, exist_ok=True) - - all_processed, all_failed = [], [] - - for i, video_info in enumerate(video_chunks, 1): - safe_print(f"[Task {task_id}] Video {i}/{len(video_chunks)}") - processed_entry, failed_entry = process_single_video( - video_info, - annotation_df, - final_bids_root, - final_derivatives_dir, - temp_dir, - ) - if processed_entry: - all_processed.append(processed_entry) - if failed_entry: - all_failed.append(failed_entry) - - # Save per-task logs - task_dir = os.path.join(output_dir, str(task_id)) - os.makedirs(task_dir, exist_ok=True) - save_json(all_processed, os.path.join(task_dir, "processing_log.json")) - save_json(all_failed, os.path.join(task_dir, "not_processed.json")) - - # Cleanup temp dir - if os.path.exists(temp_dir): - shutil.rmtree(temp_dir) - - return all_processed, all_failed - - -def main() -> None: - """Main entry point for multi-task BIDS video processing.""" - if len(sys.argv) != 3: - print("Usage: python updated_bids.py ") - sys.exit(1) - - my_task_id = int(sys.argv[1]) - num_tasks = int(sys.argv[2]) - - start_time = time.time() - - # --- Validate paths --- - for path, label in [(VIDEO_ROOT, "Video root"), (ANNOTATION_FILE, "Excel file")]: - if not os.path.exists(path): - print(f"ERROR: {label} not found at {path}") - sys.exit(1) - - # --- Load metadata --- - try: - annotation_df = pd.read_csv(ANNOTATION_FILE) - annotation_df.columns = annotation_df.columns.str.strip() - safe_print(f"Loaded {len(annotation_df)} rows from Excel file") - except Exception as e: - safe_print(f"ERROR: Failed to load Excel file: {e}") - sys.exit(1) - - # --- Discover videos --- - safe_print("Discovering videos...") - all_videos = get_all_videos(VIDEO_ROOT, annotation_df) - if not all_videos: - safe_print("ERROR: No videos found.") - sys.exit(1) - safe_print(f"Found {len(all_videos)} video files.") - - # --- Create BIDS structure (only once) --- - if my_task_id == 0: - try: - safe_print("Creating BIDS structure files...") - create_bids_structure() - create_dataset_description() - create_derivatives_dataset_description() - create_readme() - except Exception as e: - safe_print(f"CRITICAL ERROR: Failed to create BIDS structure files: {e}") - sys.exit(1) - - # --- Process this task’s subset --- - all_processed, all_failed = process_videos( - my_task_id, - num_tasks, - annotation_df, - all_videos, - FINAL_BIDS_ROOT, - FINAL_DERIVATIVES_DIR, - OUTPUT_DIR, - ) - - # --- Final summary --- - total_time = time.time() - start_time - print_summary(all_processed, all_failed) - safe_print( - f"Total processing time: {total_time / 3600:.1f}" - f" hours ({total_time / 60:.1f} minutes)" - ) - - if all_processed: - avg_time = total_time / len(all_processed) - safe_print(f"Average time per video: {avg_time:.1f} seconds") - - safe_print("Processing complete ✅") - - -if __name__ == "__main__": - main() From e16f84b254aca4d2a0c4d934ba0df5e3eaf32976 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Mon, 3 Nov 2025 16:07:59 -0500 Subject: [PATCH 24/36] Fixed little warnings from PR --- src/sailsprep/BIDS_convertor.py | 1529 ++++++++++++++++++++++++++++++ src/tests/test_BIDS_convertor.py | 18 +- 2 files changed, 1539 insertions(+), 8 deletions(-) create mode 100644 
src/sailsprep/BIDS_convertor.py diff --git a/src/sailsprep/BIDS_convertor.py b/src/sailsprep/BIDS_convertor.py new file mode 100644 index 0000000..1b63d53 --- /dev/null +++ b/src/sailsprep/BIDS_convertor.py @@ -0,0 +1,1529 @@ +"""BIDS Video Processing Pipeline. + +This module processes home videos from ASD screening studies and organizes them +according to the Brain Imaging Data Structure (BIDS) specification version 1.9.0. + +The pipeline includes video stabilization, denoising, standardization, and audio +extraction for behavioral analysis research. + +Example: + Basic usage: + $ python bids_video_processor.py + +Todo: + * check with actual data +""" + +import json +import os +import re +import shutil +import subprocess +import sys +import time +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple, Union + +import cv2 +import pandas as pd +import yaml + + +def load_configuration(config_path: str = "config.yaml") -> Dict[str, Any]: + """Load configuration from YAML file. + + Args: + config_path (str): Path to the configuration YAML file. + + Returns: + dict: Configuration dictionary containing video processing parameters. + + Raises: + FileNotFoundError: If the configuration file is not found. + yaml.YAMLError: If the YAML file is malformed. + """ + with open(config_path, "r") as f: + config = yaml.safe_load(f) + return config + + +# Load configuration +config_path = ( + Path(__file__).resolve().parents[2] / "configs" / "config_bids_convertor.yaml" +) +config = load_configuration(str(config_path)) +# Unpack configuration +ANNOTATION_FILE = config["annotation_file"] +VIDEO_ROOT = config["video_root"] +OUTPUT_DIR = config["output_dir"] +TARGET_RESOLUTION = config["target_resolution"] +TARGET_FRAMERATE = config["target_framerate"] +ASD_STATUS_FILE = config["asd_status"] +# BIDS directory structure +FINAL_BIDS_ROOT = os.path.join( + OUTPUT_DIR, config.get("final_bids_root", "final_bids-dataset") +) +FINAL_DERIVATIVES_DIR = os.path.join( + FINAL_BIDS_ROOT, config.get("derivatives_subdir", "derivatives/preprocessed") +) + + +def create_bids_structure() -> None: + """Create the BIDS directory structure. + + Creates the main BIDS dataset directory and derivatives subdirectory + following BIDS specification requirements. + + Note: + This function creates directories with exist_ok=True to prevent + errors if directories already exist. + """ + os.makedirs(FINAL_BIDS_ROOT, exist_ok=True) + os.makedirs(FINAL_DERIVATIVES_DIR, exist_ok=True) + + +def save_json(data: Union[List[Any], Dict[str, Any]], path: str) -> None: + """Save data to JSON file. + + Utility function to save Python data structures to JSON files with + proper formatting and error handling. + + Args: + data (list or dict): Data structure to save as JSON. + path (str): Output file path for JSON file. + + Raises: + IOError: If unable to write to the specified path. + TypeError: If data contains non-serializable objects. + + Note: + Uses 4-space indentation for readable JSON output. 
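+ + Example: + An illustrative call (the path here is hypothetical): + + save_json({"Name": "demo"}, "/tmp/demo.json") # writes 4-space-indented JSON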
+ """ + with open(path, "w") as f: + json.dump(data, f, indent=4) + + +def safe_print(message: str) -> None: + """Print with timestamps.""" + timestamp = datetime.now().strftime("%H:%M:%S") + print(f"{timestamp} [MAIN] {message}") + + +# Helper functions +def parse_duration(duration_str: str) -> float: + """Parse duration string to seconds.""" + try: + if pd.isna(duration_str) or duration_str == "": + return 0.0 + duration_str = str(duration_str) + if ":" in duration_str: + parts = duration_str.split(":") + if len(parts) == 3: + hours = int(parts[0]) + minutes = int(parts[1]) + seconds = float(parts[2]) + return hours * 3600 + minutes * 60 + seconds + elif len(parts) == 2: + minutes = int(parts[0]) + seconds = float(parts[1]) + return minutes * 60 + seconds + return float(duration_str) + except (ValueError, TypeError): + return 0.0 + + +def make_bids_task_label(task_name: str) -> str: + """Convert TaskName to BIDS-compatible task label for filenames.""" + s = str(task_name).strip() + s = re.sub(r"[^0-9a-zA-Z+]", "", s) # Keep only alphanumeric and + + return s + + +def get_video_properties(video_path: str) -> dict: + """Extract video properties using OpenCV.""" + try: + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + return {"FrameRate": None, "Resolution": None} + + fps = cap.get(cv2.CAP_PROP_FPS) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap.release() + + return { + "SamplingFrequency": fps, + "Resolution": f"{width}x{height}", + } + + except Exception as e: + print(f"Error reading video {video_path}: {e}") + return {"SamplingFrequency": None, "Resolution": None} + + +def determine_session_from_folder(folder_name: str) -> Optional[str]: + """Determine the session ID from a folder name based on known age-related patterns. + + Args: + folder_name (str): The name of the folder to check. + + Returns: + Optional[str]: "01" for 12–16 month sessions, "02" for 34–38 month sessions, + or None if no match. + """ + folder_lower = folder_name.lower() + + # Session 01 patterns + if any( + pattern in folder_lower + for pattern in [ + "12-16 month", + "12-14 month", + "12_16", + "12_14", + "12-16month", + "12-14month", + "12-16_month_videos", + ] + ): + return "01" + + # Session 02 patterns (typos and variants included) + if any( + pattern in folder_lower + for pattern in [ + "34-38 month", + "34-28 month", + "34-48 month", + "34_38", + "34_28", + "34_48", + "34-38month", + "34-28month", + "34-48month", + "34-38_month_videos", + ] + ): + return "02" + + return None + + +def find_age_folder_session(current_path: str, participant_path: str) -> Optional[str]: + """Recursively seek the timepoint folder. + + Args: + current_path (str): Current directory path to inspect. + participant_path (str): Root path of the participant. + + Returns: + Optional[str]: Session ID ("01" or "02") if detected, else None. + """ + if ( + not current_path.startswith(participant_path) + or current_path == participant_path + ): + return None + + current_folder = os.path.basename(current_path) + session_id = determine_session_from_folder(current_folder) + if session_id: + return session_id + + parent_path = os.path.dirname(current_path) + return find_age_folder_session(parent_path, participant_path) + + +def extract_participant_id_from_folder(folder_name: str) -> str: + """Extract the participant ID from folder names. + + Args: + folder_name (str): Folder name containing participant info. + + Returns: + str: Extracted participant ID. 
+ """ + if "AMES_" in folder_name: + parts = folder_name.split("AMES_") + if len(parts) > 1: + return parts[1].strip() + + if "_" in folder_name: + return folder_name.split("_")[-1] + + return folder_name + + +def determine_session_from_excel( + current_path: str, annotation_df: pd.DataFrame, participant_id: str +) -> Optional[str]: + """Determine the session ID for a video based on the annotation file. + + Args: + current_path (str): Path to the video file. + annotation_df (pd.DataFrame): Excel data containing 'ID', + 'FileName', 'timepoint', and 'Age' columns. + participant_id (str): Participant identifier. + + Returns: + Optional[str]: Session ID ("01" or "02"), or None if not found. + """ + filename = os.path.splitext(os.path.basename(current_path))[0] + if participant_id.endswith(" 2"): + participant_id = participant_id[:-2].strip() + # Filter for the participant + participant_excel = annotation_df[ + annotation_df["ID"].astype(str) == str(participant_id) + ] + if participant_excel.empty: + raise ValueError( + f"Participant ID '{participant_id}' not found in Excel metadata" + f" for file '{filename}'." + ) + + # Match the video filename (without extension) + mask = participant_excel["FileName"].str.split(".").str[0] == filename + video_entry = participant_excel[mask] + + if video_entry.empty: + raise ValueError( + f"No matching Excel entry found for video '{filename}'" + f"(participant {participant_id})." + ) + + timepoint = video_entry["timepoint"].iloc[0] + age = video_entry["Age"].iloc[0] + + # Normalize timepoint to string for pattern matching + timepoint_str = str(timepoint) + + if "14" in timepoint_str: + return "01" + elif "36" in timepoint_str: + return "02" + elif pd.notna(age): + return "01" if age < 2 else "02" + else: + raise ValueError( + f"Unable to determine session ID: timepoint={timepoint}, age={age}" + ) + + +def find_session_id( + directory: str, + current_path: str, + participant_path: str, + annotation_df: pd.DataFrame, + participant_id: str, + excel: bool = True, +) -> Optional[str]: + """Determine session ID by checking folder names first, then Excel data if needed. + + Args: + directory (str): Current directory being scanned. + current_path (str): Full path to the file. + participant_path (str): Root participant directory. + annotation_df (pd.DataFrame): Excel metadata. + participant_id (str): Participant identifier. + excel (bool) : Whether to use Excel data for session determination. + + Returns: + Optional[str]: Session ID ("01" or "02"), or None. + """ + if ( + not current_path.startswith(participant_path) + or current_path == participant_path + ): + return None + + try: + folder_name = os.path.basename(directory) + session_id = determine_session_from_folder(folder_name) + + if not session_id and excel: + session_id = determine_session_from_excel( + current_path, annotation_df, participant_id + ) + + if session_id: + return session_id + + # Recurse upward if not found + parent_path = os.path.dirname(directory) + if parent_path != directory: + return find_session_id( + parent_path, + current_path, + participant_path, + annotation_df, + participant_id, + False, + ) + + except PermissionError: + print(f"Permission denied: {current_path}") + except Exception as e: + print(f"Error accessing {current_path}: {e}") + + return None + + +def find_videos_recursive( + directory: str, + participant_path: str, + annotation_df: pd.DataFrame, + participant_id: str, +) -> List[Tuple[str, Optional[str]]]: + """Recursively find video files and determine their session IDs. 
+ + Args: + directory (str): Directory to search in. + participant_path (str): Root path of the participant. + annotation_df (pd.DataFrame): Excel data for metadata lookup. + participant_id (str): Participant identifier. + + Returns: + List[Tuple[str, Optional[str]]]: List of (video_path, session_id) pairs. + """ + videos = [] + try: + for item in os.listdir(directory): + if item.startswith("."): + continue # Skip hidden files + + item_path = os.path.join(directory, item) + + if os.path.isfile(item_path) and item.lower().endswith( + (".mp4", ".mov", ".avi", ".mkv", ".m4v", ".3gp", ".mts") + ): + session_id = find_session_id( + directory, + item_path, + participant_path, + annotation_df, + participant_id, + ) + videos.append((item_path, session_id)) + + elif os.path.isdir(item_path): + videos.extend( + find_videos_recursive( + item_path, participant_path, annotation_df, participant_id + ) + ) + + except PermissionError: + print(f"Permission denied: {directory}") + except Exception as e: + print(f"Error accessing {directory}: {e}") + + return videos + + +def get_all_videos(video_root: str, annotation_df: pd.DataFrame) -> List[dict]: + """Find and label all participant videos with their corresponding session IDs. + + Args: + video_root (str): Root directory containing all participant folders. + annotation_df (pd.DataFrame): Excel data with metadata. + + Returns: + List[dict]: List of video metadata dictionaries. + """ + all_videos = [] + + try: + for participant_folder in os.listdir(video_root): + participant_path = os.path.join(video_root, participant_folder) + if not os.path.isdir(participant_path): + continue + + participant_id = extract_participant_id_from_folder(participant_folder) + if not participant_id: + continue + + videos = find_videos_recursive( + participant_path, participant_path, annotation_df, participant_id + ) + + for video_path, session_id in videos: + if session_id in {"01", "02"}: + all_videos.append( + { + "participant_id": participant_id, + "filename": os.path.basename(video_path), + "full_path": video_path, + "session_id": session_id, + "age_folder": os.path.basename(os.path.dirname(video_path)), + } + ) + + except Exception as e: + print(f"Error scanning video folders: {e}") + + return all_videos + + +def create_dummy_excel_data( + video_path: str, participant_id: str, session_id: str, task_label: str = "unknown" +) -> dict[str, str]: + """Create dummy behavioral data for videos not in Excel file.""" + video_filename = os.path.basename(video_path) + + dummy_row_data = { + "ID": participant_id, + "FileName": video_filename, + "Context": task_label, + "Location": "n/a", + "Activity": "n/a", + "Child_of_interest_clear": "n/a", + "#_adults": "n/a", + "#_children": "n/a", + "#_people_background": "n/a", + "Interaction_with_child": "n/a", + "#_people_interacting": "n/a", + "Child_constrained": "n/a", + "Constraint_type": "n/a", + "Supports": "n/a", + "Support_type": "n/a", + "Example_support_type": "n/a", + "Gestures": "n/a", + "Gesture_type": "n/a", + "Vocalizations": "n/a", + "RMM": "n/a", + "RMM_type": "n/a", + "Response_to_name": "n/a", + "Locomotion": "n/a", + "Locomotion_type": "n/a", + "Grasping": "n/a", + "Grasp_type": "n/a", + "Body_Parts_Visible": "n/a", + "Angle_of_Body": "n/a", + "time_point": "n/a", + "DOB": "n/a", + "Vid_date": "n/a", + "Video_Quality_Child_Face_Visibility": "n/a", + "Video_Quality_Child_Body_Visibility": "n/a", + "Video_Quality_Child_Hand_Visibility": "n/a", + "Video_Quality_Lighting": "n/a", + "Video_Quality_Resolution": "n/a", + 
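# Quality ratings stay "n/a" in dummy rows; real scores exist only in + # the annotation spreadsheet. +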
"Video_Quality_Motion": "n/a", + "Coder": "n/a", + "SourceFile": "n/a", + "Vid_duration": "00:00:00", + "Notes": "Video not found in Excel file - behavioral data unavailable", + } + + return dummy_row_data + + +def get_task_from_excel_row(row: pd.Series) -> str: + """Extract and create task label from Excel row data.""" + context = str(row.get("Context", "")).strip() + + if context and context.lower() not in ["nan", "n/a", ""]: + return make_bids_task_label(context) + else: + return "unknown" + + +def get_next_run_number( + participant_id: str, session_id: str, task_label: str, final_bids_root: str +) -> int: + """Find the next available run number for this participant/session/task.""" + beh_dir = os.path.join( + final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh" + ) + + if not os.path.exists(beh_dir): + return 1 + + # Look for existing files with this task + pattern = f"sub-{participant_id}_ses-{session_id}_task-{task_label}_" + existing_files = [f for f in os.listdir(beh_dir) if f.startswith(pattern)] + + if not existing_files: + return 1 + + # Extract run numbers from existing files + run_numbers = [] + for filename in existing_files: + if "_run-" in filename: + run_part = filename.split("_run-")[1].split("_")[0] + try: + run_numbers.append(int(run_part)) + except ValueError: + continue + else: + run_numbers.append(1) # Files without run numbers are considered run-1 + + return max(run_numbers) + 1 if run_numbers else 1 + + +def create_bids_filename( + participant_id: str, + session_id: str, + task_label: str, + suffix: str, + extension: str, + run_id: int = 1, +) -> str: + """Create BIDS-compliant filename w run identifier for multiple videos per task.""" + return ( + f"sub-{participant_id}_" + f"ses-{session_id}_" + f"task-{task_label}_" + f"run-{run_id:02d}_" + f"{suffix}.{extension}" + ) + + +# Video processing functions +def extract_exif(video_path: str) -> Dict[str, Any]: + """Extract video metadata using ffprobe.""" + try: + cmd = [ + "ffprobe", + "-v", + "quiet", + "-print_format", + "json", + "-show_format", + "-show_streams", + video_path, + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + return {"ffprobe_error": result.stderr.strip()} + + metadata = json.loads(result.stdout) + extracted = {} + + format_info = metadata.get("format", {}) + extracted["filename"] = format_info.get("filename") + extracted["format"] = format_info.get("format_long_name") + extracted["duration_sec"] = float(format_info.get("duration", 0)) + extracted["bit_rate"] = int(format_info.get("bit_rate", 0)) + extracted["size_bytes"] = int(format_info.get("size", 0)) + + return extracted + except Exception as e: + return {"error": str(e)} + + +def stabilize_video(input_path: str, stabilized_path: str, temp_dir: str) -> None: + """Stabilize video using FFmpeg vidstab filters, with error checks.""" + os.makedirs(temp_dir, exist_ok=True) + transforms_file = os.path.join(temp_dir, "transforms.trf") + + # Step 1: Detect transforms + detect_cmd = [ + "ffmpeg", + "-y", + "-i", + input_path, + "-vf", + f"vidstabdetect=shakiness=5:accuracy=15:result={transforms_file}", + "-f", + "null", + "-", + ] + print(f"[DEBUG] Running: {' '.join(detect_cmd)}") + detect_proc = subprocess.run(detect_cmd, capture_output=True, text=True) + + if detect_proc.returncode != 0: + print(f"[ERROR] vidstabdetect failed for {input_path}:\n{detect_proc.stderr}") + raise RuntimeError(f"FFmpeg vidstabdetect failed for {input_path}") + + if not os.path.exists(transforms_file): + 
raise FileNotFoundError(f"Transform file not created: {transforms_file}") + + # Step 2: Apply transforms + transform_cmd = [ + "ffmpeg", + "-y", + "-i", + input_path, + "-vf", + f"vidstabtransform=smoothing=30:input={transforms_file}", + "-c:v", + "libx264", + "-preset", + "slow", + "-crf", + "23", + "-c:a", + "copy", + stabilized_path, + ] + print(f"[DEBUG] Running: {' '.join(transform_cmd)}") + transform_proc = subprocess.run(transform_cmd, capture_output=True, text=True) + + if transform_proc.returncode != 0: + print( + f"[ERROR] vidstabtransform failed for {input_path}:" + f"\n{transform_proc.stderr}" + ) + raise RuntimeError(f"FFmpeg vidstabtransform failed for {input_path}") + + if not os.path.exists(stabilized_path): + raise FileNotFoundError(f"Stabilized video not created: {stabilized_path}") + + # Cleanup + os.remove(transforms_file) + + + def preprocess_video(input_path: str, output_path: str, temp_dir: str) -> None: + """Preprocess video with stabilization, denoising, and standardization.""" + if not os.path.exists(input_path): + raise ValueError(f"Input video not found: {input_path}") + + stabilized_tmp = os.path.join(temp_dir, f"stabilized_temp_{os.getpid()}.mp4") + + try: + stabilize_video(input_path, stabilized_tmp, temp_dir) + + # Verify stabilization succeeded + if not os.path.exists(stabilized_tmp): + raise ValueError( + "Video stabilization failed - no intermediate file created" + ) + _, height = TARGET_RESOLUTION.split("x") + vf_filters = ( + "yadif," + "hqdn3d," + "eq=contrast=1.0:brightness=0.0:saturation=1.0," + f"scale=-2:{height}," + "pad=ceil(iw/2)*2:ceil(ih/2)*2," + f"fps={TARGET_FRAMERATE}" + ) + + cmd = [ + "ffmpeg", + "-y", + "-i", + stabilized_tmp, + "-vf", + vf_filters, + "-c:v", + "libx264", + "-crf", + "23", + "-preset", + "fast", + "-c:a", + "aac", + "-b:a", + "128k", + "-movflags", + "+faststart", + output_path, + ] + + # Capture and check stderr + result = subprocess.run( + cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True + ) + if result.returncode != 0: + raise ValueError(f"Video processing failed: {result.stderr}") + + # Verify output file was created and has content + if not os.path.exists(output_path): + raise ValueError(f"Video processing failed - no output file: {output_path}") + if os.path.getsize(output_path) == 0: + raise ValueError( + f"Video processing failed - empty output file: {output_path}" + ) + + finally: + # Clean up temp file + if os.path.exists(stabilized_tmp): + os.remove(stabilized_tmp) + + + def extract_audio(input_path: str, output_audio_path: str) -> None: + """Extract audio from video file.""" + if not os.path.exists(input_path): + raise ValueError(f"Input video not found: {input_path}") + + cmd = [ + "ffmpeg", + "-y", + "-i", + input_path, + "-vn", + "-acodec", + "pcm_s16le", + "-ar", + "16000", + "-ac", + "1", + output_audio_path, + ] + + # Check return code and stderr + result = subprocess.run( + cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True + ) + if result.returncode != 0: + raise ValueError(f"Audio extraction failed: {result.stderr}") + + # Verify output file was created + if not os.path.exists(output_audio_path): + raise ValueError( + f"Audio extraction failed - no output file: {output_audio_path}" + ) + + + def safe_float_conversion( + value: float | int | str | None, default: str = "n/a" + ) -> float | str: + """Convert value to float, return default if conversion fails.""" + if value is None or pd.isna(value): + return default + + # Convert to string and check for common non-numeric 
indicators + str_val = str(value).strip().lower() + if str_val in ["", "n/a", "na", "nan", "none", "null"]: + return default + + try: + return float(value) + except (ValueError, TypeError): + return default + + +# BIDS file creation functions +def create_events_file( + group_df: pd.DataFrame, output_path: str, full_filepath: str +) -> None: + """Create events.tsv file from Excel data with all columns.""" + events_data = [] + + for idx, row in group_df.iterrows(): + event = { + "onset": 0.0, + "duration": parse_duration(row.get("Vid_duration", "00:00:00")), + "coder": str(row.get("Coder", "n/a")), + "filepath_engaging": str(full_filepath), + "source_file": str(row.get("SourceFile", "n/a")), + "context": str(row.get("Context", "n/a")), + "location": str(row.get("Location", "n/a")), + "activity": str(row.get("Activity", "n/a")), + "child_clear": str(row.get("Child_of_interest_clear", "n/a")), + "num_adults": str(row.get("#_adults", "n/a")), + "num_children": str(row.get("#_children", "n/a")), + "num_people_background": str(row.get("#_people_background", "n/a")), + "interaction_with_child": str(row.get("Interaction_with_child", "n/a")), + "num_people_interacting": str(row.get("#_people_interacting", "n/a")), + "child_constrained": str(row.get("Child_constrained", "n/a")), + "constraint_type": str(row.get("Constraint_type", "n/a")), + "supports": str(row.get("Supports", "n/a")), + "support_type": str(row.get("Support_type", "n/a")), + "example_support_type": str(row.get("Example_support_type", "n/a")), + "gestures": str(row.get("Gestures", "n/a")), + "gesture_type": str(row.get("Gesture_type", "n/a")), + "vocalizations": str(row.get("Vocalizations", "n/a")), + "rmm": str(row.get("RMM", "n/a")), + "rmm_type": str(row.get("RMM_type", "n/a")), + "response_to_name": str(row.get("Response_to_name", "n/a")), + "locomotion": str(row.get("Locomotion", "n/a")), + "locomotion_type": str(row.get("Locomotion_type", "n/a")), + "grasping": str(row.get("Grasping", "n/a")), + "grasp_type": str(row.get("Grasp_type", "n/a")), + "body_parts_visible": str(row.get("Body_Parts_Visible", "n/a")), + "angle_of_body": str(row.get("Angle_of_Body", "n/a")), + "timepoint": str(row.get("time_point", "n/a")), + "dob": str(row.get("DOB", "n/a")), + "vid_date": str(row.get("Vid_date", "n/a")), + "video_quality_face": safe_float_conversion( + row.get("Video_Quality_Child_Face_Visibility") + ), + "video_quality_body": safe_float_conversion( + row.get("Video_Quality_Child_Body_Visibility") + ), + "video_quality_hand": safe_float_conversion( + row.get("Video_Quality_Child_Hand_Visibility") + ), + "video_quality_lighting": safe_float_conversion( + row.get("Video_Quality_Lighting") + ), + "video_quality_resolution": safe_float_conversion( + row.get("Video_Quality_Resolution") + ), + "video_quality_motion": safe_float_conversion( + row.get("Video_Quality_Motion") + ), + "notes": str(row.get("Notes", "n/a")), + } + events_data.append(event) + + events_df = pd.DataFrame(events_data) + events_df.to_csv(output_path, sep="\t", index=False, na_rep="n/a") + + +def create_video_metadata_json( + metadata: Dict[str, Any], + processing_info: Dict[str, Any], + task_info: Dict[str, Any], + output_path: str, +) -> None: + """Create JSON metadata file for processed video with dynamic task info.""" + video_json = { + "TaskName": task_info.get("task_name", "unknown"), + "TaskDescription": task_info.get( + "task_description", "Video recorded during behavioral session" + ), + "Instructions": task_info.get( + "instructions", "Natural behavior in home 
environment" + ), + "Context": task_info.get("context", "n/a"), + "Activity": task_info.get("activity", "n/a"), + "SamplingFrequency": TARGET_FRAMERATE, + "Resolution": TARGET_RESOLUTION, + "ProcessingPipeline": { + "Stabilization": processing_info.get("has_stabilization", False), + "Denoising": processing_info.get("has_denoising", False), + "Equalization": processing_info.get("has_equalization", False), + "StandardizedFPS": TARGET_FRAMERATE, + "StandardizedResolution": TARGET_RESOLUTION, + }, + "OriginalMetadata": metadata, + } + save_json(video_json, output_path) + + +def create_audio_metadata_json( + duration_sec: float, task_info: Dict[str, Any], output_path: str +) -> None: + """Create JSON metadata file for extracted audio with dynamic task info.""" + audio_json = { + "SamplingFrequency": 16000, + "Channels": 1, + "SampleEncoding": "16bit", + "Duration": duration_sec, + "TaskName": task_info.get("task_name", "unknown"), + "TaskDescription": task_info.get( + "task_description", "Audio extracted from behavioral session" + ), + "Context": task_info.get("context", "n/a"), + "Activity": task_info.get("activity", "n/a"), + } + save_json(audio_json, output_path) + + +def create_raw_video_json( + row: pd.Series, task_info: Dict[str, Any], video_path: str, output_path: str +) -> None: + """Create JSON metadata for raw video.""" + video_props = get_video_properties(video_path) + + video_json = { + "TaskName": task_info.get("task_name", "unknown"), + "TaskDescription": task_info.get( + "task_description", "Raw video from behavioral session" + ), + "SamplingFrequency": video_props.get("SamplingFrequency", "n/a"), + "Resolution": video_props.get("Resolution", "n/a"), + "OriginalFilename": str(row.get("FileName", "")), + "Duration": parse_duration(row.get("Vid_duration", "00:00:00")), + "RecordingDate": str(row.get("Vid_date", "n/a")), + "Context": task_info.get("context", "n/a"), + "Activity": task_info.get("activity", "n/a"), + "TimePoint": str(row.get("timepoint", "n/a")), + "SourceFile": str(row.get("SourceFile", "n/a")), + } + save_json(video_json, output_path) + + +def process_single_video( + video_info: Dict, + annotation_df: pd.DataFrame, + final_bids_root: str, + final_derivatives_dir: str, + temp_dir: str, +) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: + """Process a single video with all BIDS structures.""" + participant_id = video_info["participant_id"] + filename = video_info["filename"] + session_id = video_info["session_id"] + input_video_path = video_info["full_path"] + safe_print(f"Processing: {participant_id}/{filename}") + filename_without_extension = os.path.splitext(filename)[0] + # Check if video exists in Excel or create dummy data + + try: + # Check if video exists in Excel or create dummy data + participant_excel = annotation_df[ + annotation_df["ID"].astype(str) == str(participant_id) + ] + mask = ( + participant_excel["FileName"].str.split(".").str[0] + == filename_without_extension + ) + video_excel = participant_excel[mask] + if video_excel.empty: + # Create dummy data for missing Excel entries + dummy_data = create_dummy_excel_data( + input_video_path, participant_id, session_id + ) + video_excel = pd.DataFrame([dummy_data]) + has_excel_data = False + safe_print("No Excel data found - using dummy data") + else: + has_excel_data = True + + excel_row = video_excel.iloc[0] + task_label = get_task_from_excel_row(excel_row) + activity = excel_row.get("Activity", "unknown activity") + # Create task information + task_info = { + "task_name": task_label, + 
"task_description": f"Behavioral session: {activity}", + "instructions": "Natural behavior observation", + "context": str(excel_row.get("Context", "n/a")), + "activity": str(excel_row.get("Activity", "n/a")), + } + + # Create BIDS directory structure + raw_subj_dir = os.path.join( + final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh" + ) + deriv_subj_dir = os.path.join( + final_derivatives_dir, f"sub-{participant_id}", f"ses-{session_id}", "beh" + ) + + os.makedirs(raw_subj_dir, exist_ok=True) + os.makedirs(deriv_subj_dir, exist_ok=True) + + # Create BIDS filenames with run number + ext = os.path.splitext(filename)[1] + run_number = get_next_run_number( + participant_id, session_id, task_label, final_bids_root + ) + + raw_video_name = create_bids_filename( + participant_id, session_id, task_label, "beh", "mp4", run_number + ) + processed_video_name = create_bids_filename( + participant_id, + session_id, + task_label, + "desc-processed_beh", + "mp4", + run_number, + ) + audio_name = create_bids_filename( + participant_id, session_id, task_label, "audio", "wav", run_number + ) + events_name = create_bids_filename( + participant_id, session_id, task_label, "events", "tsv", run_number + ) + + # File paths + raw_video_path = os.path.join(raw_subj_dir, raw_video_name) + processed_video_path = os.path.join(deriv_subj_dir, processed_video_name) + audio_path = os.path.join(deriv_subj_dir, audio_name) + events_path = os.path.join(raw_subj_dir, events_name) + + if not os.path.exists(raw_video_path): + if ext.lower() != ".mp4": + # Convert to mp4 without processing + cmd = [ + "ffmpeg", + "-y", + "-i", + input_video_path, + "-c", + "copy", + raw_video_path, + ] + result = subprocess.run( + cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True + ) + # Check return code and verify output file + if result.returncode != 0: + raise ValueError(f"FFmpeg conversion failed: {result.stderr}") + if not os.path.exists(raw_video_path): + raise ValueError( + f"FFmpeg did not create output file: {raw_video_path}" + ) + safe_print(" Converted to raw BIDS format") + else: + shutil.copy2(input_video_path, raw_video_path) + # FIX: Verify copy succeeded + if not os.path.exists(raw_video_path): + raise ValueError(f"Failed to copy to raw BIDS: {raw_video_path}") + safe_print(" Copied to raw BIDS") + + # Extract metadata from raw video + exif_data = extract_exif(raw_video_path) + if "error" in exif_data or "ffprobe_error" in exif_data: + raise ValueError("Unreadable or unsupported video format") + + # Process video for derivatives + if not os.path.exists(processed_video_path): + safe_print(" Starting video processing...") + preprocess_video(raw_video_path, processed_video_path, temp_dir) + # Verify processing succeeded + if not os.path.exists(processed_video_path): + raise ValueError( + f"Video processing failed - no output file: {processed_video_path}" + ) + if os.path.getsize(processed_video_path) == 0: + raise ValueError( + "Video processing failed- empty output file:" + f" {processed_video_path}" + ) + safe_print(" Video processing complete") + + if not os.path.exists(audio_path): + safe_print(" Extracting audio...") + extract_audio(processed_video_path, audio_path) + # Verify audio extraction succeeded + if not os.path.exists(audio_path): + raise ValueError( + f"Audio extraction failed - no output file: {audio_path}" + ) + if os.path.getsize(audio_path) == 0: + raise ValueError( + f"Audio extraction failed - empty output file: {audio_path}" + ) + safe_print(" Audio extraction complete") + + 
# Create events files + create_events_file(video_excel, events_path, input_video_path) + if not os.path.exists(events_path): + raise ValueError(f"Failed to create events file: {events_path}") + + # Create metadata JSON files + processing_info = { + "has_stabilization": True, + "has_denoising": True, + "has_equalization": True, + } + + # Raw video JSON + raw_video_json_path = raw_video_path.replace(".mp4", ".json") + create_raw_video_json( + excel_row, + task_info, + raw_video_path, + raw_video_json_path, + ) + if not os.path.exists(raw_video_json_path): + raise ValueError(f"Failed to create raw video JSON: {raw_video_json_path}") + + # Processed video JSON + processed_video_json_path = processed_video_path.replace(".mp4", ".json") + create_video_metadata_json( + exif_data, + processing_info, + task_info, + processed_video_json_path, + ) + if not os.path.exists(processed_video_json_path): + raise ValueError( + f"Failed to create processed video JSON: {processed_video_json_path}" + ) + + # Audio JSON + audio_json_path = audio_path.replace(".wav", ".json") + create_audio_metadata_json( + exif_data.get("duration_sec", 0), task_info, audio_json_path + ) + if not os.path.exists(audio_json_path): + raise ValueError(f"Failed to create audio JSON: {audio_json_path}") + + # Store processing information + entry = { + "participant_id": participant_id, + "session_id": session_id, + "task_label": task_label, + "original_video": input_video_path, + "raw_video_bids": raw_video_path, + "processed_video_bids": processed_video_path, + "audio_file_bids": audio_path, + "events_file_bids": events_path, + "filename": filename, + "age_folder": video_info["age_folder"], + "duration_sec": exif_data.get("duration_sec", 0), + "has_excel_data": has_excel_data, + "excel_metadata": excel_row.to_dict(), + "task_info": task_info, + "processing_info": processing_info, + } + + safe_print(f" Successfully processed: {participant_id}/{filename}") + return entry, None + + except Exception as e: + safe_print(f" ERROR processing {input_video_path}: {str(e)}") + return None, {"video": input_video_path, "error": str(e)} + + +def create_dataset_description() -> None: + """Create dataset_description.json for main BIDS dataset.""" + dataset_desc = { + "Name": "SAILS Phase III Home Videos", + "BIDSVersion": "1.9.0", + "DatasetType": "raw", + } + try: + filepath = os.path.join(FINAL_BIDS_ROOT, "dataset_description.json") + save_json(dataset_desc, filepath) + + except Exception as e: + raise ValueError( + f"Failed to create dataset_description.json at {filepath}: {e}" + ) + + +def create_derivatives_dataset_description() -> None: + """Create dataset_description.json for derivatives.""" + os.makedirs(FINAL_DERIVATIVES_DIR, exist_ok=True) + + derivatives_desc = { + "Name": "SAILS Phase III Home Videos - Preprocessed", + "BIDSVersion": "1.9.0", + "DatasetType": "derivative", + "GeneratedBy": [ + { + "Name": "Video Preprocessing Pipeline", + "Version": "1.0.0", + "Description": ( + "FFmpeg-based video stabilization, denoising, " + "and standardization pipeline with audio extraction" + ), + "CodeURL": "local", + } + ], + "SourceDatasets": [{"URL": "", "Version": "1.0.0"}], + "HowToAcknowledge": "Please cite the original study", + } + + filepath = os.path.join(FINAL_DERIVATIVES_DIR, "dataset_description.json") + save_json(derivatives_desc, filepath) + if not os.path.exists(filepath): + raise ValueError( + f"Failed to create derivatives dataset_description.json at {filepath}" + ) + + +def create_readme() -> None: + """Create README file for the 
BIDS dataset.""" + readme_content = """# SAILS Phase III Home Videos BIDS Dataset + +## Overview +This dataset contains home videos from the SAILS Phase III study, +organized according to the Brain Imaging Data Structure (BIDS) specification. + +## Data Collection +Videos were collected from home environments during various activities. +Two main age groups were included: +- Session 01: 12-16 month old children +- Session 02: 34-38 month old children + +## Dataset Structure +### Raw Data +- sub-*/ses-*/beh/: Raw behavioral videos (converted to mp4) and event +annotations (contains also the original filepath of the video processed) + +### Derivatives +- derivatives/preprocessed/sub-*/ses-*/beh/: Processed videos and extracted audio + - Videos: Stabilized, denoised, standardized to 720p/30fps + - Audio: Extracted to 16kHz mono WAV format + +## Data Processing +All videos underwent standardized preprocessing including: +- Video stabilization using vidstab +- Denoising and quality enhancement +- Standardization to 720p resolution and 30fps +- Audio extraction for speech analysis +- Filename modication according to subject ID and task label +- Extraction of ASD status for every subject stored in the participants.tsv file. + +## Behavioral Coding +Events files include manual annotations from csv file and Engaging +location of the raw video. + +## Task Labels +Task labels are derived from the Context column in the csv. +It allows to capture what kind of interaction was happening in the video. +Videos without behavioral coding data use "unknown" task label. +""" + + filepath = os.path.join(OUTPUT_DIR, "README") + try: + with open(filepath, "w") as f: + f.write(readme_content) + except Exception as e: + raise ValueError(f"Failed to create README at {filepath}: {e}") + + +def create_participants_file() -> None: + """Create participants.tsv and participants.json files.""" + asd_status = pd.read_excel(ASD_STATUS_FILE) + ids_processed_participants = [] + for name in os.listdir(FINAL_BIDS_ROOT): + full_path = os.path.join(FINAL_BIDS_ROOT, name) + if os.path.isdir(full_path) and name.startswith("sub-"): + ids_processed_participants.append(name.split("sub-")[1]) + participants_data = [] + for participant_id in sorted(ids_processed_participants): + asd_info = asd_status[asd_status["ID"].astype(str) == str(participant_id)] + participants_data.append( + { + "participant_id": f"sub-{participant_id}", + "group": asd_info["Group"].values[0] if not asd_info.empty else "n/a", + } + ) + + participants_df = pd.DataFrame(participants_data) + participants_df.to_csv( + os.path.join(FINAL_BIDS_ROOT, "participants.tsv"), + sep="\t", + index=False, + na_rep="n/a", + ) + + participants_json = { + "participant_id": {"Description": "Unique BIDS participant identifier"}, + "Group": {"Description": "ASD status"}, + } + + save_json(participants_json, os.path.join(FINAL_BIDS_ROOT, "participants.json")) + + +def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None: + """Print processing summary statistics.""" + print("PROCESSING SUMMARY") + + print(f"Successfully processed: {len(all_processed)} videos") + print(f"Failed to process: {len(all_failed)} videos") + print(f"Total videos attempted: {len(all_processed) + len(all_failed)}") + + if all_processed: + # Excel data availability + with_excel = sum( + 1 for entry in all_processed if entry.get("has_excel_data", False) + ) + without_excel = len(all_processed) - with_excel + print("\nData sources:") + print(f" With Excel behavioral data: {with_excel} videos") + 
print(f" With dummy behavioral data: {without_excel} videos") + + # Task distribution + task_counts: dict[str, int] = {} + participant_counts: dict[str, int] = {} + session_counts: dict[str, int] = {} + + for entry in all_processed: + task = entry["task_label"] + participant = entry["participant_id"] + session = entry["session_id"] + task_counts[task] = task_counts.get(task, 0) + 1 + participant_counts[participant] = participant_counts.get(participant, 0) + 1 + session_counts[session] = session_counts.get(session, 0) + 1 + + print("\nTask distribution:") + for task, count in sorted(task_counts.items()): + print(f" {task}: {count} videos") + + print("\nSession distribution:") + for session, count in sorted(session_counts.items()): + print(f" Session {session}: {count} videos") + + print(f"\nUnique participants processed: {len(participant_counts)}") + + # Duration statistics + durations = [entry.get("duration_sec", 0) for entry in all_processed] + total_duration = sum(durations) + avg_duration = total_duration / len(durations) if durations else 0 + + print("\nDuration statistics:") + print(f" Total video duration: {total_duration/3600:.1f} hours") + print(f" Average video duration: {avg_duration/60:.1f} minutes") + + if all_failed: + print("\nFailed videos breakdown:") + error_types: dict[str, int] = {} + for entry in all_failed: + error = entry.get("error", "Unknown error") + error_types[error] = error_types.get(error, 0) + 1 + + for error, count in sorted(error_types.items()): + print(f" {error}: {count} videos") + + +def merge_subjects() -> None: + """Merge duplicated subjects folders.""" + paths_to_check = [ + Path(FINAL_BIDS_ROOT), + Path(FINAL_BIDS_ROOT) / "derivatives" / "preprocessed", + ] + + for folder in paths_to_check: + if not folder.exists(): + continue + + subs = [d for d in folder.iterdir() if d.is_dir() and d.name.startswith("sub-")] + sub_names = {d.name for d in subs} + + for sub in subs: + if sub.name.endswith(" 2"): + original_name = sub.name[:-2] + original_path = folder / original_name + if original_name in sub_names and original_path.exists(): + print(f"Merging {sub} → {original_path}") + + for item in sub.iterdir(): + dest = original_path / item.name + if item.is_dir(): + if not dest.exists(): + shutil.copytree(item, dest) + else: + # merge recursively if same session already exists + for subitem in item.iterdir(): + dest_sub = dest / subitem.name + if not dest_sub.exists(): + if subitem.is_dir(): + shutil.copytree(subitem, dest_sub) + else: + shutil.copy2(subitem, dest_sub) + else: + if not dest.exists(): + shutil.copy2(item, dest) + shutil.rmtree(sub) + else: + print(f"No base subject found for {sub}, skipping.") + + +def process_videos( + task_id: int, + num_tasks: int, + annotation_df: pd.DataFrame, + all_videos: list, + final_bids_root: str, + final_derivatives_dir: str, + output_dir: str, +) -> tuple[list, list]: + """Process the subset of videos assigned to this task. 
+ + Returns: + (all_processed, all_failed) + """ + safe_print(f"Task {task_id}: Processing videos...") + video_chunks = all_videos[task_id::num_tasks] + + if not video_chunks: + safe_print(f"No videos assigned to task {task_id}") + return [], [] + + temp_dir = os.path.join(output_dir, str(task_id), "temp") + os.makedirs(temp_dir, exist_ok=True) + + all_processed, all_failed = [], [] + + for i, video_info in enumerate(video_chunks, 1): + safe_print(f"[Task {task_id}] Video {i}/{len(video_chunks)}") + processed_entry, failed_entry = process_single_video( + video_info, + annotation_df, + final_bids_root, + final_derivatives_dir, + temp_dir, + ) + if processed_entry: + all_processed.append(processed_entry) + if failed_entry: + all_failed.append(failed_entry) + + # Save per-task logs + task_dir = os.path.join(output_dir, str(task_id)) + os.makedirs(task_dir, exist_ok=True) + save_json(all_processed, os.path.join(task_dir, "processing_log.json")) + save_json(all_failed, os.path.join(task_dir, "not_processed.json")) + + # Cleanup temp dir + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir) + + return all_processed, all_failed + + +def main() -> None: + """Main entry point for multi-task BIDS video processing.""" + if len(sys.argv) != 3: + print("Usage: python updated_bids.py <task_id> <num_tasks>") + sys.exit(1) + + my_task_id = int(sys.argv[1]) + num_tasks = int(sys.argv[2]) + + start_time = time.time() + + # --- Validate paths --- + for path, label in [(VIDEO_ROOT, "Video root"), (ANNOTATION_FILE, "Excel file")]: + if not os.path.exists(path): + print(f"ERROR: {label} not found at {path}") + sys.exit(1) + + # --- Load metadata --- + try: + annotation_df = pd.read_csv(ANNOTATION_FILE) + annotation_df.columns = annotation_df.columns.str.strip() + safe_print(f"Loaded {len(annotation_df)} rows from Excel file") + except Exception as e: + safe_print(f"ERROR: Failed to load Excel file: {e}") + sys.exit(1) + + # --- Discover videos --- + safe_print("Discovering videos...") + all_videos = get_all_videos(VIDEO_ROOT, annotation_df) + if not all_videos: + safe_print("ERROR: No videos found.") + sys.exit(1) + safe_print(f"Found {len(all_videos)} video files.") + + # --- Create BIDS structure (only once) --- + if my_task_id == 0: + try: + safe_print("Creating BIDS structure files...") + create_bids_structure() + create_dataset_description() + create_derivatives_dataset_description() + create_readme() + except Exception as e: + safe_print(f"CRITICAL ERROR: Failed to create BIDS structure files: {e}") + sys.exit(1) + + # --- Process this task’s subset --- + all_processed, all_failed = process_videos( + my_task_id, + num_tasks, + annotation_df, + all_videos, + FINAL_BIDS_ROOT, + FINAL_DERIVATIVES_DIR, + OUTPUT_DIR, + ) + + # --- Final summary --- + total_time = time.time() - start_time + print_summary(all_processed, all_failed) + safe_print( + f"Total processing time: {total_time / 3600:.1f}" + f" hours ({total_time / 60:.1f} minutes)" + ) + + if all_processed: + avg_time = total_time / len(all_processed) + safe_print(f"Average time per video: {avg_time:.1f} seconds") + + safe_print("Processing complete ✅") + + +if __name__ == "__main__": + main() diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py index 7ba6fc2..e962a51 100644 --- a/src/tests/test_BIDS_convertor.py +++ b/src/tests/test_BIDS_convertor.py @@ -41,7 +41,7 @@ def setup_mock_config() -> Generator[None, None, None]: def bvp_module(setup_mock_config: Generator[None, None, None]) -> ModuleType: """Import the BIDS converter module.""" 
sys.path.insert(0, "src") - import BIDS_convertor as bvp + import sailsprep.BIDS_convertor as bvp return bvp @@ -113,7 +113,9 @@ class TestBIDSNaming: def test_create_bids_filename(self, bvp_module: ModuleType) -> None: """Test BIDS filename creation.""" - filename = bvp_module.create_bids_filename("123", "01", "mealtime", "beh", "mp4") + filename = bvp_module.create_bids_filename( + "123", "01", "mealtime", "beh", "mp4" + ) expected = "sub-123_ses-01_task-mealtime_run-01_beh.mp4" assert filename == expected @@ -285,12 +287,12 @@ def test_save_json(self, bvp_module: ModuleType) -> None: class TestMainWorkflow: """Test the main processing workflow.""" - @patch("BIDS_convertor.process_videos") - @patch("BIDS_convertor.create_readme") - @patch("BIDS_convertor.create_derivatives_dataset_description") - @patch("BIDS_convertor.create_dataset_description") - @patch("BIDS_convertor.create_bids_structure") - @patch("BIDS_convertor.save_json") + @patch("sailsprep.BIDS_convertor.process_videos") + @patch("sailsprep.BIDS_convertor.create_readme") + @patch("sailsprep.BIDS_convertor.create_derivatives_dataset_description") + @patch("sailsprep.BIDS_convertor.create_dataset_description") + @patch("sailsprep.BIDS_convertor.create_bids_structure") + @patch("sailsprep.BIDS_convertor.save_json") def test_main_workflow( self, mock_save_json: MagicMock, From 04bad65690466a270d759f337f240a69a09b2ef5 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 11:54:25 -0500 Subject: [PATCH 25/36] Fixed warnings in BIDS_convertor.py from PR --- src/sailsprep/BIDS_convertor.py | 71 +++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/src/sailsprep/BIDS_convertor.py b/src/sailsprep/BIDS_convertor.py index 1b63d53..2b5f81d 100644 --- a/src/sailsprep/BIDS_convertor.py +++ b/src/sailsprep/BIDS_convertor.py @@ -14,6 +14,7 @@ * check with actual data """ +import argparse import json import os import re @@ -153,13 +154,13 @@ def get_video_properties(video_path: str) -> dict: cap.release() return { - "SamplingFrequency": fps, + "FrameRate": fps, "Resolution": f"{width}x{height}", } except Exception as e: print(f"Error reading video {video_path}: {e}") - return {"SamplingFrequency": None, "Resolution": None} + return {"FrameRate": None, "Resolution": None} def determine_session_from_folder(folder_name: str) -> Optional[str]: @@ -629,7 +630,6 @@ def stabilize_video(input_path: str, stabilized_path: str, temp_dir: str) -> Non "null", "-", ] - print(f"[DEBUG] Running: {' '.join(detect_cmd)}") detect_proc = subprocess.run(detect_cmd, capture_output=True, text=True) if detect_proc.returncode != 0: @@ -880,7 +880,7 @@ def create_video_metadata_json( ), "Context": task_info.get("context", "n/a"), "Activity": task_info.get("activity", "n/a"), - "SamplingFrequency": TARGET_FRAMERATE, + "FrameRate": TARGET_FRAMERATE, "Resolution": TARGET_RESOLUTION, "ProcessingPipeline": { "Stabilization": processing_info.get("has_stabilization", False), @@ -924,7 +924,7 @@ def create_raw_video_json( "TaskDescription": task_info.get( "task_description", "Raw video from behavioral session" ), - "SamplingFrequency": video_props.get("SamplingFrequency", "n/a"), + "FrameRate": video_props.get("FrameRate", "n/a"), "Resolution": video_props.get("Resolution", "n/a"), "OriginalFilename": str(row.get("FileName", "")), "Duration": parse_duration(row.get("Vid_duration", "00:00:00")), @@ -1254,7 +1254,7 @@ def create_readme() -> None: Videos without behavioral coding data use "unknown" task label. 
""" - filepath = os.path.join(OUTPUT_DIR, "README") + filepath = os.path.join(FINAL_BIDS_ROOT, "README") try: with open(filepath, "w") as f: f.write(readme_content) @@ -1358,7 +1358,7 @@ def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None: def merge_subjects() -> None: - """Merge duplicated subjects folders.""" + """Merge duplicated subject folders safely.""" paths_to_check = [ Path(FINAL_BIDS_ROOT), Path(FINAL_BIDS_ROOT) / "derivatives" / "preprocessed", @@ -1381,20 +1381,42 @@ def merge_subjects() -> None: for item in sub.iterdir(): dest = original_path / item.name if item.is_dir(): - if not dest.exists(): - shutil.copytree(item, dest) - else: + if dest.exists(): + if dest.is_file(): + print( + f"Conflict: {dest} is a file, " + "expected a folder. Skipping." + ) + continue # merge recursively if same session already exists for subitem in item.iterdir(): dest_sub = dest / subitem.name - if not dest_sub.exists(): - if subitem.is_dir(): - shutil.copytree(subitem, dest_sub) - else: - shutil.copy2(subitem, dest_sub) + if dest_sub.exists(): + # type conflict handling + if dest_sub.is_file() != subitem.is_file(): + print( + f"Type conflict for {dest_sub}, " + "skipping." + ) + continue + if subitem.is_dir(): + shutil.copytree( + subitem, dest_sub, dirs_exist_ok=True + ) + else: + shutil.copy2(subitem, dest_sub) + else: + shutil.copytree(item, dest) else: - if not dest.exists(): - shutil.copy2(item, dest) + if dest.exists(): + if dest.is_dir(): + print( + f"Conflict: {dest} is a directory," + " expected a file. Skipping." + ) + continue + shutil.copy2(item, dest) + shutil.rmtree(sub) else: print(f"No base subject found for {sub}, skipping.") @@ -1455,12 +1477,17 @@ def process_videos( def main() -> None: """Main entry point for multi-task BIDS video processing.""" - if len(sys.argv) != 3: - print("Usage: python updated_bids.py ") - sys.exit(1) + parser = argparse.ArgumentParser( + description="Run updated_bids with task and total number of tasks." + ) + parser.add_argument("task_id", type=int, help="ID of the current task") + parser.add_argument("num_tasks", type=int, help="Total number of tasks") + + args = parser.parse_args() + my_task_id = args.task_id + num_tasks = args.num_tasks - my_task_id = int(sys.argv[1]) - num_tasks = int(sys.argv[2]) + print(f"Running task {my_task_id}/{num_tasks}") start_time = time.time() From 50a80b977686c3bc06c84426738db42e33e637ca Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 11:56:04 -0500 Subject: [PATCH 26/36] Cleaned /logs handling --- jobs/merge_cleanup.sh | 9 ++++++--- jobs/run_bids_convertor.sh | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/jobs/merge_cleanup.sh b/jobs/merge_cleanup.sh index 5323999..4495968 100644 --- a/jobs/merge_cleanup.sh +++ b/jobs/merge_cleanup.sh @@ -7,10 +7,13 @@ # Clean up old logs before running echo "Cleaning up old logs..." -rm -rf logs -mkdir -p logs +if [ -d logs ]; then + find logs -mindepth 1 ! -name ".gitkeep" \ + ! -name "merge_cleanup_${SLURM_JOB_ID}.out" \ + ! 
-name "merge_cleanup_${SLURM_JOB_ID}.err" -delete +fi -OUTPUT_DIR=$(poetry run python -c "import yaml; with open('configs/config_bids_convertor.yaml') as f: print(yaml.safe_load(f)['output_dir'])") +OUTPUT_DIR=$(poetry run python -c "import yaml, sys; print(yaml.safe_load(open('configs/config_bids_convertor.yaml'))['output_dir'])") MERGED_DIR="$OUTPUT_DIR" mkdir -p "$MERGED_DIR" diff --git a/jobs/run_bids_convertor.sh b/jobs/run_bids_convertor.sh index da8d629..1269760 100644 --- a/jobs/run_bids_convertor.sh +++ b/jobs/run_bids_convertor.sh @@ -14,7 +14,6 @@ SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) PROJECT_ROOT=$(cd -- "$SCRIPT_DIR/.." &> /dev/null && pwd) cd "$PROJECT_ROOT" -mkdir -p logs export PYTHONUNBUFFERED=1 echo "Job started at $(date) on node $(hostname)" From 0fce931eb0e8c96ab12f81e79fc62c077976699d Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 11:57:15 -0500 Subject: [PATCH 27/36] Changed source video to raw folder --- configs/config_bids_convertor.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/config_bids_convertor.yaml b/configs/config_bids_convertor.yaml index ab38518..bdeeb3f 100644 --- a/configs/config_bids_convertor.yaml +++ b/configs/config_bids_convertor.yaml @@ -2,7 +2,7 @@ # Input data annotation_file: /orcd/data/satra/002/datasets/SAILS/data4analysis/Video Rating Data/SAILS_RATINGS_ALL_DEDUPLICATED_NotForFinalAnalyses_2025.10.csv -video_root: /orcd/data/satra/002/datasets/SAILS/Phase_III_Videos/Videos_from_external_standardized +video_root: /orcd/data/satra/002/datasets/SAILS/Phase_III_Videos/Videos_from_external asd_status: /orcd/data/satra/002/datasets/SAILS/data4analysis/ASD_Status.xlsx # Output data From c8699e4cde4e881218649ab6d5b7dd79fc474848 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 12:04:09 -0500 Subject: [PATCH 28/36] Update src/tests/test_BIDS_convertor.py --- src/tests/test_BIDS_convertor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py index e962a51..738faa2 100644 --- a/src/tests/test_BIDS_convertor.py +++ b/src/tests/test_BIDS_convertor.py @@ -121,13 +121,13 @@ def test_create_bids_filename(self, bvp_module: ModuleType) -> None: def test_get_session_from_path_12_16_months(self, bvp_module: ModuleType) -> None: """Test session determination for 12-16 month videos.""" - path = "/data/videos/12-16 month/participant_video.mp4" + path = "12-16 month" session = bvp_module.determine_session_from_folder(path) assert session == "01" def test_get_session_from_path_34_38_months(self, bvp_module: ModuleType) -> None: """Test session determination for 34-38 month videos.""" - path = "/data/videos/34-38 month/participant_video.mp4" + path = "34-38 month" session = bvp_module.determine_session_from_folder(path) assert session == "02" From 1e04269388b2e7d9f61c72da713ea6b399b72f29 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 16:06:16 -0500 Subject: [PATCH 29/36] fixed issues of execution --- jobs/run_bids_convertor.sh | 33 ++++++++++++++++++++------------- src/sailsprep/BIDS_convertor.py | 3 ++- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/jobs/run_bids_convertor.sh b/jobs/run_bids_convertor.sh index 1269760..3fe666c 100644 --- a/jobs/run_bids_convertor.sh +++ b/jobs/run_bids_convertor.sh @@ -8,27 +8,34 @@ #SBATCH --time=10:00:00 #SBATCH --cpus-per-task=5 -# --- Environment setup --- -# Determine project root dynamically -SCRIPT_DIR=$(cd -- 
"$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) -PROJECT_ROOT=$(cd -- "$SCRIPT_DIR/.." &> /dev/null && pwd) +mkdir -p logs -cd "$PROJECT_ROOT" +# --- Determine project root robustly --- +if [ -n "$SLURM_SUBMIT_DIR" ]; then + cd "$SLURM_SUBMIT_DIR" || { echo "❌ Cannot cd to SLURM_SUBMIT_DIR=$SLURM_SUBMIT_DIR"; exit 1; } +else + SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + cd "$SCRIPT_DIR/.." || { echo "❌ Cannot cd to project root"; exit 1; } +fi + +echo "Running from project root: $(pwd)" export PYTHONUNBUFFERED=1 -echo "Job started at $(date) on node $(hostname)" -echo "Task ID: $SLURM_ARRAY_TASK_ID of $SLURM_ARRAY_TASK_COUNT" +ffmpeg -version || echo "⚠️ FFmpeg not available" -echo "FFmpeg version:" -ffmpeg -version +# --- Poetry setup --- +if ! poetry env info --path &> /dev/null; then + echo "Creating Poetry environment..." + poetry install || { echo "❌ Poetry install failed"; exit 1; } +fi -# Activate poetry env from project root -source $(poetry env info --path)/bin/activate +ENV_PATH=$(poetry env info --path) +source "$ENV_PATH/bin/activate" || { echo "❌ Failed to activate Poetry environment"; exit 1; } echo "Using Python from: $(which python)" +echo "Task ID: ${SLURM_ARRAY_TASK_ID}" echo "Starting BIDS conversion at $(date)" -# Run your script -python -m sailsprep.BIDS_convertor $SLURM_ARRAY_TASK_ID $SLURM_ARRAY_TASK_COUNT +python -m sailsprep.BIDS_convertor "$SLURM_ARRAY_TASK_ID" "$SLURM_ARRAY_TASK_MAX" echo "Finished at $(date)" diff --git a/src/sailsprep/BIDS_convertor.py b/src/sailsprep/BIDS_convertor.py index 2b5f81d..2d38b24 100644 --- a/src/sailsprep/BIDS_convertor.py +++ b/src/sailsprep/BIDS_convertor.py @@ -689,12 +689,13 @@ def preprocess_video(input_path: str, output_path: str, temp_dir: str) -> None: raise ValueError( "Video stabilization failed - no intermediate file created" ) + width, height = TARGET_RESOLUTION.split("x") vf_filters = ( "yadif," "hqdn3d," "eq=contrast=1.0:brightness=0.0:saturation=1.0," - "scale=-2:{height}," + f"scale=-2:{height}," "pad=ceil(iw/2)*2:ceil(ih/2)*2," f"fps={TARGET_FRAMERATE}" ) From 9e5c76df5a258d5dd1d2cd0763199c5bb9708744 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 17:15:12 -0500 Subject: [PATCH 30/36] added documentation --- README.md | 42 ++++++++++++---------------------- docs/BIDS_convertor.md | 52 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 28 deletions(-) create mode 100644 docs/BIDS_convertor.md diff --git a/README.md b/README.md index a546feb..71ccab6 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,15 @@ Welcome to the ```sailsprep``` repo! This is a Python repo for doing incredible video-based human pose estimation analyses. **STAY TUNED!** **Caution:**: this package is still under development and may change rapidly over the next few weeks. -## General information +This will convert the raw video into BIDS format in a clean fashion. +## Features +- A few +- Cool +- Things +- These may include a wonderful CLI interface. + +## Installation To manage dependencies, this project uses Poetry. Make sure you've got poetry installed. On Engaging, you need to first run at the root of the repo : ``` @@ -19,9 +26,7 @@ pip install poetry poetry install ``` -## Preprocessing -### BIDS-conversion -The conversion pipeline requires FFmpeg ≥ 6.0 compiled with the vidstab library. +The BIDS-conversion tool of sailsprep requires FFmpeg ≥ 6.0 compiled with the vidstab library. 
Because FFmpeg compiled with vidstab is not a Python package, it must be installed separately. You'll need to run (outside any environment): @@ -34,37 +39,18 @@ export PATH="$HOME/ffmpeg_static:$PATH" ``` -To make this permanent, add the last line to your ~/.bashrc or ~/.bash_profile. -You can verify that FFmpeg has the right version (≥ 6.0): -``` -ffmpeg -version -``` -You'll need to submit the script on Engaging using sbatch. We've -provided the sumbission files so you'll simply need to run (with module miniforge deactivated) : -``` -jid=$(sbatch --parsable jobs/run_bids_convertor.sh) -sbatch --dependency=afterok:$jid jobs/merge_cleanup.sh -``` -This will convert the raw video into BIDS format in a clean fashion. -## Features -- A few -- Cool -- Things -- These may include a wonderful CLI interface. - -## Installation Get the newest development version via: ```sh pip install git+https://github.com/sensein/sailsprep.git ``` - ## Quick start -```Python -from sailsprep.app import hello_world -hello_world() -``` + ## Quick start +Tools developed in sailsprep: +|Tool|Documentation| +|----|--------------| +|BIDS-conversion| [link to documentation](docs/BIDS_convertor.md)| + ## Contributing We welcome contributions from the community! Before getting started, please review our [**CONTRIBUTING.md**](https://github.com/sensein/sailsprep/blob/main/CONTRIBUTING.md). diff --git a/docs/BIDS_convertor.md b/docs/BIDS_convertor.md new file mode 100644 index 0000000..1eff64a --- /dev/null +++ b/docs/BIDS_convertor.md @@ -0,0 +1,52 @@ +## BIDS Format + +For reproducibility, organization, and practicality, sailsprep converts its raw data into the BIDS (Brain Imaging Data Structure) format. +BIDS is a community-driven standard for organizing, naming, and describing neuroimaging and related data (e.g., EEG, fMRI, MEG, behavioral, physiological data, etc.). + +During the BIDS conversion pipeline, the raw home videos are standardized, denoised, and reformatted. +Relevant metadata and annotations necessary for downstream analysis are also extracted at this stage. + +## Structure + +The final BIDS dataset follows the structure below: ```graphql ├── sub-ID1 # Contains raw videos in BIDS format │ ├── ses-01 # Videos between 12 and 16 months │ │ └── beh # Behavioral data │ │ ├── sub-ID1_ses-01_task-A_run-01_beh.mp4 # Standardized raw video │ │ ├── sub-ID1_ses-01_task-A_run-01_events.tsv # Manual annotations │ │ └── sub-ID1_ses-01_task-A_run-01_beh.json # Info on standardization │ └── ses-02 # Videos between 34 and 38 months │ └── beh ├── derivatives │ └── preprocessed # Contains stabilized, denoised, standardized videos │ ├── sub-ID1 │ │ ├── ses-01 │ │ │ └── beh │ │ │ ├── sub-ID1_ses-01_task-A_run-01_audio.json # Audio extraction info │ │ │ ├── sub-ID1_ses-01_task-A_run-01_audio.wav # Extracted audio │ │ │ ├── sub-ID1_ses-01_task-A_run-01_desc-processed_beh.json # Video preprocessing info │ │ │ └── sub-ID1_ses-01_task-A_run-01_desc-processed_beh.mp4 # Preprocessed video │ │ └── ses-02 │ └── sub-ID2 ├── README.md # Explains dataset structure and content ├── participants.tsv # Participant information (e.g., ASD status) ├── participants.json # Metadata for participants.tsv └── dataset_description.json # BIDS dataset description (name, version, etc.) ``` ## Execution + +To verify that FFmpeg is correctly installed (see [README.md](../README.md)) and is at least version 6.0, run: + +``` +ffmpeg -version +``` + +You'll need to submit the conversion job on Engaging using sbatch. 
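+
+The converter runs as a SLURM job array: each task receives its index and the
+total task count, and works on an interleaved slice of the discovered videos.
+A minimal sketch of that split (mirroring `process_videos`; the filenames here
+are made up):
+
+```python
+# Illustration only: how each array task picks its share of the video list.
+all_videos = ["a.mp4", "b.mp4", "c.mp4", "d.mp4", "e.mp4"]  # hypothetical names
+num_tasks = 2
+for task_id in range(num_tasks):
+    print(task_id, all_videos[task_id::num_tasks])
+# 0 ['a.mp4', 'c.mp4', 'e.mp4']
+# 1 ['b.mp4', 'd.mp4']
+```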
+Make sure you are in the root directory of the repository. + +We provide SLURM submission scripts for convenience — simply run the following commands (with the miniforge module deactivated to ensure the correct FFmpeg version is used): +``` +jid=$(sbatch --parsable jobs/run_bids_convertor.sh) +sbatch --dependency=afterok:$jid jobs/merge_cleanup.sh +``` From 21003fd01273b57aa7929592b23cf8e8f07f16bd Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 17:46:06 -0500 Subject: [PATCH 31/36] fixed scripts for unit tests --- src/sailsprep/BIDS_convertor.py | 2 ++ src/tests/test_BIDS_convertor.py | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/sailsprep/BIDS_convertor.py b/src/sailsprep/BIDS_convertor.py index 2d38b24..a42016c 100644 --- a/src/sailsprep/BIDS_convertor.py +++ b/src/sailsprep/BIDS_convertor.py @@ -1497,6 +1497,7 @@ def main() -> None: if not os.path.exists(path): print(f"ERROR: {label} not found at {path}") sys.exit(1) + return # --- Load metadata --- try: @@ -1506,6 +1507,7 @@ def main() -> None: except Exception as e: safe_print(f"ERROR: Failed to load Excel file: {e}") sys.exit(1) + return # --- Discover videos --- safe_print("Discovering videos...") diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py index 738faa2..cd2f143 100644 --- a/src/tests/test_BIDS_convertor.py +++ b/src/tests/test_BIDS_convertor.py @@ -316,11 +316,19 @@ def test_main_workflow( ], [{"error": None}], ) - # Mock sys.argv to simulate CLI arguments - with patch.object(sys, "argv", ["BIDS_convertor.py", "0", "4"]): - with patch("sys.exit") as mock_exit: - bvp_module.main() - mock_exit.assert_not_called() + with ( + patch("sailsprep.BIDS_convertor.os.path.exists", return_value=True), + patch( + "sailsprep.BIDS_convertor.pd.read_csv", + return_value=pd.DataFrame( + {"Context": ["playing", "unknown"], "ID": ["AZE", "RET"]} + ), + ), + patch.object(sys, "argv", ["BIDS_convertor.py", "0", "4"]), + patch("sys.exit") as mock_exit, + ): + bvp_module.main() + mock_exit.assert_not_called() # Verify all steps were called mock_create_structure.assert_called_once() From 5bad1f5c692d78bb008c9048548019462a2680c8 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 17:54:22 -0500 Subject: [PATCH 32/36] Merge branch 'main' into BIDS-conversion --- pyproject.toml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ae3a591..23bd11b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,20 +45,20 @@ types-pyyaml = "^6.0.12.20250915" optional = true [tool.poetry.group.dev.dependencies] -pytest-xdist = {version = "~=3.6.1", extras = ["psutil"]} -mypy = "~=1.16" -pre-commit = "~=4.2" -pytest-cov = "~=6.1" -ruff = "~=0.11" -pandas = "^2.3.1" -types-pyyaml = "^6.0.12.20250516" -types-python-dateutil = "^2.9.0.20250708" +pytest-xdist = {version = "~=3.8.0", extras = ["psutil"]} +mypy = "~=1.18" +pre-commit = "~=4.3" +pytest-cov = "~=7.0" +ruff = "~=0.14" +pandas = "^2.3.3" +types-pyyaml = "^6.0.12.20250915" +types-python-dateutil = "^2.9.0.20251008" [tool.poetry.group.docs] optional = true [tool.poetry.group.docs.dependencies] -pdoc = "~=15.0" +pdoc = "~=16.0" [tool.pytest.ini_options] pythonpath = [ From 333f5f4eaadc3cf8f15472f66cc17f506952ce6f Mon Sep 17 00:00:00 2001 From: lucie271 Date: Tue, 4 Nov 2025 18:08:06 -0500 Subject: [PATCH 33/36] updated unit test --- src/tests/test_BIDS_convertor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py index cd2f143..b47945b 100644 --- a/src/tests/test_BIDS_convertor.py +++ b/src/tests/test_BIDS_convertor.py @@ -287,6 +287,7 @@ def test_save_json(self, bvp_module: ModuleType) -> None: class TestMainWorkflow: """Test the main processing workflow.""" + @patch("sailsprep.BIDS_convertor.get_all_videos") @patch("sailsprep.BIDS_convertor.process_videos") @patch("sailsprep.BIDS_convertor.create_readme") @patch("sailsprep.BIDS_convertor.create_derivatives_dataset_description") @@ -301,10 +302,12 @@ def test_main_workflow( mock_create_derivatives: MagicMock, mock_create_readme: MagicMock, mock_process_videos: MagicMock, + mock_get_all_videos: MagicMock, bvp_module: ModuleType, ) -> None: """Test the main processing workflow.""" # Setup mocks + mock_get_all_videos.return_value = (["dummy_video_1.mp4"], []) mock_process_videos.return_value = ( [ From 7768f7eff9d70167343899c57a0a22ed0823b161 Mon Sep 17 00:00:00 2001 From: lucie271 Date: Wed, 5 Nov 2025 16:41:35 -0500 Subject: [PATCH 34/36] Added unit tests --- src/sailsprep/BIDS_convertor.py | 222 ++++++----- src/tests/test_BIDS_convertor.py | 623 ++++++++++++++++++++++++++++++- 2 files changed, 749 insertions(+), 96 deletions(-) diff --git a/src/sailsprep/BIDS_convertor.py b/src/sailsprep/BIDS_convertor.py index a42016c..503fba4 100644 --- a/src/sailsprep/BIDS_convertor.py +++ b/src/sailsprep/BIDS_convertor.py @@ -32,7 +32,7 @@ def load_configuration(config_path: str = "config.yaml") -> Dict[str, Any]: - """Load configuration from YAML file. + """Load and validate configuration from YAML file. Args: config_path (str): Path to the configuration YAML file. @@ -43,9 +43,23 @@ def load_configuration(config_path: str = "config.yaml") -> Dict[str, Any]: Raises: FileNotFoundError: If the configuration file is not found. yaml.YAMLError: If the YAML file is malformed. + KeyError: If required keys are missing in the configuration. 
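+
+    Example (illustrative only; assumes the repo's config file exists):
+        >>> cfg = load_configuration("configs/config_bids_convertor.yaml")
+        >>> "video_root" in cfg
+        True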
""" with open(config_path, "r") as f: config = yaml.safe_load(f) + + required_keys = [ + "annotation_file", + "video_root", + "output_dir", + "target_resolution", + "target_framerate", + "asd_status", + ] + + missing_keys = [key for key in required_keys if key not in config] + if missing_keys: + raise KeyError(f"Missing configuration keys: {', '.join(missing_keys)}") return config @@ -61,6 +75,7 @@ def load_configuration(config_path: str = "config.yaml") -> Dict[str, Any]: TARGET_RESOLUTION = config["target_resolution"] TARGET_FRAMERATE = config["target_framerate"] ASD_STATUS_FILE = config["asd_status"] + # BIDS directory structure FINAL_BIDS_ROOT = os.path.join( OUTPUT_DIR, config.get("final_bids_root", "final_bids-dataset") @@ -343,9 +358,12 @@ def find_session_id( session_id = determine_session_from_folder(folder_name) if not session_id and excel: - session_id = determine_session_from_excel( - current_path, annotation_df, participant_id - ) + try: + session_id = determine_session_from_excel( + current_path, annotation_df, participant_id + ) + except ValueError as e: + print(f"Excel lookup failed for {participant_id}: {e}") if session_id: return session_id @@ -617,6 +635,8 @@ def stabilize_video(input_path: str, stabilized_path: str, temp_dir: str) -> Non """Stabilize video using FFmpeg vidstab filters, with error checks.""" os.makedirs(temp_dir, exist_ok=True) transforms_file = os.path.join(temp_dir, "transforms.trf") + if not os.path.exists(input_path): + raise FileNotFoundError(f"Video to stabilize not found: {input_path}") # Step 1: Detect transforms detect_cmd = [ @@ -945,40 +965,75 @@ def process_single_video( final_derivatives_dir: str, temp_dir: str, ) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: - """Process a single video with all BIDS structures.""" - participant_id = video_info["participant_id"] - filename = video_info["filename"] - session_id = video_info["session_id"] - input_video_path = video_info["full_path"] - safe_print(f"Processing: {participant_id}/{filename}") - filename_without_extension = os.path.splitext(filename)[0] - # Check if video exists in Excel or create dummy data - + """Process a single video with all BIDS structures robustly.""" try: - # Check if video exists in Excel or create dummy data - participant_excel = annotation_df[ - annotation_df["ID"].astype(str) == str(participant_id) - ] - mask = ( - participant_excel["FileName"].str.split(".").str[0] - == filename_without_extension - ) - video_excel = participant_excel[mask] - if video_excel.empty: - # Create dummy data for missing Excel entries - dummy_data = create_dummy_excel_data( - input_video_path, participant_id, session_id + # --- Validate input -------------------------------------------------- + if not video_info or not isinstance(video_info, dict): + raise ValueError("video_info is empty or invalid") + + required_keys = ["participant_id", "filename", "session_id", "full_path"] + missing = [k for k in required_keys if k not in video_info] + if missing: + raise ValueError(f"Missing required video_info keys: {missing}") + + participant_id = video_info["participant_id"] + filename = video_info["filename"] + session_id = video_info["session_id"] + input_video_path = video_info["full_path"] + + safe_print(f"Processing: {participant_id}/{filename}") + filename_without_extension = os.path.splitext(filename)[0] + + # --- Handle empty or invalid annotation_df --------------------------- + if annotation_df is None or annotation_df.empty: + safe_print("Annotation DataFrame is empty - using 
dummy data") + video_excel = pd.DataFrame( + [create_dummy_excel_data(input_video_path, participant_id, session_id)] ) - video_excel = pd.DataFrame([dummy_data]) has_excel_data = False - safe_print("No Excel data found - using dummy data") else: - has_excel_data = True + # Ensure expected columns exist + expected_cols = {"ID", "FileName"} + if not expected_cols.issubset(annotation_df.columns): + safe_print( + "Annotation DataFrame missing required columns - using dummy data" + ) + video_excel = pd.DataFrame( + [ + create_dummy_excel_data( + input_video_path, participant_id, session_id + ) + ] + ) + has_excel_data = False + else: + # Normal Excel lookup + participant_excel = annotation_df[ + annotation_df["ID"].astype(str) == str(participant_id) + ] + mask = ( + participant_excel["FileName"].str.split(".").str[0] + == filename_without_extension + ) + video_excel = participant_excel[mask] + if video_excel.empty: + safe_print("No Excel data found - using dummy data") + video_excel = pd.DataFrame( + [ + create_dummy_excel_data( + input_video_path, participant_id, session_id + ) + ] + ) + has_excel_data = False + else: + has_excel_data = True excel_row = video_excel.iloc[0] task_label = get_task_from_excel_row(excel_row) activity = excel_row.get("Activity", "unknown activity") - # Create task information + + # --- Build task info ------------------------------------------------- task_info = { "task_name": task_label, "task_description": f"Behavioral session: {activity}", @@ -987,18 +1042,17 @@ def process_single_video( "activity": str(excel_row.get("Activity", "n/a")), } - # Create BIDS directory structure + # --- Directory setup ------------------------------------------------- raw_subj_dir = os.path.join( final_bids_root, f"sub-{participant_id}", f"ses-{session_id}", "beh" ) deriv_subj_dir = os.path.join( final_derivatives_dir, f"sub-{participant_id}", f"ses-{session_id}", "beh" ) - os.makedirs(raw_subj_dir, exist_ok=True) os.makedirs(deriv_subj_dir, exist_ok=True) - # Create BIDS filenames with run number + # --- File naming ----------------------------------------------------- ext = os.path.splitext(filename)[1] run_number = get_next_run_number( participant_id, session_id, task_label, final_bids_root @@ -1022,15 +1076,15 @@ def process_single_video( participant_id, session_id, task_label, "events", "tsv", run_number ) - # File paths + # --- Paths ----------------------------------------------------------- raw_video_path = os.path.join(raw_subj_dir, raw_video_name) processed_video_path = os.path.join(deriv_subj_dir, processed_video_name) audio_path = os.path.join(deriv_subj_dir, audio_name) events_path = os.path.join(raw_subj_dir, events_name) + # --- Raw video preparation ------------------------------------------ if not os.path.exists(raw_video_path): if ext.lower() != ".mp4": - # Convert to mp4 without processing cmd = [ "ffmpeg", "-y", @@ -1043,93 +1097,69 @@ def process_single_video( result = subprocess.run( cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True ) - # Check return code and verify output file - if result.returncode != 0: + if result.returncode != 0 or not os.path.exists(raw_video_path): raise ValueError(f"FFmpeg conversion failed: {result.stderr}") - if not os.path.exists(raw_video_path): - raise ValueError( - f"FFmpeg did not create output file: {raw_video_path}" - ) safe_print(" Converted to raw BIDS format") else: shutil.copy2(input_video_path, raw_video_path) - # FIX: Verify copy succeeded if not os.path.exists(raw_video_path): raise ValueError(f"Failed to 
copy to raw BIDS: {raw_video_path}") safe_print(" Copied to raw BIDS") - # Extract metadata from raw video + # --- Metadata extraction -------------------------------------------- exif_data = extract_exif(raw_video_path) - if "error" in exif_data or "ffprobe_error" in exif_data: + if ( + not isinstance(exif_data, dict) + or "error" in exif_data + or "ffprobe_error" in exif_data + ): raise ValueError("Unreadable or unsupported video format") - # Process video for derivatives + # --- Video processing ----------------------------------------------- if not os.path.exists(processed_video_path): safe_print(" Starting video processing...") preprocess_video(raw_video_path, processed_video_path, temp_dir) - # Verify processing succeeded - if not os.path.exists(processed_video_path): - raise ValueError( - f"Video processing failed - no output file: {processed_video_path}" - ) - if os.path.getsize(processed_video_path) == 0: - raise ValueError( - "Video processing failed- empty output file:" - f" {processed_video_path}" - ) + if ( + not os.path.exists(processed_video_path) + or os.path.getsize(processed_video_path) == 0 + ): + raise ValueError("Video processing failed - no valid output") safe_print(" Video processing complete") + # --- Audio extraction ----------------------------------------------- if not os.path.exists(audio_path): safe_print(" Extracting audio...") extract_audio(processed_video_path, audio_path) - # Verify audio extraction succeeded - if not os.path.exists(audio_path): - raise ValueError( - f"Audio extraction failed - no output file: {audio_path}" - ) - if os.path.getsize(audio_path) == 0: - raise ValueError( - f"Audio extraction failed - empty output file: {audio_path}" - ) + if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0: + raise ValueError("Audio extraction failed - no valid output") safe_print(" Audio extraction complete") - # Create events files + # --- Events file ---------------------------------------------------- create_events_file(video_excel, events_path, input_video_path) if not os.path.exists(events_path): raise ValueError(f"Failed to create events file: {events_path}") - # Create metadata JSON files + # --- Metadata JSONs ------------------------------------------------- processing_info = { "has_stabilization": True, "has_denoising": True, "has_equalization": True, } - # Raw video JSON raw_video_json_path = raw_video_path.replace(".mp4", ".json") - create_raw_video_json( - excel_row, - task_info, - raw_video_path, - raw_video_json_path, - ) + create_raw_video_json(excel_row, task_info, raw_video_path, raw_video_json_path) if not os.path.exists(raw_video_json_path): raise ValueError(f"Failed to create raw video JSON: {raw_video_json_path}") - # Processed video JSON processed_video_json_path = processed_video_path.replace(".mp4", ".json") create_video_metadata_json( - exif_data, - processing_info, - task_info, - processed_video_json_path, + exif_data, processing_info, task_info, processed_video_json_path ) if not os.path.exists(processed_video_json_path): raise ValueError( f"Failed to create processed video JSON: {processed_video_json_path}" ) - # Audio JSON audio_json_path = audio_path.replace(".wav", ".json") create_audio_metadata_json( exif_data.get("duration_sec", 0), task_info, audio_json_path @@ -1137,7 +1167,7 @@ def process_single_video( if not os.path.exists(audio_json_path): raise ValueError(f"Failed to create audio JSON: {audio_json_path}") - # Store processing information + # --- Success return 
------------------------------------------------- entry = { "participant_id": participant_id, "session_id": session_id, @@ -1148,7 +1178,7 @@ def process_single_video( "audio_file_bids": audio_path, "events_file_bids": events_path, "filename": filename, - "age_folder": video_info["age_folder"], + "age_folder": video_info.get("age_folder", "n/a"), "duration_sec": exif_data.get("duration_sec", 0), "has_excel_data": has_excel_data, "excel_metadata": excel_row.to_dict(), @@ -1160,8 +1190,11 @@ def process_single_video( return entry, None except Exception as e: - safe_print(f" ERROR processing {input_video_path}: {str(e)}") - return None, {"video": input_video_path, "error": str(e)} + safe_print( + f" ERROR processing {video_info.get('full_path', 'unknown file')}:" + f" {str(e)}" + ) + return None, {"video": video_info.get("full_path", "unknown"), "error": str(e)} def create_dataset_description() -> None: @@ -1263,12 +1296,17 @@ def create_readme() -> None: raise ValueError(f"Failed to create README at {filepath}: {e}") -def create_participants_file() -> None: +def create_participants_file( + final_bids_root: str = FINAL_BIDS_ROOT, asd_status_file: str = ASD_STATUS_FILE +) -> None: """Create participants.tsv and participants.json files.""" - asd_status = pd.read_excel(ASD_STATUS_FILE) + if not os.path.exists(asd_status_file): + raise FileNotFoundError(f"ASD status file not found: {asd_status_file}") + + asd_status = pd.read_excel(asd_status_file) ids_processed_participants = [] - for name in os.listdir(FINAL_BIDS_ROOT): - full_path = os.path.join(FINAL_BIDS_ROOT, name) + for name in os.listdir(final_bids_root): + full_path = os.path.join(final_bids_root, name) if os.path.isdir(full_path) and name.startswith("sub-"): ids_processed_participants.append(name.split("sub-")[1]) participants_data = [] @@ -1283,7 +1321,7 @@ def create_participants_file() -> None: participants_df = pd.DataFrame(participants_data) participants_df.to_csv( - os.path.join(FINAL_BIDS_ROOT, "participants.tsv"), + os.path.join(final_bids_root, "participants.tsv"), sep="\t", index=False, na_rep="n/a", @@ -1294,7 +1332,7 @@ def create_participants_file() -> None: "Group": {"Description": "ASD status"}, } - save_json(participants_json, os.path.join(FINAL_BIDS_ROOT, "participants.json")) + save_json(participants_json, os.path.join(final_bids_root, "participants.json")) def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None: @@ -1358,11 +1396,11 @@ def print_summary(all_processed: List[Dict], all_failed: List[Dict]) -> None: print(f" {error}: {count} videos") -def merge_subjects() -> None: +def merge_subjects(final_bids_root: str = FINAL_BIDS_ROOT) -> None: """Merge duplicated subject folders safely.""" paths_to_check = [ - Path(FINAL_BIDS_ROOT), - Path(FINAL_BIDS_ROOT) / "derivatives" / "preprocessed", + Path(final_bids_root), + Path(final_bids_root) / "derivatives" / "preprocessed", ] for folder in paths_to_check: diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py index b47945b..073ee01 100644 --- a/src/tests/test_BIDS_convertor.py +++ b/src/tests/test_BIDS_convertor.py @@ -1,12 +1,15 @@ """Tests for BIDS Video Processing Pipeline.""" import json +import math import os import sys +from pathlib import Path from types import ModuleType from typing import Generator from unittest.mock import MagicMock, mock_open, patch +import numpy as np import pandas as pd import pytest import yaml @@ -53,11 +56,11 @@ def test_load_configuration_success(self, bvp_module: ModuleType) -> None: 
"""Test successful configuration loading.""" mock_config = { "video_root": "/path/to/videos", - "asd_csv": "asd.csv", - "nonasd_csv": "nonasd.csv", + "annotation_file": "blablabla.csv", + "asd_status": "nonasd.xlsx", "output_dir": "/output", "target_resolution": "1280x720", - "target_fps": 30, + "target_framerate": 30, } with patch("builtins.open", mock_open(read_data=yaml.dump(mock_config))): @@ -71,6 +74,194 @@ def test_load_configuration_file_not_found(self, bvp_module: ModuleType) -> None with pytest.raises(FileNotFoundError): bvp_module.load_configuration("nonexistent.yaml") + def test_load_configuration_invalid_yaml(self, bvp_module: ModuleType) -> None: + """Test configuration loading with invalid YAML.""" + with patch("builtins.open", mock_open(read_data="invalid: yaml: : format")): + with pytest.raises(yaml.YAMLError): + bvp_module.load_configuration("config.yaml") + + def test_load_configuration_missing_required_fields( + self, bvp_module: ModuleType + ) -> None: + """Test configuration loading with missing required fields.""" + incomplete_config = { + "video_root": "/path/to/videos", + # Missing other required fields + } + with patch("builtins.open", mock_open(read_data=yaml.dump(incomplete_config))): + with pytest.raises(KeyError): + bvp_module.load_configuration("config.yaml") + + +class TestInfoExtractorforBIDS: + """Test info extraction and missing excel handling for BIDS.""" + + def test_create_dummy_excel_data_returns_expected_dict( + self, tmp_path: Path, bvp_module: ModuleType + ) -> None: + """Test dummy excel data creation returns expected dict.""" + # Arrange + video_path = tmp_path / "sub-001_video.mp4" + video_path.write_text("dummy") # just to create a filename + participant_id = "001" + session_id = "01" + + # Act + data = bvp_module.create_dummy_excel_data( + str(video_path), participant_id, session_id, "rest" + ) + + # Assert + assert data["ID"] == "001" + assert data["FileName"] == os.path.basename(video_path) + assert data["Context"] == "rest" + assert data["Notes"].startswith("Video not found") + assert "Vid_duration" in data + # All fields should have default "n/a" except the few explicitly set + assert all( + v == "n/a" or k in ["ID", "FileName", "Context", "Vid_duration", "Notes"] + for k, v in data.items() + if k not in ["ID", "FileName", "Context", "Vid_duration", "Notes"] + ) + + def test_find_age_folder_session_direct_match( + self, tmp_path: Path, bvp_module: ModuleType + ) -> None: + """Test direct match for age folder session.""" + participant_path = tmp_path / "sub-001" + participant_path.mkdir() + current_path = participant_path / "12-16_months" + current_path.mkdir() + + with patch( + "sailsprep.BIDS_convertor.determine_session_from_folder", return_value="01" + ): + session = bvp_module.find_age_folder_session( + str(current_path), str(participant_path) + ) + assert session == "01" + + def test_find_age_folder_session_outside_participant_path( + self, tmp_path: Path, bvp_module: ModuleType + ) -> None: + """Test age folder session outside participant path.""" + participant_path = tmp_path / "sub-001" + other_path = tmp_path / "other" / "12-16_months" + other_path.mkdir(parents=True) + + with patch( + "sailsprep.BIDS_convertor.determine_session_from_folder", return_value="01" + ): + session = bvp_module.find_age_folder_session( + str(other_path), str(participant_path) + ) + assert session is None + + def test_get_task_from_excel_row_valid_context( + self, bvp_module: ModuleType + ) -> None: + """Test get task from excel row with valid context.""" 
+ row = pd.Series({"Context": "Play-time"}) + result = bvp_module.get_task_from_excel_row(row) + assert result == "Playtime" # cleaned via make_bids_task_label + + def get_task_from_excel_row(self, row: pd.Series) -> None: + """Test get task from excel row with unknown context.""" + context = str(row.get("Context", "Other ")).strip() + result = bvp_module.make_bids_task_label(context) + assert result == "unknown" + + def test_extract_participant_id_from_folder_with_ames_prefix( + self, bvp_module: ModuleType + ) -> None: + """Test extract participant ID from folder with AMES prefix.""" + assert ( + bvp_module.extract_participant_id_from_folder("SOMETHING_AMES_123") == "123" + ) + + def test_extract_participant_id_edge_cases(self, bvp_module: ModuleType) -> None: + """Test extract participant ID edge cases.""" + assert ( + bvp_module.extract_participant_id_from_folder("ABC_AMES_456_extra_AMES") + == "456_extra_AMES" + ) + assert ( + bvp_module.extract_participant_id_from_folder("participant123") + == "participant123" + ) + assert ( + bvp_module.extract_participant_id_from_folder("AA_participant_123") == "123" + ) + + def test_determine_session_from_excel_timepoint_14( + self, bvp_module: ModuleType + ) -> None: + """Test determine session from excel with timepoint 14.""" + df = pd.DataFrame( + [{"ID": "001", "FileName": "video1.mp4", "timepoint": "14_month", "Age": 1}] + ) + session = bvp_module.determine_session_from_excel( + "/some/path/video1.mp4", df, "001" + ) + assert session == "01" + + def test_determine_session_from_excel_timepoint_36( + self, bvp_module: ModuleType + ) -> None: + """Test determine session from excel with timepoint 36.""" + df = pd.DataFrame( + [{"ID": "002", "FileName": "vid2.mov", "timepoint": "36months", "Age": 3}] + ) + session = bvp_module.determine_session_from_excel( + "/some/path/vid2.mov", df, "002" + ) + assert session == "02" + + def test_determine_session_from_excel_age_based( + self, bvp_module: ModuleType + ) -> None: + """Test determine session from excel.""" + df = pd.DataFrame( + [ + {"ID": "003", "FileName": "a.mp4", "timepoint": "unknown", "Age": 1.5}, + {"ID": "004", "FileName": "b.mp4", "timepoint": pd.NA, "Age": 3}, + ] + ) + s1 = bvp_module.determine_session_from_excel("/p/a.mp4", df, "003") + s2 = bvp_module.determine_session_from_excel("/p/b.mp4", df, "004") + assert s1 == "01" + assert s2 == "02" + + def test_determine_session_from_excel_participant_not_found( + self, bvp_module: ModuleType + ) -> None: + """Test determine session from excel with error in participant ID.""" + df = pd.DataFrame( + [{"ID": "999", "FileName": "x.mp4", "timepoint": "14", "Age": 1}] + ) + with pytest.raises(ValueError): + bvp_module.determine_session_from_excel("/p/y.mp4", df, "001") + + def test_determine_session_from_excel_file_not_found( + self, bvp_module: ModuleType + ) -> None: + """Test determine session from excel with missing excel.""" + df = pd.DataFrame( + [{"ID": "010", "FileName": "other.mp4", "timepoint": "14", "Age": 1}] + ) + with pytest.raises(ValueError): + bvp_module.determine_session_from_excel("/p/missing.mp4", df, "010") + + def test_determine_session_from_excel_unable_to_determine( + self, bvp_module: ModuleType + ) -> None: + """Test determine session timepoint does not match and age is NaN.""" + df = pd.DataFrame( + [{"ID": "030", "FileName": "u.mp4", "timepoint": "unk", "Age": pd.NA}] + ) + with pytest.raises(ValueError): + bvp_module.determine_session_from_excel("/p/u.mp4", df, "030") + class TestBIDSStructure: """Test BIDS directory 
structure creation and validation.""" @@ -119,6 +310,55 @@ def test_create_bids_filename(self, bvp_module: ModuleType) -> None: expected = "sub-123_ses-01_task-mealtime_run-01_beh.mp4" assert filename == expected + def test_get_next_run_number_no_dir( + self, tmp_path: Path, bvp_module: ModuleType + ) -> None: + """Test get_next_run_numberwhen no subject/session directory exists.""" + root = tmp_path + result = bvp_module.get_next_run_number("001", "01", "rest", str(root)) + assert result == 1 + + def test_get_next_run_number_empty_dir( + self, tmp_path: Path, bvp_module: ModuleType + ) -> None: + """Test get_next_run_number when runs already exist.""" + beh_dir = tmp_path / "sub-001" / "ses-01" / "beh" + beh_dir.mkdir(parents=True) + result = bvp_module.get_next_run_number("001", "01", "rest", str(tmp_path)) + assert result == 1 + + def test_get_next_run_number_with_existing_runs( + self, tmp_path: Path, bvp_module: ModuleType + ) -> None: + """Test get_next_run_number w existing runs.""" + beh_dir = tmp_path / "sub-001" / "ses-01" / "beh" + beh_dir.mkdir(parents=True) + # Simulate existing files + (beh_dir / "sub-001_ses-01_task-rest_run-1_beh.tsv").touch() + (beh_dir / "sub-001_ses-01_task-rest_run-2_beh.tsv").touch() + result = bvp_module.get_next_run_number("001", "01", "rest", str(tmp_path)) + assert result == 3 + + def test_get_next_run_number_with_invalid_and_no_run( + self, tmp_path: Path, bvp_module: ModuleType + ) -> None: + """Test get_next_run_number skips invalid filenames.""" + beh_dir = tmp_path / "sub-001" / "ses-01" / "beh" + beh_dir.mkdir(parents=True) + # One invalid, one missing run number + (beh_dir / "sub-001_ses-01_task-rest_run-abc_beh.tsv").touch() + (beh_dir / "sub-001_ses-01_task-rest_beh.tsv").touch() + result = bvp_module.get_next_run_number("001", "01", "rest", str(tmp_path)) + assert result == 2 # treated as next after run-1 + + def test_make_bids_task_label_sanitizes_name(self, bvp_module: ModuleType) -> None: + """Test make_bids_task_label correctly sanitizes and normalizes task names.""" + assert bvp_module.make_bids_task_label(" Task Rest ") == "TaskRest" + assert bvp_module.make_bids_task_label("run-01+") == "run01+" + assert bvp_module.make_bids_task_label("We!rd#Name$") == "WerdName" + assert bvp_module.make_bids_task_label("") == "" + assert bvp_module.make_bids_task_label(None) == "None" + def test_get_session_from_path_12_16_months(self, bvp_module: ModuleType) -> None: """Test session determination for 12-16 month videos.""" path = "12-16 month" @@ -135,6 +375,40 @@ def test_get_session_from_path_34_38_months(self, bvp_module: ModuleType) -> Non class TestVideoMetadataExtraction: """Test video metadata extraction and processing.""" + def test_parse_duration_various_formats(self, bvp_module: ModuleType) -> None: + """Test for various duration formats.""" + # Normal HH:MM:SS + assert math.isclose(bvp_module.parse_duration("01:02:03"), 3723.0) + # MM:SS format + assert math.isclose(bvp_module.parse_duration("05:30"), 330.0) + # Plain number string + assert math.isclose(bvp_module.parse_duration("12.5"), 12.5) + # Empty or NaN → 0.0 + assert bvp_module.parse_duration("") == 0.0 + assert bvp_module.parse_duration(np.nan) == 0.0 + # Invalid types → handled gracefully + assert bvp_module.parse_duration(None) == 0.0 + assert bvp_module.parse_duration("abc") == 0.0 + + def test_extract_exif_empty_file(self, bvp_module: ModuleType) -> None: + """Test video metadata extraction with empty file.""" + with patch("subprocess.run") as mock_run: + 
+            mock_run.return_value.returncode = 0
+            mock_run.return_value.stdout = "{}"  # Empty JSON response
+
+            result = bvp_module.extract_exif("empty.mp4")
+            assert result.get("duration_sec") == 0
+            assert result.get("format") is None
+
+    def test_extract_exif_corrupted_json(self, bvp_module: ModuleType) -> None:
+        """Test video metadata extraction with corrupted JSON output."""
+        with patch("subprocess.run") as mock_run:
+            mock_run.return_value.returncode = 0
+            mock_run.return_value.stdout = "corrupted json"
+
+            result = bvp_module.extract_exif("corrupt.mp4")
+            assert "error" in result
+
     def test_extract_exif_success(self, bvp_module: ModuleType) -> None:
         """Test successful video metadata extraction."""
         mock_metadata = {
@@ -196,6 +470,65 @@
             os.path.join("output/TEMP/task-01", "transforms.trf")
         )

+    def test_stabilize_video_input_missing(self, bvp_module: ModuleType) -> None:
+        """Test video stabilization with missing input file."""
+        with patch("os.path.exists", return_value=False):
+            with pytest.raises(FileNotFoundError):
+                bvp_module.stabilize_video("nonexistent.mp4", "output.mp4", "temp")
+
+    @patch("subprocess.run")
+    @patch("os.path.exists")
+    def test_stabilize_video_vidstab_error(
+        self,
+        mock_exists: MagicMock,
+        mock_run: MagicMock,
+        bvp_module: ModuleType,
+    ) -> None:
+        """Test video stabilization with vidstab error."""
+        mock_exists.return_value = True
+        mock_run.return_value.returncode = 1
+        mock_run.return_value.stderr = "Error in vidstab"
+
+        with pytest.raises(RuntimeError):
+            bvp_module.stabilize_video("input.mp4", "output.mp4", "temp")
+
+    def test_get_video_properties_success(
+        self, monkeypatch: pytest.MonkeyPatch, bvp_module: ModuleType
+    ) -> None:
+        """Test video properties extraction success."""
+        mock_cap = MagicMock()
+        mock_cap.isOpened.return_value = True
+        mock_cap.get.side_effect = [30.0, 1280.0, 720.0]
+        monkeypatch.setattr("cv2.VideoCapture", lambda _: mock_cap)
+
+        props = bvp_module.get_video_properties("video.mp4")
+        assert props["FrameRate"] == 30.0
+        assert props["Resolution"] == "1280x720"
+
+    def test_get_video_properties_unopened(
+        self, monkeypatch: pytest.MonkeyPatch, bvp_module: ModuleType
+    ) -> None:
+        """Test video properties extraction with unopened video."""
+        mock_cap = MagicMock()
+        mock_cap.isOpened.return_value = False
+        monkeypatch.setattr("cv2.VideoCapture", lambda _: mock_cap)
+
+        props = bvp_module.get_video_properties("missing.mp4")
+        assert props == {"FrameRate": None, "Resolution": None}
+
+    def test_get_video_properties_exception(
+        self, monkeypatch: pytest.MonkeyPatch, bvp_module: ModuleType
+    ) -> None:
+        """Test video properties extraction with OpenCV exception."""
+
+        def broken_videocap(_path: str) -> None:
+            raise RuntimeError("OpenCV error")
+
+        monkeypatch.setattr("cv2.VideoCapture", broken_videocap)
+
+        props = bvp_module.get_video_properties("corrupt.mp4")
+        assert props == {"FrameRate": None, "Resolution": None}
+
     @patch("subprocess.run")
     @patch("os.path.exists")
     def test_extract_audio(
@@ -222,7 +555,7 @@
 class TestMetadataFileCreation:
     """Test creation of BIDS metadata files."""

-    def test_create_events_tsv(self, bvp_module: ModuleType) -> None:
+    def test_create_events_file(self, bvp_module: ModuleType) -> None:
         """Test events TSV file creation."""
         video_metadata = pd.DataFrame(
             [
@@ -341,6 +674,288 @@ def test_main_workflow(
             mock_process_videos.assert_called_once()


+class TestExtendedFunctions:
+    """Additional unit tests for deeper functions and edge cases."""
+
+    def test_find_session_id_uses_folder_first(self, bvp_module: ModuleType) -> None:
+        """Should use folder-based session detection first."""
+        mock_df = pd.DataFrame()  # not used
+
+        with (
+            patch(
+                "sailsprep.BIDS_convertor.determine_session_from_folder",
+                return_value="01",
+            ) as mock_folder,
+            patch(
+                "sailsprep.BIDS_convertor.determine_session_from_excel"
+            ) as mock_excel,
+        ):
+            session = bvp_module.find_session_id(
+                directory="/data/participant/session01",
+                current_path="/data/participant/session01/video.mp4",
+                participant_path="/data/participant",
+                annotation_df=mock_df,
+                participant_id="001",
+            )
+
+            assert session == "01"
+            mock_folder.assert_called_once()
+            mock_excel.assert_not_called()
+
+    def test_find_session_id_falls_back_to_excel_when_folder_fails(
+        self, bvp_module: ModuleType
+    ) -> None:
+        """Should fall back to Excel lookup when folder-based detection fails."""
+        mock_df = pd.DataFrame()
+        with (
+            patch(
+                "sailsprep.BIDS_convertor.determine_session_from_folder",
+                return_value=None,
+            ) as mock_folder,
+            patch(
+                "sailsprep.BIDS_convertor.determine_session_from_excel",
+                return_value="02",
+            ) as mock_excel,
+        ):
+            session = bvp_module.find_session_id(
+                directory="/data/participant/unknown_folder",
+                current_path="/data/participant/unknown_folder/video.mp4",
+                participant_path="/data/participant",
+                annotation_df=mock_df,
+                participant_id="001",
+            )
+
+            assert session == "02"
+            mock_folder.assert_called_once()
+            mock_excel.assert_called_once()
+
+    def test_find_videos_recursive_collects_videos(
+        self, tmp_path: Path, bvp_module: ModuleType
+    ) -> None:
+        """Test find_videos_recursive function."""
+        participant = tmp_path / "sub-ABC"
+        participant.mkdir()
+        (participant / "12-16_months").mkdir()
+        v1 = participant / "12-16_months" / "one.mp4"
+        v1.write_text("x")
+        (participant / "notes.txt").write_text("ignore")
+
+        videos = bvp_module.find_videos_recursive(
+            str(participant), str(participant), pd.DataFrame(), "ABC"
+        )
+        assert any(str(v1) == p for p, s in videos)
+
+    def test_preprocess_video_success_creates_output(
+        self, tmp_path: Path, bvp_module: ModuleType
+    ) -> None:
+        """Check that preprocess_video succeeds when all steps work."""
+        input_file = tmp_path / "in.mp4"
+        input_file.write_bytes(b"video")
+
+        temp_dir = tmp_path / "temp"
+        temp_dir.mkdir()
+
+        # Pre-create stabilized temp file
+        stabilized_tmp = temp_dir / f"stabilized_temp_{os.getpid()}.mp4"
+        stabilized_tmp.write_bytes(b"stable")
+
+        output_path = tmp_path / "out.mp4"
+        output_path.write_bytes(b"processed")
+
+        # Patch stabilize_video and subprocess.run
+        with (
+            patch("sailsprep.BIDS_convertor.stabilize_video", return_value=None),
+            patch("sailsprep.BIDS_convertor.subprocess.run") as mock_run,
+        ):
+            mock_run.return_value.returncode = 0
+            mock_run.return_value.stderr = ""
+
+            # Should not raise any error
+            bvp_module.preprocess_video(
+                str(input_file), str(output_path), str(temp_dir)
+            )
+
+        # ✅ Assert that output file exists and is non-empty
+        assert output_path.exists(), "Output video file should exist"
+        assert output_path.stat().st_size > 0, "Output video file should not be empty"
+
+        # ✅ Assert that stabilized temp file was cleaned up
+        assert (
+            not stabilized_tmp.exists()
+        ), "Temporary stabilized file should be removed"
+
+        # ✅ Verify that ffmpeg (subprocess) was called
+        mock_run.assert_called_once()
+
+    def test_safe_float_conversion_various(self, bvp_module: ModuleType) -> None:
+        """Test safe_float_conversion with various inputs."""
+        assert bvp_module.safe_float_conversion(None) == "n/a"
+        assert bvp_module.safe_float_conversion("n/a") == "n/a"
+        assert bvp_module.safe_float_conversion("12.5") == 12.5
+        assert bvp_module.safe_float_conversion(3) == 3.0
+        assert bvp_module.safe_float_conversion("abc", default="-") == "-"
+
+    def test_create_audio_metadata_json_calls_save_json(
+        self, bvp_module: ModuleType
+    ) -> None:
+        """Test audio metadata creation function."""
+        with patch("sailsprep.BIDS_convertor.save_json") as mock_save_json:
+            bvp_module.create_audio_metadata_json(
+                12.3, {"task_name": "t", "task_description": "blabla"}, "out.json"
+            )
+            mock_save_json.assert_called_once()
+            args = mock_save_json.call_args[0]
+            assert args[0]["Duration"] == 12.3
+            assert args[0]["TaskName"] == "t"
+            assert args[0]["TaskDescription"] == "blabla"
+
+    def test_create_raw_video_json_saves_properties(
+        self, tmp_path: Path, bvp_module: ModuleType
+    ) -> None:
+        """Test raw video json creation function."""
+        with (
+            patch(
+                "sailsprep.BIDS_convertor.get_video_properties",
+                return_value={"FrameRate": 30.0, "Resolution": "1280x720"},
+            ),
+            patch("sailsprep.BIDS_convertor.save_json") as mock_save,
+        ):
+            row = pd.Series(
+                {
+                    "FileName": "a.mp4",
+                    "Vid_duration": "00:01:00",
+                    "Vid_date": "2020-01-01",
+                    "timepoint": "14",
+                    "SourceFile": "orig.mp4",
+                }
+            )
+
+            bvp_module.create_raw_video_json(
+                row,
+                {"task_name": "t", "context": "c", "activity": "a"},
+                "somepath.mp4",
+                str(tmp_path / "raw.json"),
+            )
+
+            # Assert save_json was called once
+            mock_save.assert_called_once()
+
+            # Extract the arguments used in the call
+            saved_data = mock_save.call_args[0][0]
+
+            # Check that the metadata contains expected values
+            assert saved_data["TaskName"] == "t"
+            assert saved_data["FrameRate"] == 30.0
+            assert saved_data["Resolution"] == "1280x720"
+            assert saved_data["OriginalFilename"] == "a.mp4"
+            assert saved_data["Context"] == "c"
+            assert saved_data["Activity"] == "a"
+            assert saved_data["TimePoint"] == "14"
+            assert saved_data["SourceFile"] == "orig.mp4"
+            assert (
+                abs(saved_data["Duration"] - 60.0) < 1e-6
+            )  # assuming parse_duration → seconds
+
+    def test_create_participants_file_creates_expected_outputs(
+        self, tmp_path: Path, bvp_module: ModuleType
+    ) -> None:
+        """Test create participants.tsv function."""
+        # Setup mock data
+        bids_root = tmp_path / "bids"
+        bids_root.mkdir()
+        (bids_root / "sub-101").mkdir()
+        (bids_root / "sub-102").mkdir()
+
+        asd_file = tmp_path / "asd.xlsx"
+        df = pd.DataFrame({"ID": ["101", "102"], "Group": ["ASD", "Non-ASD"]})
+        df.to_excel(asd_file, index=False)
+
+        bvp_module.create_participants_file(str(bids_root), str(asd_file))
+
+        # Assertions
+        tsv_path = bids_root / "participants.tsv"
+        json_path = bids_root / "participants.json"
+        assert tsv_path.exists()
+        assert json_path.exists()
+
+        df_out = pd.read_csv(tsv_path, sep="\t")
+        assert set(df_out["participant_id"]) == {"sub-101", "sub-102"}
+        assert set(df_out["group"]) == {"ASD", "Non-ASD"}
+
+    def test_print_summary_outputs_expected(
+        self, capsys: pytest.CaptureFixture[str], bvp_module: ModuleType
+    ) -> None:
+        """Test the summary printer function."""
+        processed = [
+            {
+                "task_label": "a",
+                "participant_id": "p1",
+                "session_id": "01",
+                "duration_sec": 60,
+                "has_excel_data": True,
+            },
+            {
+                "task_label": "b",
+                "participant_id": "p2",
+                "session_id": "02",
+                "duration_sec": 120,
+                "has_excel_data": False,
+            },
+        ]
+        failed = [{"video": "x", "error": "boom"}]
+        bvp_module.print_summary(processed, failed)
+        captured = capsys.readouterr()
+        assert "Successfully processed: 2 videos" in captured.out
+        assert "Failed to process: 1 videos" in captured.out
+
+    def test_merge_subjects_merges_and_removes(
+        self, tmp_path: Path, bvp_module: ModuleType
+    ) -> None:
+        """Test merge subjects function."""
+        # Prepare FINAL_BIDS_ROOT and derivatives paths
+        root = tmp_path / "bids"
+        deriv = root / "derivatives" / "preprocessed"
+        root.mkdir(parents=True)
+        deriv.mkdir(parents=True)
+
+        # Create original and duplicate subject folders
+        orig = root / "sub-200"
+        dup = root / "sub-200 2"
+        orig.mkdir()
+        dup.mkdir()
+        # Add file to dup that should be moved
+        (dup / "file.txt").write_text("hello")
+
+        # Run merge_subjects
+        bvp_module.merge_subjects(str(root))
+
+        # After merge, duplicate folder should not exist
+        assert not dup.exists()
+
+
+class TestProcessSingleVideo:
+    """Test the process_single_video function."""
+
+    def test_process_single_video_empty_info(self, bvp_module: ModuleType) -> None:
+        """Test the processing of single video with empty information."""
+        result, error = bvp_module.process_single_video(
+            {}, pd.DataFrame(), "root", "deriv", "tmp"
+        )
+        assert result is None
+        assert isinstance(error, dict)
+        assert "video_info is empty" in error["error"]
+
+    def test_process_single_video_missing_keys(self, bvp_module: ModuleType) -> None:
+        """Test the processing of single video with missing information."""
+        video_info = {"filename": "f.mp4"}  # missing participant_id, etc.
+        result, error = bvp_module.process_single_video(
+            video_info, pd.DataFrame(), "root", "deriv", "tmp"
+        )
+        assert result is None
+        assert "Missing required video_info keys" in error["error"]
+
+
 # Test fixtures for reusable data
 @pytest.fixture
 def sample_demographics() -> pd.DataFrame:

From 5117e7409b4a9f1e2b3bc8a487715e3aada0a36d Mon Sep 17 00:00:00 2001
From: lucie271
Date: Wed, 5 Nov 2025 16:43:06 -0500
Subject: [PATCH 35/36] Change SLURM array range to 0-18

---
 jobs/run_bids_convertor.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jobs/run_bids_convertor.sh b/jobs/run_bids_convertor.sh
index 3fe666c..b85d5f6 100644
--- a/jobs/run_bids_convertor.sh
+++ b/jobs/run_bids_convertor.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 #SBATCH --job-name=bids_processing
 #SBATCH --partition=mit_normal
-#SBATCH --array=0-19
+#SBATCH --array=0-18
 #SBATCH --output=logs/bids_%A_%a.out
 #SBATCH --error=logs/bids_%A_%a.err
 #SBATCH --mem=5G

From 78e611ec108a8284370fd744764a260181c7019c Mon Sep 17 00:00:00 2001
From: lucie271
Date: Thu, 6 Nov 2025 12:25:03 -0500
Subject: [PATCH 36/36] Fix mypy error and restore unknown-context test

---
 src/tests/test_BIDS_convertor.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/tests/test_BIDS_convertor.py b/src/tests/test_BIDS_convertor.py
index 073ee01..19b7567 100644
--- a/src/tests/test_BIDS_convertor.py
+++ b/src/tests/test_BIDS_convertor.py
@@ -165,8 +165,11 @@ def test_get_task_from_excel_row_valid_context(
         result = bvp_module.get_task_from_excel_row(row)
         assert result == "Playtime"  # cleaned via make_bids_task_label

-    def get_task_from_excel_row(self, row: pd.Series) -> None:
+    def test_get_task_from_excel_row_unknown_context(
+        self, bvp_module: ModuleType
+    ) -> None:
         """Test get task from excel row with unknown context."""
-        context = str(row.get("Context", "Other ")).strip()
-        result = bvp_module.make_bids_task_label(context)
-        assert result == "unknown"
+        # Assumes get_task_from_excel_row falls back to "unknown" without Context
+        row = pd.Series({"Activity": "free play"})  # no Context entry
+        result = bvp_module.get_task_from_excel_row(row)
+        assert result == "unknown"
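
A note on the --array=0-18 change in PATCH 35: a SLURM array of 0-18 launches 19
independent tasks, one per value of SLURM_ARRAY_TASK_ID. The patches do not show
how run_bids_convertor.sh hands that index to the convertor, so the snippet below
is only a minimal sketch of one common pattern; the video_root path, the N_TASKS
constant, and the stride partition are illustrative assumptions, not the
repository's actual logic.

    # Hypothetical sketch (not from this repository): splitting participant
    # folders across a 19-task SLURM array (--array=0-18).
    import os

    N_TASKS = 19  # must match the SBATCH --array=0-18 range
    task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", "0"))

    video_root = "/path/to/video_root"  # assumed: one folder per participant
    participants = sorted(
        name
        for name in os.listdir(video_root)
        if os.path.isdir(os.path.join(video_root, name))
    )

    # Stride partition: task k handles participants k, k + N_TASKS, ...,
    # so the 19 tasks cover disjoint, roughly equal shares of the list.
    my_share = participants[task_id::N_TASKS]
    print(f"Task {task_id}: processing {len(my_share)} participants")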