1
+ """
2
+ Generic temporal annotation processor for frame-based media (video, audio)
3
+ """
4
+ from abc import ABC , abstractmethod
5
+ from collections import defaultdict
6
+ from typing import Any , Dict , Generator , List , Union
7
+
8
+ from ...annotation_types .annotation import ClassificationAnnotation , ObjectAnnotation
9
+ from ...annotation_types .label import Label
10
+ from .classification import NDClassificationType , NDClassification
11
+ from .objects import NDObject
12
+
13
+
14
class TemporalAnnotationProcessor(ABC):
    """Abstract base class for processing temporal annotations (video, audio, etc.).

    Subclasses define which annotation types they handle and how frame data is
    extracted; `process_annotations` drives the shared group-then-serialize flow.
    """

    @abstractmethod
    def get_annotation_types(self) -> tuple:
        """Return a (classification_types, object_types) pair of tuples this processor handles."""
        pass

    @abstractmethod
    def should_group_annotations(self, annotation_group: List) -> bool:
        """Determine if annotations should be grouped into one feature."""
        pass

    @abstractmethod
    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
        """Extract frame data ({"start": ..., "end": ...} dicts) from an annotation group."""
        pass

    @abstractmethod
    def prepare_grouped_content(self, annotation_group: List) -> Any:
        """Prepare content for grouped annotations (may modify annotation.value)."""
        pass

    def process_annotations(
        self, label: Label
    ) -> Generator[Union[NDClassificationType, Any], None, None]:
        """Main processing method - generic for all temporal media.

        Groups the label's temporal annotations by feature (schema id, falling
        back to name), then serializes each group as classifications or objects.
        """
        temporal_annotations = defaultdict(list)
        classification_types, object_types = self.get_annotation_types()

        # Group annotations by feature name/schema
        for annot in label.annotations:
            if isinstance(annot, classification_types + object_types):
                temporal_annotations[annot.feature_schema_id or annot.name].append(annot)

        # Process each group. Groups are never empty: keys only exist after an append.
        for annotation_group in temporal_annotations.values():
            if isinstance(annotation_group[0], classification_types):
                yield from self._process_classification_group(annotation_group, label.data)
            elif isinstance(annotation_group[0], object_types):
                yield from self._process_object_group(annotation_group, label.data)

    def _process_classification_group(self, annotation_group, data):
        """Process classification annotations, grouped or individually."""
        if self.should_group_annotations(annotation_group):
            # Group into single feature with multiple keyframes
            annotation = annotation_group[0]  # Use first as template

            # Build frame data
            frames_data = self.build_frame_data(annotation_group)

            # Prepare content (may modify annotation.value)
            self.prepare_grouped_content(annotation_group)

            # Update with frame data. FIX: merge into any existing `extra`
            # instead of clobbering it, matching the individual path below.
            annotation.extra = {**(annotation.extra or {}), "frames": frames_data}
            yield NDClassification.from_common(annotation, data)
        else:
            # Process individually
            for annotation in annotation_group:
                frames_data = self.build_frame_data([annotation])
                if frames_data:
                    if not annotation.extra:
                        annotation.extra = {}
                    annotation.extra.update({"frames": frames_data})
                yield NDClassification.from_common(annotation, data)

    def _process_object_group(self, annotation_group, data):
        """Process object annotations - default to individual processing."""
        for annotation in annotation_group:
            yield NDObject.from_common(annotation, data)
83
+
84
+
85
class AudioTemporalProcessor(TemporalAnnotationProcessor):
    """Processor for audio temporal annotations"""

    def __init__(self,
                 group_text_annotations: bool = True,
                 enable_token_mapping: bool = True):
        self.group_text_annotations = group_text_annotations
        self.enable_token_mapping = enable_token_mapping

    def get_annotation_types(self) -> tuple:
        from ...annotation_types.audio import AudioClassificationAnnotation, AudioObjectAnnotation
        return (AudioClassificationAnnotation,), (AudioObjectAnnotation,)

    def should_group_annotations(self, annotation_group: List) -> bool:
        """Group TEXT classifications with multiple temporal instances"""
        if not self.group_text_annotations:
            return False

        from ...annotation_types.classification.classification import Text

        # Guard clauses: only multi-instance TEXT groups where every
        # annotation carries a frame qualify for grouping.
        if not isinstance(annotation_group[0].value, Text):
            return False
        if len(annotation_group) <= 1:
            return False
        return all(hasattr(ann, 'frame') for ann in annotation_group)

    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
        """Extract frame ranges from audio annotations"""
        ranges = []
        for ann in annotation_group:
            if not hasattr(ann, 'frame'):
                continue
            start = ann.frame
            # A missing or None end_frame collapses the range to a single frame.
            end = start
            if hasattr(ann, 'end_frame') and ann.end_frame is not None:
                end = ann.end_frame
            ranges.append({"start": start, "end": end})
        return ranges

    def prepare_grouped_content(self, annotation_group: List) -> None:
        """Prepare content for grouped audio annotations"""
        from ...annotation_types.classification.classification import Text

        template = annotation_group[0]
        if not isinstance(template.value, Text) or not self.enable_token_mapping:
            return

        # Build token mapping for TEXT annotations
        import json

        answers = [ann.value.answer for ann in annotation_group]
        per_frame = {str(ann.frame): ann.value.answer for ann in annotation_group}

        # Replace the template annotation's value with the combined structure.
        template.value = Text(answer=json.dumps({
            "default_text": " ".join(answers),
            "token_mapping": per_frame
        }))
141
+
142
class VideoTemporalProcessor(TemporalAnnotationProcessor):
    """Processor for video temporal annotations - matches existing behavior"""

    def get_annotation_types(self) -> tuple:
        from ...annotation_types.video import VideoClassificationAnnotation, VideoObjectAnnotation
        return (VideoClassificationAnnotation,), (VideoObjectAnnotation,)

    def should_group_annotations(self, annotation_group: List) -> bool:
        """Video always groups by segment ranges"""
        return True

    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
        """Build frame data using existing video segment logic"""
        from .label import NDLabel  # Import here to avoid circular import

        ranges = NDLabel._get_segment_frame_ranges(annotation_group)
        return [{"start": rng[0], "end": rng[-1]} for rng in ranges]

    def prepare_grouped_content(self, annotation_group: List) -> None:
        """Video doesn't modify content - uses existing value"""
        pass

    def _process_object_group(self, annotation_group, data):
        """Video objects use segment-based processing"""
        from .label import NDLabel

        # One segment per frame range: the keyframes whose frame falls inside it.
        segments = []
        for start_frame, end_frame in NDLabel._get_segment_frame_ranges(annotation_group):
            keyframes_in_range = [
                ann for ann in annotation_group
                if ann.keyframe and start_frame <= ann.frame <= end_frame
            ]
            segments.append(keyframes_in_range)
        yield NDObject.from_common(segments, data)
0 commit comments