
Commit 7a666cc

chore: clean up and organize code
1 parent 16896fd commit 7a666cc

Showing 4 changed files with 198 additions and 103 deletions.

libs/labelbox/src/labelbox/data/serialization/ndjson/label.py

Lines changed: 17 additions & 100 deletions

@@ -24,6 +24,7 @@
     VideoMaskAnnotation,
    VideoObjectAnnotation,
 )
+from typing import List
 from ...annotation_types.audio import (
     AudioClassificationAnnotation,
     AudioObjectAnnotation,
@@ -128,120 +129,36 @@ def _get_segment_frame_ranges(
     def _create_video_annotations(
         cls, label: Label
     ) -> Generator[Union[NDChecklistSubclass, NDRadioSubclass], None, None]:
-        video_annotations = defaultdict(list)
+        # Handle video mask annotations separately (special case)
         for annot in label.annotations:
-            if isinstance(
-                annot, (VideoClassificationAnnotation, VideoObjectAnnotation)
-            ):
-                video_annotations[annot.feature_schema_id or annot.name].append(
-                    annot
-                )
-            elif isinstance(annot, VideoMaskAnnotation):
+            if isinstance(annot, VideoMaskAnnotation):
                 yield NDObject.from_common(annotation=annot, data=label.data)
-
-        for annotation_group in video_annotations.values():
-            segment_frame_ranges = cls._get_segment_frame_ranges(
-                annotation_group
-            )
-            if isinstance(annotation_group[0], VideoClassificationAnnotation):
-                annotation = annotation_group[0]
-                frames_data = []
-                for frames in segment_frame_ranges:
-                    frames_data.append({"start": frames[0], "end": frames[-1]})
-                annotation.extra.update({"frames": frames_data})
-                yield NDClassification.from_common(annotation, label.data)
-
-            elif isinstance(annotation_group[0], VideoObjectAnnotation):
-                segments = []
-                for start_frame, end_frame in segment_frame_ranges:
-                    segment = []
-                    for annotation in annotation_group:
-                        if (
-                            annotation.keyframe
-                            and start_frame <= annotation.frame <= end_frame
-                        ):
-                            segment.append(annotation)
-                    segments.append(segment)
-                yield NDObject.from_common(segments, label.data)
+
+        # Use temporal processor for video classifications and objects
+        from .utils.temporal_processor import VideoTemporalProcessor
+        processor = VideoTemporalProcessor()
+        yield from processor.process_annotations(label)
 
     @classmethod
     def _create_audio_annotations(
         cls, label: Label
     ) -> Generator[Union[NDChecklistSubclass, NDRadioSubclass], None, None]:
-        """Create audio annotations
+        """Create audio annotations using generic temporal processor
 
         Args:
             label: Label containing audio annotations to be processed
 
         Yields:
             NDClassification or NDObject: Audio annotations in NDJSON format
         """
-        audio_annotations = defaultdict(list)
-        for annot in label.annotations:
-            if isinstance(
-                annot, (AudioClassificationAnnotation, AudioObjectAnnotation)
-            ):
-                audio_annotations[annot.feature_schema_id or annot.name].append(
-                    annot
-                )
-
-        for annotation_group in audio_annotations.values():
-            if isinstance(annotation_group[0], AudioClassificationAnnotation):
-                # For TEXT classifications, group them into one feature with multiple keyframes
-                from ...annotation_types.classification.classification import Text
-                if isinstance(annotation_group[0].value, Text):
-
-                    # Group all annotations into one feature with multiple keyframes
-                    # Use first annotation as template but create combined content
-                    annotation = annotation_group[0]
-                    frames_data = []
-                    all_tokens = []
-
-                    for individual_annotation in annotation_group:
-                        frame = individual_annotation.frame
-                        end_frame = individual_annotation.end_frame if hasattr(individual_annotation, 'end_frame') and individual_annotation.end_frame is not None else frame
-                        frames_data.append({"start": frame, "end": end_frame})
-                        all_tokens.append(individual_annotation.value.answer)
-
-                    # For per-token annotations, embed token mapping in the content
-                    # Create a JSON structure that includes both the default text and token mapping
-                    import json
-                    token_mapping = {}
-                    for individual_annotation in annotation_group:
-                        frame = individual_annotation.frame
-                        token_mapping[str(frame)] = individual_annotation.value.answer
-
-                    # Embed token mapping in the answer field as JSON
-                    content_with_mapping = {
-                        "default_text": " ".join(all_tokens),  # Fallback text
-                        "token_mapping": token_mapping  # Per-keyframe content
-                    }
-                    annotation.value = Text(answer=json.dumps(content_with_mapping))
-
-                    # Update the annotation with frames data
-                    annotation.extra = {"frames": frames_data}
-                    yield NDClassification.from_common(annotation, label.data)
-                else:
-                    # For non-TEXT classifications, process each individually
-                    for annotation in annotation_group:
-
-                        # Ensure frame data is properly formatted in extra field
-                        if hasattr(annotation, 'frame') and annotation.frame is not None:
-                            if not annotation.extra:
-                                annotation.extra = {}
-
-                            if 'frames' not in annotation.extra:
-                                end_frame = annotation.end_frame if hasattr(annotation, 'end_frame') and annotation.end_frame is not None else annotation.frame
-                                frames_data = [{"start": annotation.frame, "end": end_frame}]
-                                annotation.extra.update({"frames": frames_data})
-
-                        yield NDClassification.from_common(annotation, label.data)
-
-            elif isinstance(annotation_group[0], AudioObjectAnnotation):
-                # For audio objects, treat like single video frame
-                annotation = annotation_group[0]
-                yield NDObject.from_common(annotation, label.data)
+        from .utils.temporal_processor import AudioTemporalProcessor
+
+        # Use processor with configurable behavior
+        processor = AudioTemporalProcessor(
+            group_text_annotations=True,  # Group multiple TEXT annotations into one feature
+            enable_token_mapping=True  # Enable per-keyframe token content
+        )
+        yield from processor.process_annotations(label)
 
     @classmethod
     def _create_non_video_annotations(cls, label: Label):
libs/labelbox/src/labelbox/data/serialization/ndjson/objects.py

Lines changed: 3 additions & 3 deletions

@@ -748,7 +748,7 @@ def from_common(
             return obj.from_common(annotation, data)
         elif isinstance(annotation, AudioObjectAnnotation):
             # Handle audio object annotation like single video frame
-            return cls._handle_single_audio_annotation(annotation, data)
+            return cls._serialize_audio_object_annotation(annotation, data)
 
         subclasses = [
             NDSubclassification.from_common(annot)
@@ -773,8 +773,8 @@ def from_common(
         )
 
     @classmethod
-    def _handle_single_audio_annotation(cls, annotation: AudioObjectAnnotation, data: GenericDataRowData):
-        """Handle single audio annotation like video frame
+    def _serialize_audio_object_annotation(cls, annotation: AudioObjectAnnotation, data: GenericDataRowData):
+        """Serialize audio object annotation with temporal information
 
         Args:
             annotation: Audio object annotation to process
libs/labelbox/src/labelbox/data/serialization/ndjson/utils/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+# Utils package for NDJSON serialization helpers
libs/labelbox/src/labelbox/data/serialization/ndjson/utils/temporal_processor.py

Lines changed: 177 additions & 0 deletions

@@ -0,0 +1,177 @@
+"""
+Generic temporal annotation processor for frame-based media (video, audio)
+"""
+from abc import ABC, abstractmethod
+from collections import defaultdict
+from typing import Any, Dict, Generator, List, Union
+
+# This module lives in ndjson/utils/, so annotation_types sits four
+# package levels up and the ndjson modules two levels up
+from ....annotation_types.annotation import ClassificationAnnotation, ObjectAnnotation
+from ....annotation_types.label import Label
+from ..classification import NDClassificationType, NDClassification
+from ..objects import NDObject
+
+
+class TemporalAnnotationProcessor(ABC):
+    """Abstract base class for processing temporal annotations (video, audio, etc.)"""
+
+    @abstractmethod
+    def get_annotation_types(self) -> tuple:
+        """Return tuple of annotation types this processor handles"""
+        pass
+
+    @abstractmethod
+    def should_group_annotations(self, annotation_group: List) -> bool:
+        """Determine if annotations should be grouped into one feature"""
+        pass
+
+    @abstractmethod
+    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
+        """Extract frame data from annotation group"""
+        pass
+
+    @abstractmethod
+    def prepare_grouped_content(self, annotation_group: List) -> Any:
+        """Prepare content for grouped annotations (may modify annotation.value)"""
+        pass
+
+    def process_annotations(self, label: Label) -> Generator[Union[NDClassificationType, Any], None, None]:
+        """Main processing method - generic for all temporal media"""
+        temporal_annotations = defaultdict(list)
+        classification_types, object_types = self.get_annotation_types()
+
+        # Group annotations by feature name/schema
+        for annot in label.annotations:
+            if isinstance(annot, classification_types + object_types):
+                temporal_annotations[annot.feature_schema_id or annot.name].append(annot)
+
+        # Process each group
+        for annotation_group in temporal_annotations.values():
+            if isinstance(annotation_group[0], classification_types):
+                yield from self._process_classification_group(annotation_group, label.data)
+            elif isinstance(annotation_group[0], object_types):
+                yield from self._process_object_group(annotation_group, label.data)
+
+    def _process_classification_group(self, annotation_group, data):
+        """Process classification annotations"""
+        if self.should_group_annotations(annotation_group):
+            # Group into single feature with multiple keyframes
+            annotation = annotation_group[0]  # Use first as template
+
+            # Build frame data
+            frames_data = self.build_frame_data(annotation_group)
+
+            # Prepare content (may modify annotation.value)
+            self.prepare_grouped_content(annotation_group)
+
+            # Update with frame data
+            annotation.extra = {"frames": frames_data}
+            yield NDClassification.from_common(annotation, data)
+        else:
+            # Process individually
+            for annotation in annotation_group:
+                frames_data = self.build_frame_data([annotation])
+                if frames_data:
+                    if not annotation.extra:
+                        annotation.extra = {}
+                    annotation.extra.update({"frames": frames_data})
+                yield NDClassification.from_common(annotation, data)
+
+    def _process_object_group(self, annotation_group, data):
+        """Process object annotations - default to individual processing"""
+        for annotation in annotation_group:
+            yield NDObject.from_common(annotation, data)
+
+
+class AudioTemporalProcessor(TemporalAnnotationProcessor):
+    """Processor for audio temporal annotations"""
+
+    def __init__(self,
+                 group_text_annotations: bool = True,
+                 enable_token_mapping: bool = True):
+        self.group_text_annotations = group_text_annotations
+        self.enable_token_mapping = enable_token_mapping
+
+    def get_annotation_types(self) -> tuple:
+        from ....annotation_types.audio import AudioClassificationAnnotation, AudioObjectAnnotation
+        return (AudioClassificationAnnotation,), (AudioObjectAnnotation,)
+
+    def should_group_annotations(self, annotation_group: List) -> bool:
+        """Group TEXT classifications with multiple temporal instances"""
+        if not self.group_text_annotations:
+            return False
+
+        from ....annotation_types.classification.classification import Text
+        return (isinstance(annotation_group[0].value, Text) and
+                len(annotation_group) > 1 and
+                all(hasattr(ann, 'frame') for ann in annotation_group))
+
+    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
+        """Extract frame ranges from audio annotations"""
+        frames_data = []
+        for annotation in annotation_group:
+            if hasattr(annotation, 'frame'):
+                frame = annotation.frame
+                end_frame = (annotation.end_frame
+                             if hasattr(annotation, 'end_frame') and annotation.end_frame is not None
+                             else frame)
+                frames_data.append({"start": frame, "end": end_frame})
+        return frames_data
+
+    def prepare_grouped_content(self, annotation_group: List) -> None:
+        """Prepare content for grouped audio annotations"""
+        from ....annotation_types.classification.classification import Text
+
+        if not isinstance(annotation_group[0].value, Text) or not self.enable_token_mapping:
+            return
+
+        # Build token mapping for TEXT annotations
+        import json
+
+        all_content = [ann.value.answer for ann in annotation_group]
+        token_mapping = {str(ann.frame): ann.value.answer for ann in annotation_group}
+
+        content_structure = json.dumps({
+            "default_text": " ".join(all_content),
+            "token_mapping": token_mapping
+        })
+
+        # Update the template annotation
+        annotation_group[0].value = Text(answer=content_structure)
+
+
+class VideoTemporalProcessor(TemporalAnnotationProcessor):
+    """Processor for video temporal annotations - matches existing behavior"""
+
+    def get_annotation_types(self) -> tuple:
+        from ....annotation_types.video import VideoClassificationAnnotation, VideoObjectAnnotation
+        return (VideoClassificationAnnotation,), (VideoObjectAnnotation,)
+
+    def should_group_annotations(self, annotation_group: List) -> bool:
+        """Video always groups by segment ranges"""
+        return True
+
+    def build_frame_data(self, annotation_group: List) -> List[Dict[str, Any]]:
+        """Build frame data using existing video segment logic"""
+        from ..label import NDLabel  # Import here to avoid circular import
+
+        segment_frame_ranges = NDLabel._get_segment_frame_ranges(annotation_group)
+        return [{"start": frames[0], "end": frames[-1]} for frames in segment_frame_ranges]
+
+    def prepare_grouped_content(self, annotation_group: List) -> None:
+        """Video doesn't modify content - uses existing value"""
+        pass
+
+    def _process_object_group(self, annotation_group, data):
+        """Video objects use segment-based processing"""
+        from ..label import NDLabel
+
+        segment_frame_ranges = NDLabel._get_segment_frame_ranges(annotation_group)
+        segments = []
+        for start_frame, end_frame in segment_frame_ranges:
+            segment = []
+            for annotation in annotation_group:
+                if (annotation.keyframe and
+                        start_frame <= annotation.frame <= end_frame):
+                    segment.append(annotation)
+            segments.append(segment)
+        yield NDObject.from_common(segments, data)
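
To make the token-mapping encoding concrete, here is a small stand-alone sketch of the JSON payload that prepare_grouped_content builds for grouped TEXT annotations; the frame numbers and tokens are made-up example values:

    # Stand-alone sketch of the grouped-content JSON produced above.
    # Frames ("100", "250") and tokens are hypothetical example values.
    import json

    token_mapping = {"100": "hello", "250": "world"}  # frame -> token content
    content_structure = json.dumps({
        "default_text": "hello world",   # fallback: all tokens joined by spaces
        "token_mapping": token_mapping,  # per-keyframe content
    })
    print(content_structure)
    # {"default_text": "hello world", "token_mapping": {"100": "hello", "250": "world"}}

The resulting string becomes the answer of the template annotation's Text value, so consumers that only understand plain text still receive the joined default_text.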
