diff --git a/examples/README.md b/examples/README.md
index 924d1017d..842286b2d 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -16,15 +16,25 @@
+
+ Projects |
+  |
+  |
+
Ontologies |
 |
 |
- Quick Start |
-  |
-  |
+ Batches |
+  |
+  |
+
+
+ Custom Embeddings |
+  |
+  |
Data Rows |
@@ -37,25 +47,15 @@
 |
- Batches |
-  |
-  |
-
-
- Projects |
-  |
-  |
+ Quick Start |
+  |
+  |
Data Row Metadata |
 |
 |
-
- Custom Embeddings |
-  |
-  |
-
User Management |
 |
@@ -75,25 +75,25 @@
+
+ Export Data |
+  |
+  |
+
Export V1 to V2 Migration Support |
 |
 |
-
- Exporting to CSV |
-  |
-  |
-
Composite Mask Export |
 |
 |
- Export Data |
-  |
-  |
+ Exporting to CSV |
+  |
+  |
@@ -143,36 +143,11 @@
-
- Tiled |
-  |
-  |
-
Text |
 |
 |
-
- PDF |
-  |
-  |
-
-
- Video |
-  |
-  |
-
-
- Audio |
-  |
-  |
-
-
- Conversational |
-  |
-  |
-
HTML |
 |
@@ -188,11 +163,36 @@
 |
 |
+
+ Video |
+  |
+  |
+
+
+ Audio |
+  |
+  |
+
Conversational LLM |
 |
 |
+
+ Tiled |
+  |
+  |
+
+
+ PDF |
+  |
+  |
+
+
+ Conversational |
+  |
+  |
+
@@ -208,9 +208,9 @@
- Langchain |
-  |
-  |
+ Meta SAM |
+  |
+  |
Meta SAM Video |
@@ -218,20 +218,20 @@
 |
- Meta SAM |
-  |
-  |
+ Huggingface Custom Embeddings |
+  |
+  |
+
+
+ Langchain |
+  |
+  |
Import YOLOv8 Annotations |
 |
 |
-
- Huggingface Custom Embeddings |
-  |
-  |
-
@@ -247,25 +247,25 @@
- Model Predictions to Project |
-  |
-  |
+ Custom Metrics Basics |
+  |
+  |
Custom Metrics Demo |
 |
 |
-
- Custom Metrics Basics |
-  |
-  |
-
Model Slices |
 |
 |
+
+ Model Predictions to Project |
+  |
+  |
+
@@ -280,25 +280,15 @@
-
- HTML Predictions |
-  |
-  |
-
Text Predictions |
 |
 |
- Video Predictions |
-  |
-  |
-
-
- Conversational Predictions |
-  |
-  |
+ PDF Predictions |
+  |
+  |
Geospatial Predictions |
@@ -306,9 +296,14 @@
 |
- PDF Predictions |
-  |
-  |
+ Conversational Predictions |
+  |
+  |
+
+
+ Video Predictions |
+  |
+  |
Image Predictions |
@@ -320,6 +315,11 @@
 |
 |
+
+ HTML Predictions |
+  |
+  |
+
diff --git a/examples/annotation_import/audio.ipynb b/examples/annotation_import/audio.ipynb
index 437130a9e..4f20127ee 100644
--- a/examples/annotation_import/audio.ipynb
+++ b/examples/annotation_import/audio.ipynb
@@ -27,6 +27,30 @@
],
"cell_type": "markdown"
},
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ " \n",
+ " | \n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": [
+ "\n",
+ " \n",
+ " | \n",
+ "\n",
+ "\n",
+ " \n",
+ " | "
+ ],
+ "cell_type": "markdown"
+ },
{
"metadata": {},
"source": [
@@ -170,7 +194,7 @@
},
{
"metadata": {},
- "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
+ "source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n # Temporal classification for token-level annotations\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"User Speaker\",\n scope=lb.Classification.Scope.INDEX, # INDEX scope for temporal\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
"cell_type": "code",
"outputs": [],
"execution_count": null
@@ -223,6 +247,27 @@
],
"cell_type": "markdown"
},
+ {
+ "metadata": {},
+ "source": [
+ "\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": "",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
{
"metadata": {},
"source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))",
@@ -252,6 +297,29 @@
],
"cell_type": "markdown"
},
+ {
+ "metadata": {},
+ "source": [
+ "## Temporal Audio Annotations\n",
+ "\n",
+ "You can create temporal annotations for individual tokens (words) with precise timing:\n"
+ ],
+ "cell_type": "markdown"
+ },
+ {
+ "metadata": {},
+ "source": "# Define tokens with precise timing (from demo script)\ntokens_data = [\n (\"Hello\", 586, 770), # Hello: frames 586-770\n (\"AI\", 771, 955), # AI: frames 771-955\n (\"how\", 956, 1140), # how: frames 956-1140\n (\"are\", 1141, 1325), # are: frames 1141-1325\n (\"you\", 1326, 1510), # you: frames 1326-1510\n (\"doing\", 1511, 1695), # doing: frames 1511-1695\n (\"today\", 1696, 1880), # today: frames 1696-1880\n]\n\n# Create temporal annotations for each token\ntemporal_annotations = []\nfor token, start_frame, end_frame in tokens_data:\n token_annotation = lb_types.AudioClassificationAnnotation(\n frame=start_frame,\n end_frame=end_frame,\n name=\"User Speaker\",\n value=lb_types.Text(answer=token),\n )\n temporal_annotations.append(token_annotation)\n\nprint(f\"Created {len(temporal_annotations)} temporal token annotations\")",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
+ {
+ "metadata": {},
+ "source": "# Create label with both regular and temporal annotations\nlabel_with_temporal = []\nlabel_with_temporal.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation] +\n temporal_annotations,\n ))\n\nprint(\n f\"Created label with {len(label_with_temporal[0].annotations)} total annotations\"\n)\nprint(f\" - Regular annotations: 3\")\nprint(f\" - Temporal annotations: {len(temporal_annotations)}\")",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
{
"metadata": {},
"source": [
@@ -260,6 +328,13 @@
],
"cell_type": "markdown"
},
+ {
+ "metadata": {},
+ "source": "# Upload temporal annotations via MAL\ntemporal_upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"temporal_mal_job-{str(uuid.uuid4())}\",\n predictions=label_with_temporal,\n)\n\ntemporal_upload_job.wait_until_done()\nprint(\"Temporal upload completed!\")\nprint(\"Errors:\", temporal_upload_job.errors)\nprint(\"Status:\", temporal_upload_job.statuses)",
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null
+ },
{
"metadata": {},
"source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py
index fc75652cf..9f59b5197 100644
--- a/libs/labelbox/src/labelbox/data/annotation_types/__init__.py
+++ b/libs/labelbox/src/labelbox/data/annotation_types/__init__.py
@@ -19,6 +19,8 @@
from .video import MaskInstance
from .video import VideoMaskAnnotation
+from .audio import AudioClassificationAnnotation
+
from .ner import ConversationEntity
from .ner import DocumentEntity
from .ner import DocumentTextSelection
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/audio.py b/libs/labelbox/src/labelbox/data/annotation_types/audio.py
new file mode 100644
index 000000000..c86fba668
--- /dev/null
+++ b/libs/labelbox/src/labelbox/data/annotation_types/audio.py
@@ -0,0 +1,37 @@
+from typing import Optional
+from pydantic import Field, AliasChoices
+
+from labelbox.data.annotation_types.annotation import (
+ ClassificationAnnotation,
+)
+
+
+class AudioClassificationAnnotation(ClassificationAnnotation):
+ """Audio classification for specific time range
+
+ Examples:
+ - Speaker identification from 2500ms to 4100ms
+ - Audio quality assessment for a segment
+ - Language detection for audio segments
+
+ Args:
+ name (Optional[str]): Name of the classification
+ feature_schema_id (Optional[Cuid]): Feature schema identifier
+ value (Union[Text, Checklist, Radio]): Classification value
+        start_frame (int): Start of the time range in milliseconds (e.g., 2500 = 2.5 seconds);
+            also accepted under the input alias ``frame``
+        end_frame (Optional[int]): End of the time range in milliseconds; when omitted the
+            annotation covers only start_frame
+ segment_index (Optional[int]): Index of audio segment this annotation belongs to
+ extra (Dict[str, Any]): Additional metadata
+ """
+
+ start_frame: int = Field(
+ validation_alias=AliasChoices("start_frame", "frame"),
+ serialization_alias="start_frame",
+ )
+ end_frame: Optional[int] = Field(
+ default=None,
+ validation_alias=AliasChoices("end_frame", "endFrame"),
+ serialization_alias="end_frame",
+ )
+ segment_index: Optional[int] = None
+
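For reference, a minimal sketch (outside the patch) of constructing the new annotation type; it mirrors the field aliases defined above and the usage in the notebook and tests elsewhere in this diff:

```python
import labelbox.types as lb_types

# start_frame also accepts the "frame" alias; end_frame is optional
ann = lb_types.AudioClassificationAnnotation(
    frame=2500,        # start of the segment, in milliseconds
    end_frame=4100,    # end of the segment, in milliseconds
    name="User Speaker",
    value=lb_types.Text(answer="Hello"),
)
assert ann.start_frame == 2500 and ann.end_frame == 4100
```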
diff --git a/libs/labelbox/src/labelbox/data/annotation_types/label.py b/libs/labelbox/src/labelbox/data/annotation_types/label.py
index d13fb8f20..228512a5d 100644
--- a/libs/labelbox/src/labelbox/data/annotation_types/label.py
+++ b/libs/labelbox/src/labelbox/data/annotation_types/label.py
@@ -13,6 +13,7 @@
from .metrics import ScalarMetric, ConfusionMatrixMetric
from .video import VideoClassificationAnnotation
from .video import VideoObjectAnnotation, VideoMaskAnnotation
+from .audio import AudioClassificationAnnotation
from .mmc import MessageEvaluationTaskAnnotation
from pydantic import BaseModel, field_validator
@@ -44,6 +45,7 @@ class Label(BaseModel):
ClassificationAnnotation,
ObjectAnnotation,
VideoMaskAnnotation,
+ AudioClassificationAnnotation,
ScalarMetric,
ConfusionMatrixMetric,
RelationshipAnnotation,
@@ -75,15 +77,23 @@ def _get_annotations_by_type(self, annotation_type):
def frame_annotations(
self,
- ) -> Dict[str, Union[VideoObjectAnnotation, VideoClassificationAnnotation]]:
+    ) -> Dict[int, List[Union[VideoObjectAnnotation, VideoClassificationAnnotation, AudioClassificationAnnotation]]]:
+ """Get temporal annotations organized by frame
+
+ Returns:
+ Dict[int, List]: Dictionary mapping frame (milliseconds) to list of temporal annotations
+
+ Example:
+ >>> label.frame_annotations()
+ {2500: [VideoClassificationAnnotation(...), AudioClassificationAnnotation(...)]}
+ """
frame_dict = defaultdict(list)
for annotation in self.annotations:
- if isinstance(
- annotation,
- (VideoObjectAnnotation, VideoClassificationAnnotation),
- ):
+ if isinstance(annotation, (VideoObjectAnnotation, VideoClassificationAnnotation)):
frame_dict[annotation.frame].append(annotation)
- return frame_dict
+ elif isinstance(annotation, AudioClassificationAnnotation):
+ frame_dict[annotation.start_frame].append(annotation)
+ return dict(frame_dict)
def add_url_to_masks(self, signer) -> "Label":
"""
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
index 2f4799d13..5fc19c004 100644
--- a/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
@@ -2,7 +2,7 @@
import copy
from itertools import groupby
from operator import itemgetter
-from typing import Generator, List, Tuple, Union
+from typing import Any, Dict, Generator, List, Tuple, Union
from uuid import uuid4
from pydantic import BaseModel
@@ -24,6 +24,11 @@
VideoMaskAnnotation,
VideoObjectAnnotation,
)
+from ...annotation_types.audio import (
+ AudioClassificationAnnotation,
+)
+from .temporal import create_audio_ndjson_annotations
from labelbox.types import DocumentRectangle, DocumentEntity
from .classification import (
NDChecklistSubclass,
@@ -69,6 +74,7 @@ def from_common(
yield from cls._create_relationship_annotations(label)
yield from cls._create_non_video_annotations(label)
yield from cls._create_video_annotations(label)
+ yield from cls._create_audio_annotations(label)
@staticmethod
def _get_consecutive_frames(
@@ -80,6 +86,7 @@ def _get_consecutive_frames(
consecutive.append((group[0], group[-1]))
return consecutive
+
@classmethod
def _get_segment_frame_ranges(
cls,
@@ -159,6 +166,32 @@ def _create_video_annotations(
segments.append(segment)
yield NDObject.from_common(segments, label.data)
+ @classmethod
+ def _create_audio_annotations(
+ cls, label: Label
+ ) -> Generator[BaseModel, None, None]:
+ """Create audio annotations with nested classifications using modular hierarchy builder."""
+ # Extract audio annotations from the label
+ audio_annotations = [
+ annot for annot in label.annotations
+ if isinstance(annot, AudioClassificationAnnotation)
+ ]
+
+ if not audio_annotations:
+ return
+
+ # Use the modular hierarchy builder to create NDJSON annotations
+ ndjson_annotations = create_audio_ndjson_annotations(
+ audio_annotations,
+ label.data.global_key
+ )
+
+ # Yield each NDJSON annotation
+ for annotation in ndjson_annotations:
+ yield annotation
+
@classmethod
def _create_non_video_annotations(cls, label: Label):
non_video_annotations = [
@@ -170,6 +203,7 @@ def _create_non_video_annotations(cls, label: Label):
VideoClassificationAnnotation,
VideoObjectAnnotation,
VideoMaskAnnotation,
+ AudioClassificationAnnotation,
RelationshipAnnotation,
),
)
@@ -187,7 +221,7 @@ def _create_non_video_annotations(cls, label: Label):
yield NDMessageTask.from_common(annotation, label.data)
else:
raise TypeError(
- f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value',annotation))}`"
+ f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value', annotation))}`"
)
@classmethod
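For context, the NDJSON shape produced for a single top-level audio radio classification looks roughly like this (values are illustrative and mirror the assertions in the new test file below):

```python
# Illustrative payload emitted by _create_audio_annotations for "radio_class"
expected_obj = {
    "name": "radio_class",
    "answer": [
        {"name": "first_radio_answer", "frames": [{"start": 200, "end": 1500}]},
        {"name": "second_radio_answer", "frames": [{"start": 1550, "end": 1700}]},
    ],
    "dataRow": {"globalKey": "audio_top_level_only"},
}
```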
diff --git a/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py b/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py
new file mode 100644
index 000000000..da9af289d
--- /dev/null
+++ b/libs/labelbox/src/labelbox/data/serialization/ndjson/temporal.py
@@ -0,0 +1,339 @@
+"""
+Generic hierarchical classification builder for NDJSON serialization.
+
+This module provides reusable components for constructing nested hierarchical
+classifications from temporal annotations (audio, video, etc.), separating the
+complex logic from the main serialization code.
+"""
+
+from collections import defaultdict
+from typing import Any, Callable, Dict, Generic, List, Optional, Set, Tuple, TypeVar
+from pydantic import BaseModel
+
+from ...annotation_types.audio import AudioClassificationAnnotation
+
+# Generic type for temporal annotations
+TemporalAnnotation = TypeVar('TemporalAnnotation', bound=Any)
+
+
+class TemporalFrame:
+ """Represents a time frame in temporal annotations (audio, video, etc.)."""
+
+    def __init__(self, start: int, end: Optional[int] = None):
+ self.start = start
+ self.end = end or start
+
+ def contains(self, other: "TemporalFrame") -> bool:
+ """Check if this frame contains another frame."""
+ return (self.start <= other.start and
+ self.end is not None and other.end is not None and
+ self.end >= other.end)
+
+ def strictly_contains(self, other: "TemporalFrame") -> bool:
+ """Check if this frame strictly contains another frame (not equal)."""
+ return (self.contains(other) and
+ (self.start < other.start or self.end > other.end))
+
+ def overlaps(self, other: "TemporalFrame") -> bool:
+ """Check if this frame overlaps with another frame."""
+ return not (self.end < other.start or other.end < self.start)
+
+ def to_dict(self) -> Dict[str, int]:
+ """Convert to dictionary format."""
+ return {"start": self.start, "end": self.end}
+
+
+class AnnotationGroupManager(Generic[TemporalAnnotation]):
+ """Manages grouping of temporal annotations by classification type."""
+
+    def __init__(self, annotations: List[TemporalAnnotation], frame_extractor: Callable[[TemporalAnnotation], Tuple[int, int]]):
+ self.annotations = annotations
+ self.frame_extractor = frame_extractor # Function to extract (start, end) from annotation
+ self.groups = self._group_annotations()
+ self.root_groups = self._identify_root_groups()
+
+ def _group_annotations(self) -> Dict[str, List[TemporalAnnotation]]:
+ """Group annotations by classification key (schema_id or name)."""
+ groups = defaultdict(list)
+ for annot in self.annotations:
+ key = annot.feature_schema_id or annot.name
+ groups[key].append(annot)
+ return dict(groups)
+
+ def _identify_root_groups(self) -> Set[str]:
+ """Identify root groups that are not fully contained by other groups."""
+ root_groups = set()
+
+ for group_key, group_anns in self.groups.items():
+ if not self._is_group_nested(group_key):
+ root_groups.add(group_key)
+
+ return root_groups
+
+ def _is_group_nested(self, group_key: str) -> bool:
+ """Check if a group is fully contained by other groups."""
+ group_anns = self.groups[group_key]
+
+ for ann in group_anns:
+ start, end = self.frame_extractor(ann)
+ ann_frame = TemporalFrame(start, end)
+
+ # Check if this annotation is contained by any other group
+ contained = False
+ for other_key, other_anns in self.groups.items():
+ if other_key == group_key:
+ continue
+
+ for parent in other_anns:
+ parent_start, parent_end = self.frame_extractor(parent)
+ parent_frame = TemporalFrame(parent_start, parent_end)
+ if parent_frame.contains(ann_frame):
+ contained = True
+ break
+
+ if contained:
+ break
+
+ if not contained:
+ return False # Group is not fully nested
+
+ return True # All annotations were contained somewhere
+
+ def get_group_display_name(self, group_key: str) -> str:
+ """Get display name for a group."""
+ group_anns = self.groups[group_key]
+ # Prefer the first non-empty annotation name
+ for ann in group_anns:
+ if ann.name:
+ return ann.name
+ return group_key
+
+    def get_annotations_within_frames(self, frames: List[TemporalFrame], exclude_group: Optional[str] = None) -> List[TemporalAnnotation]:
+ """Get all annotations within the given frames, excluding specified group."""
+ contained = []
+
+ for group_key, group_anns in self.groups.items():
+ if group_key == exclude_group:
+ continue
+
+ for ann in group_anns:
+ start, end = self.frame_extractor(ann)
+ ann_frame = TemporalFrame(start, end)
+ if any(frame.contains(ann_frame) for frame in frames):
+ contained.append(ann)
+
+ return contained
+
+
+class ValueGrouper(Generic[TemporalAnnotation]):
+ """Handles grouping of annotations by their values and answer construction."""
+
+    def __init__(self, frame_extractor: Callable[[TemporalAnnotation], Tuple[int, int]]):
+ self.frame_extractor = frame_extractor # Function to extract (start, end) from annotation
+
+ def group_by_value(self, annotations: List[TemporalAnnotation]) -> List[Dict[str, Any]]:
+ """Group annotations by logical value and produce answer entries."""
+ value_buckets = defaultdict(list)
+
+ for ann in annotations:
+ key = self._get_value_key(ann)
+ value_buckets[key].append(ann)
+
+ entries = []
+ for _, anns in value_buckets.items():
+ first = anns[0]
+ frames = [self.frame_extractor(a) for a in anns]
+ frame_dicts = [{"start": start, "end": end} for start, end in frames]
+
+ entry = self._create_answer_entry(first, frame_dicts)
+ entries.append(entry)
+
+ return entries
+
+ def _get_value_key(self, ann: TemporalAnnotation) -> str:
+ """Get a stable key for grouping annotations by value."""
+ if hasattr(ann.value, "answer"):
+ if isinstance(ann.value.answer, list):
+ # Checklist: stable key from selected option names
+ return str(sorted([opt.name for opt in ann.value.answer]))
+ elif hasattr(ann.value.answer, "name"):
+ # Radio: option name
+ return ann.value.answer.name
+ else:
+ # Text: the string value
+ return ann.value.answer
+ else:
+ return str(ann.value)
+
+ def _create_answer_entry(self, first_ann: TemporalAnnotation, frames: List[Dict[str, int]]) -> Dict[str, Any]:
+ """Create an answer entry from the first annotation and frames."""
+ if hasattr(first_ann.value, "answer") and isinstance(first_ann.value.answer, list):
+ # Checklist: emit one entry per distinct option present in this bucket
+ entries = []
+ for opt_name in sorted([o.name for o in first_ann.value.answer]):
+ entries.append({"name": opt_name, "frames": frames})
+ return entries[0] if len(entries) == 1 else {"options": entries, "frames": frames}
+ elif hasattr(first_ann.value, "answer") and hasattr(first_ann.value.answer, "name"):
+ # Radio
+ return {"name": first_ann.value.answer.name, "frames": frames}
+ else:
+ # Text
+ return {"value": first_ann.value.answer, "frames": frames}
+
+
+class HierarchyBuilder(Generic[TemporalAnnotation]):
+ """Builds hierarchical nested classifications from temporal annotations."""
+
+ def __init__(self, group_manager: AnnotationGroupManager[TemporalAnnotation], value_grouper: ValueGrouper[TemporalAnnotation]):
+ self.group_manager = group_manager
+ self.value_grouper = value_grouper
+
+ def build_hierarchy(self) -> List[Dict[str, Any]]:
+ """Build the complete hierarchical structure."""
+ results = []
+
+ for group_key in self.group_manager.root_groups:
+ group_anns = self.group_manager.groups[group_key]
+ top_entries = self.value_grouper.group_by_value(group_anns)
+
+ # Attach nested classifications to each top-level entry
+ for entry in top_entries:
+ frames = [TemporalFrame(f["start"], f["end"]) for f in entry.get("frames", [])]
+ nested = self._build_nested_for_frames(frames, group_key)
+ if nested:
+ entry["classifications"] = nested
+
+ results.append({
+ "name": self.group_manager.get_group_display_name(group_key),
+ "answer": top_entries,
+ })
+
+ return results
+
+ def _build_nested_for_frames(self, parent_frames: List[TemporalFrame], exclude_group: str) -> List[Dict[str, Any]]:
+ """Recursively build nested classifications for specific parent frames."""
+ nested = []
+
+ # Get all annotations within parent frames
+ all_contained = self.group_manager.get_annotations_within_frames(parent_frames, exclude_group)
+
+ # Group by classification type and process each group
+ for group_key, group_anns in self.group_manager.groups.items():
+ if group_key == exclude_group or group_key in self.group_manager.root_groups:
+ continue
+
+ # Filter annotations that are contained by parent frames
+ candidate_anns = []
+ for ann in group_anns:
+ start, end = self.group_manager.frame_extractor(ann)
+ ann_frame = TemporalFrame(start, end)
+ if any(frame.contains(ann_frame) for frame in parent_frames):
+ candidate_anns.append(ann)
+
+ if not candidate_anns:
+ continue
+
+ # Keep only immediate children (not strictly contained by other contained annotations)
+ child_anns = self._filter_immediate_children(candidate_anns, all_contained)
+ if not child_anns:
+ continue
+
+ # Build this child classification block
+ child_entries = self.value_grouper.group_by_value(child_anns)
+
+ # Recursively attach further nested classifications
+ for entry in child_entries:
+ entry_frames = [TemporalFrame(f["start"], f["end"]) for f in entry.get("frames", [])]
+ child_nested = self._build_nested_for_frames(entry_frames, group_key)
+ if child_nested:
+ entry["classifications"] = child_nested
+
+ nested.append({
+ "name": self.group_manager.get_group_display_name(group_key),
+ "answer": child_entries,
+ })
+
+ return nested
+
+ def _filter_immediate_children(self, candidates: List[TemporalAnnotation],
+ all_contained: List[TemporalAnnotation]) -> List[TemporalAnnotation]:
+ """Filter to keep only immediate children (not strictly contained by others)."""
+ immediate_children = []
+
+ for candidate in candidates:
+ start, end = self.group_manager.frame_extractor(candidate)
+ candidate_frame = TemporalFrame(start, end)
+
+ # Check if this candidate is strictly contained by any other contained annotation
+ has_closer_container = False
+ for other in all_contained:
+ if other is candidate:
+ continue
+ other_start, other_end = self.group_manager.frame_extractor(other)
+ other_frame = TemporalFrame(other_start, other_end)
+ if other_frame.strictly_contains(candidate_frame):
+ has_closer_container = True
+ break
+
+ if not has_closer_container:
+ immediate_children.append(candidate)
+
+ return immediate_children
+
+
+class TemporalNDJSON(BaseModel):
+ """NDJSON format for temporal annotations (audio, video, etc.)."""
+ name: str
+ answer: List[Dict[str, Any]]
+ dataRow: Dict[str, str]
+
+
+def create_temporal_ndjson_annotations(annotations: List[TemporalAnnotation],
+                                       data_global_key: str,
+                                       frame_extractor: Callable[[TemporalAnnotation], Tuple[int, int]]) -> List[TemporalNDJSON]:
+ """
+ Create NDJSON temporal annotations with hierarchical structure.
+
+ Args:
+ annotations: List of temporal classification annotations
+ data_global_key: Global key for the data row
+ frame_extractor: Function that extracts (start, end) from annotation
+
+ Returns:
+ List of TemporalNDJSON objects
+ """
+ if not annotations:
+ return []
+
+ group_manager = AnnotationGroupManager(annotations, frame_extractor)
+ value_grouper = ValueGrouper(frame_extractor)
+ hierarchy_builder = HierarchyBuilder(group_manager, value_grouper)
+ hierarchy = hierarchy_builder.build_hierarchy()
+
+ return [
+ TemporalNDJSON(
+ name=item["name"],
+ answer=item["answer"],
+ dataRow={"globalKey": data_global_key}
+ )
+ for item in hierarchy
+ ]
+
+
+# Audio-specific convenience function
+def create_audio_ndjson_annotations(annotations: List[AudioClassificationAnnotation],
+ data_global_key: str) -> List[TemporalNDJSON]:
+ """
+ Create NDJSON audio annotations with hierarchical structure.
+
+ Args:
+ annotations: List of audio classification annotations
+ data_global_key: Global key for the data row
+
+ Returns:
+ List of TemporalNDJSON objects
+ """
+ def audio_frame_extractor(ann: AudioClassificationAnnotation) -> Tuple[int, int]:
+ return (ann.start_frame, ann.end_frame or ann.start_frame)
+
+ return create_temporal_ndjson_annotations(annotations, data_global_key, audio_frame_extractor)
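A minimal sketch of calling the audio convenience wrapper directly (the annotation values are hypothetical; in normal use NDJsonConverter.serialize invokes this via _create_audio_annotations):

```python
import labelbox.types as lb_types
from labelbox.data.serialization.ndjson.temporal import (
    create_audio_ndjson_annotations,
)

anns = [
    lb_types.AudioClassificationAnnotation(
        frame=300,
        end_frame=800,
        name="checklist_class",
        value=lb_types.Checklist(
            answer=[lb_types.ClassificationAnswer(name="first_checklist_option")]
        ),
    ),
]

ndjson = create_audio_ndjson_annotations(anns, data_global_key="audio_demo_key")
for obj in ndjson:
    # Each TemporalNDJSON carries name, answer, and dataRow fields
    print(obj.name, obj.answer, obj.dataRow)
```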
diff --git a/libs/labelbox/tests/data/serialization/ndjson/test_audio.py b/libs/labelbox/tests/data/serialization/ndjson/test_audio.py
new file mode 100644
index 000000000..e392c2577
--- /dev/null
+++ b/libs/labelbox/tests/data/serialization/ndjson/test_audio.py
@@ -0,0 +1,363 @@
+import labelbox.types as lb_types
+from labelbox.data.serialization.ndjson.converter import NDJsonConverter
+
+
+def test_audio_nested_text_radio_checklist_structure():
+ # Purpose: verify that class-based AudioClassificationAnnotation inputs serialize
+ # into v3-style nested NDJSON with:
+ # - exactly three top-level groups (text_class, radio_class, checklist_class)
+ # - children nested only under their closest containing parent frames
+ # - correct field shapes per type (Text uses "value", Radio/Checklist use "name")
+
+ # Build annotations mirroring exec/v3.py shapes using class-based annotations
+ anns = []
+
+ # text_class top-level with multiple values
+ # Expect: produces an NDJSON object named "text_class" with four answer entries;
+ # the long segment (1500-2400) will carry nested children below.
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1000,
+ end_frame=1100,
+ name="text_class",
+ value=lb_types.Text(answer="A"),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1500,
+ end_frame=2400,
+ name="text_class",
+ value=lb_types.Text(answer="text_class value"),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=2500,
+ end_frame=2700,
+ name="text_class",
+ value=lb_types.Text(answer="C"),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=2900,
+ end_frame=2999,
+ name="text_class",
+ value=lb_types.Text(answer="D"),
+ )
+ )
+
+ # nested under text_class
+ # Expect: nested_text_class (1600-2000) nests under the 1500-2400 parent;
+ # nested_text_class_2 nests under nested_text_class only (no duplicates at parent level).
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1600,
+ end_frame=2000,
+ name="nested_text_class",
+ value=lb_types.Text(answer="nested_text_class value"),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1800,
+ end_frame=2000,
+ name="nested_text_class_2",
+ value=lb_types.Text(answer="nested_text_class_2 value"),
+ )
+ )
+
+ # radio_class top-level
+ # Expect: two answer entries for first_radio_answer (two frame segments) and
+ # two for second_radio_answer; children attach only to their closest container answer.
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=200,
+ end_frame=1500,
+ name="radio_class",
+ value=lb_types.Radio(
+ answer=lb_types.ClassificationAnswer(name="first_radio_answer")
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=2000,
+ end_frame=2500,
+ name="radio_class",
+ value=lb_types.Radio(
+ answer=lb_types.ClassificationAnswer(name="first_radio_answer")
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1550,
+ end_frame=1700,
+ name="radio_class",
+ value=lb_types.Radio(
+ answer=lb_types.ClassificationAnswer(name="second_radio_answer")
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=2700,
+ end_frame=3000,
+ name="radio_class",
+ value=lb_types.Radio(
+ answer=lb_types.ClassificationAnswer(name="second_radio_answer")
+ ),
+ )
+ )
+
+ # nested radio
+ # Expect: sub_radio_question nests under first_radio_answer (1000-1500), and
+ # sub_radio_question_2 nests under sub_radio_question's first_sub_radio_answer only.
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1000,
+ end_frame=1500,
+ name="sub_radio_question",
+ value=lb_types.Radio(
+ answer=lb_types.ClassificationAnswer(
+ name="first_sub_radio_answer"
+ )
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1300,
+ end_frame=1500,
+ name="sub_radio_question_2",
+ value=lb_types.Radio(
+ answer=lb_types.ClassificationAnswer(
+ name="first_sub_radio_answer_2"
+ )
+ ),
+ )
+ )
+
+ # checklist_class top-level
+ # Expect: three answer entries (first/second/third_checklist_option) and
+ # nested checklist children attach to the first option segments where contained.
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=300,
+ end_frame=800,
+ name="checklist_class",
+ value=lb_types.Checklist(
+ answer=[
+ lb_types.ClassificationAnswer(name="first_checklist_option")
+ ]
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1200,
+ end_frame=1800,
+ name="checklist_class",
+ value=lb_types.Checklist(
+ answer=[
+ lb_types.ClassificationAnswer(name="first_checklist_option")
+ ]
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=2200,
+ end_frame=2900,
+ name="checklist_class",
+ value=lb_types.Checklist(
+ answer=[
+ lb_types.ClassificationAnswer(
+ name="second_checklist_option"
+ )
+ ]
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=2500,
+ end_frame=3500,
+ name="checklist_class",
+ value=lb_types.Checklist(
+ answer=[
+ lb_types.ClassificationAnswer(name="third_checklist_option")
+ ]
+ ),
+ )
+ )
+
+ # nested checklist
+ # Expect: nested_checklist options 1/2/3 attach to their containing parent frames;
+ # checklist_nested_text attaches under nested_option_1 only.
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=400,
+ end_frame=700,
+ name="nested_checklist",
+ value=lb_types.Checklist(
+ answer=[lb_types.ClassificationAnswer(name="nested_option_1")]
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1200,
+ end_frame=1600,
+ name="nested_checklist",
+ value=lb_types.Checklist(
+ answer=[lb_types.ClassificationAnswer(name="nested_option_2")]
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=1400,
+ end_frame=1800,
+ name="nested_checklist",
+ value=lb_types.Checklist(
+ answer=[lb_types.ClassificationAnswer(name="nested_option_3")]
+ ),
+ )
+ )
+ anns.append(
+ lb_types.AudioClassificationAnnotation(
+ frame=500,
+ end_frame=700,
+ name="checklist_nested_text",
+ value=lb_types.Text(answer="checklist_nested_text value"),
+ )
+ )
+
+ # Serialize a single Label containing all of the above annotations
+ label = lb_types.Label(
+ data={"global_key": "audio_nested_test_key"}, annotations=anns
+ )
+ ndjson = list(NDJsonConverter.serialize([label]))
+
+ # Assert: exactly three top-level groups, matching v3 root objects
+ assert {obj["name"] for obj in ndjson} == {
+ "text_class",
+ "radio_class",
+ "checklist_class",
+ }
+
+ # Validate text_class structure: children appear under the long segment only,
+ # and grandchildren only under their immediate parent
+ text_nd = next(obj for obj in ndjson if obj["name"] == "text_class")
+ parent = next(
+ item
+ for item in text_nd["answer"]
+ if item.get("value") == "text_class value"
+ )
+ nested = parent.get("classifications", [])
+ names = {c["name"] for c in nested}
+ assert "nested_text_class" in names
+ nt = next(c for c in nested if c["name"] == "nested_text_class")
+ nt_ans = nt["answer"][0]
+ assert nt_ans["value"] == "nested_text_class value"
+ nt_nested = nt_ans.get("classifications", [])
+ assert any(c["name"] == "nested_text_class_2" for c in nt_nested)
+
+ # Validate radio_class structure and immediate-child only
+ radio_nd = next(obj for obj in ndjson if obj["name"] == "radio_class")
+ first_radio = next(
+ a for a in radio_nd["answer"] if a["name"] == "first_radio_answer"
+ )
+ assert any(
+ c["name"] == "sub_radio_question"
+ for c in first_radio.get("classifications", [])
+ )
+ # sub_radio_question_2 is nested under sub_radio_question only
+ sub_radio = next(
+ c
+ for c in first_radio["classifications"]
+ if c["name"] == "sub_radio_question"
+ )
+ sr_first = next(
+ a for a in sub_radio["answer"] if a["name"] == "first_sub_radio_answer"
+ )
+ assert any(
+ c["name"] == "sub_radio_question_2"
+ for c in sr_first.get("classifications", [])
+ )
+
+ # Validate checklist_class structure: nested_checklist exists, and nested text
+ # appears only under nested_option_1 (closest container)
+ checklist_nd = next(
+ obj for obj in ndjson if obj["name"] == "checklist_class"
+ )
+ first_opt = next(
+ a
+ for a in checklist_nd["answer"]
+ if a["name"] == "first_checklist_option"
+ )
+ assert any(
+ c["name"] == "nested_checklist"
+ for c in first_opt.get("classifications", [])
+ )
+ nested_checklist = next(
+ c
+ for c in first_opt["classifications"]
+ if c["name"] == "nested_checklist"
+ )
+ # Ensure nested text present under nested_checklist → nested_option_1
+ opt1 = next(
+ a for a in nested_checklist["answer"] if a["name"] == "nested_option_1"
+ )
+ assert any(
+ c["name"] == "checklist_nested_text"
+ for c in opt1.get("classifications", [])
+ )
+
+
+def test_audio_top_level_only_basic():
+ anns = [
+ lb_types.AudioClassificationAnnotation(
+ frame=200,
+ end_frame=1500,
+ name="radio_class",
+ value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name="first_radio_answer")),
+ ),
+ lb_types.AudioClassificationAnnotation(
+ frame=1550,
+ end_frame=1700,
+ name="radio_class",
+ value=lb_types.Radio(answer=lb_types.ClassificationAnswer(name="second_radio_answer")),
+ ),
+ lb_types.AudioClassificationAnnotation(
+ frame=1200,
+ end_frame=1800,
+ name="checklist_class",
+ value=lb_types.Checklist(answer=[lb_types.ClassificationAnswer(name="angry")]),
+ ),
+ ]
+
+ label = lb_types.Label(data={"global_key": "audio_top_level_only"}, annotations=anns)
+ ndjson = list(NDJsonConverter.serialize([label]))
+
+ names = {o["name"] for o in ndjson}
+ assert names == {"radio_class", "checklist_class"}
+
+ radio = next(o for o in ndjson if o["name"] == "radio_class")
+ r_answers = sorted(radio["answer"], key=lambda x: x["frames"][0]["start"])
+ assert r_answers[0]["name"] == "first_radio_answer"
+ assert r_answers[0]["frames"] == [{"start": 200, "end": 1500}]
+ assert "classifications" not in r_answers[0]
+ assert r_answers[1]["name"] == "second_radio_answer"
+ assert r_answers[1]["frames"] == [{"start": 1550, "end": 1700}]
+ assert "classifications" not in r_answers[1]
+
+ checklist = next(o for o in ndjson if o["name"] == "checklist_class")
+ c_answers = checklist["answer"]
+ assert len(c_answers) == 1
+ assert c_answers[0]["name"] == "angry"
+ assert c_answers[0]["frames"] == [{"start": 1200, "end": 1800}]
+ assert "classifications" not in c_answers[0]