Changes from all commits
24 commits
e4fd630
chore: PoC + ipynb
rishisurana-labelbox Sep 3, 2025
dbcc7bf
chore: use ms instead of s in sdk interface
rishisurana-labelbox Sep 8, 2025
dbb592f
:art: Cleaned
github-actions[bot] Sep 8, 2025
ff298d4
:memo: README updated
github-actions[bot] Sep 8, 2025
16896fd
chore: it works for temporal text/radio/checklist classifications
rishisurana-labelbox Sep 11, 2025
7a666cc
chore: clean up and organize code
rishisurana-labelbox Sep 11, 2025
ac58ad0
chore: update tests fail and documentation update
rishisurana-labelbox Sep 11, 2025
67dd14a
:art: Cleaned
github-actions[bot] Sep 11, 2025
a1600e5
:memo: README updated
github-actions[bot] Sep 11, 2025
b4d2f42
chore: improve imports
rishisurana-labelbox Sep 11, 2025
fadb14e
chore: restore py version
rishisurana-labelbox Sep 11, 2025
1e12596
chore: restore py version
rishisurana-labelbox Sep 11, 2025
c2a7b4c
chore: cleanup
rishisurana-labelbox Sep 12, 2025
26a35fd
chore: lint
rishisurana-labelbox Sep 12, 2025
b16f2ea
fix: failing build issue due to lint
rishisurana-labelbox Sep 12, 2025
943cb73
chore: simplify
rishisurana-labelbox Sep 19, 2025
a838513
chore: update examples - all tests passing
rishisurana-labelbox Sep 19, 2025
0ca9cd6
chore: use start frame instead of frame
rishisurana-labelbox Sep 22, 2025
7861537
chore: remove audio object annotation
rishisurana-labelbox Sep 22, 2025
6c3c50a
chore: change class shape for text and radio/checklist
rishisurana-labelbox Sep 22, 2025
68773cf
chore: stan comments
rishisurana-labelbox Sep 25, 2025
58b30f7
chore: top level + nested working
rishisurana-labelbox Sep 26, 2025
400d5bb
:art: Cleaned
github-actions[bot] Sep 26, 2025
c761dcf
:memo: README updated
github-actions[bot] Sep 26, 2025
168 changes: 84 additions & 84 deletions examples/README.md

Large diffs are not rendered by default.

53 changes: 52 additions & 1 deletion examples/annotation_import/audio.ipynb
@@ -170,7 +170,7 @@
},
{
"metadata": {},
"source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
"source": "ontology_builder = lb.OntologyBuilder(classifications=[\n lb.Classification(class_type=lb.Classification.Type.TEXT,\n name=\"text_audio\"),\n lb.Classification(\n class_type=lb.Classification.Type.CHECKLIST,\n name=\"checklist_audio\",\n options=[\n lb.Option(value=\"first_checklist_answer\"),\n lb.Option(value=\"second_checklist_answer\"),\n ],\n ),\n lb.Classification(\n class_type=lb.Classification.Type.RADIO,\n name=\"radio_audio\",\n options=[\n lb.Option(value=\"first_radio_answer\"),\n lb.Option(value=\"second_radio_answer\"),\n ],\n ),\n # Temporal classification for token-level annotations\n lb.Classification(\n class_type=lb.Classification.Type.TEXT,\n name=\"User Speaker\",\n scope=lb.Classification.Scope.INDEX, # INDEX scope for temporal\n ),\n])\n\nontology = client.create_ontology(\n \"Ontology Audio Annotations\",\n ontology_builder.asdict(),\n media_type=lb.MediaType.Audio,\n)",
"cell_type": "code",
"outputs": [],
"execution_count": null
@@ -223,6 +223,27 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"\n"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "label = []\nlabel.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation],\n ))",
@@ -252,6 +273,29 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": [
"## Temporal Audio Annotations\n",
"\n",
"You can create temporal annotations for individual tokens (words) with precise timing:\n"
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "# Define tokens with precise timing (from demo script)\ntokens_data = [\n (\"Hello\", 586, 770), # Hello: frames 586-770\n (\"AI\", 771, 955), # AI: frames 771-955\n (\"how\", 956, 1140), # how: frames 956-1140\n (\"are\", 1141, 1325), # are: frames 1141-1325\n (\"you\", 1326, 1510), # you: frames 1326-1510\n (\"doing\", 1511, 1695), # doing: frames 1511-1695\n (\"today\", 1696, 1880), # today: frames 1696-1880\n]\n\n# Create temporal annotations for each token\ntemporal_annotations = []\nfor token, start_frame, end_frame in tokens_data:\n token_annotation = lb_types.AudioClassificationAnnotation(\n frame=start_frame,\n end_frame=end_frame,\n name=\"User Speaker\",\n value=lb_types.Text(answer=token),\n )\n temporal_annotations.append(token_annotation)\n\nprint(f\"Created {len(temporal_annotations)} temporal token annotations\")",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "# Create label with both regular and temporal annotations\nlabel_with_temporal = []\nlabel_with_temporal.append(\n lb_types.Label(\n data={\"global_key\": global_key},\n annotations=[text_annotation, checklist_annotation, radio_annotation] +\n temporal_annotations,\n ))\n\nprint(\n f\"Created label with {len(label_with_temporal[0].annotations)} total annotations\"\n)\nprint(f\" - Regular annotations: 3\")\nprint(f\" - Temporal annotations: {len(temporal_annotations)}\")",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": [
@@ -260,6 +304,13 @@
],
"cell_type": "markdown"
},
{
"metadata": {},
"source": "# Upload temporal annotations via MAL\ntemporal_upload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"temporal_mal_job-{str(uuid.uuid4())}\",\n predictions=label_with_temporal,\n)\n\ntemporal_upload_job.wait_until_done()\nprint(\"Temporal upload completed!\")\nprint(\"Errors:\", temporal_upload_job.errors)\nprint(\"Status:\", temporal_upload_job.statuses)",
"cell_type": "code",
"outputs": [],
"execution_count": null
},
{
"metadata": {},
"source": "# Upload our label using Model-Assisted Labeling\nupload_job = lb.MALPredictionImport.create_from_objects(\n client=client,\n project_id=project.uid,\n name=f\"mal_job-{str(uuid.uuid4())}\",\n predictions=label,\n)\n\nupload_job.wait_until_done()\nprint(\"Errors:\", upload_job.errors)\nprint(\"Status of uploads: \", upload_job.statuses)",
libs/labelbox/src/labelbox/data/annotation_types/__init__.py
@@ -19,6 +19,8 @@
from .video import MaskInstance
from .video import VideoMaskAnnotation

from .audio import AudioClassificationAnnotation

from .ner import ConversationEntity
from .ner import DocumentEntity
from .ner import DocumentTextSelection
37 changes: 37 additions & 0 deletions libs/labelbox/src/labelbox/data/annotation_types/audio.py
@@ -0,0 +1,37 @@
from typing import Optional
from pydantic import Field, AliasChoices

from labelbox.data.annotation_types.annotation import (
ClassificationAnnotation,
)


class AudioClassificationAnnotation(ClassificationAnnotation):
"""Audio classification for specific time range
Examples:
- Speaker identification from 2500ms to 4100ms
- Audio quality assessment for a segment
- Language detection for audio segments
Args:
name (Optional[str]): Name of the classification
feature_schema_id (Optional[Cuid]): Feature schema identifier
value (Union[Text, Checklist, Radio]): Classification value
start_frame (int): The frame index in milliseconds (e.g., 2500 = 2.5 seconds)
end_frame (Optional[int]): End frame in milliseconds (for time ranges)
segment_index (Optional[int]): Index of audio segment this annotation belongs to
extra (Dict[str, Any]): Additional metadata
"""

start_frame: int = Field(
validation_alias=AliasChoices("start_frame", "frame"),
serialization_alias="startframe",
)
Review comment: serialization inconsistency bug
The start_frame field deserializes from start_frame or frame but serializes to startframe. This mismatch prevents successful round-trip serialization.

end_frame: Optional[int] = Field(
default=None,
validation_alias=AliasChoices("end_frame", "endFrame"),
serialization_alias="end_frame",
)
segment_index: Optional[int] = None
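
A minimal usage sketch, mirroring the lb_types conventions used in the notebook above; the commented round trip illustrates the alias mismatch called out in the review comment, assuming the aliases behave as written:

```python
import labelbox.types as lb_types

# Construct a temporal audio classification; times are in milliseconds.
ann = lb_types.AudioClassificationAnnotation(
    name="User Speaker",
    start_frame=2500,  # 2.5 s
    end_frame=4100,    # 4.1 s
    value=lb_types.Text(answer="Hello"),
)

# The dump emits "startframe" (serialization_alias), but validation only
# accepts "start_frame" or "frame" (AliasChoices), so re-parsing fails:
dumped = ann.model_dump(by_alias=True)
# lb_types.AudioClassificationAnnotation(**dumped)  # pydantic ValidationError
```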

22 changes: 16 additions & 6 deletions libs/labelbox/src/labelbox/data/annotation_types/label.py
@@ -13,6 +13,7 @@
from .metrics import ScalarMetric, ConfusionMatrixMetric
from .video import VideoClassificationAnnotation
from .video import VideoObjectAnnotation, VideoMaskAnnotation
from .audio import AudioClassificationAnnotation
from .mmc import MessageEvaluationTaskAnnotation
from pydantic import BaseModel, field_validator

@@ -44,6 +45,7 @@ class Label(BaseModel):
ClassificationAnnotation,
ObjectAnnotation,
VideoMaskAnnotation,
AudioClassificationAnnotation,
ScalarMetric,
ConfusionMatrixMetric,
RelationshipAnnotation,
@@ -75,15 +77,23 @@ def _get_annotations_by_type(self, annotation_type):

def frame_annotations(
self,
) -> Dict[str, Union[VideoObjectAnnotation, VideoClassificationAnnotation]]:
) -> Dict[int, Union[VideoObjectAnnotation, VideoClassificationAnnotation, AudioClassificationAnnotation]]:
"""Get temporal annotations organized by frame

Returns:
Dict[int, List]: Dictionary mapping frame (milliseconds) to list of temporal annotations

Example:
>>> label.frame_annotations()
{2500: [VideoClassificationAnnotation(...), AudioClassificationAnnotation(...)]}
"""
frame_dict = defaultdict(list)
for annotation in self.annotations:
if isinstance(
annotation,
(VideoObjectAnnotation, VideoClassificationAnnotation),
):
if isinstance(annotation, (VideoObjectAnnotation, VideoClassificationAnnotation)):
frame_dict[annotation.frame].append(annotation)
return frame_dict
elif isinstance(annotation, AudioClassificationAnnotation):
frame_dict[annotation.start_frame].append(annotation)
return dict(frame_dict)
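
A short sketch of the resulting grouping, assuming a label holding one of the audio annotations from the notebook (the global key is a placeholder):

```python
import labelbox.types as lb_types

label = lb_types.Label(
    data={"global_key": "my-audio-row"},  # placeholder
    annotations=[
        lb_types.AudioClassificationAnnotation(
            name="User Speaker",
            start_frame=2500,
            end_frame=4100,
            value=lb_types.Text(answer="Hello"),
        ),
    ],
)

# Video annotations are keyed by `frame`, audio by `start_frame`:
label.frame_annotations()  # {2500: [AudioClassificationAnnotation(...)]}
```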

def add_url_to_masks(self, signer) -> "Label":
"""
libs/labelbox/src/labelbox/data/serialization/ndjson/classification.py
@@ -60,22 +60,6 @@ def serialize_model(self, handler):
return res


class FrameLocation(BaseModel):

Review comment: restore (i.e., restore the removed FrameLocation and VideoSupported classes).

end: int
start: int


class VideoSupported(BaseModel):
# Note that frames are only allowed as top level inferences for video
frames: Optional[List[FrameLocation]] = None

@model_serializer(mode="wrap")
def serialize_model(self, handler):
res = handler(self)
# This means these are no video frames ..
if self.frames is None:
res.pop("frames")
return res


class NDTextSubclass(NDAnswer):
@@ -242,13 +226,14 @@ def from_common(
name=name,
schema_id=feature_schema_id,
uuid=uuid,
frames=extra.get("frames"),
message_id=message_id,
confidence=text.confidence,
custom_metrics=text.custom_metrics,
)


class NDChecklist(NDAnnotation, NDChecklistSubclass, VideoSupported):
class NDChecklist(NDAnnotation, NDChecklistSubclass):
@model_serializer(mode="wrap")
def serialize_model(self, handler):
res = handler(self)
@@ -295,7 +280,7 @@ def from_common(
)


class NDRadio(NDAnnotation, NDRadioSubclass, VideoSupported):
class NDRadio(NDAnnotation, NDRadioSubclass):
@classmethod
def from_common(
cls,
@@ -425,7 +410,8 @@ def to_common(
def from_common(
cls,
annotation: Union[
ClassificationAnnotation, VideoClassificationAnnotation
ClassificationAnnotation,
VideoClassificationAnnotation,
],
data: GenericDataRowData,
) -> Union[NDTextSubclass, NDChecklistSubclass, NDRadioSubclass]:
@@ -448,7 +434,8 @@ def from_common(
@staticmethod
def lookup_classification(
annotation: Union[
ClassificationAnnotation, VideoClassificationAnnotation
ClassificationAnnotation,
VideoClassificationAnnotation,
],
) -> Union[NDText, NDChecklist, NDRadio]:
return {Text: NDText, Checklist: NDChecklist, Radio: NDRadio}.get(
92 changes: 89 additions & 3 deletions libs/labelbox/src/labelbox/data/serialization/ndjson/label.py
@@ -2,7 +2,7 @@
import copy
from itertools import groupby
from operator import itemgetter
from typing import Generator, List, Tuple, Union
from typing import Any, Dict, Generator, List, Tuple, Union
from uuid import uuid4

from pydantic import BaseModel
@@ -24,6 +24,10 @@
VideoMaskAnnotation,
VideoObjectAnnotation,
)
from typing import List
from ...annotation_types.audio import (
AudioClassificationAnnotation,
)
from labelbox.types import DocumentRectangle, DocumentEntity
from .classification import (
NDChecklistSubclass,
@@ -69,6 +73,7 @@ def from_common(
yield from cls._create_relationship_annotations(label)
yield from cls._create_non_video_annotations(label)
yield from cls._create_video_annotations(label)
yield from cls._create_audio_annotations(label)

@staticmethod
def _get_consecutive_frames(
@@ -80,6 +85,7 @@ def _get_consecutive_frames(
consecutive.append((group[0], group[-1]))
return consecutive


@classmethod
def _get_segment_frame_ranges(
cls,
@@ -153,12 +159,91 @@ def _create_video_annotations(
for annotation in annotation_group:
if (
annotation.keyframe
and start_frame <= annotation.frame <= end_frame
and start_frame <= annotation.start_frame <= end_frame
Review comment: incorrect attribute usage in video annotations
The _create_video_annotations method uses annotation.start_frame for VideoObjectAnnotation objects, but VideoObjectAnnotation has a frame attribute; start_frame is specific to audio annotations. This raises an AttributeError and breaks video annotation processing. (One additional location is flagged with the same issue.)

):
segment.append(annotation)
segments.append(segment)
yield NDObject.from_common(segments, label.data)
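
One possible fix for the attribute bug flagged above, sketched as a small helper that dispatches on annotation type so video keyframes keep using frame while audio uses start_frame (names taken from this diff; not the author's implementation):

```python
# Sketch: pick the timing attribute per annotation type instead of
# assuming start_frame on every temporal annotation.
def _temporal_position(annotation) -> int:
    if isinstance(annotation, AudioClassificationAnnotation):
        return annotation.start_frame
    # VideoObjectAnnotation / VideoClassificationAnnotation use `frame`
    return annotation.frame

# ...then inside _create_video_annotations:
#   if annotation.keyframe and start_frame <= _temporal_position(annotation) <= end_frame:
#       segment.append(annotation)
```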

@classmethod
def _create_audio_annotations(
cls, label: Label
) -> Generator[BaseModel, None, None]:
"""Create audio annotations grouped by classification name in v2.py format."""
audio_annotations = defaultdict(list)

# Collect audio annotations by name/schema_id
for annot in label.annotations:
if isinstance(annot, AudioClassificationAnnotation):
audio_annotations[annot.feature_schema_id or annot.name].append(annot)

# Create v2.py format for each classification group
for classification_name, annotation_group in audio_annotations.items():
# Group annotations by value (like v2.py does)
value_groups = defaultdict(list)

for ann in annotation_group:
# Extract value based on classification type for grouping
if hasattr(ann.value, 'answer'):
if isinstance(ann.value.answer, list):
# Checklist classification - convert list to string for grouping
value = str(sorted([item.name for item in ann.value.answer]))
elif hasattr(ann.value.answer, 'name'):
# Radio classification - ann.value.answer is ClassificationAnswer with name
value = ann.value.answer.name
else:
# Text classification
value = ann.value.answer
else:
value = str(ann.value)

# Group by value
value_groups[value].append(ann)

# Create answer items with grouped frames (like v2.py)
answer_items = []
for value, annotations_with_same_value in value_groups.items():
frames = []
for ann in annotations_with_same_value:
frames.append({"start": ann.start_frame, "end": ann.end_frame})

# Extract the actual value for the output (not the grouping key)
first_ann = annotations_with_same_value[0]

# Use different field names based on classification type
if hasattr(first_ann.value, 'answer') and isinstance(first_ann.value.answer, list):
# Checklist - use "name" field (like v2.py)
answer_items.append({
"name": first_ann.value.answer[0].name, # Single item for now
"frames": frames
})
Review comment: audio checklist data loss
The _create_audio_annotations method only preserves the first selected answer for checklist classifications, so the remaining selected options of a multi-selection audio checklist are discarded.
elif hasattr(first_ann.value, 'answer') and hasattr(first_ann.value.answer, 'name'):
# Radio - use "name" field (like v2.py)
answer_items.append({
"name": first_ann.value.answer.name,
"frames": frames
})
else:
# Text - use "value" field (like v2.py)
answer_items.append({
"value": first_ann.value.answer,
"frames": frames
})

# Create a simple Pydantic model for the v2.py format
class AudioNDJSON(BaseModel):
name: str
answer: List[Dict[str, Any]]
dataRow: Dict[str, str]

yield AudioNDJSON(
name=classification_name,
answer=answer_items,
dataRow={"globalKey": label.data.global_key}
)
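
For reference, a sketch of the payload this generator yields for the notebook's "User Speaker" tokens; field names are taken from the code above, the data-row key is a placeholder, and the trailing comment sketches one way to address the checklist truncation flagged in the review comment:

```python
# Expected NDJSON item (sketch) for the grouped text classification:
expected_text_payload = {
    "name": "User Speaker",
    "answer": [
        {"value": "Hello", "frames": [{"start": 586, "end": 770}]},
        {"value": "AI", "frames": [{"start": 771, "end": 955}]},
        # ...one entry per distinct answer value, frames grouped per value
    ],
    "dataRow": {"globalKey": "<your-global-key>"},
}

# Possible fix for the checklist truncation: emit one answer item per
# selected option instead of only answer[0].
# for option in first_ann.value.answer:
#     answer_items.append({"name": option.name, "frames": frames})
```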



@classmethod
def _create_non_video_annotations(cls, label: Label):
non_video_annotations = [
Expand All @@ -170,6 +255,7 @@ def _create_non_video_annotations(cls, label: Label):
VideoClassificationAnnotation,
VideoObjectAnnotation,
VideoMaskAnnotation,
AudioClassificationAnnotation,
RelationshipAnnotation,
),
)
@@ -187,7 +273,7 @@ def _create_non_video_annotations(cls, label):
yield NDMessageTask.from_common(annotation, label.data)
else:
raise TypeError(
f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value',annotation))}`"
f"Unable to convert object to MAL format. `{type(getattr(annotation, 'value', annotation))}`"
)

@classmethod