<!DOCTYPE html>
<html>
<head>
<title>Media Timed Events</title>
<meta charset="utf-8">
<script src="https://www.w3.org/Tools/respec/respec-w3c-common" async class="remove"></script>
<script class="remove">
var respecConfig = {
specStatus: "IG-NOTE",
edDraftURI: "https://w3c.github.io/me-media-timed-events/",
shortName: "media-timed-events",
editors: [
{
name: "Chris Needham",
mailto: "chris.needham@bbc.co.uk",
company: "British Broadcasting Corporation",
companyURL: "https://www.bbc.co.uk"
},
{
name: "Giridhar Mandyam",
mailto: "mandyam@qti.qualcomm.com",
company: "Qualcomm",
companyURL: "https://www.qualcomm.com"
}
],
wg: "Media & Entertainment Interest Group",
wgURI: "https://www.w3.org/2011/webtv/",
charterDisclosureURI: "https://www.w3.org/2017/03/webtv-charter.html",
github: "https://github.com/w3c/me-media-timed-events/",
otherLinks: [
{
key: "Version history",
data: [
{
value: "GitHub w3c/me-media-timed-events/commits",
href: "https://github.com/w3c/me-media-timed-events/commits"
}
]
},
{
key: "Participate",
data: [
{
value: "GitHub w3c/me-media-timed-events",
href: "https://github.com/w3c/me-media-timed-events/"
},
{
value: "File an issue",
href: "https://github.com/w3c/me-media-timed-events/issues/new"
},
{
value: "Open issues",
href: "https://github.com/w3c/me-media-timed-events/issues/"
},
{
value: "Mailing-list (public-web-and-tv@w3.org)",
href: "https://lists.w3.org/Archives/Public/public-web-and-tv/"
}
]
}
],
localBiblio: {
"WEB-ISOBMFF": {
title: "ISO/IEC JTC1/SC29/WG11 N16944 Working Draft on Carriage of Web Resources in ISOBMFF",
href: "https://mpeg.chiariglione.org/standards/mpeg-4/timed-text-and-other-visual-overlays-iso-base-media-file-format/wd-carriage-web",
// href: "https://mpeg.chiariglione.org/sites/default/files/files/standards/parts/docs/w16944.zip",
authors: [
"Thomas Stockhammer",
"Cyril Concolato"
],
publisher: "MPEG",
date: "July 2017",
},
"DASH-EVENTING": {
title: "DASH Eventing and HTML5",
href: "https://www.w3.org/2011/webtv/wiki/images/a/a5/DASH_Eventing_and_HTML5.pdf",
authors: [
"Giridhar Mandyam"
],
date: "February 2018"
},
"WEBVTT": {
title: "WebVTT: The Web Video Text Tracks Format",
href: "https://www.w3.org/TR/webvtt1/",
authors: [
"Simon Pieters",
"Silvia Pfeiffer",
"Phillip Jägenstedt",
"Ian Hickson"
],
publisher: "W3C",
status: "CR",
date: "10 May 2018"
},
"WEB-MEDIA-GUIDELINES": {
title: "Web Media Application Developer Guidelines 2018",
href: "https://w3c.github.io/webmediaguidelines/",
authors: [
"Joel Korpi",
"Thasso Griebel",
"Jeff Burtoft"
],
publisher: "W3C",
status: "CG-DRAFT",
date: "26 April 2018"
},
"HBBTV": {
title: "HbbTV 2.0.2 Specification",
href: "https://www.hbbtv.org/wp-content/uploads/2018/02/HbbTV_v202_specification_2018_02_16.pdf",
publisher: "HbbTV Association",
date: "16 February 2018"
},
"HBBTV-TESTS": {
title: "HbbTV Test Suite 2018-1",
href: "https://www.hbbtv.org/wp-content/uploads/2018/03/HbbTV-testcases-2018-1.pdf",
publisher: "HbbTV Association",
date: "2018"
},
"DVB-DASH": {
title: "DVB Document A168. Digital Video Broadcasting (DVB); MPEG-DASH Profile for Transport of ISO BMFF Based DVB Services over IP Based Networks",
href: "https://www.dvb.org/resources/public/standards/a168_dvb_mpeg-dash_nov_2017.pdf",
publisher: "DVB",
date: "November 2017"
},
"BBC-SUBTITLES": {
title: "Subtitle Guidelines",
href: "http://bbc.github.io/subtitle-guidelines/",
publisher: "BBC",
date: "May 2018"
},
"SCTE-35": {
title: "Digital Program Insertion Cueing Message for Cable",
href: "https://www.scte.org/SCTEDocs/Standards/SCTE%2035%202016.pdf",
publisher: "The Society of Cable and Television Engineers",
date: "2016"
},
"3GPP-INTERACTIVITY-WID": {
title: "SP-170796: New WID on 3GPP Service Interactivity",
href: "http://www.3gpp.org/ftp/tsg_sa/TSG_SA/TSGS_77/Docs/SP-170796.zip",
publisher: "3GPP",
date: "September 2017"
},
"3GPP-INTERACTIVITY-TR": {
title: "TR 26.953: Interactivity Support for 3GPP-Based Streaming and Download Services (Release 15)",
href: "http://www.3gpp.org/ftp/Specs/archive/26_series/26.953/26953-f00.zip",
publisher: "3GPP",
date: "June 2018"
},
"WebVMT": {
title: "WebVMT: The Web Video Map Tracks Format",
href: "https://w3c.github.io/sdw/proposals/geotagging/webvmt/",
authors: [
"Rob Smith"
],
publisher: "W3C",
status: "ED",
date: "11 October 2018"
},
"HLS-TIMED-METADATA": {
title: "Timed Metadata for HTTP Live Streaming",
href: "https://developer.apple.com/library/archive/documentation/AudioVideo/Conceptual/HTTP_Live_Streaming_Metadata_Spec/Introduction/Introduction.html"
}
}
};
</script>
</head>
<body>
<section id="abstract">
<p>
This document collects use cases and requirements for improved support
for timed events related to audio or video media on the web, such as
subtitles, captions, or other web content, where synchronization to a
playing audio or video media stream is needed, and makes recommendations
for new or changed web APIs to realize these requirements.
</p>
</section>
<section id="sotd">
</section>
<section>
<h2>Introduction</h2>
<p>
<em>Media timed events</em> describes a generic capability for making
changes to a web page, or executing application code, triggered by
JavaScript events at specific points on the media timeline of an
audio or video media stream.
</p>
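<p>
For example, with the text track APIs available today in [[HTML]], an
application might schedule a page change at a point on the media timeline
along the following lines (the cue timing and payload are illustrative):
</p>
<pre class="example">
const video = document.querySelector('video');

// Create a metadata track and add a cue covering 30 to 40 seconds on the timeline.
const track = video.addTextTrack('metadata', 'timed events');
track.mode = 'hidden';

const cue = new VTTCue(30, 40, JSON.stringify({ action: 'show-banner' }));
cue.onenter = () => {
  // Runs when playback reaches the cue start time.
  const data = JSON.parse(cue.text);
  console.log('cue entered:', data.action);
};
cue.onexit = () => {
  console.log('cue exited');
};
track.addCue(cue);
</pre>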
</section>
<section>
<h2>Terminology</h2>
<p>
The following terms are used in this document:
</p>
<ul>
<li>
<dfn>in-band</dfn> — timed event information that is delivered
within the audio or video media container or multiplexed with the
media stream.
</li>
<li>
<dfn>out-of-band</dfn> — timed event information that is
delivered over some other mechanism external to the media container
or media stream.
</li>
</ul>
<p>
The following terms are defined in [[HTML52]]:
</p>
<ul>
<li>
<dfn><a href="https://www.w3.org/TR/html52/semantics-embedded-content.html#media-timeline">media timeline</a></dfn>
</li>
</ul>
</section>
<section>
<h2>Use cases</h2>
<p>
Media-timed events carry metadata that is related to points in time,
or regions of time on the media timeline, which can be used to trigger
retrieval and/or rendering of web resources synchronized with media
playback. Such resources can be used to enhance user experience in
the context of media that is being rendered. Some examples include
display of social media feeds corresponding to a live broadcast such
as a sporting event, banner advertisements for sponsored content,
accessibility-related assets, such as large print rendering of
captions, and display of track titles or images alongside an audio
stream.
</p>
<p>
The following sections describe a few use cases in more detail.
</p>
<section>
<h3>Audio stream with titles and images</h3>
<p>
A media content provider wants to provide visual information alongside
an audio stream, such as an image of the artist and title of the
current playing track, to give users live information about the
content they are listening to.
</p>
<p>
Examples include HLS timed metadata [[HLS-TIMED-METADATA]], which uses
in-band ID3 metadata to carry the image content, and RadioVIS in DVB
([[DVB-DASH]], section 9.1.7), which defines in-band event messages
that contain image URLs and text messages to be displayed, with
information about when the content should be displayed in relation to
the media timeline.
</p>
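<p>
As a sketch, assuming the user agent exposes in-band ID3 frames as cues on a
<code>metadata</code> text track (as WebKit does for HLS through its
<code>DataCue</code> extension, described in the gap analysis below), an
application might read the current track title as follows; the exact cue
shape varies by user agent:
</p>
<pre class="example">
const audio = document.querySelector('audio');

audio.textTracks.addEventListener('addtrack', (event) => {
  const track = event.track;
  if (track.kind !== 'metadata') return;
  track.mode = 'hidden';
  track.addEventListener('cuechange', () => {
    for (const cue of track.activeCues) {
      // In WebKit's DataCue extension, cue.value holds { key, data, locale };
      // an ID3 "TIT2" frame, for example, carries the track title.
      console.log(cue.value?.key, cue.value?.data);
    }
  });
});
</pre>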
</section>
<section>
<h3>MPEG DASH manifest expiry notifications</h3>
<p>
Section 5.10.4 of [[MPEGDASH]] describes a DASH-specific event that
is used to notify a DASH player web application that it should refresh
its copy of the manifest (MPD) document.
An in-band <code>emsg</code> event is used as an alternative to setting
a cache duration in the response to the HTTP request for the manifest,
so that the client can refresh the MPD when it actually changes, thereby
reducing the load on HTTP servers caused by frequent polling requests.
</p>
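<p>
As an illustration, assuming the user agent exposes <code>emsg</code> events
as cues on a <code>metadata</code> text track (as in the HbbTV mapping
described later in this document), a DASH player application might react to
the MPD validity expiration scheme defined in [[MPEGDASH]] along these lines;
<code>refreshManifest()</code> is a hypothetical application function:
</p>
<pre class="example">
const video = document.querySelector('video');
const MPD_EXPIRY_SCHEME = 'urn:mpeg:dash:event:2012';

video.textTracks.addEventListener('addtrack', ({ track }) => {
  if (track.kind !== 'metadata') return;
  // In the HbbTV mapping, inBandMetadataTrackDispatchType carries the
  // scheme_id_uri and value of the event stream.
  if (!track.inBandMetadataTrackDispatchType.startsWith(MPD_EXPIRY_SCHEME)) return;
  track.mode = 'hidden';
  track.addEventListener('cuechange', () => {
    if (track.activeCues.length > 0) {
      refreshManifest(); // hypothetical: re-fetch and reparse the MPD
    }
  });
});
</pre>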
<p>
Reference: M&E IG call 1 Feb 2018:
<a href="https://www.w3.org/2018/02/01-me-minutes.html">Minutes</a>,
[[DASH-EVENTING]].
</p>
<p class="ednote">
See also <a href="https://github.com/w3c/webmediaguidelines/issues/64">this issue</a>
against the [[WEB-MEDIA-GUIDELINES]]. TODO: Add detail here.
</p>
</section>
<section>
<h3>Synchronized map animations</h3>
<p>
[[WebVMT]] is a format for metadata cues, synchronized with
a timed media file, that can drive an online map, e.g., OpenStreetMap,
rendered in a separate HTML element alongside the media element
on the web page. The media playhead position controls presentation
and animation of the map, e.g., pan and zoom, and allows annotations
to be added and removed, e.g., markers, at specified times during
media playback. Control can also be overridden by the user with the
usual interactive features of the map at any time, e.g., zoom.
Concrete examples are provided by the
<a href="http://webvmt.org/demos">tech demos</a> at the WebVMT website.
</p>
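<p>
As a sketch of the pattern (not of the WebVMT syntax itself), an application
might drive a map from metadata cues as follows, using Leaflet purely as an
example map library and an illustrative JSON cue payload:
</p>
<pre class="example">
// Assumes a page element with id="map" for the map, alongside a video element.
const map = L.map('map').setView([51.5, -0.1], 12);
L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png').addTo(map);

const video = document.querySelector('video');
const track = video.addTextTrack('metadata', 'map sync');
track.mode = 'hidden';

const cue = new VTTCue(10, 15, JSON.stringify({ moveTo: { lat: 51.51, lng: -0.12 } }));
cue.onenter = () => {
  // Pan the map when playback reaches the cue start time.
  const { moveTo } = JSON.parse(cue.text);
  map.panTo([moveTo.lat, moveTo.lng]);
};
track.addCue(cue);
</pre>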
<p>
Reference: M&E IG TF call 17 Sept 2018:
<a href="https://www.w3.org/2018/09/17-me-minutes.html">Minutes</a>.
</p>
</section>
<section>
<h3>Media analysis visualization</h3>
<p>
A video image analysis system processes a media stream to detect and
recognize objects shown in the video. This system generates metadata
describing the objects, including timestamps that describe when
the objects are visible, together with position information (e.g.,
bounding boxes). A web application then uses this timed metadata to
overlay labels and annotations on the video using HTML and CSS.
</p>
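<p>
A minimal sketch of this pattern, assuming the analysis metadata is exposed
to (or added by) the application as JSON-valued metadata cues with an
illustrative payload shape:
</p>
<pre class="example">
const video = document.querySelector('video');
const overlay = document.querySelector('#overlay'); // element positioned over the video

const track = video.addTextTrack('metadata', 'object detections');
track.mode = 'hidden';

const cue = new VTTCue(12.0, 14.5, JSON.stringify({
  label: 'bicycle',
  box: { x: 0.2, y: 0.4, w: 0.25, h: 0.3 } // fractional coordinates
}));
cue.onenter = () => {
  const { label, box } = JSON.parse(cue.text);
  overlay.textContent = label;
  Object.assign(overlay.style, {
    left: `${box.x * 100}%`, top: `${box.y * 100}%`,
    width: `${box.w * 100}%`, height: `${box.h * 100}%`,
    display: 'block'
  });
};
cue.onexit = () => { overlay.style.display = 'none'; };
track.addCue(cue);
</pre>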
</section>
<section>
<h3>Presentation of auxiliary content in live media</h3>
<p>
During a live media presentation, dynamic and unpredictable events
may occur which cause temporary suspension of the media presentation.
During that suspension interval, auxiliary content, such as the presentation
of UI controls and media files, may be unavailable. Depending on the
specific user engagement (or not) with the UI controls and the time
at which any such engagement occurs, specific web resources may be
rendered at defined times in a synchronized manner. For example,
a multimedia A/V clip along with subtitles corresponding to an
advertisement, and which were previously downloaded and cached
by the UA, are played out.
</p>
</section>
</section>
<section>
<h2>Related industry standards</h2>
<section>
<h3>MPEG-DASH</h3>
<p>
In MPEG DASH, events may be conveyed either as in-band events, e.g.,
as <code>emsg</code> boxes in ISO BMFF files, or out-of-band, via an
EventStream fragment in the MPD (Media Presentation Description)
document (i.e., by an instance of the <code>EventStream</code> child
of the <code>MPD.Period</code> element).
In addition, the MPD document may advertise the presence of
<code>emsg</code> events in the ISO BMFF content for given schemes.
</p>
<p>
An <code>emsg</code> event contains the following information,
as specified in [[MPEGDASH]], section 5.10.3.3:
</p>
<ul>
<li><code>scheme_id_uri</code> — A URI that identifies
the message scheme</li>
<li><code>value</code> — The event value (string)</li>
<li><code>timescale</code> — Timescale units, in ticks
per second</li>
<li><code>presentation_time_delta</code> — Presentation
time delta (with respect to the media segment),
in <code>timescale</code> units</li>
<li><code>event_duration</code> — Event duration,
in <code>timescale</code> units</li>
<li><code>id</code> — Event message identifier</li>
<li><code>message_data</code> — Message body (may be empty)</li>
</ul>
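<p>
For illustration, an application receiving such an event might see data along
the following lines; the field names follow the list above and the values
(including the scheme URI) are purely illustrative:
</p>
<pre class="example">
// Hypothetical JavaScript representation of a parsed emsg box.
const emsgEvent = {
  scheme_id_uri: 'urn:example:ad-break:2018', // identifies the message scheme
  value: 'break-start',
  timescale: 1000,                  // ticks per second
  presentation_time_delta: 4000,    // 4 seconds after the start of the segment
  event_duration: 30000,            // 30 seconds, in timescale units
  id: 17,                           // event message identifier
  message_data: new Uint8Array([])  // message body (may be empty)
};
</pre>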
<p>
The presence of <code>emsg</code> events in the media stream is signalled
in the DASH manifest document (MPD), using an <code>InbandEventStream</code> XML
element ([[MPEGDASH]], section 5.10.3.2).
</p>
</section>
<section>
<h3>HbbTV</h3>
<p>
HbbTV includes support for <code>emsg</code> events ([[DVB-DASH]],
section 9.1) and requires these events to be mapped to <code>DataCue</code>
([[HBBTV]], section 9.3.2). The HbbTV device test suite includes
test pages and streams for this ([[HBBTV-TESTS]]). This feature is
included in user agents shipping in connected TVs across Europe from
2017. HbbTV has a <a href="https://github.com/HbbTV-Association/ReferenceApplication">reference app</a>
and content for DASH+DRM which includes <code>emsg</code> support.
As well as HbbTV devices, this reference app and content have been
tested on Microsoft Edge with MSE and EME, although <code>emsg</code>
support does not work there.
</p>
</section>
<section>
<h3>DASH Industry Forum APIs for Interactivity</h3>
<p>
The DASH-IF InterOp Working Group has an ongoing work item,
<em>DAInty</em>, "DASH APIs for Interactivity", which aims to
specify a set of APIs between the DASH client/player and interactivity-capable
applications, for both web and native applications. The origin of this
work is a related 3GPP work item on Service Interactivity
([[3GPP-INTERACTIVITY-WID]], [[3GPP-INTERACTIVITY-TR]]).
The objective is to provide service enablers for user engagement with
auxiliary content and UIs on mobile devices during live or time-shifted
viewing of streaming content delivered over 3GPP broadcast or unicast
bearers, and the measurement and reporting of such interactive consumption.
</p>
<p>
Two APIs are being developed that are relevant to the scope of the present
document:
</p>
<ul>
<li>
Application subscription/DASH client dispatch of DASH event stream
messages containing interactivity information. Events can be delivered
in-band (<code>emsg</code>) and/or as MPD events.
</li>
<li>
Application subscription/DASH client dispatch of ISO BMFF Timed
Metadata tracks providing similar functionality to DASH event streams.
</li>
</ul>
<p>
Two modes for dispatching events are defined. In Mode 1, events are dispatched
at the time the event arrives, and in Mode 2, events are dispatched at the
given time on the media timeline. The "arrival" of events from the DASH client
perspective may be either static or pre-provisioned, in the case of MPD events,
or dynamic, in the case of in-band events carried in <code>emsg</code> boxes.
The application can register with the DASH client which Mode to use.
</p>
<p>
Reference: M&E IG, Media Timed Events Task Force call 20 Aug 2018:
<a href="https://www.w3.org/2018/08/20-me-minutes.html">Minutes</a>.
</p>
</section>
<section>
<h3>BBC Subtitle Guidelines</h3>
<p>
The BBC Subtitle Guidelines ([[BBC-SUBTITLES]]) describe best practice
for authoring subtitles or captions. In particular, the guidelines state:
</p>
<blockquote>
<p>
<b>5.2 Match subtitle to pace of speaking</b>
</p>
<p>
The subtitles should match the pace of speaking as closely
as possible. Ideally, when the speaker is in shot, your subtitles
should not anticipate speech by more than 1.5 seconds or hang up on
the screen for more than 1.5 seconds after speech has stopped.
</p>
<p>
<b>6.1 Match subtitles to shot</b>
</p>
<p>
It is likely to be less tiring for the viewer if shot changes and
subtitle changes occur at the same time. Many subtitles therefore
start on the first frame of the shot and end on the last frame.
</p>
<p>
<b>6.2 Maintain a minimum gap when mismatched</b>
</p>
<p>
If you have to let a subtitle hang over a shot change, do not remove
it too soon after the cut. The duration of the overhang will depend
on the content.
</p>
<p>
<b>6.3 Avoid straddling shot changes</b>
</p>
<p>
Avoid creating subtitles that straddle a shot change (i.e., a subtitle
that starts in the middle of shot one and ends in the middle of shot
two). To do this, you may need to split a sentence at an appropriate
point, or delay the start of a new sentence to coincide with the shot
change.
</p>
</blockquote>
<p>
To meet these requirements, the playback system must honour the provided
timings. Subtitles for video are typically authored against video at a
nominal frame rate, e.g., 25 frames per second (which corresponds to
40 milliseconds per frame). The actual video frame rate may be adjusted
dynamically according to the video encoding, but the subtitle timing
must remain the same ([[EBU-TT-D]], Annex E).
</p>
</section>
<section>
<h3>SCTE Media Splicing Requirements</h3>
<p>
The Society of Cable and Television Engineers (SCTE) has produced the
SCTE-35 specification, "Digital Program Insertion Cueing Message for Cable"
[[SCTE-35]]. Its requirements for splicing (section 9.1) state: "In
order to give advance warning of the impending splice (a pre-roll
function), the splice_insert() command could be sent multiple times
before the splice point. For example, the splice_insert() command
could be sent at 8, 5, 4 and 2 seconds prior to the packet containing
the related splice point. In order to meet other splicing deadlines in
the system, any message received with less than 4 seconds of advance
notice may not create the desired result."
</p>
<p>
This places an implicit requirement on the user agent's handling of
media timed events related to insertion cues. The content originator
may provide the cue with as little as 2 seconds of advance notice before
the insertion time. Therefore, the user agent should propagate the event
data associated with the insertion cue to the application in considerably
less than 2 seconds.
</p>
</section>
<section>
<h3>MPEG Working Draft on Carriage of Web Resources in ISO BMFF</h3>
<p>
[[WEB-ISOBMFF]] is a draft document that specifies the use of ISO BMFF
tools for the storage and delivery of web data. The specified storage
is designed to enable enriching audio/video content, as well as
audio-only content, with synchronized, animated, interactive web data,
including overlays.
</p>
</section>
<section>
<h3>WebVTT</h3>
<p>
[[WEBVTT]] is a W3C specification that provides a format for web video
text tracks. A <code>VTTCue</code> is a text track cue, and may have
attributes that affect rendering of the cue text on a web page.
WebVTT metadata cues carry arbitrary time-aligned text, which a web
application can read and interpret, for example as JSON.
</p>
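<p>
As a sketch, out-of-band metadata cues could be authored in a WebVTT file
such as the following (the JSON payload is illustrative) and consumed by
script through a <code>&lt;track&gt;</code> element with
<code>kind="metadata"</code>:
</p>
<pre class="example">
WEBVTT

00:00:30.000 --> 00:00:40.000
{"event":"now-playing","artist":"Example Artist","title":"Example Title"}
</pre>
<pre class="example">
const video = document.querySelector('video');
const track = video.querySelector('track[kind="metadata"]').track;
track.mode = 'hidden';
track.addEventListener('cuechange', () => {
  for (const cue of track.activeCues) {
    const data = JSON.parse(cue.text); // metadata cue text is not rendered by the UA
    console.log(data.event, data.title);
  }
});
</pre>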
</section>
</section>
<section>
<h2>Gap analysis</h2>
<p>
This section describes gaps in existing web platform
capabilities needed to support the use cases and requirements described
in this document. Where applicable, this section also describes how
existing web platform features can be used as workarounds, and any
associated limitations.
</p>
<section>
<h3>Synchronized event triggering</h3>
<section>
<h4>DASH and ISO BMFF emsg events</h4>
<p>
The <code>DataCue</code> API has been previously discussed as a means to
deliver in-band event data to web applications, but this is not implemented
in all of the main browser engines. It is <a href="https://www.w3.org/TR/2018/WD-html53-20181018/semantics-embedded-content.html#text-tracks-exposing-inband-metadata">included</a>
in the 18 October 2018 HTML 5.3 draft [[HTML53-20181018]], but is
<a href="https://html.spec.whatwg.org/multipage/media.html#timed-text-tracks">not included</a>
in [[HTML]]. See discussion <a href="https://groups.google.com/a/chromium.org/forum/#!topic/blink-dev/U06zrT2N-Xk">here</a>
and notes on implementation status <a href="https://lists.w3.org/Archives/Public/public-html/2016Apr/0005.html">here</a>.
</p>
<p>
WebKit <a href="https://discourse.wicg.io/t/media-timed-events-api-for-mpeg-dash-mpd-and-emsg-events/3096/2">supports</a>
a <code>DataCue</code> interface that extends HTML5 <code>DataCue</code>
with two attributes to support non-text metadata, <code>type</code> and
<code>value</code>.
</p>
<pre class="example">
interface DataCue : TextTrackCue {
attribute ArrayBuffer data; // Always empty
// Proposed extensions.
attribute any value;
readonly attribute DOMString type;
};
</pre>
<p>
<code>type</code> is a string identifying the type of metadata:
</p>
<table class="simple">
<thead>
<tr>
<th colspan="2">WebKit <code>DataCue</code> metadata types</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>"com.apple.quicktime.udta"</code></td>
<td>QuickTime User Data</td>
</tr>
<tr>
<td><code>"com.apple.quicktime.mdta"</code></td>
<td>QuickTime Metadata</td>
</tr>
<tr>
<td><code>"com.apple.itunes"</code></td>
<td>iTunes metadata</td>
</tr>
<tr>
<td><code>"org.mp4ra"</code></td>
<td>MPEG-4 metadata</td>
</tr>
<tr>
<td><code>"org.id3"</code></td>
<td>ID3 metadata</td>
</tr>
</tbody>
</table>
<p>
and <code>value</code> is an object with the metadata item key, data, and optionally a locale:
</p>
<pre class="example">
value = {
key: String
data: String | Number | Array | ArrayBuffer | Object
locale: String
}
</pre>
<p>
Neither [[MSE-BYTE-STREAM-FORMAT-ISOBMFF]] nor [[INBANDTRACKS]] describe
handling of <code>emsg</code> boxes.
</p>
<p>
On resource constrained devices such as smart TVs and streaming sticks,
parsing media segments to extract event information leads to a significant
performance penalty, which can have an impact on UI rendering updates if
this is done on the UI thread. There can also be an impact on the battery
life of mobile devices. Given that the media segments will be parsed anyway
by the user agent, parsing in JavaScript is an expensive overhead that
could be avoided.
</p>
<p>
[[HBBTV]] section 9.3.2 describes a mapping between the <code>emsg</code>
fields described <a href="#mpeg-dash">above</a>
and the <a href="https://html.spec.whatwg.org/multipage/media.html#texttrack"><code>TextTrack</code></a>
and <a href="https://www.w3.org/TR/2018/WD-html53-20180426/semantics-embedded-content.html#datacue"><code>DataCue</code></a>
APIs. A <code>TextTrack</code> instance is created for each event
stream signalled in the MPD document (as identified by the
<code>schemeIdUri</code> and <code>value</code>), and the
<a href="https://html.spec.whatwg.org/multipage/media.html#dom-texttrack-inbandmetadatatrackdispatchtype"><code>inBandMetadataTrackDispatchType</code></a>
<code>TextTrack</code> attribute contains the <code>scheme_id_uri</code>
and <code>value</code> values. Because HbbTV devices include a native
DASH client, parsing of the MPD document and creation of the
<code>TextTrack</code>s is done by the UA.
</p>
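<p>
For illustration, an HbbTV application might consume such a track as follows,
using an illustrative scheme id URI; <code>handleEventMessage()</code> is a
hypothetical application function:
</p>
<pre class="example">
const video = document.querySelector('video');

video.textTracks.addEventListener('addtrack', ({ track }) => {
  if (track.kind !== 'metadata') return;
  // inBandMetadataTrackDispatchType carries the scheme_id_uri and value
  // of the event stream, per the HbbTV mapping described above.
  if (!track.inBandMetadataTrackDispatchType.startsWith('urn:example:events')) return;
  track.mode = 'hidden';
  track.addEventListener('cuechange', () => {
    for (const cue of track.activeCues) {
      handleEventMessage(cue.data); // DataCue.data carries the emsg message_data bytes
    }
  });
});
</pre>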
<p class="ednote">
To support DASH clients implemented in web applications, there is
therefore either a need for an API that allows applications to tell
the UA which schemes they want to receive, or the UA should simply
expose all event streams to applications. Which of these is preferred?
</p>
</section>
<section>
<h4>Synchronization and timing</h4>
<p>
The timing guarantees provided in HTML5 regarding the triggering of
<code>TextTrackCue</code> events may not be enough to avoid
<a href="https://lists.w3.org/Archives/Public/public-inbandtracks/2013Dec/0004.html">events being missed</a>.
</p>
</section>
</section>
<section>
<h3>Synchronized rendering of web resources</h3>
<p class="ednote">
Describe gaps relating to synchronized rendering of web resources.
Can we define a generic web API for scheduling page changes
synchronized to playing media? Related: [[css-animations-1]],
[[web-animations-1]], [[css-transitions-1]]. See also:
<a href="https://github.com/bbc/VideoContext">https://github.com/bbc/VideoContext</a>.
Should this be in scope for the TF?
</p>
</section>
<section>
<h3>Rendering of web content embedded in media containers</h3>
<p>
There is no API for surfacing web content embedded in ISO BMFF
containers into the browser (e.g., the <code>HTMLCue</code> proposal
discussed at <a href="https://www.w3.org/wiki/TPAC2015/HTMLcue">TPAC 2015</a>).
</p>
<p class="ednote">
Add more detail on what's required. Some questions / considerations:
</p>
<ul class="ednote">
<li>Are the web resources intended to be handed to a web application
for rendering, or direct rendering by the UA?</li>
<li>How do we guarantee that resources are delivered to the browser
sufficiently ahead of time?</li>
<li>How does same-origin policy affect such resources?</li>
</ul>
</section>
</section>
<section>
<h2>Recommendations</h2>
<p>
This section describes recommendations from the Media & Entertainment
Interest Group for the development of a generic media timed event API.
</p>
<section>
<h3>Subscribing to event streams</h3>
<p>
The API should allow web applications to subscribe to receive specific
event types. For example, to support DASH <code>emsg</code> and MPD events,
the API should allow subscription by <code>scheme_id_uri</code> and (optional) <code>value</code>.
This is to make receiving events opt-in from the application point of view.
The user agent should deliver only those events to a web application
for which the application has subscribed. The API should also allow web
applications to unsubscribe from specific event streams by event type.
</p>
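<p>
Purely as an illustration of the desired capability, and not as a concrete
API proposal, a subscription interface could take a shape along these lines
(all names are hypothetical):
</p>
<pre class="example">
// Hypothetical API sketch; none of these members currently exist.
const video = document.querySelector('video');

const subscription = video.mediaTimedEvents.subscribe({
  schemeIdUri: 'urn:mpeg:dash:event:2012',
  value: '1'
}, (event) => {
  // Only events matching the subscribed type are delivered.
  console.log(event.startTime, event.data);
});

// Later, stop receiving this event type.
subscription.unsubscribe();
</pre>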
</section>
<section>
<h3>Out-of-band events</h3>
<p>
To be able to handle out of band events, the API must allow web
applications to create events to be added to the media timeline,
to be triggered by the user agent. The API should allow the
web application to provide all necessary parameters to define
the event, including start and end times, event type, and data
payload. The payload should be any data type (e.g., the set of
types supported by the WebKit <code>DataCue</code>). For DASH MPD
events, the event type is defined by the <code>schemeIdUri</code> and
(optional) <code>value</code> fields.
</p>
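<p>
Today, an application can approximate this with the existing text track APIs
by adding its own cues to the media timeline, as in the following sketch
(timings, payload, and the overlay functions are illustrative):
</p>
<pre class="example">
const video = document.querySelector('video');
const track = video.addTextTrack('metadata', 'application events');
track.mode = 'hidden';

const cue = new VTTCue(120, 150, JSON.stringify({
  schemeIdUri: 'urn:example:interactivity', // illustrative event type
  value: 'quiz',
  payload: { question: 'Who scored the first goal?' }
}));
cue.onenter = () => showInteractiveOverlay(JSON.parse(cue.text)); // hypothetical function
cue.onexit = () => hideInteractiveOverlay();                      // hypothetical function
track.addCue(cue);
</pre>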
</section>
<section>
<h3>Event triggering</h3>
<p>
For those events that the application has subscribed to receive,
the API must:
</p>
<ul>
<li>
Generate a JavaScript event when an in-band media timed event
is parsed from the media container or media stream (DAInty Mode 1)
</li>
<li>
Generate a JavaScript event when the current media playback
position reaches the start time of a media timed event during
playback (DAInty Mode 2). This applies equally to in-band events
that the user agent has extracted from the media container, and
out-of-band events added by the web application.
</li>
</ul>
<p>
The API must provide guarantees that no events can be missed during
linear playback of the media.
</p>
</section>
<section>
<h3>In-band event processing</h3>
<p>
We recommend updating [[INBANDTRACKS]] to describe handling of in-band media
timed events supported on the web platform, following a registry approach
with one specification per media format that describes the event details
for that format.
</p>
</section>
<section>
<h3>DASH events</h3>
<p>
We recommend that browser engines support DASH <code>emsg</code>
in-band events and MPD out-of-band events, as part of their support
for the MPEG Common Media Application Format (CMAF).
</p>
</section>
<section>
<h3>Synchronization</h3>
<p>
The <em>time marches on</em> algorithm in [[HTML]] should be reviewed and updated
to ensure that events are delivered to the web application within
time constraints described elsewhere in this report.
</p>
</section>
</section>
<section>
<h2>Acknowledgments</h2>
<p>
Thanks to Charles Lo, Nigel Megitt, Jon Piesing, and Rob Smith for their
contributions to this document.
</p>
</section>
</body>
</html>