Don't default to UseDTS for mpegts

Presentation timestamps seem to work fine for most files in mpeg and mpegts containers (and replacing PTS with DTS can break predictions), so instead of defaulting to UseDTS, wait until we encounter a packet with no PTS while indexing (which does happen sometimes) before reverting that track to UseDTS. On the other hand, seeking by PTS is unreliable in mpegts (since it's just lavf's generic internal seek function), so default to SeekByPos there. I don't have an mpegtsraw test file on hand, but as far as I can see it should get the same treatment. For nuv I don't really know, so I'll leave it as it is.
FFMS · Apr 8, 2024 · c3e931d · c3e931d
1 parent 96f4267
commit c3e931d
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 10 deletions.
diff --git a/src/core/indexing.cpp b/src/core/indexing.cpp
@@ -71,6 +71,10 @@ void FFMS_Index::Finalize(std::vector<SharedAVContext> const& video_contexts, co
     for (size_t i = 0, end = size(); i != end; ++i) {
         FFMS_Track& track = (*this)[i];
 
+        if (!strcmp(Format, "mpeg") || !strcmp(Format, "mpegts") || !strcmp(Format, "mpegtsraw"))
+            if (std::any_of(track.begin(), track.end(), [](FrameInfo F) { return F.PTS == AV_NOPTS_VALUE; }))
+                track.RevertToDTS();
+
         // Some audio tracks are simply insane junk (seen with als) and will have a single(?) super long packet and
         // apart from that look legit and be chosen instead of usable audio. This hopefully rejects some of it.
         // Caused by sample in https://github.com/FFMS/ffms2/issues/351
@@ -85,6 +89,7 @@ void FFMS_Index::Finalize(std::vector<SharedAVContext> const& video_contexts, co
         // but may also have valid, split packets, with pos equal to the previous pos.
         if (video_contexts[i].CodecContext && video_contexts[i].CodecContext->codec_id == AV_CODEC_ID_H264 && !!strcmp(Format, "asf"))
             track.MaybeHideFrames();
+
         track.FinalizeTrack();
 
         if (track.TT != FFMS_TYPE_VIDEO) continue;
@@ -430,7 +435,8 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
     std::vector<SharedAVContext> AVContexts(FormatContext->nb_streams);
 
     auto TrackIndices = std::unique_ptr<FFMS_Index>(new FFMS_Index(Filesize, Digest, ErrorHandling, LAVFOpts));
-    bool UseDTS = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw") || !strcmp(FormatContext->iformat->name, "nuv");
+    bool UseDTS = !strcmp(FormatContext->iformat->name, "nuv");
+    bool IsMpegLike = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw");
 
     for (unsigned int i = 0; i < FormatContext->nb_streams; i++) {
         TrackIndices->emplace_back((int64_t)FormatContext->streams[i]->time_base.num * 1000,
@@ -527,7 +533,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
 
         if (FormatContext->streams[Track]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
             int64_t PTS = TrackInfo.UseDTS ? Packet->dts : Packet->pts;
-            if (PTS == AV_NOPTS_VALUE) {
+            if (PTS == AV_NOPTS_VALUE && !IsMpegLike) {
                 // VPx alt-refs are output as packets which lack timestmps or durations, since
                 // they are invisible. Currently, the timestamp mangling code in libavformat
                 // will sometimes add a bogus timestamp and duration, if the webm in question
@@ -558,7 +564,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
             bool SecondField = false;
             ParseVideoPacket(AVContexts[Track], Packet, &RepeatPict, &FrameType, &Invisible, &SecondField, &LastPicStruct);
 
-            TrackInfo.AddVideoFrame(PTS, RepeatPict, KeyFrame,
+            TrackInfo.AddVideoFrame(PTS, Packet->dts, RepeatPict, KeyFrame,
                 FrameType, Packet->pos, Invisible, SecondField);
         } else if (FormatContext->streams[Track]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
             // For video seeking timestamps are used only if all packets have
@@ -571,7 +577,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
             uint32_t SampleCount = IndexAudioPacket(Track, Packet, AVContexts[Track], *TrackIndices);
             TrackInfo.SampleRate = AVContexts[Track].CodecContext->sample_rate;
 
-            TrackInfo.AddAudioFrame(LastValidTS[Track],
+            TrackInfo.AddAudioFrame(LastValidTS[Track], Packet->dts,
                 StartSample, SampleCount, KeyFrame, Packet->pos, Packet->flags & AV_PKT_FLAG_DISCARD);
         }
 

diff --git a/src/core/track.cpp b/src/core/track.cpp
@@ -131,14 +131,14 @@ void FFMS_Track::Write(ZipFile &stream) const {
         WriteFrame(stream, Frames[i], i == 0 ? temp : Frames[i - 1], TT);
 }
 
-void FFMS_Track::AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool MarkedHidden, bool SecondField) {
-    Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, 0, FrameType, RepeatPict, KeyFrame, MarkedHidden, SecondField });
+void FFMS_Track::AddVideoFrame(int64_t PTS, int64_t DTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool MarkedHidden, bool SecondField) {
+    Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, 0, FrameType, RepeatPict, KeyFrame, MarkedHidden, SecondField, DTS });
 }
 
-void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool MarkedHidden) {
+void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t DTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool MarkedHidden) {
     if (SampleCount > 0) {
         Data->Frames.push_back({ PTS, 0, FilePos, SampleStart, SampleCount,
-            0, 0, 0, 0, KeyFrame, MarkedHidden, false });
+            0, 0, 0, 0, KeyFrame, MarkedHidden, false, DTS });
     }
 }
 
@@ -248,6 +248,14 @@ void FFMS_Track::MaybeReorderFrames() {
     }
 }
 
+void FFMS_Track::RevertToDTS() {
+    frame_vec &Frames = Data->Frames;
+    for (size_t i = 0; i < size(); ++i)
+        Frames[i].PTS = Frames[i].DTS;
+
+    UseDTS = true;
+}
+
 void FFMS_Track::MaybeHideFrames() {
     frame_vec &Frames = Data->Frames;
     // Awful handling for interlaced H.264: each frame is output twice, so hide

diff --git a/src/core/track.h b/src/core/track.h
@@ -43,6 +43,8 @@ struct FrameInfo {
     bool MarkedHidden;
     bool SecondField;
 
+    int64_t DTS;        // Only used during indexing and not stored in the index file. (If UseDTS is true, the PTS values will be DTS)
+
     // If true, no frame corresponding to this packet will be output
     constexpr bool Skipped() const { return MarkedHidden || SecondField; }
 };
@@ -71,9 +73,10 @@ struct FFMS_Track {
     int64_t LastDuration = 0;
     int SampleRate = 0; // not persisted
 
-    void AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos = 0, bool Invisible = false, bool SecondField = false);
-    void AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos = 0, bool Invisible = false);
+    void AddVideoFrame(int64_t PTS, int64_t DTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos = 0, bool Invisible = false, bool SecondField = false);
+    void AddAudioFrame(int64_t PTS, int64_t DTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos = 0, bool Invisible = false);
 
+    void RevertToDTS();
     void MaybeHideFrames();
     void FinalizeTrack();
     void FillAudioGaps();

diff --git a/src/core/videosource.cpp b/src/core/videosource.cpp
@@ -279,6 +279,8 @@ FFMS_VideoSource::FFMS_VideoSource(const char *SourceFile, FFMS_Index &Index, in
                 Delay.ThreadDelay = CodecContext->thread_count - 1;
         }
 
+        SeekByPos = !strcmp(FormatContext->iformat->name, "mpeg") || !strcmp(FormatContext->iformat->name, "mpegts") || !strcmp(FormatContext->iformat->name, "mpegtsraw");
+
         // Always try to decode a frame to make sure all required parameters are known
         int64_t DummyPTS = 0, DummyPos = 0;
         DecodeNextFrame(DummyPTS, DummyPos);