Fix UB/crash in new SeqnoToTimeMapping::CopyFromSeqnoRange (facebook#…

…12293) Summary: After facebook#12253 this function has crashed in the crash test, in its call to `std::copy`. I haven't reproduced the crash directly, but `std::copy` probably has undefined behavior if the starting iterator is after the ending iterator, which was possible. I've fixed the logic to deal with that case and to add an assertion to check that precondition of `std::copy` (which appears can be unchecked by `std::copy` itself even with UBSAN+ASAN). Also added some unit tests etc. that were unfinished for facebook#12253, and slightly tweak SeqnoToTimeMapping::EnforceMaxTimeSpan handling of zero time span case. This is intended for patching 8.11. Pull Request resolved: facebook#12293 Test Plan: tests added. Will trigger ~20 runs of the crash test job that saw the crash. https://fburl.com/ci/5iiizvfa Reviewed By: jowlyzhang Differential Revision: D53090422 Pulled By: pdillinger fbshipit-source-id: 69d60b1847d9c7e4ae62b153011c2040405db461
jaykorean · Jan 25, 2024 · d895eb0 · d895eb0
1 parent 11d4ac8
commit d895eb0
Show file tree

Hide file tree

Showing 3 changed files with 215 additions and 11 deletions.
diff --git a/db/seqno_time_test.cc b/db/seqno_time_test.cc
@@ -1053,6 +1053,114 @@ TEST_F(SeqnoTimeTest, MappingAppend) {
   ASSERT_EQ(test.TEST_GetLastEntry(), P({11, 250}));
 }
 
+TEST_F(SeqnoTimeTest, CapacityLimits) {
+  using P = SeqnoToTimeMapping::SeqnoTimePair;
+  SeqnoToTimeMapping test;
+
+  test.SetCapacity(3);
+  EXPECT_TRUE(test.Append(10, 300));
+  EXPECT_TRUE(test.Append(20, 400));
+  EXPECT_TRUE(test.Append(30, 500));
+  EXPECT_TRUE(test.Append(40, 600));
+  // Capacity 3 is small enough that the non-strict limit is
+  // equal to the strict limit.
+  EXPECT_EQ(3U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({40, 600}));
+
+  // Same for Capacity 2
+  test.SetCapacity(2);
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({40, 600}));
+
+  EXPECT_TRUE(test.Append(50, 700));
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({50, 700}));
+
+  // Capacity 1 is difficult to work with internally, so is
+  // coerced to 2.
+  test.SetCapacity(1);
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({50, 700}));
+
+  EXPECT_TRUE(test.Append(60, 800));
+  EXPECT_EQ(2U, test.Size());
+  EXPECT_EQ(test.TEST_GetLastEntry(), P({60, 800}));
+
+  // Capacity 0 means throw everything away
+  test.SetCapacity(0);
+  EXPECT_EQ(0U, test.Size());
+
+  EXPECT_FALSE(test.Append(70, 900));
+  EXPECT_EQ(0U, test.Size());
+
+  // Unlimited capacity
+  test.SetCapacity(UINT64_MAX);
+  for (unsigned i = 1; i <= 10101U; i++) {
+    EXPECT_TRUE(test.Append(i, 11U * i));
+  }
+  EXPECT_EQ(10101U, test.Size());
+}
+
+TEST_F(SeqnoTimeTest, TimeSpanLimits) {
+  SeqnoToTimeMapping test;
+
+  // Default: no limit
+  for (unsigned i = 1; i <= 63U; i++) {
+    EXPECT_TRUE(test.Append(1000 + i, uint64_t{1} << i));
+  }
+  // None dropped.
+  EXPECT_EQ(63U, test.Size());
+
+  test.Clear();
+
+  // Explicit no limit
+  test.SetMaxTimeSpan(UINT64_MAX);
+  for (unsigned i = 1; i <= 63U; i++) {
+    EXPECT_TRUE(test.Append(1000 + i, uint64_t{1} << i));
+  }
+  // None dropped.
+  EXPECT_EQ(63U, test.Size());
+
+  // We generally keep 2 entries as long as the configured max time span
+  // is non-zero
+  test.SetMaxTimeSpan(10);
+  EXPECT_EQ(2U, test.Size());
+
+  test.SetMaxTimeSpan(1);
+  EXPECT_EQ(2U, test.Size());
+
+  // But go down to 1 entry if the max time span is zero
+  test.SetMaxTimeSpan(0);
+  EXPECT_EQ(1U, test.Size());
+
+  EXPECT_TRUE(test.Append(2000, (uint64_t{1} << 63) + 42U));
+  EXPECT_EQ(1U, test.Size());
+
+  test.Clear();
+
+  // Test more typical behavior. Note that one entry at or beyond the max span
+  // is kept.
+  test.SetMaxTimeSpan(100);
+  EXPECT_TRUE(test.Append(1001, 123));
+  EXPECT_TRUE(test.Append(1002, 134));
+  EXPECT_TRUE(test.Append(1003, 150));
+  EXPECT_TRUE(test.Append(1004, 189));
+  EXPECT_TRUE(test.Append(1005, 220));
+  EXPECT_EQ(5U, test.Size());
+  EXPECT_TRUE(test.Append(1006, 233));
+  EXPECT_EQ(6U, test.Size());
+  EXPECT_TRUE(test.Append(1007, 234));
+  EXPECT_EQ(6U, test.Size());
+  EXPECT_TRUE(test.Append(1008, 235));
+  EXPECT_EQ(7U, test.Size());
+  EXPECT_TRUE(test.Append(1009, 300));
+  EXPECT_EQ(6U, test.Size());
+  EXPECT_TRUE(test.Append(1010, 350));
+  EXPECT_EQ(3U, test.Size());
+  EXPECT_TRUE(test.Append(1011, 470));
+  EXPECT_EQ(2U, test.Size());
+}
+
 TEST_F(SeqnoTimeTest, ProximalFunctions) {
   SeqnoToTimeMapping test;
   test.SetCapacity(10);
@@ -1202,6 +1310,93 @@ TEST_F(SeqnoTimeTest, PrePopulate) {
   }
 }
 
+TEST_F(SeqnoTimeTest, CopyFromSeqnoRange) {
+  SeqnoToTimeMapping test_from;
+  SeqnoToTimeMapping test_to;
+
+  // With zero to draw from
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 100, 100);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  // With one to draw from
+  EXPECT_TRUE(test_from.Append(10, 500));
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // Includes one entry before range
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 100, 100);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // Includes one entry before range (even if somewhat nonsensical)
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 9);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 10);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // With more to draw from
+  EXPECT_TRUE(test_from.Append(20, 600));
+  EXPECT_TRUE(test_from.Append(30, 700));
+  EXPECT_TRUE(test_from.Append(40, 800));
+  EXPECT_TRUE(test_from.Append(50, 900));
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
+  EXPECT_EQ(test_to.Size(), 5U);
+
+  // Includes one entry before range
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 100, 100);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 19, 19);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  // Includes one entry before range (even if somewhat nonsensical)
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 9);
+  EXPECT_EQ(test_to.Size(), 0U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 0, 10);
+  EXPECT_EQ(test_to.Size(), 1U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 20, 20);
+  EXPECT_EQ(test_to.Size(), 2U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 20, 29);
+  EXPECT_EQ(test_to.Size(), 2U);
+
+  test_to.Clear();
+  test_to.CopyFromSeqnoRange(test_from, 20, 30);
+  EXPECT_EQ(test_to.Size(), 3U);
+}
+
 TEST_F(SeqnoTimeTest, EnforceWithNow) {
   constexpr uint64_t kMaxTimeSpan = 420;
   SeqnoToTimeMapping test;

diff --git a/db/seqno_to_time_mapping.cc b/db/seqno_to_time_mapping.cc
@@ -72,20 +72,16 @@ SequenceNumber SeqnoToTimeMapping::GetProximalSeqnoBeforeTime(
 void SeqnoToTimeMapping::EnforceMaxTimeSpan(uint64_t now) {
   assert(enforced_);  // at least sorted
   uint64_t cutoff_time;
+  if (pairs_.size() <= 1) {
+    return;
+  }
   if (now > 0) {
-    if (pairs_.size() <= 1) {
-      return;
-    }
     if (now < max_time_span_) {
       // Nothing eligible to prune / avoid underflow
       return;
     }
     cutoff_time = now - max_time_span_;
   } else {
-    if (pairs_.size() <= 2) {
-      // Need to keep at least two if we don't know the current time
-      return;
-    }
     const auto& last = pairs_.back();
     if (last.time < max_time_span_) {
       // Nothing eligible to prune / avoid underflow
@@ -108,7 +104,6 @@ void SeqnoToTimeMapping::EnforceCapacity(bool strict) {
     return;
   }
   // Treat cap of 1 as 2 to work with the below algorithm (etc.)
-  // TODO: unit test
   if (strict_cap == 1) {
     strict_cap = 2;
   }
@@ -400,12 +395,16 @@ void SeqnoToTimeMapping::CopyFromSeqnoRange(const SeqnoToTimeMapping& src,
                                             SequenceNumber to_seqno) {
   bool orig_empty = Empty();
   auto src_it = src.FindGreaterEqSeqno(from_seqno);
+  // Allow nonsensical ranges like [1000, 0] which might show up e.g. for
+  // an SST file with no entries.
+  auto src_it_end =
+      to_seqno < from_seqno ? src_it : src.FindGreaterSeqno(to_seqno);
   // To best answer GetProximalTimeBeforeSeqno(from_seqno) we need an entry
   // with a seqno before that (if available)
   if (src_it != src.pairs_.begin()) {
     --src_it;
   }
-  auto src_it_end = src.FindGreaterSeqno(to_seqno);
+  assert(src_it <= src_it_end);
   std::copy(src_it, src_it_end, std::back_inserter(pairs_));
 
   if (!orig_empty || max_time_span_ < UINT64_MAX || capacity_ < UINT64_MAX) {
@@ -420,6 +419,7 @@ bool SeqnoToTimeMapping::Append(SequenceNumber seqno, uint64_t time) {
   bool added = false;
   if (seqno == 0) {
     // skip seq number 0, which may have special meaning, like zeroed out data
+    // TODO: consider changing?
   } else if (pairs_.empty()) {
     enforced_ = true;
     pairs_.push_back({seqno, time});

diff --git a/db/seqno_to_time_mapping.h b/db/seqno_to_time_mapping.h
@@ -125,12 +125,13 @@ class SeqnoToTimeMapping {
   // under enforcement mode, the structure will maintian only one entry older
   // than the newest entry time minus max_time_span, so that
   // GetProximalSeqnoBeforeTime queries back to that time return a good result.
-  // UINT64_MAX == unlimited. Returns *this.
+  // UINT64_MAX == unlimited. 0 == retain just one latest entry. Returns *this.
   SeqnoToTimeMapping& SetMaxTimeSpan(uint64_t max_time_span);
 
   // Set the nominal capacity under enforcement mode. The structure is allowed
   // to grow some reasonable fraction larger but will automatically compact
-  // down to this size. UINT64_MAX == unlimited. Returns *this.
+  // down to this size. UINT64_MAX == unlimited. 0 == retain nothing.
+  // Returns *this.
   SeqnoToTimeMapping& SetCapacity(uint64_t capacity);
 
   // ==== Modifiers, enforced ==== //
@@ -243,6 +244,14 @@ class SeqnoToTimeMapping {
 
   std::deque<SeqnoTimePair> pairs_;
 
+  // Whether this object is in the "enforced" state. Between calls to public
+  // functions, enforced_==true means that
+  // * `pairs_` is sorted
+  // * The capacity limit (non-strict) is met
+  // * The time span limit is met
+  // However, some places within the implementation (Append()) will temporarily
+  // violate those last two conditions while enforced_==true. See also the
+  // Enforce*() and Sort*() private functions below.
   bool enforced_ = true;
 
   void EnforceMaxTimeSpan(uint64_t now = 0);