Skip to content

Commit

Permalink
Merge branch 'main' into verify_flushed_data_recovery
Browse files Browse the repository at this point in the history
  • Loading branch information
hx235 authored Jun 24, 2024
2 parents 4be510e + 56f7ef5 commit 4fabe2b
Show file tree
Hide file tree
Showing 43 changed files with 297 additions and 182 deletions.
20 changes: 16 additions & 4 deletions .github/workflows/pr-jobs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ jobs:
container:
image: zjay437/rocksdb:0.6
options: --shm-size=16gb
env:
CC: gcc-10
CXX: g++-10
steps:
- uses: actions/checkout@v4.1.0
- uses: "./.github/actions/pre-steps"
Expand All @@ -104,38 +107,47 @@ jobs:
container:
image: zjay437/rocksdb:0.6
options: --shm-size=16gb
env:
CC: gcc-10
CXX: g++-10
steps:
- uses: actions/checkout@v4.1.0
- uses: "./.github/actions/pre-steps"
- uses: "./.github/actions/setup-folly"
- run: "(mkdir build && cd build && cmake -DUSE_FOLLY_LITE=1 -DWITH_GFLAGS=1 .. && make V=1 -j20)"
- uses: "./.github/actions/post-steps"
build-linux-gcc-7-with-folly:
build-linux-make-with-folly:
if: ${{ github.repository_owner == 'facebook' }}
runs-on:
labels: 16-core-ubuntu
container:
image: zjay437/rocksdb:0.6
options: --shm-size=16gb
env:
CC: gcc-10
CXX: g++-10
steps:
- uses: actions/checkout@v4.1.0
- uses: "./.github/actions/pre-steps"
- uses: "./.github/actions/setup-folly"
- uses: "./.github/actions/build-folly"
- run: USE_FOLLY=1 LIB_MODE=static CC=gcc-7 CXX=g++-7 V=1 make -j32 check
- run: USE_FOLLY=1 LIB_MODE=static V=1 make -j32 check
- uses: "./.github/actions/post-steps"
build-linux-gcc-7-with-folly-lite-no-test:
build-linux-make-with-folly-lite-no-test:
if: ${{ github.repository_owner == 'facebook' }}
runs-on:
labels: 16-core-ubuntu
container:
image: zjay437/rocksdb:0.6
options: --shm-size=16gb
env:
CC: gcc-10
CXX: g++-10
steps:
- uses: actions/checkout@v4.1.0
- uses: "./.github/actions/pre-steps"
- uses: "./.github/actions/setup-folly"
- run: USE_FOLLY_LITE=1 CC=gcc-7 CXX=g++-7 V=1 make -j32 all
- run: USE_FOLLY_LITE=1 V=1 make -j32 all
- uses: "./.github/actions/post-steps"
build-linux-cmake-with-folly-coroutines:
if: ${{ github.repository_owner == 'facebook' }}
Expand Down
9 changes: 8 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ if(USE_FOLLY)
FMT_INST_PATH)
exec_program(ls ARGS -d ${FOLLY_INST_PATH}/../gflags* OUTPUT_VARIABLE
GFLAGS_INST_PATH)
set(Boost_DIR ${BOOST_INST_PATH}/lib/cmake/Boost-1.78.0)
set(Boost_DIR ${BOOST_INST_PATH}/lib/cmake/Boost-1.83.0)
if(EXISTS ${FMT_INST_PATH}/lib64)
set(fmt_DIR ${FMT_INST_PATH}/lib64/cmake/fmt)
else()
Expand Down Expand Up @@ -1052,13 +1052,20 @@ if(USE_FOLLY_LITE)
list(APPEND SOURCES
third-party/folly/folly/container/detail/F14Table.cpp
third-party/folly/folly/detail/Futex.cpp
third-party/folly/folly/lang/Exception.cpp
third-party/folly/folly/lang/SafeAssert.cpp
third-party/folly/folly/lang/ToAscii.cpp
third-party/folly/folly/ScopeGuard.cpp
third-party/folly/folly/synchronization/AtomicNotification.cpp
third-party/folly/folly/synchronization/DistributedMutex.cpp
third-party/folly/folly/synchronization/ParkingLot.cpp)
include_directories(${PROJECT_SOURCE_DIR}/third-party/folly)
exec_program(python3 ${PROJECT_SOURCE_DIR}/third-party/folly ARGS
build/fbcode_builder/getdeps.py show-source-dir boost OUTPUT_VARIABLE
BOOST_SOURCE_PATH)
exec_program(ls ARGS -d ${BOOST_SOURCE_PATH}/boost* OUTPUT_VARIABLE
BOOST_INCLUDE_DIR)
include_directories(${BOOST_INCLUDE_DIR})
add_definitions(-DUSE_FOLLY -DFOLLY_NO_CONFIG)
list(APPEND THIRDPARTY_LIBS glog)
endif()
Expand Down
25 changes: 25 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,31 @@
# Rocksdb Change Log
> NOTE: Entries for next release do not go here. Follow instructions in `unreleased_history/README.txt`
## 9.4.0 (06/23/2024)
### New Features
* Added a `CompactForTieringCollectorFactory` to auto trigger compaction for tiering use case.
* Optimistic transactions and pessimistic transactions with the WriteCommitted policy now support the `GetEntityForUpdate` API.
* Added a new "count" command to the ldb repl shell. By default, it prints a count of keys in the database from start to end. The options --from=<key> and/or --to=<key> can be specified to limit the range.
* Add `rocksdb_writebatch_update_timestamps`, `rocksdb_writebatch_wi_update_timestamps` in C API.
* Add `rocksdb_iter_refresh` in C API.
* Add `rocksdb_writebatch_create_with_params`, `rocksdb_writebatch_wi_create_with_params` to create WB and WBWI with all options in C API

### Public API Changes
* Deprecated names `LogFile` and `VectorLogPtr` in favor of new names `WalFile` and `VectorWalPtr`.
* Introduce a new universal compaction option `CompactionOptionsUniversal::max_read_amp` which allows users to define the limit on the number of sorted runs separately from the trigger for compaction (`level0_file_num_compaction_trigger`) #12477.

### Behavior Changes
* Inactive WALs are immediately closed upon being fully synced rather than in a background thread. This is to ensure LinkFile() is not called on files still open for write, which might not be supported by some FileSystem implementations. This should not be a performance issue, but an opt-out is available with the new DB option `background_close_inactive_wals`.

### Bug Fixes
* Fix a rare case in which a hard-linked WAL in a Checkpoint is not fully synced (so might lose data on power loss).
* Fixed the output of the `ldb dump_wal` command for `PutEntity` records so it prints the key and correctly resets the hexadecimal formatting flag after printing the wide-column entity.
* Fixed an issue where `PutEntity` records were handled incorrectly while rebuilding transactions during recovery.
* Various read operations could ignore various ReadOptions that might be relevant. Fixed many such cases, which can result in behavior change but a better reflection of specified options.

### Performance Improvements
* Improved write throughput to memtable when there's a large number of concurrent writers and `allow_concurrent_memtable_write=true` (#12545).

## 9.3.0 (05/17/2024)
### New Features
* Optimistic transactions and pessimistic transactions with the WriteCommitted policy now support the `GetEntity` API.
Expand Down
21 changes: 15 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ dummy := $(shell (export ROCKSDB_ROOT="$(CURDIR)"; \
export LIB_MODE="$(LIB_MODE)"; \
export ROCKSDB_CXX_STANDARD="$(ROCKSDB_CXX_STANDARD)"; \
export USE_FOLLY="$(USE_FOLLY)"; \
export USE_FOLLY_LITE="$(USE_FOLLY_LITE)"; \
"$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk"))
# this file is generated by the previous line to set build flags and sources
include make_config.mk
Expand Down Expand Up @@ -500,6 +501,17 @@ endif
ifeq ($(USE_FOLLY_LITE),1)
# Path to the Folly source code and include files
FOLLY_DIR = ./third-party/folly
ifneq ($(strip $(BOOST_SOURCE_PATH)),)
BOOST_INCLUDE = $(shell (ls -d $(BOOST_SOURCE_PATH)/boost*/))
# AIX: pre-defined system headers are surrounded by an extern "C" block
ifeq ($(PLATFORM), OS_AIX)
PLATFORM_CCFLAGS += -I$(BOOST_INCLUDE)
PLATFORM_CXXFLAGS += -I$(BOOST_INCLUDE)
else
PLATFORM_CCFLAGS += -isystem $(BOOST_INCLUDE)
PLATFORM_CXXFLAGS += -isystem $(BOOST_INCLUDE)
endif
endif # BOOST_SOURCE_PATH
# AIX: pre-defined system headers are surrounded by an extern "C" block
ifeq ($(PLATFORM), OS_AIX)
PLATFORM_CCFLAGS += -I$(FOLLY_DIR)
Expand Down Expand Up @@ -2472,14 +2484,13 @@ checkout_folly:
fi
@# Pin to a particular version for public CI, so that PR authors don't
@# need to worry about folly breaking our integration. Update periodically
cd third-party/folly && git reset --hard beacd86d63cd71c904632262e6c36f60874d78ba
@# A hack to remove boost dependency.
@# NOTE: this hack is only needed if building using USE_FOLLY_LITE
perl -pi -e 's/^(#include <boost)/\/\/$$1/' third-party/folly/folly/functional/Invoke.h
cd third-party/folly && git reset --hard c48fdd205c1c291651749d532b8055fe822bba25
@# NOTE: this hack is required for clang in some cases
perl -pi -e 's/int rv = syscall/int rv = (int)syscall/' third-party/folly/folly/detail/Futex.cpp
@# NOTE: this hack is required for gcc in some cases
perl -pi -e 's/(__has_include.<experimental.memory_resource>.)/__cpp_rtti && $$1/' third-party/folly/folly/memory/MemoryResource.h
@# NOTE: boost source will be needed for any build including `USE_FOLLY_LITE` builds as those depend on boost headers
cd third-party/folly && $(PYTHON) build/fbcode_builder/getdeps.py fetch boost

CXX_M_FLAGS = $(filter -m%, $(CXXFLAGS))

Expand All @@ -2491,8 +2502,6 @@ build_folly:
echo "Please run checkout_folly first"; \
false; \
fi
# Restore the original version of Invoke.h with boost dependency
cd third-party/folly && ${GIT_COMMAND} checkout folly/functional/Invoke.h
cd third-party/folly && \
CXXFLAGS=" $(CXX_M_FLAGS) -DHAVE_CXX11_ATOMIC " $(PYTHON) build/fbcode_builder/getdeps.py build --no-tests

Expand Down
8 changes: 7 additions & 1 deletion build_tools/build_detect_platform
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ EOF
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lbenchmark"
fi
fi
if test $USE_FOLLY; then
if test $USE_FOLLY || test $USE_FOLLY_LITE; then
# Test whether libfolly library is installed
$CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <folly/synchronization/DistributedMutex.h>
Expand Down Expand Up @@ -751,6 +751,11 @@ if [ "$USE_FOLLY" ]; then
FOLLY_PATH=`cd $FOLLY_DIR && $PYTHON build/fbcode_builder/getdeps.py show-inst-dir folly`
fi
fi
if [ "$USE_FOLLY_LITE" ]; then
if [ "$FOLLY_DIR" ]; then
BOOST_SOURCE_PATH=`cd $FOLLY_DIR && $PYTHON build/fbcode_builder/getdeps.py show-source-dir boost`
fi
fi

PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"
Expand Down Expand Up @@ -792,6 +797,7 @@ echo "PROFILING_FLAGS=$PROFILING_FLAGS" >> "$OUTPUT"
echo "FIND=$FIND" >> "$OUTPUT"
echo "WATCH=$WATCH" >> "$OUTPUT"
echo "FOLLY_PATH=$FOLLY_PATH" >> "$OUTPUT"
echo "BOOST_SOURCE_PATH=$BOOST_SOURCE_PATH" >> "$OUTPUT"

# This will enable some related identifiers for the preprocessor
if test -n "$JEMALLOC"; then
Expand Down
8 changes: 4 additions & 4 deletions db/arena_wrapped_db_iter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ Status ArenaWrappedDBIter::Refresh(const Snapshot* snapshot) {
reinit_internal_iter();
break;
} else {
delete *memtable_range_tombstone_iter_;
*memtable_range_tombstone_iter_ = new TruncatedRangeDelIterator(
std::unique_ptr<FragmentedRangeTombstoneIterator>(t),
&cfd->internal_comparator(), nullptr, nullptr);
*memtable_range_tombstone_iter_ =
std::make_unique<TruncatedRangeDelIterator>(
std::unique_ptr<FragmentedRangeTombstoneIterator>(t),
&cfd->internal_comparator(), nullptr, nullptr);
}
}
db_impl->ReturnAndCleanupSuperVersion(cfd, sv);
Expand Down
6 changes: 4 additions & 2 deletions db/arena_wrapped_db_iter.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ class ArenaWrappedDBIter : public Iterator {
db_iter_->SetIter(iter);
}

void SetMemtableRangetombstoneIter(TruncatedRangeDelIterator** iter) {
void SetMemtableRangetombstoneIter(
std::unique_ptr<TruncatedRangeDelIterator>* iter) {
memtable_range_tombstone_iter_ = iter;
}

Expand Down Expand Up @@ -110,7 +111,8 @@ class ArenaWrappedDBIter : public Iterator {
bool allow_refresh_ = true;
// If this is nullptr, it means the mutable memtable does not contain range
// tombstone when added under this DBIter.
TruncatedRangeDelIterator** memtable_range_tombstone_iter_ = nullptr;
std::unique_ptr<TruncatedRangeDelIterator>* memtable_range_tombstone_iter_ =
nullptr;
};

// Generate the arena wrapped iterator class.
Expand Down
8 changes: 4 additions & 4 deletions db/db_impl/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2064,19 +2064,19 @@ InternalIterator* DBImpl::NewInternalIterator(
read_options, super_version->GetSeqnoToTimeMapping(), arena);
Status s;
if (!read_options.ignore_range_deletions) {
TruncatedRangeDelIterator* mem_tombstone_iter = nullptr;
std::unique_ptr<TruncatedRangeDelIterator> mem_tombstone_iter;
auto range_del_iter = super_version->mem->NewRangeTombstoneIterator(
read_options, sequence, false /* immutable_memtable */);
if (range_del_iter == nullptr || range_del_iter->empty()) {
delete range_del_iter;
} else {
mem_tombstone_iter = new TruncatedRangeDelIterator(
mem_tombstone_iter = std::make_unique<TruncatedRangeDelIterator>(
std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter),
&cfd->ioptions()->internal_comparator, nullptr /* smallest */,
nullptr /* largest */);
}
merge_iter_builder.AddPointAndTombstoneIterator(mem_iter,
mem_tombstone_iter);
merge_iter_builder.AddPointAndTombstoneIterator(
mem_iter, std::move(mem_tombstone_iter));
} else {
merge_iter_builder.AddIterator(mem_iter);
}
Expand Down
15 changes: 8 additions & 7 deletions db/db_impl/db_impl_write.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2263,14 +2263,15 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
SequenceNumber seq = versions_->LastSequence();
new_mem = cfd->ConstructNewMemtable(mutable_cf_options, seq);
context->superversion_context.NewSuperVersion();

ROCKS_LOG_INFO(immutable_db_options_.info_log,
"[%s] New memtable created with log file: #%" PRIu64
". Immutable memtables: %d.\n",
cfd->GetName().c_str(), new_log_number, num_imm_unflushed);
// There should be no concurrent write as the thread is at the front of
// writer queue
cfd->mem()->ConstructFragmentedRangeTombstones();
}
ROCKS_LOG_INFO(immutable_db_options_.info_log,
"[%s] New memtable created with log file: #%" PRIu64
". Immutable memtables: %d.\n",
cfd->GetName().c_str(), new_log_number, num_imm_unflushed);
// There should be no concurrent write as the thread is at the front of
// writer queue
cfd->mem()->ConstructFragmentedRangeTombstones();

mutex_.Lock();
if (recycle_log_number != 0) {
Expand Down
9 changes: 6 additions & 3 deletions db/event_helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,18 @@ void EventHelpers::NotifyOnErrorRecoveryEnd(
InstrumentedMutex* db_mutex) {
if (!listeners.empty()) {
db_mutex->AssertHeld();
// Make copies before releasing mutex to avoid race.
Status old_bg_error_cp = old_bg_error;
Status new_bg_error_cp = new_bg_error;
// release lock while notifying events
db_mutex->Unlock();
TEST_SYNC_POINT("NotifyOnErrorRecoveryEnd:MutexUnlocked:1");
TEST_SYNC_POINT("NotifyOnErrorRecoveryEnd:MutexUnlocked:2");
for (auto& listener : listeners) {
BackgroundErrorRecoveryInfo info;
info.old_bg_error = old_bg_error;
info.new_bg_error = new_bg_error;
listener->OnErrorRecoveryCompleted(old_bg_error);
info.old_bg_error = old_bg_error_cp;
info.new_bg_error = new_bg_error_cp;
listener->OnErrorRecoveryCompleted(old_bg_error_cp);
listener->OnErrorRecoveryEnd(info);
info.old_bg_error.PermitUncheckedError();
info.new_bg_error.PermitUncheckedError();
Expand Down
5 changes: 3 additions & 2 deletions db/memtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -618,8 +618,9 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIteratorInternal(
}

void MemTable::ConstructFragmentedRangeTombstones() {
assert(!IsFragmentedRangeTombstonesConstructed(false));
// There should be no concurrent Construction
// There should be no concurrent Construction.
// We could also check fragmented_range_tombstone_list_ to avoid repeated
// constructions. We just construct them here again to be safe.
if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) {
// TODO: plumb Env::IOActivity, Env::IOPriority
auto* unfragmented_iter = new MemTableIterator(
Expand Down
20 changes: 10 additions & 10 deletions db/memtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,21 +534,21 @@ class MemTable {
// Returns a heuristic flush decision
bool ShouldFlushNow();

// Updates `fragmented_range_tombstone_list_` that will be used to serve reads
// when this memtable becomes an immutable memtable (in some
// MemtableListVersion::memlist_). Should be called when this memtable is
// about to become immutable. May be called multiple times since
// SwitchMemtable() may fail.
void ConstructFragmentedRangeTombstones();

// Returns whether a fragmented range tombstone list is already constructed
// for this memtable. It should be constructed right before a memtable is
// added to an immutable memtable list. Note that if a memtable does not have
// any range tombstone, then no range tombstone list will ever be constructed.
// @param allow_empty Specifies whether a memtable with no range tombstone is
// considered to have its fragmented range tombstone list constructed.
bool IsFragmentedRangeTombstonesConstructed(bool allow_empty = true) const {
if (allow_empty) {
return fragmented_range_tombstone_list_.get() != nullptr ||
is_range_del_table_empty_;
} else {
return fragmented_range_tombstone_list_.get() != nullptr;
}
// any range tombstone, then no range tombstone list will ever be constructed
// and true is returned in that case.
bool IsFragmentedRangeTombstonesConstructed() const {
return fragmented_range_tombstone_list_.get() != nullptr ||
is_range_del_table_empty_;
}

// Get the newest user-defined timestamp contained in this MemTable. Check
Expand Down
8 changes: 4 additions & 4 deletions db/memtable_list.cc
Original file line number Diff line number Diff line change
Expand Up @@ -235,19 +235,19 @@ void MemTableListVersion::AddIterators(
SequenceNumber read_seq = options.snapshot != nullptr
? options.snapshot->GetSequenceNumber()
: kMaxSequenceNumber;
TruncatedRangeDelIterator* mem_tombstone_iter = nullptr;
std::unique_ptr<TruncatedRangeDelIterator> mem_tombstone_iter;
auto range_del_iter = m->NewRangeTombstoneIterator(
options, read_seq, true /* immutale_memtable */);
if (range_del_iter == nullptr || range_del_iter->empty()) {
delete range_del_iter;
} else {
mem_tombstone_iter = new TruncatedRangeDelIterator(
mem_tombstone_iter = std::make_unique<TruncatedRangeDelIterator>(
std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter),
&m->GetInternalKeyComparator(), nullptr /* smallest */,
nullptr /* largest */);
}
merge_iter_builder->AddPointAndTombstoneIterator(mem_iter,
mem_tombstone_iter);
merge_iter_builder->AddPointAndTombstoneIterator(
mem_iter, std::move(mem_tombstone_iter));
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions db/table_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ InternalIterator* TableCache::NewIterator(
const InternalKey* smallest_compaction_key,
const InternalKey* largest_compaction_key, bool allow_unprepared_value,
uint8_t block_protection_bytes_per_key, const SequenceNumber* read_seqno,
TruncatedRangeDelIterator** range_del_iter) {
std::unique_ptr<TruncatedRangeDelIterator>* range_del_iter) {
PERF_TIMER_GUARD(new_table_iterator_nanos);

Status s;
Expand Down Expand Up @@ -285,7 +285,7 @@ InternalIterator* TableCache::NewIterator(
delete new_range_del_iter;
*range_del_iter = nullptr;
} else {
*range_del_iter = new TruncatedRangeDelIterator(
*range_del_iter = std::make_unique<TruncatedRangeDelIterator>(
std::unique_ptr<FragmentedRangeTombstoneIterator>(
new_range_del_iter),
&icomparator, &file_meta.smallest, &file_meta.largest);
Expand Down
Loading

0 comments on commit 4fabe2b

Please sign in to comment.