diff --git a/db/blob/blob_file_builder.cc b/db/blob/blob_file_builder.cc index 35269fdb509..acbc6039e04 100644 --- a/db/blob/blob_file_builder.cc +++ b/db/blob/blob_file_builder.cc @@ -34,9 +34,10 @@ BlobFileBuilder::BlobFileBuilder( VersionSet* versions, FileSystem* fs, const ImmutableOptions* immutable_options, const MutableCFOptions* mutable_cf_options, const FileOptions* file_options, - std::string db_id, std::string db_session_id, int job_id, - uint32_t column_family_id, const std::string& column_family_name, - Env::IOPriority io_priority, Env::WriteLifeTimeHint write_hint, + const WriteOptions* write_options, std::string db_id, + std::string db_session_id, int job_id, uint32_t column_family_id, + const std::string& column_family_name, Env::IOPriority io_priority, + Env::WriteLifeTimeHint write_hint, const std::shared_ptr& io_tracer, BlobFileCompletionCallback* blob_callback, BlobFileCreationReason creation_reason, @@ -44,18 +45,19 @@ BlobFileBuilder::BlobFileBuilder( std::vector* blob_file_additions) : BlobFileBuilder([versions]() { return versions->NewFileNumber(); }, fs, immutable_options, mutable_cf_options, file_options, - db_id, db_session_id, job_id, column_family_id, - column_family_name, io_priority, write_hint, io_tracer, - blob_callback, creation_reason, blob_file_paths, - blob_file_additions) {} + write_options, db_id, db_session_id, job_id, + column_family_id, column_family_name, io_priority, + write_hint, io_tracer, blob_callback, creation_reason, + blob_file_paths, blob_file_additions) {} BlobFileBuilder::BlobFileBuilder( std::function file_number_generator, FileSystem* fs, const ImmutableOptions* immutable_options, const MutableCFOptions* mutable_cf_options, const FileOptions* file_options, - std::string db_id, std::string db_session_id, int job_id, - uint32_t column_family_id, const std::string& column_family_name, - Env::IOPriority io_priority, Env::WriteLifeTimeHint write_hint, + const WriteOptions* write_options, std::string db_id, + 
std::string db_session_id, int job_id, uint32_t column_family_id, + const std::string& column_family_name, Env::IOPriority io_priority, + Env::WriteLifeTimeHint write_hint, const std::shared_ptr& io_tracer, BlobFileCompletionCallback* blob_callback, BlobFileCreationReason creation_reason, @@ -69,6 +71,7 @@ BlobFileBuilder::BlobFileBuilder( blob_compression_type_(mutable_cf_options->blob_compression_type), prepopulate_blob_cache_(mutable_cf_options->prepopulate_blob_cache), file_options_(file_options), + write_options_(write_options), db_id_(std::move(db_id)), db_session_id_(std::move(db_session_id)), job_id_(job_id), @@ -214,7 +217,8 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() { std::unique_ptr file_writer(new WritableFileWriter( std::move(file), blob_file_paths_->back(), *file_options_, immutable_options_->clock, io_tracer_, statistics, - immutable_options_->listeners, + // TODO: pass the correct Histogram for BLOB file writer + Histograms::HISTOGRAM_ENUM_MAX, immutable_options_->listeners, immutable_options_->file_checksum_gen_factory.get(), tmp_set.Contains(FileType::kBlobFile), false)); @@ -231,7 +235,7 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() { expiration_range); { - Status s = blob_log_writer->WriteHeader(header); + Status s = blob_log_writer->WriteHeader(*write_options_, header); TEST_SYNC_POINT_CALLBACK( "BlobFileBuilder::OpenBlobFileIfNeeded:WriteHeader", &s); @@ -296,7 +300,8 @@ Status BlobFileBuilder::WriteBlobToFile(const Slice& key, const Slice& blob, uint64_t key_offset = 0; - Status s = writer_->AddRecord(key, blob, &key_offset, blob_offset); + Status s = + writer_->AddRecord(*write_options_, key, blob, &key_offset, blob_offset); TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AddRecord", &s); @@ -321,7 +326,8 @@ Status BlobFileBuilder::CloseBlobFile() { std::string checksum_method; std::string checksum_value; - Status s = writer_->AppendFooter(footer, &checksum_method, &checksum_value); + Status s = 
writer_->AppendFooter(*write_options_, footer, &checksum_method, + &checksum_value); TEST_SYNC_POINT_CALLBACK("BlobFileBuilder::WriteBlobToFile:AppendFooter", &s); diff --git a/db/blob/blob_file_builder.h b/db/blob/blob_file_builder.h index 8e7aab502d6..7e49a42ed2b 100644 --- a/db/blob/blob_file_builder.h +++ b/db/blob/blob_file_builder.h @@ -13,6 +13,7 @@ #include "rocksdb/advanced_options.h" #include "rocksdb/compression_type.h" #include "rocksdb/env.h" +#include "rocksdb/options.h" #include "rocksdb/rocksdb_namespace.h" #include "rocksdb/types.h" @@ -36,7 +37,8 @@ class BlobFileBuilder { BlobFileBuilder(VersionSet* versions, FileSystem* fs, const ImmutableOptions* immutable_options, const MutableCFOptions* mutable_cf_options, - const FileOptions* file_options, std::string db_id, + const FileOptions* file_options, + const WriteOptions* write_options, std::string db_id, std::string db_session_id, int job_id, uint32_t column_family_id, const std::string& column_family_name, @@ -51,7 +53,8 @@ class BlobFileBuilder { BlobFileBuilder(std::function file_number_generator, FileSystem* fs, const ImmutableOptions* immutable_options, const MutableCFOptions* mutable_cf_options, - const FileOptions* file_options, std::string db_id, + const FileOptions* file_options, + const WriteOptions* write_options, std::string db_id, std::string db_session_id, int job_id, uint32_t column_family_id, const std::string& column_family_name, @@ -92,6 +95,7 @@ class BlobFileBuilder { CompressionType blob_compression_type_; PrepopulateBlobCache prepopulate_blob_cache_; const FileOptions* file_options_; + const WriteOptions* write_options_; const std::string db_id_; const std::string db_session_id_; int job_id_; diff --git a/db/blob/blob_file_builder_test.cc b/db/blob/blob_file_builder_test.cc index 5882e219fe4..e9eb1f5ccbb 100644 --- a/db/blob/blob_file_builder_test.cc +++ b/db/blob/blob_file_builder_test.cc @@ -113,6 +113,7 @@ class BlobFileBuilderTest : public testing::Test { FileSystem* fs_; 
SystemClock* clock_; FileOptions file_options_; + WriteOptions write_options_; }; TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) { @@ -144,8 +145,8 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckOneFile) { BlobFileBuilder builder( TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options, - &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id, - column_family_id, column_family_name, io_priority, write_hint, + &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/, + job_id, column_family_id, column_family_name, io_priority, write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/, BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions); @@ -229,8 +230,8 @@ TEST_F(BlobFileBuilderTest, BuildAndCheckMultipleFiles) { BlobFileBuilder builder( TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options, - &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id, - column_family_id, column_family_name, io_priority, write_hint, + &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/, + job_id, column_family_id, column_family_name, io_priority, write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/, BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions); @@ -317,8 +318,8 @@ TEST_F(BlobFileBuilderTest, InlinedValues) { BlobFileBuilder builder( TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options, - &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id, - column_family_id, column_family_name, io_priority, write_hint, + &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/, + job_id, column_family_id, column_family_name, io_priority, write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/, BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions); @@ -372,8 +373,8 @@ TEST_F(BlobFileBuilderTest, Compression) { BlobFileBuilder builder( TestFileNumberGenerator(), fs_, 
&immutable_options, &mutable_cf_options, - &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id, - column_family_id, column_family_name, io_priority, write_hint, + &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/, + job_id, column_family_id, column_family_name, io_priority, write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/, BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions); @@ -456,8 +457,8 @@ TEST_F(BlobFileBuilderTest, CompressionError) { BlobFileBuilder builder( TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options, - &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id, - column_family_id, column_family_name, io_priority, write_hint, + &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/, + job_id, column_family_id, column_family_name, io_priority, write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/, BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions); @@ -536,8 +537,8 @@ TEST_F(BlobFileBuilderTest, Checksum) { BlobFileBuilder builder( TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options, - &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id, - column_family_id, column_family_name, io_priority, write_hint, + &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/, + job_id, column_family_id, column_family_name, io_priority, write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/, BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions); @@ -594,6 +595,7 @@ class BlobFileBuilderIOErrorTest std::unique_ptr mock_env_; FileSystem* fs_; FileOptions file_options_; + WriteOptions write_options_; std::string sync_point_; }; @@ -634,8 +636,8 @@ TEST_P(BlobFileBuilderIOErrorTest, IOError) { BlobFileBuilder builder( TestFileNumberGenerator(), fs_, &immutable_options, &mutable_cf_options, - &file_options_, "" /*db_id*/, "" /*db_session_id*/, job_id, - 
column_family_id, column_family_name, io_priority, write_hint, + &file_options_, &write_options_, "" /*db_id*/, "" /*db_session_id*/, + job_id, column_family_id, column_family_name, io_priority, write_hint, nullptr /*IOTracer*/, nullptr /*BlobFileCompletionCallback*/, BlobFileCreationReason::kFlush, &blob_file_paths, &blob_file_additions); diff --git a/db/blob/blob_file_cache_test.cc b/db/blob/blob_file_cache_test.cc index 8c3c56de9b4..edfeb7e810e 100644 --- a/db/blob/blob_file_cache_test.cc +++ b/db/blob/blob_file_cache_test.cc @@ -57,7 +57,7 @@ void WriteBlobFile(uint32_t column_family_id, BlobLogHeader header(column_family_id, kNoCompression, has_ttl, expiration_range); - ASSERT_OK(blob_log_writer.WriteHeader(header)); + ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header)); constexpr char key[] = "key"; constexpr char blob[] = "blob"; @@ -67,7 +67,8 @@ void WriteBlobFile(uint32_t column_family_id, uint64_t key_offset = 0; uint64_t blob_offset = 0; - ASSERT_OK(blob_log_writer.AddRecord(key, blob, &key_offset, &blob_offset)); + ASSERT_OK(blob_log_writer.AddRecord(WriteOptions(), key, blob, &key_offset, + &blob_offset)); BlobLogFooter footer; footer.blob_count = 1; @@ -76,8 +77,8 @@ void WriteBlobFile(uint32_t column_family_id, std::string checksum_method; std::string checksum_value; - ASSERT_OK( - blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value)); + ASSERT_OK(blob_log_writer.AppendFooter(WriteOptions(), footer, + &checksum_method, &checksum_value)); } } // anonymous namespace diff --git a/db/blob/blob_file_reader_test.cc b/db/blob/blob_file_reader_test.cc index b6049d1ef5f..b42b8668592 100644 --- a/db/blob/blob_file_reader_test.cc +++ b/db/blob/blob_file_reader_test.cc @@ -63,7 +63,7 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, BlobLogHeader header(column_family_id, compression, has_ttl, expiration_range_header); - ASSERT_OK(blob_log_writer.WriteHeader(header)); + 
ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header)); std::vector compressed_blobs(num); std::vector blobs_to_write(num); @@ -91,7 +91,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, for (size_t i = 0; i < num; ++i) { uint64_t key_offset = 0; - ASSERT_OK(blob_log_writer.AddRecord(keys[i], blobs_to_write[i], &key_offset, + ASSERT_OK(blob_log_writer.AddRecord(WriteOptions(), keys[i], + blobs_to_write[i], &key_offset, &blob_offsets[i])); } @@ -101,8 +102,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, std::string checksum_method; std::string checksum_value; - ASSERT_OK( - blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value)); + ASSERT_OK(blob_log_writer.AppendFooter(WriteOptions(), footer, + &checksum_method, &checksum_value)); } // Creates a test blob file with a single blob in it. Note: this method @@ -473,7 +474,7 @@ TEST_F(BlobFileReaderTest, Malformed) { BlobLogHeader header(column_family_id, kNoCompression, has_ttl, expiration_range); - ASSERT_OK(blob_log_writer.WriteHeader(header)); + ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header)); } constexpr HistogramImpl* blob_file_read_hist = nullptr; diff --git a/db/blob/blob_log_writer.cc b/db/blob/blob_log_writer.cc index bf5ef27c1d6..559ba0f6b57 100644 --- a/db/blob/blob_log_writer.cc +++ b/db/blob/blob_log_writer.cc @@ -33,26 +33,35 @@ BlobLogWriter::BlobLogWriter(std::unique_ptr&& dest, BlobLogWriter::~BlobLogWriter() = default; -Status BlobLogWriter::Sync() { +Status BlobLogWriter::Sync(const WriteOptions& write_options) { TEST_SYNC_POINT("BlobLogWriter::Sync"); StopWatch sync_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS); - Status s = dest_->Sync(use_fsync_); + IOOptions opts; + Status s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (s.ok()) { + s = dest_->Sync(opts, use_fsync_); + } RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED); return s; } -Status BlobLogWriter::WriteHeader(BlobLogHeader& header) { 
+Status BlobLogWriter::WriteHeader(const WriteOptions& write_options, + BlobLogHeader& header) { assert(block_offset_ == 0); assert(last_elem_type_ == kEtNone); std::string str; header.EncodeTo(&str); - Status s = dest_->Append(Slice(str)); + IOOptions opts; + Status s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (s.ok()) { + s = dest_->Append(opts, Slice(str)); + } if (s.ok()) { block_offset_ += str.size(); if (do_flush_) { - s = dest_->Flush(); + s = dest_->Flush(opts); } } last_elem_type_ = kEtFileHdr; @@ -61,7 +70,8 @@ Status BlobLogWriter::WriteHeader(BlobLogHeader& header) { return s; } -Status BlobLogWriter::AppendFooter(BlobLogFooter& footer, +Status BlobLogWriter::AppendFooter(const WriteOptions& write_options, + BlobLogFooter& footer, std::string* checksum_method, std::string* checksum_value) { assert(block_offset_ != 0); @@ -75,14 +85,17 @@ Status BlobLogWriter::AppendFooter(BlobLogFooter& footer, s.PermitUncheckedError(); return Status::IOError("Seen Error. 
Skip closing."); } else { - s = dest_->Append(Slice(str)); + IOOptions opts; + s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (s.ok()) { + s = dest_->Append(opts, Slice(str)); + } if (s.ok()) { block_offset_ += str.size(); - - s = Sync(); + s = Sync(write_options); if (s.ok()) { - s = dest_->Close(); + s = dest_->Close(opts); if (s.ok()) { assert(!!checksum_method == !!checksum_value); @@ -116,7 +129,8 @@ Status BlobLogWriter::AppendFooter(BlobLogFooter& footer, return s; } -Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, +Status BlobLogWriter::AddRecord(const WriteOptions& write_options, + const Slice& key, const Slice& val, uint64_t expiration, uint64_t* key_offset, uint64_t* blob_offset) { assert(block_offset_ != 0); @@ -125,11 +139,13 @@ Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, std::string buf; ConstructBlobHeader(&buf, key, val, expiration); - Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); + Status s = + EmitPhysicalRecord(write_options, buf, key, val, key_offset, blob_offset); return s; } -Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, +Status BlobLogWriter::AddRecord(const WriteOptions& write_options, + const Slice& key, const Slice& val, uint64_t* key_offset, uint64_t* blob_offset) { assert(block_offset_ != 0); assert(last_elem_type_ == kEtFileHdr || last_elem_type_ == kEtRecord); @@ -137,7 +153,8 @@ Status BlobLogWriter::AddRecord(const Slice& key, const Slice& val, std::string buf; ConstructBlobHeader(&buf, key, val, 0); - Status s = EmitPhysicalRecord(buf, key, val, key_offset, blob_offset); + Status s = + EmitPhysicalRecord(write_options, buf, key, val, key_offset, blob_offset); return s; } @@ -150,20 +167,26 @@ void BlobLogWriter::ConstructBlobHeader(std::string* buf, const Slice& key, record.EncodeHeaderTo(buf); } -Status BlobLogWriter::EmitPhysicalRecord(const std::string& headerbuf, +Status BlobLogWriter::EmitPhysicalRecord(const 
WriteOptions& write_options, + const std::string& headerbuf, const Slice& key, const Slice& val, uint64_t* key_offset, uint64_t* blob_offset) { StopWatch write_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS); - Status s = dest_->Append(Slice(headerbuf)); + + IOOptions opts; + Status s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (s.ok()) { + s = dest_->Append(opts, Slice(headerbuf)); + } if (s.ok()) { - s = dest_->Append(key); + s = dest_->Append(opts, key); } if (s.ok()) { - s = dest_->Append(val); + s = dest_->Append(opts, val); } if (do_flush_ && s.ok()) { - s = dest_->Flush(); + s = dest_->Flush(opts); } *key_offset = block_offset_ + BlobLogRecord::kHeaderSize; diff --git a/db/blob/blob_log_writer.h b/db/blob/blob_log_writer.h index c1f9f31ad00..0ba4f9c2a2e 100644 --- a/db/blob/blob_log_writer.h +++ b/db/blob/blob_log_writer.h @@ -43,20 +43,24 @@ class BlobLogWriter { static void ConstructBlobHeader(std::string* buf, const Slice& key, const Slice& val, uint64_t expiration); - Status AddRecord(const Slice& key, const Slice& val, uint64_t* key_offset, + Status AddRecord(const WriteOptions& write_options, const Slice& key, + const Slice& val, uint64_t* key_offset, uint64_t* blob_offset); - Status AddRecord(const Slice& key, const Slice& val, uint64_t expiration, - uint64_t* key_offset, uint64_t* blob_offset); + Status AddRecord(const WriteOptions& write_options, const Slice& key, + const Slice& val, uint64_t expiration, uint64_t* key_offset, + uint64_t* blob_offset); - Status EmitPhysicalRecord(const std::string& headerbuf, const Slice& key, + Status EmitPhysicalRecord(const WriteOptions& write_options, + const std::string& headerbuf, const Slice& key, const Slice& val, uint64_t* key_offset, uint64_t* blob_offset); - Status AppendFooter(BlobLogFooter& footer, std::string* checksum_method, + Status AppendFooter(const WriteOptions& write_options, BlobLogFooter& footer, + std::string* checksum_method, std::string* checksum_value); - 
Status WriteHeader(BlobLogHeader& header); + Status WriteHeader(const WriteOptions& write_options, BlobLogHeader& header); WritableFileWriter* file() { return dest_.get(); } @@ -64,7 +68,7 @@ class BlobLogWriter { uint64_t get_log_number() const { return log_number_; } - Status Sync(); + Status Sync(const WriteOptions& write_options); private: std::unique_ptr dest_; diff --git a/db/blob/blob_source_test.cc b/db/blob/blob_source_test.cc index c0e1aba6ec0..dbc103ef1f7 100644 --- a/db/blob/blob_source_test.cc +++ b/db/blob/blob_source_test.cc @@ -65,7 +65,7 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, BlobLogHeader header(column_family_id, compression, has_ttl, expiration_range_header); - ASSERT_OK(blob_log_writer.WriteHeader(header)); + ASSERT_OK(blob_log_writer.WriteHeader(WriteOptions(), header)); std::vector compressed_blobs(num); std::vector blobs_to_write(num); @@ -93,7 +93,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, for (size_t i = 0; i < num; ++i) { uint64_t key_offset = 0; - ASSERT_OK(blob_log_writer.AddRecord(keys[i], blobs_to_write[i], &key_offset, + ASSERT_OK(blob_log_writer.AddRecord(WriteOptions(), keys[i], + blobs_to_write[i], &key_offset, &blob_offsets[i])); } @@ -103,8 +104,8 @@ void WriteBlobFile(const ImmutableOptions& immutable_options, std::string checksum_method; std::string checksum_value; - ASSERT_OK( - blob_log_writer.AppendFooter(footer, &checksum_method, &checksum_value)); + ASSERT_OK(blob_log_writer.AppendFooter(WriteOptions(), footer, + &checksum_method, &checksum_value)); } } // anonymous namespace diff --git a/db/builder.cc b/db/builder.cc index a3a6bc47e63..6cb53673fed 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -58,7 +58,8 @@ Status BuildTable( const std::string& dbname, VersionSet* versions, const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions, const FileOptions& file_options, const ReadOptions& read_options, - TableCache* table_cache, InternalIterator* iter, + 
const WriteOptions& write_options, TableCache* table_cache, + InternalIterator* iter, std::vector> range_del_iters, FileMetaData* meta, std::vector* blob_file_additions, @@ -168,7 +169,7 @@ Status BuildTable( file->SetWriteLifeTimeHint(write_hint); file_writer.reset(new WritableFileWriter( std::move(file), fname, file_options, ioptions.clock, io_tracer, - ioptions.stats, ioptions.listeners, + ioptions.stats, Histograms::SST_WRITE_MICROS, ioptions.listeners, ioptions.file_checksum_gen_factory.get(), tmp_set.Contains(FileType::kTableFile), false)); @@ -188,10 +189,11 @@ Status BuildTable( blob_file_additions) ? new BlobFileBuilder( versions, fs, &ioptions, &mutable_cf_options, &file_options, - tboptions.db_id, tboptions.db_session_id, job_id, - tboptions.column_family_id, tboptions.column_family_name, - io_priority, write_hint, io_tracer, blob_callback, - blob_creation_reason, &blob_file_paths, blob_file_additions) + &write_options, tboptions.db_id, tboptions.db_session_id, + job_id, tboptions.column_family_id, + tboptions.column_family_name, io_priority, write_hint, + io_tracer, blob_callback, blob_creation_reason, + &blob_file_paths, blob_file_additions) : nullptr); const std::atomic kManualCompactionCanceledFalse{false}; @@ -346,13 +348,16 @@ Status BuildTable( // Finish and check for file errors TEST_SYNC_POINT("BuildTable:BeforeSyncTable"); - if (s.ok() && !empty) { + IOOptions opts; + *io_status = + WritableFileWriter::PrepareIOOptions(tboptions.write_options, opts); + if (s.ok() && io_status->ok() && !empty) { StopWatch sw(ioptions.clock, ioptions.stats, TABLE_SYNC_MICROS); - *io_status = file_writer->Sync(ioptions.use_fsync); + *io_status = file_writer->Sync(opts, ioptions.use_fsync); } TEST_SYNC_POINT("BuildTable:BeforeCloseTableFile"); if (s.ok() && io_status->ok() && !empty) { - *io_status = file_writer->Close(); + *io_status = file_writer->Close(opts); } if (s.ok() && io_status->ok() && !empty) { // Add the checksum information to file metadata. 
diff --git a/db/builder.h b/db/builder.h index 6a6a1866a13..2c2ea05c41f 100644 --- a/db/builder.h +++ b/db/builder.h @@ -54,7 +54,8 @@ extern Status BuildTable( const std::string& dbname, VersionSet* versions, const ImmutableDBOptions& db_options, const TableBuilderOptions& tboptions, const FileOptions& file_options, const ReadOptions& read_options, - TableCache* table_cache, InternalIterator* iter, + const WriteOptions& write_options, TableCache* table_cache, + InternalIterator* iter, std::vector> range_del_iters, FileMetaData* meta, std::vector* blob_file_additions, diff --git a/db/column_family.cc b/db/column_family.cc index 7578b7ec7d6..cc29d063b90 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1137,7 +1137,7 @@ Status ColumnFamilyData::RangesOverlapWithMemtables( *overlap = false; // Create an InternalIterator over all unflushed memtables Arena arena; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions read_opts; read_opts.total_order_seek = true; MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena); diff --git a/db/compaction/compaction_job.cc b/db/compaction/compaction_job.cc index 8ea80681633..a851af72b65 100644 --- a/db/compaction/compaction_job.cc +++ b/db/compaction/compaction_job.cc @@ -1123,6 +1123,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { // (b) CompactionFilter::Decision::kRemoveAndSkipUntil. read_options.total_order_seek = true; + const WriteOptions write_option(Env::IOActivity::kCompaction); + // Remove the timestamps from boundaries because boundaries created in // GenSubcompactionBoundaries doesn't strip away the timestamp. size_t ts_sz = cfd->user_comparator()->timestamp_size(); @@ -1257,10 +1259,11 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) { ? 
new BlobFileBuilder( versions_, fs_.get(), sub_compact->compaction->immutable_options(), - mutable_cf_options, &file_options_, db_id_, db_session_id_, - job_id_, cfd->GetID(), cfd->GetName(), Env::IOPriority::IO_LOW, - write_hint_, io_tracer_, blob_callback_, - BlobFileCreationReason::kCompaction, &blob_file_paths, + mutable_cf_options, &file_options_, &write_option, db_id_, + db_session_id_, job_id_, cfd->GetID(), cfd->GetName(), + Env::IOPriority::IO_LOW, write_hint_, io_tracer_, + blob_callback_, BlobFileCreationReason::kCompaction, + &blob_file_paths, sub_compact->Current().GetBlobFileAdditionsPtr()) : nullptr); @@ -1693,6 +1696,8 @@ Status CompactionJob::InstallCompactionResults( db_mutex_->AssertHeld(); const ReadOptions read_options(Env::IOActivity::kCompaction); + const WriteOptions write_options(Env::IOActivity::kCompaction); + auto* compaction = compact_->compaction; assert(compaction); @@ -1770,8 +1775,8 @@ Status CompactionJob::InstallCompactionResults( } return versions_->LogAndApply(compaction->column_family_data(), - mutable_cf_options, read_options, edit, - db_mutex_, db_directory_); + mutable_cf_options, read_options, write_options, + edit, db_mutex_, db_directory_); } void CompactionJob::RecordCompactionIOStats() { @@ -1918,12 +1923,15 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact, sub_compact->compaction->immutable_options()->listeners; outputs.AssignFileWriter(new WritableFileWriter( std::move(writable_file), fname, fo_copy, db_options_.clock, io_tracer_, - db_options_.stats, listeners, db_options_.file_checksum_gen_factory.get(), + db_options_.stats, Histograms::SST_WRITE_MICROS, listeners, + db_options_.file_checksum_gen_factory.get(), tmp_set.Contains(FileType::kTableFile), false)); + const WriteOptions write_option(Env::IOActivity::kCompaction); TableBuilderOptions tboptions( *cfd->ioptions(), *(sub_compact->compaction->mutable_cf_options()), - cfd->internal_comparator(), 
cfd->int_tbl_prop_collector_factories(), + write_option, cfd->internal_comparator(), + cfd->int_tbl_prop_collector_factories(), sub_compact->compaction->output_compression(), sub_compact->compaction->output_compression_opts(), cfd->GetID(), cfd->GetName(), sub_compact->compaction->output_level(), diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 8f91cc04ce5..abb5ab2496b 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -293,10 +293,11 @@ class CompactionJobTestBase : public testing::Test { Status s = WritableFileWriter::Create(fs_, table_name, FileOptions(), &file_writer, nullptr); ASSERT_OK(s); + const WriteOptions write_options; std::unique_ptr table_builder( cf_options_.table_factory->NewTableBuilder( TableBuilderOptions(*cfd_->ioptions(), mutable_cf_options_, - cfd_->internal_comparator(), + write_options, cfd_->internal_comparator(), cfd_->int_tbl_prop_collector_factories(), CompressionType::kNoCompression, CompressionOptions(), 0 /* column_family_id */, @@ -392,7 +393,7 @@ class CompactionJobTestBase : public testing::Test { mutex_.Lock(); EXPECT_OK(versions_->LogAndApply( versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, - read_options_, &edit, &mutex_, nullptr)); + read_options_, write_options_, &edit, &mutex_, nullptr)); mutex_.Unlock(); } @@ -546,7 +547,7 @@ class CompactionJobTestBase : public testing::Test { /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr, /*db_id*/ "", /*db_session_id*/ "")); compaction_job_stats_.Reset(); - ASSERT_OK(SetIdentityFile(env_, dbname_)); + ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); VersionEdit new_db; new_db.SetLogNumber(0); @@ -565,11 +566,11 @@ class CompactionJobTestBase : public testing::Test { log::Writer log(std::move(file_writer), 0, false); std::string record; new_db.EncodeTo(&record); - s = log.AddRecord(record); + s = log.AddRecord(WriteOptions(), record); } ASSERT_OK(s); // Make 
"CURRENT" file that points to the new manifest file. - s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); @@ -731,6 +732,7 @@ class CompactionJobTestBase : public testing::Test { MutableCFOptions mutable_cf_options_; MutableDBOptions mutable_db_options_; const ReadOptions read_options_; + const WriteOptions write_options_; std::shared_ptr table_cache_; WriteController write_controller_; WriteBufferManager write_buffer_manager_; diff --git a/db/compaction/compaction_outputs.cc b/db/compaction/compaction_outputs.cc index 3e21484c463..a0ee896467d 100644 --- a/db/compaction/compaction_outputs.cc +++ b/db/compaction/compaction_outputs.cc @@ -60,12 +60,15 @@ IOStatus CompactionOutputs::WriterSyncClose(const Status& input_status, Statistics* statistics, bool use_fsync) { IOStatus io_s; - if (input_status.ok()) { + IOOptions opts; + io_s = WritableFileWriter::PrepareIOOptions( + WriteOptions(Env::IOActivity::kCompaction), opts); + if (input_status.ok() && io_s.ok()) { StopWatch sw(clock, statistics, COMPACTION_OUTFILE_SYNC_MICROS); - io_s = file_writer_->Sync(use_fsync); + io_s = file_writer_->Sync(opts, use_fsync); } if (input_status.ok() && io_s.ok()) { - io_s = file_writer_->Close(); + io_s = file_writer_->Close(opts); } if (input_status.ok() && io_s.ok()) { diff --git a/db/convenience.cc b/db/convenience.cc index 08bddc8e8f6..9e78adc74e4 100644 --- a/db/convenience.cc +++ b/db/convenience.cc @@ -34,7 +34,7 @@ Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family, Status VerifySstFileChecksum(const Options& options, const EnvOptions& env_options, const std::string& file_path) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; return VerifySstFileChecksum(options, env_options, read_options, file_path); } diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 15cc5d3f592..9b0671a814d 100644 --- 
a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -3106,9 +3106,11 @@ TEST_F(DBBasicTest, LastSstFileNotInManifest) { // Manually add a sst file. constexpr uint64_t kSstFileNumber = 100; const std::string kSstFile = MakeTableFileName(dbname_, kSstFileNumber); - ASSERT_OK(WriteStringToFile(env_, /* data = */ "bad sst file content", + ASSERT_OK(WriteStringToFile(env_, + /* data = */ "bad sst file content", /* fname = */ kSstFile, - /* should_sync = */ true)); + /* should_sync = */ true, + /* io_activity */ Env::IOActivity::kUnknown)); ASSERT_OK(env_->FileExists(kSstFile)); TableFileListener* listener = new TableFileListener(); @@ -3160,7 +3162,8 @@ TEST_F(DBBasicTest, RecoverWithMissingFiles) { ASSERT_OK(ReadFileToString(env_, files[files.size() - 1], &corrupted_data)); ASSERT_OK(WriteStringToFile( env_, corrupted_data.substr(0, corrupted_data.size() - 2), - files[files.size() - 1], /*should_sync=*/true)); + files[files.size() - 1], /*should_sync=*/true, + /*io_activity=*/Env::IOActivity::kUnknown)); for (int j = static_cast(files.size() - 2); j >= static_cast(i); --j) { ASSERT_OK(env_->DeleteFile(files[j])); @@ -3207,13 +3210,15 @@ TEST_F(DBBasicTest, BestEffortsRecoveryTryMultipleManifests) { // Hack by adding a new MANIFEST with high file number std::string garbage(10, '\0'); ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/MANIFEST-001000", - /*should_sync=*/true)); + /*should_sync=*/true, + /*io_activity=*/Env::IOActivity::kUnknown)); } { // Hack by adding a corrupted SST not referenced by any MANIFEST std::string garbage(10, '\0'); ASSERT_OK(WriteStringToFile(env_, garbage, dbname_ + "/001001.sst", - /*should_sync=*/true)); + /*should_sync=*/true, + /*io_activity=*/Env::IOActivity::kUnknown)); } options.best_efforts_recovery = true; diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 228a3da4fa0..77ff506af72 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -333,8 +333,10 @@ Status DBImpl::Resume() { Status 
DBImpl::ResumeImpl(DBRecoverContext context) { mutex_.AssertHeld(); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + WaitForBackgroundWork(); Status s; @@ -376,8 +378,8 @@ Status DBImpl::ResumeImpl(DBRecoverContext context) { assert(cfh); ColumnFamilyData* cfd = cfh->cfd(); const MutableCFOptions& cf_opts = *cfd->GetLatestMutableCFOptions(); - s = versions_->LogAndApply(cfd, cf_opts, read_options, &edit, &mutex_, - directories_.GetDbDir()); + s = versions_->LogAndApply(cfd, cf_opts, read_options, write_options, + &edit, &mutex_, directories_.GetDbDir()); if (!s.ok()) { io_s = versions_->io_status(); if (!io_s.ok()) { @@ -701,23 +703,26 @@ Status DBImpl::CloseHelper() { Status DBImpl::CloseImpl() { return CloseHelper(); } DBImpl::~DBImpl() { + ThreadStatus::OperationType cur_op_type = + ThreadStatusUtil::GetThreadOperation(); + ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN); + // TODO: remove this. 
init_logger_creation_s_.PermitUncheckedError(); InstrumentedMutexLock closing_lock_guard(&closing_mutex_); - if (closed_) { - return; - } + if (!closed_) { + closed_ = true; - closed_ = true; + { + const Status s = MaybeReleaseTimestampedSnapshotsAndCheck(); + s.PermitUncheckedError(); + } - { - const Status s = MaybeReleaseTimestampedSnapshotsAndCheck(); - s.PermitUncheckedError(); + closing_status_ = CloseImpl(); + closing_status_.PermitUncheckedError(); } - - closing_status_ = CloseImpl(); - closing_status_.PermitUncheckedError(); + ThreadStatusUtil::SetThreadOperation(cur_op_type); } void DBImpl::MaybeIgnoreError(Status* s) const { @@ -1134,8 +1139,10 @@ FSDirectory* DBImpl::GetDataDir(ColumnFamilyData* cfd, size_t path_id) const { Status DBImpl::SetOptions( ColumnFamilyHandle* column_family, const std::unordered_map& options_map) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + auto* cfd = static_cast_with_check(column_family)->cfd(); if (options_map.empty()) { @@ -1157,15 +1164,16 @@ Status DBImpl::SetOptions( new_options = *cfd->GetLatestMutableCFOptions(); // Append new version to recompute compaction score. VersionEdit dummy_edit; - s = versions_->LogAndApply(cfd, new_options, read_options, &dummy_edit, - &mutex_, directories_.GetDbDir()); + s = versions_->LogAndApply(cfd, new_options, read_options, write_options, + &dummy_edit, &mutex_, directories_.GetDbDir()); // Trigger possible flush/compactions. This has to be before we persist // options to file, otherwise there will be a deadlock with writer // thread. 
InstallSuperVersionAndScheduleWork(cfd, &sv_context, new_options); - persist_options_status = WriteOptionsFile( - false /*need_mutex_lock*/, true /*need_enter_write_thread*/); + persist_options_status = + WriteOptionsFile(write_options, false /*need_mutex_lock*/, + true /*need_enter_write_thread*/); bg_cv_.SignalAll(); } } @@ -1329,8 +1337,9 @@ Status DBImpl::SetDBOptions( purge_wal_status.ToString().c_str()); } } - persist_options_status = WriteOptionsFile( - false /*need_mutex_lock*/, false /*need_enter_write_thread*/); + persist_options_status = + WriteOptionsFile(WriteOptions(), false /*need_mutex_lock*/, + false /*need_enter_write_thread*/); write_thread_.ExitUnbatched(&w); } else { // To get here, we must have had invalid options and will not attempt to @@ -1383,14 +1392,14 @@ int DBImpl::FindMinimumEmptyLevelFitting( return minimum_level; } -Status DBImpl::FlushWAL(bool sync) { +Status DBImpl::FlushWAL(const WriteOptions& write_options, bool sync) { if (manual_wal_flush_) { IOStatus io_s; { // We need to lock log_write_mutex_ since logs_ might change concurrently InstrumentedMutexLock wl(&log_write_mutex_); log::Writer* cur_log_writer = logs_.back().writer; - io_s = cur_log_writer->WriteBuffer(); + io_s = cur_log_writer->WriteBuffer(write_options); } if (!io_s.ok()) { ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s", @@ -1463,11 +1472,22 @@ Status DBImpl::SyncWAL() { RecordTick(stats_, WAL_FILE_SYNCED); Status status; IOStatus io_s; - for (log::Writer* log : logs_to_sync) { - io_s = log->file()->SyncWithoutFlush(immutable_db_options_.use_fsync); - if (!io_s.ok()) { - status = io_s; - break; + // TODO: plumb Env::IOActivity, Env::IOPriority + const ReadOptions read_options; + const WriteOptions write_options; + IOOptions opts; + io_s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (!io_s.ok()) { + status = io_s; + } + if (io_s.ok()) { + for (log::Writer* log : logs_to_sync) { + io_s = + log->file()->SyncWithoutFlush(opts, 
immutable_db_options_.use_fsync); + if (!io_s.ok()) { + status = io_s; + break; + } } } if (!io_s.ok()) { @@ -1496,9 +1516,7 @@ Status DBImpl::SyncWAL() { } if (status.ok() && synced_wals.IsWalAddition()) { InstrumentedMutexLock l(&mutex_); - // TODO: plumb Env::IOActivity - const ReadOptions read_options; - status = ApplyWALToManifest(read_options, &synced_wals); + status = ApplyWALToManifest(read_options, write_options, &synced_wals); } TEST_SYNC_POINT("DBImpl::SyncWAL:BeforeMarkLogsSynced:2"); @@ -1507,12 +1525,14 @@ Status DBImpl::SyncWAL() { } Status DBImpl::ApplyWALToManifest(const ReadOptions& read_options, + const WriteOptions& write_options, VersionEdit* synced_wals) { // not empty, write to MANIFEST. mutex_.AssertHeld(); Status status = versions_->LogAndApplyToDefaultColumnFamily( - read_options, synced_wals, &mutex_, directories_.GetDbDir()); + read_options, write_options, synced_wals, &mutex_, + directories_.GetDbDir()); if (!status.ok() && versions_->io_status().IsIOError()) { status = error_handler_.SetBGError(versions_->io_status(), BackgroundErrorReason::kManifestWrite); @@ -3147,19 +3167,23 @@ void DBImpl::MultiGetEntity(const ReadOptions& options, results, /* timestamps */ nullptr, statuses, sorted_input); } -Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options, +Status DBImpl::CreateColumnFamily(const ReadOptions& read_options, + const WriteOptions& write_options, + const ColumnFamilyOptions& cf_options, const std::string& column_family, ColumnFamilyHandle** handle) { assert(handle != nullptr); - Status s = CreateColumnFamilyImpl(cf_options, column_family, handle); + Status s = CreateColumnFamilyImpl(read_options, write_options, cf_options, + column_family, handle); if (s.ok()) { - s = WriteOptionsFile(true /*need_mutex_lock*/, + s = WriteOptionsFile(write_options, true /*need_mutex_lock*/, true /*need_enter_write_thread*/); } return s; } Status DBImpl::CreateColumnFamilies( + const ReadOptions& read_options, const 
WriteOptions& write_options, const ColumnFamilyOptions& cf_options, const std::vector& column_family_names, std::vector* handles) { @@ -3170,7 +3194,8 @@ Status DBImpl::CreateColumnFamilies( bool success_once = false; for (size_t i = 0; i < num_cf; i++) { ColumnFamilyHandle* handle; - s = CreateColumnFamilyImpl(cf_options, column_family_names[i], &handle); + s = CreateColumnFamilyImpl(read_options, write_options, cf_options, + column_family_names[i], &handle); if (!s.ok()) { break; } @@ -3178,8 +3203,9 @@ Status DBImpl::CreateColumnFamilies( success_once = true; } if (success_once) { - Status persist_options_status = WriteOptionsFile( - true /*need_mutex_lock*/, true /*need_enter_write_thread*/); + Status persist_options_status = + WriteOptionsFile(write_options, true /*need_mutex_lock*/, + true /*need_enter_write_thread*/); if (s.ok() && !persist_options_status.ok()) { s = persist_options_status; } @@ -3188,6 +3214,7 @@ Status DBImpl::CreateColumnFamilies( } Status DBImpl::CreateColumnFamilies( + const ReadOptions& read_options, const WriteOptions& write_options, const std::vector& column_families, std::vector* handles) { assert(handles != nullptr); @@ -3197,7 +3224,8 @@ Status DBImpl::CreateColumnFamilies( bool success_once = false; for (size_t i = 0; i < num_cf; i++) { ColumnFamilyHandle* handle; - s = CreateColumnFamilyImpl(column_families[i].options, + s = CreateColumnFamilyImpl(read_options, write_options, + column_families[i].options, column_families[i].name, &handle); if (!s.ok()) { break; @@ -3206,8 +3234,9 @@ Status DBImpl::CreateColumnFamilies( success_once = true; } if (success_once) { - Status persist_options_status = WriteOptionsFile( - true /*need_mutex_lock*/, true /*need_enter_write_thread*/); + Status persist_options_status = + WriteOptionsFile(write_options, true /*need_mutex_lock*/, + true /*need_enter_write_thread*/); if (s.ok() && !persist_options_status.ok()) { s = persist_options_status; } @@ -3215,11 +3244,11 @@ Status 
DBImpl::CreateColumnFamilies( return s; } -Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, +Status DBImpl::CreateColumnFamilyImpl(const ReadOptions& read_options, + const WriteOptions& write_options, + const ColumnFamilyOptions& cf_options, const std::string& column_family_name, ColumnFamilyHandle** handle) { - // TODO: plumb Env::IOActivity - const ReadOptions read_options; Status s; *handle = nullptr; @@ -3263,7 +3292,7 @@ Status DBImpl::CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, // LogAndApply will both write the creation in MANIFEST and create // ColumnFamilyData object s = versions_->LogAndApply(nullptr, MutableCFOptions(cf_options), - read_options, &edit, &mutex_, + read_options, write_options, &edit, &mutex_, directories_.GetDbDir(), false, &cf_options); write_thread_.ExitUnbatched(&w); } @@ -3315,7 +3344,8 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { assert(column_family != nullptr); Status s = DropColumnFamilyImpl(column_family); if (s.ok()) { - s = WriteOptionsFile(true /*need_mutex_lock*/, + // TODO: plumb Env::IOActivity, Env::IOPriority + s = WriteOptionsFile(WriteOptions(), true /*need_mutex_lock*/, true /*need_enter_write_thread*/); } return s; @@ -3333,8 +3363,10 @@ Status DBImpl::DropColumnFamilies( success_once = true; } if (success_once) { - Status persist_options_status = WriteOptionsFile( - true /*need_mutex_lock*/, true /*need_enter_write_thread*/); + // TODO: plumb Env::IOActivity, Env::IOPriority + Status persist_options_status = + WriteOptionsFile(WriteOptions(), true /*need_mutex_lock*/, + true /*need_enter_write_thread*/); if (s.ok() && !persist_options_status.ok()) { s = persist_options_status; } @@ -3343,8 +3375,10 @@ Status DBImpl::DropColumnFamilies( } Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions 
write_options; + auto cfh = static_cast_with_check(column_family); auto cfd = cfh->cfd(); if (cfd->GetID() == 0) { @@ -3368,7 +3402,7 @@ Status DBImpl::DropColumnFamilyImpl(ColumnFamilyHandle* column_family) { WriteThread::Writer w; write_thread_.EnterUnbatched(&w, &mutex_); s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), - read_options, &edit, &mutex_, + read_options, write_options, &edit, &mutex_, directories_.GetDbDir()); write_thread_.ExitUnbatched(&w); } @@ -3426,7 +3460,7 @@ bool DBImpl::KeyMayExist(const ReadOptions& read_options, // falsify later if key-may-exist but can't fetch value *value_found = true; } - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions roptions = read_options; roptions.read_tier = kBlockCacheTier; // read from block cache only PinnableSlice pinnable_val; @@ -3928,7 +3962,7 @@ Status DBImpl::GetPropertiesOfAllTables(ColumnFamilyHandle* column_family, version->Ref(); mutex_.Unlock(); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; auto s = version->GetPropertiesOfAllTables(read_options, props); @@ -3952,7 +3986,7 @@ Status DBImpl::GetPropertiesOfTablesInRange(ColumnFamilyHandle* column_family, version->Ref(); mutex_.Unlock(); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; auto s = version->GetPropertiesOfTablesInRange(read_options, range, n, props); @@ -4297,7 +4331,7 @@ Status DBImpl::GetApproximateSizes(const SizeApproximationOptions& options, SuperVersion* sv = GetAndRefSuperVersion(cfd); v = sv->current; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; for (int i = 0; i < n; i++) { // Add timestamp if needed @@ -4361,8 +4395,10 @@ Status DBImpl::GetUpdatesSince( } Status DBImpl::DeleteFile(std::string name) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, 
Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + uint64_t number; FileType type; WalFileType log_type; @@ -4442,7 +4478,7 @@ Status DBImpl::DeleteFile(std::string name) { edit.SetColumnFamily(cfd->GetID()); edit.DeleteFile(level, number); status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), - read_options, &edit, &mutex_, + read_options, write_options, &edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, @@ -4465,8 +4501,10 @@ Status DBImpl::DeleteFile(std::string name) { Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family, const RangePtr* ranges, size_t n, bool include_end) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + Status status = Status::OK(); auto cfh = static_cast_with_check(column_family); ColumnFamilyData* cfd = cfh->cfd(); @@ -4534,7 +4572,7 @@ Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family, } input_version->Ref(); status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), - read_options, &edit, &mutex_, + read_options, write_options, &edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, @@ -4948,7 +4986,8 @@ Status DestroyDB(const std::string& dbname, const Options& options, return result; } -Status DBImpl::WriteOptionsFile(bool need_mutex_lock, +Status DBImpl::WriteOptionsFile(const WriteOptions& write_options, + bool need_mutex_lock, bool need_enter_write_thread) { WriteThread::Writer w; if (need_mutex_lock) { @@ -4985,8 +5024,8 @@ Status DBImpl::WriteOptionsFile(bool need_mutex_lock, std::string file_name = TempOptionsFileName(GetName(), versions_->NewFileNumber()); - Status s = PersistRocksDBOptions(db_options, cf_names, cf_opts, file_name, - fs_.get()); + Status s = PersistRocksDBOptions(write_options, db_options, cf_names, cf_opts, + 
file_name, fs_.get()); if (s.ok()) { s = RenameTempFileToOptionsFile(file_name); @@ -5179,7 +5218,7 @@ Status DBImpl::GetLatestSequenceForKey( MergeContext merge_context; SequenceNumber max_covering_tombstone_seq = 0; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions read_options; SequenceNumber current_seq = versions_->LastSequence(); @@ -5335,8 +5374,10 @@ Status DBImpl::IngestExternalFile( Status DBImpl::IngestExternalFiles( const std::vector& args) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + if (args.empty()) { return Status::InvalidArgument("ingestion arg list is empty"); } @@ -5554,9 +5595,10 @@ Status DBImpl::IngestExternalFiles( } assert(0 == num_entries); } - status = versions_->LogAndApply(cfds_to_commit, mutable_cf_options_list, - read_options, edit_lists, &mutex_, - directories_.GetDbDir()); + status = versions_->LogAndApply( + cfds_to_commit, mutable_cf_options_list, read_options, write_options, + + edit_lists, &mutex_, directories_.GetDbDir()); // It is safe to update VersionSet last seqno here after LogAndApply since // LogAndApply persists last sequence number from VersionEdits, // which are from file's largest seqno and not from VersionSet. @@ -5658,8 +5700,10 @@ Status DBImpl::CreateColumnFamilyWithImport( ColumnFamilyHandle** handle) { assert(handle != nullptr); assert(*handle == nullptr); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + std::string cf_comparator_name = options.comparator->Name(); size_t total_file_num = 0; @@ -5675,7 +5719,8 @@ Status DBImpl::CreateColumnFamilyWithImport( } // Create column family. 
- auto status = CreateColumnFamily(options, column_family_name, handle); + auto status = CreateColumnFamily(read_options, write_options, options, + column_family_name, handle); if (!status.ok()) { return status; } @@ -5711,8 +5756,8 @@ Status DBImpl::CreateColumnFamilyWithImport( next_file_number = versions_->FetchAddFileNumber(total_file_num); auto cf_options = cfd->GetLatestMutableCFOptions(); status = - versions_->LogAndApply(cfd, *cf_options, read_options, &dummy_edit, - &mutex_, directories_.GetDbDir()); + versions_->LogAndApply(cfd, *cf_options, read_options, write_options, + &dummy_edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options); } @@ -5749,8 +5794,8 @@ Status DBImpl::CreateColumnFamilyWithImport( if (status.ok()) { auto cf_options = cfd->GetLatestMutableCFOptions(); status = versions_->LogAndApply(cfd, *cf_options, read_options, - import_job.edit(), &mutex_, - directories_.GetDbDir()); + write_options, import_job.edit(), + &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, &sv_context, *cf_options); } @@ -6153,8 +6198,10 @@ Status DBImpl::ReserveFileNumbersBeforeIngestion( ColumnFamilyData* cfd, uint64_t num, std::unique_ptr::iterator>& pending_output_elem, uint64_t* next_file_number) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + Status s; SuperVersionContext dummy_sv_ctx(true /* create_superversion */); assert(nullptr != next_file_number); @@ -6172,8 +6219,8 @@ Status DBImpl::ReserveFileNumbersBeforeIngestion( // reuse the file number that has already assigned to the internal file, // and this will overwrite the external file. To protect the external // file, we have to make sure the file number will never being reused. 
- s = versions_->LogAndApply(cfd, *cf_options, read_options, &dummy_edit, - &mutex_, directories_.GetDbDir()); + s = versions_->LogAndApply(cfd, *cf_options, read_options, write_options, + &dummy_edit, &mutex_, directories_.GetDbDir()); if (s.ok()) { InstallSuperVersionAndScheduleWork(cfd, &dummy_sv_ctx, *cf_options); } diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 50aec5804bc..5720adfa40f 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -314,14 +314,38 @@ class DBImpl : public DB { virtual Status CreateColumnFamily(const ColumnFamilyOptions& cf_options, const std::string& column_family, - ColumnFamilyHandle** handle) override; + ColumnFamilyHandle** handle) override { + return CreateColumnFamily(ReadOptions(), WriteOptions(), cf_options, + column_family, handle); + } + virtual Status CreateColumnFamily(const ReadOptions& read_options, + const WriteOptions& write_options, + const ColumnFamilyOptions& cf_options, + const std::string& column_family, + ColumnFamilyHandle** handle); virtual Status CreateColumnFamilies( const ColumnFamilyOptions& cf_options, const std::vector& column_family_names, - std::vector* handles) override; + std::vector* handles) override { + return CreateColumnFamilies(ReadOptions(), WriteOptions(), cf_options, + column_family_names, handles); + } + virtual Status CreateColumnFamilies( + const ReadOptions& read_options, const WriteOptions& write_options, + const ColumnFamilyOptions& cf_options, + const std::vector& column_family_names, + std::vector* handles); + virtual Status CreateColumnFamilies( const std::vector& column_families, - std::vector* handles) override; + std::vector* handles) override { + return CreateColumnFamilies(ReadOptions(), WriteOptions(), column_families, + handles); + } + virtual Status CreateColumnFamilies( + const ReadOptions& read_options, const WriteOptions& write_options, + const std::vector& column_families, + std::vector* handles); virtual Status DropColumnFamily(ColumnFamilyHandle* 
column_family) override; virtual Status DropColumnFamilies( const std::vector& column_families) override; @@ -433,7 +457,11 @@ class DBImpl : public DB { virtual Status Flush( const FlushOptions& options, const std::vector& column_families) override; - virtual Status FlushWAL(bool sync) override; + virtual Status FlushWAL(bool sync) override { + return FlushWAL(WriteOptions(), sync); + } + + virtual Status FlushWAL(const WriteOptions& write_options, bool sync); bool WALBufferIsEmpty(); virtual Status SyncWAL() override; virtual Status LockWAL() override; @@ -1391,7 +1419,8 @@ class DBImpl : public DB { // Persist options to options file. // If need_mutex_lock = false, the method will lock DB mutex. // If need_enter_write_thread = false, the method will enter write thread. - Status WriteOptionsFile(bool need_mutex_lock, bool need_enter_write_thread); + Status WriteOptionsFile(const WriteOptions& write_options, + bool need_mutex_lock, bool need_enter_write_thread); Status CompactRangeInternal(const CompactRangeOptions& options, ColumnFamilyHandle* column_family, @@ -1634,7 +1663,8 @@ class DBImpl : public DB { return w; } Status ClearWriter() { - Status s = writer->WriteBuffer(); + // TODO: plumb Env::IOActivity, Env::IOPriority + Status s = writer->WriteBuffer(WriteOptions()); delete writer; writer = nullptr; return s; @@ -1809,7 +1839,9 @@ class DBImpl : public DB { const Status CreateArchivalDirectory(); - Status CreateColumnFamilyImpl(const ColumnFamilyOptions& cf_options, + Status CreateColumnFamilyImpl(const ReadOptions& read_options, + const WriteOptions& write_options, + const ColumnFamilyOptions& cf_options, const std::string& cf_name, ColumnFamilyHandle** handle); @@ -2025,12 +2057,10 @@ class DBImpl : public DB { WriteBatch* tmp_batch, WriteBatch** merged_batch, size_t* write_with_wal, WriteBatch** to_be_cached_state); - // rate_limiter_priority is used to charge `DBOptions::rate_limiter` - // for automatic WAL flush (`Options::manual_wal_flush` == false) 
- // associated with this WriteToWAL - IOStatus WriteToWAL(const WriteBatch& merged_batch, log::Writer* log_writer, - uint64_t* log_used, uint64_t* log_size, - Env::IOPriority rate_limiter_priority, + IOStatus WriteToWAL(const WriteBatch& merged_batch, + const WriteOptions& write_options, + log::Writer* log_writer, uint64_t* log_used, + uint64_t* log_size, LogFileNumberSize& log_file_number_size); IOStatus WriteToWAL(const WriteThread::WriteGroup& write_group, @@ -2164,7 +2194,9 @@ class DBImpl : public DB { // helper function to call after some of the logs_ were synced void MarkLogsSynced(uint64_t up_to, bool synced_dir, VersionEdit* edit); - Status ApplyWALToManifest(const ReadOptions& read_options, VersionEdit* edit); + Status ApplyWALToManifest(const ReadOptions& read_options, + const WriteOptions& write_options, + VersionEdit* edit); // WALs with log number up to up_to are not synced successfully. void MarkLogsNotSynced(uint64_t up_to); @@ -2235,8 +2267,9 @@ class DBImpl : public DB { size_t GetWalPreallocateBlockSize(uint64_t write_buffer_size) const; Env::WriteLifeTimeHint CalculateWALWriteHint() { return Env::WLTH_SHORT; } - IOStatus CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, - size_t preallocate_block_size, log::Writer** new_log); + IOStatus CreateWAL(const WriteOptions& write_options, uint64_t log_file_num, + uint64_t recycle_log_number, size_t preallocate_block_size, + log::Writer** new_log); // Validate self-consistency of DB options static Status ValidateOptions(const DBOptions& db_options); diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index a1a544eef8d..d9b83b674bf 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -142,7 +142,8 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context, if (error_handler_.IsRecoveryInProgress()) { log->file()->reset_seen_error(); } - io_s = log->file()->Sync(immutable_db_options_.use_fsync); + // TODO: 
plumb Env::IOActivity, Env::IOPriority + io_s = log->file()->Sync(IOOptions(), immutable_db_options_.use_fsync); if (!io_s.ok()) { break; } @@ -266,8 +267,9 @@ Status DBImpl::FlushMemTableToOutputFile( mutex_.Lock(); if (log_io_s.ok() && synced_wals.IsWalAddition()) { const ReadOptions read_options(Env::IOActivity::kFlush); - log_io_s = - status_to_io_status(ApplyWALToManifest(read_options, &synced_wals)); + const WriteOptions write_options(Env::IOActivity::kFlush); + log_io_s = status_to_io_status( + ApplyWALToManifest(read_options, write_options, &synced_wals)); TEST_SYNC_POINT_CALLBACK("DBImpl::FlushMemTableToOutputFile:CommitWal:1", nullptr); } @@ -531,8 +533,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles( mutex_.Lock(); if (log_io_s.ok() && synced_wals.IsWalAddition()) { const ReadOptions read_options(Env::IOActivity::kFlush); - log_io_s = - status_to_io_status(ApplyWALToManifest(read_options, &synced_wals)); + const WriteOptions write_options(Env::IOActivity::kFlush); + + log_io_s = status_to_io_status( + ApplyWALToManifest(read_options, write_options, &synced_wals)); } if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() && @@ -985,8 +989,10 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, edit.SetColumnFamily(cfd->GetID()); edit.SetFullHistoryTsLow(ts_low); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + TEST_SYNC_POINT_CALLBACK("DBImpl::IncreaseFullHistoryTsLowImpl:BeforeEdit", &edit); @@ -1000,7 +1006,7 @@ Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd, } Status s = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), - read_options, &edit, &mutex_, + read_options, write_options, &edit, &mutex_, directories_.GetDbDir()); if (!s.ok()) { return s; @@ -1685,6 +1691,7 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { } const ReadOptions 
read_options(Env::IOActivity::kCompaction); + const WriteOptions write_options(Env::IOActivity::kCompaction); SuperVersionContext sv_context(/* create_superversion */ true); @@ -1801,9 +1808,9 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { "[%s] Apply version edit:\n%s", cfd->GetName().c_str(), edit.DebugString().data()); - Status status = - versions_->LogAndApply(cfd, mutable_cf_options, read_options, &edit, - &mutex_, directories_.GetDbDir()); + Status status = versions_->LogAndApply(cfd, mutable_cf_options, + read_options, write_options, &edit, + &mutex_, directories_.GetDbDir()); cfd->compaction_picker()->UnregisterCompaction(c.get()); c.reset(); @@ -3334,6 +3341,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, TEST_SYNC_POINT("DBImpl::BackgroundCompaction:Start"); const ReadOptions read_options(Env::IOActivity::kCompaction); + const WriteOptions write_options(Env::IOActivity::kCompaction); bool is_manual = (manual_compaction != nullptr); std::unique_ptr c; @@ -3547,7 +3555,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, } status = versions_->LogAndApply( c->column_family_data(), *c->mutable_cf_options(), read_options, - c->edit(), &mutex_, directories_.GetDbDir()); + write_options, c->edit(), &mutex_, directories_.GetDbDir()); io_s = versions_->io_status(); InstallSuperVersionAndScheduleWork(c->column_family_data(), &job_context->superversion_contexts[0], @@ -3613,7 +3621,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, } status = versions_->LogAndApply( c->column_family_data(), *c->mutable_cf_options(), read_options, - c->edit(), &mutex_, directories_.GetDbDir()); + write_options, c->edit(), &mutex_, directories_.GetDbDir()); io_s = versions_->io_status(); // Use latest MutableCFOptions InstallSuperVersionAndScheduleWork(c->column_family_data(), diff --git a/db/db_impl/db_impl_experimental.cc b/db/db_impl/db_impl_experimental.cc index 442cb47679d..c90df262e83 100644 --- 
a/db/db_impl/db_impl_experimental.cc +++ b/db/db_impl/db_impl_experimental.cc @@ -61,8 +61,10 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) { "PromoteL0 FAILED. Invalid target level %d\n", target_level); return Status::InvalidArgument("Invalid target level"); } - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + Status status; VersionEdit edit; JobContext job_context(next_job_id_.fetch_add(1), true); @@ -143,7 +145,7 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) { } status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), - read_options, &edit, &mutex_, + read_options, write_options, &edit, &mutex_, directories_.GetDbDir()); if (status.ok()) { InstallSuperVersionAndScheduleWork(cfd, diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index 9e2fbb73d00..ab75a36cc39 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -951,7 +951,8 @@ Status DBImpl::SetupDBId(bool read_only, RecoveryContext* recovery_ctx) { } // Persist it to IDENTITY file if allowed if (!read_only) { - s = SetIdentityFile(env_, dbname_, db_id_); + // TODO: plumb Env::IOActivity, Env::IOPriority + s = SetIdentityFile(WriteOptions(), env_, dbname_, db_id_); } return s; } diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index d9d1f932afa..818bf0e7483 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -296,7 +296,8 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) { Status DBImpl::NewDB(std::vector* new_filenames) { VersionEdit new_db; - Status s = SetIdentityFile(env_, dbname_); + Status s = + SetIdentityFile(WriteOptions(Env::IOActivity::kDBOpen), env_, dbname_); if (!s.ok()) { return s; } @@ -311,6 +312,7 @@ Status DBImpl::NewDB(std::vector* new_filenames) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Creating 
manifest 1 \n"); const std::string manifest = DescriptorFileName(dbname_, 1); + const WriteOptions write_options(Env::IOActivity::kDBOpen); { if (fs_->FileExists(manifest, IOOptions(), nullptr).ok()) { fs_->DeleteFile(manifest, IOOptions(), nullptr).PermitUncheckedError(); @@ -326,20 +328,23 @@ Status DBImpl::NewDB(std::vector* new_filenames) { immutable_db_options_.manifest_preallocation_size); std::unique_ptr file_writer(new WritableFileWriter( std::move(file), manifest, file_options, immutable_db_options_.clock, - io_tracer_, nullptr /* stats */, immutable_db_options_.listeners, - nullptr, tmp_set.Contains(FileType::kDescriptorFile), + io_tracer_, nullptr /* stats */, + Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, + immutable_db_options_.listeners, nullptr, + tmp_set.Contains(FileType::kDescriptorFile), tmp_set.Contains(FileType::kDescriptorFile))); log::Writer log(std::move(file_writer), 0, false); std::string record; new_db.EncodeTo(&record); - s = log.AddRecord(record); + s = log.AddRecord(write_options, record); if (s.ok()) { s = SyncManifest(&immutable_db_options_, log.file()); } } if (s.ok()) { // Make "CURRENT" file that points to the new manifest file. 
- s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir()); + s = SetCurrentFile(write_options, fs_.get(), dbname_, 1, + directories_.GetDbDir()); if (new_filenames) { new_filenames->emplace_back( manifest.substr(manifest.find_last_of("/\\") + 1)); @@ -858,7 +863,9 @@ Status DBImpl::PersistentStatsProcessFormatVersion() { if (s.ok()) { ColumnFamilyOptions cfo; OptimizeForPersistentStats(&cfo); - s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle); + s = CreateColumnFamily(ReadOptions(Env::IOActivity::kDBOpen), + WriteOptions(Env::IOActivity::kDBOpen), cfo, + kPersistentStatsColumnFamilyName, &handle); } if (s.ok()) { persist_stats_cf_handle_ = static_cast(handle); @@ -911,7 +918,9 @@ Status DBImpl::InitPersistStatsColumnFamily() { ColumnFamilyHandle* handle = nullptr; ColumnFamilyOptions cfo; OptimizeForPersistentStats(&cfo); - s = CreateColumnFamily(cfo, kPersistentStatsColumnFamilyName, &handle); + s = CreateColumnFamily(ReadOptions(Env::IOActivity::kDBOpen), + WriteOptions(Env::IOActivity::kDBOpen), cfo, + kPersistentStatsColumnFamilyName, &handle); persist_stats_cf_handle_ = static_cast(handle); mutex_.Lock(); } @@ -922,9 +931,12 @@ Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) { mutex_.AssertHeld(); assert(versions_->descriptor_log_ == nullptr); const ReadOptions read_options(Env::IOActivity::kDBOpen); - Status s = versions_->LogAndApply( - recovery_ctx.cfds_, recovery_ctx.mutable_cf_opts_, read_options, - recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir()); + const WriteOptions write_options(Env::IOActivity::kDBOpen); + + Status s = versions_->LogAndApply(recovery_ctx.cfds_, + recovery_ctx.mutable_cf_opts_, read_options, + write_options, recovery_ctx.edit_lists_, + &mutex_, directories_.GetDbDir()); if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) { mutex_.Unlock(); for (const auto& fname : recovery_ctx.files_to_delete_) { @@ -1640,9 +1652,10 @@ Status 
DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, } IOStatus io_s; + const WriteOptions write_option(Env::IOActivity::kDBOpen); TableBuilderOptions tboptions( - *cfd->ioptions(), mutable_cf_options, cfd->internal_comparator(), - cfd->int_tbl_prop_collector_factories(), + *cfd->ioptions(), mutable_cf_options, write_option, + cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), mutable_cf_options.compression_opts, cfd->GetID(), cfd->GetName(), 0 /* level */, false /* is_bottommost */, @@ -1656,14 +1669,14 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, uint64_t num_input_entries = 0; s = BuildTable( dbname_, versions_.get(), immutable_db_options_, tboptions, - file_options_for_compaction_, read_option, cfd->table_cache(), - iter.get(), std::move(range_del_iters), &meta, &blob_file_additions, - snapshot_seqs, earliest_write_conflict_snapshot, kMaxSequenceNumber, - snapshot_checker, paranoid_file_checks, cfd->internal_stats(), &io_s, - io_tracer_, BlobFileCreationReason::kRecovery, - empty_seqno_time_mapping, &event_logger_, job_id, Env::IO_HIGH, - nullptr /* table_properties */, write_hint, - nullptr /*full_history_ts_low*/, &blob_callback_, version, + file_options_for_compaction_, read_option, write_option, + cfd->table_cache(), iter.get(), std::move(range_del_iters), &meta, + &blob_file_additions, snapshot_seqs, earliest_write_conflict_snapshot, + kMaxSequenceNumber, snapshot_checker, paranoid_file_checks, + cfd->internal_stats(), &io_s, io_tracer_, + BlobFileCreationReason::kRecovery, empty_seqno_time_mapping, + &event_logger_, job_id, Env::IO_HIGH, nullptr /* table_properties */, + write_hint, nullptr /*full_history_ts_low*/, &blob_callback_, version, &num_input_entries); version->Unref(); LogFlush(immutable_db_options_.info_log); @@ -1863,7 +1876,8 @@ Status DB::OpenAndTrimHistory( return s; } -IOStatus DBImpl::CreateWAL(uint64_t 
log_file_num, uint64_t recycle_log_number, +IOStatus DBImpl::CreateWAL(const WriteOptions& write_options, + uint64_t log_file_num, uint64_t recycle_log_number, size_t preallocate_block_size, log::Writer** new_log) { IOStatus io_s; @@ -1897,14 +1911,15 @@ IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number, FileTypeSet tmp_set = immutable_db_options_.checksum_handoff_file_types; std::unique_ptr file_writer(new WritableFileWriter( std::move(lfile), log_fname, opt_file_options, - immutable_db_options_.clock, io_tracer_, nullptr /* stats */, listeners, - nullptr, tmp_set.Contains(FileType::kWalFile), + immutable_db_options_.clock, io_tracer_, nullptr /* stats */, + Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, listeners, nullptr, + tmp_set.Contains(FileType::kWalFile), tmp_set.Contains(FileType::kWalFile))); *new_log = new log::Writer(std::move(file_writer), log_file_num, immutable_db_options_.recycle_log_file_num > 0, immutable_db_options_.manual_wal_flush, immutable_db_options_.wal_compression); - io_s = (*new_log)->AddCompressionTypeRecord(); + io_s = (*new_log)->AddCompressionTypeRecord(write_options); } return io_s; } @@ -1913,6 +1928,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, const std::vector& column_families, std::vector* handles, DB** dbptr, const bool seq_per_batch, const bool batch_per_txn) { + const WriteOptions write_options(Env::IOActivity::kDBOpen); + const ReadOptions read_options(Env::IOActivity::kDBOpen); + Status s = ValidateOptionsByTable(db_options, column_families); if (!s.ok()) { return s; @@ -1988,7 +2006,7 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, log::Writer* new_log = nullptr; const size_t preallocate_block_size = impl->GetWalPreallocateBlockSize(max_write_buffer_size); - s = impl->CreateWAL(new_log_number, 0 /*recycle_log_number*/, + s = impl->CreateWAL(write_options, new_log_number, 0 /*recycle_log_number*/, preallocate_block_size, 
&new_log); if (s.ok()) { InstrumentedMutexLock wl(&impl->log_write_mutex_); @@ -2013,21 +2031,25 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, if (recovered_seq != kMaxSequenceNumber) { WriteBatch empty_batch; WriteBatchInternal::SetSequence(&empty_batch, recovered_seq); - WriteOptions write_options; uint64_t log_used, log_size; log::Writer* log_writer = impl->logs_.back().writer; LogFileNumberSize& log_file_number_size = impl->alive_log_files_.back(); assert(log_writer->get_log_number() == log_file_number_size.number); impl->mutex_.AssertHeld(); - s = impl->WriteToWAL(empty_batch, log_writer, &log_used, &log_size, - Env::IO_TOTAL, log_file_number_size); + s = impl->WriteToWAL(empty_batch, write_options, log_writer, &log_used, + &log_size, log_file_number_size); if (s.ok()) { // Need to fsync, otherwise it might get lost after a power reset. - s = impl->FlushWAL(false); + s = impl->FlushWAL(write_options, false); TEST_SYNC_POINT_CALLBACK("DBImpl::Open::BeforeSyncWAL", /*arg=*/&s); + IOOptions opts; if (s.ok()) { - s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync); + s = WritableFileWriter::PrepareIOOptions(write_options, opts); + } + if (s.ok()) { + s = log_writer->file()->Sync(opts, + impl->immutable_db_options_.use_fsync); } } } @@ -2056,7 +2078,8 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, // missing column family, create it ColumnFamilyHandle* handle = nullptr; impl->mutex_.Unlock(); - s = impl->CreateColumnFamily(cf.options, cf.name, &handle); + s = impl->CreateColumnFamily(read_options, write_options, cf.options, + cf.name, &handle); impl->mutex_.Lock(); if (s.ok()) { handles->push_back(handle); @@ -2107,8 +2130,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, if (s.ok()) { // Persist RocksDB Options before scheduling the compaction. // The WriteOptionsFile() will release and lock the mutex internally. 
- persist_options_status = impl->WriteOptionsFile( - false /*need_mutex_lock*/, false /*need_enter_write_thread*/); + persist_options_status = + impl->WriteOptionsFile(write_options, false /*need_mutex_lock*/, + false /*need_enter_write_thread*/); *dbptr = impl; impl->opened_successfully_ = true; @@ -2209,12 +2233,17 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname, impl); LogFlush(impl->immutable_db_options_.info_log); if (!impl->WALBufferIsEmpty()) { - s = impl->FlushWAL(false); + s = impl->FlushWAL(write_options, false); if (s.ok()) { // Sync is needed otherwise WAL buffered data might get lost after a // power reset. log::Writer* log_writer = impl->logs_.back().writer; - s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync); + IOOptions opts; + s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (s.ok()) { + s = log_writer->file()->Sync(opts, + impl->immutable_db_options_.use_fsync); + } } } if (s.ok() && !persist_options_status.ok()) { diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 8a1a6ce3114..8edf8bab51f 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -606,9 +606,9 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options, log_write_mutex_.Unlock(); if (status.ok() && synced_wals.IsWalAddition()) { InstrumentedMutexLock l(&mutex_); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; - status = ApplyWALToManifest(read_options, &synced_wals); + status = ApplyWALToManifest(read_options, write_options, &synced_wals); } // Requesting sync with two_write_queues_ is expected to be very rare. 
We @@ -769,9 +769,9 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options, } if (w.status.ok() && synced_wals.IsWalAddition()) { InstrumentedMutexLock l(&mutex_); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; - w.status = ApplyWALToManifest(read_options, &synced_wals); + w.status = ApplyWALToManifest(read_options, write_options, &synced_wals); } write_thread_.ExitAsBatchGroupLeader(wal_write_group, w.status); } @@ -1304,9 +1304,9 @@ Status DBImpl::MergeBatch(const WriteThread::WriteGroup& write_group, // When two_write_queues_ is disabled, this function is called from the only // write thread. Otherwise this must be called holding log_write_mutex_. IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch, + const WriteOptions& write_options, log::Writer* log_writer, uint64_t* log_used, uint64_t* log_size, - Env::IOPriority rate_limiter_priority, LogFileNumberSize& log_file_number_size) { assert(log_size != nullptr); @@ -1329,12 +1329,11 @@ IOStatus DBImpl::WriteToWAL(const WriteBatch& merged_batch, log_write_mutex_.Lock(); } IOStatus io_s = log_writer->MaybeAddUserDefinedTimestampSizeRecord( - versions_->GetColumnFamiliesTimestampSizeForRecord(), - rate_limiter_priority); + write_options, versions_->GetColumnFamiliesTimestampSizeForRecord()); if (!io_s.ok()) { return io_s; } - io_s = log_writer->AddRecord(log_entry, rate_limiter_priority); + io_s = log_writer->AddRecord(write_options, log_entry); if (UNLIKELY(needs_locking)) { log_write_mutex_.Unlock(); @@ -1377,9 +1376,12 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, WriteBatchInternal::SetSequence(merged_batch, sequence); uint64_t log_size; - io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size, - write_group.leader->rate_limiter_priority, - log_file_number_size); + + WriteOptions write_options; + write_options.rate_limiter_priority = + write_group.leader->rate_limiter_priority; + io_s 
= WriteToWAL(*merged_batch, write_options, log_writer, log_used, + &log_size, log_file_number_size); if (to_be_cached_state) { cached_recoverable_state_ = *to_be_cached_state; cached_recoverable_state_empty_ = false; @@ -1406,10 +1408,14 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group, log_write_mutex_.Lock(); } - for (auto& log : logs_) { - io_s = log.writer->file()->Sync(immutable_db_options_.use_fsync); - if (!io_s.ok()) { - break; + IOOptions opts; + io_s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (io_s.ok()) { + for (auto& log : logs_) { + io_s = log.writer->file()->Sync(opts, immutable_db_options_.use_fsync); + if (!io_s.ok()) { + break; + } } } @@ -1482,9 +1488,12 @@ IOStatus DBImpl::ConcurrentWriteToWAL( assert(log_writer->get_log_number() == log_file_number_size.number); uint64_t log_size; - io_s = WriteToWAL(*merged_batch, log_writer, log_used, &log_size, - write_group.leader->rate_limiter_priority, - log_file_number_size); + + WriteOptions write_options; + write_options.rate_limiter_priority = + write_group.leader->rate_limiter_priority; + io_s = WriteToWAL(*merged_batch, write_options, log_writer, log_used, + &log_size, log_file_number_size); if (to_be_cached_state) { cached_recoverable_state_ = *to_be_cached_state; cached_recoverable_state_empty_ = false; @@ -2096,8 +2105,10 @@ void DBImpl::NotifyOnMemTableSealed(ColumnFamilyData* /*cfd*/, // two_write_queues_ is true (This is to simplify the reasoning.) 
Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { mutex_.AssertHeld(); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + log::Writer* new_log = nullptr; MemTable* new_mem = nullptr; IOStatus io_s; @@ -2144,8 +2155,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { if (creating_new_log) { // TODO: Write buffer size passed in should be max of all CF's instead // of mutable_cf_options.write_buffer_size. - io_s = CreateWAL(new_log_number, recycle_log_number, preallocate_block_size, - &new_log); + io_s = CreateWAL(write_options, new_log_number, recycle_log_number, + preallocate_block_size, &new_log); if (s.ok()) { s = io_s; } @@ -2182,7 +2193,7 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { // In recovery path, we force another try of writing WAL buffer. cur_log_writer->file()->reset_seen_error(); } - io_s = cur_log_writer->WriteBuffer(); + io_s = cur_log_writer->WriteBuffer(write_options); if (s.ok()) { s = io_s; } @@ -2250,7 +2261,8 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { VersionEdit wal_deletion; wal_deletion.DeleteWalsBefore(min_wal_number_to_keep); s = versions_->LogAndApplyToDefaultColumnFamily( - read_options, &wal_deletion, &mutex_, directories_.GetDbDir()); + read_options, write_options, &wal_deletion, &mutex_, + directories_.GetDbDir()); if (!s.ok() && versions_->io_status().IsIOError()) { s = error_handler_.SetBGError(versions_->io_status(), BackgroundErrorReason::kManifestWrite); diff --git a/db/db_iter.cc b/db/db_iter.cc index e547c2e1cdd..24fb5753409 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -197,6 +197,7 @@ bool DBIter::SetBlobValueIfNeeded(const Slice& user_key, // TODO: consider moving ReadOptions from ArenaWrappedDBIter to DBIter to // avoid having to copy options back and forth. 
+ // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions read_options; read_options.read_tier = read_tier_; read_options.fill_cache = fill_cache_; diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc index 4293e77a15b..dab77ea0f68 100644 --- a/db/db_sst_test.cc +++ b/db/db_sst_test.cc @@ -948,9 +948,12 @@ TEST_F(DBSSTTest, OpenDBWithExistingTrash) { // Add some trash files to the db directory so the DB can clean them up ASSERT_OK(env_->CreateDirIfMissing(dbname_)); - ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash")); - ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash")); - ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash")); + ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "001.sst.trash", + false, Env::IOActivity::kUnknown)); + ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "002.sst.trash", + false, Env::IOActivity::kUnknown)); + ASSERT_OK(WriteStringToFile(env_, "abc", dbname_ + "/" + "003.sst.trash", + false, Env::IOActivity::kUnknown)); // Reopen the DB and verify that it deletes existing trash files Reopen(options); diff --git a/db/db_test2.cc b/db/db_test2.cc index c9fbe15f410..d1484a2f7f3 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -5679,7 +5679,8 @@ TEST_F(DBTest2, CrashInRecoveryMultipleCF) { ASSERT_OK(ReadFileToString(env_, fname, &file_content)); file_content[400] = 'h'; file_content[401] = 'a'; - ASSERT_OK(WriteStringToFile(env_, file_content, fname)); + ASSERT_OK(WriteStringToFile(env_, file_content, fname, false, + Env::IOActivity::kUnknown)); break; } } diff --git a/db/db_wal_test.cc b/db/db_wal_test.cc index 01dc84a0fba..603d91eada5 100644 --- a/db/db_wal_test.cc +++ b/db/db_wal_test.cc @@ -1559,7 +1559,7 @@ class RecoveryTestHelper { new log::Writer(std::move(file_writer), current_log_number, db_options.recycle_log_file_num > 0, false, db_options.wal_compression); - ASSERT_OK(log_writer->AddCompressionTypeRecord()); + 
ASSERT_OK(log_writer->AddCompressionTypeRecord(WriteOptions())); current_log_writer.reset(log_writer); WriteBatch batch; @@ -1572,7 +1572,7 @@ class RecoveryTestHelper { ASSERT_OK(batch.Put(key, value)); WriteBatchInternal::SetSequence(&batch, seq); ASSERT_OK(current_log_writer->AddRecord( - WriteBatchInternal::Contents(&batch))); + WriteOptions(), WriteBatchInternal::Contents(&batch))); versions->SetLastAllocatedSequence(seq); versions->SetLastPublishedSequence(seq); versions->SetLastSequence(seq); diff --git a/db/experimental.cc b/db/experimental.cc index f6f920b2ccb..44816e7107e 100644 --- a/db/experimental.cc +++ b/db/experimental.cc @@ -38,8 +38,9 @@ Status UpdateManifestForFilesState( const DBOptions& db_opts, const std::string& db_name, const std::vector& column_families, const UpdateManifestForFilesStateOptions& opts) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; OfflineManifestWriter w(db_opts, db_name); Status s = w.Recover(column_families); @@ -117,7 +118,8 @@ Status UpdateManifestForFilesState( std::unique_ptr db_dir; s = fs->NewDirectory(db_name, IOOptions(), &db_dir, nullptr); if (s.ok()) { - s = w.LogAndApply(read_options, cfd, &edit, db_dir.get()); + s = w.LogAndApply(read_options, write_options, cfd, &edit, + db_dir.get()); } if (s.ok()) { ++cfs_updated; diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 9756f47aa77..3a05b5ef76f 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -709,7 +709,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( // If customized readahead size is needed, we can pass a user option // all the way to here. Right now we just rely on the default readahead // to keep things simple. 
- // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ro; ro.readahead_size = ingestion_options_.verify_checksums_readahead_size; status = table_reader->VerifyChecksum( @@ -763,7 +763,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( file_to_ingest->num_range_deletions = props->num_range_deletions; ParsedInternalKey key; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ro; std::unique_ptr iter(table_reader->NewIterator( ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr, @@ -874,7 +874,7 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( bool overlap_with_db = false; Arena arena; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ro; ro.total_order_seek = true; int target_level = 0; diff --git a/db/fault_injection_test.cc b/db/fault_injection_test.cc index d888dfde104..17b4c034283 100644 --- a/db/fault_injection_test.cc +++ b/db/fault_injection_test.cc @@ -572,7 +572,7 @@ TEST_P(FaultInjectionTest, NoDuplicateTrailingEntries) { edit.SetColumnFamily(0); std::string buf; assert(edit.EncodeTo(&buf)); - const Status s = log_writer->AddRecord(buf); + const Status s = log_writer->AddRecord(WriteOptions(), buf); ASSERT_NOK(s); } diff --git a/db/flush_job.cc b/db/flush_job.cc index 0e6c66cacb3..ada684328e1 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -391,7 +391,7 @@ Status FlushJob::MemPurge() { // Create two iterators, one for the memtable data (contains // info from puts + deletes), and one for the memtable // Range Tombstones (from DeleteRanges). - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ro; ro.total_order_seek = true; Arena arena; @@ -683,8 +683,8 @@ bool FlushJob::MemPurgeDecider(double threshold) { // Cochran formula for determining sample size. // 95% confidence interval, 7% precision. 
// n0 = (1.96*1.96)*0.25/(0.07*0.07) = 196.0 - // TODO: plumb Env::IOActivity double n0 = 196.0; + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ro; ro.total_order_seek = true; @@ -942,29 +942,30 @@ Status FlushJob::WriteLevel0Table() { const std::string* const full_history_ts_low = (full_history_ts_low_.empty()) ? nullptr : &full_history_ts_low_; + const WriteOptions write_options(Env::IOActivity::kFlush); TableBuilderOptions tboptions( - *cfd_->ioptions(), mutable_cf_options_, cfd_->internal_comparator(), - cfd_->int_tbl_prop_collector_factories(), output_compression_, - mutable_cf_options_.compression_opts, cfd_->GetID(), cfd_->GetName(), - 0 /* level */, false /* is_bottommost */, - TableFileCreationReason::kFlush, oldest_key_time, current_time, - db_id_, db_session_id_, 0 /* target_file_size */, - meta_.fd.GetNumber()); + *cfd_->ioptions(), mutable_cf_options_, write_options, + cfd_->internal_comparator(), cfd_->int_tbl_prop_collector_factories(), + output_compression_, mutable_cf_options_.compression_opts, + cfd_->GetID(), cfd_->GetName(), 0 /* level */, + false /* is_bottommost */, TableFileCreationReason::kFlush, + oldest_key_time, current_time, db_id_, db_session_id_, + 0 /* target_file_size */, meta_.fd.GetNumber()); const SequenceNumber job_snapshot_seq = job_context_->GetJobSnapshotSequence(); const ReadOptions read_options(Env::IOActivity::kFlush); - s = BuildTable(dbname_, versions_, db_options_, tboptions, file_options_, - read_options, cfd_->table_cache(), iter.get(), - std::move(range_del_iters), &meta_, &blob_file_additions, - existing_snapshots_, earliest_write_conflict_snapshot_, - job_snapshot_seq, snapshot_checker_, - mutable_cf_options_.paranoid_file_checks, - cfd_->internal_stats(), &io_s, io_tracer_, - BlobFileCreationReason::kFlush, seqno_to_time_mapping_, - event_logger_, job_context_->job_id, io_priority, - &table_properties_, write_hint, full_history_ts_low, - blob_callback_, base_, &num_input_entries, - 
&memtable_payload_bytes, &memtable_garbage_bytes); + s = BuildTable( + dbname_, versions_, db_options_, tboptions, file_options_, + read_options, write_options, cfd_->table_cache(), iter.get(), + std::move(range_del_iters), &meta_, &blob_file_additions, + existing_snapshots_, earliest_write_conflict_snapshot_, + job_snapshot_seq, snapshot_checker_, + mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(), + &io_s, io_tracer_, BlobFileCreationReason::kFlush, + seqno_to_time_mapping_, event_logger_, job_context_->job_id, + io_priority, &table_properties_, write_hint, full_history_ts_low, + blob_callback_, base_, &num_input_entries, &memtable_payload_bytes, + &memtable_garbage_bytes); // TODO: Cleanup io_status in BuildTable and table builders assert(!s.ok() || io_s.ok()); io_s.PermitUncheckedError(); @@ -1157,8 +1158,9 @@ Status FlushJob::MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT() { VersionEdit edit; edit.SetColumnFamily(cfd_->GetID()); edit.SetFullHistoryTsLow(new_full_history_ts_low); + // TODO: plumb Env::IOActivity, Env::IOPriority return versions_->LogAndApply(cfd_, *cfd_->GetLatestMutableCFOptions(), - ReadOptions(), &edit, db_mutex_, + ReadOptions(), WriteOptions(), &edit, db_mutex_, output_file_directory_); } diff --git a/db/flush_job_test.cc b/db/flush_job_test.cc index 9fd9c13faf8..18ae6e61bff 100644 --- a/db/flush_job_test.cc +++ b/db/flush_job_test.cc @@ -55,7 +55,7 @@ class FlushJobTestBase : public testing::Test { } void NewDB() { - ASSERT_OK(SetIdentityFile(env_, dbname_)); + ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); VersionEdit new_db; new_db.SetLogNumber(0); @@ -89,19 +89,19 @@ class FlushJobTestBase : public testing::Test { log::Writer log(std::move(file_writer), 0, false); std::string record; new_db.EncodeTo(&record); - s = log.AddRecord(record); + s = log.AddRecord(WriteOptions(), record); ASSERT_OK(s); for (const auto& e : new_cfs) { record.clear(); e.EncodeTo(&record); - s = log.AddRecord(record); + s = 
log.AddRecord(WriteOptions(), record); ASSERT_OK(s); } } ASSERT_OK(s); // Make "CURRENT" file that points to the new manifest file. - s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); } diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 32bc4eead49..46a49726d00 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -354,7 +354,7 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo( // in file_meta. if (file_meta.smallest.empty()) { assert(file_meta.largest.empty()); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ro; std::unique_ptr iter(table_reader->NewIterator( ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr, diff --git a/db/internal_stats.cc b/db/internal_stats.cc index 6ef4b430236..d4cf19dcfb7 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -1155,7 +1155,7 @@ bool InternalStats::HandleSsTables(std::string* value, Slice /*suffix*/) { bool InternalStats::HandleAggregatedTableProperties(std::string* value, Slice /*suffix*/) { std::shared_ptr tp; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; auto s = cfd_->current()->GetAggregatedTableProperties(read_options, &tp); if (!s.ok()) { @@ -1177,7 +1177,7 @@ static std::map MapUint64ValuesToString( bool InternalStats::HandleAggregatedTablePropertiesMap( std::map* values, Slice /*suffix*/) { std::shared_ptr tp; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; auto s = cfd_->current()->GetAggregatedTableProperties(read_options, &tp); if (!s.ok()) { @@ -1195,7 +1195,7 @@ bool InternalStats::HandleAggregatedTablePropertiesAtLevel(std::string* values, return false; } std::shared_ptr tp; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, 
Env::IOPriority const ReadOptions read_options; auto s = cfd_->current()->GetAggregatedTableProperties( read_options, &tp, static_cast(level)); @@ -1214,7 +1214,7 @@ bool InternalStats::HandleAggregatedTablePropertiesAtLevelMap( return false; } std::shared_ptr tp; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; auto s = cfd_->current()->GetAggregatedTableProperties( read_options, &tp, static_cast(level)); @@ -1418,7 +1418,7 @@ bool InternalStats::HandleEstimatePendingCompactionBytes(uint64_t* value, bool InternalStats::HandleEstimateTableReadersMem(uint64_t* value, DBImpl* /*db*/, Version* version) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; *value = (version == nullptr) ? 0 @@ -1473,7 +1473,7 @@ bool InternalStats::HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* /*db*/, ->compaction_options_fifo.allow_compaction) { return false; } - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; TablePropertiesCollection collection; auto s = cfd_->current()->GetPropertiesOfAllTables(read_options, &collection); diff --git a/db/log_test.cc b/db/log_test.cc index fa5e2aa0fcd..430e5b6569d 100644 --- a/db/log_test.cc +++ b/db/log_test.cc @@ -185,9 +185,10 @@ class LogTest void Write(const std::string& msg, const UnorderedMap* cf_to_ts_sz = nullptr) { if (cf_to_ts_sz != nullptr && !cf_to_ts_sz->empty()) { - ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(*cf_to_ts_sz)); + ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(WriteOptions(), + *cf_to_ts_sz)); } - ASSERT_OK(writer_->AddRecord(Slice(msg))); + ASSERT_OK(writer_->AddRecord(WriteOptions(), Slice(msg))); } size_t WrittenBytes() const { return dest_contents().size(); } @@ -732,8 +733,8 @@ TEST_P(LogTest, Recycle) { std::unique_ptr dest_holder(new WritableFileWriter( std::move(sink), "" /* don't care */, 
FileOptions())); Writer recycle_writer(std::move(dest_holder), 123, true); - ASSERT_OK(recycle_writer.AddRecord(Slice("foooo"))); - ASSERT_OK(recycle_writer.AddRecord(Slice("bar"))); + ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("foooo"))); + ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("bar"))); ASSERT_GE(get_reader_contents()->size(), log::kBlockSize * 2); ASSERT_EQ("foooo", Read()); ASSERT_EQ("bar", Read()); @@ -764,9 +765,10 @@ TEST_P(LogTest, RecycleWithTimestampSize) { UnorderedMap ts_sz_two = { {2, sizeof(uint64_t)}, }; - ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord(ts_sz_two)); - ASSERT_OK(recycle_writer.AddRecord(Slice("foooo"))); - ASSERT_OK(recycle_writer.AddRecord(Slice("bar"))); + ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord( + WriteOptions(), ts_sz_two)); + ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("foooo"))); + ASSERT_OK(recycle_writer.AddRecord(WriteOptions(), Slice("bar"))); ASSERT_GE(get_reader_contents()->size(), log::kBlockSize * 2); CheckRecordAndTimestampSize("foooo", ts_sz_two); CheckRecordAndTimestampSize("bar", ts_sz_two); @@ -853,12 +855,12 @@ class RetriableLogTest : public ::testing::TestWithParam { std::string contents() { return sink_->contents_; } void Encode(const std::string& msg) { - ASSERT_OK(log_writer_->AddRecord(Slice(msg))); + ASSERT_OK(log_writer_->AddRecord(WriteOptions(), Slice(msg))); } void Write(const Slice& data) { - ASSERT_OK(writer_->Append(data)); - ASSERT_OK(writer_->Sync(true)); + ASSERT_OK(writer_->Append(IOOptions(), data)); + ASSERT_OK(writer_->Sync(IOOptions(), true)); } bool TryRead(std::string* result) { @@ -991,7 +993,9 @@ INSTANTIATE_TEST_CASE_P(bool, RetriableLogTest, ::testing::Values(0, 2)); class CompressionLogTest : public LogTest { public: - Status SetupTestEnv() { return writer_->AddCompressionTypeRecord(); } + Status SetupTestEnv() { + return writer_->AddCompressionTypeRecord(WriteOptions()); + } }; 
TEST_P(CompressionLogTest, Empty) { @@ -1109,7 +1113,7 @@ TEST_P(CompressionLogTest, AlignedFragmentation) { // beginning of the block. while ((WrittenBytes() & (kBlockSize - 1)) >= kHeaderSize) { char entry = 'a'; - ASSERT_OK(writer_->AddRecord(Slice(&entry, 1))); + ASSERT_OK(writer_->AddRecord(WriteOptions(), Slice(&entry, 1))); num_filler_records++; } const std::vector wal_entries = { diff --git a/db/log_writer.cc b/db/log_writer.cc index 86e0286ccd5..bd561146fb9 100644 --- a/db/log_writer.cc +++ b/db/log_writer.cc @@ -39,31 +39,37 @@ Writer::Writer(std::unique_ptr&& dest, uint64_t log_number, Writer::~Writer() { if (dest_) { - WriteBuffer().PermitUncheckedError(); + WriteBuffer(WriteOptions()).PermitUncheckedError(); } if (compress_) { delete compress_; } } -IOStatus Writer::WriteBuffer() { +IOStatus Writer::WriteBuffer(const WriteOptions& write_options) { if (dest_->seen_error()) { return IOStatus::IOError("Seen error. Skip writing buffer."); } - return dest_->Flush(); + IOOptions opts; + IOStatus s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (!s.ok()) { + return s; + } + return dest_->Flush(opts); } IOStatus Writer::Close() { IOStatus s; + IOOptions opts; if (dest_) { - s = dest_->Close(); + s = dest_->Close(opts); dest_.reset(); } return s; } -IOStatus Writer::AddRecord(const Slice& slice, - Env::IOPriority rate_limiter_priority) { +IOStatus Writer::AddRecord(const WriteOptions& write_options, + const Slice& slice) { const char* ptr = slice.data(); size_t left = slice.size(); @@ -83,6 +89,8 @@ IOStatus Writer::AddRecord(const Slice& slice, } IOStatus s; + IOOptions opts; + s = WritableFileWriter::PrepareIOOptions(write_options, opts); do { const int64_t leftover = kBlockSize - block_offset_; assert(leftover >= 0); @@ -92,9 +100,10 @@ IOStatus Writer::AddRecord(const Slice& slice, // Fill the trailer (literal below relies on kHeaderSize and // kRecyclableHeaderSize being <= 11) assert(header_size <= 11); - s = 
dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + s = dest_->Append(opts, + Slice("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", static_cast(leftover)), - 0 /* crc32c_checksum */, rate_limiter_priority); + 0 /* crc32c_checksum */); if (!s.ok()) { break; } @@ -144,7 +153,7 @@ IOStatus Writer::AddRecord(const Slice& slice, type = recycle_log_files_ ? kRecyclableMiddleType : kMiddleType; } - s = EmitPhysicalRecord(type, ptr, fragment_length, rate_limiter_priority); + s = EmitPhysicalRecord(write_options, type, ptr, fragment_length); ptr += fragment_length; left -= fragment_length; begin = false; @@ -152,14 +161,14 @@ IOStatus Writer::AddRecord(const Slice& slice, if (s.ok()) { if (!manual_flush_) { - s = dest_->Flush(rate_limiter_priority); + s = dest_->Flush(opts); } } return s; } -IOStatus Writer::AddCompressionTypeRecord() { +IOStatus Writer::AddCompressionTypeRecord(const WriteOptions& write_options) { // Should be the first record assert(block_offset_ == 0); @@ -171,11 +180,16 @@ IOStatus Writer::AddCompressionTypeRecord() { CompressionTypeRecord record(compression_type_); std::string encode; record.EncodeTo(&encode); - IOStatus s = - EmitPhysicalRecord(kSetCompressionType, encode.data(), encode.size()); + IOStatus s = EmitPhysicalRecord(write_options, kSetCompressionType, + encode.data(), encode.size()); + IOOptions io_opts; + // Keep an EmitPhysicalRecord() failure: only overwrite s while it is OK. + if (s.ok()) { + s = WritableFileWriter::PrepareIOOptions(write_options, io_opts); + } if (s.ok()) { if (!manual_flush_) { - s = dest_->Flush(); + s = dest_->Flush(io_opts); } // Initialize fields required for compression const size_t max_output_buffer_len = @@ -197,8 +208,8 @@ IOStatus Writer::AddCompressionTypeRecord() { } IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord( - const UnorderedMap& cf_to_ts_sz, - Env::IOPriority rate_limiter_priority) { + const WriteOptions& write_options, + const UnorderedMap& cf_to_ts_sz) { std::vector> ts_sz_to_record; for (const auto& [cf_id, ts_sz] : cf_to_ts_sz) { if (recorded_cf_to_ts_sz_.count(cf_id) 
!= 0) { @@ -219,14 +230,14 @@ IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord( record.EncodeTo(&encoded); RecordType type = recycle_log_files_ ? kRecyclableUserDefinedTimestampSizeType : kUserDefinedTimestampSizeType; - return EmitPhysicalRecord(type, encoded.data(), encoded.size(), - rate_limiter_priority); + return EmitPhysicalRecord(write_options, type, encoded.data(), + encoded.size()); } bool Writer::BufferIsEmpty() { return dest_->BufferIsEmpty(); } -IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n, - Env::IOPriority rate_limiter_priority) { +IOStatus Writer::EmitPhysicalRecord(const WriteOptions& write_options, + RecordType t, const char* ptr, size_t n) { assert(n <= 0xffff); // Must fit in two bytes size_t header_size; @@ -266,10 +277,13 @@ IOStatus Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n, EncodeFixed32(buf, crc); // Write the header and the payload - IOStatus s = dest_->Append(Slice(buf, header_size), 0 /* crc32c_checksum */, - rate_limiter_priority); + IOOptions opts; + IOStatus s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (s.ok()) { + s = dest_->Append(opts, Slice(buf, header_size), 0 /* crc32c_checksum */); + } if (s.ok()) { - s = dest_->Append(Slice(ptr, n), payload_crc, rate_limiter_priority); + s = dest_->Append(opts, Slice(ptr, n), payload_crc); } block_offset_ += header_size + n; return s; diff --git a/db/log_writer.h b/db/log_writer.h index 7a64a856015..c57f387c351 100644 --- a/db/log_writer.h +++ b/db/log_writer.h @@ -86,9 +86,8 @@ class Writer { ~Writer(); - IOStatus AddRecord(const Slice& slice, - Env::IOPriority rate_limiter_priority = Env::IO_TOTAL); - IOStatus AddCompressionTypeRecord(); + IOStatus AddRecord(const WriteOptions& write_options, const Slice& slice); + IOStatus AddCompressionTypeRecord(const WriteOptions& write_options); // If there are column families in `cf_to_ts_sz` not included in // `recorded_cf_to_ts_sz_` and its user-defined timestamp 
size is non-zero, @@ -96,15 +95,15 @@ class Writer { // kRecyclableUserDefinedTimestampSizeType for these column families. // This timestamp size record applies to all subsequent records. IOStatus MaybeAddUserDefinedTimestampSizeRecord( - const UnorderedMap& cf_to_ts_sz, - Env::IOPriority rate_limiter_priority = Env::IO_TOTAL); + const WriteOptions& write_options, + const UnorderedMap& cf_to_ts_sz); WritableFileWriter* file() { return dest_.get(); } const WritableFileWriter* file() const { return dest_.get(); } uint64_t get_log_number() const { return log_number_; } - IOStatus WriteBuffer(); + IOStatus WriteBuffer(const WriteOptions& write_options); IOStatus Close(); @@ -121,9 +120,8 @@ class Writer { // record type stored in the header. uint32_t type_crc_[kMaxRecordType + 1]; - IOStatus EmitPhysicalRecord( - RecordType type, const char* ptr, size_t length, - Env::IOPriority rate_limiter_priority = Env::IO_TOTAL); + IOStatus EmitPhysicalRecord(const WriteOptions& write_options, + RecordType type, const char* ptr, size_t length); // If true, it does not flush after each write. 
Instead it relies on the upper // layer to manually does the flush by calling ::WriteBuffer() diff --git a/db/memtable.cc b/db/memtable.cc index 8a71a6494d4..9eb8513b7e5 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -597,7 +597,7 @@ void MemTable::ConstructFragmentedRangeTombstones() { assert(!IsFragmentedRangeTombstonesConstructed(false)); // There should be no concurrent Construction if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority auto* unfragmented_iter = new MemTableIterator(*this, ReadOptions(), nullptr /* arena */, true /* use_range_del_table */); diff --git a/db/memtable_list.cc b/db/memtable_list.cc index b665c19b8fc..b30defe7302 100644 --- a/db/memtable_list.cc +++ b/db/memtable_list.cc @@ -468,6 +468,7 @@ Status MemTableList::TryInstallMemtableFlushResults( mu->AssertHeld(); const ReadOptions read_options(Env::IOActivity::kFlush); + const WriteOptions write_options(Env::IOActivity::kFlush); // Flush was successful // Record the status on the memtable object. Either this call or a call by a @@ -580,10 +581,10 @@ Status MemTableList::TryInstallMemtableFlushResults( }; if (write_edits) { // this can release and reacquire the mutex. 
- s = vset->LogAndApply(cfd, mutable_cf_options, read_options, edit_list, - mu, db_directory, /*new_descriptor_log=*/false, - /*column_family_options=*/nullptr, - manifest_write_cb); + s = vset->LogAndApply( + cfd, mutable_cf_options, read_options, write_options, edit_list, mu, + db_directory, /*new_descriptor_log=*/false, + /*column_family_options=*/nullptr, manifest_write_cb); } else { // If write_edit is false (e.g: successful mempurge), // then remove old memtables, wake up manifest write queue threads, @@ -801,6 +802,7 @@ Status InstallMemtableAtomicFlushResults( mu->AssertHeld(); const ReadOptions read_options(Env::IOActivity::kFlush); + const WriteOptions write_options(Env::IOActivity::kFlush); size_t num = mems_list.size(); assert(cfds.size() == num); @@ -879,8 +881,8 @@ Status InstallMemtableAtomicFlushResults( } // this can release and reacquire the mutex. - s = vset->LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists, - mu, db_directory); + s = vset->LogAndApply(cfds, mutable_cf_options_list, read_options, + write_options, edit_lists, mu, db_directory); for (size_t k = 0; k != cfds.size(); ++k) { auto* imm = (imm_lists == nullptr) ? cfds[k]->imm() : imm_lists->at(k); diff --git a/db/repair.cc b/db/repair.cc index 1af738fca76..b9efef3c21e 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -145,8 +145,10 @@ class Repairer { // Adds a column family to the VersionSet with cf_options_ and updates // manifest. 
Status AddColumnFamily(const std::string& cf_name, uint32_t cf_id) { - // TODO: plumb Env::IOActivity; + // TODO: plumb Env::IOActivity, Env::IOPriority; const ReadOptions read_options; + const WriteOptions write_options; + const auto* cf_opts = GetColumnFamilyOptions(cf_name); if (cf_opts == nullptr) { return Status::Corruption("Encountered unknown column family with name=" + @@ -169,9 +171,9 @@ class Repairer { Status status = env_->GetFileSystem()->NewDirectory(dbname_, IOOptions(), &db_dir, nullptr); if (status.ok()) { - status = vset_.LogAndApply(cfd, mut_cf_opts, read_options, &edit, &mutex_, - db_dir.get(), false /* new_descriptor_log */, - cf_opts); + status = vset_.LogAndApply(cfd, mut_cf_opts, read_options, write_options, + &edit, &mutex_, db_dir.get(), + false /* new_descriptor_log */, cf_opts); } mutex_.Unlock(); return status; @@ -361,7 +363,7 @@ class Repairer { } }; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; // Open the log file @@ -439,7 +441,7 @@ class Repairer { FileMetaData meta; meta.fd = FileDescriptor(next_file_number_++, 0, 0); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ro; ro.total_order_seek = true; Arena arena; @@ -462,8 +464,10 @@ class Repairer { IOStatus io_s; CompressionOptions default_compression; + // TODO: plumb Env::IOActivity, Env::IOPriority + const WriteOptions write_option; TableBuilderOptions tboptions( - *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(), + *cfd->ioptions(), *cfd->GetLatestMutableCFOptions(), write_option, cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), kNoCompression, default_compression, cfd->GetID(), cfd->GetName(), -1 /* level */, false /* is_bottommost */, @@ -474,13 +478,14 @@ class Repairer { SeqnoToTimeMapping empty_seqno_time_mapping; status = BuildTable( dbname_, /* versions */ nullptr, immutable_db_options_, tboptions, - file_options_, read_options, 
table_cache_.get(), iter.get(), - std::move(range_del_iters), &meta, nullptr /* blob_file_additions */, - {}, kMaxSequenceNumber, kMaxSequenceNumber, snapshot_checker, - false /* paranoid_file_checks*/, nullptr /* internal_stats */, &io_s, - nullptr /*IOTracer*/, BlobFileCreationReason::kRecovery, - empty_seqno_time_mapping, nullptr /* event_logger */, 0 /* job_id */, - Env::IO_HIGH, nullptr /* table_properties */, write_hint); + file_options_, read_options, write_option, table_cache_.get(), + iter.get(), std::move(range_del_iters), &meta, + nullptr /* blob_file_additions */, {}, kMaxSequenceNumber, + kMaxSequenceNumber, snapshot_checker, false /* paranoid_file_checks*/, + nullptr /* internal_stats */, &io_s, nullptr /*IOTracer*/, + BlobFileCreationReason::kRecovery, empty_seqno_time_mapping, + nullptr /* event_logger */, 0 /* job_id */, Env::IO_HIGH, + nullptr /* table_properties */, write_hint); ROCKS_LOG_INFO(db_options_.info_log, "Log #%" PRIu64 ": %d ops saved to Table #%" PRIu64 " %s", log, counter, meta.fd.GetNumber(), @@ -527,7 +532,7 @@ class Repairer { file_size); std::shared_ptr props; if (status.ok()) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; status = table_cache_->GetTableProperties( file_options_, read_options, icmp_, t->meta, &props, @@ -590,7 +595,7 @@ class Repairer { } } if (status.ok()) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ropts; ropts.total_order_seek = true; InternalIterator* iter = table_cache_->NewIterator( @@ -639,7 +644,7 @@ class Repairer { // an SST file is a full sorted run. This probably needs the extra logic // from compaction_job.cc around call to UpdateBoundariesForRange (to // handle range tombstones extendingg beyond range of other entries). 
- // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ropts; std::unique_ptr r_iter; status = table_cache_->GetRangeTombstoneIterator( @@ -664,8 +669,10 @@ class Repairer { } Status AddTables() { - // TODO: plumb Env::IOActivity; + // TODO: plumb Env::IOActivity, Env::IOPriority; const ReadOptions read_options; + const WriteOptions write_options; + std::unordered_map> cf_id_to_tables; SequenceNumber max_sequence = 0; for (size_t i = 0; i < tables_.size(); i++) { @@ -752,8 +759,8 @@ class Repairer { nullptr); if (s.ok()) { s = vset_.LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), - read_options, &edit, &mutex_, db_dir.get(), - false /* new_descriptor_log */); + read_options, write_options, &edit, &mutex_, + db_dir.get(), false /* new_descriptor_log */); } mutex_.Unlock(); } diff --git a/db/table_properties_collector_test.cc b/db/table_properties_collector_test.cc index 437b7e30903..0ae4f91f1fd 100644 --- a/db/table_properties_collector_test.cc +++ b/db/table_properties_collector_test.cc @@ -52,10 +52,12 @@ void MakeBuilder( std::unique_ptr wf(new test::StringSink); writable->reset( new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions())); + const WriteOptions write_options; TableBuilderOptions tboptions( - ioptions, moptions, internal_comparator, int_tbl_prop_collector_factories, - options.compression, options.compression_opts, kTestColumnFamilyId, - kTestColumnFamilyName, kTestLevel); + ioptions, moptions, write_options, internal_comparator, + int_tbl_prop_collector_factories, options.compression, + options.compression_opts, kTestColumnFamilyId, kTestColumnFamilyName, + kTestLevel); builder->reset(NewTableBuilder(tboptions, writable->get())); } } // namespace @@ -281,7 +283,7 @@ void TestCustomizedTablePropertiesCollector( builder->Add(ikey.Encode(), kv.second); } ASSERT_OK(builder->Finish()); - ASSERT_OK(writer->Flush()); + ASSERT_OK(writer->Flush(IOOptions())); // -- Step 2: Read properties 
test::StringSink* fwf = @@ -420,7 +422,7 @@ void TestInternalKeyPropertiesCollector( } ASSERT_OK(builder->Finish()); - ASSERT_OK(writable->Flush()); + ASSERT_OK(writable->Flush(IOOptions())); test::StringSink* fwf = static_cast(writable->writable_file()); diff --git a/db/version_set.cc b/db/version_set.cc index c5057028d62..ecdb6fe2302 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1613,7 +1613,7 @@ Status Version::TablesRangeTombstoneSummary(int max_entries_to_print, std::stringstream ss; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; for (int level = 0; level < storage_info_.num_levels_; level++) { for (const auto& file_meta : storage_info_.files_[level]) { @@ -5154,8 +5154,8 @@ void VersionSet::AppendVersion(ColumnFamilyData* column_family_data, Status VersionSet::ProcessManifestWrites( std::deque& writers, InstrumentedMutex* mu, FSDirectory* dir_contains_current_file, bool new_descriptor_log, - const ColumnFamilyOptions* new_cf_options, - const ReadOptions& read_options) { + const ColumnFamilyOptions* new_cf_options, const ReadOptions& read_options, + const WriteOptions& write_options) { mu->AssertHeld(); assert(!writers.empty()); ManifestWriter& first_writer = writers.front(); @@ -5425,13 +5425,15 @@ Status VersionSet::ProcessManifestWrites( FileTypeSet tmp_set = db_options_->checksum_handoff_file_types; std::unique_ptr file_writer(new WritableFileWriter( std::move(descriptor_file), descriptor_fname, opt_file_opts, clock_, - io_tracer_, nullptr, db_options_->listeners, nullptr, + io_tracer_, nullptr, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, + db_options_->listeners, nullptr, tmp_set.Contains(FileType::kDescriptorFile), tmp_set.Contains(FileType::kDescriptorFile))); descriptor_log_.reset( new log::Writer(std::move(file_writer), 0, false)); - s = WriteCurrentStateToManifest(curr_state, wal_additions, - descriptor_log_.get(), io_s); + s = 
WriteCurrentStateToManifest(write_options, curr_state, + wal_additions, descriptor_log_.get(), + io_s); } else { manifest_io_status = io_s; s = io_s; @@ -5473,7 +5475,7 @@ Status VersionSet::ProcessManifestWrites( } ++idx; #endif /* !NDEBUG */ - io_s = descriptor_log_->AddRecord(record); + io_s = descriptor_log_->AddRecord(write_options, record); if (!io_s.ok()) { s = io_s; manifest_io_status = io_s; @@ -5500,7 +5502,8 @@ Status VersionSet::ProcessManifestWrites( assert(manifest_io_status.ok()); } if (s.ok() && new_descriptor_log) { - io_s = SetCurrentFile(fs_.get(), dbname_, pending_manifest_file_number_, + io_s = SetCurrentFile(write_options, fs_.get(), dbname_, + pending_manifest_file_number_, dir_contains_current_file); if (!io_s.ok()) { s = io_s; @@ -5728,7 +5731,7 @@ void VersionSet::WakeUpWaitingManifestWriters() { Status VersionSet::LogAndApply( const autovector& column_family_datas, const autovector& mutable_cf_options_list, - const ReadOptions& read_options, + const ReadOptions& read_options, const WriteOptions& write_options, const autovector>& edit_lists, InstrumentedMutex* mu, FSDirectory* dir_contains_current_file, bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options, @@ -5806,8 +5809,8 @@ Status VersionSet::LogAndApply( return Status::ColumnFamilyDropped(); } return ProcessManifestWrites(writers, mu, dir_contains_current_file, - new_descriptor_log, new_cf_options, - read_options); + new_descriptor_log, new_cf_options, read_options, + write_options); } void VersionSet::LogAndApplyCFHelper(VersionEdit* edit, @@ -6144,7 +6147,7 @@ Status VersionSet::ListColumnFamilies(std::vector* column_families, Status VersionSet::ListColumnFamiliesFromManifest( const std::string& manifest_path, FileSystem* fs, std::vector* column_families) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; std::unique_ptr file_reader; Status s; @@ -6188,8 +6191,9 @@ Status 
VersionSet::ReduceNumberOfLevels(const std::string& dbname, "Number of levels needs to be bigger than 1"); } - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; ImmutableDBOptions db_options(*options); ColumnFamilyOptions cf_options(*options); @@ -6278,8 +6282,8 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname, InstrumentedMutex dummy_mutex; InstrumentedMutexLock l(&dummy_mutex); return versions.LogAndApply(versions.GetColumnFamilySet()->GetDefault(), - mutable_cf_options, read_options, &ve, - &dummy_mutex, nullptr, true); + mutable_cf_options, read_options, write_options, + &ve, &dummy_mutex, nullptr, true); } // Get the checksum information including the checksum and checksum function @@ -6353,7 +6357,7 @@ Status VersionSet::DumpManifest( Options& options, std::string& dscname, bool verbose, bool hex, bool json, const std::vector& cf_descs) { assert(options.env); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; std::vector column_families; @@ -6420,6 +6424,7 @@ void VersionSet::MarkMinLogNumberToKeep(uint64_t number) { } Status VersionSet::WriteCurrentStateToManifest( + const WriteOptions& write_options, const std::unordered_map& curr_state, const VersionEdit& wal_additions, log::Writer* log, IOStatus& io_s) { // TODO: Break up into multiple records to reduce memory usage on recovery? 
@@ -6440,7 +6445,7 @@ Status VersionSet::WriteCurrentStateToManifest( return Status::Corruption("Unable to Encode VersionEdit:" + edit_for_db_id.DebugString(true)); } - io_s = log->AddRecord(db_id_record); + io_s = log->AddRecord(write_options, db_id_record); if (!io_s.ok()) { return io_s; } @@ -6455,7 +6460,7 @@ Status VersionSet::WriteCurrentStateToManifest( return Status::Corruption("Unable to Encode VersionEdit: " + wal_additions.DebugString(true)); } - io_s = log->AddRecord(record); + io_s = log->AddRecord(write_options, record); if (!io_s.ok()) { return io_s; } @@ -6472,7 +6477,7 @@ Status VersionSet::WriteCurrentStateToManifest( return Status::Corruption("Unable to Encode VersionEdit: " + wal_deletions.DebugString(true)); } - io_s = log->AddRecord(wal_deletions_record); + io_s = log->AddRecord(write_options, wal_deletions_record); if (!io_s.ok()) { return io_s; } @@ -6502,7 +6507,7 @@ Status VersionSet::WriteCurrentStateToManifest( return Status::Corruption("Unable to Encode VersionEdit:" + edit.DebugString(true)); } - io_s = log->AddRecord(record); + io_s = log->AddRecord(write_options, record); if (!io_s.ok()) { return io_s; } @@ -6584,7 +6589,7 @@ Status VersionSet::WriteCurrentStateToManifest( return Status::Corruption("Unable to Encode VersionEdit:" + edit.DebugString(true)); } - io_s = log->AddRecord(record); + io_s = log->AddRecord(write_options, record); if (!io_s.ok()) { return io_s; } diff --git a/db/version_set.h b/db/version_set.h index 87c6eb51474..4a24f7edb63 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1152,14 +1152,15 @@ class VersionSet { virtual ~VersionSet(); Status LogAndApplyToDefaultColumnFamily( - const ReadOptions& read_options, VersionEdit* edit, InstrumentedMutex* mu, + const ReadOptions& read_options, const WriteOptions& write_options, + VersionEdit* edit, InstrumentedMutex* mu, FSDirectory* dir_contains_current_file, bool new_descriptor_log = false, const ColumnFamilyOptions* column_family_options = nullptr) { 
ColumnFamilyData* default_cf = GetColumnFamilySet()->GetDefault(); const MutableCFOptions* cf_options = default_cf->GetLatestMutableCFOptions(); - return LogAndApply(default_cf, *cf_options, read_options, edit, mu, - dir_contains_current_file, new_descriptor_log, + return LogAndApply(default_cf, *cf_options, read_options, write_options, + edit, mu, dir_contains_current_file, new_descriptor_log, column_family_options); } @@ -1172,7 +1173,8 @@ class VersionSet { Status LogAndApply( ColumnFamilyData* column_family_data, const MutableCFOptions& mutable_cf_options, - const ReadOptions& read_options, VersionEdit* edit, InstrumentedMutex* mu, + const ReadOptions& read_options, const WriteOptions& write_options, + VersionEdit* edit, InstrumentedMutex* mu, FSDirectory* dir_contains_current_file, bool new_descriptor_log = false, const ColumnFamilyOptions* column_family_options = nullptr) { autovector cfds; @@ -1183,16 +1185,16 @@ class VersionSet { autovector edit_list; edit_list.emplace_back(edit); edit_lists.emplace_back(edit_list); - return LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists, - mu, dir_contains_current_file, new_descriptor_log, - column_family_options); + return LogAndApply(cfds, mutable_cf_options_list, read_options, + write_options, edit_lists, mu, dir_contains_current_file, + new_descriptor_log, column_family_options); } // The batch version. 
If edit_list.size() > 1, caller must ensure that // no edit in the list column family add or drop Status LogAndApply( ColumnFamilyData* column_family_data, const MutableCFOptions& mutable_cf_options, - const ReadOptions& read_options, + const ReadOptions& read_options, const WriteOptions& write_options, const autovector& edit_list, InstrumentedMutex* mu, FSDirectory* dir_contains_current_file, bool new_descriptor_log = false, const ColumnFamilyOptions* column_family_options = nullptr, @@ -1203,9 +1205,10 @@ class VersionSet { mutable_cf_options_list.emplace_back(&mutable_cf_options); autovector> edit_lists; edit_lists.emplace_back(edit_list); - return LogAndApply(cfds, mutable_cf_options_list, read_options, edit_lists, - mu, dir_contains_current_file, new_descriptor_log, - column_family_options, {manifest_wcb}); + return LogAndApply(cfds, mutable_cf_options_list, read_options, + write_options, edit_lists, mu, dir_contains_current_file, + new_descriptor_log, column_family_options, + {manifest_wcb}); } // The across-multi-cf batch version. 
If edit_lists contain more than @@ -1214,7 +1217,7 @@ class VersionSet { virtual Status LogAndApply( const autovector& cfds, const autovector& mutable_cf_options_list, - const ReadOptions& read_options, + const ReadOptions& read_options, const WriteOptions& write_options, const autovector>& edit_lists, InstrumentedMutex* mu, FSDirectory* dir_contains_current_file, bool new_descriptor_log = false, @@ -1568,6 +1571,7 @@ class VersionSet { // Save current contents to *log Status WriteCurrentStateToManifest( + const WriteOptions& write_options, const std::unordered_map& curr_state, const VersionEdit& wal_additions, log::Writer* log, IOStatus& io_s); @@ -1655,7 +1659,8 @@ class VersionSet { FSDirectory* dir_contains_current_file, bool new_descriptor_log, const ColumnFamilyOptions* new_cf_options, - const ReadOptions& read_options); + const ReadOptions& read_options, + const WriteOptions& write_options); void LogAndApplyCFHelper(VersionEdit* edit, SequenceNumber* max_last_sequence); @@ -1707,7 +1712,7 @@ class ReactiveVersionSet : public VersionSet { private: std::unique_ptr manifest_tailer_; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options_; using VersionSet::LogAndApply; using VersionSet::Recover; @@ -1716,6 +1721,7 @@ class ReactiveVersionSet : public VersionSet { const autovector& /*cfds*/, const autovector& /*mutable_cf_options_list*/, const ReadOptions& /* read_options */, + const WriteOptions& /* write_options */, const autovector>& /*edit_lists*/, InstrumentedMutex* /*mu*/, FSDirectory* /*dir_contains_current_file*/, bool /*new_descriptor_log*/, const ColumnFamilyOptions* /*new_cf_option*/, diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 59b94615128..f1369aaf9fc 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -1259,11 +1259,11 @@ class VersionSetTestBase { log_writer->reset(new log::Writer(std::move(file_writer), 0, false)); std::string record; 
new_db.EncodeTo(&record); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); for (const auto& e : new_cfs) { record.clear(); e.EncodeTo(&record); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); ASSERT_OK(s); } } @@ -1279,11 +1279,11 @@ class VersionSetTestBase { void NewDB() { SequenceNumber last_seqno; std::unique_ptr log_writer; - ASSERT_OK(SetIdentityFile(env_, dbname_)); + ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); PrepareManifest(&column_families_, &last_seqno, &log_writer); log_writer.reset(); // Make "CURRENT" file point to the new manifest file. - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); + Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); EXPECT_OK(versions_->Recover(column_families_, false)); @@ -1313,7 +1313,7 @@ class VersionSetTestBase { mutex_.Lock(); Status s = versions_->LogAndApply( versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, - read_options_, &edit, &mutex_, nullptr); + read_options_, write_options_, &edit, &mutex_, nullptr); mutex_.Unlock(); return s; } @@ -1327,7 +1327,7 @@ class VersionSetTestBase { mutex_.Lock(); Status s = versions_->LogAndApply( versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, - read_options_, vedits, &mutex_, nullptr); + read_options_, write_options_, vedits, &mutex_, nullptr); mutex_.Unlock(); return s; } @@ -1339,7 +1339,8 @@ class VersionSetTestBase { VersionEdit dummy; ASSERT_OK(versions_->LogAndApply( versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, - read_options_, &dummy, &mutex_, db_directory, new_descriptor_log)); + read_options_, write_options_, &dummy, &mutex_, db_directory, + new_descriptor_log)); mutex_.Unlock(); } @@ -1357,7 +1358,7 @@ class VersionSetTestBase { mutex_.Lock(); s = versions_->LogAndApply(/*column_family_data=*/nullptr, MutableCFOptions(cf_options), read_options_, - 
&new_cf, &mutex_, + write_options_, &new_cf, &mutex_, /*db_directory=*/nullptr, /*new_descriptor_log=*/false, &cf_options); mutex_.Unlock(); @@ -1380,6 +1381,8 @@ class VersionSetTestBase { ImmutableOptions immutable_options_; MutableCFOptions mutable_cf_options_; const ReadOptions read_options_; + const WriteOptions write_options_; + std::shared_ptr table_cache_; WriteController write_controller_; WriteBufferManager write_buffer_manager_; @@ -1404,6 +1407,7 @@ TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) { NewDB(); const int kGroupSize = 5; const ReadOptions read_options; + const WriteOptions write_options; autovector edits; for (int i = 0; i != kGroupSize; ++i) { @@ -1431,8 +1435,9 @@ TEST_F(VersionSetTest, SameColumnFamilyGroupCommit) { }); SyncPoint::GetInstance()->EnableProcessing(); mutex_.Lock(); - Status s = versions_->LogAndApply(cfds, all_mutable_cf_options, read_options, - edit_lists, &mutex_, nullptr); + Status s = + versions_->LogAndApply(cfds, all_mutable_cf_options, read_options, + write_options, edit_lists, &mutex_, nullptr); mutex_.Unlock(); EXPECT_OK(s); EXPECT_EQ(kGroupSize - 1, count); @@ -1634,7 +1639,7 @@ TEST_F(VersionSetTest, ObsoleteBlobFile) { mutex_.Lock(); Status s = versions_->LogAndApply( versions_->GetColumnFamilySet()->GetDefault(), mutable_cf_options_, - read_options_, &edit, &mutex_, nullptr); + read_options_, write_options_, &edit, &mutex_, nullptr); mutex_.Unlock(); ASSERT_OK(s); @@ -2252,7 +2257,8 @@ class VersionSetWithTimestampTest : public VersionSetTest { Status s; mutex_.Lock(); s = versions_->LogAndApply(cfd_, *(cfd_->GetLatestMutableCFOptions()), - read_options_, edits_, &mutex_, nullptr); + read_options_, write_options_, edits_, &mutex_, + nullptr); mutex_.Unlock(); ASSERT_OK(s); VerifyFullHistoryTsLow(*std::max_element(ts_lbs.begin(), ts_lbs.end())); @@ -2312,7 +2318,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, edits_[i].MarkAtomicGroup(--remaining); edits_[i].SetLastSequence(last_seqno_++); 
} - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); + ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); } void SetupIncompleteTrailingAtomicGroup(int atomic_group_size) { @@ -2324,7 +2330,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, edits_[i].MarkAtomicGroup(--remaining); edits_[i].SetLastSequence(last_seqno_++); } - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); + ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); } void SetupCorruptedAtomicGroup(int atomic_group_size) { @@ -2338,7 +2344,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, } edits_[i].SetLastSequence(last_seqno_++); } - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); + ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); } void SetupIncorrectAtomicGroup(int atomic_group_size) { @@ -2354,7 +2360,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, } edits_[i].SetLastSequence(last_seqno_++); } - ASSERT_OK(SetCurrentFile(fs_.get(), dbname_, 1, nullptr)); + ASSERT_OK(SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr)); } void SetupTestSyncPoints() { @@ -2400,7 +2406,7 @@ class VersionSetAtomicGroupTest : public VersionSetTestBase, for (int i = 0; i < num_edits; i++) { std::string record; edits_[i].EncodeTo(&record); - ASSERT_OK(log_writer_->AddRecord(record)); + ASSERT_OK(log_writer_->AddRecord(WriteOptions(), record)); } } @@ -2522,7 +2528,7 @@ TEST_F(VersionSetAtomicGroupTest, // edits. std::string last_record; edits_[kAtomicGroupSize - 1].EncodeTo(&last_record); - EXPECT_OK(log_writer_->AddRecord(last_record)); + EXPECT_OK(log_writer_->AddRecord(WriteOptions(), last_record)); InstrumentedMutex mu; std::unordered_set cfds_changed; mu.Lock(); @@ -2694,12 +2700,13 @@ class VersionSetTestDropOneCF : public VersionSetTestBase, // last column family in an atomic group. 
TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { const ReadOptions read_options; + const WriteOptions write_options; std::vector column_families; SequenceNumber last_seqno; std::unique_ptr log_writer; PrepareManifest(&column_families, &last_seqno, &log_writer); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); + Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); EXPECT_OK(versions_->Recover(column_families, false /* read_only */)); @@ -2722,9 +2729,9 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { cfd_to_drop->Ref(); drop_cf_edit.SetColumnFamily(cfd_to_drop->GetID()); mutex_.Lock(); - s = versions_->LogAndApply(cfd_to_drop, - *cfd_to_drop->GetLatestMutableCFOptions(), - read_options, &drop_cf_edit, &mutex_, nullptr); + s = versions_->LogAndApply( + cfd_to_drop, *cfd_to_drop->GetLatestMutableCFOptions(), read_options, + write_options, &drop_cf_edit, &mutex_, nullptr); mutex_.Unlock(); ASSERT_OK(s); @@ -2774,7 +2781,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { SyncPoint::GetInstance()->EnableProcessing(); mutex_.Lock(); s = versions_->LogAndApply(cfds, mutable_cf_options_list, read_options, - edit_lists, &mutex_, nullptr); + write_options, edit_lists, &mutex_, nullptr); mutex_.Unlock(); ASSERT_OK(s); ASSERT_EQ(1, called); @@ -2808,7 +2815,7 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase, log_writer->reset(new log::Writer(std::move(file_writer), 0, true)); std::string record; ASSERT_TRUE(new_db.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); ASSERT_OK(s); // Create new column family VersionEdit new_cf; @@ -2818,7 +2825,7 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase, new_cf.SetNextFile(2); record.clear(); ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), 
record); ASSERT_OK(s); } @@ -2832,8 +2839,8 @@ class EmptyDefaultCfNewManifest : public VersionSetTestBase, TEST_F(EmptyDefaultCfNewManifest, Recover) { PrepareManifest(nullptr, nullptr, &log_writer_); log_writer_.reset(); - Status s = - SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); + Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); @@ -2864,7 +2871,7 @@ class VersionSetTestEmptyDb assert(nullptr != log_writer); VersionEdit new_db; if (db_options_.write_dbid_to_manifest) { - ASSERT_OK(SetIdentityFile(env_, dbname_)); + ASSERT_OK(SetIdentityFile(WriteOptions(), env_, dbname_)); DBOptions tmp_db_options; tmp_db_options.env = env_; std::unique_ptr impl(new DBImpl(tmp_db_options, dbname_)); @@ -2883,7 +2890,7 @@ class VersionSetTestEmptyDb log_writer->reset(new log::Writer(std::move(file_writer), 0, false)); std::string record; new_db.EncodeTo(&record); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); ASSERT_OK(s); } } @@ -2897,8 +2904,8 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest0) { db_options_.write_dbid_to_manifest = std::get<0>(GetParam()); PrepareManifest(nullptr, nullptr, &log_writer_); log_writer_.reset(); - Status s = - SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); + Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; @@ -2938,11 +2945,12 @@ TEST_P(VersionSetTestEmptyDb, OpenFromIncompleteManifest1) { { std::string record; new_cf1.EncodeTo(&record); - s = log_writer_->AddRecord(record); + s = log_writer_->AddRecord(WriteOptions(), record); ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + 
/*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; @@ -2985,11 +2993,12 @@ TEST_P(VersionSetTestEmptyDb, OpenFromInCompleteManifest2) { new_cf.SetColumnFamily(cf_id++); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = log_writer_->AddRecord(record); + s = log_writer_->AddRecord(WriteOptions(), record); ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; @@ -3032,7 +3041,7 @@ TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) { new_cf.SetColumnFamily(cf_id++); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = log_writer_->AddRecord(record); + s = log_writer_->AddRecord(WriteOptions(), record); ASSERT_OK(s); } { @@ -3043,11 +3052,12 @@ TEST_P(VersionSetTestEmptyDb, OpenManifestWithUnknownCF) { tmp_edit.SetLastSequence(0); std::string record; ASSERT_TRUE(tmp_edit.EncodeTo(&record)); - s = log_writer_->AddRecord(record); + s = log_writer_->AddRecord(WriteOptions(), record); ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(fs_.get(), dbname_, 1, /*directory_to_fsync=*/nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; @@ -3090,7 +3100,7 @@ TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) { new_cf.SetColumnFamily(cf_id++); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = log_writer_->AddRecord(record); + s = log_writer_->AddRecord(WriteOptions(), record); ASSERT_OK(s); } { @@ -3100,11 +3110,12 @@ TEST_P(VersionSetTestEmptyDb, OpenCompleteManifest) { tmp_edit.SetLastSequence(0); std::string record; ASSERT_TRUE(tmp_edit.EncodeTo(&record)); - s = log_writer_->AddRecord(record); + s = log_writer_->AddRecord(WriteOptions(), record); ASSERT_OK(s); } log_writer_.reset(); - s = SetCurrentFile(fs_.get(), 
dbname_, 1, /*directory_to_fsync=*/nullptr); + s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, + /*directory_to_fsync=*/nullptr); ASSERT_OK(s); std::string manifest_path; @@ -3205,7 +3216,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, { std::string record; ASSERT_TRUE(new_db.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); ASSERT_OK(s); } const std::vector cf_names = { @@ -3223,7 +3234,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, new_cf.SetColumnFamily(cf_id); std::string record; ASSERT_TRUE(new_cf.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); ASSERT_OK(s); VersionEdit cf_files; @@ -3231,7 +3242,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, cf_files.SetLogNumber(0); record.clear(); ASSERT_TRUE(cf_files.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); ASSERT_OK(s); ++cf_id; } @@ -3242,7 +3253,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, edit.SetLastSequence(seq); std::string record; ASSERT_TRUE(edit.EncodeTo(&record)); - s = (*log_writer)->AddRecord(record); + s = (*log_writer)->AddRecord(WriteOptions(), record); ASSERT_OK(s); } *last_seqno = seq + 1; @@ -3283,18 +3294,19 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, std::move(file), fname, FileOptions(), env_->GetSystemClock().get())); IntTblPropCollectorFactories int_tbl_prop_collector_factories; + const WriteOptions write_options; std::unique_ptr builder(table_factory_->NewTableBuilder( TableBuilderOptions( - immutable_options_, mutable_cf_options_, *internal_comparator_, - &int_tbl_prop_collector_factories, kNoCompression, - CompressionOptions(), + immutable_options_, mutable_cf_options_, write_options, + *internal_comparator_, &int_tbl_prop_collector_factories, + kNoCompression, 
CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, info.column_family, info.level), fwriter.get())); InternalKey ikey(info.key, 0, ValueType::kTypeValue); builder->Add(ikey.Encode(), "value"); ASSERT_OK(builder->Finish()); - ASSERT_OK(fwriter->Flush()); + ASSERT_OK(fwriter->Flush(IOOptions())); uint64_t file_size = 0; s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr); ASSERT_OK(s); @@ -3326,7 +3338,7 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, assert(log_writer_.get() != nullptr); std::string record; ASSERT_TRUE(edit.EncodeTo(&record, 0 /* ts_sz */)); - Status s = log_writer_->AddRecord(record); + Status s = log_writer_->AddRecord(WriteOptions(), record); ASSERT_OK(s); } @@ -3371,7 +3383,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) { WriteFileAdditionAndDeletionToManifest( /*cf=*/0, std::vector>(), deleted_files); log_writer_.reset(); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); + Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); @@ -3429,7 +3441,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) { WriteFileAdditionAndDeletionToManifest( /*cf=*/0, added_files, std::vector>()); log_writer_.reset(); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); + Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); @@ -3483,7 +3495,7 @@ TEST_F(VersionSetTestMissingFiles, NoFileMissing) { WriteFileAdditionAndDeletionToManifest( /*cf=*/0, std::vector>(), deleted_files); log_writer_.reset(); - Status s = SetCurrentFile(fs_.get(), dbname_, 1, nullptr); + Status s = SetCurrentFile(WriteOptions(), fs_.get(), dbname_, 1, nullptr); ASSERT_OK(s); std::string manifest_path; VerifyManifest(&manifest_path); diff --git a/db/version_util.h b/db/version_util.h index 
e39f255719c..348efe56fa0 100644 --- a/db/version_util.h +++ b/db/version_util.h @@ -33,15 +33,17 @@ class OfflineManifestWriter { /*no_error_if_files_missing*/ true); } - Status LogAndApply(const ReadOptions& read_options, ColumnFamilyData* cfd, + Status LogAndApply(const ReadOptions& read_options, + const WriteOptions& write_options, ColumnFamilyData* cfd, VersionEdit* edit, FSDirectory* dir_contains_current_file) { // Use `mutex` to imitate a locked DB mutex when calling `LogAndApply()`. InstrumentedMutex mutex; mutex.Lock(); - Status s = versions_.LogAndApply( - cfd, *cfd->GetLatestMutableCFOptions(), read_options, edit, &mutex, - dir_contains_current_file, false /* new_descriptor_log */); + Status s = versions_.LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), + read_options, write_options, edit, &mutex, + dir_contains_current_file, + false /* new_descriptor_log */); mutex.Unlock(); return s; } diff --git a/db/wal_manager_test.cc b/db/wal_manager_test.cc index 0144e184682..156acbf8533 100644 --- a/db/wal_manager_test.cc +++ b/db/wal_manager_test.cc @@ -72,8 +72,8 @@ class WalManagerTest : public testing::Test { WriteBatch batch; ASSERT_OK(batch.Put(key, value)); WriteBatchInternal::SetSequence(&batch, seq); - ASSERT_OK( - current_log_writer_->AddRecord(WriteBatchInternal::Contents(&batch))); + ASSERT_OK(current_log_writer_->AddRecord( + WriteOptions(), WriteBatchInternal::Contents(&batch))); versions_->SetLastAllocatedSequence(seq); versions_->SetLastPublishedSequence(seq); versions_->SetLastSequence(seq); @@ -145,7 +145,8 @@ TEST_F(WalManagerTest, ReadFirstRecordCache) { WriteBatch batch; ASSERT_OK(batch.Put("foo", "bar")); WriteBatchInternal::SetSequence(&batch, 10); - ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch))); + ASSERT_OK( + writer.AddRecord(WriteOptions(), WriteBatchInternal::Contents(&batch))); // TODO(icanadi) move SpecialEnv outside of db_test, so we can reuse it here. 
// Waiting for lei to finish with db_test @@ -333,4 +334,3 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } - diff --git a/db/write_batch.cc b/db/write_batch.cc index 78a137d21aa..9ba5e65c0eb 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -2047,7 +2047,7 @@ class MemTableInserter : public WriteBatch::Handler { // key not found in memtable. Do sst get, update, add SnapshotImpl read_from_snapshot; read_from_snapshot.number_ = sequence_; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions ropts; // it's going to be overwritten for sure, so no point caching data block // containing the old version @@ -2492,7 +2492,7 @@ class MemTableInserter : public WriteBatch::Handler { // operations in the same batch. SnapshotImpl read_from_snapshot; read_from_snapshot.number_ = sequence_; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority ReadOptions read_options; read_options.snapshot = &read_from_snapshot; diff --git a/db/write_thread.h b/db/write_thread.h index 6e5805e3764..dc64601f9f4 100644 --- a/db/write_thread.h +++ b/db/write_thread.h @@ -166,6 +166,8 @@ class WriteThread { PreReleaseCallback* _pre_release_callback = nullptr, PostMemTableCallback* _post_memtable_callback = nullptr) : batch(_batch), + // TODO: store a copy of WriteOptions instead of its separate data + // members sync(write_options.sync), no_slowdown(write_options.no_slowdown), disable_wal(write_options.disableWAL), diff --git a/db_stress_tool/db_stress_env_wrapper.h b/db_stress_tool/db_stress_env_wrapper.h index 83e6838c703..08d6cd2e8b6 100644 --- a/db_stress_tool/db_stress_env_wrapper.h +++ b/db_stress_tool/db_stress_env_wrapper.h @@ -76,6 +76,161 @@ class DbStressRandomAccessFileWrapper : public FSRandomAccessFileOwnerWrapper { } }; +class DbStressWritableFileWrapper : public FSWritableFileOwnerWrapper { + public: + DbStressWritableFileWrapper(std::unique_ptr&& 
target) + : FSWritableFileOwnerWrapper(std::move(target)) {} + + IOStatus Append(const Slice& data, const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Append(data, options, dbg); + } + IOStatus Append(const Slice& data, const IOOptions& options, + const DataVerificationInfo& verification_info, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Append(data, options, verification_info, dbg); + } + IOStatus PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->PositionedAppend(data, offset, options, dbg); + } + IOStatus PositionedAppend(const Slice& data, uint64_t offset, + const IOOptions& options, + const DataVerificationInfo& verification_info, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return 
target()->PositionedAppend(data, offset, options, verification_info, + dbg); + } + + virtual IOStatus Truncate(uint64_t size, const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Truncate(size, options, dbg); + } + + virtual IOStatus Close(const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Close(options, dbg); + } + + virtual IOStatus Flush(const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Flush(options, dbg); + } + + virtual IOStatus Sync(const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Sync(options, dbg); + } + + virtual IOStatus Fsync(const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + 
ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Fsync(options, dbg); + } + +#ifdef ROCKSDB_FALLOCATE_PRESENT + virtual IOStatus Allocate(uint64_t offset, uint64_t len, + const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->Allocate(offset, len, options, dbg); + } +#endif + + virtual IOStatus RangeSync(uint64_t offset, uint64_t nbytes, + const IOOptions& options, + IODebugContext* dbg) override { +#ifndef NDEBUG + const ThreadStatus::OperationType thread_op = + ThreadStatusUtil::GetThreadOperation(); + Env::IOActivity io_activity = + ThreadStatusUtil::TEST_GetExpectedIOActivity(thread_op); + assert(io_activity == Env::IOActivity::kUnknown || + io_activity == options.io_activity); +#endif + return target()->RangeSync(offset, nbytes, options, dbg); + } +}; + class DbStressFSWrapper : public FileSystemWrapper { public: explicit DbStressFSWrapper(const std::shared_ptr& t) @@ -95,6 +250,17 @@ class DbStressFSWrapper : public FileSystemWrapper { return s; } + IOStatus NewWritableFile(const std::string& f, const FileOptions& file_opts, + std::unique_ptr* r, + IODebugContext* dbg) override { + std::unique_ptr file; + IOStatus s = target()->NewWritableFile(f, file_opts, &file, dbg); + if (s.ok()) { + r->reset(new DbStressWritableFileWrapper(std::move(file))); + } + return s; + } + IOStatus DeleteFile(const std::string& f, const IOOptions& opts, IODebugContext* dbg) override { // We determine whether it is a manifest file by searching a strong, diff --git 
a/db_stress_tool/db_stress_listener.cc b/db_stress_tool/db_stress_listener.cc index e2838c582a1..071561b2897 100644 --- a/db_stress_tool/db_stress_listener.cc +++ b/db_stress_tool/db_stress_listener.cc @@ -130,7 +130,8 @@ UniqueIdVerifier::UniqueIdVerifier(const std::string& db_name, Env* env) } UniqueIdVerifier::~UniqueIdVerifier() { - IOStatus s = data_file_writer_->Close(); + IOStatus s; + s = data_file_writer_->Close(IOOptions()); assert(s.ok()); } @@ -153,13 +154,14 @@ void UniqueIdVerifier::Verify(const std::string& id) { if (id_set_.size() >= 4294967) { return; } - IOStatus s = data_file_writer_->Append(Slice(id)); + IOOptions opts; + IOStatus s = data_file_writer_->Append(opts, Slice(id)); if (!s.ok()) { fprintf(stderr, "Error writing to unique id file: %s\n", s.ToString().c_str()); assert(false); } - s = data_file_writer_->Flush(); + s = data_file_writer_->Flush(opts); if (!s.ok()) { fprintf(stderr, "Error flushing unique id file: %s\n", s.ToString().c_str()); diff --git a/db_stress_tool/multi_ops_txns_stress.cc b/db_stress_tool/multi_ops_txns_stress.cc index 1591a52e998..674b7b03d67 100644 --- a/db_stress_tool/multi_ops_txns_stress.cc +++ b/db_stress_tool/multi_ops_txns_stress.cc @@ -1218,7 +1218,11 @@ void MultiOpsTxnsStressTest::VerifyPkSkFast(const ReadOptions& read_options, assert(db_ == db); assert(db_ != nullptr); + ThreadStatus::OperationType cur_op_type = + ThreadStatusUtil::GetThreadOperation(); + ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN); const Snapshot* const snapshot = db_->GetSnapshot(); + ThreadStatusUtil::SetThreadOperation(cur_op_type); assert(snapshot); ManagedSnapshot snapshot_guard(db_, snapshot); diff --git a/env/env.cc b/env/env.cc index 40493b478b9..b871d529981 100644 --- a/env/env.cc +++ b/env/env.cc @@ -1051,9 +1051,11 @@ void Log(const std::shared_ptr& info_log, const char* format, ...) 
{ } Status WriteStringToFile(Env* env, const Slice& data, const std::string& fname, - bool should_sync) { + bool should_sync, Env::IOActivity io_activity) { const auto& fs = env->GetFileSystem(); - return WriteStringToFile(fs.get(), data, fname, should_sync); + IOOptions opts; + opts.io_activity = io_activity; + return WriteStringToFile(fs.get(), data, fname, should_sync, opts); } Status ReadFileToString(Env* env, const std::string& fname, std::string* data) { diff --git a/env/env_test.cc b/env/env_test.cc index fb23bae130e..2185a440414 100644 --- a/env/env_test.cc +++ b/env/env_test.cc @@ -2609,7 +2609,7 @@ TEST_F(EnvTest, IsDirectory) { FileOptions(), SystemClock::Default().get())); constexpr char buf[] = "test"; - s = fwriter->Append(buf); + s = fwriter->Append(IOOptions(), buf); ASSERT_OK(s); } ASSERT_OK(Env::Default()->IsDirectory(test_file_path, &is_dir)); diff --git a/env/file_system.cc b/env/file_system.cc index 71fb4d5bc74..d2253a62a7f 100644 --- a/env/file_system.cc +++ b/env/file_system.cc @@ -180,19 +180,20 @@ FileOptions FileSystem::OptimizeForBlobFileRead( } IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, - const std::string& fname, bool should_sync) { + const std::string& fname, bool should_sync, + const IOOptions& opts) { std::unique_ptr file; EnvOptions soptions; IOStatus s = fs->NewWritableFile(fname, soptions, &file, nullptr); if (!s.ok()) { return s; } - s = file->Append(data, IOOptions(), nullptr); + s = file->Append(data, opts, nullptr); if (s.ok() && should_sync) { - s = file->Sync(IOOptions(), nullptr); + s = file->Sync(opts, nullptr); } if (!s.ok()) { - fs->DeleteFile(fname, IOOptions(), nullptr); + fs->DeleteFile(fname, opts, nullptr); } return s; } diff --git a/file/file_util.cc b/file/file_util.cc index c5bb22e48b0..43d67eb9f69 100644 --- a/file/file_util.cc +++ b/file/file_util.cc @@ -25,6 +25,8 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source, FileOptions soptions; IOStatus io_s; std::unique_ptr src_reader; 
+ // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; { soptions.temperature = temperature; @@ -36,7 +38,7 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source, if (size == 0) { // default argument means copy everything - io_s = fs->GetFileSize(source, IOOptions(), &size, nullptr); + io_s = fs->GetFileSize(source, opts, &size, nullptr); if (!io_s.ok()) { return io_s; } @@ -59,13 +61,14 @@ IOStatus CopyFile(FileSystem* fs, const std::string& source, if (slice.size() == 0) { return IOStatus::Corruption("file too small"); } - io_s = dest_writer->Append(slice); + + io_s = dest_writer->Append(opts, slice); if (!io_s.ok()) { return io_s; } size -= slice.size(); } - return dest_writer->Sync(use_fsync); + return dest_writer->Sync(opts, use_fsync); } IOStatus CopyFile(FileSystem* fs, const std::string& source, @@ -98,6 +101,8 @@ IOStatus CreateFile(FileSystem* fs, const std::string& destination, const EnvOptions soptions; IOStatus io_s; std::unique_ptr dest_writer; + // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; std::unique_ptr destfile; io_s = fs->NewWritableFile(destination, soptions, &destfile, nullptr); @@ -106,11 +111,11 @@ IOStatus CreateFile(FileSystem* fs, const std::string& destination, } dest_writer.reset( new WritableFileWriter(std::move(destfile), destination, soptions)); - io_s = dest_writer->Append(Slice(contents)); + io_s = dest_writer->Append(opts, Slice(contents)); if (!io_s.ok()) { return io_s; } - return dest_writer->Sync(use_fsync); + return dest_writer->Sync(opts, use_fsync); } Status DeleteDBFile(const ImmutableDBOptions* db_options, diff --git a/file/file_util.h b/file/file_util.h index 1ee2979557d..df49cd385dd 100644 --- a/file/file_util.h +++ b/file/file_util.h @@ -85,6 +85,14 @@ inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro, return IOStatus::OK(); } +inline IOStatus PrepareIOFromWriteOptions(const WriteOptions& wo, + IOOptions& opts) { + opts.rate_limiter_priority = 
wo.rate_limiter_priority; + opts.io_activity = wo.io_activity; + + return IOStatus::OK(); +} + // Test method to delete the input directory and all of its contents. // This method is destructive and is meant for use only in tests!!! Status DestroyDir(Env* env, const std::string& dir); diff --git a/file/filename.cc b/file/filename.cc index 1e04c73395e..8c95e2e2ddb 100644 --- a/file/filename.cc +++ b/file/filename.cc @@ -14,6 +14,7 @@ #include #include +#include "file/file_util.h" #include "file/writable_file_writer.h" #include "rocksdb/env.h" #include "test_util/sync_point.h" @@ -385,8 +386,8 @@ bool ParseFileName(const std::string& fname, uint64_t* number, return true; } -IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname, - uint64_t descriptor_number, +IOStatus SetCurrentFile(const WriteOptions& write_options, FileSystem* fs, + const std::string& dbname, uint64_t descriptor_number, FSDirectory* dir_contains_current_file) { // Remove leading "dbname/" and add newline to manifest file name std::string manifest = DescriptorFileName(dbname, descriptor_number); @@ -394,21 +395,25 @@ IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname, assert(contents.starts_with(dbname + "/")); contents.remove_prefix(dbname.size() + 1); std::string tmp = TempFileName(dbname, descriptor_number); - IOStatus s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true); + IOOptions opts; + IOStatus s = PrepareIOFromWriteOptions(write_options, opts); + if (s.ok()) { + s = WriteStringToFile(fs, contents.ToString() + "\n", tmp, true, opts); + } TEST_SYNC_POINT_CALLBACK("SetCurrentFile:BeforeRename", &s); if (s.ok()) { TEST_KILL_RANDOM_WITH_WEIGHT("SetCurrentFile:0", REDUCE_ODDS2); - s = fs->RenameFile(tmp, CurrentFileName(dbname), IOOptions(), nullptr); + s = fs->RenameFile(tmp, CurrentFileName(dbname), opts, nullptr); TEST_KILL_RANDOM_WITH_WEIGHT("SetCurrentFile:1", REDUCE_ODDS2); TEST_SYNC_POINT_CALLBACK("SetCurrentFile:AfterRename", &s); } if (s.ok()) { if 
(dir_contains_current_file != nullptr) { s = dir_contains_current_file->FsyncWithDirOptions( - IOOptions(), nullptr, DirFsyncOptions(CurrentFileName(dbname))); + opts, nullptr, DirFsyncOptions(CurrentFileName(dbname))); } } else { - fs->DeleteFile(tmp, IOOptions(), nullptr) + fs->DeleteFile(tmp, opts, nullptr) .PermitUncheckedError(); // NOTE: PermitUncheckedError is acceptable // here as we are already handling an error // case, and this is just a best-attempt @@ -417,8 +422,8 @@ IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname, return s; } -Status SetIdentityFile(Env* env, const std::string& dbname, - const std::string& db_id) { +Status SetIdentityFile(const WriteOptions& write_options, Env* env, + const std::string& dbname, const std::string& db_id) { std::string id; if (db_id.empty()) { id = env->GenerateUniqueId(); @@ -429,17 +434,18 @@ Status SetIdentityFile(Env* env, const std::string& dbname, // Reserve the filename dbname/000000.dbtmp for the temporary identity file std::string tmp = TempFileName(dbname, 0); std::string identify_file_name = IdentityFileName(dbname); - Status s = WriteStringToFile(env, id, tmp, true); + Status s = WriteStringToFile(env, id, tmp, true, write_options.io_activity); if (s.ok()) { s = env->RenameFile(tmp, identify_file_name); } std::unique_ptr dir_obj; + IOOptions opts; + s = PrepareIOFromWriteOptions(write_options, opts); if (s.ok()) { - s = env->GetFileSystem()->NewDirectory(dbname, IOOptions(), &dir_obj, - nullptr); + s = env->GetFileSystem()->NewDirectory(dbname, opts, &dir_obj, nullptr); } if (s.ok()) { - s = dir_obj->FsyncWithDirOptions(IOOptions(), nullptr, + s = dir_obj->FsyncWithDirOptions(opts, nullptr, DirFsyncOptions(identify_file_name)); } @@ -447,7 +453,7 @@ Status SetIdentityFile(Env* env, const std::string& dbname, // if it is not impelmented. 
Detailed explanations can be found in // db/db_impl/db_impl.h if (s.ok()) { - Status temp_s = dir_obj->Close(IOOptions(), nullptr); + Status temp_s = dir_obj->Close(opts, nullptr); if (!temp_s.ok()) { if (temp_s.IsNotSupported()) { temp_s.PermitUncheckedError(); @@ -466,7 +472,8 @@ IOStatus SyncManifest(const ImmutableDBOptions* db_options, WritableFileWriter* file) { TEST_KILL_RANDOM_WITH_WEIGHT("SyncManifest:0", REDUCE_ODDS2); StopWatch sw(db_options->clock, db_options->stats, MANIFEST_FILE_SYNC_MICROS); - return file->Sync(db_options->use_fsync); + // TODO: plumb Env::IOActivity, Env::IOPriority + return file->Sync(IOOptions(), db_options->use_fsync); } Status GetInfoLogFiles(const std::shared_ptr& fs, diff --git a/file/filename.h b/file/filename.h index 2eb125b6a17..3b7fdbe6ede 100644 --- a/file/filename.h +++ b/file/filename.h @@ -162,12 +162,14 @@ extern bool ParseFileName(const std::string& filename, uint64_t* number, // specified number. On its success and when dir_contains_current_file is not // nullptr, the function will fsync the directory containing the CURRENT file // when -extern IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname, +extern IOStatus SetCurrentFile(const WriteOptions& write_options, + FileSystem* fs, const std::string& dbname, uint64_t descriptor_number, FSDirectory* dir_contains_current_file); // Make the IDENTITY file for the db -extern Status SetIdentityFile(Env* env, const std::string& dbname, +extern Status SetIdentityFile(const WriteOptions& write_options, Env* env, + const std::string& dbname, const std::string& db_id = {}); // Sync manifest file `file`. 
diff --git a/file/random_access_file_reader.cc b/file/random_access_file_reader.cc index 8b22d617b2a..ed29b1fe112 100644 --- a/file/random_access_file_reader.cc +++ b/file/random_access_file_reader.cc @@ -53,6 +53,7 @@ inline Histograms GetFileReadHistograms(Statistics* stats, } return Histograms::HISTOGRAM_ENUM_MAX; } + inline void RecordIOStats(Statistics* stats, Temperature file_temperature, bool is_last_level, size_t size) { IOSTATS_ADD(bytes_read, size); diff --git a/file/writable_file_writer.cc b/file/writable_file_writer.cc index 908878a5fae..4fadf1d71a3 100644 --- a/file/writable_file_writer.cc +++ b/file/writable_file_writer.cc @@ -13,6 +13,7 @@ #include #include "db/version_edit.h" +#include "file/file_util.h" #include "monitoring/histogram.h" #include "monitoring/iostats_context_imp.h" #include "port/port.h" @@ -24,6 +25,24 @@ #include "util/rate_limiter_impl.h" namespace ROCKSDB_NAMESPACE { +inline Histograms GetFileWriteHistograms(Histograms file_writer_hist, + Env::IOActivity io_activity) { + if (file_writer_hist == Histograms::SST_WRITE_MICROS || + file_writer_hist == Histograms::BLOB_DB_BLOB_FILE_WRITE_MICROS) { + switch (io_activity) { + case Env::IOActivity::kFlush: + return Histograms::FILE_WRITE_FLUSH_MICROS; + case Env::IOActivity::kCompaction: + return Histograms::FILE_WRITE_COMPACTION_MICROS; + case Env::IOActivity::kDBOpen: + return Histograms::FILE_WRITE_DB_OPEN_MICROS; + default: + break; + } + } + return Histograms::HISTOGRAM_ENUM_MAX; +} + IOStatus WritableFileWriter::Create(const std::shared_ptr& fs, const std::string& fname, const FileOptions& file_opts, @@ -42,12 +61,16 @@ IOStatus WritableFileWriter::Create(const std::shared_ptr& fs, return io_s; } -IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, - Env::IOPriority op_rate_limiter_priority) { +IOStatus WritableFileWriter::Append(const IOOptions& opts, const Slice& data, + uint32_t crc32c_checksum) { if (seen_error()) { return 
AssertFalseAndGetStatusForPrevError(); } + StopWatch sw(clock_, stats_, hist_type_, + GetFileWriteHistograms(hist_type_, opts.io_activity)); + + const IOOptions io_options = FinalizeIOOptions(opts); const char* src = data.data(); size_t left = data.size(); IOStatus s; @@ -59,10 +82,6 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, UpdateFileChecksum(data); { - IOOptions io_options; - io_options.rate_limiter_priority = - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority); IOSTATS_TIMER_GUARD(prepare_write_nanos); TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite"); writable_file_->PrepareWrite(static_cast(GetFileSize()), left, @@ -88,7 +107,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, // Flush only when buffered I/O if (!use_direct_io() && (buf_.Capacity() - buf_.CurrentSize()) < left) { if (buf_.CurrentSize() > 0) { - s = Flush(op_rate_limiter_priority); + s = Flush(io_options); if (!s.ok()) { set_seen_error(); return s; @@ -119,7 +138,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, src += appended; if (left > 0) { - s = Flush(op_rate_limiter_priority); + s = Flush(io_options); if (!s.ok()) { break; } @@ -129,7 +148,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, } else { assert(buf_.CurrentSize() == 0); buffered_data_crc32c_checksum_ = crc32c_checksum; - s = WriteBufferedWithChecksum(src, left, op_rate_limiter_priority); + s = WriteBufferedWithChecksum(io_options, src, left); } } else { // In this case, either we do not need to do the data verification or @@ -149,7 +168,7 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, src += appended; if (left > 0) { - s = Flush(op_rate_limiter_priority); + s = Flush(io_options); if (!s.ok()) { break; } @@ -160,9 +179,9 @@ IOStatus WritableFileWriter::Append(const Slice& 
data, uint32_t crc32c_checksum, assert(buf_.CurrentSize() == 0); if (perform_data_verification_ && buffered_data_with_checksum_) { buffered_data_crc32c_checksum_ = crc32c::Value(src, left); - s = WriteBufferedWithChecksum(src, left, op_rate_limiter_priority); + s = WriteBufferedWithChecksum(io_options, src, left); } else { - s = WriteBuffered(src, left, op_rate_limiter_priority); + s = WriteBuffered(io_options, src, left); } } } @@ -177,11 +196,12 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum, return s; } -IOStatus WritableFileWriter::Pad(const size_t pad_bytes, - Env::IOPriority op_rate_limiter_priority) { +IOStatus WritableFileWriter::Pad(const IOOptions& opts, + const size_t pad_bytes) { if (seen_error()) { return AssertFalseAndGetStatusForPrevError(); } + const IOOptions io_options = FinalizeIOOptions(opts); assert(pad_bytes < kDefaultPageSize); size_t left = pad_bytes; size_t cap = buf_.Capacity() - buf_.CurrentSize(); @@ -195,7 +215,7 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes, buf_.PadWith(append_bytes, 0); left -= append_bytes; if (left > 0) { - IOStatus s = Flush(op_rate_limiter_priority); + IOStatus s = Flush(io_options); if (!s.ok()) { set_seen_error(); return s; @@ -214,11 +234,12 @@ IOStatus WritableFileWriter::Pad(const size_t pad_bytes, return IOStatus::OK(); } -IOStatus WritableFileWriter::Close() { +IOStatus WritableFileWriter::Close(const IOOptions& opts) { + IOOptions io_options = FinalizeIOOptions(opts); if (seen_error()) { IOStatus interim; if (writable_file_.get() != nullptr) { - interim = writable_file_->Close(IOOptions(), nullptr); + interim = writable_file_->Close(io_options, nullptr); writable_file_.reset(); } if (interim.ok()) { @@ -240,11 +261,9 @@ IOStatus WritableFileWriter::Close() { } IOStatus s; - s = Flush(); // flush cache to OS + s = Flush(io_options); // flush cache to OS IOStatus interim; - IOOptions io_options; - io_options.rate_limiter_priority = 
writable_file_->GetIOPriority(); // In direct I/O mode we write whole pages so // we need to let the file know where data ends. if (use_direct_io()) { @@ -322,11 +341,13 @@ IOStatus WritableFileWriter::Close() { // write out the cached data to the OS cache or storage if direct I/O // enabled -IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) { +IOStatus WritableFileWriter::Flush(const IOOptions& opts) { if (seen_error()) { return AssertFalseAndGetStatusForPrevError(); } + const IOOptions io_options = FinalizeIOOptions(opts); + IOStatus s; TEST_KILL_RANDOM_WITH_WEIGHT("WritableFileWriter::Flush:0", REDUCE_ODDS2); @@ -334,18 +355,17 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) { if (use_direct_io()) { if (pending_sync_) { if (perform_data_verification_ && buffered_data_with_checksum_) { - s = WriteDirectWithChecksum(op_rate_limiter_priority); + s = WriteDirectWithChecksum(io_options); } else { - s = WriteDirect(op_rate_limiter_priority); + s = WriteDirect(io_options); } } } else { if (perform_data_verification_ && buffered_data_with_checksum_) { - s = WriteBufferedWithChecksum(buf_.BufferStart(), buf_.CurrentSize(), - op_rate_limiter_priority); + s = WriteBufferedWithChecksum(io_options, buf_.BufferStart(), + buf_.CurrentSize()); } else { - s = WriteBuffered(buf_.BufferStart(), buf_.CurrentSize(), - op_rate_limiter_priority); + s = WriteBuffered(io_options, buf_.BufferStart(), buf_.CurrentSize()); } } if (!s.ok()) { @@ -359,10 +379,6 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) { if (ShouldNotifyListeners()) { start_ts = FileOperationInfo::StartNow(); } - IOOptions io_options; - io_options.rate_limiter_priority = - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority); s = writable_file_->Flush(io_options, nullptr); if (ShouldNotifyListeners()) { auto finish_ts = std::chrono::steady_clock::now(); @@ -400,7 +416,8 @@ 
IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) { assert(offset_sync_to >= last_sync_size_); if (offset_sync_to > 0 && offset_sync_to - last_sync_size_ >= bytes_per_sync_) { - s = RangeSync(last_sync_size_, offset_sync_to - last_sync_size_); + s = RangeSync(io_options, last_sync_size_, + offset_sync_to - last_sync_size_); if (!s.ok()) { set_seen_error(); } @@ -429,19 +446,25 @@ const char* WritableFileWriter::GetFileChecksumFuncName() const { } } -IOStatus WritableFileWriter::Sync(bool use_fsync) { +IOStatus WritableFileWriter::PrepareIOOptions(const WriteOptions& wo, + IOOptions& opts) { + return PrepareIOFromWriteOptions(wo, opts); +} + +IOStatus WritableFileWriter::Sync(const IOOptions& opts, bool use_fsync) { if (seen_error()) { return AssertFalseAndGetStatusForPrevError(); } - IOStatus s = Flush(); + IOOptions io_options = FinalizeIOOptions(opts); + IOStatus s = Flush(io_options); if (!s.ok()) { set_seen_error(); return s; } TEST_KILL_RANDOM("WritableFileWriter::Sync:0"); if (!use_direct_io() && pending_sync_) { - s = SyncInternal(use_fsync); + s = SyncInternal(io_options, use_fsync); if (!s.ok()) { set_seen_error(); return s; @@ -452,17 +475,19 @@ IOStatus WritableFileWriter::Sync(bool use_fsync) { return IOStatus::OK(); } -IOStatus WritableFileWriter::SyncWithoutFlush(bool use_fsync) { +IOStatus WritableFileWriter::SyncWithoutFlush(const IOOptions& opts, + bool use_fsync) { if (seen_error()) { return AssertFalseAndGetStatusForPrevError(); } + IOOptions io_options = FinalizeIOOptions(opts); if (!writable_file_->IsSyncThreadSafe()) { return IOStatus::NotSupported( "Can't WritableFileWriter::SyncWithoutFlush() because " "WritableFile::IsSyncThreadSafe() is false"); } TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:1"); - IOStatus s = SyncInternal(use_fsync); + IOStatus s = SyncInternal(io_options, use_fsync); TEST_SYNC_POINT("WritableFileWriter::SyncWithoutFlush:2"); if (!s.ok()) { #ifndef NDEBUG @@ -473,7 +498,8 @@ IOStatus 
WritableFileWriter::SyncWithoutFlush(bool use_fsync) { return s; } -IOStatus WritableFileWriter::SyncInternal(bool use_fsync) { +IOStatus WritableFileWriter::SyncInternal(const IOOptions& opts, + bool use_fsync) { // Caller is supposed to check seen_error_ IOStatus s; IOSTATS_TIMER_GUARD(fsync_nanos); @@ -487,12 +513,10 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) { start_ts = FileOperationInfo::StartNow(); } - IOOptions io_options; - io_options.rate_limiter_priority = writable_file_->GetIOPriority(); if (use_fsync) { - s = writable_file_->Fsync(io_options, nullptr); + s = writable_file_->Fsync(opts, nullptr); } else { - s = writable_file_->Sync(io_options, nullptr); + s = writable_file_->Sync(opts, nullptr); } if (ShouldNotifyListeners()) { auto finish_ts = std::chrono::steady_clock::now(); @@ -511,7 +535,8 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) { return s; } -IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) { +IOStatus WritableFileWriter::RangeSync(const IOOptions& opts, uint64_t offset, + uint64_t nbytes) { if (seen_error()) { return AssertFalseAndGetStatusForPrevError(); } @@ -522,9 +547,7 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) { if (ShouldNotifyListeners()) { start_ts = FileOperationInfo::StartNow(); } - IOOptions io_options; - io_options.rate_limiter_priority = writable_file_->GetIOPriority(); - IOStatus s = writable_file_->RangeSync(offset, nbytes, io_options, nullptr); + IOStatus s = writable_file_->RangeSync(offset, nbytes, opts, nullptr); if (!s.ok()) { set_seen_error(); } @@ -541,8 +564,8 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) { // This method writes to disk the specified data and makes use of the rate // limiter if available -IOStatus WritableFileWriter::WriteBuffered( - const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) { +IOStatus WritableFileWriter::WriteBuffered(const IOOptions& opts, + const 
char* data, size_t size) { if (seen_error()) { return AssertFalseAndGetStatusForPrevError(); } @@ -553,11 +576,7 @@ IOStatus WritableFileWriter::WriteBuffered( size_t left = size; DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - Env::IOPriority rate_limiter_priority_used = - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority); - IOOptions io_options; - io_options.rate_limiter_priority = rate_limiter_priority_used; + Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority; while (left > 0) { size_t allowed = left; @@ -573,7 +592,7 @@ IOStatus WritableFileWriter::WriteBuffered( TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); FileOperationInfo::StartTimePoint start_ts; - uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr); + uint64_t old_size = writable_file_->GetFileSize(opts, nullptr); if (ShouldNotifyListeners()) { start_ts = FileOperationInfo::StartNow(); old_size = next_write_offset_; @@ -585,10 +604,10 @@ IOStatus WritableFileWriter::WriteBuffered( if (perform_data_verification_) { Crc32cHandoffChecksumCalculation(src, allowed, checksum_buf); v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); - s = writable_file_->Append(Slice(src, allowed), io_options, v_info, + s = writable_file_->Append(Slice(src, allowed), opts, v_info, nullptr); } else { - s = writable_file_->Append(Slice(src, allowed), io_options, nullptr); + s = writable_file_->Append(Slice(src, allowed), opts, nullptr); } if (!s.ok()) { // If writable_file_->Append() failed, then the data may or may not @@ -635,8 +654,9 @@ IOStatus WritableFileWriter::WriteBuffered( return s; } -IOStatus WritableFileWriter::WriteBufferedWithChecksum( - const char* data, size_t size, Env::IOPriority op_rate_limiter_priority) { +IOStatus WritableFileWriter::WriteBufferedWithChecksum(const IOOptions& opts, + const char* data, + size_t size) { if (seen_error()) { return 
AssertFalseAndGetStatusForPrevError(); } @@ -648,11 +668,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum( size_t left = size; DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - Env::IOPriority rate_limiter_priority_used = - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority); - IOOptions io_options; - io_options.rate_limiter_priority = rate_limiter_priority_used; + Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority; // Check how much is allowed. Here, we loop until the rate limiter allows to // write the entire buffer. // TODO: need to be improved since it sort of defeats the purpose of the rate @@ -673,7 +689,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum( TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); FileOperationInfo::StartTimePoint start_ts; - uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr); + uint64_t old_size = writable_file_->GetFileSize(opts, nullptr); if (ShouldNotifyListeners()) { start_ts = FileOperationInfo::StartNow(); old_size = next_write_offset_; @@ -685,7 +701,7 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum( EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_); v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); - s = writable_file_->Append(Slice(src, left), io_options, v_info, nullptr); + s = writable_file_->Append(Slice(src, left), opts, v_info, nullptr); SetPerfLevel(prev_perf_level); } if (ShouldNotifyListeners()) { @@ -755,8 +771,7 @@ void WritableFileWriter::Crc32cHandoffChecksumCalculation(const char* data, // whole number of pages to be written again on the next flush because we can // only write on aligned // offsets. 
-IOStatus WritableFileWriter::WriteDirect( - Env::IOPriority op_rate_limiter_priority) { +IOStatus WritableFileWriter::WriteDirect(const IOOptions& opts) { if (seen_error()) { assert(false); @@ -785,11 +800,7 @@ IOStatus WritableFileWriter::WriteDirect( size_t left = buf_.CurrentSize(); DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - Env::IOPriority rate_limiter_priority_used = - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority); - IOOptions io_options; - io_options.rate_limiter_priority = rate_limiter_priority_used; + Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority; while (left > 0) { // Check how much is allowed @@ -813,10 +824,10 @@ IOStatus WritableFileWriter::WriteDirect( Crc32cHandoffChecksumCalculation(src, size, checksum_buf); v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); s = writable_file_->PositionedAppend(Slice(src, size), write_offset, - io_options, v_info, nullptr); + opts, v_info, nullptr); } else { s = writable_file_->PositionedAppend(Slice(src, size), write_offset, - io_options, nullptr); + opts, nullptr); } if (ShouldNotifyListeners()) { @@ -859,8 +870,7 @@ IOStatus WritableFileWriter::WriteDirect( return s; } -IOStatus WritableFileWriter::WriteDirectWithChecksum( - Env::IOPriority op_rate_limiter_priority) { +IOStatus WritableFileWriter::WriteDirectWithChecksum(const IOOptions& opts) { if (seen_error()) { return AssertFalseAndGetStatusForPrevError(); } @@ -895,11 +905,7 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum( DataVerificationInfo v_info; char checksum_buf[sizeof(uint32_t)]; - Env::IOPriority rate_limiter_priority_used = - WritableFileWriter::DecideRateLimiterPriority( - writable_file_->GetIOPriority(), op_rate_limiter_priority); - IOOptions io_options; - io_options.rate_limiter_priority = rate_limiter_priority_used; + Env::IOPriority rate_limiter_priority_used = opts.rate_limiter_priority; // Check how much is allowed. 
Here, we loop until the rate limiter allows to // write the entire buffer. // TODO: need to be improved since it sort of defeats the purpose of the rate @@ -925,8 +931,8 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum( // direct writes must be positional EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_); v_info.checksum = Slice(checksum_buf, sizeof(uint32_t)); - s = writable_file_->PositionedAppend(Slice(src, left), write_offset, - io_options, v_info, nullptr); + s = writable_file_->PositionedAppend(Slice(src, left), write_offset, opts, + v_info, nullptr); if (ShouldNotifyListeners()) { auto finish_ts = std::chrono::steady_clock::now(); @@ -986,4 +992,14 @@ Env::IOPriority WritableFileWriter::DecideRateLimiterPriority( } } +IOOptions WritableFileWriter::FinalizeIOOptions(const IOOptions& opts) const { + Env::IOPriority op_rate_limiter_priority = opts.rate_limiter_priority; + IOOptions io_options(opts); + if (writable_file_.get() != nullptr) { + io_options.rate_limiter_priority = + WritableFileWriter::DecideRateLimiterPriority( + writable_file_->GetIOPriority(), op_rate_limiter_priority); + } + return io_options; +} } // namespace ROCKSDB_NAMESPACE diff --git a/file/writable_file_writer.h b/file/writable_file_writer.h index aac0f59491e..95ecc269d6c 100644 --- a/file/writable_file_writer.h +++ b/file/writable_file_writer.h @@ -13,6 +13,7 @@ #include "db/version_edit.h" #include "env/file_system_tracer.h" +#include "monitoring/thread_status_util.h" #include "port/port.h" #include "rocksdb/file_checksum.h" #include "rocksdb/file_system.h" @@ -159,6 +160,7 @@ class WritableFileWriter { uint64_t bytes_per_sync_; RateLimiter* rate_limiter_; Statistics* stats_; + Histograms hist_type_; std::vector> listeners_; std::unique_ptr checksum_generator_; bool checksum_finalized_; @@ -173,6 +175,7 @@ class WritableFileWriter { const FileOptions& options, SystemClock* clock = nullptr, const std::shared_ptr& io_tracer = nullptr, Statistics* stats = nullptr, + 
Histograms hist_type = Histograms::HISTOGRAM_ENUM_MAX, const std::vector>& listeners = {}, FileChecksumGenFactory* file_checksum_gen_factory = nullptr, bool perform_data_verification = false, @@ -191,6 +194,7 @@ class WritableFileWriter { bytes_per_sync_(options.bytes_per_sync), rate_limiter_(options.rate_limiter), stats_(stats), + hist_type_(hist_type), listeners_(), checksum_generator_(nullptr), checksum_finalized_(false), @@ -222,35 +226,42 @@ class WritableFileWriter { const std::string& fname, const FileOptions& file_opts, std::unique_ptr* writer, IODebugContext* dbg); + + static IOStatus PrepareIOOptions(const WriteOptions& wo, IOOptions& opts); + WritableFileWriter(const WritableFileWriter&) = delete; WritableFileWriter& operator=(const WritableFileWriter&) = delete; ~WritableFileWriter() { - auto s = Close(); + ThreadStatus::OperationType cur_op_type = + ThreadStatusUtil::GetThreadOperation(); + ThreadStatusUtil::SetThreadOperation( + ThreadStatus::OperationType::OP_UNKNOWN); + auto s = Close(IOOptions()); s.PermitUncheckedError(); + ThreadStatusUtil::SetThreadOperation(cur_op_type); } std::string file_name() const { return file_name_; } // When this Append API is called, if the crc32c_checksum is not provided, we // will calculate the checksum internally. - IOStatus Append(const Slice& data, uint32_t crc32c_checksum = 0, - Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL); + IOStatus Append(const IOOptions& opts, const Slice& data, + uint32_t crc32c_checksum = 0); - IOStatus Pad(const size_t pad_bytes, - Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL); + IOStatus Pad(const IOOptions& opts, const size_t pad_bytes); - IOStatus Flush(Env::IOPriority op_rate_limiter_priority = Env::IO_TOTAL); + IOStatus Flush(const IOOptions& opts); - IOStatus Close(); + IOStatus Close(const IOOptions& opts); - IOStatus Sync(bool use_fsync); + IOStatus Sync(const IOOptions& opts, bool use_fsync); // Sync only the data that was already Flush()ed. 
Safe to call concurrently // with Append() and Flush(). If !writable_file_->IsSyncThreadSafe(), // returns NotSupported status. - IOStatus SyncWithoutFlush(bool use_fsync); + IOStatus SyncWithoutFlush(const IOOptions& opts, bool use_fsync); uint64_t GetFileSize() const { return filesize_.load(std::memory_order_acquire); @@ -307,14 +318,14 @@ class WritableFileWriter { // Used when os buffering is OFF and we are writing // DMA such as in Direct I/O mode - IOStatus WriteDirect(Env::IOPriority op_rate_limiter_priority); - IOStatus WriteDirectWithChecksum(Env::IOPriority op_rate_limiter_priority); + IOStatus WriteDirect(const IOOptions& opts); + IOStatus WriteDirectWithChecksum(const IOOptions& opts); // Normal write. - IOStatus WriteBuffered(const char* data, size_t size, - Env::IOPriority op_rate_limiter_priority); - IOStatus WriteBufferedWithChecksum(const char* data, size_t size, - Env::IOPriority op_rate_limiter_priority); - IOStatus RangeSync(uint64_t offset, uint64_t nbytes); - IOStatus SyncInternal(bool use_fsync); + IOStatus WriteBuffered(const IOOptions& opts, const char* data, size_t size); + IOStatus WriteBufferedWithChecksum(const IOOptions& opts, const char* data, + size_t size); + IOStatus RangeSync(const IOOptions& opts, uint64_t offset, uint64_t nbytes); + IOStatus SyncInternal(const IOOptions& opts, bool use_fsync); + IOOptions FinalizeIOOptions(const IOOptions& opts) const; }; } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 7a09d17a5ae..271c99832a2 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1348,9 +1348,10 @@ extern void Fatal(Logger* info_log, const char* format, ...) ROCKSDB_PRINTF_FORMAT_ATTR(2, 3); // A utility routine: write "data" to the named file. 
-extern Status WriteStringToFile(Env* env, const Slice& data, - const std::string& fname, - bool should_sync = false); +extern Status WriteStringToFile( + Env* env, const Slice& data, const std::string& fname, + bool should_sync = false, + Env::IOActivity io_activity = Env::IOActivity::kUnknown); // A utility routine: read contents of named file into *data extern Status ReadFileToString(Env* env, const std::string& fname, diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index f8e321417ce..1b41c694337 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -1917,7 +1917,8 @@ class FSDirectoryWrapper : public FSDirectory { // A utility routine: write "data" to the named file. extern IOStatus WriteStringToFile(FileSystem* fs, const Slice& data, const std::string& fname, - bool should_sync = false); + bool should_sync = false, + const IOOptions& opts = IOOptions()); // A utility routine: read contents of named file into *data extern IOStatus ReadFileToString(FileSystem* fs, const std::string& fname, diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index d11ccc62f53..3a4da54e334 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1742,7 +1742,7 @@ struct WriteOptions { // system call followed by "fdatasync()". // // Default: false - bool sync; + bool sync = false; // If true, writes will not first go to the write ahead log, // and the write may get lost after a crash. The backup engine @@ -1750,18 +1750,18 @@ struct WriteOptions { // you disable write-ahead logs, you must create backups with // flush_before_backup=true to avoid losing unflushed memtable data. // Default: false - bool disableWAL; + bool disableWAL = false; // If true and if user is trying to write to column families that don't exist // (they were dropped), ignore the write (don't return an error). If there // are multiple writes in a WriteBatch, other writes will succeed. 
// Default: false - bool ignore_missing_column_families; + bool ignore_missing_column_families = false; // If true and we need to wait or sleep for the write request, fails // immediately with Status::Incomplete(). // Default: false - bool no_slowdown; + bool no_slowdown = false; // If true, this write request is of lower priority if compaction is // behind. In this case, no_slowdown = true, the request will be canceled @@ -1770,7 +1770,7 @@ struct WriteOptions { // it introduces minimum impacts to high priority writes. // // Default: false - bool low_pri; + bool low_pri = false; // If true, this writebatch will maintain the last insert positions of each // memtable as hints in concurrent write. It can improve write performance @@ -1779,7 +1779,7 @@ struct WriteOptions { // option will be ignored. // // Default: false - bool memtable_insert_hint_per_batch; + bool memtable_insert_hint_per_batch = false; // For writes associated with this option, charge the internal rate // limiter (see `DBOptions::rate_limiter`) at the specified priority. The @@ -1794,24 +1794,22 @@ struct WriteOptions { // due to implementation constraints. // // Default: `Env::IO_TOTAL` - Env::IOPriority rate_limiter_priority; + Env::IOPriority rate_limiter_priority = Env::IO_TOTAL; // `protection_bytes_per_key` is the number of bytes used to store // protection information for each key entry. Currently supported values are // zero (disabled) and eight. // // Default: zero (disabled). - size_t protection_bytes_per_key; - - WriteOptions() - : sync(false), - disableWAL(false), - ignore_missing_column_families(false), - no_slowdown(false), - low_pri(false), - memtable_insert_hint_per_batch(false), - rate_limiter_priority(Env::IO_TOTAL), - protection_bytes_per_key(0) {} + size_t protection_bytes_per_key = 0; + + // For RocksDB internal use only + // + // Default: Env::IOActivity::kUnknown. 
+ Env::IOActivity io_activity = Env::IOActivity::kUnknown; + + WriteOptions() {} + explicit WriteOptions(Env::IOActivity _io_activity); }; // Options that control flush operations diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index b07ee1f33e5..5a9f7932fcd 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -567,6 +567,14 @@ enum Histograms : uint32_t { FILE_READ_VERIFY_DB_CHECKSUM_MICROS, FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS, + // Time spent in writing block-based or plain SST table + SST_WRITE_MICROS, + // Time spent in writing SST table (currently only block-based table) or blob + // file for flush, compaction or db open + FILE_WRITE_FLUSH_MICROS, + FILE_WRITE_COMPACTION_MICROS, + FILE_WRITE_DB_OPEN_MICROS, + // The number of subcompactions actually scheduled during a compaction NUM_SUBCOMPACTIONS_SCHEDULED, // Value size distribution in each operation diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index 16120b0370c..1064ff7e19f 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5642,10 +5642,17 @@ class HistogramTypeJni { case ROCKSDB_NAMESPACE::Histograms:: FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS: return 0x41; + case ROCKSDB_NAMESPACE::Histograms::SST_WRITE_MICROS: + return 0x42; + case ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_FLUSH_MICROS: + return 0x43; + case ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_COMPACTION_MICROS: + return 0x44; + case ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_DB_OPEN_MICROS: + return 0x45; case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: // 0x1F for backwards compatibility on current minor version. 
return 0x1F; - default: // undefined/default return 0x0; @@ -5779,6 +5786,14 @@ class HistogramTypeJni { case 0x41: return ROCKSDB_NAMESPACE::Histograms:: FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS; + case 0x42: + return ROCKSDB_NAMESPACE::Histograms::SST_WRITE_MICROS; + case 0x43: + return ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_FLUSH_MICROS; + case 0x44: + return ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_COMPACTION_MICROS; + case 0x45: + return ROCKSDB_NAMESPACE::Histograms::FILE_WRITE_DB_OPEN_MICROS; case 0x1F: // 0x1F for backwards compatibility on current minor version. return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; diff --git a/java/src/main/java/org/rocksdb/HistogramType.java b/java/src/main/java/org/rocksdb/HistogramType.java index 41fe241ad3a..aff5cad141a 100644 --- a/java/src/main/java/org/rocksdb/HistogramType.java +++ b/java/src/main/java/org/rocksdb/HistogramType.java @@ -185,6 +185,14 @@ public enum HistogramType { FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS((byte) 0x41), + SST_WRITE_MICROS((byte) 0x42), + + FILE_WRITE_FLUSH_MICROS((byte) 0x43), + + FILE_WRITE_COMPACTION_MICROS((byte) 0x44), + + FILE_WRITE_DB_OPEN_MICROS((byte) 0x45), + // 0x1F for backwards compatibility on current minor version. 
HISTOGRAM_ENUM_MAX((byte) 0x1F); diff --git a/logging/env_logger.h b/logging/env_logger.h index fc9b245504f..61c95711a22 100644 --- a/logging/env_logger.h +++ b/logging/env_logger.h @@ -75,7 +75,8 @@ class EnvLogger : public Logger { mutex_.AssertHeld(); if (flush_pending_) { flush_pending_ = false; - file_.Flush().PermitUncheckedError(); + // TODO: plumb Env::IOActivity, Env::IOPriority + file_.Flush(IOOptions()).PermitUncheckedError(); file_.reset_seen_error(); } last_flush_micros_ = clock_->NowMicros(); @@ -93,7 +94,8 @@ class EnvLogger : public Logger { Status CloseHelper() { FileOpGuard guard(*this); - const auto close_status = file_.Close(); + // TODO: plumb Env::IOActivity, Env::IOPriority + const auto close_status = file_.Close(IOOptions()); if (close_status.ok()) { return close_status; @@ -162,7 +164,8 @@ class EnvLogger : public Logger { { FileOpGuard guard(*this); // We will ignore any error returned by Append(). - file_.Append(Slice(base, p - base)).PermitUncheckedError(); + // TODO: plumb Env::IOActivity, Env::IOPriority + file_.Append(IOOptions(), Slice(base, p - base)).PermitUncheckedError(); file_.reset_seen_error(); flush_pending_ = true; const uint64_t now_micros = clock_->NowMicros(); diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 5a7473f2ac0..976101f83a2 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -282,6 +282,7 @@ const std::vector> HistogramsNameMap = { {FILE_READ_FLUSH_MICROS, "rocksdb.file.read.flush.micros"}, {FILE_READ_COMPACTION_MICROS, "rocksdb.file.read.compaction.micros"}, {FILE_READ_DB_OPEN_MICROS, "rocksdb.file.read.db.open.micros"}, + {FILE_READ_GET_MICROS, "rocksdb.file.read.get.micros"}, {FILE_READ_MULTIGET_MICROS, "rocksdb.file.read.multiget.micros"}, {FILE_READ_DB_ITERATOR_MICROS, "rocksdb.file.read.db.iterator.micros"}, @@ -289,6 +290,10 @@ const std::vector> HistogramsNameMap = { "rocksdb.file.read.verify.db.checksum.micros"}, {FILE_READ_VERIFY_FILE_CHECKSUMS_MICROS, 
"rocksdb.file.read.verify.file.checksums.micros"}, + {SST_WRITE_MICROS, "rocksdb.sst.write.micros"}, + {FILE_WRITE_FLUSH_MICROS, "rocksdb.file.write.flush.micros"}, + {FILE_WRITE_COMPACTION_MICROS, "rocksdb.file.write.compaction.micros"}, + {FILE_WRITE_DB_OPEN_MICROS, "rocksdb.file.write.db.open.micros"}, {NUM_SUBCOMPACTIONS_SCHEDULED, "rocksdb.num.subcompactions.scheduled"}, {BYTES_PER_READ, "rocksdb.bytes.per.read"}, {BYTES_PER_WRITE, "rocksdb.bytes.per.write"}, diff --git a/options/options.cc b/options/options.cc index 03289e5b602..fbb82a60df4 100644 --- a/options/options.cc +++ b/options/options.cc @@ -703,4 +703,6 @@ ReadOptions::ReadOptions(bool _verify_checksums, bool _fill_cache) ReadOptions::ReadOptions(Env::IOActivity _io_activity) : io_activity(_io_activity) {} +WriteOptions::WriteOptions(Env::IOActivity _io_activity) + : io_activity(_io_activity) {} } // namespace ROCKSDB_NAMESPACE diff --git a/options/options_parser.cc b/options/options_parser.cc index a8c855d6e22..371323a8ee5 100644 --- a/options/options_parser.cc +++ b/options/options_parser.cc @@ -35,7 +35,8 @@ static const std::string option_file_header = "#\n" "\n"; -Status PersistRocksDBOptions(const DBOptions& db_opt, +Status PersistRocksDBOptions(const WriteOptions& write_options, + const DBOptions& db_opt, const std::vector& cf_names, const std::vector& cf_opts, const std::string& file_name, FileSystem* fs) { @@ -48,11 +49,12 @@ Status PersistRocksDBOptions(const DBOptions& db_opt, if (db_opt.log_readahead_size > 0) { config_options.file_readahead_size = db_opt.log_readahead_size; } - return PersistRocksDBOptions(config_options, db_opt, cf_names, cf_opts, - file_name, fs); + return PersistRocksDBOptions(write_options, config_options, db_opt, cf_names, + cf_opts, file_name, fs); } -Status PersistRocksDBOptions(const ConfigOptions& config_options_in, +Status PersistRocksDBOptions(const WriteOptions& write_options, + const ConfigOptions& config_options_in, const DBOptions& db_opt, const 
std::vector& cf_names, const std::vector& cf_opts, @@ -79,62 +81,71 @@ Status PersistRocksDBOptions(const ConfigOptions& config_options_in, std::string options_file_content; - s = writable->Append( - option_file_header + "[" + opt_section_titles[kOptionSectionVersion] + - "]\n" - " rocksdb_version=" + - std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR) + - "." + std::to_string(ROCKSDB_PATCH) + "\n"); + IOOptions opts; + s = WritableFileWriter::PrepareIOOptions(write_options, opts); + if (!s.ok()) { + return s; + } + s = writable->Append(opts, option_file_header + "[" + + opt_section_titles[kOptionSectionVersion] + + "]\n" + " rocksdb_version=" + + std::to_string(ROCKSDB_MAJOR) + "." + + std::to_string(ROCKSDB_MINOR) + "." + + std::to_string(ROCKSDB_PATCH) + "\n"); if (s.ok()) { s = writable->Append( + opts, " options_file_version=" + std::to_string(ROCKSDB_OPTION_FILE_MAJOR) + - "." + std::to_string(ROCKSDB_OPTION_FILE_MINOR) + "\n"); + "." + std::to_string(ROCKSDB_OPTION_FILE_MINOR) + "\n"); } if (s.ok()) { - s = writable->Append("\n[" + opt_section_titles[kOptionSectionDBOptions] + - "]\n "); + s = writable->Append( + opts, "\n[" + opt_section_titles[kOptionSectionDBOptions] + "]\n "); } if (s.ok()) { s = GetStringFromDBOptions(config_options, db_opt, &options_file_content); } if (s.ok()) { - s = writable->Append(options_file_content + "\n"); + s = writable->Append(opts, options_file_content + "\n"); } for (size_t i = 0; s.ok() && i < cf_opts.size(); ++i) { // CFOptions section - s = writable->Append("\n[" + opt_section_titles[kOptionSectionCFOptions] + - " \"" + EscapeOptionString(cf_names[i]) + "\"]\n "); + s = writable->Append( + opts, "\n[" + opt_section_titles[kOptionSectionCFOptions] + " \"" + + EscapeOptionString(cf_names[i]) + "\"]\n "); if (s.ok()) { s = GetStringFromColumnFamilyOptions(config_options, cf_opts[i], &options_file_content); } if (s.ok()) { - s = writable->Append(options_file_content + "\n"); + s = writable->Append(opts, 
options_file_content + "\n"); } // TableOptions section auto* tf = cf_opts[i].table_factory.get(); if (tf != nullptr) { if (s.ok()) { s = writable->Append( - "[" + opt_section_titles[kOptionSectionTableOptions] + tf->Name() + - " \"" + EscapeOptionString(cf_names[i]) + "\"]\n "); + opts, "[" + opt_section_titles[kOptionSectionTableOptions] + + tf->Name() + " \"" + EscapeOptionString(cf_names[i]) + + "\"]\n "); } if (s.ok()) { options_file_content.clear(); s = tf->GetOptionString(config_options, &options_file_content); } if (s.ok()) { - s = writable->Append(options_file_content + "\n"); + s = writable->Append(opts, options_file_content + "\n"); } } } if (s.ok()) { - s = writable->Sync(true /* use_fsync */); + s = writable->Sync(opts, true /* use_fsync */); } if (s.ok()) { - s = writable->Close(); + s = writable->Close(opts); } TEST_SYNC_POINT("PersistRocksDBOptions:written"); if (s.ok()) { @@ -733,4 +744,3 @@ Status RocksDBOptionsParser::VerifyTableFactory( return Status::OK(); } } // namespace ROCKSDB_NAMESPACE - diff --git a/options/options_parser.h b/options/options_parser.h index 4268051f340..e702c9f4999 100644 --- a/options/options_parser.h +++ b/options/options_parser.h @@ -32,11 +32,13 @@ enum OptionSection : char { static const std::string opt_section_titles[] = { "Version", "DBOptions", "CFOptions", "TableOptions/", "Unknown"}; -Status PersistRocksDBOptions(const DBOptions& db_opt, +Status PersistRocksDBOptions(const WriteOptions& write_options, + const DBOptions& db_opt, const std::vector& cf_names, const std::vector& cf_opts, const std::string& file_name, FileSystem* fs); -Status PersistRocksDBOptions(const ConfigOptions& config_options, +Status PersistRocksDBOptions(const WriteOptions& write_options, + const ConfigOptions& config_options, const DBOptions& db_opt, const std::vector& cf_names, const std::vector& cf_opts, diff --git a/options/options_test.cc b/options/options_test.cc index 855243c955f..db663f4d9b2 100644 --- a/options/options_test.cc +++ 
b/options/options_test.cc @@ -3665,8 +3665,8 @@ TEST_F(OptionsParserTest, Readahead) { std::vector cf_names = {"default", one_mb_string}; const std::string kOptionsFileName = "test-persisted-options.ini"; - ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts, - kOptionsFileName, fs_.get())); + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), base_db_opt, cf_names, + base_cf_opts, kOptionsFileName, fs_.get())); uint64_t file_size = 0; ASSERT_OK( @@ -3740,8 +3740,8 @@ TEST_F(OptionsParserTest, DumpAndParse) { const std::string kOptionsFileName = "test-persisted-options.ini"; // Use default for escaped(true), unknown(false) and check (exact) ConfigOptions config_options; - ASSERT_OK(PersistRocksDBOptions(base_db_opt, cf_names, base_cf_opts, - kOptionsFileName, fs_.get())); + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), base_db_opt, cf_names, + base_cf_opts, kOptionsFileName, fs_.get())); RocksDBOptionsParser parser; ASSERT_OK(parser.Parse(config_options, kOptionsFileName, fs_.get())); @@ -3801,9 +3801,9 @@ TEST_F(OptionsParserTest, DifferentDefault) { ColumnFamilyOptions cf_univ_opts; cf_univ_opts.OptimizeUniversalStyleCompaction(); - ASSERT_OK(PersistRocksDBOptions(DBOptions(), {"default", "universal"}, - {cf_level_opts, cf_univ_opts}, - kOptionsFileName, fs_.get())); + ASSERT_OK(PersistRocksDBOptions( + WriteOptions(), DBOptions(), {"default", "universal"}, + {cf_level_opts, cf_univ_opts}, kOptionsFileName, fs_.get())); RocksDBOptionsParser parser; ASSERT_OK(parser.Parse(kOptionsFileName, fs_.get(), false, @@ -3946,8 +3946,8 @@ class OptionsSanityCheckTest : public OptionsParserTest, if (!s.ok()) { return s; } - return PersistRocksDBOptions(db_opts, {"default"}, {cf_opts}, - kOptionsFileName, fs_.get()); + return PersistRocksDBOptions(WriteOptions(), db_opts, {"default"}, + {cf_opts}, kOptionsFileName, fs_.get()); } Status PersistCFOptions(const ColumnFamilyOptions& cf_opts) { diff --git a/table/block_based/block_based_table_builder.cc 
b/table/block_based/block_based_table_builder.cc index 051f9d87b4c..eb845ba9a7c 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -266,6 +266,7 @@ struct BlockBasedTableBuilder::Rep { // BEGIN from MutableCFOptions std::shared_ptr prefix_extractor; // END from MutableCFOptions + const WriteOptions write_options; const BlockBasedTableOptions table_options; const InternalKeyComparator& internal_comparator; // Size in bytes for the user-defined timestamps. @@ -441,6 +442,7 @@ struct BlockBasedTableBuilder::Rep { WritableFileWriter* f) : ioptions(tbo.ioptions), prefix_extractor(tbo.moptions.prefix_extractor), + write_options(tbo.write_options), table_options(table_opt), internal_comparator(tbo.internal_comparator), ts_sz(tbo.internal_comparator.user_comparator()->timestamp_size()), @@ -1310,6 +1312,13 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock( // checksum: uint32 Rep* r = rep_; bool is_data_block = block_type == BlockType::kData; + IOOptions io_options; + IOStatus io_s = + WritableFileWriter::PrepareIOOptions(r->write_options, io_options); + if (!io_s.ok()) { + r->SetIOStatus(io_s); + return; + } // Old, misleading name of this function: WriteRawBlock StopWatch sw(r->ioptions.clock, r->ioptions.stats, WRITE_RAW_BLOCK_MICROS); const uint64_t offset = r->get_offset(); @@ -1323,7 +1332,7 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock( } { - IOStatus io_s = r->file->Append(block_contents); + io_s = r->file->Append(io_options, block_contents); if (!io_s.ok()) { r->SetIOStatus(io_s); return; @@ -1350,7 +1359,7 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock( "BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum", trailer.data()); { - IOStatus io_s = r->file->Append(Slice(trailer.data(), trailer.size())); + io_s = r->file->Append(io_options, Slice(trailer.data(), trailer.size())); if (!io_s.ok()) { r->SetIOStatus(io_s); return; @@ -1387,7 +1396,8 @@ void 
BlockBasedTableBuilder::WriteMaybeCompressedBlock( (r->alignment - ((block_contents.size() + kBlockTrailerSize) & (r->alignment - 1))) & (r->alignment - 1); - IOStatus io_s = r->file->Pad(pad_bytes); + + io_s = r->file->Pad(io_options, pad_bytes); if (io_s.ok()) { r->set_offset(r->get_offset() + pad_bytes); } else { @@ -1780,6 +1790,13 @@ void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle, BlockHandle& index_block_handle) { assert(ok()); Rep* r = rep_; + IOOptions io_options; + IOStatus ios = + WritableFileWriter::PrepareIOOptions(r->write_options, io_options); + if (!ios.ok()) { + r->SetIOStatus(ios); + return; + } // this is guaranteed by BlockBasedTableBuilder's constructor assert(r->table_options.checksum == kCRC32c || r->table_options.format_version != 0); @@ -1792,7 +1809,7 @@ void BlockBasedTableBuilder::WriteFooter(BlockHandle& metaindex_block_handle, r->SetStatus(s); return; } - IOStatus ios = r->file->Append(footer.GetSlice()); + ios = r->file->Append(io_options, footer.GetSlice()); if (ios.ok()) { r->set_offset(r->get_offset() + footer.GetSlice().size()); } else { diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index 57d65d5552e..c05ebdca1de 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -2792,7 +2792,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { "--------------------------------------\n"; std::unique_ptr metaindex; std::unique_ptr metaindex_iter; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions ro; Status s = ReadMetaIndexBlock(ro, nullptr /* prefetch_buffer */, &metaindex, &metaindex_iter); @@ -2897,7 +2897,7 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) { Status BlockBasedTable::DumpIndexBlock(std::ostream& out_stream) { out_stream << "Index Details:\n" "--------------------------------------\n"; - // TODO: plumb 
Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; std::unique_ptr> blockhandles_iter( NewIndexIterator(read_options, /*need_upper_bound_check=*/false, @@ -2948,7 +2948,7 @@ Status BlockBasedTable::DumpIndexBlock(std::ostream& out_stream) { } Status BlockBasedTable::DumpDataBlocks(std::ostream& out_stream) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; std::unique_ptr> blockhandles_iter( NewIndexIterator(read_options, /*need_upper_bound_check=*/false, diff --git a/table/block_based/block_based_table_reader_test.cc b/table/block_based/block_based_table_reader_test.cc index 2aaf505f866..b6a6607ab95 100644 --- a/table/block_based/block_based_table_reader_test.cc +++ b/table/block_based/block_based_table_reader_test.cc @@ -105,10 +105,11 @@ class BlockBasedTableReaderBaseTest : public testing::Test { compression_opts.max_dict_bytes = compression_dict_bytes; compression_opts.max_dict_buffer_bytes = compression_dict_bytes; IntTblPropCollectorFactories factories; + const WriteOptions write_options; std::unique_ptr table_builder( options_.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, comparator, &factories, - compression_type, compression_opts, + TableBuilderOptions(ioptions, moptions, write_options, comparator, + &factories, compression_type, compression_opts, 0 /* column_family_id */, kDefaultColumnFamilyName, -1 /* level */), writer.get())); diff --git a/table/block_based/data_block_hash_index_test.cc b/table/block_based/data_block_hash_index_test.cc index 2841b271dea..2266c152452 100644 --- a/table/block_based/data_block_hash_index_test.cc +++ b/table/block_based/data_block_hash_index_test.cc @@ -553,9 +553,10 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, std::unique_ptr builder; IntTblPropCollectorFactories int_tbl_prop_collector_factories; std::string column_family_name; + const WriteOptions 
write_options; builder.reset(ioptions.table_factory->NewTableBuilder( TableBuilderOptions( - ioptions, moptions, internal_comparator, + ioptions, moptions, write_options, internal_comparator, &int_tbl_prop_collector_factories, options.compression, CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, @@ -567,7 +568,7 @@ void TestBoundary(InternalKey& ik1, std::string& v1, InternalKey& ik2, EXPECT_TRUE(builder->status().ok()); Status s = builder->Finish(); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Flush(IOOptions())); EXPECT_TRUE(s.ok()) << s.ToString(); EXPECT_EQ(sink->contents().size(), builder->FileSize()); diff --git a/table/block_fetcher_test.cc b/table/block_fetcher_test.cc index 18109811d56..5fc4b8ae63d 100644 --- a/table/block_fetcher_test.cc +++ b/table/block_fetcher_test.cc @@ -77,9 +77,10 @@ class BlockFetcherTest : public testing::Test { ColumnFamilyOptions cf_options(options_); MutableCFOptions moptions(cf_options); IntTblPropCollectorFactories factories; + const WriteOptions write_options; std::unique_ptr table_builder(table_factory_.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, comparator, &factories, - compression_type, CompressionOptions(), + TableBuilderOptions(ioptions, moptions, write_options, comparator, + &factories, compression_type, CompressionOptions(), 0 /* column_family_id */, kDefaultColumnFamilyName, -1 /* level */), writer.get())); diff --git a/table/cuckoo/cuckoo_table_builder.cc b/table/cuckoo/cuckoo_table_builder.cc index 0cf6834af81..011517bab0b 100644 --- a/table/cuckoo/cuckoo_table_builder.cc +++ b/table/cuckoo/cuckoo_table_builder.cc @@ -319,15 +319,17 @@ Status CuckooTableBuilder::Finish() { unused_bucket.resize(static_cast(bucket_size), 'a'); // Write the table. 
uint32_t num_added = 0; + // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; for (auto& bucket : buckets) { if (bucket.vector_idx == kMaxVectorIdx) { - io_status_ = file_->Append(Slice(unused_bucket)); + io_status_ = file_->Append(opts, Slice(unused_bucket)); } else { ++num_added; - io_status_ = file_->Append(GetKey(bucket.vector_idx)); + io_status_ = file_->Append(opts, GetKey(bucket.vector_idx)); if (io_status_.ok()) { if (value_size_ > 0) { - io_status_ = file_->Append(GetValue(bucket.vector_idx)); + io_status_ = file_->Append(opts, GetValue(bucket.vector_idx)); } } } @@ -383,7 +385,7 @@ Status CuckooTableBuilder::Finish() { BlockHandle property_block_handle; property_block_handle.set_offset(offset); property_block_handle.set_size(property_block.size()); - io_status_ = file_->Append(property_block); + io_status_ = file_->Append(opts, property_block); offset += property_block.size(); if (!io_status_.ok()) { status_ = io_status_; @@ -396,7 +398,7 @@ Status CuckooTableBuilder::Finish() { BlockHandle meta_index_block_handle; meta_index_block_handle.set_offset(offset); meta_index_block_handle.set_size(meta_index_block.size()); - io_status_ = file_->Append(meta_index_block); + io_status_ = file_->Append(opts, meta_index_block); if (!io_status_.ok()) { status_ = io_status_; return status_; @@ -409,7 +411,7 @@ Status CuckooTableBuilder::Finish() { status_ = s; return status_; } - io_status_ = file_->Append(footer.GetSlice()); + io_status_ = file_->Append(opts, footer.GetSlice()); status_ = io_status_; return status_; } diff --git a/table/cuckoo/cuckoo_table_builder_test.cc b/table/cuckoo/cuckoo_table_builder_test.cc index 1a0d58c76d1..967e8e2db7b 100644 --- a/table/cuckoo/cuckoo_table_builder_test.cc +++ b/table/cuckoo/cuckoo_table_builder_test.cc @@ -182,7 +182,7 @@ TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) { ASSERT_OK(builder.status()); ASSERT_EQ(0UL, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + 
ASSERT_OK(file_writer->Close(IOOptions())); CheckFileContents({}, {}, {}, "", 2, 2, false); } @@ -229,7 +229,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); @@ -277,7 +277,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); @@ -325,7 +325,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); @@ -374,7 +374,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) { size_t bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); @@ -420,7 +420,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { size_t 
bucket_size = keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = GetInternalKey("key00", true); @@ -463,7 +463,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { size_t bucket_size = user_keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = "key00"; @@ -507,7 +507,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { size_t bucket_size = user_keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = "key00"; @@ -550,7 +550,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) { size_t bucket_size = user_keys[0].size() + values[0].size(); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_OK(builder.Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); std::string expected_unused_bucket = "key00"; @@ -589,7 +589,7 @@ TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) { ASSERT_OK(builder.status()); } ASSERT_TRUE(builder.Finish().IsNotSupported()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); } TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) { @@ -619,7 +619,7 @@ TEST_F(CuckooBuilderTest, 
FailWhenSameKeyInserted) { ASSERT_OK(builder.status()); ASSERT_TRUE(builder.Finish().IsNotSupported()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); } } // namespace ROCKSDB_NAMESPACE diff --git a/table/cuckoo/cuckoo_table_reader.cc b/table/cuckoo/cuckoo_table_reader.cc index a4479ab60cd..dec5873bb77 100644 --- a/table/cuckoo/cuckoo_table_reader.cc +++ b/table/cuckoo/cuckoo_table_reader.cc @@ -59,7 +59,7 @@ CuckooTableReader::CuckooTableReader( } { std::unique_ptr props; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; status_ = ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber, diff --git a/table/cuckoo/cuckoo_table_reader_test.cc b/table/cuckoo/cuckoo_table_reader_test.cc index e83baa10779..4fe3c873bc3 100644 --- a/table/cuckoo/cuckoo_table_reader_test.cc +++ b/table/cuckoo/cuckoo_table_reader_test.cc @@ -104,7 +104,7 @@ class CuckooReaderTest : public testing::Test { ASSERT_OK(builder.Finish()); ASSERT_EQ(num_items, builder.NumEntries()); file_size = builder.FileSize(); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); // Check reader now. std::unique_ptr file_reader; @@ -431,7 +431,7 @@ void WriteFile(const std::vector& keys, const uint64_t num, } ASSERT_OK(builder.Finish()); ASSERT_EQ(num, builder.NumEntries()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); uint64_t file_size; ASSERT_OK( @@ -571,4 +571,3 @@ int main(int argc, char** argv) { } #endif // GFLAGS. 
- diff --git a/table/mock_table.cc b/table/mock_table.cc index 1823758e446..07c1edb9564 100644 --- a/table/mock_table.cc +++ b/table/mock_table.cc @@ -298,7 +298,8 @@ Status MockTableFactory::GetAndWriteNextID(WritableFileWriter* file, *next_id = next_id_.fetch_add(1); char buf[4]; EncodeFixed32(buf, *next_id); - return file->Append(Slice(buf, 4)); + // TODO: plumb Env::IOActivity, Env::IOPriority + return file->Append(IOOptions(), Slice(buf, 4)); } Status MockTableFactory::GetIDFromFile(RandomAccessFileReader* file, diff --git a/table/plain/plain_table_builder.cc b/table/plain/plain_table_builder.cc index ffa811c3ca8..7a1ca1c01e8 100644 --- a/table/plain/plain_table_builder.cc +++ b/table/plain/plain_table_builder.cc @@ -40,7 +40,8 @@ IOStatus WriteBlock(const Slice& block_contents, WritableFileWriter* file, uint64_t* offset, BlockHandle* block_handle) { block_handle->set_offset(*offset); block_handle->set_size(block_contents.size()); - IOStatus io_s = file->Append(block_contents); + // TODO: plumb Env::IOActivity, Env::IOPriority + IOStatus io_s = file->Append(IOOptions(), block_contents); if (io_s.ok()) { *offset += block_contents.size(); @@ -136,6 +137,8 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) { // temp buffer for metadata bytes between key and value. 
char meta_bytes_buf[6]; size_t meta_bytes_buf_size = 0; + // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; ParsedInternalKey internal_key; if (!ParseInternalKey(key, &internal_key, false /* log_err_key */) @@ -176,12 +179,13 @@ void PlainTableBuilder::Add(const Slice& key, const Slice& value) { EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size); assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf)); meta_bytes_buf_size = end_ptr - meta_bytes_buf; - io_status_ = file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size)); + io_status_ = + file_->Append(opts, Slice(meta_bytes_buf, meta_bytes_buf_size)); } // Write value if (io_status_.ok()) { - io_status_ = file_->Append(value); + io_status_ = file_->Append(opts, value); offset_ += value_size + meta_bytes_buf_size; } @@ -300,7 +304,8 @@ Status PlainTableBuilder::Finish() { status_ = s; return status_; } - io_status_ = file_->Append(footer.GetSlice()); + // TODO: plumb Env::IOActivity, Env::IOPriority + io_status_ = file_->Append(IOOptions(), footer.GetSlice()); if (io_status_.ok()) { offset_ += footer.GetSlice().size(); } diff --git a/table/plain/plain_table_key_coding.cc b/table/plain/plain_table_key_coding.cc index 0ac42319103..f72309b4f1b 100644 --- a/table/plain/plain_table_key_coding.cc +++ b/table/plain/plain_table_key_coding.cc @@ -94,6 +94,9 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key, Slice key_to_write = key; // Portion of internal key to write out. 
uint32_t user_key_size = static_cast(key.size() - 8); + // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; + if (encoding_type_ == kPlain) { if (fixed_user_key_len_ == kPlainTableVariableLength) { // Write key length @@ -101,7 +104,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key, char* ptr = EncodeVarint32(key_size_buf, user_key_size); assert(ptr <= key_size_buf + sizeof(key_size_buf)); auto len = ptr - key_size_buf; - IOStatus io_s = file->Append(Slice(key_size_buf, len)); + IOStatus io_s = file->Append(opts, Slice(key_size_buf, len)); if (!io_s.ok()) { return io_s; } @@ -119,7 +122,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key, key_count_for_prefix_ = 1; pre_prefix_.SetUserKey(prefix); size_bytes_pos += EncodeSize(kFullKey, user_key_size, size_bytes); - IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos)); + IOStatus io_s = file->Append(opts, Slice(size_bytes, size_bytes_pos)); if (!io_s.ok()) { return io_s; } @@ -137,7 +140,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key, static_cast(pre_prefix_.GetUserKey().size()); size_bytes_pos += EncodeSize(kKeySuffix, user_key_size - prefix_len, size_bytes + size_bytes_pos); - IOStatus io_s = file->Append(Slice(size_bytes, size_bytes_pos)); + IOStatus io_s = file->Append(opts, Slice(size_bytes, size_bytes_pos)); if (!io_s.ok()) { return io_s; } @@ -152,7 +155,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key, // in this buffer to safe one file append call, which takes 1 byte. 
if (parsed_key.sequence == 0 && parsed_key.type == kTypeValue) { IOStatus io_s = - file->Append(Slice(key_to_write.data(), key_to_write.size() - 8)); + file->Append(opts, Slice(key_to_write.data(), key_to_write.size() - 8)); if (!io_s.ok()) { return io_s; } @@ -160,7 +163,7 @@ IOStatus PlainTableKeyEncoder::AppendKey(const Slice& key, meta_bytes_buf[*meta_bytes_buf_size] = PlainTableFactory::kValueTypeSeqId0; *meta_bytes_buf_size += 1; } else { - IOStatus io_s = file->Append(key_to_write); + IOStatus io_s = file->Append(opts, key_to_write); if (!io_s.ok()) { return io_s; } diff --git a/table/plain/plain_table_reader.cc b/table/plain/plain_table_reader.cc index a74da1f8952..b1db911a707 100644 --- a/table/plain/plain_table_reader.cc +++ b/table/plain/plain_table_reader.cc @@ -126,7 +126,7 @@ Status PlainTableReader::Open( } std::unique_ptr props; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, ioptions, read_options, &props); @@ -300,7 +300,7 @@ Status PlainTableReader::PopulateIndex(TableProperties* props, BlockContents index_block_contents; - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */, diff --git a/table/sst_file_dumper.cc b/table/sst_file_dumper.cc index 150776de1b2..5404bbd7626 100644 --- a/table/sst_file_dumper.cc +++ b/table/sst_file_dumper.cc @@ -296,14 +296,17 @@ Status SstFileDumper::ShowCompressionSize( const ImmutableOptions imoptions(opts); const ColumnFamilyOptions cfo(opts); const MutableCFOptions moptions(cfo); + // TODO: plumb Env::IOActivity, Env::IOPriority + const WriteOptions write_options; ROCKSDB_NAMESPACE::InternalKeyComparator ikc(opts.comparator); IntTblPropCollectorFactories block_based_table_factories; std::string column_family_name; int unknown_level = 
-1; + TableBuilderOptions tb_opts( - imoptions, moptions, ikc, &block_based_table_factories, compress_type, - compress_opt, + imoptions, moptions, write_options, ikc, &block_based_table_factories, + compress_type, compress_opt, TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, column_family_name, unknown_level); uint64_t num_data_blocks = 0; @@ -368,7 +371,7 @@ Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number, RandomAccessFileReader* file, uint64_t file_size, FilePrefetchBuffer* prefetch_buffer) { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; Status s = ROCKSDB_NAMESPACE::ReadTableProperties( file, file_size, table_magic_number, ioptions_, read_options, diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index b929a7e280b..fd0e829ce97 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -40,7 +40,9 @@ struct SstFileWriter::Rep { cfh(_cfh), invalidate_page_cache(_invalidate_page_cache), skip_filters(_skip_filters), - db_session_id(_db_session_id) {} + db_session_id(_db_session_id) { + write_options.rate_limiter_priority = io_priority; + } std::unique_ptr file_writer; std::unique_ptr builder; @@ -48,6 +50,7 @@ struct SstFileWriter::Rep { ImmutableOptions ioptions; MutableCFOptions mutable_cf_options; Env::IOPriority io_priority; + WriteOptions write_options; InternalKeyComparator internal_comparator; ExternalSstFileInfo file_info; InternalKey ikey; @@ -346,12 +349,13 @@ Status SstFileWriter::Open(const std::string& file_path) { // TODO: it would be better to set oldest_key_time to be used for getting the // approximate time of ingested keys. 
TableBuilderOptions table_builder_options( - r->ioptions, r->mutable_cf_options, r->internal_comparator, - &int_tbl_prop_collector_factories, compression_type, compression_opts, - cf_id, r->column_family_name, unknown_level, false /* is_bottommost */, - TableFileCreationReason::kMisc, 0 /* oldest_key_time */, - 0 /* file_creation_time */, "SST Writer" /* db_id */, r->db_session_id, - 0 /* target_file_size */, r->next_file_number); + r->ioptions, r->mutable_cf_options, r->write_options, + r->internal_comparator, &int_tbl_prop_collector_factories, + compression_type, compression_opts, cf_id, r->column_family_name, + unknown_level, false /* is_bottommost */, TableFileCreationReason::kMisc, + 0 /* oldest_key_time */, 0 /* file_creation_time */, + "SST Writer" /* db_id */, r->db_session_id, 0 /* target_file_size */, + r->next_file_number); // External SST files used to each get a unique session id. Now for // slightly better uniqueness probability in constructing cache keys, we // assign fake file numbers to each file (into table properties) and keep @@ -363,7 +367,8 @@ Status SstFileWriter::Open(const std::string& file_path) { FileTypeSet tmp_set = r->ioptions.checksum_handoff_file_types; r->file_writer.reset(new WritableFileWriter( std::move(sst_file), file_path, r->env_options, r->ioptions.clock, - nullptr /* io_tracer */, nullptr /* stats */, r->ioptions.listeners, + nullptr /* io_tracer */, nullptr /* stats */, + Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, r->ioptions.listeners, r->ioptions.file_checksum_gen_factory.get(), tmp_set.Contains(FileType::kTableFile), false)); @@ -432,11 +437,13 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) { Status s = r->builder->Finish(); r->file_info.file_size = r->builder->FileSize(); + IOOptions opts; + s = WritableFileWriter::PrepareIOOptions(r->write_options, opts); if (s.ok()) { - s = r->file_writer->Sync(r->ioptions.use_fsync); + s = r->file_writer->Sync(opts, r->ioptions.use_fsync); 
r->InvalidatePageCache(true /* closing */).PermitUncheckedError(); if (s.ok()) { - s = r->file_writer->Close(); + s = r->file_writer->Close(opts); } } if (s.ok()) { diff --git a/table/table_builder.h b/table/table_builder.h index d6f0e1a03c9..5855a9707b0 100644 --- a/table/table_builder.h +++ b/table/table_builder.h @@ -102,6 +102,7 @@ struct TableReaderOptions { struct TableBuilderOptions { TableBuilderOptions( const ImmutableOptions& _ioptions, const MutableCFOptions& _moptions, + const WriteOptions& _write_options, const InternalKeyComparator& _internal_comparator, const IntTblPropCollectorFactories* _int_tbl_prop_collector_factories, CompressionType _compression_type, @@ -115,6 +116,7 @@ struct TableBuilderOptions { const uint64_t _target_file_size = 0, const uint64_t _cur_file_num = 0) : ioptions(_ioptions), moptions(_moptions), + write_options(_write_options), internal_comparator(_internal_comparator), int_tbl_prop_collector_factories(_int_tbl_prop_collector_factories), compression_type(_compression_type), @@ -133,6 +135,7 @@ struct TableBuilderOptions { const ImmutableOptions& ioptions; const MutableCFOptions& moptions; + const WriteOptions& write_options; const InternalKeyComparator& internal_comparator; const IntTblPropCollectorFactories* int_tbl_prop_collector_factories; const CompressionType compression_type; diff --git a/table/table_reader_bench.cc b/table/table_reader_bench.cc index 60c84d7bf09..683ff1ca99e 100644 --- a/table/table_reader_bench.cc +++ b/table/table_reader_bench.cc @@ -98,11 +98,13 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, IntTblPropCollectorFactories int_tbl_prop_collector_factories; int unknown_level = -1; + const WriteOptions write_options; tb = opts.table_factory->NewTableBuilder( - TableBuilderOptions( - ioptions, moptions, ikc, &int_tbl_prop_collector_factories, - CompressionType::kNoCompression, CompressionOptions(), - 0 /* column_family_id */, kDefaultColumnFamilyName, unknown_level), + 
TableBuilderOptions(ioptions, moptions, write_options, ikc, + &int_tbl_prop_collector_factories, + CompressionType::kNoCompression, + CompressionOptions(), 0 /* column_family_id */, + kDefaultColumnFamilyName, unknown_level), file_writer.get()); } else { s = DB::Open(opts, dbname, &db); @@ -122,7 +124,7 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options, } if (!through_db) { tb->Finish(); - file_writer->Close(); + file_writer->Close(IOOptions()); } else { db->Flush(FlushOptions()); } diff --git a/table/table_test.cc b/table/table_test.cc index e6f95243e1e..a42cec48760 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -382,8 +382,10 @@ class TableConstructor : public Constructor { } std::string column_family_name; + const WriteOptions write_options; builder.reset(ioptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, internal_comparator, + TableBuilderOptions(ioptions, moptions, write_options, + internal_comparator, &int_tbl_prop_collector_factories, options.compression, options.compression_opts, kUnknownColumnFamily, column_family_name, level_), @@ -401,7 +403,7 @@ class TableConstructor : public Constructor { EXPECT_OK(builder->status()); } Status s = builder->Finish(); - EXPECT_OK(file_writer_->Flush()); + EXPECT_OK(file_writer_->Flush(IOOptions())); EXPECT_TRUE(s.ok()) << s.ToString(); EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize()); @@ -1299,7 +1301,7 @@ class FileChecksumTestHelper { EXPECT_TRUE(table_builder_->status().ok()); } Status s = table_builder_->Finish(); - EXPECT_OK(file_writer_->Flush()); + EXPECT_OK(file_writer_->Flush(IOOptions())); EXPECT_OK(s); EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize()); @@ -1307,7 +1309,7 @@ class FileChecksumTestHelper { } std::string GetFileChecksum() { - EXPECT_OK(file_writer_->Close()); + EXPECT_OK(file_writer_->Close(IOOptions())); return table_builder_->GetFileChecksum(); } @@ -3978,8 +3980,9 @@ TEST_P(BlockBasedTableTest, 
NoFileChecksum) { FileChecksumTestHelper f(true); f.CreateWritableFile(); std::unique_ptr builder; + const WriteOptions write_options; builder.reset(ioptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, *comparator, + TableBuilderOptions(ioptions, moptions, write_options, *comparator, &int_tbl_prop_collector_factories, options.compression, options.compression_opts, kUnknownColumnFamily, column_family_name, level), @@ -4014,8 +4017,9 @@ TEST_P(BlockBasedTableTest, Crc32cFileChecksum) { f.CreateWritableFile(); f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); std::unique_ptr builder; + const WriteOptions write_options; builder.reset(ioptions.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, *comparator, + TableBuilderOptions(ioptions, moptions, write_options, *comparator, &int_tbl_prop_collector_factories, options.compression, options.compression_opts, kUnknownColumnFamily, column_family_name, level), @@ -4060,8 +4064,9 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) { IntTblPropCollectorFactories int_tbl_prop_collector_factories; std::string column_family_name; int unknown_level = -1; + const WriteOptions write_options; std::unique_ptr builder(factory.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, write_options, ikc, &int_tbl_prop_collector_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, column_family_name, unknown_level), @@ -4074,7 +4079,7 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) { builder->Add(key, value); } ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Flush(IOOptions())); test::StringSink* ss = static_cast(file_writer->writable_file()); @@ -4114,9 +4119,9 @@ TEST_F(PlainTableTest, NoFileChecksum) { int unknown_level = -1; FileChecksumTestHelper f(true); f.CreateWritableFile(); - + const WriteOptions write_options; std::unique_ptr 
builder(factory.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, write_options, ikc, &int_tbl_prop_collector_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, column_family_name, unknown_level), @@ -4154,9 +4159,9 @@ TEST_F(PlainTableTest, Crc32cFileChecksum) { FileChecksumTestHelper f(true); f.CreateWritableFile(); f.SetFileChecksumGenerator(checksum_crc32c_gen1.release()); - + const WriteOptions write_options; std::unique_ptr builder(factory.NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, write_options, ikc, &int_tbl_prop_collector_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, column_family_name, unknown_level), @@ -4763,8 +4768,9 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { new SstFileWriterPropertiesCollectorFactory(2 /* version */, 0 /* global_seqno*/)); std::string column_family_name; + const WriteOptions write_options; std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, write_options, ikc, &int_tbl_prop_collector_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, column_family_name, -1), @@ -4778,7 +4784,7 @@ TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) { builder->Add(ik.Encode(), value); } ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Flush(IOOptions())); test::RandomRWStringSink ss_rw(sink); uint32_t version; @@ -4945,8 +4951,9 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) { InternalKeyComparator ikc(options.comparator); IntTblPropCollectorFactories int_tbl_prop_collector_factories; std::string column_family_name; + const WriteOptions write_options; std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, 
write_options, ikc, &int_tbl_prop_collector_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, column_family_name, -1), @@ -4962,7 +4969,7 @@ TEST_P(BlockBasedTableTest, BlockAlignTest) { builder->Add(ik.Encode(), value); } ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Flush(IOOptions())); std::unique_ptr source( new test::StringSource(sink->contents(), 73342, false)); @@ -5038,8 +5045,9 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { IntTblPropCollectorFactories int_tbl_prop_collector_factories; std::string column_family_name; + const WriteOptions write_options; std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, write_options, ikc, &int_tbl_prop_collector_factories, kNoCompression, CompressionOptions(), kUnknownColumnFamily, column_family_name, -1), @@ -5055,7 +5063,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { builder->Add(ik.Encode(), value); } ASSERT_OK(builder->Finish()); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Flush(IOOptions())); std::unique_ptr source( new test::StringSource(sink->contents(), 73342, true)); @@ -5067,8 +5075,7 @@ TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) { uint64_t file_size = sink->contents().size(); Footer footer; - IOOptions opts; - ASSERT_OK(ReadFooterFromFile(opts, file, *FileSystem::Default(), + ASSERT_OK(ReadFooterFromFile(IOOptions(), file, *FileSystem::Default(), nullptr /* prefetch_buffer */, file_size, &footer, kBlockBasedTableMagicNumber)); @@ -5628,12 +5635,14 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, Basic) { InternalKeyComparator ikc(options.comparator); IntTblPropCollectorFactories int_tbl_prop_collector_factories; + const WriteOptions write_options; std::unique_ptr builder( options.table_factory->NewTableBuilder( - TableBuilderOptions( - ioptions, moptions, ikc, 
&int_tbl_prop_collector_factories, - kSnappyCompression, options.compression_opts, - kUnknownColumnFamily, "test_cf", -1 /* level */), + TableBuilderOptions(ioptions, moptions, write_options, ikc, + &int_tbl_prop_collector_factories, + kSnappyCompression, options.compression_opts, + kUnknownColumnFamily, "test_cf", + -1 /* level */), file_writer.get())); std::string key1 = "key1"; @@ -5704,8 +5713,9 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, InternalKeyComparator ikc(options.comparator); IntTblPropCollectorFactories int_tbl_prop_collector_factories; + const WriteOptions write_options; std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, write_options, ikc, &int_tbl_prop_collector_factories, kSnappyCompression, options.compression_opts, kUnknownColumnFamily, "test_cf", -1 /* level */), @@ -5789,8 +5799,9 @@ TEST_F(ChargeCompressionDictionaryBuildingBufferTest, BasicWithCacheFull) { InternalKeyComparator ikc(options.comparator); IntTblPropCollectorFactories int_tbl_prop_collector_factories; + const WriteOptions write_options; std::unique_ptr builder(options.table_factory->NewTableBuilder( - TableBuilderOptions(ioptions, moptions, ikc, + TableBuilderOptions(ioptions, moptions, write_options, ikc, &int_tbl_prop_collector_factories, kSnappyCompression, options.compression_opts, kUnknownColumnFamily, "test_cf", -1 /* level */), diff --git a/test_util/testutil.cc b/test_util/testutil.cc index 1e771f4fd16..685f09ec040 100644 --- a/test_util/testutil.cc +++ b/test_util/testutil.cc @@ -463,15 +463,16 @@ bool IsPrefetchSupported(const std::shared_ptr& fs, Random rnd(301); std::string test_string = rnd.RandomString(4096); Slice data(test_string); - Status s = WriteStringToFile(fs.get(), data, tmp, true); + IOOptions opts; + Status s = WriteStringToFile(fs.get(), data, tmp, true, opts); if (s.ok()) { std::unique_ptr file; auto io_s = fs->NewRandomAccessFile(tmp, 
FileOptions(), &file, nullptr); if (io_s.ok()) { - supported = !(file->Prefetch(0, data.size(), IOOptions(), nullptr) - .IsNotSupported()); + supported = + !(file->Prefetch(0, data.size(), opts, nullptr).IsNotSupported()); } - s = fs->DeleteFile(tmp, IOOptions(), nullptr); + s = fs->DeleteFile(tmp, opts, nullptr); } return s.ok() && supported; } @@ -521,7 +522,8 @@ Status CorruptFile(Env* env, const std::string& fname, int offset, for (int i = 0; i < bytes_to_corrupt; i++) { contents[i + offset] ^= 0x80; } - s = WriteStringToFile(env, contents, fname); + s = WriteStringToFile(env, contents, fname, false /* should_sync */, + Env::IOActivity::kUnknown /* io_activity */); } if (s.ok() && verify_checksum) { Options options; @@ -544,7 +546,8 @@ Status TruncateFile(Env* env, const std::string& fname, uint64_t new_length) { s = ReadFileToString(env, fname, &contents); if (s.ok()) { contents.resize(static_cast(new_length), 'b'); - s = WriteStringToFile(env, contents, fname); + s = WriteStringToFile(env, contents, fname, false /* should_sync */, + Env::IOActivity::kUnknown /* io_activity */); } return s; } diff --git a/tools/db_bench_tool_test.cc b/tools/db_bench_tool_test.cc index a30c650654f..1668dfb8836 100644 --- a/tools/db_bench_tool_test.cc +++ b/tools/db_bench_tool_test.cc @@ -130,7 +130,7 @@ namespace {} // namespace TEST_F(DBBenchTest, OptionsFile) { const std::string kOptionsFileName = test_path_ + "/OPTIONS_test"; Options opt = GetDefaultOptions(); - ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"}, + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), DBOptions(opt), {"default"}, {ColumnFamilyOptions(opt)}, kOptionsFileName, opt.env->GetFileSystem().get())); @@ -149,7 +149,7 @@ TEST_F(DBBenchTest, OptionsFileUniversal) { Options opt = GetDefaultOptions(kCompactionStyleUniversal, 1); - ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"}, + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), DBOptions(opt), {"default"}, {ColumnFamilyOptions(opt)}, 
kOptionsFileName, opt.env->GetFileSystem().get())); @@ -166,7 +166,7 @@ TEST_F(DBBenchTest, OptionsFileMultiLevelUniversal) { Options opt = GetDefaultOptions(kCompactionStyleUniversal, 12); - ASSERT_OK(PersistRocksDBOptions(DBOptions(opt), {"default"}, + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), DBOptions(opt), {"default"}, {ColumnFamilyOptions(opt)}, kOptionsFileName, opt.env->GetFileSystem().get())); diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 77096b11366..c009ba46074 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -4219,8 +4219,10 @@ UnsafeRemoveSstFileCommand::UnsafeRemoveSstFileCommand( } void UnsafeRemoveSstFileCommand::DoCommand() { - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; + const WriteOptions write_options; + PrepareOptions(); OfflineManifestWriter w(options_, db_path_); @@ -4245,7 +4247,7 @@ void UnsafeRemoveSstFileCommand::DoCommand() { s = options_.env->GetFileSystem()->NewDirectory(db_path_, IOOptions(), &db_dir, nullptr); if (s.ok()) { - s = w.LogAndApply(read_options, cfd, &edit, db_dir.get()); + s = w.LogAndApply(read_options, write_options, cfd, &edit, db_dir.get()); } } diff --git a/tools/simulated_hybrid_file_system.cc b/tools/simulated_hybrid_file_system.cc index 2b9aa0950fe..c9f69831349 100644 --- a/tools/simulated_hybrid_file_system.cc +++ b/tools/simulated_hybrid_file_system.cc @@ -86,7 +86,10 @@ SimulatedHybridFileSystem::~SimulatedHybridFileSystem() { metadata += f; metadata += "\n"; } - IOStatus s = WriteStringToFile(target(), metadata, metadata_file_name_, true); + // TODO: plumb Env::IOActivity, Env::IOPriority + IOOptions opts; + IOStatus s = + WriteStringToFile(target(), metadata, metadata_file_name_, true, opts); if (!s.ok()) { fprintf(stderr, "Error writing to file %s: %s", metadata_file_name_.c_str(), s.ToString().c_str()); @@ -240,4 +243,3 @@ IOStatus SimulatedWritableFile::Sync(const IOOptions& options, return target()->Sync(options, 
dbg); } } // namespace ROCKSDB_NAMESPACE - diff --git a/tools/sst_dump_test.cc b/tools/sst_dump_test.cc index f0b71bf8ea0..132cc6485a4 100644 --- a/tools/sst_dump_test.cc +++ b/tools/sst_dump_test.cc @@ -123,10 +123,12 @@ class SSTDumpToolTest : public testing::Test { std::string column_family_name; int unknown_level = -1; + const WriteOptions write_options; tb.reset(opts.table_factory->NewTableBuilder( TableBuilderOptions( - imoptions, moptions, ikc, &int_tbl_prop_collector_factories, - CompressionType::kNoCompression, CompressionOptions(), + imoptions, moptions, write_options, ikc, + &int_tbl_prop_collector_factories, CompressionType::kNoCompression, + CompressionOptions(), TablePropertiesCollectorFactory::Context::kUnknownColumnFamily, column_family_name, unknown_level), file_writer.get())); @@ -160,7 +162,7 @@ class SSTDumpToolTest : public testing::Test { } } ASSERT_OK(tb->Finish()); - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); } protected: @@ -417,9 +419,11 @@ TEST_F(SSTDumpToolTest, ValidSSTPath) { std::string sst_file = MakeFilePath("rocksdb_sst_test.sst"); createSST(opts, sst_file); std::string text_file = MakeFilePath("text_file"); - ASSERT_OK(WriteStringToFile(opts.env, "Hello World!", text_file)); + ASSERT_OK(WriteStringToFile(opts.env, "Hello World!", text_file, false, + Env::IOActivity::kUnknown)); std::string fake_sst = MakeFilePath("fake_sst.sst"); - ASSERT_OK(WriteStringToFile(opts.env, "Not an SST file!", fake_sst)); + ASSERT_OK(WriteStringToFile(opts.env, "Not an SST file!", fake_sst, false, + Env::IOActivity::kUnknown)); for (const auto& command_arg : {"--command=verify", "--command=identify"}) { snprintf(usage[1], kOptLength, "%s", command_arg); diff --git a/unreleased_history/new_features/sst_write_micros_file_write_stats_break_down.md b/unreleased_history/new_features/sst_write_micros_file_write_stats_break_down.md new file mode 100644 index 00000000000..aeb9c65a76f --- /dev/null +++ 
b/unreleased_history/new_features/sst_write_micros_file_write_stats_break_down.md @@ -0,0 +1 @@ +Provide new statistics: `rocksdb.sst.write.micros` to measure time spent in writing block-based or plain SST table; `rocksdb.file.write.{flush|compaction|db.open}.micros` to measure time spent in writing SST table (currently only block-based table) or blob file for flush, compaction or db open. diff --git a/util/file_checksum_helper.cc b/util/file_checksum_helper.cc index b8c4099b805..4983b0378dd 100644 --- a/util/file_checksum_helper.cc +++ b/util/file_checksum_helper.cc @@ -98,7 +98,7 @@ Status GetFileChecksumsFromManifest(Env* src_env, const std::string& abs_path, return Status::InvalidArgument("checksum_list is nullptr"); } assert(checksum_list); - // TODO: plumb Env::IOActivity + // TODO: plumb Env::IOActivity, Env::IOPriority const ReadOptions read_options; checksum_list->reset(); Status s; diff --git a/util/file_reader_writer_test.cc b/util/file_reader_writer_test.cc index 68776612b90..db2299a8915 100644 --- a/util/file_reader_writer_test.cc +++ b/util/file_reader_writer_test.cc @@ -113,16 +113,16 @@ TEST_F(WritableFileWriterTest, RangeSync) { for (int i = 0; i < 1000; i++) { int skew_limit = (i < 700) ? 10 : 15; uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100); - s = writer->Append(Slice(large_buf.get(), num)); + s = writer->Append(IOOptions(), Slice(large_buf.get(), num)); ASSERT_OK(s); // Flush in a chance of 1/10. 
if (r.Uniform(10) == 0) { - s = writer->Flush(); + s = writer->Flush(IOOptions()); ASSERT_OK(s); } } - s = writer->Close(); + s = writer->Close(IOOptions()); ASSERT_OK(s); } @@ -215,16 +215,16 @@ TEST_F(WritableFileWriterTest, IncrementalBuffer) { for (int i = 0; i < 20; i++) { uint32_t num = r.Skewed(16) * 100 + r.Uniform(100); std::string random_string = r.RandomString(num); - ASSERT_OK(writer->Append(Slice(random_string.c_str(), num))); + ASSERT_OK(writer->Append(IOOptions(), Slice(random_string.c_str(), num))); target.append(random_string.c_str(), num); // In some attempts, flush in a chance of 1/10. if (!no_flush && r.Uniform(10) == 0) { - ASSERT_OK(writer->Flush()); + ASSERT_OK(writer->Flush(IOOptions())); } } - ASSERT_OK(writer->Flush()); - ASSERT_OK(writer->Close()); + ASSERT_OK(writer->Flush(IOOptions())); + ASSERT_OK(writer->Close(IOOptions())); ASSERT_EQ(target.size(), actual.size()); ASSERT_EQ(target, actual); } @@ -272,27 +272,28 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksum) { ImmutableOptions ioptions(options); file_writer.reset(new WritableFileWriter( std::move(file), fname, file_options, SystemClock::Default().get(), - nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, true)); + nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, + ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true, + true)); Random rnd(301); std::string data = rnd.RandomString(1000); uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size()); fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c)); + ASSERT_OK(file_writer->Flush(IOOptions())); Random size_r(47); for (int i = 0; i < 2000; i++) { data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); data_crc32c = crc32c::Value(data.c_str(), 
data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + ASSERT_OK( + file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c)); data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()))); + ASSERT_OK(file_writer->Flush(IOOptions())); } - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); Destroy(options); } @@ -314,27 +315,29 @@ TEST_F(DBWritableFileWriterTest, AppendVerifyNoChecksum) { // So Append with checksum logic will not be triggered file_writer.reset(new WritableFileWriter( std::move(file), fname, file_options, SystemClock::Default().get(), - nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, false)); + nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, + ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true, + false)); Random rnd(301); std::string data = rnd.RandomString(1000); uint32_t data_crc32c = crc32c::Value(data.c_str(), data.size()); fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c)); + ASSERT_OK(file_writer->Flush(IOOptions())); Random size_r(47); for (int i = 0; i < 1000; i++) { data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); data_crc32c = crc32c::Value(data.c_str(), data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + ASSERT_OK( + file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c)); data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()))); 
+ ASSERT_OK(file_writer->Flush(IOOptions())); } - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); Destroy(options); } @@ -357,8 +360,9 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) { // So Append with checksum logic will not be triggered file_writer.reset(new WritableFileWriter( std::move(file), fname, file_options, SystemClock::Default().get(), - nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, true)); + nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, + ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true, + true)); fault_fs_->SetChecksumHandoffFuncType(ChecksumType::kCRC32c); Random rnd(301); @@ -370,17 +374,18 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) { for (int i = 0; i < 100; i++) { data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); data_crc32c = crc32c::Value(data.c_str(), data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + ASSERT_OK( + file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c)); bytes_written += static_cast(data.size()); data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()))); + ASSERT_OK(file_writer->Flush(IOOptions())); bytes_written += static_cast(data.size()); } uint64_t elapsed = fault_env_->NowMicros() - start; double raw_rate = bytes_written * 1000000.0 / elapsed; - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); // Set the rate-limiter FileOptions file_options1 = FileOptions(); @@ -397,19 +402,21 @@ TEST_F(DBWritableFileWriterTest, AppendWithChecksumRateLimiter) { // So Append with checksum logic will not be triggered file_writer.reset(new WritableFileWriter( std::move(file), fname, file_options1, SystemClock::Default().get(), - 
nullptr, ioptions.stats, ioptions.listeners, - ioptions.file_checksum_gen_factory.get(), true, true)); + nullptr, ioptions.stats, Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, + ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true, + true)); for (int i = 0; i < 1000; i++) { data = rnd.RandomString((static_cast(size_r.Next()) % 10000)); data_crc32c = crc32c::Value(data.c_str(), data.size()); - ASSERT_OK(file_writer->Append(Slice(data.c_str()), data_crc32c)); + ASSERT_OK( + file_writer->Append(IOOptions(), Slice(data.c_str()), data_crc32c)); data = rnd.RandomString((static_cast(size_r.Next()) % 97)); - ASSERT_OK(file_writer->Append(Slice(data.c_str()))); - ASSERT_OK(file_writer->Flush()); + ASSERT_OK(file_writer->Append(IOOptions(), Slice(data.c_str()))); + ASSERT_OK(file_writer->Flush(IOOptions())); } - ASSERT_OK(file_writer->Close()); + ASSERT_OK(file_writer->Close(IOOptions())); if (file_options1.rate_limiter != nullptr) { delete file_options1.rate_limiter; } @@ -465,12 +472,12 @@ TEST_F(WritableFileWriterTest, AppendStatusReturn) { std::unique_ptr writer( new WritableFileWriter(std::move(wf), "" /* don't care */, EnvOptions())); - ASSERT_OK(writer->Append(std::string(2 * kMb, 'a'))); + ASSERT_OK(writer->Append(IOOptions(), std::string(2 * kMb, 'a'))); // Next call to WritableFile::Append() should fail FakeWF* fwf = static_cast(writer->writable_file()); fwf->SetIOError(true); - ASSERT_NOK(writer->Append(std::string(2 * kMb, 'b'))); + ASSERT_NOK(writer->Append(IOOptions(), std::string(2 * kMb, 'b'))); } class ReadaheadRandomAccessFileTest @@ -498,9 +505,9 @@ class ReadaheadRandomAccessFileTest new test::StringSink(&control_contents_)); std::unique_ptr write_holder(new WritableFileWriter( std::move(sink), "" /* don't care */, FileOptions())); - Status s = write_holder->Append(Slice(str)); + Status s = write_holder->Append(IOOptions(), Slice(str)); EXPECT_OK(s); - s = write_holder->Flush(); + s = write_holder->Flush(IOOptions()); EXPECT_OK(s); 
std::unique_ptr read_holder( new test::StringSource(control_contents_)); @@ -878,26 +885,27 @@ TEST_F(DBWritableFileWriterTest, IOErrorNotification) { file_writer.reset(new WritableFileWriter( std::move(writable_file_ptr), fname, file_options, - SystemClock::Default().get(), nullptr, ioptions.stats, ioptions.listeners, + SystemClock::Default().get(), nullptr, ioptions.stats, + Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, ioptions.listeners, ioptions.file_checksum_gen_factory.get(), true, true)); FakeWF* fwf = static_cast(file_writer->writable_file()); fwf->SetIOError(true); - ASSERT_NOK(file_writer->Append(std::string(2 * kMb, 'a'))); + ASSERT_NOK(file_writer->Append(IOOptions(), std::string(2 * kMb, 'a'))); fwf->CheckCounters(1, 0); ASSERT_EQ(listener->NotifyErrorCount(), 1); file_writer->reset_seen_error(); fwf->SetIOError(true); - ASSERT_NOK(file_writer->Flush()); + ASSERT_NOK(file_writer->Flush(IOOptions())); fwf->CheckCounters(1, 1); ASSERT_EQ(listener->NotifyErrorCount(), 2); /* No error generation */ file_writer->reset_seen_error(); fwf->SetIOError(false); - ASSERT_OK(file_writer->Append(std::string(2 * kMb, 'b'))); + ASSERT_OK(file_writer->Append(IOOptions(), std::string(2 * kMb, 'b'))); ASSERT_EQ(listener->NotifyErrorCount(), 2); fwf->CheckCounters(1, 1); } @@ -1006,23 +1014,29 @@ class WritableFileWriterIOPriorityTest : public testing::Test { }; TEST_F(WritableFileWriterIOPriorityTest, Append) { - ASSERT_OK(writer_->Append(Slice("abc"))); + ASSERT_OK(writer_->Append(IOOptions(), Slice("abc"))); } -TEST_F(WritableFileWriterIOPriorityTest, Pad) { ASSERT_OK(writer_->Pad(500)); } +TEST_F(WritableFileWriterIOPriorityTest, Pad) { + ASSERT_OK(writer_->Pad(IOOptions(), 500)); +} -TEST_F(WritableFileWriterIOPriorityTest, Flush) { ASSERT_OK(writer_->Flush()); } +TEST_F(WritableFileWriterIOPriorityTest, Flush) { + ASSERT_OK(writer_->Flush(IOOptions())); +} -TEST_F(WritableFileWriterIOPriorityTest, Close) { ASSERT_OK(writer_->Close()); } 
+TEST_F(WritableFileWriterIOPriorityTest, Close) { + ASSERT_OK(writer_->Close(IOOptions())); +} TEST_F(WritableFileWriterIOPriorityTest, Sync) { - ASSERT_OK(writer_->Sync(false)); - ASSERT_OK(writer_->Sync(true)); + ASSERT_OK(writer_->Sync(IOOptions(), false)); + ASSERT_OK(writer_->Sync(IOOptions(), true)); } TEST_F(WritableFileWriterIOPriorityTest, SyncWithoutFlush) { - ASSERT_OK(writer_->SyncWithoutFlush(false)); - ASSERT_OK(writer_->SyncWithoutFlush(true)); + ASSERT_OK(writer_->SyncWithoutFlush(IOOptions(), false)); + ASSERT_OK(writer_->SyncWithoutFlush(IOOptions(), true)); } TEST_F(WritableFileWriterIOPriorityTest, BasicOp) { @@ -1037,16 +1051,16 @@ TEST_F(WritableFileWriterIOPriorityTest, BasicOp) { for (int i = 0; i < 1000; i++) { int skew_limit = (i < 700) ? 10 : 15; uint32_t num = r.Skewed(skew_limit) * 100 + r.Uniform(100); - s = writer->Append(Slice(large_buf.get(), num)); + s = writer->Append(IOOptions(), Slice(large_buf.get(), num)); ASSERT_OK(s); // Flush in a chance of 1/10. 
if (r.Uniform(10) == 0) { - s = writer->Flush(); + s = writer->Flush(IOOptions()); ASSERT_OK(s); } } - s = writer->Close(); + s = writer->Close(IOOptions()); ASSERT_OK(s); } } // namespace ROCKSDB_NAMESPACE diff --git a/util/log_write_bench.cc b/util/log_write_bench.cc index c1637db15d5..25602791ecf 100644 --- a/util/log_write_bench.cc +++ b/util/log_write_bench.cc @@ -41,9 +41,9 @@ void RunBenchmark() { std::unique_ptr file; env->NewWritableFile(file_name, &file, env_options); std::unique_ptr writer; - writer.reset(new WritableFileWriter(std::move(file), file_name, env_options, - clock, nullptr /* stats */, - options.listeners)); + writer.reset(new WritableFileWriter( + std::move(file), file_name, env_options, clock, nullptr /* stats */, + Histograms::HISTOGRAM_ENUM_MAX /* hist_type */, options.listeners)); std::string record; record.assign(FLAGS_record_size, 'X'); diff --git a/utilities/backup/backup_engine.cc b/utilities/backup/backup_engine.cc index e74218d45ca..252c969d5ae 100644 --- a/utilities/backup/backup_engine.cc +++ b/utilities/backup/backup_engine.cc @@ -2209,6 +2209,8 @@ IOStatus BackupEngineImpl::CopyOrCreateFile( } Slice data; + // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; do { if (stop_backup_.load(std::memory_order_acquire)) { return status_to_io_status(Status::Incomplete("Backup stopped")); @@ -2238,7 +2240,8 @@ IOStatus BackupEngineImpl::CopyOrCreateFile( if (checksum_hex != nullptr) { checksum_value = crc32c::Extend(checksum_value, data.data(), data.size()); } - io_s = dest_writer->Append(data); + + io_s = dest_writer->Append(opts, data); if (rate_limiter != nullptr) { if (!src.empty()) { @@ -2275,10 +2278,10 @@ IOStatus BackupEngineImpl::CopyOrCreateFile( } if (io_s.ok() && sync) { - io_s = dest_writer->Sync(false); + io_s = dest_writer->Sync(opts, false); } if (io_s.ok()) { - io_s = dest_writer->Close(); + io_s = dest_writer->Close(opts); } return io_s; } @@ -3352,4 +3355,3 @@ void TEST_SetDefaultRateLimitersClock( 
restore_rate_limiter_clock); } } // namespace ROCKSDB_NAMESPACE - diff --git a/utilities/backup/backup_engine_test.cc b/utilities/backup/backup_engine_test.cc index 5ed6ae89513..fd7bb7cad81 100644 --- a/utilities/backup/backup_engine_test.cc +++ b/utilities/backup/backup_engine_test.cc @@ -931,7 +931,8 @@ class BackupEngineTest : public testing::Test { } file_contents[0] = (file_contents[0] + 257) % 256; - return WriteStringToFile(test_db_env_.get(), file_contents, fname); + return WriteStringToFile(test_db_env_.get(), file_contents, fname, false, + Env::IOActivity::kUnknown); } void AssertDirectoryFilesMatchRegex(const std::string& dir, diff --git a/utilities/blob_db/blob_compaction_filter.cc b/utilities/blob_db/blob_compaction_filter.cc index ddaa98c7d32..6eec53c5433 100644 --- a/utilities/blob_db/blob_compaction_filter.cc +++ b/utilities/blob_db/blob_compaction_filter.cc @@ -182,6 +182,7 @@ bool BlobIndexCompactionFilterBase::OpenNewBlobFileIfNeeded() const { assert(blob_db_impl); const Status s = blob_db_impl->CreateBlobFileAndWriter( + WriteOptions(Env::IOActivity::kCompaction), /* has_ttl */ false, ExpirationRange(), "compaction/GC", &blob_file_, &writer_); if (!s.ok()) { @@ -251,8 +252,9 @@ bool BlobIndexCompactionFilterBase::WriteBlobToNewFile( assert(writer_); uint64_t new_key_offset = 0; - const Status s = writer_->AddRecord(key, blob, kNoExpiration, &new_key_offset, - new_blob_offset); + const WriteOptions write_options(Env::IOActivity::kCompaction); + const Status s = writer_->AddRecord(write_options, key, blob, kNoExpiration, + &new_key_offset, new_blob_offset); if (!s.ok()) { const BlobDBImpl* const blob_db_impl = context_.blob_db_impl; @@ -302,7 +304,8 @@ bool BlobIndexCompactionFilterBase::CloseAndRegisterNewBlobFile() const { { WriteLock wl(&blob_db_impl->mutex_); - s = blob_db_impl->CloseBlobFile(blob_file_); + s = blob_db_impl->CloseBlobFile(WriteOptions(Env::IOActivity::kCompaction), + blob_file_); // Note: we delay registering the new blob file 
until it's closed to // prevent FIFO eviction from processing it during compaction/GC. diff --git a/utilities/blob_db/blob_db.h b/utilities/blob_db/blob_db.h index e2f0b7bdbdd..59242a645a1 100644 --- a/utilities/blob_db/blob_db.h +++ b/utilities/blob_db/blob_db.h @@ -248,7 +248,7 @@ class BlobDB : public StackableDB { virtual BlobDBOptions GetBlobDBOptions() const = 0; - virtual Status SyncBlobFiles() = 0; + virtual Status SyncBlobFiles(const WriteOptions& write_options) = 0; virtual ~BlobDB() {} diff --git a/utilities/blob_db/blob_db_impl.cc b/utilities/blob_db/blob_db_impl.cc index 03470113680..c086ca4063f 100644 --- a/utilities/blob_db/blob_db_impl.cc +++ b/utilities/blob_db/blob_db_impl.cc @@ -23,6 +23,7 @@ #include "logging/logging.h" #include "monitoring/instrumented_mutex.h" #include "monitoring/statistics_impl.h" +#include "monitoring/thread_status_util.h" #include "rocksdb/convenience.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" @@ -106,6 +107,15 @@ BlobDBImpl::~BlobDBImpl() { } Status BlobDBImpl::Close() { + ThreadStatus::OperationType cur_op_type = + ThreadStatusUtil::GetThreadOperation(); + ThreadStatusUtil::SetThreadOperation(ThreadStatus::OperationType::OP_UNKNOWN); + Status s = CloseImpl(); + ThreadStatusUtil::SetThreadOperation(cur_op_type); + return s; +} + +Status BlobDBImpl::CloseImpl() { if (closed_) { return Status::OK(); } @@ -123,7 +133,8 @@ Status BlobDBImpl::Close() { return s; } - s = SyncBlobFiles(); + // TODO: plumb Env::IOActivity, Env::IOPriority + s = SyncBlobFiles(WriteOptions()); return s; } @@ -277,7 +288,7 @@ Status BlobDBImpl::Open(std::vector* handles) { return s; } - UpdateLiveSSTSize(); + UpdateLiveSSTSize(WriteOptions(Env::IOActivity::kDBOpen)); // Start background jobs. 
if (!bdb_options_.disable_background_tasks) { @@ -824,8 +835,9 @@ Status BlobDBImpl::CheckOrCreateWriterLocked( } Status BlobDBImpl::CreateBlobFileAndWriter( - bool has_ttl, const ExpirationRange& expiration_range, - const std::string& reason, std::shared_ptr* blob_file, + const WriteOptions& write_options, bool has_ttl, + const ExpirationRange& expiration_range, const std::string& reason, + std::shared_ptr* blob_file, std::shared_ptr* writer) { TEST_SYNC_POINT("BlobDBImpl::CreateBlobFileAndWriter"); assert(has_ttl == (expiration_range.first || expiration_range.second)); @@ -846,7 +858,7 @@ Status BlobDBImpl::CreateBlobFileAndWriter( assert(*writer); - s = (*writer)->WriteHeader((*blob_file)->header_); + s = (*writer)->WriteHeader(write_options, (*blob_file)->header_); if (!s.ok()) { ROCKS_LOG_ERROR(db_options_.info_log, "Failed to write header to new blob file: %s" @@ -861,7 +873,8 @@ Status BlobDBImpl::CreateBlobFileAndWriter( return s; } -Status BlobDBImpl::SelectBlobFile(std::shared_ptr* blob_file) { +Status BlobDBImpl::SelectBlobFile(const WriteOptions& write_options, + std::shared_ptr* blob_file) { assert(blob_file); { @@ -885,6 +898,7 @@ Status BlobDBImpl::SelectBlobFile(std::shared_ptr* blob_file) { std::shared_ptr writer; const Status s = CreateBlobFileAndWriter( + write_options, /* has_ttl */ false, ExpirationRange(), /* reason */ "SelectBlobFile", blob_file, &writer); if (!s.ok()) { @@ -897,7 +911,8 @@ Status BlobDBImpl::SelectBlobFile(std::shared_ptr* blob_file) { return s; } -Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration, +Status BlobDBImpl::SelectBlobFileTTL(const WriteOptions& write_options, + uint64_t expiration, std::shared_ptr* blob_file) { assert(blob_file); assert(expiration != kNoExpiration); @@ -930,9 +945,9 @@ Status BlobDBImpl::SelectBlobFileTTL(uint64_t expiration, oss << "SelectBlobFileTTL range: [" << exp_low << ',' << exp_high << ')'; std::shared_ptr writer; - const Status s = - CreateBlobFileAndWriter(/* has_ttl */ true, 
expiration_range, - /* reason */ oss.str(), blob_file, &writer); + const Status s = CreateBlobFileAndWriter( + write_options, /* has_ttl */ true, expiration_range, + /* reason */ oss.str(), blob_file, &writer); if (!s.ok()) { return s; } @@ -1055,7 +1070,7 @@ Status BlobDBImpl::PutUntil(const WriteOptions& options, const Slice& key, return s; } -Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/, +Status BlobDBImpl::PutBlobValue(const WriteOptions& write_options, const Slice& key, const Slice& value, uint64_t expiration, WriteBatch* batch) { write_mutex_.AssertHeld(); @@ -1087,30 +1102,30 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/, // Check DB size limit before selecting blob file to // Since CheckSizeAndEvictBlobFiles() can close blob files, it needs to be // done before calling SelectBlobFile(). - s = CheckSizeAndEvictBlobFiles(headerbuf.size() + key.size() + - value_compressed.size()); + s = CheckSizeAndEvictBlobFiles( + write_options, headerbuf.size() + key.size() + value_compressed.size()); if (!s.ok()) { return s; } std::shared_ptr blob_file; if (expiration != kNoExpiration) { - s = SelectBlobFileTTL(expiration, &blob_file); + s = SelectBlobFileTTL(write_options, expiration, &blob_file); } else { - s = SelectBlobFile(&blob_file); + s = SelectBlobFile(write_options, &blob_file); } if (s.ok()) { assert(blob_file != nullptr); assert(blob_file->GetCompressionType() == bdb_options_.compression); - s = AppendBlob(blob_file, headerbuf, key, value_compressed, expiration, - &index_entry); + s = AppendBlob(write_options, blob_file, headerbuf, key, value_compressed, + expiration, &index_entry); } if (s.ok()) { if (expiration != kNoExpiration) { WriteLock file_lock(&blob_file->mutex_); blob_file->ExtendExpirationRange(expiration); } - s = CloseBlobFileIfNeeded(blob_file); + s = CloseBlobFileIfNeeded(write_options, blob_file); } if (s.ok()) { s = WriteBatchInternal::PutBlobIndex(batch, column_family_id, key, @@ -1249,7 +1264,7 @@ void 
BlobDBImpl::GetCompactionContext(BlobCompactionContext* context, } } -void BlobDBImpl::UpdateLiveSSTSize() { +void BlobDBImpl::UpdateLiveSSTSize(const WriteOptions& write_options) { uint64_t live_sst_size = 0; bool ok = GetIntProperty(DB::Properties::kLiveSstFilesSize, &live_sst_size); if (ok) { @@ -1265,7 +1280,7 @@ void BlobDBImpl::UpdateLiveSSTSize() { { // Trigger FIFO eviction if needed. MutexLock l(&write_mutex_); - Status s = CheckSizeAndEvictBlobFiles(0, true /*force*/); + Status s = CheckSizeAndEvictBlobFiles(write_options, 0, true /*force*/); if (s.IsNoSpace()) { ROCKS_LOG_WARN(db_options_.info_log, "DB grow out-of-space after SST size updated. Current live" @@ -1276,7 +1291,8 @@ void BlobDBImpl::UpdateLiveSSTSize() { } } -Status BlobDBImpl::CheckSizeAndEvictBlobFiles(uint64_t blob_size, +Status BlobDBImpl::CheckSizeAndEvictBlobFiles(const WriteOptions& write_options, + uint64_t blob_size, bool force_evict) { write_mutex_.AssertHeld(); @@ -1316,7 +1332,7 @@ Status BlobDBImpl::CheckSizeAndEvictBlobFiles(uint64_t blob_size, } // FIFO eviction can evict open blob files. if (!blob_file->Immutable()) { - Status s = CloseBlobFile(blob_file); + Status s = CloseBlobFile(write_options, blob_file); if (!s.ok()) { return s; } @@ -1347,7 +1363,8 @@ Status BlobDBImpl::CheckSizeAndEvictBlobFiles(uint64_t blob_size, return Status::OK(); } -Status BlobDBImpl::AppendBlob(const std::shared_ptr& bfile, +Status BlobDBImpl::AppendBlob(const WriteOptions& write_options, + const std::shared_ptr& bfile, const std::string& headerbuf, const Slice& key, const Slice& value, uint64_t expiration, std::string* index_entry) { @@ -1363,8 +1380,8 @@ Status BlobDBImpl::AppendBlob(const std::shared_ptr& bfile, } // write the blob to the blob log. 
- s = writer->EmitPhysicalRecord(headerbuf, key, value, &key_offset, - &blob_offset); + s = writer->EmitPhysicalRecord(write_options, headerbuf, key, value, + &key_offset, &blob_offset); } if (!s.ok()) { @@ -1767,7 +1784,8 @@ std::pair BlobDBImpl::SanityCheck(bool aborted) { return std::make_pair(true, -1); } -Status BlobDBImpl::CloseBlobFile(std::shared_ptr bfile) { +Status BlobDBImpl::CloseBlobFile(const WriteOptions& write_options, + std::shared_ptr bfile) { TEST_SYNC_POINT("BlobDBImpl::CloseBlobFile"); assert(bfile); assert(!bfile->Immutable()); @@ -1783,7 +1801,7 @@ Status BlobDBImpl::CloseBlobFile(std::shared_ptr bfile) { const SequenceNumber sequence = GetLatestSequenceNumber(); - const Status s = bfile->WriteFooterAndCloseLocked(sequence); + const Status s = bfile->WriteFooterAndCloseLocked(write_options, sequence); if (s.ok()) { total_blob_size_ += BlobLogFooter::kSize; @@ -1815,7 +1833,8 @@ Status BlobDBImpl::CloseBlobFile(std::shared_ptr bfile) { return s; } -Status BlobDBImpl::CloseBlobFileIfNeeded(std::shared_ptr& bfile) { +Status BlobDBImpl::CloseBlobFileIfNeeded(const WriteOptions& write_options, + std::shared_ptr& bfile) { write_mutex_.AssertHeld(); // atomic read @@ -1831,7 +1850,7 @@ Status BlobDBImpl::CloseBlobFileIfNeeded(std::shared_ptr& bfile) { return Status::OK(); } - return CloseBlobFile(bfile); + return CloseBlobFile(write_options, bfile); } void BlobDBImpl::ObsoleteBlobFile(std::shared_ptr blob_file, @@ -1921,7 +1940,8 @@ std::pair BlobDBImpl::EvictExpiredFiles(bool aborted) { } if (!blob_file->Immutable()) { - CloseBlobFile(blob_file).PermitUncheckedError(); + // TODO: plumb Env::IOActivity, Env::IOPriority + CloseBlobFile(WriteOptions(), blob_file).PermitUncheckedError(); } assert(blob_file->Immutable()); @@ -1933,7 +1953,7 @@ std::pair BlobDBImpl::EvictExpiredFiles(bool aborted) { return std::make_pair(true, -1); } -Status BlobDBImpl::SyncBlobFiles() { +Status BlobDBImpl::SyncBlobFiles(const WriteOptions& write_options) { MutexLock 
l(&write_mutex_); std::vector> process_files; @@ -1949,7 +1969,7 @@ Status BlobDBImpl::SyncBlobFiles() { Status s; for (auto& blob_file : process_files) { - s = blob_file->Fsync(); + s = blob_file->Fsync(write_options); if (!s.ok()) { ROCKS_LOG_ERROR(db_options_.info_log, "Failed to sync blob file %" PRIu64 ", status: %s", @@ -2195,7 +2215,7 @@ Status BlobDBImpl::TEST_CloseBlobFile(std::shared_ptr& bfile) { WriteLock lock(&mutex_); WriteLock file_lock(&bfile->mutex_); - return CloseBlobFile(bfile); + return CloseBlobFile(WriteOptions(), bfile); } void BlobDBImpl::TEST_ObsoleteBlobFile(std::shared_ptr& blob_file, diff --git a/utilities/blob_db/blob_db_impl.h b/utilities/blob_db/blob_db_impl.h index 2d0afc639d7..54497a6e42c 100644 --- a/utilities/blob_db/blob_db_impl.h +++ b/utilities/blob_db/blob_db_impl.h @@ -167,7 +167,7 @@ class BlobDBImpl : public BlobDB { Status Open(std::vector* handles); - Status SyncBlobFiles() override; + Status SyncBlobFiles(const WriteOptions& write_options) override; // Common part of the two GetCompactionContext methods below. // REQUIRES: read lock on mutex_ @@ -245,11 +245,13 @@ class BlobDBImpl : public BlobDB { // to a single thread (like in the case of new files written during // compaction/GC), the locks on write_mutex_ and the blob file's mutex_ can be // avoided. - Status CloseBlobFile(std::shared_ptr bfile); + Status CloseBlobFile(const WriteOptions& write_options, + std::shared_ptr bfile); // Close a file if its size exceeds blob_file_size // REQUIRES: lock held on write_mutex_. - Status CloseBlobFileIfNeeded(std::shared_ptr& bfile); + Status CloseBlobFileIfNeeded(const WriteOptions& write_options, + std::shared_ptr& bfile); // Mark file as obsolete and move the file to obsolete file list. 
// @@ -261,13 +263,15 @@ class BlobDBImpl : public BlobDB { const Slice& value, uint64_t expiration, WriteBatch* batch); - Status AppendBlob(const std::shared_ptr& bfile, + Status AppendBlob(const WriteOptions& write_options, + const std::shared_ptr& bfile, const std::string& headerbuf, const Slice& key, const Slice& value, uint64_t expiration, std::string* index_entry); // Create a new blob file and associated writer. - Status CreateBlobFileAndWriter(bool has_ttl, + Status CreateBlobFileAndWriter(const WriteOptions& write_options, + bool has_ttl, const ExpirationRange& expiration_range, const std::string& reason, std::shared_ptr* blob_file, @@ -275,11 +279,13 @@ class BlobDBImpl : public BlobDB { // Get the open non-TTL blob log file, or create a new one if no such file // exists. - Status SelectBlobFile(std::shared_ptr* blob_file); + Status SelectBlobFile(const WriteOptions& write_options, + std::shared_ptr* blob_file); // Get the open TTL blob log file for a certain expiration, or create a new // one if no such file exists. - Status SelectBlobFileTTL(uint64_t expiration, + Status SelectBlobFileTTL(const WriteOptions& write_options, + uint64_t expiration, std::shared_ptr* blob_file); std::shared_ptr FindBlobFileLocked(uint64_t expiration) const; @@ -363,7 +369,7 @@ class BlobDBImpl : public BlobDB { void MarkUnreferencedBlobFilesObsolete(); void MarkUnreferencedBlobFilesObsoleteDuringOpen(); - void UpdateLiveSSTSize(); + void UpdateLiveSSTSize(const WriteOptions& write_options); Status GetBlobFileReader(const std::shared_ptr& blob_file, std::shared_ptr* reader); @@ -394,9 +400,12 @@ class BlobDBImpl : public BlobDB { // If is_fifo = true, FIFO eviction will be triggered to make room for the // new blob. If force_evict = true, FIFO eviction will evict blob files // even eviction will not make enough room for the new blob. 
- Status CheckSizeAndEvictBlobFiles(uint64_t blob_size, + Status CheckSizeAndEvictBlobFiles(const WriteOptions& write_options, + uint64_t blob_size, bool force_evict = false); + Status CloseImpl(); + // name of the database directory std::string dbname_; diff --git a/utilities/blob_db/blob_db_listener.h b/utilities/blob_db/blob_db_listener.h index c95740c50e7..0759b68114d 100644 --- a/utilities/blob_db/blob_db_listener.h +++ b/utilities/blob_db/blob_db_listener.h @@ -22,18 +22,20 @@ class BlobDBListener : public EventListener { void OnFlushBegin(DB* /*db*/, const FlushJobInfo& /*info*/) override { assert(blob_db_impl_ != nullptr); - blob_db_impl_->SyncBlobFiles().PermitUncheckedError(); + blob_db_impl_->SyncBlobFiles(WriteOptions(Env::IOActivity::kFlush)) + .PermitUncheckedError(); } void OnFlushCompleted(DB* /*db*/, const FlushJobInfo& /*info*/) override { assert(blob_db_impl_ != nullptr); - blob_db_impl_->UpdateLiveSSTSize(); + blob_db_impl_->UpdateLiveSSTSize(WriteOptions(Env::IOActivity::kFlush)); } void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& /*info*/) override { assert(blob_db_impl_ != nullptr); - blob_db_impl_->UpdateLiveSSTSize(); + blob_db_impl_->UpdateLiveSSTSize( + WriteOptions(Env::IOActivity::kCompaction)); } const char* Name() const override { return kClassName(); } diff --git a/utilities/blob_db/blob_file.cc b/utilities/blob_db/blob_file.cc index 5b31d569732..c4c0556fb1d 100644 --- a/utilities/blob_db/blob_file.cc +++ b/utilities/blob_db/blob_file.cc @@ -78,7 +78,8 @@ void BlobFile::MarkObsolete(SequenceNumber sequence) { obsolete_.store(true); } -Status BlobFile::WriteFooterAndCloseLocked(SequenceNumber sequence) { +Status BlobFile::WriteFooterAndCloseLocked(const WriteOptions& write_options, + SequenceNumber sequence) { BlobLogFooter footer; footer.blob_count = blob_count_; if (HasTTL()) { @@ -86,7 +87,8 @@ Status BlobFile::WriteFooterAndCloseLocked(SequenceNumber sequence) { } // this will close the file and reset the Writable 
File Pointer. - Status s = log_writer_->AppendFooter(footer, /* checksum_method */ nullptr, + Status s = log_writer_->AppendFooter(write_options, footer, + /* checksum_method */ nullptr, /* checksum_value */ nullptr); if (s.ok()) { closed_ = true; @@ -137,10 +139,10 @@ Status BlobFile::SetFromFooterLocked(const BlobLogFooter& footer) { return Status::OK(); } -Status BlobFile::Fsync() { +Status BlobFile::Fsync(const WriteOptions& write_options) { Status s; if (log_writer_.get()) { - s = log_writer_->Sync(); + s = log_writer_->Sync(write_options); } return s; } diff --git a/utilities/blob_db/blob_file.h b/utilities/blob_db/blob_file.h index 8651c6b6728..f0ec83ebe8a 100644 --- a/utilities/blob_db/blob_file.h +++ b/utilities/blob_db/blob_file.h @@ -180,7 +180,7 @@ class BlobFile { return obsolete_sequence_; } - Status Fsync(); + Status Fsync(const WriteOptions& write_options); uint64_t GetFileSize() const { return file_size_.load(std::memory_order_acquire); @@ -218,7 +218,8 @@ class BlobFile { private: Status ReadFooter(BlobLogFooter* footer); - Status WriteFooterAndCloseLocked(SequenceNumber sequence); + Status WriteFooterAndCloseLocked(const WriteOptions& write_options, + SequenceNumber sequence); void CloseRandomAccessLocked(); diff --git a/utilities/cache_dump_load_impl.h b/utilities/cache_dump_load_impl.h index 59cabbf3b68..5411521bf77 100644 --- a/utilities/cache_dump_load_impl.h +++ b/utilities/cache_dump_load_impl.h @@ -162,11 +162,13 @@ class ToFileCacheDumpWriter : public CacheDumpWriter { assert(file_writer_ != nullptr); std::string prefix; PutFixed32(&prefix, static_cast(metadata.size())); - IOStatus io_s = file_writer_->Append(Slice(prefix)); + // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; + IOStatus io_s = file_writer_->Append(opts, Slice(prefix)); if (!io_s.ok()) { return io_s; } - io_s = file_writer_->Append(metadata); + io_s = file_writer_->Append(opts, metadata); return io_s; } @@ -175,11 +177,13 @@ class 
ToFileCacheDumpWriter : public CacheDumpWriter { assert(file_writer_ != nullptr); std::string prefix; PutFixed32(&prefix, static_cast(data.size())); - IOStatus io_s = file_writer_->Append(Slice(prefix)); + // TODO: plumb Env::IOActivity, Env::IOPriority + const IOOptions opts; + IOStatus io_s = file_writer_->Append(opts, Slice(prefix)); if (!io_s.ok()) { return io_s; } - io_s = file_writer_->Append(data); + io_s = file_writer_->Append(opts, data); return io_s; } diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 8db8be45f7c..03917112a25 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -917,9 +917,10 @@ IOStatus FaultInjectionTestFS::DeleteFilesCreatedAfterLastDirSync( return io_s; } } else { + IOOptions opts; IOStatus io_s = WriteStringToFile(target(), file_pair.second, - pair.first + "/" + file_pair.first, true); + pair.first + "/" + file_pair.first, true, opts); if (!io_s.ok()) { return io_s; } diff --git a/utilities/options/options_util_test.cc b/utilities/options/options_util_test.cc index fd9affb0d91..2d08c3dd06d 100644 --- a/utilities/options/options_util_test.cc +++ b/utilities/options/options_util_test.cc @@ -57,8 +57,8 @@ TEST_F(OptionsUtilTest, SaveAndLoad) { } const std::string kFileName = "OPTIONS-123456"; - ASSERT_OK(PersistRocksDBOptions(db_opt, cf_names, cf_opts, kFileName, - env_->GetFileSystem().get())); + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), db_opt, cf_names, cf_opts, + kFileName, env_->GetFileSystem().get())); DBOptions loaded_db_opt; std::vector loaded_cf_descs; @@ -125,8 +125,8 @@ TEST_F(OptionsUtilTest, SaveAndLoadWithCacheCheck) { cf_names.push_back("cf_plain_table_sample"); // Saving DB in file const std::string kFileName = "OPTIONS-LOAD_CACHE_123456"; - ASSERT_OK(PersistRocksDBOptions(db_opt, cf_names, cf_opts, kFileName, - env_->GetFileSystem().get())); + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), db_opt, cf_names, cf_opts, + kFileName, 
env_->GetFileSystem().get())); DBOptions loaded_db_opt; std::vector loaded_cf_descs; @@ -758,8 +758,8 @@ TEST_F(OptionsUtilTest, WalDirInOptins) { options.wal_dir = dbname_; std::string options_file; ASSERT_OK(GetLatestOptionsFileName(dbname_, options.env, &options_file)); - ASSERT_OK(PersistRocksDBOptions(options, {"default"}, {options}, - dbname_ + "/" + options_file, + ASSERT_OK(PersistRocksDBOptions(WriteOptions(), options, {"default"}, + {options}, dbname_ + "/" + options_file, options.env->GetFileSystem().get())); ASSERT_OK(LoadLatestOptions(ignore_opts, dbname_, &db_opts, &cf_descs)); ASSERT_EQ(db_opts.wal_dir, dbname_); @@ -779,4 +779,3 @@ int main(int argc, char** argv) { #endif // GFLAGS return RUN_ALL_TESTS(); } - diff --git a/utilities/simulator_cache/sim_cache.cc b/utilities/simulator_cache/sim_cache.cc index d58c3b34f16..1fbdc0762e8 100644 --- a/utilities/simulator_cache/sim_cache.cc +++ b/utilities/simulator_cache/sim_cache.cc @@ -73,7 +73,8 @@ class CacheActivityLogger { oss << "LOOKUP - " << key.ToString(true) << std::endl; MutexLock l(&mutex_); - Status s = file_writer_->Append(oss.str()); + // TODO: plumb Env::IOActivity, Env::IOPriority + Status s = file_writer_->Append(IOOptions(), oss.str()); if (!s.ok() && bg_status_.ok()) { bg_status_ = s; } @@ -93,7 +94,8 @@ class CacheActivityLogger { // line format: "ADD - - " oss << "ADD - " << key.ToString(true) << " - " << size << std::endl; MutexLock l(&mutex_); - Status s = file_writer_->Append(oss.str()); + // TODO: plumb Env::IOActivity, Env::IOPriority + Status s = file_writer_->Append(IOOptions(), oss.str()); if (!s.ok() && bg_status_.ok()) { bg_status_ = s; } @@ -126,7 +128,8 @@ class CacheActivityLogger { } activity_logging_enabled_.store(false); - Status s = file_writer_->Close(); + // TODO: plumb Env::IOActivity, Env::IOPriority + Status s = file_writer_->Close(IOOptions()); if (!s.ok() && bg_status_.ok()) { bg_status_ = s; } diff --git a/utilities/trace/file_trace_reader_writer.cc 
b/utilities/trace/file_trace_reader_writer.cc index f2ca741442b..9247f2808cb 100644 --- a/utilities/trace/file_trace_reader_writer.cc +++ b/utilities/trace/file_trace_reader_writer.cc @@ -96,7 +96,8 @@ Status FileTraceWriter::Close() { } Status FileTraceWriter::Write(const Slice& data) { - return file_writer_->Append(data); + // TODO: plumb Env::IOActivity, Env::IOPriority + return file_writer_->Append(IOOptions(), data); } uint64_t FileTraceWriter::GetFileSize() { return file_writer_->GetFileSize(); } diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 4552835aae8..c0a3388121c 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -6370,7 +6370,8 @@ TEST_P(TransactionTest, DoubleCrashInRecovery) { file_content[400] = 'h'; file_content[401] = 'a'; ASSERT_OK(env->DeleteFile(fname)); - ASSERT_OK(WriteStringToFile(env.get(), file_content, fname, true)); + ASSERT_OK(WriteStringToFile(env.get(), file_content, fname, true, + Env::IOActivity::kUnknown)); // Recover from corruption std::vector handles;