Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
hx235 committed Aug 15, 2024
1 parent 21da4ba commit 2b50e35
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 14 deletions.
5 changes: 5 additions & 0 deletions db/db_impl/db_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2943,6 +2943,11 @@ DBOptions SanitizeOptions(const std::string& db, const DBOptions& src,
bool read_only = false,
Status* logger_creation_s = nullptr);

#ifdef OS_LINUX
// Returns, in bytes, the smallest `max_sectors_kb` value (multiplied by 1024)
// found among the directories listed in `db_paths`. Paths are examined in
// order and the scan stops at the first path whose value cannot be obtained.
// Returns 0 when `db_paths` is empty or the very first lookup fails; callers
// should treat 0 as "no limit known".
size_t GetCompactionReadaheadSizeSystemLimit(
const std::vector<DbPath>& db_paths);
#endif // OS_LINUX

CompressionType GetCompressionFlush(const ImmutableCFOptions& ioptions,
const MutableCFOptions& mutable_cf_options);

Expand Down
38 changes: 38 additions & 0 deletions db/db_impl/db_impl_open.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "db/error_handler.h"
#include "db/periodic_task_scheduler.h"
#include "env/composite_env_wrapper.h"
#include "env/io_posix.h"
#include "file/filename.h"
#include "file/read_write_util.h"
#include "file/sst_file_manager_impl.h"
Expand Down Expand Up @@ -144,6 +145,22 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
result.wal_dir = result.wal_dir.substr(0, result.wal_dir.size() - 1);
}

#ifdef OS_LINUX
if (result.compaction_readahead_size > 0) {
size_t system_limit =
GetCompactionReadaheadSizeSystemLimit(result.db_paths);
if (system_limit > 0 && result.compaction_readahead_size > system_limit) {
result.compaction_readahead_size = system_limit;
std::stringstream msg;
msg << "Compaction readahead size is set to no more than the POSIX "
"system limit (i.e, max_sectors_kb * 1024) "
": "
<< result.compaction_readahead_size;
ROCKS_LOG_INFO(result.info_log, "%s", msg.str().c_str());
}
}
#endif // OS_LINUX

// Force flush on DB open if 2PC is enabled, since with 2PC we have no
// guarantee that consecutive log files have consecutive sequence id, which
// make recovery complicated.
Expand Down Expand Up @@ -200,6 +217,27 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src,
return result;
}

#ifdef OS_LINUX
size_t GetCompactionReadaheadSizeSystemLimit(
const std::vector<DbPath>& db_paths) {
Status s;
size_t max_sectors_kb = 0;

for (const auto& db_path : db_paths) {
size_t dir_max_sectors_kb = 0;
s = PosixHelper::GetMaxSectorsKBOfDirectory(db_path.path,
&dir_max_sectors_kb);
if (!s.ok()) {
break;
}
max_sectors_kb = (max_sectors_kb == 0)
? dir_max_sectors_kb
: std::min(max_sectors_kb, dir_max_sectors_kb);
}
return max_sectors_kb * 1024;
}
#endif // OS_LINUX

namespace {
Status ValidateOptionsByTable(
const DBOptions& db_opts,
Expand Down
72 changes: 58 additions & 14 deletions env/io_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ IOStatus PosixSequentialFile::InvalidateCache(size_t offset, size_t length) {
* PosixRandomAccessFile
*/
#if defined(OS_LINUX)
// Names of the attribute files read from a block device's `queue/` sysfs
// directory. They are appended to "<device dir>/queue/" when building the path
// to read, and also select which validation rule is applied to the value.
const std::string PosixHelper::kLogicalBlockSizeFileName = "logical_block_size";
const std::string PosixHelper::kMaxSectorsKBFileName = "max_sectors_kb";

size_t PosixHelper::GetUniqueIdFromFile(int fd, char* id, size_t max_size) {
if (max_size < kMaxVarint64Length * 3) {
return 0;
Expand Down Expand Up @@ -455,38 +458,62 @@ size_t LogicalBlockSizeCache::GetLogicalBlockSize(const std::string& fname,

// Stores in `*size` the logical block size of the device that holds
// `directory`. Thin wrapper that selects the `logical_block_size` queue
// attribute and forwards to the generic directory-based reader, whose Status
// is returned unchanged.
Status PosixHelper::GetLogicalBlockSizeOfDirectory(const std::string& directory,
                                                   size_t* size) {
  const std::string& attribute = kLogicalBlockSizeFileName;
  return GetQueueSysfsFileValueofDirectory(directory, attribute, size);
}

// Stores in `*kb` the `max_sectors_kb` value of the device that holds
// `directory`. Thin wrapper that selects the `max_sectors_kb` queue attribute
// and forwards to the generic directory-based reader, whose Status is returned
// unchanged.
Status PosixHelper::GetMaxSectorsKBOfDirectory(const std::string& directory,
                                               size_t* kb) {
  const std::string& attribute = kMaxSectorsKBFileName;
  return GetQueueSysfsFileValueofDirectory(directory, attribute, kb);
}

// Reads one block-device queue attribute for the device that holds
// `directory` and stores the result in `*value`.
//
// `file_name` must be kLogicalBlockSizeFileName or kMaxSectorsKBFileName.
// Anything else is a programming error: it trips an assert in debug builds
// and yields Status::InvalidArgument in builds where asserts are compiled
// out, so Status::OK() is never returned with `*value` left unwritten.
//
// Returns Status::IOError when `directory` cannot be opened. Otherwise returns
// Status::OK() after writing `*value` with whatever the matching per-fd
// helper reports.
Status PosixHelper::GetQueueSysfsFileValueofDirectory(
    const std::string& directory, const std::string& file_name, size_t* value) {
  const bool wants_logical_block_size =
      (file_name == PosixHelper::kLogicalBlockSizeFileName);
  const bool wants_max_sectors_kb =
      (file_name == PosixHelper::kMaxSectorsKBFileName);

  // Reject unknown attributes before acquiring the descriptor so there is
  // nothing to clean up on this path.
  if (!wants_logical_block_size && !wants_max_sectors_kb) {
    assert(false);
    return Status::InvalidArgument("Unsupported queue sysfs file " + file_name);
  }

  int fd = open(directory.c_str(), O_DIRECTORY | O_RDONLY);
  if (fd == -1) {
    return Status::IOError("Cannot open directory " + directory);
  }

  if (wants_logical_block_size) {
    *value = PosixHelper::GetLogicalBlockSizeOfFd(fd);
  } else {
    *value = PosixHelper::GetMaxSectorsKBOfFd(fd);
  }

  close(fd);
  return Status::OK();
}

size_t PosixHelper::GetLogicalBlockSizeOfFd(int fd) {
size_t PosixHelper::GetQueueSysfsFileValueOfFd(int fd,
const std::string& file_name,
size_t default_return_value) {
#ifdef OS_LINUX
struct stat buf;
int result = fstat(fd, &buf);
if (result == -1) {
return kDefaultPageSize;
return default_return_value;
}

// Get device number
if (major(buf.st_dev) == 0) {
// Unnamed devices (e.g. non-device mounts), reserved as null device number.
// These don't have an entry in /sys/dev/block/. Return a sensible default.
return kDefaultPageSize;
return default_return_value;
}

// Reading queue/logical_block_size does not require special permissions.
// Get device path
const int kBufferSize = 100;
char path[kBufferSize];
char real_path[PATH_MAX + 1];
snprintf(path, kBufferSize, "/sys/dev/block/%u:%u", major(buf.st_dev),
minor(buf.st_dev));
if (realpath(path, real_path) == nullptr) {
return kDefaultPageSize;
return default_return_value;
}
std::string device_dir(real_path);

// Get the queue sysfs file path
if (!device_dir.empty() && device_dir.back() == '/') {
device_dir.pop_back();
}
Expand All @@ -500,11 +527,11 @@ size_t PosixHelper::GetLogicalBlockSizeOfFd(int fd) {
// ../../devices/pci0000:17/0000:17:00.0/0000:18:00.0/nvme/nvme0/nvme0n1/nvme0n1p1
size_t parent_end = device_dir.rfind('/', device_dir.length() - 1);
if (parent_end == std::string::npos) {
return kDefaultPageSize;
return default_return_value;
}
size_t parent_begin = device_dir.rfind('/', parent_end - 1);
if (parent_begin == std::string::npos) {
return kDefaultPageSize;
return default_return_value;
}
std::string parent =
device_dir.substr(parent_begin + 1, parent_end - parent_begin - 1);
Expand All @@ -513,25 +540,42 @@ size_t PosixHelper::GetLogicalBlockSizeOfFd(int fd) {
(child.compare(0, 4, "nvme") || child.find('p') != std::string::npos)) {
device_dir = device_dir.substr(0, parent_end);
}
std::string fname = device_dir + "/queue/logical_block_size";
std::string fname = device_dir + "/queue/" + file_name;

// Get value in the queue sysfs file
FILE* fp;
size_t size = 0;
size_t value = 0;
fp = fopen(fname.c_str(), "r");
if (fp != nullptr) {
char* line = nullptr;
size_t len = 0;
if (getline(&line, &len, fp) != -1) {
sscanf(line, "%zu", &size);
sscanf(line, "%zu", &value);
}
free(line);
fclose(fp);
}
if (size != 0 && (size & (size - 1)) == 0) {
return size;
if (file_name == kLogicalBlockSizeFileName && value != 0 &&
(value & (value - 1)) == 0) {
return value;
} else if (file_name == kMaxSectorsKBFileName && value != 0) {
return value;
} else {
assert(false);
}
#endif
(void)fd;
return kDefaultPageSize;
return default_return_value;
}

// Logical block size of the device holding the file behind `fd`. Delegates to
// the generic queue-attribute reader with kDefaultPageSize as the fallback
// value.
size_t PosixHelper::GetLogicalBlockSizeOfFd(int fd) {
  const size_t fallback = kDefaultPageSize;
  return GetQueueSysfsFileValueOfFd(fd, kLogicalBlockSizeFileName, fallback);
}

// `max_sectors_kb` of the device holding the file behind `fd`. Delegates to
// the generic queue-attribute reader with kDefaultMaxSectorsKB as the fallback
// value.
size_t PosixHelper::GetMaxSectorsKBOfFd(int fd) {
  const size_t fallback = kDefaultMaxSectorsKB;
  return GetQueueSysfsFileValueOfFd(fd, kMaxSectorsKBFileName, fallback);
}

/*
Expand Down
18 changes: 18 additions & 0 deletions env/io_posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,24 @@ class PosixHelper {
// Returns the `logical_block_size` queue attribute of the device holding the
// file behind `fd`, or kDefaultPageSize when it cannot be determined.
static size_t GetLogicalBlockSizeOfFd(int fd);
// Directory-based variant of `GetLogicalBlockSizeOfFd()`: opens `directory`,
// writes the result to `*size`, and returns an IOError status if the
// directory cannot be opened.
static Status GetLogicalBlockSizeOfDirectory(const std::string& directory,
size_t* size);
// Returns the `max_sectors_kb` queue attribute (a size in KB) of the device
// holding the file behind `fd`, or kDefaultMaxSectorsKB when it cannot be
// determined.
static size_t GetMaxSectorsKBOfFd(int fd);
// Directory-based variant of `GetMaxSectorsKBOfFd()`: opens `directory`,
// writes the result (in KB) to `*kb`, and returns an IOError status if the
// directory cannot be opened.
static Status GetMaxSectorsKBOfDirectory(const std::string& directory,
size_t* kb);

private:
static const std::string kLogicalBlockSizeFileName;
static const std::string kMaxSectorsKBFileName;
static const size_t kDefaultMaxSectorsKB = 2 * 1024;

// Similar to `GetQueueSysfsFileValueOfFd()` but takes a directory path
// instead of an open file descriptor. The directory is opened, the value for
// `file_name` is written to `*value`, and the descriptor is closed again.
// Returns an IOError status if `directory` cannot be opened.
// `file_name` is expected to be kLogicalBlockSizeFileName or
// kMaxSectorsKBFileName.
static Status GetQueueSysfsFileValueofDirectory(const std::string& directory,
const std::string& file_name,
size_t* value);
// Return the value in the specified file `file_name` under
// `/sys/block/xxx/queue/` for the device where the file of `fd` is on.
// If not found, then return the specified `default_return_value`.
// `file_name` is expected to be kLogicalBlockSizeFileName or
// kMaxSectorsKBFileName. A value of 0 is never returned as a result, and a
// logical block size must additionally be a power of two.
// NOTE(review): the implementation reaches `assert(false)` when the value
// read fails that validation, so debug builds may abort instead of returning
// `default_return_value` - confirm whether that is intended.
static size_t GetQueueSysfsFileValueOfFd(int fd, const std::string& file_name,
size_t default_return_value);
};

/*
Expand Down

0 comments on commit 2b50e35

Please sign in to comment.