Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: parse timestamp from the name of data dir for gc instead of the last update time #1667

Merged
merged 15 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
b637df4
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Oct 27, 2023
464dc5e
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Oct 27, 2023
94a5dfe
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Oct 27, 2023
1e28344
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Oct 31, 2023
9dc66d2
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 1, 2023
b38a237
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 1, 2023
cae375b
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 1, 2023
e2309cd
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 1, 2023
d26babb
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 2, 2023
aa5e062
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 2, 2023
3c0dff2
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 2, 2023
69d8aa4
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 2, 2023
48221f5
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 3, 2023
c3d0fea
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 6, 2023
f3f9abb
fix: parse timestamp from the name of data dir for gc instead of the …
empiredan Nov 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 167 additions & 33 deletions src/replica/disk_cleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

#include "disk_cleaner.h"

#include <boost/algorithm/string/predicate.hpp>
#include <fmt/core.h>
#include <stdint.h>
#include <sys/types.h>
#include <algorithm>
#include <atomic>
#include <cctype>

#include "common/fs_manager.h"
#include "metadata_types.h"
Expand All @@ -32,6 +34,9 @@
#include "utils/filesystem.h"
#include "utils/flags.h"
#include "utils/fmt_logging.h"
#include "utils/macros.h"
#include "utils/string_conv.h"
#include "utils/string_view.h"

namespace dsn {
namespace replication {
Expand Down Expand Up @@ -70,6 +75,108 @@ const std::string kFolderSuffixBak = ".bak";
const std::string kFolderSuffixOri = ".ori";
const std::string kFolderSuffixTmp = ".tmp";

namespace {

// TODO(wangdan): we could study later whether ctime (i.e. `st_ctime` within `struct stat`,
// the time of last status change) could be used instead of mtime (i.e. `st_mtime` within
// `struct stat`, the last write time), since ctime of the new directory would be updated
// to the current time once rename() is called, while mtime would not be updated.
bool get_expiration_timestamp_by_last_write_time(const std::string &path,
                                                 uint64_t delay_seconds,
                                                 uint64_t &expiration_timestamp_s)
{
    // Derive the expiration timestamp of `path` from its last write time plus
    // `delay_seconds`. On failure to read the last write time a warning is logged,
    // `expiration_timestamp_s` is left untouched and false is returned.
    time_t mtime_s;
    const bool fetched = dsn::utils::filesystem::last_write_time(path, mtime_s);
    if (fetched) {
        expiration_timestamp_s = static_cast<uint64_t>(mtime_s) + delay_seconds;
        return true;
    }

    LOG_WARNING("gc_disk: failed to get last write time of {}", path);
    return false;
}

// Unix timestamp in microseconds for 2010-01-01 00:00:00 (UTC+8), i.e. 2009-12-31 16:00:00 UTC.
acelyc111 marked this conversation as resolved.
Show resolved Hide resolved
// This timestamp could be used as the minimum, since it's far earlier than the time when
// Pegasus was born.
#define MIN_TIMESTAMP_US 1262275200000000
#define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1)

// Parse timestamp from the directory name.
//
// There are only 2 kinds of directory names that could include timestamp: one is the faulty
// replicas whose name has suffix ".err"; another is the dropped replicas whose name has
// suffix ".gar". The examples for both kinds of directory names:
// 1.1.pegasus.1698843209235962.err
// 1.2.pegasus.1698843214240709.gar
//
// Specify the size of suffix by `suffix_size`. For both kinds of names (.err and .gar),
// `suffix_size` is 4.
//
// The timestamp is the number just before the suffix, between the 2 dots. For
acelyc111 marked this conversation as resolved.
Show resolved Hide resolved
// example, in 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp, in
// microseconds, generated by dsn_now_us().
//
// `timestamp_us` is parsed result while returning true; otherwise, it would never be assigned.
bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &timestamp_us)
{
    CHECK_GE(name.size(), suffix_size);

    // The name consists of nothing but the suffix: there is no room for a timestamp.
    if (suffix_size == name.size()) {
        return false;
    }

    // [begin_idx, end_idx) is the candidate timestamp: the text between the last dot
    // that precedes the suffix and the suffix itself.
    const size_t end_idx = name.size() - suffix_size;
    auto begin_idx = name.find_last_of('.', end_idx - 1);
    if (begin_idx == std::string::npos || ++begin_idx >= end_idx) {
        return false;
    }

    // Anything shorter than the minimum timestamp cannot be a valid timestamp.
    const auto length = end_idx - begin_idx;
    if (length < MIN_TIMESTAMP_US_LENGTH) {
        return false;
    }

    // std::isdigit() is not an addressable standard library function, thus it can't be used
    // directly as an algorithm predicate.
    //
    // See following docs for details.
    // https://stackoverflow.com/questions/75868796/differences-between-isdigit-and-stdisdigit
    // https://en.cppreference.com/w/cpp/string/byte/isdigit
    const auto begin_itr = name.cbegin() + begin_idx;
    if (!std::all_of(begin_itr, begin_itr + length, [](unsigned char c) {
            return std::isdigit(c) != 0;
        })) {
        return false;
    }

    // Parse into a local so that `timestamp_us` is written only when true is returned.
    // Parsing straight into the output parameter would leave it modified when the value
    // is well-formed but not later than MIN_TIMESTAMP_US.
    uint64_t parsed_us = 0;
    if (!dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, length), parsed_us)) {
        return false;
    }
    if (parsed_us <= MIN_TIMESTAMP_US) {
        return false;
    }

    timestamp_us = parsed_us;
    return true;
}

bool get_expiration_timestamp(const std::string &name,
const std::string &path,
size_t suffix_size,
uint64_t delay_seconds,
uint64_t &expiration_timestamp_s)
{
uint64_t timestamp_us = 0;
if (!parse_timestamp_us(name, suffix_size, timestamp_us)) {
// Once the timestamp could not be extracted from the directory name, the last write time
// would be used as the base time to compute the expiration time.
LOG_WARNING("gc_disk: failed to parse timestamp from {}, turn to "
"the last write time for {}",
name,
path);
return get_expiration_timestamp_by_last_write_time(
path, delay_seconds, expiration_timestamp_s);
}

expiration_timestamp_s = timestamp_us / 1000000 + delay_seconds;
return true;
}

} // anonymous namespace

error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &dir_nodes,
/*output*/ disk_cleaning_report &report)
{
Expand All @@ -87,59 +194,85 @@ error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &d
}
sub_list.insert(sub_list.end(), tmp_list.begin(), tmp_list.end());
}
for (auto &fpath : sub_list) {
auto name = dsn::utils::filesystem::get_file_name(fpath);
if (!is_data_dir_removable(name)) {
continue;
}
std::string folder_suffix = name.substr(name.length() - 4);

time_t mt;
if (!dsn::utils::filesystem::last_write_time(fpath, mt)) {
LOG_WARNING("gc_disk: failed to get last write time of {}", fpath);
continue;
}

auto last_write_time = (uint64_t)mt;
uint64_t current_time_ms = dsn_now_ms();
uint64_t remove_interval_seconds = current_time_ms / 1000;
for (const auto &path : sub_list) {
uint64_t expiration_timestamp_s = 0;

// don't delete ".bak" directory because it is backed by administrator.
if (folder_suffix == kFolderSuffixErr) {
// Note: don't delete ".bak" directory since it may have been backed up by the administrator.
const auto name = dsn::utils::filesystem::get_file_name(path);
if (boost::algorithm::ends_with(name, kFolderSuffixErr)) {
report.error_replica_count++;
remove_interval_seconds = FLAGS_gc_disk_error_replica_interval_seconds;
} else if (folder_suffix == kFolderSuffixGar) {
if (!get_expiration_timestamp(name,
path,
kFolderSuffixErr.size(),
FLAGS_gc_disk_error_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) {
report.garbage_replica_count++;
remove_interval_seconds = FLAGS_gc_disk_garbage_replica_interval_seconds;
} else if (folder_suffix == kFolderSuffixTmp) {
if (!get_expiration_timestamp(name,
path,
kFolderSuffixGar.size(),
FLAGS_gc_disk_garbage_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) {
report.disk_migrate_tmp_count++;
remove_interval_seconds = FLAGS_gc_disk_migration_tmp_replica_interval_seconds;
} else if (folder_suffix == kFolderSuffixOri) {
if (!get_expiration_timestamp_by_last_write_time(
path,
FLAGS_gc_disk_migration_tmp_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) {
report.disk_migrate_origin_count++;
remove_interval_seconds = FLAGS_gc_disk_migration_origin_replica_interval_seconds;
if (!get_expiration_timestamp_by_last_write_time(
path,
FLAGS_gc_disk_migration_origin_replica_interval_seconds,
expiration_timestamp_s)) {
continue;
}
} else {
continue;
}

if (last_write_time + remove_interval_seconds <= current_time_ms / 1000) {
if (!dsn::utils::filesystem::remove_path(fpath)) {
LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}",
fpath,
dsn_now_ms() - current_time_ms);
} else {
const auto current_time_ms = dsn_now_ms();
if (expiration_timestamp_s <= current_time_ms / 1000) {
if (dsn::utils::filesystem::remove_path(path)) {
LOG_WARNING("gc_disk: replica_dir_op succeed to delete directory '{}'"
", time_used_ms = {}",
fpath,
path,
dsn_now_ms() - current_time_ms);
report.remove_dir_count++;
} else {
LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}",
path,
dsn_now_ms() - current_time_ms);
}
} else {
LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}",
fpath,
last_write_time + remove_interval_seconds - current_time_ms / 1000);
path,
expiration_timestamp_s - current_time_ms / 1000);
}
}
return error_s::ok();
}

bool is_data_dir_removable(const std::string &dir)
acelyc111 marked this conversation as resolved.
Show resolved Hide resolved
{
return boost::algorithm::ends_with(dir, kFolderSuffixErr) ||
boost::algorithm::ends_with(dir, kFolderSuffixGar) ||
boost::algorithm::ends_with(dir, kFolderSuffixTmp) ||
boost::algorithm::ends_with(dir, kFolderSuffixOri);
}

bool is_data_dir_invalid(const std::string &dir)
{
    // Every removable directory is invalid.
    if (is_data_dir_removable(dir)) {
        return true;
    }

    // ".bak" directories are also invalid, although they are not removable.
    return boost::algorithm::ends_with(dir, kFolderSuffixBak);
}

void move_to_err_path(const std::string &path, const std::string &log_prefix)
{
const std::string new_path = fmt::format("{}.{}{}", path, dsn_now_us(), kFolderSuffixErr);
Expand All @@ -150,5 +283,6 @@ void move_to_err_path(const std::string &path, const std::string &log_prefix)
new_path);
LOG_WARNING("{}: succeed to move directory from '{}' to '{}'", log_prefix, path, new_path);
}

} // namespace replication
} // namespace dsn
23 changes: 5 additions & 18 deletions src/replica/disk_cleaner.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,26 +55,13 @@ struct disk_cleaning_report
extern error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &dir_nodes,
/*output*/ disk_cleaning_report &report);

inline bool is_data_dir_removable(const std::string &dir)
{
if (dir.length() < 4) {
return false;
}
const std::string folder_suffix = dir.substr(dir.length() - 4);
return (folder_suffix == kFolderSuffixErr || folder_suffix == kFolderSuffixGar ||
folder_suffix == kFolderSuffixTmp || folder_suffix == kFolderSuffixOri);
}
bool is_data_dir_removable(const std::string &dir);

// Note: ".bak" is invalid but not allow delete, because it can be backed by administrator.
inline bool is_data_dir_invalid(const std::string &dir)
{
if (dir.length() < 4) {
return false;
}
const std::string folder_suffix = dir.substr(dir.length() - 4);
return is_data_dir_removable(dir) || folder_suffix == kFolderSuffixBak;
}
// Note: ".bak" is invalid but not allowed to be deleted, because it may have been backed
// up by the administrator on purpose.
bool is_data_dir_invalid(const std::string &dir);

void move_to_err_path(const std::string &path, const std::string &log_prefix);

} // namespace replication
} // namespace dsn
2 changes: 1 addition & 1 deletion src/replica/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1754,7 +1754,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id)
CHECK(
dsn::utils::filesystem::directory_exists(replica_path), "dir({}) not exist", replica_path);
LOG_INFO("start to move replica({}) as garbage, path: {}", id, replica_path);
const auto rename_path = fmt::format("{}.{}.gar", replica_path, dsn_now_us());
const auto rename_path = fmt::format("{}.{}{}", replica_path, dsn_now_us(), kFolderSuffixGar);
if (!dsn::utils::filesystem::rename_path(replica_path, rename_path)) {
LOG_WARNING("gc_replica: failed to move directory '{}' to '{}'", replica_path, rename_path);

Expand Down
Loading
Loading