Skip to content

Commit ee369d3

Browse files
authored
fix: parse timestamp from the name of data dir for gc instead of the last update time (#1667)
#1673 It's found that sometimes the data directories of replicas are removed immediately after they are renamed with postfixes `.err/.gar`, though actually both of `gc_disk_error_replica_interval_seconds` and `gc_disk_garbage_replica_interval_seconds` have been configured with at least one day. The reason is that the base time for expiration time is **the last write time**, that is, `st_mtime` within `struct stat` returned by `stat()`. Once a long time has passed since the last write time, the data directory will be removed immediately after it is renamed with postfixes `.err/.gar`. To fix this problem, just use the timestamp within the directory name as the base time that is generated when the data directory is renamed with postfixes `.err/.gar`. The last update time would be used iff the timestamp is NOT found within the directory name.
1 parent 724ec0b commit ee369d3

File tree

7 files changed

+275
-75
lines changed

7 files changed

+275
-75
lines changed

src/replica/disk_cleaner.cpp

Lines changed: 167 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919

2020
#include "disk_cleaner.h"
2121

22+
#include <boost/algorithm/string/predicate.hpp>
2223
#include <fmt/core.h>
2324
#include <stdint.h>
2425
#include <sys/types.h>
2526
#include <algorithm>
2627
#include <atomic>
28+
#include <cctype>
2729

2830
#include "common/fs_manager.h"
2931
#include "metadata_types.h"
@@ -32,6 +34,9 @@
3234
#include "utils/filesystem.h"
3335
#include "utils/flags.h"
3436
#include "utils/fmt_logging.h"
37+
#include "utils/macros.h"
38+
#include "utils/string_conv.h"
39+
#include "utils/string_view.h"
3540

3641
namespace dsn {
3742
namespace replication {
@@ -70,6 +75,108 @@ const std::string kFolderSuffixBak = ".bak";
7075
const std::string kFolderSuffixOri = ".ori";
7176
const std::string kFolderSuffixTmp = ".tmp";
7277

78+
namespace {
79+
80+
// TODO(wangdan): we could study later whether ctime (i.e. `st_ctime` within `struct stat`,
81+
// the time of last status change) could be used instead of mtime (i.e. `st_mtime` within
82+
// `struct stat`, the last write time), since ctime of the new directory would be updated
83+
// to the current time once rename() is called, while mtime would not be updated.
84+
// Computes the expiration time of `path` as its last write time plus `delay_seconds`.
//
// Returns true and writes the result to `expiration_timestamp_s` when the last write time
// could be read. Otherwise logs a warning and returns false, leaving
// `expiration_timestamp_s` untouched.
bool get_expiration_timestamp_by_last_write_time(const std::string &path,
85+
uint64_t delay_seconds,
86+
uint64_t &expiration_timestamp_s)
87+
{
88+
time_t last_write_time_s;
89+
if (!dsn::utils::filesystem::last_write_time(path, last_write_time_s)) {
90+
LOG_WARNING("gc_disk: failed to get last write time of {}", path);
91+
return false;
92+
}
93+
94+
// The last write time is the base time; the path expires `delay_seconds` after it.
expiration_timestamp_s = static_cast<uint64_t>(last_write_time_s) + delay_seconds;
95+
return true;
96+
}
97+
98+
// Unix timestamp in microseconds for 2010-01-01 00:00:00 GMT+0000.
99+
// This timestamp could be used as the minimum, since it's far earlier than the time when
100+
// Pegasus was born.
101+
#define MIN_TIMESTAMP_US 1262304000000000
102+
#define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1)
103+
104+
// Parse timestamp from the directory name.
105+
//
106+
// There are only 2 kinds of directory names that could include timestamp: one is the faulty
107+
// replicas whose name has suffix ".err"; another is the dropped replicas whose name has
108+
// suffix ".gar". The examples for both kinds of directory names:
109+
// 1.1.pegasus.1698843209235962.err
110+
// 1.2.pegasus.1698843214240709.gar
111+
//
112+
// Specify the size of suffix by `suffix_size`. For both kinds of names (.err and .gar),
113+
// `suffix_size` is 4.
114+
//
115+
// The timestamp is the number just before the suffix, between the 2 dots. For example, in
116+
// 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp in microseconds,
117+
// generated by dsn_now_us().
118+
//
119+
// `timestamp_us` holds the parsed result when true is returned. When false is returned its
// value must not be relied upon: it may already have been overwritten during parsing.
120+
// Extracts the all-digit field that sits between the last dot before the suffix and the
// suffix itself, and parses it as a timestamp in microseconds.
//
// `name` is the directory name, `suffix_size` is the number of trailing characters that
// make up the suffix. Returns true only if such a field exists, has at least
// MIN_TIMESTAMP_US_LENGTH characters, consists solely of digits, is parsed successfully
// and is greater than MIN_TIMESTAMP_US.
bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &timestamp_us)
121+
{
122+
// The name must be at least as long as the suffix it is supposed to end with.
CHECK_GE(name.size(), suffix_size);
123+
124+
// The name consists of the suffix only: nothing is left that could hold a timestamp.
if (suffix_size == name.size()) {
125+
return false;
126+
}
127+
128+
// `end_idx` is the index of the first character of the suffix, i.e. one past the last
// character of the candidate timestamp.
const size_t end_idx = name.size() - suffix_size;
129+
// Search backwards, starting from the character just before the suffix, for the dot
// that precedes the candidate timestamp.
auto begin_idx = name.find_last_of('.', end_idx - 1);
130+
// Fail if there is no such dot, or if nothing lies between that dot and the suffix.
// Note that `begin_idx` is advanced past the dot as a side effect of this condition.
if (begin_idx == std::string::npos || ++begin_idx >= end_idx) {
131+
return false;
132+
}
133+
134+
// Any value greater than MIN_TIMESTAMP_US needs at least as many digits as it has.
const auto length = end_idx - begin_idx;
135+
if (length < MIN_TIMESTAMP_US_LENGTH) {
136+
return false;
137+
}
138+
139+
// std::isdigit() is not an addressable standard library function, thus it can't be used
140+
// directly as an algorithm predicate.
141+
//
142+
// See following docs for details.
143+
// https://stackoverflow.com/questions/75868796/differences-between-isdigit-and-stdisdigit
144+
// https://en.cppreference.com/w/cpp/string/byte/isdigit
145+
const auto begin_itr = name.cbegin() + begin_idx;
146+
if (!std::all_of(
147+
begin_itr, begin_itr + length, [](unsigned char c) { return std::isdigit(c); })) {
148+
return false;
149+
}
150+
151+
// NOTE(review): `timestamp_us` is handed to buf2uint64() directly as its output, so it
// has presumably been overwritten already when the range check below fails and false is
// returned; callers must only read it after a true return.
const auto ok =
152+
dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, length), timestamp_us);
153+
// Only timestamps strictly later than MIN_TIMESTAMP_US are accepted.
return ok ? timestamp_us > MIN_TIMESTAMP_US : false;
154+
}
155+
156+
// Computes the expiration time (in seconds) for the directory `name` located at `path`.
//
// The base time is the microsecond timestamp parsed from `name` (whose suffix has
// `suffix_size` characters); if it cannot be parsed, the last write time of `path` is used
// instead. `delay_seconds` is added to the base time to get `expiration_timestamp_s`.
//
// Returns false only when the fallback to the last write time fails as well.
bool get_expiration_timestamp(const std::string &name,
157+
const std::string &path,
158+
size_t suffix_size,
159+
uint64_t delay_seconds,
160+
uint64_t &expiration_timestamp_s)
161+
{
162+
uint64_t timestamp_us = 0;
163+
if (!parse_timestamp_us(name, suffix_size, timestamp_us)) {
164+
// Once the timestamp could not be extracted from the directory name, the last write time
165+
// would be used as the base time to compute the expiration time.
166+
LOG_WARNING("gc_disk: failed to parse timestamp from {}, turn to "
167+
"the last write time for {}",
168+
name,
169+
path);
170+
return get_expiration_timestamp_by_last_write_time(
171+
path, delay_seconds, expiration_timestamp_s);
172+
}
173+
174+
// Convert the parsed timestamp from microseconds to seconds before adding the delay.
expiration_timestamp_s = timestamp_us / 1000000 + delay_seconds;
175+
return true;
176+
}
177+
178+
} // anonymous namespace
179+
73180
error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &dir_nodes,
74181
/*output*/ disk_cleaning_report &report)
75182
{
@@ -87,59 +194,85 @@ error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &d
87194
}
88195
sub_list.insert(sub_list.end(), tmp_list.begin(), tmp_list.end());
89196
}
90-
for (auto &fpath : sub_list) {
91-
auto name = dsn::utils::filesystem::get_file_name(fpath);
92-
if (!is_data_dir_removable(name)) {
93-
continue;
94-
}
95-
std::string folder_suffix = name.substr(name.length() - 4);
96-
97-
time_t mt;
98-
if (!dsn::utils::filesystem::last_write_time(fpath, mt)) {
99-
LOG_WARNING("gc_disk: failed to get last write time of {}", fpath);
100-
continue;
101-
}
102197

103-
auto last_write_time = (uint64_t)mt;
104-
uint64_t current_time_ms = dsn_now_ms();
105-
uint64_t remove_interval_seconds = current_time_ms / 1000;
198+
for (const auto &path : sub_list) {
199+
uint64_t expiration_timestamp_s = 0;
106200

107-
// don't delete ".bak" directory because it is backed by administrator.
108-
if (folder_suffix == kFolderSuffixErr) {
201+
// Note: don't delete ".bak" directory since it may be a backup made by the administrator.
202+
const auto name = dsn::utils::filesystem::get_file_name(path);
203+
if (boost::algorithm::ends_with(name, kFolderSuffixErr)) {
109204
report.error_replica_count++;
110-
remove_interval_seconds = FLAGS_gc_disk_error_replica_interval_seconds;
111-
} else if (folder_suffix == kFolderSuffixGar) {
205+
if (!get_expiration_timestamp(name,
206+
path,
207+
kFolderSuffixErr.size(),
208+
FLAGS_gc_disk_error_replica_interval_seconds,
209+
expiration_timestamp_s)) {
210+
continue;
211+
}
212+
} else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) {
112213
report.garbage_replica_count++;
113-
remove_interval_seconds = FLAGS_gc_disk_garbage_replica_interval_seconds;
114-
} else if (folder_suffix == kFolderSuffixTmp) {
214+
if (!get_expiration_timestamp(name,
215+
path,
216+
kFolderSuffixGar.size(),
217+
FLAGS_gc_disk_garbage_replica_interval_seconds,
218+
expiration_timestamp_s)) {
219+
continue;
220+
}
221+
} else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) {
115222
report.disk_migrate_tmp_count++;
116-
remove_interval_seconds = FLAGS_gc_disk_migration_tmp_replica_interval_seconds;
117-
} else if (folder_suffix == kFolderSuffixOri) {
223+
if (!get_expiration_timestamp_by_last_write_time(
224+
path,
225+
FLAGS_gc_disk_migration_tmp_replica_interval_seconds,
226+
expiration_timestamp_s)) {
227+
continue;
228+
}
229+
} else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) {
118230
report.disk_migrate_origin_count++;
119-
remove_interval_seconds = FLAGS_gc_disk_migration_origin_replica_interval_seconds;
231+
if (!get_expiration_timestamp_by_last_write_time(
232+
path,
233+
FLAGS_gc_disk_migration_origin_replica_interval_seconds,
234+
expiration_timestamp_s)) {
235+
continue;
236+
}
237+
} else {
238+
continue;
120239
}
121240

122-
if (last_write_time + remove_interval_seconds <= current_time_ms / 1000) {
123-
if (!dsn::utils::filesystem::remove_path(fpath)) {
124-
LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}",
125-
fpath,
126-
dsn_now_ms() - current_time_ms);
127-
} else {
241+
const auto current_time_ms = dsn_now_ms();
242+
if (expiration_timestamp_s <= current_time_ms / 1000) {
243+
if (dsn::utils::filesystem::remove_path(path)) {
128244
LOG_WARNING("gc_disk: replica_dir_op succeed to delete directory '{}'"
129245
", time_used_ms = {}",
130-
fpath,
246+
path,
131247
dsn_now_ms() - current_time_ms);
132248
report.remove_dir_count++;
249+
} else {
250+
LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}",
251+
path,
252+
dsn_now_ms() - current_time_ms);
133253
}
134254
} else {
135255
LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}",
136-
fpath,
137-
last_write_time + remove_interval_seconds - current_time_ms / 1000);
256+
path,
257+
expiration_timestamp_s - current_time_ms / 1000);
138258
}
139259
}
140260
return error_s::ok();
141261
}
142262

263+
// Returns true if `dir` ends with any of the suffixes kFolderSuffixErr, kFolderSuffixGar,
// kFolderSuffixTmp or kFolderSuffixOri.
bool is_data_dir_removable(const std::string &dir)
264+
{
265+
return boost::algorithm::ends_with(dir, kFolderSuffixErr) ||
266+
boost::algorithm::ends_with(dir, kFolderSuffixGar) ||
267+
boost::algorithm::ends_with(dir, kFolderSuffixTmp) ||
268+
boost::algorithm::ends_with(dir, kFolderSuffixOri);
269+
}
270+
271+
// Returns true if `dir` is removable (see is_data_dir_removable()) or ends with
// kFolderSuffixBak.
bool is_data_dir_invalid(const std::string &dir)
272+
{
273+
return is_data_dir_removable(dir) || boost::algorithm::ends_with(dir, kFolderSuffixBak);
274+
}
275+
143276
void move_to_err_path(const std::string &path, const std::string &log_prefix)
144277
{
145278
const std::string new_path = fmt::format("{}.{}{}", path, dsn_now_us(), kFolderSuffixErr);
@@ -150,5 +283,6 @@ void move_to_err_path(const std::string &path, const std::string &log_prefix)
150283
new_path);
151284
LOG_WARNING("{}: succeed to move directory from '{}' to '{}'", log_prefix, path, new_path);
152285
}
286+
153287
} // namespace replication
154288
} // namespace dsn

src/replica/disk_cleaner.h

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -55,26 +55,13 @@ struct disk_cleaning_report
5555
extern error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &dir_nodes,
5656
/*output*/ disk_cleaning_report &report);
5757

58-
inline bool is_data_dir_removable(const std::string &dir)
59-
{
60-
if (dir.length() < 4) {
61-
return false;
62-
}
63-
const std::string folder_suffix = dir.substr(dir.length() - 4);
64-
return (folder_suffix == kFolderSuffixErr || folder_suffix == kFolderSuffixGar ||
65-
folder_suffix == kFolderSuffixTmp || folder_suffix == kFolderSuffixOri);
66-
}
58+
bool is_data_dir_removable(const std::string &dir);
6759

68-
// Note: ".bak" is invalid but not allow delete, because it can be backed by administrator.
69-
inline bool is_data_dir_invalid(const std::string &dir)
70-
{
71-
if (dir.length() < 4) {
72-
return false;
73-
}
74-
const std::string folder_suffix = dir.substr(dir.length() - 4);
75-
return is_data_dir_removable(dir) || folder_suffix == kFolderSuffixBak;
76-
}
60+
// Note: ".bak" is invalid but not allowed to be deleted, because it may have been created by
61+
// administrator on purpose.
62+
bool is_data_dir_invalid(const std::string &dir);
7763

7864
void move_to_err_path(const std::string &path, const std::string &log_prefix);
65+
7966
} // namespace replication
8067
} // namespace dsn

src/replica/replica_stub.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1754,7 +1754,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id)
17541754
CHECK(
17551755
dsn::utils::filesystem::directory_exists(replica_path), "dir({}) not exist", replica_path);
17561756
LOG_INFO("start to move replica({}) as garbage, path: {}", id, replica_path);
1757-
const auto rename_path = fmt::format("{}.{}.gar", replica_path, dsn_now_us());
1757+
const auto rename_path = fmt::format("{}.{}{}", replica_path, dsn_now_us(), kFolderSuffixGar);
17581758
if (!dsn::utils::filesystem::rename_path(replica_path, rename_path)) {
17591759
LOG_WARNING("gc_replica: failed to move directory '{}' to '{}'", replica_path, rename_path);
17601760

0 commit comments

Comments
 (0)