Skip to content

Commit c3d0fea

Browse files
committed
fix: parse timestamp from the name of data dir for gc instead of the last update time
1 parent 48221f5 commit c3d0fea

File tree

2 files changed

+73
-34
lines changed

2 files changed

+73
-34
lines changed

src/replica/disk_cleaner.cpp

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,21 @@ const std::string kFolderSuffixTmp = ".tmp";
7777

7878
namespace {
7979

80-
bool get_expiration_seconds_by_last_write_time(const std::string &path,
81-
uint64_t delay_seconds,
82-
uint64_t &expiration_seconds)
80+
// TODO(wangdan): we could study later whether ctime (i.e. `st_ctime` within `struct stat`,
81+
// the time of last status change) could be used instead of mtime (i.e. `st_ctime` within
82+
// `struct stat`, the last write time), since ctime of the new directory would be updated
83+
// to the current time once rename() is called, while mtime would not be updated.
84+
bool get_expiration_timestamp_by_last_write_time(const std::string &path,
85+
uint64_t delay_seconds,
86+
uint64_t &expiration_timestamp_s)
8387
{
84-
time_t last_write_seconds;
85-
if (!dsn::utils::filesystem::last_write_time(path, last_write_seconds)) {
88+
time_t last_write_time_s;
89+
if (!dsn::utils::filesystem::last_write_time(path, last_write_time_s)) {
8690
LOG_WARNING("gc_disk: failed to get last write time of {}", path);
8791
return false;
8892
}
8993

90-
expiration_seconds = static_cast<uint64_t>(last_write_seconds) + delay_seconds;
94+
expiration_timestamp_s = static_cast<uint64_t>(last_write_time_s) + delay_seconds;
9195
return true;
9296
}
9397

@@ -97,12 +101,24 @@ bool get_expiration_seconds_by_last_write_time(const std::string &path,
97101
#define MIN_TIMESTAMP_US 1262275200000000
98102
#define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1)
99103

104+
// Parse timestamp from the directory name.
105+
//
106+
// There are only 2 kinds of directory names that could include timestamp: one is the faulty
107+
// replicas whose name has suffix ".err"; another is the dropped replicas whose name has
108+
// suffix ".gar". The examples for both kinds of directory names:
109+
// 1.1.pegasus.1698843209235962.err
110+
// 1.2.pegasus.1698843214240709.gar
111+
//
112+
// Specify the size of suffix by `suffix_size`. For both kinds of names (.err and .gar),
113+
// `suffix_size` is 4.
114+
//
115+
// The timestamp is the number the number just before the suffix, between the 2 dots. For
116+
// example, in 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp, in
117+
// microseconds, generated by dsn_now_us().
118+
//
119+
// `timestamp_us` is parsed result while returning true; otherwise, it would never be assigned.
100120
bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &timestamp_us)
101121
{
102-
// Examples of the directory names of faulty or dropped replicas could be:
103-
// 1.1.pegasus.1698843209235962.err
104-
// 2.1.pegasus.1698843214240709.gar
105-
106122
CHECK_GE(name.size(), suffix_size);
107123

108124
if (suffix_size == name.size()) {
@@ -137,11 +153,11 @@ bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &t
137153
return ok ? timestamp_us > MIN_TIMESTAMP_US : false;
138154
}
139155

140-
bool get_expiration_seconds_by_timestamp(const std::string &name,
141-
const std::string &path,
142-
size_t suffix_size,
143-
uint64_t delay_seconds,
144-
uint64_t &expiration_seconds)
156+
bool get_expiration_timestamp(const std::string &name,
157+
const std::string &path,
158+
size_t suffix_size,
159+
uint64_t delay_seconds,
160+
uint64_t &expiration_timestamp_s)
145161
{
146162
uint64_t timestamp_us = 0;
147163
if (!parse_timestamp_us(name, suffix_size, timestamp_us)) {
@@ -151,10 +167,11 @@ bool get_expiration_seconds_by_timestamp(const std::string &name,
151167
"the last write time for {}",
152168
name,
153169
path);
154-
return get_expiration_seconds_by_last_write_time(path, delay_seconds, expiration_seconds);
170+
return get_expiration_timestamp_by_last_write_time(
171+
path, delay_seconds, expiration_timestamp_s);
155172
}
156173

157-
expiration_seconds = timestamp_us / 1000000 + delay_seconds;
174+
expiration_timestamp_s = timestamp_us / 1000000 + delay_seconds;
158175
return true;
159176
}
160177

@@ -179,50 +196,50 @@ error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &d
179196
}
180197

181198
for (const auto &path : sub_list) {
182-
uint64_t expiration_seconds = 0;
199+
uint64_t expiration_timestamp_s = 0;
183200

184201
// Note: don't delete ".bak" directory since it could be did by administrator.
185202
const auto name = dsn::utils::filesystem::get_file_name(path);
186203
if (boost::algorithm::ends_with(name, kFolderSuffixErr)) {
187204
report.error_replica_count++;
188-
if (!get_expiration_seconds_by_timestamp(name,
189-
path,
190-
kFolderSuffixErr.size(),
191-
FLAGS_gc_disk_error_replica_interval_seconds,
192-
expiration_seconds)) {
205+
if (!get_expiration_timestamp(name,
206+
path,
207+
kFolderSuffixErr.size(),
208+
FLAGS_gc_disk_error_replica_interval_seconds,
209+
expiration_timestamp_s)) {
193210
continue;
194211
}
195212
} else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) {
196213
report.garbage_replica_count++;
197-
if (!get_expiration_seconds_by_timestamp(name,
198-
path,
199-
kFolderSuffixGar.size(),
200-
FLAGS_gc_disk_garbage_replica_interval_seconds,
201-
expiration_seconds)) {
214+
if (!get_expiration_timestamp(name,
215+
path,
216+
kFolderSuffixGar.size(),
217+
FLAGS_gc_disk_garbage_replica_interval_seconds,
218+
expiration_timestamp_s)) {
202219
continue;
203220
}
204221
} else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) {
205222
report.disk_migrate_tmp_count++;
206-
if (!get_expiration_seconds_by_last_write_time(
223+
if (!get_expiration_timestamp_by_last_write_time(
207224
path,
208225
FLAGS_gc_disk_migration_tmp_replica_interval_seconds,
209-
expiration_seconds)) {
226+
expiration_timestamp_s)) {
210227
continue;
211228
}
212229
} else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) {
213230
report.disk_migrate_origin_count++;
214-
if (!get_expiration_seconds_by_last_write_time(
231+
if (!get_expiration_timestamp_by_last_write_time(
215232
path,
216233
FLAGS_gc_disk_migration_origin_replica_interval_seconds,
217-
expiration_seconds)) {
234+
expiration_timestamp_s)) {
218235
continue;
219236
}
220237
} else {
221238
continue;
222239
}
223240

224241
const auto current_time_ms = dsn_now_ms();
225-
if (expiration_seconds <= current_time_ms / 1000) {
242+
if (expiration_timestamp_s <= current_time_ms / 1000) {
226243
if (dsn::utils::filesystem::remove_path(path)) {
227244
LOG_WARNING("gc_disk: replica_dir_op succeed to delete directory '{}'"
228245
", time_used_ms = {}",
@@ -237,7 +254,7 @@ error_s disk_remove_useless_dirs(const std::vector<std::shared_ptr<dir_node>> &d
237254
} else {
238255
LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}",
239256
path,
240-
expiration_seconds - current_time_ms / 1000);
257+
expiration_timestamp_s - current_time_ms / 1000);
241258
}
242259
}
243260
return error_s::ok();

src/replica/test/replica_disk_test.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,28 @@ TEST_P(replica_disk_test, on_query_disk_info_one_app)
200200
}
201201
}
202202

203+
TEST_P(replica_disk_test, check_data_dir_removable)
204+
{
205+
struct test_case
206+
{
207+
std::string path;
208+
bool expected_removable;
209+
bool expected_invalid;
210+
} tests[] = {{"./replica.0.err", true, true},
211+
{"./replica.1.gar", true, true},
212+
{"./replica.2.tmp", true, true},
213+
{"./replica.3.ori", true, true},
214+
{"./replica.4.bak", false, true},
215+
{"./replica.5.abcde", false, false},
216+
{"./replica.6.x", false, false},
217+
{"./replica.7.8", false, false}};
218+
219+
for (const auto &test : tests) {
220+
EXPECT_EQ(test.expected_removable, is_data_dir_removable(test.path));
221+
EXPECT_EQ(test.expected_invalid, is_data_dir_invalid(test.path));
222+
}
223+
}
224+
203225
TEST_P(replica_disk_test, gc_disk_useless_dir)
204226
{
205227
PRESERVE_FLAG(gc_disk_error_replica_interval_seconds);

0 commit comments

Comments
 (0)