From b637df404db47381e87b054a7683ff6542466b2e Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Oct 2023 17:24:01 +0800 Subject: [PATCH 01/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 47 +++++++++++++++++++++++++++++++----- src/replica/disk_cleaner.h | 23 ++++-------------- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index 9488c32a69..876d7460ef 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -19,11 +19,13 @@ #include "disk_cleaner.h" +#include #include #include #include #include #include +#include #include "common/fs_manager.h" #include "metadata_types.h" @@ -70,6 +72,27 @@ const std::string kFolderSuffixBak = ".bak"; const std::string kFolderSuffixOri = ".ori"; const std::string kFolderSuffixTmp = ".tmp"; +namespace { + +bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t ×tamp_us) +{ + CHECK_GE(name.size(), suffix_size); + + if (suffix_size == name.size()) { + return false; + } + + const size_t end_idx = name.size() - suffix_size; + auto begin_idx = name.find_last_of('.', end_idx - 1); + if (begin_idx == std::string::npos || ++begin_idx >= end_idx) { + return false; + } + + return dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, end_idx - begin_idx), timestamp_us); +} + +} // anonymous namespace + error_s disk_remove_useless_dirs(const std::vector> &dir_nodes, /*output*/ disk_cleaning_report &report) { @@ -87,12 +110,12 @@ error_s disk_remove_useless_dirs(const std::vector> &d } sub_list.insert(sub_list.end(), tmp_list.begin(), tmp_list.end()); } - for (auto &fpath : sub_list) { + + for (const auto &fpath : sub_list) { auto name = dsn::utils::filesystem::get_file_name(fpath); if (!is_data_dir_removable(name)) { continue; } - std::string folder_suffix = name.substr(name.length() - 4); time_t mt; if (!dsn::utils::filesystem::last_write_time(fpath, mt)) { @@ -105,16 +128,16 @@ error_s disk_remove_useless_dirs(const std::vector> &d uint64_t remove_interval_seconds = current_time_ms / 1000; // don't delete ".bak" directory because it is backed by administrator. - if (folder_suffix == kFolderSuffixErr) { + if (boost::algorithm::ends_with(name, kFolderSuffixErr)) { report.error_replica_count++; remove_interval_seconds = FLAGS_gc_disk_error_replica_interval_seconds; - } else if (folder_suffix == kFolderSuffixGar) { + } else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) { report.garbage_replica_count++; remove_interval_seconds = FLAGS_gc_disk_garbage_replica_interval_seconds; - } else if (folder_suffix == kFolderSuffixTmp) { + } else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) { report.disk_migrate_tmp_count++; remove_interval_seconds = FLAGS_gc_disk_migration_tmp_replica_interval_seconds; - } else if (folder_suffix == kFolderSuffixOri) { + } else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) { report.disk_migrate_origin_count++; remove_interval_seconds = FLAGS_gc_disk_migration_origin_replica_interval_seconds; } @@ -140,6 +163,17 @@ error_s disk_remove_useless_dirs(const std::vector> &d return error_s::ok(); } +bool is_data_dir_removable(const std::string &dir) +{ + return (boost::algorithm::ends_with(dir, kFolderSuffixErr) || boost::algorithm::ends_with(dir, kFolderSuffixGar) || + boost::algorithm::ends_with(dir, kFolderSuffixTmp) || boost::algorithm::ends_with(dir, kFolderSuffixOri); +} + +bool is_data_dir_invalid(const std::string &dir) +{ + return is_data_dir_removable(dir) || boost::algorithm::ends_with(dir, kFolderSuffixBak); +} + void move_to_err_path(const std::string &path, const std::string &log_prefix) { const std::string new_path = fmt::format("{}.{}{}", path, dsn_now_us(), kFolderSuffixErr); @@ -150,5 +184,6 @@ void move_to_err_path(const std::string &path, const std::string &log_prefix) new_path); LOG_WARNING("{}: succeed to move directory from '{}' to '{}'", log_prefix, path, new_path); } + } // namespace replication } // namespace dsn diff --git a/src/replica/disk_cleaner.h b/src/replica/disk_cleaner.h index 7961d084cf..9e2edb7523 100644 --- a/src/replica/disk_cleaner.h +++ b/src/replica/disk_cleaner.h @@ -55,26 +55,13 @@ struct disk_cleaning_report extern error_s disk_remove_useless_dirs(const std::vector> &dir_nodes, /*output*/ disk_cleaning_report &report); -inline bool is_data_dir_removable(const std::string &dir) -{ - if (dir.length() < 4) { - return false; - } - const std::string folder_suffix = dir.substr(dir.length() - 4); - return (folder_suffix == kFolderSuffixErr || folder_suffix == kFolderSuffixGar || - folder_suffix == kFolderSuffixTmp || folder_suffix == kFolderSuffixOri); -} +bool is_data_dir_removable(const std::string &dir); -// Note: ".bak" is invalid but not allow delete, because it can be backed by administrator. -inline bool is_data_dir_invalid(const std::string &dir) -{ - if (dir.length() < 4) { - return false; - } - const std::string folder_suffix = dir.substr(dir.length() - 4); - return is_data_dir_removable(dir) || folder_suffix == kFolderSuffixBak; -} +// Note: ".bak" is invalid but not allowed to be deleted, because it could be did by +// administrator on purpose. +bool is_data_dir_invalid(const std::string &dir); void move_to_err_path(const std::string &path, const std::string &log_prefix); + } // namespace replication } // namespace dsn From 464dc5e5a85159e613c66231a11614d588dfe0f2 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Oct 2023 17:26:35 +0800 Subject: [PATCH 02/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index 876d7460ef..af93ff94f8 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -34,6 +34,7 @@ #include "utils/filesystem.h" #include "utils/flags.h" #include "utils/fmt_logging.h" +#include "utils/string_conv.h" namespace dsn { namespace replication { From 94a5dfea13bc4d3847d90be2ceaa171c9c9d6aba Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 27 Oct 2023 21:48:52 +0800 Subject: [PATCH 03/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 66 +++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index af93ff94f8..3044fce49d 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -92,6 +92,29 @@ bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &t return dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, end_idx - begin_idx), timestamp_us); } +bool get_expiration_seconds_by_timestamp(const std::string &name, size_t suffix_size, uint64_t delay_seconds, uint64_t &expiration_seconds) +{ + uint64_t timestamp_us = 0; + if (!parse_timestamp_us(name, suffix_size, timestamp_us)) { + return false; + } + + expiration_seconds = timestamp_us / 1000000 + delay_seconds; + return true; +} + +bool get_expiration_seconds_by_last_write_time(const std::string &path, uint64_t delay_seconds, uint64_t &expiration_seconds) +{ + time_t last_write_seconds; + if (!dsn::utils::filesystem::last_write_time(path, last_write_seconds)) { + LOG_WARNING("gc_disk: failed to get last write time of {}", path); + return false; + } + + expiration_second = static_cast(last_write_seconds) + delay_seconds; + return true; +} + } // anonymous namespace error_s disk_remove_useless_dirs(const std::vector> &dir_nodes, @@ -118,47 +141,50 @@ error_s disk_remove_useless_dirs(const std::vector> &d continue; } - time_t mt; - if (!dsn::utils::filesystem::last_write_time(fpath, mt)) { - LOG_WARNING("gc_disk: failed to get last write time of {}", fpath); - continue; - } - - auto last_write_time = (uint64_t)mt; - uint64_t current_time_ms = dsn_now_ms(); - uint64_t remove_interval_seconds = current_time_ms / 1000; + uint64_t expiration_seconds = 0; // don't delete ".bak" directory because it is backed by administrator. if (boost::algorithm::ends_with(name, kFolderSuffixErr)) { report.error_replica_count++; - remove_interval_seconds = FLAGS_gc_disk_error_replica_interval_seconds; + if (!get_expiration_seconds_by_timestamp(name, kFolderSuffixErr.size(),FLAGS_gc_disk_error_replica_interval_seconds, expiration_seconds)) { + continue; + } } else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) { report.garbage_replica_count++; - remove_interval_seconds = FLAGS_gc_disk_garbage_replica_interval_seconds; + if (get_expiration_seconds_by_timestamp(name, kFolderSuffixGar.size(), FLAGS_gc_disk_garbage_replica_interval_seconds, expiration_seconds)) { + continue; + } } else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) { report.disk_migrate_tmp_count++; - remove_interval_seconds = FLAGS_gc_disk_migration_tmp_replica_interval_seconds; + if (!get_expiration_seconds_by_last_write_time(fpath, FLAGS_gc_disk_migration_tmp_replica_interval_seconds, expiration_seconds)) { + continue; + } } else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) { report.disk_migrate_origin_count++; - remove_interval_seconds = FLAGS_gc_disk_migration_origin_replica_interval_seconds; + if (!get_expiration_seconds_by_last_write_time(fpath, FLAGS_gc_disk_migration_origin_replica_interval_seconds, expiration_seconds)) { + continue; + } + } else { + continue; } - if (last_write_time + remove_interval_seconds <= current_time_ms / 1000) { - if (!dsn::utils::filesystem::remove_path(fpath)) { - LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}", - fpath, - dsn_now_ms() - current_time_ms); - } else { + auto current_time_ms = dsn_now_ms(); + if (expiration_seconds <= current_time_ms / 1000) { + if (dsn::utils::filesystem::remove_path(fpath)) { LOG_WARNING("gc_disk: replica_dir_op succeed to delete directory '{}'" ", time_used_ms = {}", fpath, dsn_now_ms() - current_time_ms); report.remove_dir_count++; + } else { + LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}", + fpath, + dsn_now_ms() - current_time_ms); } } else { LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}", fpath, - last_write_time + remove_interval_seconds - current_time_ms / 1000); + expiration_seconds - current_time_ms / 1000); } } return error_s::ok(); From 1e2834495417082092fdaf9efdc5669f644087fe Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Tue, 31 Oct 2023 15:59:41 +0800 Subject: [PATCH 04/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index 3044fce49d..5ac8a2da76 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -84,15 +84,19 @@ bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &t } const size_t end_idx = name.size() - suffix_size; - auto begin_idx = name.find_last_of('.', end_idx - 1); + auto begin_idx = name.find_last_of('.', end_idx - 1); if (begin_idx == std::string::npos || ++begin_idx >= end_idx) { return false; } - return dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, end_idx - begin_idx), timestamp_us); + return dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, end_idx - begin_idx), + timestamp_us); } -bool get_expiration_seconds_by_timestamp(const std::string &name, size_t suffix_size, uint64_t delay_seconds, uint64_t &expiration_seconds) +bool get_expiration_seconds_by_timestamp(const std::string &name, + size_t suffix_size, + uint64_t delay_seconds, + uint64_t &expiration_seconds) { uint64_t timestamp_us = 0; if (!parse_timestamp_us(name, suffix_size, timestamp_us)) { @@ -103,7 +107,9 @@ bool get_expiration_seconds_by_timestamp(const std::string &name, size_t suffix_ return true; } -bool get_expiration_seconds_by_last_write_time(const std::string &path, uint64_t delay_seconds, uint64_t &expiration_seconds) +bool get_expiration_seconds_by_last_write_time(const std::string &path, + uint64_t delay_seconds, + uint64_t &expiration_seconds) { time_t last_write_seconds; if (!dsn::utils::filesystem::last_write_time(path, last_write_seconds)) { @@ -146,22 +152,34 @@ error_s disk_remove_useless_dirs(const std::vector> &d // don't delete ".bak" directory because it is backed by administrator. if (boost::algorithm::ends_with(name, kFolderSuffixErr)) { report.error_replica_count++; - if (!get_expiration_seconds_by_timestamp(name, kFolderSuffixErr.size(),FLAGS_gc_disk_error_replica_interval_seconds, expiration_seconds)) { + if (!get_expiration_seconds_by_timestamp(name, + kFolderSuffixErr.size(), + FLAGS_gc_disk_error_replica_interval_seconds, + expiration_seconds)) { continue; } } else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) { report.garbage_replica_count++; - if (get_expiration_seconds_by_timestamp(name, kFolderSuffixGar.size(), FLAGS_gc_disk_garbage_replica_interval_seconds, expiration_seconds)) { + if (get_expiration_seconds_by_timestamp(name, + kFolderSuffixGar.size(), + FLAGS_gc_disk_garbage_replica_interval_seconds, + expiration_seconds)) { continue; } } else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) { report.disk_migrate_tmp_count++; - if (!get_expiration_seconds_by_last_write_time(fpath, FLAGS_gc_disk_migration_tmp_replica_interval_seconds, expiration_seconds)) { + if (!get_expiration_seconds_by_last_write_time( + fpath, + FLAGS_gc_disk_migration_tmp_replica_interval_seconds, + expiration_seconds)) { continue; } } else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) { report.disk_migrate_origin_count++; - if (!get_expiration_seconds_by_last_write_time(fpath, FLAGS_gc_disk_migration_origin_replica_interval_seconds, expiration_seconds)) { + if (!get_expiration_seconds_by_last_write_time( + fpath, + FLAGS_gc_disk_migration_origin_replica_interval_seconds, + expiration_seconds)) { continue; } } else { From 9dc66d23861a6134a9e190d2977e2572aa188469 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 1 Nov 2023 13:21:52 +0800 Subject: [PATCH 05/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index 5ac8a2da76..2df542211d 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -117,7 +117,7 @@ bool get_expiration_seconds_by_last_write_time(const std::string &path, return false; } - expiration_second = static_cast(last_write_seconds) + delay_seconds; + expiration_seconds = static_cast(last_write_seconds) + delay_seconds; return true; } @@ -210,8 +210,10 @@ error_s disk_remove_useless_dirs(const std::vector> &d bool is_data_dir_removable(const std::string &dir) { - return (boost::algorithm::ends_with(dir, kFolderSuffixErr) || boost::algorithm::ends_with(dir, kFolderSuffixGar) || - boost::algorithm::ends_with(dir, kFolderSuffixTmp) || boost::algorithm::ends_with(dir, kFolderSuffixOri); + return boost::algorithm::ends_with(dir, kFolderSuffixErr) || + boost::algorithm::ends_with(dir, kFolderSuffixGar) || + boost::algorithm::ends_with(dir, kFolderSuffixTmp) || + boost::algorithm::ends_with(dir, kFolderSuffixOri); } bool is_data_dir_invalid(const std::string &dir) From b38a237d8f92e249c6dfd141e0c19796d38eec63 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 1 Nov 2023 17:57:57 +0800 Subject: [PATCH 06/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 90 ++++++++++++++++++++++-------------- src/replica/replica_stub.cpp | 2 +- src/utils/macros.h | 21 +++++++++ src/utils/metrics.h | 3 +- 4 files changed, 78 insertions(+), 38 deletions(-) create mode 100644 src/utils/macros.h diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index 2df542211d..e497932165 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -34,7 +34,9 @@ #include "utils/filesystem.h" #include "utils/flags.h" #include "utils/fmt_logging.h" +#include "utils/macros.h" #include "utils/string_conv.h" +#include "utils/string_view.h" namespace dsn { namespace replication { @@ -75,6 +77,24 @@ const std::string kFolderSuffixTmp = ".tmp"; namespace { +bool get_expiration_seconds_by_last_write_time(const std::string &path, + uint64_t delay_seconds, + uint64_t &expiration_seconds) +{ + time_t last_write_seconds; + if (!dsn::utils::filesystem::last_write_time(path, last_write_seconds)) { + LOG_WARNING("gc_disk: failed to get last write time of {}", path); + return false; + } + + expiration_seconds = static_cast(last_write_seconds) + delay_seconds; + return true; +} + +// Unix timestamp in microseconds for 2010-01-01 00:00:00. +#define MIN_TIMESTAMP_US 1262275200000000 +#define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1) + bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t ×tamp_us) { CHECK_GE(name.size(), suffix_size); @@ -89,38 +109,40 @@ bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &t return false; } - return dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, end_idx - begin_idx), - timestamp_us); + const auto length = end_idx - begin_idx; + if (length < MIN_TIMESTAMP_US_LENGTH) { + return false; + } + + const auto begin_itr = name.cbegin() + begin_idx; + if (!std::all_of(begin_itr, begin_itr + length, ::isdigit)) { + return false; + } + + const auto ok = + dsn::buf2uint64(dsn::string_view(name.data() + begin_idx, length), timestamp_us); + return ok ? timestamp_us > MIN_TIMESTAMP_US : false; } bool get_expiration_seconds_by_timestamp(const std::string &name, + const std::string &path, size_t suffix_size, uint64_t delay_seconds, uint64_t &expiration_seconds) { uint64_t timestamp_us = 0; if (!parse_timestamp_us(name, suffix_size, timestamp_us)) { - return false; + LOG_WARNING("gc_disk: failed to parse timestamp from {}, turn to " + "the last write time for {}", + name, + path); + return get_expiration_seconds_by_last_write_time(path, delay_seconds, expiration_seconds); } expiration_seconds = timestamp_us / 1000000 + delay_seconds; return true; } -bool get_expiration_seconds_by_last_write_time(const std::string &path, - uint64_t delay_seconds, - uint64_t &expiration_seconds) -{ - time_t last_write_seconds; - if (!dsn::utils::filesystem::last_write_time(path, last_write_seconds)) { - LOG_WARNING("gc_disk: failed to get last write time of {}", path); - return false; - } - - expiration_seconds = static_cast(last_write_seconds) + delay_seconds; - return true; -} - } // anonymous namespace error_s disk_remove_useless_dirs(const std::vector> &dir_nodes, @@ -141,18 +163,15 @@ error_s disk_remove_useless_dirs(const std::vector> &d sub_list.insert(sub_list.end(), tmp_list.begin(), tmp_list.end()); } - for (const auto &fpath : sub_list) { - auto name = dsn::utils::filesystem::get_file_name(fpath); - if (!is_data_dir_removable(name)) { - continue; - } - + for (const auto &path : sub_list) { uint64_t expiration_seconds = 0; - // don't delete ".bak" directory because it is backed by administrator. + // Note: don't delete ".bak" directory since it could be did by administrator. + const auto name = dsn::utils::filesystem::get_file_name(path); if (boost::algorithm::ends_with(name, kFolderSuffixErr)) { report.error_replica_count++; if (!get_expiration_seconds_by_timestamp(name, + path, kFolderSuffixErr.size(), FLAGS_gc_disk_error_replica_interval_seconds, expiration_seconds)) { @@ -160,16 +179,17 @@ error_s disk_remove_useless_dirs(const std::vector> &d } } else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) { report.garbage_replica_count++; - if (get_expiration_seconds_by_timestamp(name, - kFolderSuffixGar.size(), - FLAGS_gc_disk_garbage_replica_interval_seconds, - expiration_seconds)) { + if (!get_expiration_seconds_by_timestamp(name, + path, + kFolderSuffixGar.size(), + FLAGS_gc_disk_garbage_replica_interval_seconds, + expiration_seconds)) { continue; } } else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) { report.disk_migrate_tmp_count++; if (!get_expiration_seconds_by_last_write_time( - fpath, + path, FLAGS_gc_disk_migration_tmp_replica_interval_seconds, expiration_seconds)) { continue; @@ -177,7 +197,7 @@ error_s disk_remove_useless_dirs(const std::vector> &d } else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) { report.disk_migrate_origin_count++; if (!get_expiration_seconds_by_last_write_time( - fpath, + path, FLAGS_gc_disk_migration_origin_replica_interval_seconds, expiration_seconds)) { continue; @@ -186,22 +206,22 @@ error_s disk_remove_useless_dirs(const std::vector> &d continue; } - auto current_time_ms = dsn_now_ms(); + const auto current_time_ms = dsn_now_ms(); if (expiration_seconds <= current_time_ms / 1000) { - if (dsn::utils::filesystem::remove_path(fpath)) { + if (dsn::utils::filesystem::remove_path(path)) { LOG_WARNING("gc_disk: replica_dir_op succeed to delete directory '{}'" ", time_used_ms = {}", - fpath, + path, dsn_now_ms() - current_time_ms); report.remove_dir_count++; } else { LOG_WARNING("gc_disk: failed to delete directory '{}', time_used_ms = {}", - fpath, + path, dsn_now_ms() - current_time_ms); } } else { LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}", - fpath, + path, expiration_seconds - current_time_ms / 1000); } } diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp index c5bd5000a9..e1815b48bf 100644 --- a/src/replica/replica_stub.cpp +++ b/src/replica/replica_stub.cpp @@ -1754,7 +1754,7 @@ void replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id) CHECK( dsn::utils::filesystem::directory_exists(replica_path), "dir({}) not exist", replica_path); LOG_INFO("start to move replica({}) as garbage, path: {}", id, replica_path); - const auto rename_path = fmt::format("{}.{}.gar", replica_path, dsn_now_us()); + const auto rename_path = fmt::format("{}.{}{}", replica_path, dsn_now_us(), kFolderSuffixGar); if (!dsn::utils::filesystem::rename_path(replica_path, rename_path)) { LOG_WARNING("gc_replica: failed to move directory '{}' to '{}'", replica_path, rename_path); diff --git a/src/utils/macros.h b/src/utils/macros.h new file mode 100644 index 0000000000..596260819a --- /dev/null +++ b/src/utils/macros.h @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) diff --git a/src/utils/metrics.h b/src/utils/metrics.h index 27c6355f32..75afc31d89 100644 --- a/src/utils/metrics.h +++ b/src/utils/metrics.h @@ -46,6 +46,7 @@ #include "utils/enum_helper.h" #include "utils/fmt_logging.h" #include "utils/long_adder.h" +#include "utils/macros.h" #include "utils/nth_element.h" #include "utils/ports.h" #include "utils/singleton.h" @@ -973,8 +974,6 @@ struct kth_percentile_property double decimal; }; -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) #define STRINGIFY_KTH_PERCENTILE_NAME(kth) STRINGIFY(KTH_PERCENTILE_NAME(kth)) #define KTH_TO_DECIMAL(kth) 0.##kth #define KTH_PERCENTILE_PROPERTY_LIST(kth) \ From cae375b6357d976632581ea98ec3dce06d5051ec Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 1 Nov 2023 18:46:36 +0800 Subject: [PATCH 07/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 6 +++- src/replica/test/replica_disk_test.cpp | 43 ++++++++++++++------------ 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index e497932165..a7d8b53b01 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "common/fs_manager.h" @@ -114,8 +115,11 @@ bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &t return false; } + // https://stackoverflow.com/questions/75868796/differences-between-isdigit-and-stdisdigit + // https://en.cppreference.com/w/cpp/string/byte/isdigit const auto begin_itr = name.cbegin() + begin_idx; - if (!std::all_of(begin_itr, begin_itr + length, ::isdigit)) { + if (!std::all_of( + begin_itr, begin_itr + length, [](unsigned char c) { return std::isdigit(c); })) { return false; } diff --git a/src/replica/test/replica_disk_test.cpp b/src/replica/test/replica_disk_test.cpp index 3bf7459347..53d5078cf3 100644 --- a/src/replica/test/replica_disk_test.cpp +++ b/src/replica/test/replica_disk_test.cpp @@ -205,39 +205,42 @@ TEST_P(replica_disk_test, gc_disk_useless_dir) FLAGS_gc_disk_migration_origin_replica_interval_seconds = 1; FLAGS_gc_disk_migration_tmp_replica_interval_seconds = 1; - std::vector tests{ - "./replica1.err", - "./replica2.err", - "./replica.gar", - "./replica.tmp", - "./replica.ori", - "./replica.bak", - "./replica.1.1", - }; + struct test_case + { + std::string path; + bool expected_exists; + } tests[] = {{"./replica1.err", false}, + {"./replica2.err", false}, + {"./replica.gar", false}, + {"./replica.tmp", false}, + {"./replica.ori", false}, + {"./replica.bak", true}, + {"./replica.1.1", true}, + {fmt::format("./1.1.pegasus.{}.err", dsn_now_us()), false}, + {fmt::format("./2.1.pegasus.{}.gar", dsn_now_us()), false}, + {fmt::format("./1.2.pegasus.{}.gar", dsn_now_us() + 1000 * 1000 * 1000), true}, + {fmt::format("./2.2.pegasus.{}.err", dsn_now_us() + 1000 * 1000 * 1000), true}}; for (const auto &test : tests) { - utils::filesystem::create_directory(test); - ASSERT_TRUE(utils::filesystem::directory_exists(test)); + utils::filesystem::create_directory(test.path); + ASSERT_TRUE(utils::filesystem::directory_exists(test.path)); } sleep(5); disk_cleaning_report report{}; - dsn::replication::disk_remove_useless_dirs({std::make_shared("test", "./")}, report); + ASSERT_TRUE(dsn::replication::disk_remove_useless_dirs( + {std::make_shared("test", "./")}, report)); for (const auto &test : tests) { - if (!dsn::replication::is_data_dir_removable(test)) { - ASSERT_TRUE(utils::filesystem::directory_exists(test)); - continue; - } - ASSERT_FALSE(utils::filesystem::directory_exists(test)); + ASSERT_EQ(test.expected_exists, utils::filesystem::directory_exists(test.path)); } - ASSERT_EQ(report.remove_dir_count, 5); + ASSERT_EQ(report.remove_dir_count, 7); ASSERT_EQ(report.disk_migrate_origin_count, 1); ASSERT_EQ(report.disk_migrate_tmp_count, 1); - ASSERT_EQ(report.garbage_replica_count, 1); - ASSERT_EQ(report.error_replica_count, 2); + ASSERT_EQ(report.garbage_replica_count, 3); + ASSERT_EQ(report.error_replica_count, 4); } TEST_P(replica_disk_test, disk_status_test) From e2309cd710fc4522f993cc1cfccaba9e56788760 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Wed, 1 Nov 2023 21:43:30 +0800 Subject: [PATCH 08/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 13 ++++++++++++- src/replica/test/replica_disk_test.cpp | 15 ++++++++++++++- src/test_util/test_util.h | 7 +++++++ src/utils/macros.h | 7 +++++++ 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index a7d8b53b01..33d2c80fc3 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include "common/fs_manager.h" #include "metadata_types.h" @@ -93,11 +92,17 @@ bool get_expiration_seconds_by_last_write_time(const std::string &path, } // Unix timestamp in microseconds for 2010-01-01 00:00:00. +// This timestamp could be used as the minimum, since it's far earlier than the time when +// Pegasus was born. #define MIN_TIMESTAMP_US 1262275200000000 #define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1) bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t ×tamp_us) { + // Examples of the directory names of faulty or dropped replicas could be: + // 1.1.pegasus.1698843209235962.err + // 2.1.pegasus.1698843214240709.gar + CHECK_GE(name.size(), suffix_size); if (suffix_size == name.size()) { @@ -115,6 +120,10 @@ bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &t return false; } + // std::isdigit() is not an addressable standard library function, thus it can't be used + // directly as an algorithm predicate. + // + // See following docs for details. // https://stackoverflow.com/questions/75868796/differences-between-isdigit-and-stdisdigit // https://en.cppreference.com/w/cpp/string/byte/isdigit const auto begin_itr = name.cbegin() + begin_idx; @@ -136,6 +145,8 @@ bool get_expiration_seconds_by_timestamp(const std::string &name, { uint64_t timestamp_us = 0; if (!parse_timestamp_us(name, suffix_size, timestamp_us)) { + // Once the timestamp could not be extracted from the directory name, the last write time + // would be used as the base time to compute the expiration time. LOG_WARNING("gc_disk: failed to parse timestamp from {}, turn to " "the last write time for {}", name, diff --git a/src/replica/test/replica_disk_test.cpp b/src/replica/test/replica_disk_test.cpp index 53d5078cf3..0a60862524 100644 --- a/src/replica/test/replica_disk_test.cpp +++ b/src/replica/test/replica_disk_test.cpp @@ -17,6 +17,7 @@ * under the License. */ +#include // IWYU pragma: no_include // IWYU pragma: no_include // IWYU pragma: no_include @@ -42,6 +43,7 @@ #include "replica/test/mock_utils.h" #include "replica_admin_types.h" #include "replica_disk_test_base.h" +#include "runtime/api_layer1.h" #include "runtime/rpc/rpc_holder.h" #include "test_util/test_util.h" #include "utils/autoref_ptr.h" @@ -200,6 +202,11 @@ TEST_P(replica_disk_test, on_query_disk_info_one_app) TEST_P(replica_disk_test, gc_disk_useless_dir) { + PRESERVE_FLAG(gc_disk_error_replica_interval_seconds); + PRESERVE_FLAG(gc_disk_garbage_replica_interval_seconds); + PRESERVE_FLAG(gc_disk_migration_origin_replica_interval_seconds); + PRESERVE_FLAG(gc_disk_migration_tmp_replica_interval_seconds); + FLAGS_gc_disk_error_replica_interval_seconds = 1; FLAGS_gc_disk_garbage_replica_interval_seconds = 1; FLAGS_gc_disk_migration_origin_replica_interval_seconds = 1; @@ -222,7 +229,8 @@ TEST_P(replica_disk_test, gc_disk_useless_dir) {fmt::format("./2.2.pegasus.{}.err", dsn_now_us() + 1000 * 1000 * 1000), true}}; for (const auto &test : tests) { - utils::filesystem::create_directory(test.path); + // Ensure that every directory does not exist and should be created. + CHECK_TRUE(utils::filesystem::create_directory(test.path)); ASSERT_TRUE(utils::filesystem::directory_exists(test.path)); } @@ -234,6 +242,11 @@ TEST_P(replica_disk_test, gc_disk_useless_dir) for (const auto &test : tests) { ASSERT_EQ(test.expected_exists, utils::filesystem::directory_exists(test.path)); + if (test.expected_exists) { + // Delete existing directories, in case that they are mixed with later test cases + // to affect test results. + CHECK_TRUE(dsn::utils::filesystem::remove_path(test.path)); + } } ASSERT_EQ(report.remove_dir_count, 7); diff --git a/src/test_util/test_util.h b/src/test_util/test_util.h index ba253e7b35..4b3ba77d26 100644 --- a/src/test_util/test_util.h +++ b/src/test_util/test_util.h @@ -28,6 +28,7 @@ #include #include "runtime/api_layer1.h" +#include "utils/defer.h" #include "utils/env.h" #include "utils/flags.h" #include "utils/test_macros.h" @@ -40,6 +41,12 @@ class file_meta; DSN_DECLARE_bool(encrypt_data_at_rest); +// Save the current value of a flag and restore it at the end of the function. +#define PRESERVE_FLAG(name) \ + auto PRESERVED_FLAGS_##name = FLAGS_##name; \ + auto PRESERVED_FLAGS_##name##_cleanup = \ + dsn::defer([PRESERVED_FLAGS_##name]() { FLAGS_##name = PRESERVED_FLAGS_##name; }) + namespace pegasus { // A base parameterized test class for testing enable/disable encryption at rest. diff --git a/src/utils/macros.h b/src/utils/macros.h index 596260819a..c44c45e252 100644 --- a/src/utils/macros.h +++ b/src/utils/macros.h @@ -17,5 +17,12 @@ #pragma once +// Suppose there is a macro defined as: +// #define FOO 123 +// +// Once we need the value represented by FOO to be a string, i.e. "123", just do: +// STRINGIFY(FOO) +// +// See https://gcc.gnu.org/onlinedocs/gcc-4.8.5/cpp/Stringification.html for details. #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) From d26babbc0ecceeb5d7751400188b0feb022215d7 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Thu, 2 Nov 2023 12:18:09 +0800 Subject: [PATCH 09/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/test/replica_disk_test.cpp | 10 +++++++--- src/test_util/test_util.h | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/replica/test/replica_disk_test.cpp b/src/replica/test/replica_disk_test.cpp index 0a60862524..b36ed13d82 100644 --- a/src/replica/test/replica_disk_test.cpp +++ b/src/replica/test/replica_disk_test.cpp @@ -223,6 +223,10 @@ TEST_P(replica_disk_test, gc_disk_useless_dir) {"./replica.ori", false}, {"./replica.bak", true}, {"./replica.1.1", true}, + {"./1.1.pegasus.1234567890.err", false}, + {"./1.2.pegasus.0123456789.gar", false}, + {"./2.1.pegasus.1234567890123456.err", false}, + {"./2.2.pegasus.1234567890abcdef.gar", false}, {fmt::format("./1.1.pegasus.{}.err", dsn_now_us()), false}, {fmt::format("./2.1.pegasus.{}.gar", dsn_now_us()), false}, {fmt::format("./1.2.pegasus.{}.gar", dsn_now_us() + 1000 * 1000 * 1000), true}, @@ -249,11 +253,11 @@ TEST_P(replica_disk_test, gc_disk_useless_dir) } } - ASSERT_EQ(report.remove_dir_count, 7); + ASSERT_EQ(report.remove_dir_count, 11); ASSERT_EQ(report.disk_migrate_origin_count, 1); ASSERT_EQ(report.disk_migrate_tmp_count, 1); - ASSERT_EQ(report.garbage_replica_count, 3); - ASSERT_EQ(report.error_replica_count, 4); + ASSERT_EQ(report.garbage_replica_count, 5); + ASSERT_EQ(report.error_replica_count, 6); } TEST_P(replica_disk_test, disk_status_test) diff --git a/src/test_util/test_util.h b/src/test_util/test_util.h index 4b3ba77d26..8d71bf4959 100644 --- a/src/test_util/test_util.h +++ b/src/test_util/test_util.h @@ -19,12 +19,12 @@ #pragma once +#include +#include #include #include #include -#include #include -#include #include #include "runtime/api_layer1.h" From aa5e0626ce8ac4ffefe30de010a5bf88e2b7dea6 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Thu, 2 Nov 2023 13:23:39 +0800 Subject: [PATCH 10/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/test_util/test_util.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test_util/test_util.cpp b/src/test_util/test_util.cpp index a4dbf41deb..034f009b20 100644 --- a/src/test_util/test_util.cpp +++ b/src/test_util/test_util.cpp @@ -30,7 +30,6 @@ #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "runtime/api_layer1.h" -#include "utils/defer.h" #include "utils/env.h" #include "utils/error_code.h" #include "utils/filesystem.h" From 3c0dff20c917a7e366bbb351028ed898f83b8084 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Thu, 2 Nov 2023 20:12:28 +0800 Subject: [PATCH 11/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/test/replica_disk_test.cpp | 1 + src/test_util/test_util.cpp | 1 + src/test_util/test_util.h | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/replica/test/replica_disk_test.cpp b/src/replica/test/replica_disk_test.cpp index b36ed13d82..a24b8359ef 100644 --- a/src/replica/test/replica_disk_test.cpp +++ b/src/replica/test/replica_disk_test.cpp @@ -47,6 +47,7 @@ #include "runtime/rpc/rpc_holder.h" #include "test_util/test_util.h" #include "utils/autoref_ptr.h" +#include "utils/defer.h" #include "utils/error_code.h" #include "utils/filesystem.h" #include "utils/flags.h" diff --git a/src/test_util/test_util.cpp b/src/test_util/test_util.cpp index 034f009b20..a4dbf41deb 100644 --- a/src/test_util/test_util.cpp +++ b/src/test_util/test_util.cpp @@ -30,6 +30,7 @@ #include "rocksdb/slice.h" #include "rocksdb/status.h" #include "runtime/api_layer1.h" +#include "utils/defer.h" #include "utils/env.h" #include "utils/error_code.h" #include "utils/filesystem.h" diff --git a/src/test_util/test_util.h b/src/test_util/test_util.h index 8d71bf4959..592ef55c75 100644 --- a/src/test_util/test_util.h +++ b/src/test_util/test_util.h @@ -28,7 +28,6 @@ #include #include "runtime/api_layer1.h" -#include "utils/defer.h" #include "utils/env.h" #include "utils/flags.h" #include "utils/test_macros.h" From 69d8aa40d6a10cde92caaf0f6047cf51c420b5f3 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Thu, 2 Nov 2023 21:32:29 +0800 Subject: [PATCH 12/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/test/replica_disk_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/replica/test/replica_disk_test.cpp b/src/replica/test/replica_disk_test.cpp index a24b8359ef..b36ed13d82 100644 --- a/src/replica/test/replica_disk_test.cpp +++ b/src/replica/test/replica_disk_test.cpp @@ -47,7 +47,6 @@ #include "runtime/rpc/rpc_holder.h" #include "test_util/test_util.h" #include "utils/autoref_ptr.h" -#include "utils/defer.h" #include "utils/error_code.h" #include "utils/filesystem.h" #include "utils/flags.h" From 48221f53a1281988d867e07aaebc2bbe2a24a76b Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Fri, 3 Nov 2023 11:18:58 +0800 Subject: [PATCH 13/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/test_util/test_util.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/test_util/test_util.h b/src/test_util/test_util.h index 592ef55c75..90930c226c 100644 --- a/src/test_util/test_util.h +++ b/src/test_util/test_util.h @@ -29,6 +29,10 @@ #include "runtime/api_layer1.h" #include "utils/env.h" +// IWYU refused to include "utils/defer.h" everywhere, both in .h and .cpp files. +// However, once "utils/defer.h" is not included, it is inevitable that compilation +// will fail since dsn::defer is referenced. Thus force IWYU to keep it. +#include "utils/defer.h" // IWYU pragma: keep #include "utils/flags.h" #include "utils/test_macros.h" From c3d0fea4c2f6660527dcde504721205c7727bb9b Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Mon, 6 Nov 2023 12:38:08 +0800 Subject: [PATCH 14/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 85 +++++++++++++++----------- src/replica/test/replica_disk_test.cpp | 22 +++++++ 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index 33d2c80fc3..358015e4c3 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -77,17 +77,21 @@ const std::string kFolderSuffixTmp = ".tmp"; namespace { -bool get_expiration_seconds_by_last_write_time(const std::string &path, - uint64_t delay_seconds, - uint64_t &expiration_seconds) +// TODO(wangdan): we could study later whether ctime (i.e. `st_ctime` within `struct stat`, +// the time of last status change) could be used instead of mtime (i.e. `st_ctime` within +// `struct stat`, the last write time), since ctime of the new directory would be updated +// to the current time once rename() is called, while mtime would not be updated. +bool get_expiration_timestamp_by_last_write_time(const std::string &path, + uint64_t delay_seconds, + uint64_t &expiration_timestamp_s) { - time_t last_write_seconds; - if (!dsn::utils::filesystem::last_write_time(path, last_write_seconds)) { + time_t last_write_time_s; + if (!dsn::utils::filesystem::last_write_time(path, last_write_time_s)) { LOG_WARNING("gc_disk: failed to get last write time of {}", path); return false; } - expiration_seconds = static_cast(last_write_seconds) + delay_seconds; + expiration_timestamp_s = static_cast(last_write_time_s) + delay_seconds; return true; } @@ -97,12 +101,24 @@ bool get_expiration_seconds_by_last_write_time(const std::string &path, #define MIN_TIMESTAMP_US 1262275200000000 #define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1) +// Parse timestamp from the directory name. +// +// There are only 2 kinds of directory names that could include timestamp: one is the faulty +// replicas whose name has suffix ".err"; another is the dropped replicas whose name has +// suffix ".gar". The examples for both kinds of directory names: +// 1.1.pegasus.1698843209235962.err +// 1.2.pegasus.1698843214240709.gar +// +// Specify the size of suffix by `suffix_size`. For both kinds of names (.err and .gar), +// `suffix_size` is 4. +// +// The timestamp is the number the number just before the suffix, between the 2 dots. For +// example, in 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp, in +// microseconds, generated by dsn_now_us(). +// +// `timestamp_us` is parsed result while returning true; otherwise, it would never be assigned. bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t ×tamp_us) { - // Examples of the directory names of faulty or dropped replicas could be: - // 1.1.pegasus.1698843209235962.err - // 2.1.pegasus.1698843214240709.gar - CHECK_GE(name.size(), suffix_size); if (suffix_size == name.size()) { @@ -137,11 +153,11 @@ bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t &t return ok ? timestamp_us > MIN_TIMESTAMP_US : false; } -bool get_expiration_seconds_by_timestamp(const std::string &name, - const std::string &path, - size_t suffix_size, - uint64_t delay_seconds, - uint64_t &expiration_seconds) +bool get_expiration_timestamp(const std::string &name, + const std::string &path, + size_t suffix_size, + uint64_t delay_seconds, + uint64_t &expiration_timestamp_s) { uint64_t timestamp_us = 0; if (!parse_timestamp_us(name, suffix_size, timestamp_us)) { @@ -151,10 +167,11 @@ bool get_expiration_seconds_by_timestamp(const std::string &name, "the last write time for {}", name, path); - return get_expiration_seconds_by_last_write_time(path, delay_seconds, expiration_seconds); + return get_expiration_timestamp_by_last_write_time( + path, delay_seconds, expiration_timestamp_s); } - expiration_seconds = timestamp_us / 1000000 + delay_seconds; + expiration_timestamp_s = timestamp_us / 1000000 + delay_seconds; return true; } @@ -179,42 +196,42 @@ error_s disk_remove_useless_dirs(const std::vector> &d } for (const auto &path : sub_list) { - uint64_t expiration_seconds = 0; + uint64_t expiration_timestamp_s = 0; // Note: don't delete ".bak" directory since it could be did by administrator. const auto name = dsn::utils::filesystem::get_file_name(path); if (boost::algorithm::ends_with(name, kFolderSuffixErr)) { report.error_replica_count++; - if (!get_expiration_seconds_by_timestamp(name, - path, - kFolderSuffixErr.size(), - FLAGS_gc_disk_error_replica_interval_seconds, - expiration_seconds)) { + if (!get_expiration_timestamp(name, + path, + kFolderSuffixErr.size(), + FLAGS_gc_disk_error_replica_interval_seconds, + expiration_timestamp_s)) { continue; } } else if (boost::algorithm::ends_with(name, kFolderSuffixGar)) { report.garbage_replica_count++; - if (!get_expiration_seconds_by_timestamp(name, - path, - kFolderSuffixGar.size(), - FLAGS_gc_disk_garbage_replica_interval_seconds, - expiration_seconds)) { + if (!get_expiration_timestamp(name, + path, + kFolderSuffixGar.size(), + FLAGS_gc_disk_garbage_replica_interval_seconds, + expiration_timestamp_s)) { continue; } } else if (boost::algorithm::ends_with(name, kFolderSuffixTmp)) { report.disk_migrate_tmp_count++; - if (!get_expiration_seconds_by_last_write_time( + if (!get_expiration_timestamp_by_last_write_time( path, FLAGS_gc_disk_migration_tmp_replica_interval_seconds, - expiration_seconds)) { + expiration_timestamp_s)) { continue; } } else if (boost::algorithm::ends_with(name, kFolderSuffixOri)) { report.disk_migrate_origin_count++; - if (!get_expiration_seconds_by_last_write_time( + if (!get_expiration_timestamp_by_last_write_time( path, FLAGS_gc_disk_migration_origin_replica_interval_seconds, - expiration_seconds)) { + expiration_timestamp_s)) { continue; } } else { @@ -222,7 +239,7 @@ error_s disk_remove_useless_dirs(const std::vector> &d } const auto current_time_ms = dsn_now_ms(); - if (expiration_seconds <= current_time_ms / 1000) { + if (expiration_timestamp_s <= current_time_ms / 1000) { if (dsn::utils::filesystem::remove_path(path)) { LOG_WARNING("gc_disk: replica_dir_op succeed to delete directory '{}'" ", time_used_ms = {}", @@ -237,7 +254,7 @@ error_s disk_remove_useless_dirs(const std::vector> &d } else { LOG_INFO("gc_disk: reserve directory '{}', wait_seconds = {}", path, - expiration_seconds - current_time_ms / 1000); + expiration_timestamp_s - current_time_ms / 1000); } } return error_s::ok(); diff --git a/src/replica/test/replica_disk_test.cpp b/src/replica/test/replica_disk_test.cpp index b36ed13d82..6717ffd402 100644 --- a/src/replica/test/replica_disk_test.cpp +++ b/src/replica/test/replica_disk_test.cpp @@ -200,6 +200,28 @@ TEST_P(replica_disk_test, on_query_disk_info_one_app) } } +TEST_P(replica_disk_test, check_data_dir_removable) +{ + struct test_case + { + std::string path; + bool expected_removable; + bool expected_invalid; + } tests[] = {{"./replica.0.err", true, true}, + {"./replica.1.gar", true, true}, + {"./replica.2.tmp", true, true}, + {"./replica.3.ori", true, true}, + {"./replica.4.bak", false, true}, + {"./replica.5.abcde", false, false}, + {"./replica.6.x", false, false}, + {"./replica.7.8", false, false}}; + + for (const auto &test : tests) { + EXPECT_EQ(test.expected_removable, is_data_dir_removable(test.path)); + EXPECT_EQ(test.expected_invalid, is_data_dir_invalid(test.path)); + } +} + TEST_P(replica_disk_test, gc_disk_useless_dir) { PRESERVE_FLAG(gc_disk_error_replica_interval_seconds); From f3f9abb90f034228d81086d9cd87299565155a28 Mon Sep 17 00:00:00 2001 From: Dan Wang Date: Mon, 6 Nov 2023 16:56:17 +0800 Subject: [PATCH 15/15] fix: parse timestamp from the name of data dir for gc instead of the last update time --- src/replica/disk_cleaner.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/replica/disk_cleaner.cpp b/src/replica/disk_cleaner.cpp index 358015e4c3..cc9ffeb661 100644 --- a/src/replica/disk_cleaner.cpp +++ b/src/replica/disk_cleaner.cpp @@ -95,10 +95,10 @@ bool get_expiration_timestamp_by_last_write_time(const std::string &path, return true; } -// Unix timestamp in microseconds for 2010-01-01 00:00:00. +// Unix timestamp in microseconds for 2010-01-01 00:00:00 GMT+0000. // This timestamp could be used as the minimum, since it's far earlier than the time when // Pegasus was born. -#define MIN_TIMESTAMP_US 1262275200000000 +#define MIN_TIMESTAMP_US 1262304000000000 #define MIN_TIMESTAMP_US_LENGTH (sizeof(STRINGIFY(MIN_TIMESTAMP_US)) - 1) // Parse timestamp from the directory name. @@ -112,9 +112,9 @@ bool get_expiration_timestamp_by_last_write_time(const std::string &path, // Specify the size of suffix by `suffix_size`. For both kinds of names (.err and .gar), // `suffix_size` is 4. // -// The timestamp is the number the number just before the suffix, between the 2 dots. For -// example, in 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp, in -// microseconds, generated by dsn_now_us(). +// The timestamp is the number just before the suffix, between the 2 dots. For example, in +// 1.1.pegasus.1698843209235962.err, 1698843209235962 is the timestamp in microseconds, +// generated by dsn_now_us(). // // `timestamp_us` is parsed result while returning true; otherwise, it would never be assigned. bool parse_timestamp_us(const std::string &name, size_t suffix_size, uint64_t ×tamp_us)