Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(duplication): prevent plog files from being removed by GC while they are being checked by duplication #1597

Merged
merged 17 commits into from
Mar 28, 2024
Merged
5 changes: 5 additions & 0 deletions src/replica/duplication/load_from_private_log.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ void load_from_private_log::run()

void load_from_private_log::find_log_file_to_start()
{
_duplicator->set_duplication_plog_checking(true);
empiredan marked this conversation as resolved.
Show resolved Hide resolved
empiredan marked this conversation as resolved.
Show resolved Hide resolved
auto cleanup = dsn::defer([this]() { _duplicator->set_duplication_plog_checking(false); });

// `file_map` has already excluded the useless log files during replica init.
const auto &file_map = _private_log->get_log_file_map();

Expand All @@ -168,6 +171,8 @@ void load_from_private_log::find_log_file_to_start()
void load_from_private_log::find_log_file_to_start(
const mutation_log::log_file_map_by_index &log_file_map)
{
auto cleanup = dsn::defer([this]() { _duplicator->set_duplication_plog_checking(false); });
empiredan marked this conversation as resolved.
Show resolved Hide resolved

_current = nullptr;
if (dsn_unlikely(log_file_map.empty())) {
LOG_ERROR_PREFIX("unable to start duplication since no log file is available");
Expand Down
5 changes: 5 additions & 0 deletions src/replica/duplication/replica_duplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,5 +262,10 @@ uint64_t replica_duplicator::get_pending_mutations_count() const
return cnt > 0 ? static_cast<uint64_t>(cnt) : 0;
}

// Forwards the "duplication is checking plog files" flag to `_replica`.
// `checking` is passed through unchanged to the replica's setter of the same name.
void replica_duplicator::set_duplication_plog_checking(bool checking)
{
_replica->set_duplication_plog_checking(checking);
}

} // namespace replication
} // namespace dsn
4 changes: 3 additions & 1 deletion src/replica/duplication/replica_duplicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ class replica_duplicator : public replica_base, public pipeline::base
// For metric "dup.pending_mutations_count"
uint64_t get_pending_mutations_count() const;

duplication_status::type status() const { return _status; };
duplication_status::type status() const { return _status; }

void set_duplication_plog_checking(bool checking);

private:
friend class duplication_test_base;
Expand Down
8 changes: 8 additions & 0 deletions src/replica/replica.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba
replica_duplicator_manager *get_duplication_manager() const { return _duplication_mgr.get(); }
bool is_duplication_master() const { return _is_duplication_master; }
bool is_duplication_follower() const { return _is_duplication_follower; }
// Returns the current value of the atomic flag `_is_duplication_plog_checking`,
// i.e. whether duplication is currently checking private log (plog) files.
bool is_duplication_plog_checking() const { return _is_duplication_plog_checking.load(); }
// Stores `checking` into the atomic flag `_is_duplication_plog_checking`.
// Pass true while duplication is checking plog files and false once it is done.
void set_duplication_plog_checking(bool checking)
{
_is_duplication_plog_checking.store(checking);
}

//
// Backup
Expand Down Expand Up @@ -625,6 +630,9 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba
bool _is_manual_emergency_checkpointing{false};
bool _is_duplication_master{false};
bool _is_duplication_follower{false};
// Indicates whether the replica is currently searching for the private logs
// to load for duplication. It is useful for preventing plog files from being
// GCed unexpectedly.
std::atomic<bool> _is_duplication_plog_checking{false};
acelyc111 marked this conversation as resolved.
Show resolved Hide resolved
acelyc111 marked this conversation as resolved.
Show resolved Hide resolved

// backup
std::unique_ptr<replica_backup_manager> _backup_mgr;
Expand Down
6 changes: 6 additions & 0 deletions src/replica/replica_chkpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,12 @@ void replica::on_checkpoint_timer()
return;
}

if (is_duplication_plog_checking()) {
LOG_DEBUG_PREFIX("gc_private {}: skip gc because duplication is checking plog files",
enum_to_string(status()));
return;
}

tasking::enqueue(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS,
&_tracker,
[this, plog, cleanable_decree, valid_start_offset] {
Expand Down
Loading