Skip to content

Commit 0476c96

Browse files
Shitanshu Shah, facebook-github-bot
Shitanshu Shah
authored and committed
Make Open/R KvStore initial and max backoff time configurable via OpenrConfig
Summary: - Make KvStore initial and max backoff configurable; use defaults when not configured - Modify requestThriftPeerSync to use the appropriate backoff parameters - Expose processThriftFailure as a public API so that test code can make use of it - Enhance the KvStore wrapper with APIs that let test code trigger processThriftFailure - Add UT automation Reviewed By: xiangxu1121 Differential Revision: D54129764 fbshipit-source-id: 211d97df6430fa13e59265fefcac7e981273e739
1 parent 4cad9c6 commit 0476c96

File tree

9 files changed

+250
-24
lines changed

9 files changed

+250
-24
lines changed

openr/config/Config.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,8 @@ Config::toThriftKvStoreConfig() const {
452452
config.node_name() = getNodeName();
453453
config.key_ttl_ms() = *oldConfig.key_ttl_ms();
454454
config.ttl_decrement_ms() = *oldConfig.ttl_decrement_ms();
455+
config.sync_initial_backoff_ms() = *oldConfig.sync_initial_backoff_ms();
456+
config.sync_max_backoff_ms() = *oldConfig.sync_max_backoff_ms();
455457

456458
if (auto floodRate = oldConfig.flood_rate()) {
457459
thrift::KvStoreFloodRate rate;

openr/if/KvStore.thrift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,8 @@ struct KvStoreConfig {
472472
13: optional string x509_ca_path;
473473
/** Knob to enable/disable TLS thrift client. */
474474
14: bool enable_secure_thrift_client = false;
475+
15: i32 sync_initial_backoff_ms = 4000;
476+
16: i32 sync_max_backoff_ms = 256000;
475477
}
476478

477479
/**

openr/if/OpenrConfig.thrift

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,11 @@ struct KvstoreConfig {
7979
*/
8080
6: optional list<string> key_prefix_filters;
8181
7: optional list<string> key_originator_id_filters;
82+
/**
83+
* Configurable Initial and Max backoffs for kvstore peer full sync
84+
*/
85+
8: i32 sync_initial_backoff_ms = 4000;
86+
9: i32 sync_max_backoff_ms = 256000;
8287
}
8388

8489
/*

openr/kvstore/KvStore-inl.h

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,41 @@ KvStore<ClientType>::KvStore(
4545
kvParams_.maybeIpTos.value(),
4646
*kvStoreConfig.node_name());
4747
}
48+
if (*kvStoreConfig.sync_initial_backoff_ms() <= 0) {
49+
XLOG(INFO) << fmt::format(
50+
"non-zero sync initial backoff ms {}, re-setting to {}",
51+
*kvStoreConfig.sync_initial_backoff_ms(),
52+
std::chrono::milliseconds(Constants::kKvstoreSyncInitialBackoff)
53+
.count());
54+
55+
kvParams_.syncInitialBackoff = Constants::kKvstoreSyncInitialBackoff;
56+
} else {
57+
kvParams_.syncInitialBackoff =
58+
std::chrono::milliseconds(*kvStoreConfig.sync_initial_backoff_ms());
59+
}
60+
61+
if (*kvStoreConfig.sync_max_backoff_ms() <=
62+
std::chrono::milliseconds(kvParams_.syncInitialBackoff).count()) {
63+
if (kvParams_.syncInitialBackoff < Constants::kKvstoreSyncMaxBackoff) {
64+
kvParams_.syncMaxBackoff = Constants::kKvstoreSyncMaxBackoff;
65+
} else {
66+
// to be tuned if this case is of interest
67+
kvParams_.syncMaxBackoff = (kvParams_.syncInitialBackoff * 2);
68+
}
69+
70+
XLOG(INFO) << fmt::format(
71+
"sync max backoff ms {} less than initial backoff, re-setting to {}",
72+
*kvStoreConfig.sync_max_backoff_ms(),
73+
kvParams_.syncMaxBackoff.count());
74+
} else {
75+
kvParams_.syncMaxBackoff =
76+
std::chrono::milliseconds(*kvStoreConfig.sync_max_backoff_ms());
77+
}
78+
79+
XLOG(INFO) << fmt::format(
80+
"Initial backoff {} and Max backoff {}",
81+
kvParams_.syncInitialBackoff.count(),
82+
kvParams_.syncMaxBackoff.count());
4883

4984
{
5085
auto fiber = addFiberTaskFuture(
@@ -520,6 +555,29 @@ KvStore<ClientType>::semifuture_setKvStoreKeyValues(
520555
return sf;
521556
}
522557

558+
template <class ClientType>
559+
folly::SemiFuture<std::unique_ptr<bool>>
560+
KvStore<ClientType>::semifuture_injectThriftFailure(
561+
std::string area, std::string peerName) {
562+
folly::Promise<std::unique_ptr<bool>> p;
563+
auto sf = p.getSemiFuture();
564+
runInEventBaseThread(
565+
[this, p = std::move(p), peerName = std::move(peerName), area]() mutable {
566+
try {
567+
bool r = true;
568+
auto& kvStoreDb = getAreaDbOrThrow(area, "disconnectPeer");
569+
kvStoreDb.processThriftFailure(
570+
peerName,
571+
"injected thrift failure",
572+
std::chrono::milliseconds(500)); // arbitrary timeout
573+
p.setValue(std::make_unique<bool>(std::move(r)));
574+
} catch (thrift::KvStoreError const& e) {
575+
p.setException(e);
576+
}
577+
});
578+
return sf;
579+
}
580+
523581
template <class ClientType>
524582
folly::SemiFuture<std::optional<thrift::KvStorePeerState>>
525583
KvStore<ClientType>::semifuture_getKvStorePeerState(
@@ -1049,6 +1107,11 @@ KvStoreDb<ClientType>::KvStorePeer::KvStorePeer(
10491107
CHECK(not this->peerSpec.peerAddr()->empty());
10501108
CHECK(
10511109
this->expBackoff.getInitialBackoff() <= this->expBackoff.getMaxBackoff());
1110+
XLOG(INFO) << fmt::format(
1111+
"node: {}, initial backoff {} and max backoff {}",
1112+
nodeName,
1113+
this->expBackoff.getInitialBackoff().count(),
1114+
this->expBackoff.getMaxBackoff().count());
10521115
}
10531116

10541117
template <class ClientType>
@@ -1599,7 +1662,7 @@ KvStoreDb<ClientType>::advertiseSelfOriginatedKeys() {
15991662
// Build keys to be cleaned from local storage
16001663
std::vector<std::string> keysToClear;
16011664

1602-
std::chrono::milliseconds timeout = Constants::kMaxBackoff;
1665+
std::chrono::milliseconds timeout = kvParams_.syncMaxBackoff;
16031666
for (auto const& key : keysToAdvertise_) {
16041667
// Each key was introduced through a persistSelfOriginatedKey() call.
16051668
// Therefore, each key is in selfOriginatedKeyVals_ and has a keyBackoff.
@@ -1918,7 +1981,7 @@ template <class ClientType>
19181981
void
19191982
KvStoreDb<ClientType>::requestThriftPeerSync() {
19201983
// minimal timeout for next run
1921-
auto timeout = std::chrono::milliseconds(Constants::kKvstoreSyncMaxBackoff);
1984+
auto timeout = kvParams_.syncMaxBackoff;
19221985

19231986
// pre-fetch of peers in "SYNCING" state for later calculation
19241987
uint32_t numThriftPeersInSync =
@@ -1935,7 +1998,7 @@ KvStoreDb<ClientType>::requestThriftPeerSync() {
19351998
}
19361999

19372000
// update the global minimum timeout value for next try
1938-
if (not thriftPeer.expBackoff.canTryNow()) {
2001+
if (not expBackoff.canTryNow()) {
19392002
timeout = std::min(timeout, expBackoff.getTimeRemainingUntilRetry());
19402003
continue;
19412004
}
@@ -2006,9 +2069,9 @@ KvStoreDb<ClientType>::requestThriftPeerSync() {
20062069
});
20072070

20082071
// in case pending peer size is over parallelSyncLimit,
2009-
// wait until kMaxBackoff before sending next round of sync
2072+
// wait until syncInitialBackoff before sending next round of sync
20102073
if (numThriftPeersInSync > parallelSyncLimitOverThrift_) {
2011-
timeout = Constants::kKvstoreSyncInitialBackoff;
2074+
timeout = kvParams_.syncInitialBackoff;
20122075
XLOG(INFO)
20132076
<< AreaTag()
20142077
<< fmt::format(
@@ -2317,8 +2380,7 @@ KvStoreDb<ClientType>::addThriftPeers(
23172380
AreaTag(),
23182381
newPeerSpec,
23192382
ExponentialBackoff<std::chrono::milliseconds>(
2320-
Constants::kKvstoreSyncInitialBackoff,
2321-
Constants::kKvstoreSyncMaxBackoff),
2383+
kvParams_.syncInitialBackoff, kvParams_.syncMaxBackoff),
23222384
kvParams_);
23232385
peer.peerSpec.stateEpochTimeMs() = getTimeSinceEpochMs();
23242386
peer.peerSpec.flaps() = -1;

openr/kvstore/KvStore.h

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,23 @@ class KvStoreDb {
207207
void unsetSelfOriginatedKey(std::string const& key, std::string const& value);
208208
void eraseSelfOriginatedKey(std::string const& key);
209209

210+
/*
211+
* [Initial Sync]
212+
*
213+
* util method to process thrift sync response in:
214+
* 1) Success
215+
* 2) Failure
216+
*/
217+
void processThriftSuccess(
218+
std::string const& peerName,
219+
thrift::Publication&& pub,
220+
std::chrono::milliseconds timeDelta);
221+
222+
void processThriftFailure(
223+
std::string const& peerName,
224+
folly::fbstring const& exceptionStr,
225+
std::chrono::milliseconds timeDelta);
226+
210227
private:
211228
// disable copying
212229
KvStoreDb(KvStoreDb const&) = delete;
@@ -243,23 +260,6 @@ class KvStoreDb {
243260
void finalizeFullSync(
244261
const std::unordered_set<std::string>& keys, const std::string& senderId);
245262

246-
/*
247-
* [Initial Sync]
248-
*
249-
* util method to process thrift sync response in:
250-
* 1) Success
251-
* 2) Failure
252-
*/
253-
void processThriftSuccess(
254-
std::string const& peerName,
255-
thrift::Publication&& pub,
256-
std::chrono::milliseconds timeDelta);
257-
258-
void processThriftFailure(
259-
std::string const& peerName,
260-
folly::fbstring const& exceptionStr,
261-
std::chrono::milliseconds timeDelta);
262-
263263
/*
264264
* [Version Inconsistency Mitigation]
265265
*/
@@ -609,6 +609,9 @@ class KvStore final : public OpenrEventBase {
609609
semifuture_setKvStoreKeyValues(
610610
std::string area, thrift::KeySetParams keySetParams);
611611

612+
folly::SemiFuture<std::unique_ptr<bool>> semifuture_injectThriftFailure(
613+
std::string area, std::string peerName);
614+
612615
folly::SemiFuture<std::unique_ptr<std::vector<thrift::Publication>>>
613616
semifuture_dumpKvStoreKeys(
614617
thrift::KeyDumpParams keyDumpParams,

openr/kvstore/KvStoreParams.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ struct KvStoreParams {
3838
std::chrono::milliseconds ttlDecr{Constants::kTtlDecrement};
3939
// TTL for self-originated keys
4040
std::chrono::milliseconds keyTtl{0};
41+
std::chrono::milliseconds syncInitialBackoff{
42+
Constants::kKvstoreSyncInitialBackoff};
43+
std::chrono::milliseconds syncMaxBackoff{Constants::kKvstoreSyncMaxBackoff};
4144

4245
// TLS knob
4346
bool enable_secure_thrift_client{false};
@@ -59,6 +62,10 @@ struct KvStoreParams {
5962
*kvStoreConfig.ttl_decrement_ms())), /* TTL decrement factor */
6063
keyTtl(std::chrono::milliseconds(
6164
*kvStoreConfig.key_ttl_ms())), /*TTL for self-originated keys */
65+
syncInitialBackoff(std::chrono::milliseconds(
66+
*kvStoreConfig.sync_initial_backoff_ms())),
67+
syncMaxBackoff(
68+
std::chrono::milliseconds(*kvStoreConfig.sync_max_backoff_ms())),
6269
enable_secure_thrift_client(
6370
*kvStoreConfig.enable_secure_thrift_client()),
6471
x509_cert_path(kvStoreConfig.x509_cert_path().to_optional()),

openr/kvstore/KvStoreWrapper.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,21 @@ KvStoreWrapper<ClientType>::setKeys(
134134
return true;
135135
}
136136

137+
template <class ClientType>
138+
bool
139+
KvStoreWrapper<ClientType>::injectThriftFailure(
140+
AreaId const& area, std::string const& peerName) {
141+
try {
142+
kvStore_->semifuture_injectThriftFailure(area, peerName);
143+
} catch (std::exception const& e) {
144+
XLOG(ERR) << "Exception to thrift failure injection: "
145+
<< folly::exceptionStr(e);
146+
return false;
147+
}
148+
149+
return true;
150+
}
151+
137152
template <class ClientType>
138153
void
139154
KvStoreWrapper<ClientType>::pushToKvStoreUpdatesQueue(

openr/kvstore/KvStoreWrapper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ class KvStoreWrapper {
118118
const std::vector<std::pair<std::string, thrift::Value>>& keyVals,
119119
std::optional<std::vector<std::string>> nodeIds = std::nullopt);
120120

121+
bool injectThriftFailure(AreaId const& area, std::string const& peerName);
122+
121123
void
122124
publishKvStoreSynced() {
123125
kvStoreUpdatesQueue_.push(thrift::InitializationEvent::KVSTORE_SYNCED);

0 commit comments

Comments
 (0)