Skip to content

Commit 984cde6

Browse files
Added tests
Signed-off-by: Smit Patel <patelsmit32123@gmail.com>
1 parent 7463a9d commit 984cde6

File tree

4 files changed

+162
-6
lines changed

4 files changed

+162
-6
lines changed

server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ private void handleDisconnectedNode(DiscoveryNode discoveryNode) {
308308
FollowerChecker followerChecker = followerCheckers.get(discoveryNode);
309309
if (followerChecker != null) {
310310
logger.info(() -> new ParameterizedMessage("{} disconnected", followerChecker));
311+
clusterManagerMetrics.incrementCounter(clusterManagerMetrics.followerCheckAttemptFailureCounter, 1.0);
311312
followerChecker.failNode("disconnected");
312313
}
313314
}

server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ public String toString() {
378378
void handleDisconnectedNode(DiscoveryNode discoveryNode) {
379379
if (discoveryNode.equals(leader)) {
380380
logger.debug("leader [{}] disconnected", leader);
381+
clusterManagerMetrics.incrementCounter(clusterManagerMetrics.leaderCheckAttemptFailureCounter, 1.0);
381382
leaderFailed(new NodeDisconnectedException(discoveryNode, "disconnected"));
382383
}
383384
}

server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java

Lines changed: 92 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
import static org.opensearch.monitor.StatusInfo.Status.UNHEALTHY;
8989
import static org.opensearch.node.Node.NODE_NAME_SETTING;
9090
import static org.opensearch.transport.TransportService.HANDSHAKE_ACTION_NAME;
91+
import static org.opensearch.transport.TransportService.NOOP_TRANSPORT_INTERCEPTOR;
9192
import static org.hamcrest.Matchers.contains;
9293
import static org.hamcrest.Matchers.containsInAnyOrder;
9394
import static org.hamcrest.Matchers.empty;
@@ -126,7 +127,7 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
126127
final TransportService transportService = mockTransport.createTransportService(
127128
settings,
128129
deterministicTaskQueue.getThreadPool(),
129-
TransportService.NOOP_TRANSPORT_INTERCEPTOR,
130+
NOOP_TRANSPORT_INTERCEPTOR,
130131
boundTransportAddress -> localNode,
131132
null,
132133
emptySet(),
@@ -273,6 +274,10 @@ public void testFailsNodeThatIsDisconnected() {
273274
metricsRegistry
274275
);
275276
assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
277+
assertEquals(
278+
Integer.valueOf(2),
279+
metricsRegistry.getCounterStore().get("followers.checker.attempt.failure.count").getCounterValue()
280+
);
276281
}
277282

278283
public void testFailsNodeThatDisconnects() {
@@ -307,7 +312,7 @@ public String toString() {
307312
final TransportService transportService = mockTransport.createTransportService(
308313
settings,
309314
deterministicTaskQueue.getThreadPool(),
310-
TransportService.NOOP_TRANSPORT_INTERCEPTOR,
315+
NOOP_TRANSPORT_INTERCEPTOR,
311316
boundTransportAddress -> localNode,
312317
null,
313318
emptySet(),
@@ -341,6 +346,10 @@ public String toString() {
341346
assertTrue(nodeFailed.get());
342347
assertThat(followersChecker.getFaultyNodes(), contains(otherNode));
343348
assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
349+
assertEquals(
350+
Integer.valueOf(1),
351+
metricsRegistry.getCounterStore().get("followers.checker.attempt.failure.count").getCounterValue()
352+
);
344353
}
345354

346355
public void testFailsNodeThatIsUnhealthy() {
@@ -403,7 +412,7 @@ public String toString() {
403412
final TransportService transportService = mockTransport.createTransportService(
404413
settings,
405414
deterministicTaskQueue.getThreadPool(),
406-
TransportService.NOOP_TRANSPORT_INTERCEPTOR,
415+
NOOP_TRANSPORT_INTERCEPTOR,
407416
boundTransportAddress -> localNode,
408417
null,
409418
emptySet(),
@@ -510,7 +519,7 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
510519
final TransportService transportService = mockTransport.createTransportService(
511520
settings,
512521
deterministicTaskQueue.getThreadPool(),
513-
TransportService.NOOP_TRANSPORT_INTERCEPTOR,
522+
NOOP_TRANSPORT_INTERCEPTOR,
514523
boundTransportAddress -> follower,
515524
null,
516525
emptySet(),
@@ -587,7 +596,7 @@ protected void onSendRequest(long requestId, String action, TransportRequest req
587596
final TransportService transportService = mockTransport.createTransportService(
588597
settings,
589598
deterministicTaskQueue.getThreadPool(),
590-
TransportService.NOOP_TRANSPORT_INTERCEPTOR,
599+
NOOP_TRANSPORT_INTERCEPTOR,
591600
boundTransportAddress -> follower,
592601
null,
593602
emptySet(),
@@ -748,7 +757,7 @@ public void testPreferClusterManagerNodes() {
748757
TransportService transportService = capturingTransport.createTransportService(
749758
Settings.EMPTY,
750759
deterministicTaskQueue.getThreadPool(),
751-
TransportService.NOOP_TRANSPORT_INTERCEPTOR,
760+
NOOP_TRANSPORT_INTERCEPTOR,
752761
x -> nodes.get(0),
753762
null,
754763
emptySet(),
@@ -770,6 +779,83 @@ public void testPreferClusterManagerNodes() {
770779
assertEquals(sortedFollowerTargets, followerTargets);
771780
}
772781

782+
public void testFollowerCheckerAttemptFailureCountMetric() {
783+
final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
784+
final DiscoveryNode follower = new DiscoveryNode("follower", buildNewFakeTransportAddress(), Version.CURRENT);
785+
786+
final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
787+
final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
788+
final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());
789+
AtomicBoolean followerCheckerAttemptFailedOnce = new AtomicBoolean();
790+
final MockTransport mockTransport = new MockTransport() {
791+
@Override
792+
protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
793+
if (action.equals(HANDSHAKE_ACTION_NAME)) {
794+
handleResponse(requestId, new TransportService.HandshakeResponse(node, ClusterName.DEFAULT, Version.CURRENT));
795+
return;
796+
}
797+
assertThat(action, equalTo(FOLLOWER_CHECK_ACTION_NAME));
798+
assertEquals(node, follower);
799+
800+
deterministicTaskQueue.scheduleNow(new Runnable() {
801+
@Override
802+
public void run() {
803+
if (followerCheckerAttemptFailedOnce.compareAndSet(false, true)) {
804+
handleRemoteError(requestId, new OpenSearchException("simulated error"));
805+
} else {
806+
handleResponse(requestId, Empty.INSTANCE);
807+
}
808+
}
809+
810+
@Override
811+
public String toString() {
812+
return "response to request " + requestId;
813+
}
814+
});
815+
}
816+
};
817+
818+
final TransportService transportService = mockTransport.createTransportService(
819+
settings,
820+
deterministicTaskQueue.getThreadPool(),
821+
NOOP_TRANSPORT_INTERCEPTOR,
822+
boundTransportAddress -> localNode,
823+
null,
824+
emptySet(),
825+
NoopTracer.INSTANCE
826+
);
827+
transportService.start();
828+
transportService.acceptIncomingRequests();
829+
830+
final AtomicBoolean followerFailed = new AtomicBoolean();
831+
TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
832+
final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry);
833+
final FollowersChecker followersChecker = new FollowersChecker(
834+
settings,
835+
clusterSettings,
836+
transportService,
837+
fcr -> { assert false : fcr; },
838+
(node, reason) -> {
839+
assertThat(reason, equalTo("disconnected"));
840+
assertTrue(followerFailed.compareAndSet(false, true));
841+
},
842+
() -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"),
843+
clusterManagerMetrics
844+
);
845+
846+
followersChecker.setCurrentNodes(DiscoveryNodes.builder().add(localNode).add(follower).localNodeId(localNode.getId()).build());
847+
848+
deterministicTaskQueue.advanceTime();
849+
deterministicTaskQueue.runAllRunnableTasks();
850+
851+
assertFalse(followerFailed.get());
852+
assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue());
853+
assertEquals(
854+
Integer.valueOf(1),
855+
metricsRegistry.getCounterStore().get("followers.checker.attempt.failure.count").getCounterValue()
856+
);
857+
}
858+
773859
private static List<DiscoveryNode> randomNodes(final int numNodes) {
774860
List<DiscoveryNode> nodesList = new ArrayList<>();
775861
for (int i = 0; i < numNodes; i++) {

server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ public String toString() {
360360
assertTrue(leaderFailed.get());
361361
}
362362
assertEquals(Integer.valueOf(3), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
363+
assertEquals(Integer.valueOf(3), metricsRegistry.getCounterStore().get("leader.checker.attempt.failure.count").getCounterValue());
363364
}
364365

365366
public void testFollowerFailsImmediatelyOnHealthCheckFailure() {
@@ -443,6 +444,7 @@ public String toString() {
443444
}
444445

445446
assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
447+
assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.attempt.failure.count").getCounterValue());
446448
}
447449

448450
public void testLeaderBehaviour() {
@@ -540,6 +542,72 @@ public void testLeaderBehaviour() {
540542
);
541543
}
542544
assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
545+
assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("leader.checker.attempt.failure.count").getCounterValue());
546+
}
547+
548+
public void testLeaderCheckerAttemptFailureCountMetric() {
549+
final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT);
550+
final DiscoveryNode leader = new DiscoveryNode("leader", buildNewFakeTransportAddress(), Version.CURRENT);
551+
552+
final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), localNode.getId()).build();
553+
final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
554+
final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue(settings, random());
555+
AtomicBoolean leaderCheckAttemptFailOnce = new AtomicBoolean();
556+
final MockTransport mockTransport = new MockTransport() {
557+
@Override
558+
protected void onSendRequest(long requestId, String action, TransportRequest request, DiscoveryNode node) {
559+
if (action.equals(HANDSHAKE_ACTION_NAME)) {
560+
handleResponse(requestId, new TransportService.HandshakeResponse(node, ClusterName.DEFAULT, Version.CURRENT));
561+
return;
562+
}
563+
assertThat(action, equalTo(LEADER_CHECK_ACTION_NAME));
564+
assertEquals(node, leader);
565+
566+
deterministicTaskQueue.scheduleNow(new Runnable() {
567+
@Override
568+
public void run() {
569+
if (leaderCheckAttemptFailOnce.compareAndSet(false, true)) {
570+
handleRemoteError(requestId, new OpenSearchException("simulated error"));
571+
} else {
572+
handleResponse(requestId, Empty.INSTANCE);
573+
}
574+
}
575+
576+
@Override
577+
public String toString() {
578+
return "response to request " + requestId;
579+
}
580+
});
581+
}
582+
};
583+
584+
final TransportService transportService = mockTransport.createTransportService(
585+
settings,
586+
deterministicTaskQueue.getThreadPool(),
587+
NOOP_TRANSPORT_INTERCEPTOR,
588+
boundTransportAddress -> localNode,
589+
null,
590+
emptySet(),
591+
NoopTracer.INSTANCE
592+
);
593+
transportService.start();
594+
transportService.acceptIncomingRequests();
595+
596+
final AtomicBoolean leaderFailed = new AtomicBoolean();
597+
TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry();
598+
final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry);
599+
final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> {
600+
assertTrue(leaderFailed.compareAndSet(false, true));
601+
}, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics);
602+
603+
leaderChecker.updateLeader(leader);
604+
605+
deterministicTaskQueue.advanceTime();
606+
deterministicTaskQueue.runAllRunnableTasks();
607+
608+
assertFalse(leaderFailed.get());
609+
assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue());
610+
assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.attempt.failure.count").getCounterValue());
543611
}
544612

545613
private class CapturingTransportResponseHandler implements TransportResponseHandler<Empty> {

0 commit comments

Comments
 (0)