Skip to content

Commit da3c43d

Browse files
authored
Additional log information for cluster accept handler and message processing (#2815)
Enhance debugging for cluster logs [1] Add human node names in cluster tests so that we can easily understand which nodes we are interacting with: ``` pong packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: :0 node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) announces that it is a primary in shard c6d1152caee49a5e70cb4b77d1549386078be603 Reconfiguring node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) as primary for shard c6d1152caee49a5e70cb4b77d1549386078be603 Configuration change detected. Reconfiguring myself as a replica of node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) in shard c6d1152caee49a5e70cb4b77d1549386078be603 ``` [2] Currently there are logs showing the address of incoming connections: ``` Accepting cluster node connection from 127.0.0.1:59956 Accepting cluster node connection from 127.0.0.1:59957 Accepting cluster node connection from 127.0.0.1:59958 Accepting cluster node connection from 127.0.0.1:59959 ``` but we have no idea which nodes these connections refer to. I added a logging statement when the node is set to the inbound link connection. ``` Bound cluster node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) to connection of client 127.0.0.1:59956 ``` [3] Add a debug log when processing a packet to show the packet type, sender node name, and sender client port (this also has the benefit of telling us whether this is an inbound or outbound link). ``` pong packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: :0 ping packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: 127.0.0.1:59973 fail packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: 127.0.0.1:59973 auth-req packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: 127.0.0.1:59973 ``` --------- Signed-off-by: Zhijun <dszhijun@gmail.com>
1 parent e5de417 commit da3c43d

File tree

3 files changed

+32
-8
lines changed

3 files changed

+32
-8
lines changed

src/cluster_legacy.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1708,6 +1708,17 @@ void setClusterNodeToInboundClusterLink(clusterNode *node, clusterLink *link) {
17081708
serverAssert(!node->inbound_link);
17091709
node->inbound_link = link;
17101710
link->node = node;
1711+
if (server.verbosity <= LL_VERBOSE) {
1712+
char ip[NET_IP_STR_LEN];
1713+
int port;
1714+
if (connAddrPeerName(link->conn, ip, sizeof(ip), &port) != -1) {
1715+
serverLog(LL_VERBOSE, "Bound cluster node %.40s (%s) to connection of client %s:%d",
1716+
node->name, node->human_nodename, ip, port);
1717+
} else {
1718+
serverLog(LL_VERBOSE, "Error resolving the inbound connection address of node %.40s (%s)",
1719+
node->name, node->human_nodename);
1720+
}
1721+
}
17111722
}
17121723

17131724
static void clusterConnAcceptHandler(connection *conn) {
@@ -3833,11 +3844,21 @@ int clusterProcessPacket(clusterLink *link) {
38333844
clusterSendPing(link, CLUSTERMSG_TYPE_PONG);
38343845
}
38353846

3847+
if (server.verbosity <= LL_DEBUG) {
3848+
char ip[NET_IP_STR_LEN];
3849+
int port;
3850+
if (connAddrPeerName(link->conn, ip, sizeof(ip), &port) != -1) {
3851+
serverLog(LL_DEBUG, "%s packet received from: %.40s (%s) from client: %s:%d",
3852+
clusterGetMessageTypeString(type),
3853+
clusterLinkGetNodeName(link), clusterLinkGetHumanNodeName(link),
3854+
ip, port);
3855+
} else {
3856+
serverLog(LL_DEBUG, "Error resolving the address of packet sender %.40s (%s)",
3857+
clusterLinkGetNodeName(link), clusterLinkGetHumanNodeName(link));
3858+
}
3859+
}
38363860
/* PING, PONG, MEET: process config information. */
38373861
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || type == CLUSTERMSG_TYPE_MEET) {
3838-
serverLog(LL_DEBUG, "%s packet received: %.40s", clusterGetMessageTypeString(type),
3839-
clusterLinkGetNodeName(link));
3840-
38413862
if (sender && nodeInMeetState(sender)) {
38423863
/* Once we get a response for MEET from the sender, we can stop sending more MEET. */
38433864
sender->flags &= ~CLUSTER_NODE_MEET;

tests/support/cluster_util.tcl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,9 @@ proc cluster_setup {masters replicas node_count slot_allocator replica_allocator
214214
for {set i 0} {$i < $node_count} {incr i} {
215215
R $i CLUSTER SET-CONFIG-EPOCH $config_epoch
216216
incr config_epoch
217+
# Make it easier to understand how the server interacts with
218+
# other nodes when reading the server logs.
219+
R $i CONFIG SET cluster-announce-human-nodename "R$i"
217220
}
218221

219222
# Have all nodes meet

tests/unit/cluster/manual-failover.tcl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -478,21 +478,21 @@ start_cluster 3 1 {tags {external:skip cluster}} {
478478
# verify we print the logs.
479479

480480
# Both importing slots and migrating slots are move to R3.
481-
set pattern "*Failover occurred in migration source. Update importing source for slot 0 to node $R3_nodeid () in shard $R3_shardid*"
481+
set pattern "*Failover occurred in migration source. Update importing source for slot 0 to node $R3_nodeid * in shard $R3_shardid*"
482482
verify_log_message -1 $pattern $loglines1
483-
set pattern "*Failover occurred in migration target. Slot 5462 is now being migrated to node $R3_nodeid () in shard $R3_shardid*"
483+
set pattern "*Failover occurred in migration target. Slot 5462 is now being migrated to node $R3_nodeid * in shard $R3_shardid*"
484484
verify_log_message -1 $pattern $loglines1
485485

486486
# Both slots are move to R3.
487487
set R0_slots 5462
488-
set pattern "*A failover occurred in shard $R3_shardid; node $R0_nodeid () lost $R0_slots slot(s) and failed over to node $R3_nodeid*"
488+
set pattern "*A failover occurred in shard $R3_shardid; node $R0_nodeid * lost $R0_slots slot(s) and failed over to node $R3_nodeid*"
489489
verify_log_message -1 $pattern $loglines1
490490
verify_log_message -2 $pattern $loglines2
491491

492492
# Both importing slots and migrating slots are move to R3.
493-
set pattern "*A failover occurred in migration source. Update importing source of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*"
493+
set pattern "*A failover occurred in migration source. Update importing source of 1 slot(s) to node $R3_nodeid * in shard $R3_shardid*"
494494
verify_log_message -1 $pattern $loglines1
495-
set pattern "*A failover occurred in migration target. Update migrating target of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*"
495+
set pattern "*A failover occurred in migration target. Update migrating target of 1 slot(s) to node $R3_nodeid * in shard $R3_shardid*"
496496
verify_log_message -1 $pattern $loglines1
497497

498498
R 1 debug disable-cluster-reconnection 0

0 commit comments

Comments
 (0)