Skip to content

Commit 3840eee

Browse files
committed
chore: try to workaround master label with ipv6 issue
1 parent 4012589 commit 3840eee

File tree

3 files changed

+63
-25
lines changed

3 files changed

+63
-25
lines changed

internal/controller/dragonfly_instance.go

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -168,15 +168,25 @@ func (dfi *DragonflyInstance) masterExists(ctx context.Context) (bool, error) {
168168
}
169169

170170
func (dfi *DragonflyInstance) getMasterIp(ctx context.Context) (string, error) {
171-
dfi.log.Info("retrieving ip of the master")
171+
dfi.log.Info("retrieving IP of the master")
172172
pods, err := dfi.getPods(ctx)
173173
if err != nil {
174174
return "", err
175175
}
176176

177177
for _, pod := range pods.Items {
178-
if pod.Status.Phase == corev1.PodRunning && pod.Status.ContainerStatuses[0].Ready && pod.Labels[resources.Role] == resources.Master {
179-
return pod.Status.PodIP, nil
178+
if pod.Status.Phase == corev1.PodRunning &&
179+
pod.Status.ContainerStatuses[0].Ready &&
180+
pod.Labels[resources.Role] == resources.Master {
181+
182+
masterIp, hasMasterIp := pod.Annotations[resources.MasterIp]
183+
if hasMasterIp {
184+
dfi.log.Info("Retrieved Master IP from annotation", "masterIp", masterIp)
185+
return masterIp, nil
186+
}
187+
188+
masterIp = pod.Status.PodIP
189+
return masterIp, nil
180190
}
181191
}
182192

@@ -207,7 +217,7 @@ func (dfi *DragonflyInstance) configureReplica(ctx context.Context, pod *corev1.
207217
// connected to the right master
208218
func (dfi *DragonflyInstance) checkReplicaRole(ctx context.Context, pod *corev1.Pod, masterIp string) (bool, error) {
209219
redisClient := redis.NewClient(&redis.Options{
210-
Addr: fmt.Sprintf("%s:%d", pod.Status.PodIP, resources.DragonflyAdminPort),
220+
Addr: net.JoinHostPort(pod.Status.PodIP, strconv.Itoa(resources.DragonflyAdminPort)),
211221
})
212222
defer redisClient.Close()
213223

@@ -262,7 +272,8 @@ func (dfi *DragonflyInstance) checkAndConfigureReplication(ctx context.Context)
262272
// check for one master and all replicas
263273
podRoles := make(map[string][]string)
264274
for _, pod := range pods.Items {
265-
podRoles[pod.Labels[resources.Role]] = append(podRoles[pod.Labels[resources.Role]], pod.Name)
275+
role := pod.Labels[resources.Role]
276+
podRoles[role] = append(podRoles[role], pod.Name)
266277
}
267278

268279
if len(podRoles[resources.Master]) != 1 {
@@ -279,6 +290,7 @@ func (dfi *DragonflyInstance) checkAndConfigureReplication(ctx context.Context)
279290
for _, pod := range pods.Items {
280291
if pod.Labels[resources.Role] == "" {
281292
if pod.Status.Phase == corev1.PodRunning && pod.Status.ContainerStatuses[0].Ready && pod.Status.PodIP != "" {
293+
dfi.log.Info("Marking pod as replica", "pod", pod.Name)
282294
if err := dfi.configureReplica(ctx, &pod); err != nil {
283295
return err
284296
}
@@ -299,17 +311,17 @@ func (dfi *DragonflyInstance) checkAndConfigureReplication(ctx context.Context)
299311
return err
300312
}
301313

302-
// configuring to the right master
314+
// Configure to the right master if not correct
303315
if !ok {
304-
dfi.log.Info("configuring pod as replica to the right master", "pod", pod.Name)
316+
dfi.log.Info("Configuring pod as replica to the correct master", "pod", pod.Name)
305317
if err := dfi.configureReplica(ctx, &pod); err != nil {
306318
return err
307319
}
308320
}
309321
}
310322
}
311323

312-
dfi.log.Info("all pods are configured correctly", "dfi", dfi.df.Name)
324+
dfi.log.Info("All pods are configured correctly", "dfi", dfi.df.Name)
313325
return nil
314326
}
315327

@@ -335,8 +347,11 @@ func (dfi *DragonflyInstance) replicaOf(ctx context.Context, pod *corev1.Pod, ma
335347
})
336348
defer redisClient.Close()
337349

350+
// Sanitize masterIp in case ipv6
351+
masterIp = strings.Trim(masterIp, "[]")
352+
338353
dfi.log.Info("Trying to invoke SLAVE OF command", "pod", pod.Name, "master", masterIp, "addr", redisClient.Options().Addr)
339-
resp, err := redisClient.SlaveOf(ctx, masterIp, fmt.Sprint(resources.DragonflyAdminPort)).Result()
354+
resp, err := redisClient.SlaveOf(ctx, masterIp, strconv.Itoa(resources.DragonflyAdminPort)).Result()
340355
if err != nil {
341356
return fmt.Errorf("error running SLAVE OF command: %s", err)
342357
}
@@ -345,11 +360,14 @@ func (dfi *DragonflyInstance) replicaOf(ctx context.Context, pod *corev1.Pod, ma
345360
return fmt.Errorf("response of `SLAVE OF` on replica is not OK: %s", resp)
346361
}
347362

348-
dfi.log.Info("Marking pod role as replica", "pod", pod.Name)
363+
dfi.log.Info("Marking pod role as replica", "pod", pod.Name, "masterIp", masterIp)
349364
pod.Labels[resources.Role] = resources.Replica
350-
pod.Labels[resources.MasterIp] = masterIp
365+
if pod.Annotations == nil {
366+
pod.Annotations = make(map[string]string)
367+
}
368+
pod.Annotations[resources.MasterIp] = masterIp
351369
if err := dfi.client.Update(ctx, pod); err != nil {
352-
return fmt.Errorf("could not update replica label")
370+
return fmt.Errorf("could not update replica annotation: %w", err)
353371
}
354372

355373
return nil
@@ -373,8 +391,14 @@ func (dfi *DragonflyInstance) replicaOfNoOne(ctx context.Context, pod *corev1.Po
373391
return fmt.Errorf("response of `SLAVE OF NO ONE` on master is not OK: %s", resp)
374392
}
375393

376-
dfi.log.Info("Marking pod role as master", "pod", pod.Name)
394+
masterIp := pod.Status.PodIP
395+
396+
dfi.log.Info("Marking pod role as master", "pod", pod.Name, "masterIp", masterIp)
377397
pod.Labels[resources.Role] = resources.Master
398+
if pod.Annotations == nil {
399+
pod.Annotations = make(map[string]string)
400+
}
401+
pod.Annotations[resources.MasterIp] = masterIp
378402
if err := dfi.client.Update(ctx, pod); err != nil {
379403
return err
380404
}

internal/controller/util.go

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23+
"net"
24+
"strconv"
2325
"strings"
2426

2527
"github.com/dragonflydb/dragonfly-operator/internal/resources"
@@ -88,8 +90,10 @@ func getLatestReplica(ctx context.Context, c client.Client, statefulSet *appsv1.
8890

8991
// replTakeover runs the replTakeOver on the given replica pod
9092
func replTakeover(ctx context.Context, c client.Client, newMaster *corev1.Pod) error {
93+
addr := net.JoinHostPort(newMaster.Status.PodIP, strconv.Itoa(resources.DragonflyPort))
94+
9195
redisClient := redis.NewClient(&redis.Options{
92-
Addr: fmt.Sprintf("%s:%d", newMaster.Status.PodIP, resources.DragonflyAdminPort),
96+
Addr: addr,
9397
})
9498
defer redisClient.Close()
9599

@@ -111,13 +115,15 @@ func replTakeover(ctx context.Context, c client.Client, newMaster *corev1.Pod) e
111115
}
112116

113117
func isStableState(ctx context.Context, pod *corev1.Pod) (bool, error) {
114-
// wait until pod IP is ready
118+
// Ensure PodIP and Pod Phase are ready
115119
if pod.Status.PodIP == "" || pod.Status.Phase != corev1.PodRunning {
116120
return false, nil
117121
}
118122

123+
addr := net.JoinHostPort(pod.Status.PodIP, strconv.Itoa(resources.DragonflyAdminPort))
124+
119125
redisClient := redis.NewClient(&redis.Options{
120-
Addr: fmt.Sprintf("%s:%d", pod.Status.PodIP, resources.DragonflyAdminPort),
126+
Addr: addr,
121127
})
122128
defer redisClient.Close()
123129

@@ -135,14 +141,7 @@ func isStableState(ctx context.Context, pod *corev1.Pod) (bool, error) {
135141
return false, errors.New("empty info")
136142
}
137143

138-
data := map[string]string{}
139-
for _, line := range strings.Split(info, "\n") {
140-
if line == "" || strings.HasPrefix(line, "#") {
141-
continue
142-
}
143-
kv := strings.Split(line, ":")
144-
data[kv[0]] = strings.TrimSuffix(kv[1], "\r")
145-
}
144+
data := parseRedisInfo(info)
146145

147146
if data["master_sync_in_progress"] == "1" {
148147
return false, nil
@@ -158,3 +157,18 @@ func isStableState(ctx context.Context, pod *corev1.Pod) (bool, error) {
158157

159158
return true, nil
160159
}
160+
161+
// Helper function to parse Redis INFO data
162+
func parseRedisInfo(info string) map[string]string {
163+
data := map[string]string{}
164+
for _, line := range strings.Split(info, "\n") {
165+
if line == "" || strings.HasPrefix(line, "#") {
166+
continue
167+
}
168+
kv := strings.Split(line, ":")
169+
if len(kv) == 2 {
170+
data[kv[0]] = strings.TrimSuffix(kv[1], "\r")
171+
}
172+
}
173+
return data
174+
}

internal/resources/const.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ const (
5656
// KubernetesPartOfLabel is the name of a higher level application this one is part of
5757
KubernetesPartOfLabelKey = "app.kubernetes.io/part-of"
5858

59-
MasterIp string = "master-ip"
59+
MasterIp string = "operator.dragonflydb.io/masterIP"
6060

6161
Role string = "role"
6262

0 commit comments

Comments
 (0)