Skip to content

Commit bb9c7ab

Browse files
burdandrei authored and vladshub committed
0.4 (#13)
* handling flapping of the master on start * Fixes for not so slight refactor #8 #8
1 parent 870b9a9 commit bb9c7ab

File tree

4 files changed

+41
-24
lines changed

4 files changed

+41
-24
lines changed

CHANGELOG.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
1+
## 0.4.0 (April 26, 2018)
2+
3+
FEATURES:
4+
5+
* Service check now registered as initially passing to avoid service flapping
6+
7+
FIXES:
8+
9+
* Handling slave session invalidation
10+
* Prevent from shutting down if local consul agent is not healthy
11+
112
## 0.3.0 (April 23, 2018)
213

314
FEATURES:
415

516
* Consul lock session name can be adjusted
6-
* Consul lock retry oprion added
17+
* Consul lock retry option added
718
* Dependencies updated (Consul to 1.0.7, go-redis to 6.10.2)
819

920
## 0.2.0 (March 13, 2018)

consul.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,10 @@ func (rc *resec) acquireConsulLeadership() {
5151
return
5252
}
5353

54-
rc.consul.lockIsWaiting = false
5554
rc.consul.lockErrorCh, err = rc.consul.lock.Lock(rc.consul.lockAbortCh)
5655
if err != nil {
56+
rc.consul.lockIsWaiting = false
57+
rc.consul.lockIsHeld = false
5758
rc.consul.lockStatusCh <- &consulLockStatus{
5859
acquired: false,
5960
err: err,
@@ -90,7 +91,7 @@ func (rc *resec) handleWaitForLockError() {
9091
break
9192
}
9293

93-
log.Printf("[DEBUG] Lock Error chanel is closed")
94+
log.Printf("[DEBUG] Lock Error channel is closed")
9495

9596
err := fmt.Errorf("Consul lock lost or error")
9697
log.Printf("[DEBUG] %s", err)
@@ -178,7 +179,7 @@ func (rc *resec) registerService() error {
178179
ServiceID: rc.consul.serviceID,
179180
AgentServiceCheck: consulapi.AgentServiceCheck{
180181
TTL: rc.consul.ttl,
181-
Status: "critical",
182+
Status: "passing",
182183
DeregisterCriticalServiceAfter: rc.consul.deregisterServiceAfter.String(),
183184
},
184185
}
@@ -220,7 +221,7 @@ func (rc *resec) watchConsulMasterService() error {
220221
wp.Handler = func(idx uint64, data interface{}) {
221222
switch masterConsulServiceStatus := data.(type) {
222223
case []*consulapi.ServiceEntry:
223-
log.Printf("[INFO] Received update for master from consul")
224+
log.Printf("[DEBUG] Received update for master from consul")
224225
rc.consulMasterServiceCh <- masterConsulServiceStatus
225226

226227
default:

redis.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,17 @@ func (rc *resec) runAsSlave(masterAddress string, masterPort int) error {
1616
}
1717

1818
log.Printf("[INFO] Enslaved redis %s to be slave of %s:%d", rc.redis.address, masterAddress, masterPort)
19+
20+
// change our internal state to being a slave
21+
rc.redis.replicationStatus = "slave"
22+
if err := rc.registerService(); err != nil {
23+
return fmt.Errorf("[ERROR] Consul Service registration failed - %s", err)
24+
}
25+
26+
// if we are enslaved and our status is published in consul, lets go back to trying
27+
// to acquire leadership / master role as well
28+
go rc.acquireConsulLeadership()
29+
1930
return nil
2031
}
2132

resec.go

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,14 @@ func (rc *resec) start() {
2828
return
2929
}
3030

31-
log.Printf("[DEBUG] Got redis replication status update:\n %s", update.output)
31+
log.Printf("[DEBUG] Got redis replication info update")
3232

3333
if rc.consul.healthy {
3434
// if we don't have any check id, we haven't registered our service yet
3535
// let's do that first
3636
if rc.redis.replicationStatus != "" {
3737
if rc.consul.checkID == "" {
38+
log.Printf("[DEBUG] Consul Check ID is not generated")
3839
rc.registerService()
3940
}
4041

@@ -51,6 +52,8 @@ func (rc *resec) start() {
5152
rc.handleConsulError(err)
5253
log.Printf("[ERROR] Failed to update consul Check TTL - %s", err)
5354
}
55+
} else {
56+
log.Printf("[DEBUG] Redis replication status is not defined")
5457
}
5558
} else {
5659
log.Printf("[INFO] Consul is not healthy, skipping service check update")
@@ -66,21 +69,19 @@ func (rc *resec) start() {
6669
// our state is now unhealthy, release the consul lock so someone else can
6770
// acquire the consul leadership and become redis master
6871
if !update.healthy {
69-
log.Printf("[INFO] Redis replication status changed to NOT healthy")
72+
log.Printf("[INFO] Redis status changed to NOT healthy")
7073
rc.releaseConsulLock()
7174
continue
7275
}
7376

74-
log.Printf("[INFO] Redis replication status changed to healthy")
75-
if rc.redis.replicationStatus == "slave" {
77+
log.Printf("[INFO] Redis status changed to healthy")
78+
if rc.redis.replicationStatus != "master" {
7679
if err := rc.runAsSlave(rc.lastKnownMasterInfo.address, rc.lastKnownMasterInfo.port); err != nil {
7780
log.Println(err)
7881
continue
7982
}
8083
}
8184

82-
go rc.acquireConsulLeadership()
83-
8485
case update, ok := <-rc.consulMasterServiceCh:
8586
if !ok {
8687
log.Printf("[ERROR] Consul master service channel was closed, shutting down")
@@ -99,7 +100,7 @@ func (rc *resec) start() {
99100
go rc.acquireConsulLeadership()
100101
continue
101102
}
102-
log.Printf("[DEBUG] Redis is not healthy, nothing to do here")
103+
log.Printf("[DEBUG] No Master found in consul, but redis is not healthy, nothing to do here")
103104

104105
// multiple masters is not good
105106
case masterCount > 1:
@@ -126,21 +127,13 @@ func (rc *resec) start() {
126127

127128
// todo(jippi): if we can't enslave our redis, we shouldn't try to do any further work
128129
// especially not updating our consul catalog entry
129-
if err := rc.runAsSlave(rc.lastKnownMasterInfo.address, rc.lastKnownMasterInfo.port); err != nil {
130-
log.Println(err)
130+
if !rc.redis.healthy {
131131
continue
132132
}
133-
134-
// change our internal state to being a slave
135-
rc.redis.replicationStatus = "slave"
136-
if err := rc.registerService(); err != nil {
137-
log.Printf("[ERROR] Consul Service registration failed - %s", err)
133+
if err := rc.runAsSlave(rc.lastKnownMasterInfo.address, rc.lastKnownMasterInfo.port); err != nil {
134+
log.Println(err)
138135
continue
139136
}
140-
141-
// if we are enslaved and our status is published in consul, lets go back to trying
142-
// to acquire leadership / master role as well
143-
go rc.acquireConsulLeadership()
144137
}
145138

146139
// if our consul lock status has changed
@@ -173,8 +166,9 @@ func (rc *resec) start() {
173166
log.Printf("[ERROR] %s", update.err)
174167
rc.handleConsulError(update.err)
175168

169+
176170
if !rc.consul.healthy {
177-
return
171+
continue
178172
}
179173

180174
if rc.redis.replicationStatus == "master" {

0 commit comments

Comments
 (0)