@@ -79,12 +79,23 @@ type Etcd struct {
79
79
80
80
Server * etcdserver.EtcdServer
81
81
82
- cfg Config
83
- stopc chan struct {}
84
- errc chan error
82
+ cfg Config
85
83
84
+ // closeOnce is to ensure `stopc` is closed only once, no matter
85
+ // how many times the Close() method is called.
86
86
closeOnce sync.Once
87
- wg sync.WaitGroup
87
+ // stopc is used to notify the sub goroutines not to send
88
+ // any errors to `errc`.
89
+ stopc chan struct {}
90
+ // errc is used to receive errors from sub goroutines (including
91
+ // client handler, peer handler and metrics handler). It's closed
92
+ // after all these sub goroutines exit (checked via `wg`). Writers
93
+ // should avoid writing after `stopc` is closed by selecting on
94
+ // reading from `stopc`.
95
+ errc chan error
96
+
97
+ // wg is used to track the lifecycle of all sub goroutines created by `StartEtcd`.
98
+ wg sync.WaitGroup
88
99
}
89
100
90
101
type peerListener struct {
@@ -388,6 +399,24 @@ func (e *Etcd) Config() Config {
388
399
// Close gracefully shuts down all servers/listeners.
389
400
// Client requests will be terminated with request timeout.
390
401
// After timeout, enforce remaining requests be closed immediately.
402
+ //
403
+ // The rough workflow to shut down etcd:
404
+ // 1. close the `stopc` channel, so that all error handlers (child
405
+ // goroutines) won't send back any errors anymore;
406
+ // 2. stop the http and grpc servers gracefully, within request timeout;
407
+ // 3. close all client and metrics listeners, so that etcd server
408
+ // stops receiving any new connection;
409
+ // 4. call the cancel function to close the gateway context, so that
410
+ // all gateway connections are closed;
411
+ // 5. stop etcd server gracefully, and ensure the main raft loop
412
+ // goroutine is stopped;
413
+ // 6. stop all peer listeners, so that it stops receiving peer connections
414
+ // and messages (wait up to 1 second);
415
+ // 7. wait for all child goroutines (i.e. client handlers, peer handlers
416
+ // and metrics handlers) to exit;
417
+ // 8. close the `errc` channel to release the resource. Note that it's only
418
+ // safe to close the `errc` after step 7 above is done, otherwise the
419
+ // child goroutines may send errors back to already closed `errc` channel.
391
420
func (e * Etcd ) Close () {
392
421
fields := []zap.Field {
393
422
zap .String ("name" , e .cfg .Name ),
@@ -607,7 +636,9 @@ func (e *Etcd) servePeers() {
607
636
608
637
// start peer servers in a goroutine
609
638
for _ , pl := range e .Peers {
639
+ e .wg .Add (1 )
610
640
go func (l * peerListener ) {
641
+ defer e .wg .Done ()
611
642
u := l .Addr ().String ()
612
643
e .cfg .logger .Info (
613
644
"serving peer traffic" ,
@@ -774,7 +805,9 @@ func (e *Etcd) serveClients() {
774
805
775
806
// start client servers in each goroutine
776
807
for _ , sctx := range e .sctxs {
808
+ e .wg .Add (1 )
777
809
go func (s * serveCtx ) {
810
+ defer e .wg .Done ()
778
811
e .errHandler (s .serve (e .Server , & e .cfg .ClientTLSInfo , mux , e .errHandler , e .grpcGatewayDial (splitHTTP ), splitHTTP , gopts ... ))
779
812
}(sctx )
780
813
}
@@ -859,7 +892,9 @@ func (e *Etcd) serveMetrics() (err error) {
859
892
return err
860
893
}
861
894
e .metricsListeners = append (e .metricsListeners , ml )
895
+ e .wg .Add (1 )
862
896
go func (u url.URL , ln net.Listener ) {
897
+ defer e .wg .Done ()
863
898
e .cfg .logger .Info (
864
899
"serving metrics" ,
865
900
zap .String ("address" , u .String ()),
@@ -872,9 +907,6 @@ func (e *Etcd) serveMetrics() (err error) {
872
907
}
873
908
874
909
func (e * Etcd ) errHandler (err error ) {
875
- e .wg .Add (1 )
876
- defer e .wg .Done ()
877
-
878
910
if err != nil {
879
911
e .GetLogger ().Error ("setting up serving from embedded etcd failed." , zap .Error (err ))
880
912
}
0 commit comments