forked from linkerd/linkerd2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: probe_worker.go
140 lines (122 loc) · 3.46 KB
/
probe_worker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package servicemirror
import (
	"fmt"
	"io"
	"net"
	"net/http"
	"strconv"
	"sync"
	"time"

	"github.com/linkerd/linkerd2/pkg/multicluster"
	"github.com/prometheus/client_golang/prometheus"
	logging "github.com/sirupsen/logrus"
)
// httpGatewayTimeoutMillis is the HTTP client timeout for a single gateway
// liveness probe, in milliseconds (50s).
const httpGatewayTimeoutMillis = 50000
// ProbeWorker is responsible for monitoring gateways using a probe specification
type ProbeWorker struct {
	// localGatewayName is the host that doProbe targets over HTTP.
	localGatewayName string
	// alive is the last observed health state; it is read and written by
	// the probe loop goroutine in doProbe.
	alive bool
	// Liveness receives a value on every health transition (true when the
	// gateway becomes healthy, false when it becomes unhealthy). It is
	// created with a buffer of 10 in NewProbeWorker.
	Liveness chan bool
	// Embedded lock; UpdateProbeSpec takes the write lock to swap
	// probeSpec, and doProbe holds the read lock for the probe's duration.
	*sync.RWMutex
	// probeSpec holds the port, path and period used to probe the gateway.
	probeSpec *multicluster.ProbeSpec
	// stopCh is closed by Stop to terminate the run loop.
	stopCh chan struct{}
	// metrics records probe outcomes, latency and gateway liveness gauges.
	metrics *ProbeMetrics
	// log is pre-tagged with this worker's probe-key.
	log *logging.Entry
}
// NewProbeWorker creates a new probe worker associated with a particular gateway
func NewProbeWorker(localGatewayName string, spec *multicluster.ProbeSpec, metrics *ProbeMetrics, probekey string) *ProbeWorker {
	// Flag the gateway as enabled as soon as a worker exists for it.
	metrics.gatewayEnabled.Set(1)

	worker := &ProbeWorker{
		localGatewayName: localGatewayName,
		probeSpec:        spec,
		metrics:          metrics,
		RWMutex:          &sync.RWMutex{},
		// Buffered so that health transitions don't block the probe loop.
		Liveness: make(chan bool, 10),
		stopCh:   make(chan struct{}),
		log: logging.WithFields(logging.Fields{
			"probe-key": probekey,
		}),
	}
	return worker
}
// UpdateProbeSpec is used to update the probe specification when something about the gateway changes
func (pw *ProbeWorker) UpdateProbeSpec(spec *multicluster.ProbeSpec) {
	// Take the write lock so the swap cannot race with doProbe's reads.
	pw.Lock()
	defer pw.Unlock()
	pw.probeSpec = spec
}
// Stop this probe worker
func (pw *ProbeWorker) Stop() {
	// Unregister metrics first, then signal the run loop to exit by
	// closing stopCh.
	pw.metrics.unregister()
	pw.log.Info("Stopping probe worker")
	close(pw.stopCh)
}
// Start this probe worker
func (pw *ProbeWorker) Start() {
	pw.log.Info("Starting probe worker")
	// The probe loop runs on its own goroutine until Stop is called.
	go pw.run()
}
// run is the probe worker's main loop: it probes the gateway once per
// configured period (with up to 10% jitter) until Stop closes stopCh.
func (pw *ProbeWorker) run() {
	// Snapshot the period under the read lock: UpdateProbeSpec may replace
	// pw.probeSpec concurrently under the write lock, so an unguarded read
	// here would be a data race. Note the ticker period is fixed at start;
	// later spec updates do not retune it (pre-existing behavior).
	pw.RLock()
	probeTickerPeriod := pw.probeSpec.Period
	pw.RUnlock()

	maxJitter := probeTickerPeriod / 10 // max jitter is 10% of period
	probeTicker := NewTicker(probeTickerPeriod, maxJitter)
	defer probeTicker.Stop()

probeLoop:
	for {
		select {
		case <-pw.stopCh:
			break probeLoop
		case <-probeTicker.C:
			pw.doProbe()
		}
	}
}
// doProbe performs a single HTTP GET liveness probe against the gateway,
// updates the Prometheus metrics accordingly, and pushes a value onto
// Liveness whenever the observed health state transitions.
func (pw *ProbeWorker) doProbe() {
	// Hold the read lock for the whole probe so probeSpec cannot be
	// swapped out from under us by UpdateProbeSpec.
	pw.RLock()
	defer pw.RUnlock()

	successLabel := prometheus.Labels{probeSuccessfulLabel: "true"}
	notSuccessLabel := prometheus.Labels{probeSuccessfulLabel: "false"}

	client := http.Client{
		Timeout: httpGatewayTimeoutMillis * time.Millisecond,
	}
	strPort := strconv.Itoa(int(pw.probeSpec.Port))
	urlAddress := net.JoinHostPort(pw.localGatewayName, strPort)
	req, err := http.NewRequest("GET", fmt.Sprintf("http://%s%s", urlAddress, pw.probeSpec.Path), nil)
	if err != nil {
		pw.log.Errorf("Could not create a GET request to gateway: %s", err)
		return
	}

	start := time.Now()
	resp, err := client.Do(req)
	end := time.Since(start)
	if err != nil {
		pw.log.Warnf("Problem connecting with gateway. Marking as unhealthy %s", err)
		pw.metrics.alive.Set(0)
		pw.metrics.probes.With(notSuccessLabel).Inc()
		if pw.alive {
			pw.alive = false
			pw.Liveness <- false
		}
		return
	}
	// Defer close immediately after the error check so the body is released
	// on every exit path; drain it first so the keep-alive connection can
	// be reused by the transport. (The original closed only at the very
	// end of the function, with no drain.)
	defer func() {
		if _, err := io.Copy(io.Discard, resp.Body); err != nil {
			pw.log.Warnf("Failed to drain response body %s", err)
		}
		if err := resp.Body.Close(); err != nil {
			pw.log.Warnf("Failed to close response body %s", err)
		}
	}()

	if resp.StatusCode != http.StatusOK {
		pw.log.Warnf("Gateway returned unexpected status %d. Marking as unhealthy", resp.StatusCode)
		pw.metrics.alive.Set(0)
		pw.metrics.probes.With(notSuccessLabel).Inc()
		if pw.alive {
			pw.alive = false
			pw.Liveness <- false
		}
		return
	}

	pw.log.Debug("Gateway is healthy")
	pw.metrics.alive.Set(1)
	pw.metrics.latency.Set(float64(end.Milliseconds()))
	pw.metrics.latencies.Observe(float64(end.Milliseconds()))
	pw.metrics.probes.With(successLabel).Inc()
	if !pw.alive {
		pw.alive = true
		pw.Liveness <- true
	}
}