Skip to content

Commit c13f941

Browse files
shlomi-noachrvrangel
authored andcommitted
Tablet throttler: read and use MySQL host metrics (vitessio#16904)
Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Signed-off-by: Renan Rangel <rrangel@slack-corp.com>
1 parent c7f241e commit c13f941

25 files changed

+735
-118
lines changed

go/osutil/loadavg.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
Copyright 2024 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package osutil
18+
19+
import (
20+
"fmt"
21+
"strconv"
22+
"strings"
23+
)
24+
25+
// parseLoadAvg parses the load average from the content of /proc/loadavg or sysctl output.
26+
// Input such as "1.00 0.99 0.98 1/1 1", "2.83 3.01 3.36"
27+
func parseLoadAvg(content string) (float64, error) {
28+
fields := strings.Fields(content)
29+
if len(fields) == 0 {
30+
return 0, fmt.Errorf("unexpected loadavg content: %s", content)
31+
}
32+
return strconv.ParseFloat(fields[0], 64)
33+
}

go/osutil/loadavg_darwin.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//go:build darwin
2+
3+
/*
4+
Copyright 2024 The Vitess Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package osutil
20+
21+
import (
22+
"fmt"
23+
"os/exec"
24+
)
25+
26+
// LoadAvg returns the past 1 minute system load average. This works on linux and darwin systems.
27+
// On other systems, it returns 0 with no error.
28+
func LoadAvg() (float64, error) {
29+
cmd := exec.Command("sysctl", "-n", "vm.loadavg")
30+
// Sample output: `{ 2.83 3.01 3.36 }`
31+
output, err := cmd.CombinedOutput()
32+
if err != nil {
33+
return 0, err
34+
}
35+
if len(output) < 1 {
36+
return 0, fmt.Errorf("unexpected sysctl output: %q", output)
37+
}
38+
output = output[1:] // Remove the leading `{ `
39+
return parseLoadAvg(string(output))
40+
}

go/osutil/loadavg_linux.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//go:build linux
2+
3+
/*
4+
Copyright 2024 The Vitess Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package osutil
20+
21+
import (
22+
"os"
23+
)
24+
25+
// LoadAvg returns the past 1 minute system load average. This works on linux and darwin systems.
26+
// On other systems, it returns 0 with no error.
27+
func LoadAvg() (float64, error) {
28+
content, err := os.ReadFile("/proc/loadavg")
29+
if err != nil {
30+
return 0, err
31+
}
32+
return parseLoadAvg(string(content))
33+
}

go/osutil/loadavg_other.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//go:build !linux && !darwin
2+
3+
/*
4+
Copyright 2024 The Vitess Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package osutil
20+
21+
// LoadAvg returns the past 1 minute system load average. This works on linux and darwin systems.
22+
// On other systems, it returns 0 with no error.
23+
func LoadAvg() (float64, error) {
24+
return 0, nil
25+
}

go/osutil/loadavg_test.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
/*
2+
Copyright 2024 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package osutil
18+
19+
import (
20+
"testing"
21+
22+
"github.com/stretchr/testify/assert"
23+
)
24+
25+
func TestLoadAvgValue(t *testing.T) {
26+
tcases := []struct {
27+
input string
28+
loadavg float64
29+
isError bool
30+
}{
31+
{
32+
input: "",
33+
isError: true,
34+
},
35+
{
36+
input: "{}",
37+
isError: true,
38+
},
39+
{
40+
input: "{ x y z }",
41+
isError: true,
42+
},
43+
{
44+
input: "1",
45+
loadavg: 1.0,
46+
},
47+
{
48+
input: "0.00 0.00 0.00 1/1 1",
49+
loadavg: 0.0,
50+
},
51+
{
52+
input: "2.72 2.89 3.17",
53+
loadavg: 2.72,
54+
},
55+
{
56+
input: " 2.72 2.89 3.17",
57+
loadavg: 2.72,
58+
},
59+
}
60+
for _, tcase := range tcases {
61+
t.Run(tcase.input, func(t *testing.T) {
62+
loadavg, err := parseLoadAvg(tcase.input)
63+
if tcase.isError {
64+
assert.Error(t, err)
65+
} else {
66+
assert.NoError(t, err)
67+
assert.Equal(t, tcase.loadavg, loadavg)
68+
}
69+
})
70+
}
71+
}
72+
73+
func TestLoadAvg(t *testing.T) {
74+
loadavg, err := LoadAvg()
75+
assert.NoError(t, err)
76+
assert.GreaterOrEqual(t, loadavg, 0.0)
77+
}

go/test/endtoend/tabletmanager/throttler_topo/throttler_test.go

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ func warmUpHeartbeat(t *testing.T) tabletmanagerdatapb.CheckThrottlerResponseCod
215215
}
216216

217217
// waitForThrottleCheckStatus waits for the tablet to return the provided HTTP code in a throttle check
218-
func waitForThrottleCheckStatus(t *testing.T, tablet *cluster.Vttablet, wantCode tabletmanagerdatapb.CheckThrottlerResponseCode) bool {
218+
func waitForThrottleCheckStatus(t *testing.T, tablet *cluster.Vttablet, wantCode tabletmanagerdatapb.CheckThrottlerResponseCode) (*tabletmanagerdatapb.CheckThrottlerResponse, bool) {
219219
_ = warmUpHeartbeat(t)
220220
ctx, cancel := context.WithTimeout(context.Background(), onDemandHeartbeatDuration*4)
221221
defer cancel()
@@ -229,11 +229,11 @@ func waitForThrottleCheckStatus(t *testing.T, tablet *cluster.Vttablet, wantCode
229229
if wantCode == resp.Check.ResponseCode {
230230
// Wait for any cached check values to be cleared and the new
231231
// status value to be in effect everywhere before returning.
232-
return true
232+
return resp.Check, true
233233
}
234234
select {
235235
case <-ctx.Done():
236-
return assert.EqualValues(t, wantCode, resp.Check.StatusCode, "response: %+v", resp)
236+
return resp.Check, false
237237
case <-ticker.C:
238238
}
239239
}
@@ -779,16 +779,16 @@ func TestUpdateAppCheckedMetrics(t *testing.T) {
779779
}
780780
waitForThrottleCheckStatus(t, primaryTablet, tabletmanagerdatapb.CheckThrottlerResponseCode_THRESHOLD_EXCEEDED)
781781
})
782-
t.Run("assigning 'loadavg' metrics to 'test' app", func(t *testing.T) {
782+
t.Run("assigning 'threads_running' metrics to 'test' app", func(t *testing.T) {
783783
{
784-
req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: "loadavg", Threshold: 7777}
784+
req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: base.ThreadsRunningMetricName.String(), Threshold: 7777}
785785
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil)
786786
assert.NoError(t, err)
787787
}
788788
{
789789
req := &vtctldatapb.UpdateThrottlerConfigRequest{}
790790
appCheckedMetrics := map[string]*topodatapb.ThrottlerConfig_MetricNames{
791-
testAppName.String(): {Names: []string{"loadavg"}},
791+
testAppName.String(): {Names: []string{base.ThreadsRunningMetricName.String()}},
792792
}
793793
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, appCheckedMetrics)
794794
assert.NoError(t, err)
@@ -802,18 +802,18 @@ func TestUpdateAppCheckedMetrics(t *testing.T) {
802802
for _, tablet := range []cluster.Vttablet{*primaryTablet, *replicaTablet} {
803803
throttler.WaitForThrottlerStatusEnabled(t, &clusterInstance.VtctldClientProcess, &tablet, true, &throttler.Config{Query: throttler.DefaultQuery, Threshold: unreasonablyLowThreshold.Seconds()}, throttlerEnabledTimeout)
804804
}
805-
t.Run("validating OK response from throttler since it's checking loadavg", func(t *testing.T) {
806-
if !waitForThrottleCheckStatus(t, primaryTablet, tabletmanagerdatapb.CheckThrottlerResponseCode_OK) {
805+
t.Run("validating OK response from throttler since it's checking threads_running", func(t *testing.T) {
806+
if _, ok := waitForThrottleCheckStatus(t, primaryTablet, tabletmanagerdatapb.CheckThrottlerResponseCode_OK); !ok {
807807
t.Logf("throttler primary status: %+v", throttleStatus(t, primaryTablet))
808808
t.Logf("throttler replica status: %+v", throttleStatus(t, replicaTablet))
809809
}
810810
})
811811
})
812-
t.Run("assigning 'loadavg,lag' metrics to 'test' app", func(t *testing.T) {
812+
t.Run("assigning 'threads_running,lag' metrics to 'test' app", func(t *testing.T) {
813813
{
814814
req := &vtctldatapb.UpdateThrottlerConfigRequest{}
815815
appCheckedMetrics := map[string]*topodatapb.ThrottlerConfig_MetricNames{
816-
testAppName.String(): {Names: []string{"loadavg,lag"}},
816+
testAppName.String(): {Names: []string{base.ThreadsRunningMetricName.String(), base.LagMetricName.String()}},
817817
}
818818
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, appCheckedMetrics)
819819
assert.NoError(t, err)
@@ -831,9 +831,51 @@ func TestUpdateAppCheckedMetrics(t *testing.T) {
831831
waitForThrottleCheckStatus(t, primaryTablet, tabletmanagerdatapb.CheckThrottlerResponseCode_THRESHOLD_EXCEEDED)
832832
})
833833
})
834+
t.Run("assigning 'mysqld-loadavg,mysqld-datadir-used-ratio' metrics to 'test' app", func(t *testing.T) {
835+
{
836+
req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: base.MysqldDatadirUsedRatioMetricName.String(), Threshold: 0.9999}
837+
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil)
838+
assert.NoError(t, err)
839+
}
840+
{
841+
req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: base.MysqldLoadAvgMetricName.String(), Threshold: 5555}
842+
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil)
843+
assert.NoError(t, err)
844+
}
845+
{
846+
req := &vtctldatapb.UpdateThrottlerConfigRequest{}
847+
appCheckedMetrics := map[string]*topodatapb.ThrottlerConfig_MetricNames{
848+
testAppName.String(): {Names: []string{base.MysqldDatadirUsedRatioMetricName.String(), base.MysqldLoadAvgMetricName.String()}},
849+
}
850+
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, appCheckedMetrics)
851+
assert.NoError(t, err)
852+
}
853+
{
854+
req := &vtctldatapb.UpdateThrottlerConfigRequest{Threshold: extremelyHighThreshold.Seconds()}
855+
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil)
856+
assert.NoError(t, err)
857+
}
858+
// Wait for the throttler to be enabled everywhere with new config.
859+
for _, tablet := range []cluster.Vttablet{*primaryTablet, *replicaTablet} {
860+
throttler.WaitForThrottlerStatusEnabled(t, &clusterInstance.VtctldClientProcess, &tablet, true, &throttler.Config{Query: throttler.DefaultQuery, Threshold: extremelyHighThreshold.Seconds()}, throttlerEnabledTimeout)
861+
}
862+
t.Run("validating OK response from throttler since it's checking mysqld-loadavg,mysqld-datadir-used-ratio", func(t *testing.T) {
863+
resp, ok := waitForThrottleCheckStatus(t, primaryTablet, tabletmanagerdatapb.CheckThrottlerResponseCode_OK)
864+
if !ok {
865+
t.Logf("response: %+v", resp)
866+
t.Logf("throttler primary status: %+v", throttleStatus(t, primaryTablet))
867+
t.Logf("throttler replica status: %+v", throttleStatus(t, replicaTablet))
868+
}
869+
require.Contains(t, resp.Metrics, base.MysqldDatadirUsedRatioMetricName.String())
870+
require.Contains(t, resp.Metrics, base.MysqldLoadAvgMetricName.String())
871+
assert.NotContains(t, resp.Metrics, base.ThreadsRunningMetricName.String())
872+
873+
assert.NotZero(t, resp.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value)
874+
})
875+
})
834876
t.Run("removing assignment from 'test' app and restoring defaults", func(t *testing.T) {
835877
{
836-
req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: "loadavg", Threshold: 0}
878+
req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: base.ThreadsRunningMetricName.String(), Threshold: 0}
837879
_, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil)
838880
assert.NoError(t, err)
839881
}

go/test/endtoend/throttler/util.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ func WaitForValidData(t *testing.T, tablet *cluster.Vttablet, timeout time.Durat
529529
selfCheckURL := fmt.Sprintf("http://localhost:%d/throttler/check-self", tablet.HTTPPort)
530530
ctx, cancel := context.WithTimeout(context.Background(), timeout)
531531
defer cancel()
532-
ticker := time.NewTicker(500 * time.Millisecond)
532+
ticker := time.NewTicker(time.Second)
533533
defer ticker.Stop()
534534

535535
for {
@@ -548,8 +548,10 @@ func WaitForValidData(t *testing.T, tablet *cluster.Vttablet, timeout time.Durat
548548
}
549549
select {
550550
case <-ctx.Done():
551-
t.Errorf("timed out waiting for %s tablet's throttler to return a valid result after %v; last seen value: %+v",
552-
tablet.Alias, timeout, checkResp)
551+
respByte, _ := io.ReadAll(checkResp.Body)
552+
body := string(respByte)
553+
require.Failf(t, "time out", "waiting for %s tablet's throttler to return a valid result after %v; last seen result: %+v",
554+
tablet.Alias, timeout, body)
553555
return
554556
case <-ticker.C:
555557
}

go/textutil/strings.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,19 @@ func SingleWordCamel(w string) string {
9090
return strings.ToUpper(w[0:1]) + strings.ToLower(w[1:])
9191
}
9292

93+
var multiWordSplitterRegexp = regexp.MustCompile(`[-_.\s]+`)
94+
95+
// PascalCase turns a string into PascalCase by splitting it into words and
96+
// capitalizing the first letter of each word.
97+
func PascalCase(w string) string {
98+
var b strings.Builder
99+
words := multiWordSplitterRegexp.Split(w, -1)
100+
for _, word := range words {
101+
b.WriteString(SingleWordCamel(word))
102+
}
103+
return b.String()
104+
}
105+
93106
// ValueIsSimulatedNull returns true if the slice value represents
94107
// a NULL or unknown/unspecified value. This is used to distinguish
95108
// between a zero value empty slice and a user provided value of an

0 commit comments

Comments
 (0)