Skip to content

Commit d7d25ad

Browse files
authored
feat: Enable engine metrics exposing in the agent (#112)
* fix: local metric server should be updated due to k8s version change
* feat: enable metric scrape endpoint
* chore: enable automaxproc
* chore: remove unused file
1 parent 7e9f6dc commit d7d25ad

File tree

9 files changed

+450
-117
lines changed

9 files changed

+450
-117
lines changed

kubernetes/metricServer.yaml

Lines changed: 149 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,76 @@
1-
kind: ClusterRole
1+
# Copied from https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.7.1/components.yaml (upstream installs it with `kubectl apply -f <url>`)
2+
apiVersion: v1
3+
kind: ServiceAccount
4+
metadata:
5+
labels:
6+
k8s-app: metrics-server
7+
name: metrics-server
8+
namespace: kube-system
9+
---
210
apiVersion: rbac.authorization.k8s.io/v1
11+
kind: ClusterRole
312
metadata:
4-
name: system:aggregated-metrics-reader
513
labels:
6-
rbac.authorization.k8s.io/aggregate-to-view: "true"
7-
rbac.authorization.k8s.io/aggregate-to-edit: "true"
14+
k8s-app: metrics-server
815
rbac.authorization.k8s.io/aggregate-to-admin: "true"
16+
rbac.authorization.k8s.io/aggregate-to-edit: "true"
17+
rbac.authorization.k8s.io/aggregate-to-view: "true"
18+
name: system:aggregated-metrics-reader
919
rules:
10-
- apiGroups: ["metrics.k8s.io"]
11-
resources: ["pods"]
12-
verbs: ["get", "list", "watch"]
20+
- apiGroups:
21+
- metrics.k8s.io
22+
resources:
23+
- pods
24+
- nodes
25+
verbs:
26+
- get
27+
- list
28+
- watch
29+
---
30+
apiVersion: rbac.authorization.k8s.io/v1
31+
kind: ClusterRole
32+
metadata:
33+
labels:
34+
k8s-app: metrics-server
35+
name: system:metrics-server
36+
rules:
37+
- apiGroups:
38+
- ""
39+
resources:
40+
- nodes/metrics
41+
verbs:
42+
- get
43+
- apiGroups:
44+
- ""
45+
resources:
46+
- pods
47+
- nodes
48+
verbs:
49+
- get
50+
- list
51+
- watch
52+
---
53+
apiVersion: rbac.authorization.k8s.io/v1
54+
kind: RoleBinding
55+
metadata:
56+
labels:
57+
k8s-app: metrics-server
58+
name: metrics-server-auth-reader
59+
namespace: kube-system
60+
roleRef:
61+
apiGroup: rbac.authorization.k8s.io
62+
kind: Role
63+
name: extension-apiserver-authentication-reader
64+
subjects:
65+
- kind: ServiceAccount
66+
name: metrics-server
67+
namespace: kube-system
1368
---
1469
apiVersion: rbac.authorization.k8s.io/v1
1570
kind: ClusterRoleBinding
1671
metadata:
72+
labels:
73+
k8s-app: metrics-server
1774
name: metrics-server:system:auth-delegator
1875
roleRef:
1976
apiGroup: rbac.authorization.k8s.io
@@ -25,113 +82,122 @@ subjects:
2582
namespace: kube-system
2683
---
2784
apiVersion: rbac.authorization.k8s.io/v1
28-
kind: RoleBinding
85+
kind: ClusterRoleBinding
2986
metadata:
30-
name: metrics-server-auth-reader
31-
namespace: kube-system
87+
labels:
88+
k8s-app: metrics-server
89+
name: system:metrics-server
3290
roleRef:
3391
apiGroup: rbac.authorization.k8s.io
34-
kind: Role
35-
name: extension-apiserver-authentication-reader
92+
kind: ClusterRole
93+
name: system:metrics-server
3694
subjects:
3795
- kind: ServiceAccount
3896
name: metrics-server
3997
namespace: kube-system
4098
---
41-
apiVersion: apiregistration.k8s.io/v1
42-
kind: APIService
43-
metadata:
44-
name: v1beta1.metrics.k8s.io
45-
spec:
46-
service:
47-
name: metrics-server
48-
namespace: kube-system
49-
group: metrics.k8s.io
50-
version: v1beta1
51-
insecureSkipTLSVerify: true
52-
groupPriorityMinimum: 100
53-
versionPriority: 100
54-
---
5599
apiVersion: v1
56-
kind: ServiceAccount
100+
kind: Service
57101
metadata:
102+
labels:
103+
k8s-app: metrics-server
58104
name: metrics-server
59105
namespace: kube-system
106+
spec:
107+
ports:
108+
- name: https
109+
port: 443
110+
protocol: TCP
111+
targetPort: https
112+
selector:
113+
k8s-app: metrics-server
60114
---
61115
apiVersion: apps/v1
62116
kind: Deployment
63117
metadata:
64-
name: metrics-server
65-
namespace: kube-system
66118
labels:
67119
k8s-app: metrics-server
120+
name: metrics-server
121+
namespace: kube-system
68122
spec:
69123
selector:
70124
matchLabels:
71125
k8s-app: metrics-server
126+
strategy:
127+
rollingUpdate:
128+
maxUnavailable: 0
72129
template:
73130
metadata:
74-
name: metrics-server
75131
labels:
76132
k8s-app: metrics-server
77133
spec:
78-
serviceAccountName: metrics-server
79-
volumes:
80-
# mount in tmp so we can safely use from-scratch images and/or read-only containers
81-
- name: tmp-dir
82-
emptyDir: {}
83134
containers:
84-
- name: metrics-server
85-
image: k8s.gcr.io/metrics-server-amd64:v0.3.1
86-
args:
135+
- args:
136+
- --cert-dir=/tmp
137+
- --secure-port=10250
138+
- --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
139+
- --kubelet-use-node-status-port
87140
- --kubelet-insecure-tls
88-
- --kubelet-preferred-address-types=InternalIP
89-
imagePullPolicy: Always
141+
- --metric-resolution=15s
142+
image: registry.k8s.io/metrics-server/metrics-server:v0.7.1
143+
imagePullPolicy: IfNotPresent
144+
livenessProbe:
145+
failureThreshold: 3
146+
httpGet:
147+
path: /livez
148+
port: https
149+
scheme: HTTPS
150+
periodSeconds: 10
151+
name: metrics-server
152+
ports:
153+
- containerPort: 10250
154+
name: https
155+
protocol: TCP
156+
readinessProbe:
157+
failureThreshold: 3
158+
httpGet:
159+
path: /readyz
160+
port: https
161+
scheme: HTTPS
162+
initialDelaySeconds: 20
163+
periodSeconds: 10
164+
resources:
165+
requests:
166+
cpu: 100m
167+
memory: 200Mi
168+
securityContext:
169+
allowPrivilegeEscalation: false
170+
capabilities:
171+
drop:
172+
- ALL
173+
readOnlyRootFilesystem: true
174+
runAsNonRoot: true
175+
runAsUser: 1000
176+
seccompProfile:
177+
type: RuntimeDefault
90178
volumeMounts:
91-
- name: tmp-dir
92-
mountPath: /tmp
93-
179+
- mountPath: /tmp
180+
name: tmp-dir
181+
nodeSelector:
182+
kubernetes.io/os: linux
183+
priorityClassName: system-cluster-critical
184+
serviceAccountName: metrics-server
185+
volumes:
186+
- emptyDir: {}
187+
name: tmp-dir
94188
---
95-
apiVersion: v1
96-
kind: Service
189+
apiVersion: apiregistration.k8s.io/v1
190+
kind: APIService
97191
metadata:
98-
name: metrics-server
99-
namespace: kube-system
100192
labels:
101-
kubernetes.io/name: "Metrics-server"
102-
spec:
103-
selector:
104193
k8s-app: metrics-server
105-
ports:
106-
- port: 443
107-
protocol: TCP
108-
targetPort: 443
109-
---
110-
apiVersion: rbac.authorization.k8s.io/v1
111-
kind: ClusterRole
112-
metadata:
113-
name: system:metrics-server
114-
rules:
115-
- apiGroups:
116-
- ""
117-
resources:
118-
- pods
119-
- nodes
120-
- nodes/stats
121-
verbs:
122-
- get
123-
- list
124-
- watch
125-
---
126-
apiVersion: rbac.authorization.k8s.io/v1
127-
kind: ClusterRoleBinding
128-
metadata:
129-
name: system:metrics-server
130-
roleRef:
131-
apiGroup: rbac.authorization.k8s.io
132-
kind: ClusterRole
133-
name: system:metrics-server
134-
subjects:
135-
- kind: ServiceAccount
136-
name: metrics-server
137-
namespace: kube-system
194+
name: v1beta1.metrics.k8s.io
195+
spec:
196+
group: metrics.k8s.io
197+
groupPriorityMinimum: 100
198+
insecureSkipTLSVerify: true
199+
service:
200+
name: metrics-server
201+
namespace: kube-system
202+
version: v1beta1
203+
versionPriority: 100

shibuya/controller/collection.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ import (
66
"sync"
77

88
"github.com/rakutentech/shibuya/shibuya/config"
9-
controllerModel "github.com/rakutentech/shibuya/shibuya/controller/model"
9+
enginesModel "github.com/rakutentech/shibuya/shibuya/engines/model"
1010
"github.com/rakutentech/shibuya/shibuya/model"
1111
log "github.com/sirupsen/logrus"
1212
)
1313

14-
func prepareCollection(collection *model.Collection) []*controllerModel.EngineDataConfig {
14+
func prepareCollection(collection *model.Collection) []*enginesModel.EngineDataConfig {
1515
planCount := len(collection.ExecutionPlans)
16-
edc := controllerModel.EngineDataConfig{
16+
edc := enginesModel.EngineDataConfig{
1717
EngineData: map[string]*model.ShibuyaFile{},
1818
}
1919
engineDataConfigs := edc.DeepCopies(planCount)
@@ -102,7 +102,7 @@ func (c *Controller) TriggerCollection(collection *model.Collection) error {
102102
// When all the engines are triggered
103103

104104
pc := NewPlanController(ep, collection, c.Scheduler)
105-
if err := pc.trigger(engineDataConfigs[i]); err != nil {
105+
if err := pc.trigger(engineDataConfigs[i], runID); err != nil {
106106
errs <- err
107107
return
108108
}

shibuya/controller/engine.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
"time"
1313

1414
"github.com/rakutentech/shibuya/shibuya/config"
15-
controllerModel "github.com/rakutentech/shibuya/shibuya/controller/model"
15+
enginesModel "github.com/rakutentech/shibuya/shibuya/engines/model"
1616
"github.com/rakutentech/shibuya/shibuya/model"
1717
sos "github.com/rakutentech/shibuya/shibuya/object_storage"
1818
"github.com/rakutentech/shibuya/shibuya/scheduler"
@@ -24,7 +24,7 @@ import (
2424
)
2525

2626
type shibuyaEngine interface {
27-
trigger(edc *controllerModel.EngineDataConfig) error
27+
trigger(edc *enginesModel.EngineDataConfig) error
2828
deploy(scheduler.EngineScheduler) error
2929
subscribe(runID int64) error
3030
progress() bool
@@ -76,7 +76,7 @@ type baseEngine struct {
7676
*config.ExecutorContainer
7777
}
7878

79-
func sendTriggerRequest(url string, edc *controllerModel.EngineDataConfig) (*http.Response, error) {
79+
func sendTriggerRequest(url string, edc *enginesModel.EngineDataConfig) (*http.Response, error) {
8080
body := new(bytes.Buffer)
8181
json.NewEncoder(body).Encode(&edc)
8282
req, _ := http.NewRequest("POST", url, body)
@@ -164,7 +164,7 @@ func (be *baseEngine) deploy(manager scheduler.EngineScheduler) error {
164164
return manager.DeployEngine(be.projectID, be.collectionID, be.planID, be.ID, be.ExecutorContainer)
165165
}
166166

167-
func (be *baseEngine) trigger(edc *controllerModel.EngineDataConfig) error {
167+
func (be *baseEngine) trigger(edc *enginesModel.EngineDataConfig) error {
168168
engineUrl := be.engineUrl
169169
base := be.makeBaseUrl()
170170
url := fmt.Sprintf(base, engineUrl, "start")

shibuya/controller/plan.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import (
77
"sync"
88

99
"github.com/rakutentech/shibuya/shibuya/config"
10-
controllerModel "github.com/rakutentech/shibuya/shibuya/controller/model"
10+
enginesModel "github.com/rakutentech/shibuya/shibuya/engines/model"
1111
"github.com/rakutentech/shibuya/shibuya/model"
1212
"github.com/rakutentech/shibuya/shibuya/scheduler"
1313
_ "github.com/rakutentech/shibuya/shibuya/utils"
@@ -37,7 +37,7 @@ func (pc *PlanController) deploy() error {
3737
return nil
3838
}
3939

40-
func (pc *PlanController) prepare(plan *model.Plan, edc *controllerModel.EngineDataConfig) []*controllerModel.EngineDataConfig {
40+
func (pc *PlanController) prepare(plan *model.Plan, edc *enginesModel.EngineDataConfig, runID int64) []*enginesModel.EngineDataConfig {
4141
edc.Duration = strconv.Itoa(pc.ep.Duration)
4242
edc.Concurrency = strconv.Itoa(pc.ep.Concurrency)
4343
edc.Rampup = strconv.Itoa(pc.ep.Rampup)
@@ -52,6 +52,8 @@ func (pc *PlanController) prepare(plan *model.Plan, edc *controllerModel.EngineD
5252
}
5353
// Add test file to all engines
5454
engineDataConfigs[i].EngineData[plan.TestFile.Filename] = plan.TestFile
55+
engineDataConfigs[i].RunID = runID
56+
engineDataConfigs[i].EngineID = i
5557
// add all data uploaded in plans. This will override common data if same filename already exists
5658
for _, d := range plan.Data {
5759
sf := model.ShibuyaFile{
@@ -70,12 +72,12 @@ func (pc *PlanController) prepare(plan *model.Plan, edc *controllerModel.EngineD
7072
return engineDataConfigs
7173
}
7274

73-
func (pc *PlanController) trigger(engineDataConfig *controllerModel.EngineDataConfig) error {
75+
func (pc *PlanController) trigger(engineDataConfig *enginesModel.EngineDataConfig, runID int64) error {
7476
plan, err := model.GetPlan(pc.ep.PlanID)
7577
if err != nil {
7678
return err
7779
}
78-
engineDataConfigs := pc.prepare(plan, engineDataConfig)
80+
engineDataConfigs := pc.prepare(plan, engineDataConfig, runID)
7981
engines, err := generateEnginesWithUrl(pc.ep.Engines, pc.ep.PlanID, pc.collection.ID, pc.collection.ProjectID,
8082
JmeterEngineType, pc.scheduler)
8183
if err != nil {

0 commit comments

Comments
 (0)