Merge pull request #117 from fuweid/weifu/10kpods
*: init node100_dp5_pod10k for runkperf bench
fuweid authored Apr 23, 2024
2 parents ca39f2d + 0761d4b commit 921511a
Showing 15 changed files with 402 additions and 35 deletions.
2 changes: 2 additions & 0 deletions api/types/load_traffic.go
@@ -104,6 +104,8 @@ type RequestList struct {
Limit int `json:"limit" yaml:"limit"`
// Selector defines how to identify a set of objects.
Selector string `json:"seletor" yaml:"seletor"`
// FieldSelector defines how to identify a set of objects with field selector.
FieldSelector string `json:"fieldSelector" yaml:"fieldSelector"`
}

// RequestPut defines PUT request for target resource type.
2 changes: 2 additions & 0 deletions api/types/load_traffic_test.go
@@ -39,6 +39,7 @@ spec:
resource: pods
namespace: default
seletor: app=x2
fieldSelector: spec.nodeName=x
shares: 200
- quorumList:
group: core
@@ -94,6 +95,7 @@ spec:
assert.Equal(t, "default", target.Spec.Requests[2].StaleList.Namespace)
assert.Equal(t, 0, target.Spec.Requests[2].StaleList.Limit)
assert.Equal(t, "app=x2", target.Spec.Requests[2].StaleList.Selector)
assert.Equal(t, "spec.nodeName=x", target.Spec.Requests[2].StaleList.FieldSelector)

assert.NotNil(t, target.Spec.Requests[3].QuorumList)
assert.Equal(t, 400, target.Spec.Requests[3].Shares)
108 changes: 108 additions & 0 deletions contrib/cmd/runkperf/commands/bench/node100_dp5_pod10k.go
@@ -0,0 +1,108 @@
package bench

import (
"context"
"fmt"
"sync"
"time"

internaltypes "github.com/Azure/kperf/contrib/internal/types"
"github.com/Azure/kperf/contrib/internal/utils"

"github.com/urfave/cli"
)

var benchNode100Deployment5Pod10KCase = cli.Command{
Name: "node100_dp5_pod10k",
Usage: `
The test suite sets up 100 virtual nodes and deploys 5 deployments with 10k
pods in total on those nodes. It repeatedly rolling-updates the deployments
one by one during the benchmark.
`,
Flags: []cli.Flag{
cli.IntFlag{
Name: "total",
Usage: "Total requests per runner (There are 10 runners totally and runner's rate is 10)",
Value: 36000,
},
cli.IntFlag{
Name: "podsize",
Usage: "Add <key=data, value=randomStringByLen(podsize)> in pod's annotation to increase pod size. The value is close to pod's size",
Value: 0,
},
},
Action: func(cliCtx *cli.Context) error {
_, err := renderBenchmarkReportInterceptor(
addAPIServerCoresInfoInterceptor(benchNode100Deployment5Pod10KRun),
)(cliCtx)
return err
},
}

// benchNode100Deployment5Pod10KRun is the action for the benchNode100Deployment5Pod10KCase subcommand.
func benchNode100Deployment5Pod10KRun(cliCtx *cli.Context) (*internaltypes.BenchmarkReport, error) {
ctx := context.Background()
kubeCfgPath := cliCtx.GlobalString("kubeconfig")

rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx,
"loadprofile/node100_dp5_pod10k.yaml")
if err != nil {
return nil, err
}
defer func() { _ = rgCfgFileDone() }()

vcDone, err := deployVirtualNodepool(ctx, cliCtx, "node100dp5pod10k", 100, 150)
if err != nil {
return nil, fmt.Errorf("failed to deploy virtual node: %w", err)
}
defer func() { _ = vcDone() }()

var wg sync.WaitGroup
wg.Add(1)

restartInterval := 10 * time.Second
dpCtx, dpCancel := context.WithCancel(ctx)

podSize := cliCtx.Int("podsize")
rollingUpdateFn, err := utils.RepeatRollingUpdate10KPod(dpCtx, kubeCfgPath, "dp5pod10k", podSize, restartInterval)
if err != nil {
dpCancel()
return nil, fmt.Errorf("failed to setup workload: %w", err)
}

go func() {
defer wg.Done()

// FIXME(weifu):
//
// DeployRunnerGroup should return a ready notification.
// The rolling update should start only after the runners are ready.
rollingUpdateFn()
}()

rgResult, derr := utils.DeployRunnerGroup(ctx,
cliCtx.GlobalString("kubeconfig"),
cliCtx.GlobalString("runner-image"),
rgCfgFile,
cliCtx.GlobalString("runner-flowcontrol"),
cliCtx.GlobalString("rg-affinity"),
)
dpCancel()
wg.Wait()

if derr != nil {
return nil, derr
}

return &internaltypes.BenchmarkReport{
Description: fmt.Sprintf(`
Environment: 100 virtual nodes managed by kwok-controller,
Workload: Deploy 5 deployments with 10,000 pods in total. Rolling-update the deployments one by one; the interval is %v`, restartInterval),
LoadSpec: *rgSpec,
Result: *rgResult,
Info: map[string]interface{}{
"podSizeInBytes": podSize,
},
}, nil
}
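For reference, a hypothetical invocation of this new case could look like the line below. The bench parent command and the global flags (kubeconfig, runner-image, runner-flowcontrol, rg-affinity) are assumed from how this file reads them via cliCtx.GlobalString; their exact placement on the command line depends on where they are registered, which this diff does not show.

runkperf bench --kubeconfig ~/.kube/config --runner-image <runner-image> node100_dp5_pod10k --total 36000 --podsize 2048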
33 changes: 3 additions & 30 deletions contrib/cmd/runkperf/commands/bench/node100_job1_pod3k.go
@@ -6,14 +6,10 @@ import (
"sync"
"time"

"github.com/Azure/kperf/api/types"
kperfcmdutils "github.com/Azure/kperf/cmd/kperf/commands/utils"
internaltypes "github.com/Azure/kperf/contrib/internal/types"
"github.com/Azure/kperf/contrib/internal/utils"

"github.com/urfave/cli"
"gopkg.in/yaml.v2"
"k8s.io/klog/v2"
)

var benchNode100Job1Pod3KCase = cli.Command{
@@ -43,31 +39,8 @@ func benchNode100Job1Pod3KCaseRun(cliCtx *cli.Context) (*internaltypes.Benchmark
ctx := context.Background()
kubeCfgPath := cliCtx.GlobalString("kubeconfig")

var rgSpec types.RunnerGroupSpec
rgCfgFile, rgCfgFileDone, err := utils.NewLoadProfileFromEmbed(
"loadprofile/node100_job1_pod3k.yaml",
func(spec *types.RunnerGroupSpec) error {
reqs := cliCtx.Int("total")
if reqs < 0 {
return fmt.Errorf("invalid total-requests value: %v", reqs)
}

rgAffinity := cliCtx.GlobalString("rg-affinity")
affinityLabels, err := kperfcmdutils.KeyValuesMap([]string{rgAffinity})
if err != nil {
return fmt.Errorf("failed to parse %s affinity: %w", rgAffinity, err)
}

spec.Profile.Spec.Total = reqs
spec.NodeAffinity = affinityLabels

data, _ := yaml.Marshal(spec)
klog.V(2).InfoS("Load Profile", "config", string(data))

rgSpec = *spec
return nil
},
)
rgCfgFile, rgSpec, rgCfgFileDone, err := newLoadProfileFromEmbed(cliCtx,
"loadprofile/node100_job1_pod3k.yaml")
if err != nil {
return nil, err
}
@@ -108,7 +81,7 @@ func benchNode100Job1Pod3KCaseRun(cliCtx *cli.Context) (*internaltypes.Benchmark
Description: fmt.Sprintf(`
Environment: 100 virtual nodes managed by kwok-controller,
Workload: Deploy 1 job with 3,000 pods repeatedly. The parallelism is 100. The interval is %v`, jobInterval),
LoadSpec: rgSpec,
LoadSpec: *rgSpec,
Result: *rgResult,
Info: make(map[string]interface{}),
}, nil
1 change: 1 addition & 0 deletions contrib/cmd/runkperf/commands/bench/root.go
@@ -53,5 +53,6 @@ var Command = cli.Command{
},
Subcommands: []cli.Command{
benchNode100Job1Pod3KCase,
benchNode100Deployment5Pod10KCase,
},
}
37 changes: 37 additions & 0 deletions contrib/cmd/runkperf/commands/bench/utils.go
@@ -7,10 +7,13 @@ import (
"os"
"path/filepath"

"github.com/Azure/kperf/api/types"
kperfcmdutils "github.com/Azure/kperf/cmd/kperf/commands/utils"
internaltypes "github.com/Azure/kperf/contrib/internal/types"
"github.com/Azure/kperf/contrib/internal/utils"

"github.com/urfave/cli"
"gopkg.in/yaml.v2"
"k8s.io/klog/v2"
)

@@ -120,3 +123,37 @@ func deployVirtualNodepool(ctx context.Context, cliCtx *cli.Context, target stri
return kr.DeleteNodepool(ctx, 0, target)
}, nil
}

// newLoadProfileFromEmbed loads the named load profile from the embedded
// filesystem and applies the CLI flags (total, rg-affinity) to it.
func newLoadProfileFromEmbed(cliCtx *cli.Context, name string) (_name string, _spec *types.RunnerGroupSpec, _cleanup func() error, _err error) {
var rgSpec types.RunnerGroupSpec
rgCfgFile, rgCfgFileDone, err := utils.NewLoadProfileFromEmbed(
name,
func(spec *types.RunnerGroupSpec) error {
reqs := cliCtx.Int("total")
if reqs < 0 {
return fmt.Errorf("invalid total-requests value: %v", reqs)
}

rgAffinity := cliCtx.GlobalString("rg-affinity")
affinityLabels, err := kperfcmdutils.KeyValuesMap([]string{rgAffinity})
if err != nil {
return fmt.Errorf("failed to parse %s affinity: %w", rgAffinity, err)
}

spec.Profile.Spec.Total = reqs
spec.NodeAffinity = affinityLabels

data, _ := yaml.Marshal(spec)
klog.V(2).InfoS("Load Profile", "config", string(data))

rgSpec = *spec
return nil
},
)
if err != nil {
return "", nil, nil, err
}
return rgCfgFile, &rgSpec, rgCfgFileDone, nil
}
12 changes: 12 additions & 0 deletions contrib/internal/manifests/helm.go
@@ -0,0 +1,12 @@
package manifests

import (
rootmanifests "github.com/Azure/kperf/manifests"

"helm.sh/helm/v3/pkg/chart"
)

// LoadChart returns the chart for the given component from this package's embedded filesystem.
func LoadChart(componentName string) (*chart.Chart, error) {
return rootmanifests.LoadChartFromEmbedFS(FS, componentName)
}
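As an illustration only (not part of this commit), a caller could use the new helper roughly as sketched below. The chart name "2k-pods-per-1-deployment" is taken from the Chart.yaml added further down; whether the embedded directory is addressed by that exact name is an assumption.

package example // hypothetical caller, for illustration only

import (
	"fmt"

	"github.com/Azure/kperf/contrib/internal/manifests"

	"helm.sh/helm/v3/pkg/chart"
)

// loadWorkloadChart sketches how LoadChart might be used to fetch the
// workload chart from the embedded filesystem.
func loadWorkloadChart() (*chart.Chart, error) {
	// Assumption: the embedded chart directory is named after the chart itself.
	c, err := manifests.LoadChart("2k-pods-per-1-deployment")
	if err != nil {
		return nil, fmt.Errorf("failed to load workload chart: %w", err)
	}
	return c, nil
}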
40 changes: 40 additions & 0 deletions contrib/internal/manifests/loadprofile/node100_dp5_pod10k.yaml
@@ -0,0 +1,40 @@
count: 10
loadProfile:
version: 1
description: "node100-deployment5-pod10k"
spec:
rate: 10
total: 36000
conns: 10
client: 100
contentType: json
disableHTTP2: false
maxRetries: 0
requests:
- staleList:
version: v1
resource: pods
# NOTE: Please align with ../../utils/utils.go#RepeatRollingUpdate10KPod
seletor: "app=benchmark"
# NOTE: Please align with ../../../cmd/runkperf/commands/bench/node100_dp5_pod10k.go.
# There are only 100 nodes and each node can run up to 150 pods, so the
# response should contain items.
fieldSelector: "spec.nodeName=node100dp5pod10k-49"
shares: 1000 # 1000 / (1000 + 100 + 200) * 10 = 7.7 req/s
- staleList:
version: v1
resource: pods
shares: 100 # 100 / (1000 + 100 + 200) * 10 = 0.8 req/s
- quorumList:
version: v1
resource: pods
namespace: benchmark-0
# NOTE: This simulates the list-pods requests issued by daemonsets and the
# kubelet when they fetch pods from etcd. The limit is 100 because it's
# close to the MaxPods value.
limit: 100
# NOTE: Please align with ../../../cmd/runkperf/commands/bench/node100_dp5_pod10k.go.
# There are only 100 nodes and each node can run up to 150 pods, so the
# response should contain items.
fieldSelector: "spec.nodeName=node100dp5pod10k-49"
shares: 200 # 200 / (1000 + 100 + 200) * 10 = 1.5 req/s
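The inline share comments above follow a simple proportion: each request's effective rate is its shares divided by the sum of all shares, multiplied by the profile's rate. A minimal, self-contained sketch of that arithmetic (illustrative only; the actual weighting is done by kperf's runner, which this diff does not touch):

package main

import "fmt"

func main() {
	const rate = 10.0 // loadProfile.spec.rate
	// shares for: staleList app=benchmark, staleList all pods, quorumList per node
	shares := []float64{1000, 100, 200}

	var sum float64
	for _, s := range shares {
		sum += s
	}
	for i, s := range shares {
		// 1000/1300*10 ≈ 7.7, 100/1300*10 ≈ 0.8, 200/1300*10 ≈ 1.5 req/s
		fmt.Printf("request[%d]: %.1f req/s\n", i, s/sum*rate)
	}
}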
@@ -0,0 +1,3 @@
apiVersion: v1
name: "2k-pods-per-1-deployment"
version: "0.0.1"
@@ -0,0 +1,53 @@
{{- $pattern := .Values.pattern }}
{{- $podSizeInBytes := int .Values.podSizeInBytes }}
{{- range $index := (untilStep 0 (int .Values.total) 1) }}
apiVersion: v1
kind: Namespace
metadata:
name: {{ $pattern }}-{{ $index }}
labels:
name: benchmark-testing
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ $pattern }}-{{ $index }}
namespace: {{ $pattern }}-{{ $index }}
labels:
app: {{ $pattern }}
spec:
replicas: 2000
strategy:
rollingUpdate:
maxSurge: 100
type: RollingUpdate
selector:
matchLabels:
app: {{ $pattern }}
index: "{{ $index }}"
template:
metadata:
labels:
app: {{ $pattern }}
index: "{{ $index }}"
annotations:
data: "{{ randAlphaNum $podSizeInBytes | nospace }}"
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: type
operator: In
values:
- kperf-virtualnodes
tolerations:
- key: "kperf.io/nodepool"
operator: "Exists"
effect: "NoSchedule"
containers:
- name: fake-container
image: fake-image
---
{{- end}}
@@ -0,0 +1,3 @@
pattern: "benchmark"
total: 5
podSizeInBytes: 2048
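Taken together with the template above, these defaults work out as follows: total: 5 renders five namespace/deployment pairs (benchmark-0 through benchmark-4), each with replicas: 2000, i.e. 5 × 2,000 = 10,000 pods, which fits within the 100 nodes × 150 pods = 15,000-pod capacity of the virtual nodepool created in node100_dp5_pod10k.go. The podSizeInBytes: 2048 default pads each pod's data annotation with a 2 KiB random string; presumably the podsize flag overrides this value when the bench case installs the chart, though that wiring is not shown in this diff.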