Skip to content

Commit 4346c43

Browse files
authored
*: refactor error stats (#141)
Introduce the ResponseError type to record each error the client received. It includes the timestamp, the timespan in seconds, and the error message. The RunnerMetricReport will export raw data about each response error, which helps us build views of the errors. ```go // ResponseError is the record about that error. type ResponseError struct { // Timestamp indicates when this error was received. Timestamp time.Time `json:"timestamp"` // Duration records timespan in seconds. Duration float64 `json:"duration"` // Type indicates that category to which the error belongs. Type ResponseErrorType `json:"type"` // Code only works when Type is http. Code int `json:"code,omitempty"` // Message shows error message for this error. // // NOTE: When Type is http, this field will be empty. Message string `json:"message,omitempty"` } ``` Signed-off-by: Wei Fu <weifu@microsoft.com>
1 parent b9ace2d commit 4346c43

File tree

7 files changed

+223
-177
lines changed

7 files changed

+223
-177
lines changed

api/types/metric.go

Lines changed: 34 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -3,71 +3,43 @@
33

44
package types
55

6-
// HTTP2ErrorStats is the report about http2 error during testing.
7-
type HTTP2ErrorStats struct {
8-
// ConnectionErrors represents connection level errors.
9-
ConnectionErrors map[string]int32 `json:"connectionErrors,omitempty"`
10-
// StreamErrors represents stream level errors.
11-
StreamErrors map[string]int32 `json:"streamErrors,omitempty"`
12-
}
13-
14-
// NewHTTP2ErrorStats returns new instance of HTTP2ErrorStats.
15-
func NewHTTP2ErrorStats() *HTTP2ErrorStats {
16-
return &HTTP2ErrorStats{
17-
ConnectionErrors: make(map[string]int32, 10),
18-
StreamErrors: make(map[string]int32, 10),
19-
}
20-
}
21-
22-
// ResponseErrorStats is the report about errors.
23-
type ResponseErrorStats struct {
24-
// UnknownErrors is all unknown errors.
25-
UnknownErrors []string `json:"unknownErrors"`
26-
// NetErrors is to track errors from net.
27-
NetErrors map[string]int32 `json:"netErrors"`
28-
// ResponseCodes records request number grouped by response
29-
// code between 400 and 600.
30-
ResponseCodes map[int]int32 `json:"responseCodes"`
31-
// HTTP2Errors records http2 related errors.
32-
HTTP2Errors HTTP2ErrorStats `json:"http2Errors"`
33-
}
34-
35-
// NewResponseErrorStats returns empty ResponseErrorStats.
36-
func NewResponseErrorStats() *ResponseErrorStats {
37-
return &ResponseErrorStats{
38-
UnknownErrors: make([]string, 0, 1024),
39-
NetErrors: make(map[string]int32, 10),
40-
ResponseCodes: map[int]int32{},
41-
HTTP2Errors: *NewHTTP2ErrorStats(),
42-
}
43-
}
6+
import "time"
447

45-
// Copy clones self.
46-
func (r *ResponseErrorStats) Copy() ResponseErrorStats {
47-
res := NewResponseErrorStats()
8+
// ResponseErrorType is error type of response.
9+
type ResponseErrorType string
4810

49-
res.UnknownErrors = make([]string, len(r.UnknownErrors))
50-
copy(res.UnknownErrors, r.UnknownErrors)
51-
res.NetErrors = cloneMap(r.NetErrors)
52-
res.ResponseCodes = cloneMap(r.ResponseCodes)
53-
res.HTTP2Errors.ConnectionErrors = cloneMap(r.HTTP2Errors.ConnectionErrors)
54-
res.HTTP2Errors.StreamErrors = cloneMap(r.HTTP2Errors.StreamErrors)
55-
return *res
56-
}
11+
const (
12+
// ResponseErrorTypeUnknown indicates that the error does not fit any known category.
13+
ResponseErrorTypeUnknown ResponseErrorType = "unknown"
14+
// ResponseErrorTypeHTTP indicates that the response returns http code >= 400.
15+
ResponseErrorTypeHTTP ResponseErrorType = "http"
16+
// ResponseErrorTypeHTTP2Protocol indicates that error comes from http2 layer.
17+
ResponseErrorTypeHTTP2Protocol ResponseErrorType = "http2-protocol"
18+
// ResponseErrorTypeConnection indicates that error is related to connection.
19+
// For instance, connection refused caused by server down.
20+
ResponseErrorTypeConnection ResponseErrorType = "connection"
21+
)
5722

58-
// Merge merges two ResponseErrorStats.
59-
func (r *ResponseErrorStats) Merge(from *ResponseErrorStats) {
60-
r.UnknownErrors = append(r.UnknownErrors, from.UnknownErrors...)
61-
mergeMap(r.NetErrors, from.NetErrors)
62-
mergeMap(r.ResponseCodes, from.ResponseCodes)
63-
mergeMap(r.HTTP2Errors.ConnectionErrors, from.HTTP2Errors.ConnectionErrors)
64-
mergeMap(r.HTTP2Errors.StreamErrors, from.HTTP2Errors.StreamErrors)
23+
// ResponseError is the record of a single response error.
24+
type ResponseError struct {
25+
// Timestamp indicates when this error was received.
26+
Timestamp time.Time `json:"timestamp"`
27+
// Duration records timespan in seconds.
28+
Duration float64 `json:"duration"`
29+
// Type indicates the category to which the error belongs.
30+
Type ResponseErrorType `json:"type"`
31+
// Code is only set when Type is http.
32+
Code int `json:"code,omitempty"`
33+
// Message shows error message for this error.
34+
//
35+
// NOTE: When Type is http, this field will be empty.
36+
Message string `json:"message,omitempty"`
6537
}
6638

6739
// ResponseStats is the report about benchmark result.
6840
type ResponseStats struct {
69-
// ErrorStats means summary of errors.
70-
ErrorStats ResponseErrorStats
41+
// Errors stores all the observed errors.
42+
Errors []ResponseError
7143
// LatenciesByURL stores all the observed latencies for each request.
7244
LatenciesByURL map[string][]float64
7345
// TotalReceivedBytes is total bytes read from apiserver.
@@ -79,8 +51,10 @@ type RunnerMetricReport struct {
7951
Total int `json:"total"`
8052
// Duration means the time of benchmark.
8153
Duration string `json:"duration"`
82-
// ErrorStats means summary of errors.
83-
ErrorStats ResponseErrorStats `json:"errorStats"`
54+
// Errors stores all the observed errors.
55+
Errors []ResponseError `json:"errors,omitempty"`
56+
// ErrorStats is the summary of errors grouped by type.
57+
ErrorStats map[string]int32 `json:"errorStats,omitempty"`
8458
// TotalReceivedBytes is total bytes read from apiserver.
8559
TotalReceivedBytes int64 `json:"totalReceivedBytes"`
8660
// LatenciesByURL stores all the observed latencies.
@@ -94,17 +68,3 @@ type RunnerMetricReport struct {
9468
// TODO(weifu): build brand new struct for RunnerGroupsReport to include more
9569
// information, like how many runner groups, service account and flow control.
9670
type RunnerGroupsReport = RunnerMetricReport
97-
98-
func mergeMap[K comparable, V int32](to, from map[K]V) {
99-
for key, value := range from {
100-
to[key] += value
101-
}
102-
}
103-
104-
func cloneMap[K comparable, V int32](src map[K]V) map[K]V {
105-
res := map[K]V{}
106-
for key, value := range src {
107-
res[key] = value
108-
}
109-
return res
110-
}

cmd/kperf/commands/runner/runner.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ func loadConfig(cliCtx *cli.Context) (*types.LoadProfile, error) {
192192
func printResponseStats(f *os.File, rawDataFlagIncluded bool, stats *request.Result) error {
193193
output := types.RunnerMetricReport{
194194
Total: stats.Total,
195-
ErrorStats: stats.ErrorStats,
195+
ErrorStats: metrics.BuildErrorStatsGroupByType(stats.Errors),
196196
Duration: stats.Duration.String(),
197197
TotalReceivedBytes: stats.TotalReceivedBytes,
198198

@@ -215,6 +215,7 @@ func printResponseStats(f *os.File, rawDataFlagIncluded bool, stats *request.Res
215215

216216
if rawDataFlagIncluded {
217217
output.LatenciesByURL = stats.LatenciesByURL
218+
output.Errors = stats.Errors
218219
}
219220

220221
encoder := json.NewEncoder(f)

metrics/request.go

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"container/list"
88
"sync"
99
"sync/atomic"
10+
"time"
1011

1112
"github.com/Azure/kperf/api/types"
1213
)
@@ -16,7 +17,7 @@ type ResponseMetric interface {
1617
// ObserveLatency observes latency.
1718
ObserveLatency(url string, seconds float64)
1819
// ObserveFailure observes failure response.
19-
ObserveFailure(err error)
20+
ObserveFailure(now time.Time, seconds float64, err error)
2021
// ObserveReceivedBytes observes the bytes read from apiserver.
2122
ObserveReceivedBytes(bytes int64)
2223
// Gather returns the summary.
@@ -25,14 +26,14 @@ type ResponseMetric interface {
2526

2627
type responseMetricImpl struct {
2728
mu sync.Mutex
28-
errorStats *types.ResponseErrorStats
29+
errors *list.List
2930
receivedBytes int64
3031
latenciesByURLs map[string]*list.List
3132
}
3233

3334
func NewResponseMetric() ResponseMetric {
3435
return &responseMetricImpl{
35-
errorStats: types.NewResponseErrorStats(),
36+
errors: list.New(),
3637
latenciesByURLs: map[string]*list.List{},
3738
}
3839
}
@@ -51,26 +52,38 @@ func (m *responseMetricImpl) ObserveLatency(url string, seconds float64) {
5152
}
5253

5354
// ObserveFailure implements ResponseMetric.
54-
func (m *responseMetricImpl) ObserveFailure(err error) {
55+
func (m *responseMetricImpl) ObserveFailure(now time.Time, seconds float64, err error) {
5556
if err == nil {
5657
return
5758
}
5859

5960
m.mu.Lock()
6061
defer m.mu.Unlock()
6162

62-
// HTTP2 -> TCP/TLS -> Unknown
63+
oerr := types.ResponseError{
64+
Timestamp: now,
65+
Duration: seconds,
66+
}
67+
68+
// HTTP Code -> HTTP2 -> Connection -> Unknown
6369
code := codeFromHTTP(err)
70+
http2Err, isHTTP2Err := isHTTP2Error(err)
71+
connErr, isConnErr := isConnectionError(err)
6472
switch {
6573
case code != 0:
66-
m.errorStats.ResponseCodes[code]++
67-
case isHTTP2Error(err):
68-
updateHTTP2ErrorStats(m.errorStats, err)
69-
case isNetRelatedError(err):
70-
updateNetErrors(m.errorStats, err)
74+
oerr.Type = types.ResponseErrorTypeHTTP
75+
oerr.Code = code
76+
case isHTTP2Err:
77+
oerr.Type = types.ResponseErrorTypeHTTP2Protocol
78+
oerr.Message = http2Err
79+
case isConnErr:
80+
oerr.Type = types.ResponseErrorTypeConnection
81+
oerr.Message = connErr
7182
default:
72-
m.errorStats.UnknownErrors = append(m.errorStats.UnknownErrors, err.Error())
83+
oerr.Type = types.ResponseErrorTypeUnknown
84+
oerr.Message = err.Error()
7385
}
86+
m.errors.PushBack(oerr)
7487
}
7588

7689
// ObserveReceivedBytes implements ResponseMetric.
@@ -81,7 +94,7 @@ func (m *responseMetricImpl) ObserveReceivedBytes(bytes int64) {
8194
// Gather implements ResponseMetric.
8295
func (m *responseMetricImpl) Gather() types.ResponseStats {
8396
return types.ResponseStats{
84-
ErrorStats: m.dumpErrorStats(),
97+
Errors: m.dumpErrors(),
8598
LatenciesByURL: m.dumpLatencies(),
8699
TotalReceivedBytes: atomic.LoadInt64(&m.receivedBytes),
87100
}
@@ -102,9 +115,13 @@ func (m *responseMetricImpl) dumpLatencies() map[string][]float64 {
102115
return res
103116
}
104117

105-
func (m *responseMetricImpl) dumpErrorStats() types.ResponseErrorStats {
118+
func (m *responseMetricImpl) dumpErrors() []types.ResponseError {
106119
m.mu.Lock()
107120
defer m.mu.Unlock()
108121

109-
return m.errorStats.Copy()
122+
res := make([]types.ResponseError, 0, m.errors.Len())
123+
for e := m.errors.Front(); e != nil; e = e.Next() {
124+
res = append(res, e.Value.(types.ResponseError))
125+
}
126+
return res
110127
}

0 commit comments

Comments
 (0)