Skip to content

Commit

Permalink
CLOUDP-234072: Fix metrics to show & flag run numbers (#1416)
Browse files Browse the repository at this point in the history
  • Loading branch information
josvazg authored Mar 5, 2024
1 parent 6bf53f9 commit 8dc6913
Show file tree
Hide file tree
Showing 162 changed files with 274 additions and 45 deletions.
42 changes: 36 additions & 6 deletions tools/metrics/flakiness.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"fmt"
"strings"
"time"

"github.com/google/go-github/v57/github"
)

type FlakinessQuerier interface {
Expand All @@ -16,7 +18,8 @@ type testFlakiness struct {

type slotFlakiness struct {
interval
flakyTests flakyRank
successfulCloudTestRuns int
flakyTests flakyRank
}

func (sr slotFlakiness) count() int {
Expand Down Expand Up @@ -88,33 +91,60 @@ func QueryFlakiness(qc QueryClient, notAfter time.Time, period time.Duration, sl
return sfr, nil
}
for _, run := range wfRuns.WorkflowRuns {
if run.CreatedAt.Time.After(notAfter) {
continue // skip anything after the end date
}
if run.CreatedAt.Time.Before(notBefore) {
return sfr, nil // data is returned in chronological descendent order
}
if !strings.HasPrefix(*run.Name, "Test") || (run.Conclusion != nil && *run.Conclusion != "success") {
continue // if it failed completely, it is not flaky
if !strings.HasPrefix(*run.Name, "Test") {
continue // skip non tests
}
slot := slotForTimestamp(period, notAfter, run.CreatedAt.Time)
rid := *run.ID
failed, err := queryJobFlakiness(qc, rid)
jobs, err := queryAllJobs(qc, rid)
if err != nil {
return nil, err
}
if run.Conclusion != nil && *run.Conclusion != "success" {
// if it failed completely, it is not flaky
continue
}
if isCloudTest(jobs) {
sfr[slot].successfulCloudTestRuns += 1
}
failed, err := queryJobFlakiness(rid, jobs)
if err != nil {
return nil, err
}
slot := slotForTimestamp(period, notAfter, run.CreatedAt.Time)
for _, failure := range failed {
registerFlakiness(sfr[slot], failure)
}
}
}
}

func queryJobFlakiness(qc QueryClient, rid int64) ([]jobID, error) {
// queryAllJobs asks qc for the jobs of workflow run rid, passing "all" and 1
// to TestWorkflowRunJobs (presumably the job filter and the page number —
// confirm against the QueryClient definition).
//
// It returns a wrapped error when the query fails, and an error when the
// returned job list is longer than PerPage. On success the jobs are returned
// unmodified.
//
// NOTE(review): a single call is made, and the size guard compares the length
// of that one result against PerPage. If the client never returns more than
// PerPage entries per call, this guard cannot trigger and extra jobs would be
// dropped silently — confirm whether the reported total count should be
// checked instead.
func queryAllJobs(qc QueryClient, rid int64) (*github.Jobs, error) {
	runJobs, err := qc.TestWorkflowRunJobs(rid, "all", 1)
	if err != nil {
		return nil, fmt.Errorf("failed to query job run %d: %w", rid, err)
	}
	if count := len(runJobs.Jobs); count > PerPage {
		return nil, fmt.Errorf("too many jobs in run (%d > %d)", count, PerPage)
	}
	return runJobs, nil
}

// isCloudTest reports whether jobs contains a job named "cloud-tests" whose
// conclusion is set and is anything other than "skipped".
//
// A nil jobs value, nil job entries, and jobs lacking a name or a conclusion
// are treated as non-matching instead of being dereferenced, so a partially
// populated API response cannot cause a nil-pointer panic here.
func isCloudTest(jobs *github.Jobs) bool {
	if jobs == nil {
		return false
	}
	for _, job := range jobs.Jobs {
		// Name and Conclusion are pointers; skip entries where either is
		// missing rather than dereferencing nil.
		if job == nil || job.Name == nil || job.Conclusion == nil {
			continue
		}
		if *job.Name == "cloud-tests" && *job.Conclusion != "skipped" {
			return true
		}
	}
	return false
}

func queryJobFlakiness(rid int64, jobs *github.Jobs) ([]jobID, error) {
failed := []jobID{}
for _, job := range jobs.Jobs {
if job.Conclusion != nil && *job.Conclusion == "failure" {
Expand Down
25 changes: 23 additions & 2 deletions tools/metrics/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,38 @@ import (
"log"
"os"
"strings"
"time"
)

func main() {
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: [FORMAT=json|summary] %s {regressions|flakiness}\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Usage: [FORMAT=json|summary] [TILL=date] %s {regressions|flakiness}\n", os.Args[0])
os.Exit(1)
}
query := strings.ToLower(os.Args[1])
if report, err := report(query); err != nil {
format := valueOrDefault(os.Getenv("FORMAT"), "json")
end := mustParseRFC3999(os.Getenv("TILL"))
if report, err := Report(NewDefaultQueryClient(), end, query, format); err != nil {
log.Fatal(err)
} else {
fmt.Fprint(os.Stdout, report)
}
}

// valueOrDefault returns value unchanged when it is non-empty; an empty value
// is replaced by defaultValue.
func valueOrDefault(value, defaultValue string) string {
	if value != "" {
		return value
	}
	return defaultValue
}

// mustParseRFC3999 turns date into a time.Time.
//
// An empty date yields the current time. Any other value is parsed with the
// time.RFC3339 layout (the function name says "3999", but RFC 3339 is the
// format actually applied). A value that does not parse makes the function
// panic with the parse error.
func mustParseRFC3999(date string) time.Time {
	if len(date) == 0 {
		return time.Now()
	}
	parsed, parseErr := time.Parse(time.RFC3339, date)
	if parseErr == nil {
		return parsed
	}
	panic(parseErr)
}
12 changes: 10 additions & 2 deletions tools/metrics/regressions.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type testRegressions struct {

type slotRegressions struct {
interval
runs int
regressions map[string]*testRegressions
}

Expand Down Expand Up @@ -53,18 +54,25 @@ func QueryRegressions(qc QueryClient, notAfter time.Time, period time.Duration,
return srr, nil
}
for _, run := range wfRuns.WorkflowRuns {
if run.CreatedAt.Time.After(notAfter) {
continue // skip anything after the end date
}
if run.CreatedAt.Before(notBefore) {
return srr, nil // data is returned in chronological descendent order
}
if !strings.HasPrefix(*run.Name, "Test") || (run.Conclusion != nil && *run.Conclusion == "success") {
if !strings.HasPrefix(*run.Name, "Test") {
continue
}
rid := *run.ID
slot := slotForTimestamp(period, notAfter, run.CreatedAt.Time)
srr[slot].runs += 1
if run.Conclusion != nil && *run.Conclusion == "success" {
continue
}
failed, err := queryJobRegressions(qc, rid)
if err != nil {
return nil, err
}
slot := slotForTimestamp(period, notAfter, run.CreatedAt.Time)
for _, failure := range failed {
registerRegression(srr[slot], identify(failure), runID(rid))
}
Expand Down
80 changes: 47 additions & 33 deletions tools/metrics/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package main
import (
"encoding/json"
"fmt"
"os"
"strings"
"time"
)
Expand All @@ -17,7 +16,8 @@ type ReportInfo struct {
SlotCount int `json:"slot_count"`
Slots []*SlotInfo `json:"slots"`
Total int `json:"total"`
QueryTme string `json:"query_time"`
QueryTime string `json:"query_time"`
// RunsLabel is the human-readable name of what the per-slot Runs figure counts.
RunsLabel string `json:"runsLabel"`
}

type SlotInfo struct {
Expand All @@ -26,6 +26,7 @@ type SlotInfo struct {
End string `json:"end"`
Entries []*EntryInfo `json:"entries,omitempty"`
EntryCount int `json:"entry_count"`
Runs int `json:"runs"`
}

type EntryInfo struct {
Expand All @@ -35,20 +36,33 @@ type EntryInfo struct {
TestCount int `json:"test_count"`
}

func report(query string) (string, error) {
type reportFunc func(QueryClient, time.Time) (*ReportInfo, error)

// Report builds the report named by query and renders it as a string.
//
// The report function chosen by selectReportFunc is invoked with qc and end;
// if it fails, its error is returned together with an empty string. Otherwise
// the resulting report is handed to formatReport along with format, and that
// call's result is returned as is.
func Report(qc QueryClient, end time.Time, query, format string) (string, error) {
	info, err := selectReportFunc(query)(qc, end)
	if err != nil {
		return "", err
	}
	return formatReport(info, format)
}

func selectReportFunc(query string) reportFunc {
switch query {
case "regressions":
return format(regressions())
return regressions
case "flakiness":
return format(flakiness())
return flakiness
default:
return "", fmt.Errorf("query type %q unsupported, can only be 'regressions' or 'flakiness'", query)
return func(_ QueryClient, _ time.Time) (*ReportInfo, error) {
return nil, fmt.Errorf("query type %q unsupported, can only be 'regressions' or 'flakiness'", query)
}
}
}

func regressions() (*ReportInfo, error) {
func regressions(qc QueryClient, end time.Time) (*ReportInfo, error) {
start := time.Now()
results, err := QueryRegressions(NewDefaultQueryClient(), time.Now(), Weekly, Weeks)
results, err := QueryRegressions(qc, end, Weekly, Weeks)
if err != nil {
return nil, err
}
Expand All @@ -58,7 +72,8 @@ func regressions() (*ReportInfo, error) {
SlotCount: len(results),
Slots: regressionsSlots(results),
Total: results.count(),
QueryTme: fmt.Sprintf("%v", elapsed),
QueryTime: fmt.Sprintf("%v", elapsed),
RunsLabel: "total merges",
}, nil
}

Expand All @@ -71,6 +86,7 @@ func regressionsSlots(results slotRegressionsResult) []*SlotInfo {
End: sr.interval.end.Format(DayFormat),
Entries: regressionEntries(slot, sr.regressions),
EntryCount: len(sr.regressions),
Runs: sr.runs,
}
slots = append(slots, item)
}
Expand Down Expand Up @@ -101,9 +117,9 @@ func runURLs(runIDs []runID) []string {
return urls
}

func flakiness() (*ReportInfo, error) {
func flakiness(qc QueryClient, end time.Time) (*ReportInfo, error) {
start := time.Now()
results, err := QueryFlakiness(NewDefaultQueryClient(), time.Now(), Weekly, Weeks)
results, err := QueryFlakiness(qc, end, Weekly, Weeks)
if err != nil {
return nil, err
}
Expand All @@ -113,7 +129,8 @@ func flakiness() (*ReportInfo, error) {
SlotCount: len(results),
Slots: flakinessSlots(results),
Total: results.count(),
QueryTme: fmt.Sprintf("%v", elapsed),
QueryTime: fmt.Sprintf("%v", elapsed),
RunsLabel: "successful cloud-tests",
}, nil
}

Expand All @@ -126,6 +143,7 @@ func flakinessSlots(results slotFlakinessResult) []*SlotInfo {
End: sr.interval.end.Format(DayFormat),
Entries: flakyEntries(slot, sr.flakyTests),
EntryCount: len(sr.flakyTests.rank),
Runs: sr.successfulCloudTestRuns,
}
slots = append(slots, entry)
}
Expand Down Expand Up @@ -163,20 +181,18 @@ func slotName(slot int) string {
return fmt.Sprintf("%d weeks ago", slot+1)
}

func format(report *ReportInfo, err error) (string, error) {
if os.Getenv("FORMAT") == "summary" {
if err != nil {
return "", err
}
func formatReport(report *ReportInfo, format string) (string, error) {
if format == "summary" {
return Summary(report), nil
}
return jsonize(report, err)
return jsonize(report)
}

func Summary(report *ReportInfo) string {
var sb strings.Builder
currentWeek := report.Slots[0]
fmt.Fprintf(&sb, "Last %d weeks *%s* report *%s*\\n\\n",
len(report.Slots), report.Type, report.Slots[0].End)
len(report.Slots), report.Type, currentWeek.End)
totals := 0
trend := []int{}
for i := len(report.Slots) - 1; i >= 0; i-- {
Expand Down Expand Up @@ -205,8 +221,11 @@ func Summary(report *ReportInfo) string {
level = "*ABOVE*"
below = false
}
if float32(currentWeek.Runs) < avg {
fmt.Fprintf(&sb, "*BEWARE!* %s is too low at %d\\n", report.RunsLabel, currentWeek.Runs)
}
good := decreasing && below
perfect := good && trend[len(report.Slots)-1] == 0
perfect := good && trend[len(report.Slots)-1] == 0 && currentWeek.Runs > 0
if perfect {
fmt.Fprintf(&sb, "*PERFECT WEEK!*\\nStats:\\n")
} else if good {
Expand All @@ -217,35 +236,30 @@ func Summary(report *ReportInfo) string {
for _, occurrences := range trend[0 : len(trend)-1] {
fmt.Fprintf(&sb, "%d, ", occurrences)
}
fmt.Fprintf(&sb, "_*%d*_ <- last week\\n\\n", last)
fmt.Fprintf(&sb, "_*%d*_ on %d %s <- last week\\n\\n",
last, currentWeek.Runs, report.RunsLabel)
fmt.Fprintf(&sb, "- %s %d from last week.\\n", direction, diff)
fmt.Fprintf(&sb, "- %.02f %s current average of %.02f per week.\\n", avgDiff, level, avg)

if report.Slots[0].EntryCount > 0 {
if currentWeek.EntryCount > 0 {
fmt.Fprintf(&sb, "Last week ranking:\\n\\n")
fmt.Fprintf(&sb, "Top offender (make sure we have a jira in progress for this one):\\n\\n")
entry := report.Slots[0].Entries[0]
entry := currentWeek.Entries[0]
fmt.Fprintf(&sb, "- *%d* %s test: %s\\n", entry.TestCount, entry.TestType, entry.TestName)
fmt.Fprintf(&sb, "\\nRest:\\n")
for _, entry := range report.Slots[0].Entries[1:] {
for _, entry := range currentWeek.Entries[1:] {
fmt.Fprintf(&sb, "- %d %s test: %s\\n", entry.TestCount, entry.TestType, entry.TestName)
}

fmt.Fprintf(&sb, "\\n\\nTop offender links:\\n\\n")
for _, url := range report.Slots[0].Entries[0].Tests {
for _, url := range currentWeek.Entries[0].Tests {
fmt.Fprintf(&sb, "%s\\n", url)
}
}
return sb.String()
}

func jsonize(report *ReportInfo, err error) (string, error) {
if err != nil {
return "", err
}
func jsonize(report *ReportInfo) (string, error) {
jsonData, err := json.Marshal(report)
if err != nil {
return "", err
}
return string(jsonData), nil
return string(jsonData), err
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"total_count":6,"jobs":[{"id":19836891493,"run_id":7279810816,"run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816","node_id":"CR_kwDOEnYpJ88AAAAEnl7xZQ","head_branch":"CLOUDP-185089/upgrade-k8s-api-1.27","head_sha":"5e47f8bac6b848a5fb8d04b7a353df0c71053447","url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/jobs/19836891493","html_url":"https://github.com/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816/job/19836891493","status":"completed","conclusion":"cancelled","created_at":"2023-12-20T19:14:45Z","started_at":"2023-12-20T19:14:48Z","completed_at":"2023-12-20T19:14:48Z","name":"lint / lint","check_run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/check-runs/19836891493","labels":["ubuntu-latest"],"runner_id":0,"runner_name":"","runner_group_id":0,"runner_group_name":"","run_attempt":1,"workflow_name":"Test"},{"id":19836891670,"run_id":7279810816,"run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816","node_id":"CR_kwDOEnYpJ88AAAAEnl7yFg","head_branch":"CLOUDP-185089/upgrade-k8s-api-1.27","head_sha":"5e47f8bac6b848a5fb8d04b7a353df0c71053447","url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/jobs/19836891670","html_url":"https://github.com/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816/job/19836891670","status":"completed","conclusion":"cancelled","created_at":"2023-12-20T19:14:45Z","started_at":"2023-12-20T19:14:48Z","completed_at":"2023-12-20T19:14:48Z","name":"check-licenses / Check 
Licenses","check_run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/check-runs/19836891670","labels":["ubuntu-latest"],"runner_id":0,"runner_name":"","runner_group_id":0,"runner_group_name":"","run_attempt":1,"workflow_name":"Test"},{"id":19836891847,"run_id":7279810816,"run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816","node_id":"CR_kwDOEnYpJ88AAAAEnl7yxw","head_branch":"CLOUDP-185089/upgrade-k8s-api-1.27","head_sha":"5e47f8bac6b848a5fb8d04b7a353df0c71053447","url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/jobs/19836891847","html_url":"https://github.com/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816/job/19836891847","status":"completed","conclusion":"cancelled","created_at":"2023-12-20T19:14:46Z","started_at":"2023-12-20T19:14:48Z","completed_at":"2023-12-20T19:14:48Z","name":"validate-manifests / Validate Manifests","check_run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/check-runs/19836891847","labels":["ubuntu-latest"],"runner_id":0,"runner_name":"","runner_group_id":0,"runner_group_name":"","run_attempt":1,"workflow_name":"Test"},{"id":19836892027,"run_id":7279810816,"run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816","node_id":"CR_kwDOEnYpJ88AAAAEnl7zew","head_branch":"CLOUDP-185089/upgrade-k8s-api-1.27","head_sha":"5e47f8bac6b848a5fb8d04b7a353df0c71053447","url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/jobs/19836892027","html_url":"https://github.com/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816/job/19836892027","status":"completed","conclusion":"cancelled","created_at":"2023-12-20T19:14:46Z","started_at":"2023-12-20T19:14:47Z","completed_at":"2023-12-20T19:14:47Z","name":"unit-tests / Unit 
tests","check_run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/check-runs/19836892027","labels":["ubuntu-latest"],"runner_id":0,"runner_name":"","runner_group_id":0,"runner_group_name":"","run_attempt":1,"workflow_name":"Test"},{"id":19836892218,"run_id":7279810816,"run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816","node_id":"CR_kwDOEnYpJ88AAAAEnl70Og","head_branch":"CLOUDP-185089/upgrade-k8s-api-1.27","head_sha":"5e47f8bac6b848a5fb8d04b7a353df0c71053447","url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/jobs/19836892218","html_url":"https://github.com/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816/job/19836892218","status":"completed","conclusion":"cancelled","created_at":"2023-12-20T19:14:46Z","started_at":"2023-12-20T19:14:48Z","completed_at":"2023-12-20T19:14:48Z","name":"cloud-tests-filter / cloud-tests-filter","check_run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/check-runs/19836892218","labels":["ubuntu-latest"],"runner_id":0,"runner_name":"","runner_group_id":0,"runner_group_name":"","run_attempt":1,"workflow_name":"Test"},{"id":19836893120,"run_id":7279810816,"run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816","node_id":"CR_kwDOEnYpJ88AAAAEnl73wA","head_branch":"CLOUDP-185089/upgrade-k8s-api-1.27","head_sha":"5e47f8bac6b848a5fb8d04b7a353df0c71053447","url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/actions/jobs/19836893120","html_url":"https://github.com/mongodb/mongodb-atlas-kubernetes/actions/runs/7279810816/job/19836893120","status":"completed","conclusion":"cancelled","created_at":"2023-12-20T19:14:48Z","started_at":"2023-12-20T19:14:48Z","completed_at":"2023-12-20T19:14:48Z","name":"cloud-tests","check_run_url":"https://api.github.com/repos/mongodb/mongodb-atlas-kubernetes/check-runs/19836893120","run_attempt":1,"workflow_name":"Test"}]}
Loading

0 comments on commit 8dc6913

Please sign in to comment.