Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 61 additions & 6 deletions pkg/e2eanalysis/e2e_analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
k8sv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/dynamic"
clientset "k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -106,12 +107,23 @@ func (tm *TestManager) GenerateReport(opt *Options) error {
}
fmt.Println(string(out))
if opt.JUnitDir != "" {
if _, err := os.Stat(opt.JUnitDir); os.IsNotExist(err) {
if err := os.MkdirAll(opt.JUnitDir, 0755); err != nil {
err = fmt.Errorf("failed to create junit directory %q: %w", opt.JUnitDir, err)
logrus.WithError(err).Error("JUnit report write error:")
return err
}
}
filePrefix := "junit_e2e_analysis"
start := time.Now()
timeSuffix := fmt.Sprintf("_%s", start.UTC().Format("20060102-150405"))
path := filepath.Join(opt.JUnitDir, fmt.Sprintf("%s_%s.xml", filePrefix, timeSuffix))
fmt.Fprintf(os.Stderr, "Writing JUnit report to %s\n", path)
os.WriteFile(path, test.StripANSI(out), 0640)
err := os.WriteFile(path, test.StripANSI(out), 0640)
if err != nil {
logrus.WithError(err).Error("Fail to write junit file:")
return err
}
}

return nil
Expand Down Expand Up @@ -162,7 +174,11 @@ func (opt *Options) Run() error {
// ======================================================
// ===== Consistency check between machine and node =====
// ======================================================
checkMachineNodeConsistency(cs, dc, tm)
ret, err = checkMachineNodeConsistency(cs, dc, tm)
if ret == -1 {
logrus.WithError(err).Error("Fatal failure:")
return nil
}

// ======================================================
// =========== Check cluster operators health ===========
Expand Down Expand Up @@ -277,8 +293,9 @@ func (opt *Options) Run() error {
failureMsg = fmt.Sprintf("Operator %q - Available=%s, Degraded=%s, Progressing=%s", opName, available, degraded, progressing)
logrus.Infof("%s", failureMsg)
tm.AddTestCase(tc, failureMsg, "")
// add a flake case in here to begin with, so that we can analyze new tests without causing job failures.
// once we verify stability we can remove the flake
// TODO: add a flake case in here to begin with, so that we can
// analyze new tests without causing job failures. Once we verify
// stability we can remove the flake
tm.AddTestCase(tc, "", "")
failedOperators[opName] = true
}
Expand Down Expand Up @@ -472,7 +489,7 @@ func checkClusterVersionStable(dc dynamic.Interface) (int, error) {
return 0, nil
}

func checkMachineNodeConsistency(clientset clientset.Interface, dc dynamic.Interface, tm *TestManager) {
func checkMachineNodeConsistency(clientset clientset.Interface, dc dynamic.Interface, tm *TestManager) (int, error) {
logrus.Info("Starting Machine and Node consistency check")

// ===== Case 1 =====
Expand Down Expand Up @@ -528,7 +545,7 @@ func checkMachineNodeConsistency(clientset clientset.Interface, dc dynamic.Inter
if err != nil {
message := fmt.Sprintf("Failed to list nodes: %v.", err)
tm.AddTestCase(tc, message, "")
return
return -1, err
}

notReadyNodes := getUnreadyOrUnschedulableNodeNames(nodeList)
Expand Down Expand Up @@ -569,6 +586,44 @@ func checkMachineNodeConsistency(clientset clientset.Interface, dc dynamic.Inter
}
tm.AddTestCase(tc, message, "")
}

// ===== Case 4 =====
tcName = "ensure 1 worker node at least gets ready"
tc = NewTestCase(tcName)
workerReadyCount, err := GetReadyNodeCountByLabel(nodeList, "node-role.kubernetes.io/worker")
if err != nil {
tm.AddTestCase(tc, fmt.Sprintf("%v", err), "")
return -1, err
}
if workerReadyCount > 0 {
tm.AddTestCase(tc, "", "")
} else {
message := "Not any Ready and Scheduable worker node"
tm.AddTestCase(tc, message, "")
return -1, fmt.Errorf("%s", message)
}

return 0, nil
}

// GetReadyNodeCountByLabel filters a list of nodes by a label selector and returns the count of ready nodes.
func GetReadyNodeCountByLabel(nodeList *k8sv1.NodeList, labelSelector string) (int, error) {
selector, err := labels.Parse(labelSelector)
if err != nil {
return 0, fmt.Errorf("failed to parse label selector %q: %w", labelSelector, err)
}

readyNodeCount := 0
for _, node := range nodeList.Items {
// First, check if the node's labels match the selector.
if selector.Matches(labels.Set(node.Labels)) {
// If they match, then check if the node is schedulable.
if e2enode.IsNodeSchedulable(&node) {
readyNodeCount++
}
}
}
return readyNodeCount, nil
}

// GetUnreadyOrUnschedulableNodeNames returns a list of node names that are
Expand Down