ci: fix NO_RACE + CI re-run on timeouts or fails of known flakes (#1304)

* fix script * fix * suggested for ci: re-run tests if failures are flakes (#1305) * suggested ci improvement: re-run tests if failures are flakes * remove flake indicators * bash * add more flakes * fix bash * fix * add flake * sort
ava-labs · Aug 26, 2024 · 3ede00b · 3ede00b
1 parent 32c8cdf
commit 3ede00b
Show file tree

Hide file tree

Showing 6 changed files with 44 additions and 14 deletions.
diff --git a/accounts/keystore/keystore_test.go b/accounts/keystore/keystore_test.go
@@ -293,9 +293,6 @@ type walletEvent struct {
 // Tests that wallet notifications are correctly fired when accounts are added
 // or deleted from the keystore.
 func TestWalletNotifications(t *testing.T) {
-	if os.Getenv("RUN_FLAKY_TESTS") != "true" {
-		t.Skip("FLAKY")
-	}
 	t.Parallel()
 	_, ks := tmpKeyStore(t, false)
 

diff --git a/metrics/sample_test.go b/metrics/sample_test.go
@@ -3,7 +3,6 @@ package metrics
 import (
 	"math"
 	"math/rand"
-	"os"
 	"runtime"
 	"testing"
 	"time"
@@ -133,9 +132,6 @@ func TestExpDecaySample(t *testing.T) {
 // The priority becomes +Inf quickly after starting if this is done,
 // effectively freezing the set of samples until a rescale step happens.
 func TestExpDecaySampleNanosecondRegression(t *testing.T) {
-	if os.Getenv("RUN_FLAKY_TESTS") != "true" {
-		t.Skip("FLAKY")
-	}
 	sw := NewExpDecaySample(100, 0.99)
 	for i := 0; i < 100; i++ {
 		sw.Update(10)

diff --git a/plugin/evm/gossiper_eth_gossiping_test.go b/plugin/evm/gossiper_eth_gossiping_test.go
@@ -8,7 +8,6 @@ import (
 	"crypto/ecdsa"
 	"encoding/json"
 	"math/big"
-	"os"
 	"strings"
 	"sync"
 	"testing"
@@ -74,9 +73,6 @@ func getValidEthTxs(key *ecdsa.PrivateKey, count int, gasPrice *big.Int) []*type
 // show that a geth tx discovered from gossip is requested to the same node that
 // gossiped it
 func TestMempoolEthTxsAppGossipHandling(t *testing.T) {
-	if os.Getenv("RUN_FLAKY_TESTS") != "true" {
-		t.Skip("FLAKY")
-	}
 	assert := assert.New(t)
 
 	key, err := crypto.GenerateKey()

diff --git a/plugin/evm/syncervm_test.go b/plugin/evm/syncervm_test.go
@@ -241,7 +241,6 @@ func TestStateSyncToggleEnabledToDisabled(t *testing.T) {
 }
 
 func TestVMShutdownWhileSyncing(t *testing.T) {
-	t.Skip("FLAKY")
 	var (
 		lock    sync.Mutex
 		vmSetup *syncVMSetup

diff --git a/scripts/build_test.sh b/scripts/build_test.sh
@@ -24,5 +24,39 @@ race="-race"
 if [[ -n "${NO_RACE:-}" ]]; then
     race=""
 fi
-# shellcheck disable=SC2046
-go test -shuffle=on "${race}" -timeout="${TIMEOUT:-600s}" -coverprofile=coverage.out -covermode=atomic "$@" $(go list ./... | grep -v github.com/ava-labs/subnet-evm/tests)
+
+# MAX_RUNS bounds the attempts to retry the tests before giving up
+# This is useful for flaky tests
+MAX_RUNS=4
+for ((i = 1; i <= MAX_RUNS; i++));
+do
+    # shellcheck disable=SC2046
+    go test -shuffle=on ${race:-} -timeout="${TIMEOUT:-600s}" -coverprofile=coverage.out -covermode=atomic "$@" $(go list ./... | grep -v github.com/ava-labs/subnet-evm/tests) | tee test.out || command_status=$?
+
+    # A zero (or unset) status means the run succeeded: remove the captured log and exit successfully
+    if [[ ${command_status:-0} == 0 ]]; then
+        rm test.out
+        exit 0
+    else 
+        unset command_status # Clear the recorded status so a later passing run is not mistaken for a failure
+    fi
+
+    # The run failed: collect the failed test names that are not listed in known_flakes.txt
+    unexpected_failures=$(
+        # First grep pattern matches "--- FAIL" lines; the second (tried only if the first pipeline fails) matches test names listed after a timeout panic
+        (grep "^--- FAIL" test.out | awk '{print $3}' || grep -E '^\s+Test.+ \(' test.out | awk '{print $1}') |
+        sort -u | comm -23 - ./scripts/known_flakes.txt
+    )
+    if [ -n "${unexpected_failures}" ]; then
+        echo "Unexpected test failures: ${unexpected_failures}"
+        exit 1
+    fi
+
+    # Note that the absence of unexpected failures does not mean it is enough to re-run only the tests that failed:
+    # for example, a test may panic and prevent the subsequent tests in that package from running.
+    # So the whole suite is re-run on the next loop iteration.
+    echo "Test run $i failed with known flakes, retrying..."
+done
+
+# If we reach here, we have failed all retries
+exit 1
diff --git a/scripts/known_flakes.txt b/scripts/known_flakes.txt
@@ -0,0 +1,8 @@
+TestClientCancelWebsocket
+TestExpDecaySampleNanosecondRegression
+TestGolangBindings
+TestMempoolEthTxsAppGossipHandling
+TestTransactionSkipIndexing
+TestVMShutdownWhileSyncing
+TestWalletNotifications
+TestWebsocketLargeRead