From b8e8d7a35951def8639cbfd29f5b096107a3ff50 Mon Sep 17 00:00:00 2001
From: Eric Weber <eric.weber@suse.com>
Date: Tue, 6 Aug 2024 19:12:58 +0000
Subject: [PATCH] feat(timeout): ensure all replicas can time out quickly when
 necessary

Longhorn 8711

Signed-off-by: Eric Weber <eric.weber@suse.com>
---
 pkg/backend/file/file.go  |  2 +-
 pkg/controller/control.go | 31 +++++++++++++++++++------------
 pkg/dataconn/client.go    |  6 +++++-
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/pkg/backend/file/file.go b/pkg/backend/file/file.go
index 9c43c3439..b4012fa42 100644
--- a/pkg/backend/file/file.go
+++ b/pkg/backend/file/file.go
@@ -163,5 +163,5 @@ func (r *Wrapper) GetTimeoutChannel() chan struct{} {
 }
 
 func (r *Wrapper) GetDurationSinceResponse() time.Duration {
-	return -1
+	return time.Duration(0)
 }
diff --git a/pkg/controller/control.go b/pkg/controller/control.go
index 43f638417..93601f685 100644
--- a/pkg/controller/control.go
+++ b/pkg/controller/control.go
@@ -1442,28 +1442,35 @@ func (c *Controller) checkBackendTimeouts(shortTimeout, longTimeout time.Duratio
 	c.RLock()
 	defer c.RUnlock()
 
-	if backend := c.backend.backends[addressToTimeOut]; backend.mode == types.RW {
-		// The last backend we tried to stop via timeout is still not ERR. Don't try another one yet.
-		// TODO: We could speed this up by checking for durationSinceResponce < 0 instead.
+	if backend, ok := c.backend.backends[addressToTimeOut]; ok && backend.backend.GetDurationSinceResponse() >= 0 {
+		// The last backend we tried to stop via timeout hasn't acknowledged it. Don't try another one yet.
 		return addressToTimeOut
 	}
 
 	addressToTimeOutLong := ""
 	addressToTimeOutShort := ""
-	rwBackendCount := 0
+	backendsNotTimedOut := 0
 	for address, backend := range c.backend.backends {
-		if backend.mode == types.RW {
-			rwBackendCount += 1
-			if backend.backend.GetDurationSinceResponse() > longTimeout && addressToTimeOutLong == "" {
-				addressToTimeOutLong = address
-			} else if backend.backend.GetDurationSinceResponse() > shortTimeout {
-				addressToTimeOutShort = address
-			}
+		durationSinceResponse := backend.backend.GetDurationSinceResponse()
+		if durationSinceResponse < 0 {
+			// This backend has acknowledged our request to time out.
+			// It would look cleaner to check for ERR versus RW mode here, but a backend won't actually transition to
+			// ERR until the completion of at least one I/O operation. If, for example, all three backends for an
+			// engine are unreachable, none of the backends can transition to ERR until all of them have timed out (and
+			// the operation fails). We want to be able to time out the other two backends in due time instead of
+			// waiting for some TCP error, etc.
+			continue
+		}
+		backendsNotTimedOut += 1
+		if durationSinceResponse > longTimeout && addressToTimeOutLong == "" {
+			addressToTimeOutLong = address
+		} else if durationSinceResponse > shortTimeout {
+			addressToTimeOutShort = address
 		}
 	}
 	if addressToTimeOutLong != "" {
 		addressToTimeOut = addressToTimeOutLong
-	} else if addressToTimeOutShort != "" && rwBackendCount > 1 {
+	} else if addressToTimeOutShort != "" && backendsNotTimedOut > 1 {
 		// Only use addressToTimeOutShort if there is another available backend.
 		addressToTimeOut = addressToTimeOutShort
 	} else {
diff --git a/pkg/dataconn/client.go b/pkg/dataconn/client.go
index 68a845df0..177d98e83 100644
--- a/pkg/dataconn/client.go
+++ b/pkg/dataconn/client.go
@@ -160,7 +160,7 @@ func (c *Client) loop() {
 		}
 
 		ioInflight = 0
-		c.durationSinceResponse.Store(-1)
+		c.durationSinceResponse.Store(-1) // Indicate successful timeout to the upper layer.
 	}
 
 	for {
@@ -169,6 +169,10 @@ func (c *Client) loop() {
 			return
 		case <-ticker.C:
 			// Keep the upper layer informed of outstanding I/O times.
+			if c.durationSinceResponse.Load() < 0 {
+				// We have already been asked to time out.
+				continue
+			}
 			if lastIOTime.IsZero() {
 				c.durationSinceResponse.Store(0)
 			} else {