diff --git a/pkg/pillar/containerd/containerd.go b/pkg/pillar/containerd/containerd.go
index b8ebfbfa19..a7cca0a0e9 100644
--- a/pkg/pillar/containerd/containerd.go
+++ b/pkg/pillar/containerd/containerd.go
@@ -513,6 +513,9 @@ func (client *Client) CtrContainerInfo(ctx context.Context, name string) (int, i
 		return 0, 0, "", fmt.Errorf("CtrContainerInfo: couldn't determine task status for container %s: %v", name, err)
 	}
 
+	if stat.Status == "unknown" {
+		logrus.Infof("CtrContainerInfo: PID of the task in container %s is %d, exit code is %d, status is %s and the task object (%v)", name, int(t.Pid()), int(stat.ExitStatus), stat.Status, t)
+	}
 	return int(t.Pid()), int(stat.ExitStatus), string(stat.Status), nil
 }
 
diff --git a/pkg/pillar/hypervisor/containerd.go b/pkg/pillar/hypervisor/containerd.go
index a1ed69eca3..0e229c9afe 100644
--- a/pkg/pillar/hypervisor/containerd.go
+++ b/pkg/pillar/hypervisor/containerd.go
@@ -23,6 +23,9 @@ const (
 
 	//ContainerdHypervisorName is a name of containerd hypervisor
 	ContainerdHypervisorName = "containerd"
+
+	// retryCount is the number of times to retry getting the state of a task
+	retryCount = 10
 )
 
 type ctrdContext struct {
@@ -231,12 +234,36 @@ func (ctx ctrdContext) Info(domainName string) (int, types.SwState, error) {
 	defer done()
 	effectiveDomainID, exit, status, err := ctx.ctrdClient.CtrContainerInfo(ctrdCtx, domainName)
 	if err != nil {
-		return 0, types.UNKNOWN, logError("containerd looking up domain %s resulted in %v", domainName, err)
+		return 0, 0, logError("containerd looking up domain %s resulted in %v", domainName, err)
+	}
+
+	// If the VM state is unknown, retry up to retryCount times to get the state. This handles the case
+	// where the VM is in a transient state.
+	if status == "unknown" {
+		for i := 1; i <= retryCount; i++ {
+			time.Sleep(2 * time.Second)
+			logrus.Infof("task %s is in %s state, retrying %d from %d", domainName, status, i, retryCount)
+			effectiveDomainID, exit, status, err = ctx.ctrdClient.CtrContainerInfo(ctrdCtx, domainName)
+			if err != nil {
+				return 0, 0, logError("containerd looking up domain %s resulted in %v", domainName, err)
+			}
+			// Once the VM state is no longer unknown, exit the loop.
+			if status != "unknown" {
+				break
+			}
+		}
 	}
 
 	if status == "stopped" && exit != 0 {
 		return 0, types.BROKEN, logError("task broke with exit status %d", exit)
 	}
+	// When the status is "unknown", it typically indicates a communication issue between containerd and the task.
+	// This is generally a temporary state, so rather than returning an error, we'll maintain the last known valid state.
+	// The goal is to keep the application running without marking it as broken or terminating it unnecessarily.
+	// TODO: Send an alert to the user that the task is in an unknown state, even after the retries.
+	if status == "unknown" {
+		logrus.Errorf("task %s is in %s state, after %d retries", domainName, status, retryCount)
+	}
 
 	stateMap := map[string]types.SwState{
 		"created": types.INSTALLED,
@@ -244,7 +271,9 @@ func (ctx ctrdContext) Info(domainName string) (int, types.SwState, error) {
 		"pausing": types.PAUSING,
 		"paused":  types.PAUSED,
 		"stopped": types.HALTED,
+		"unknown": types.UNKNOWN,
 	}
+
 	if effectiveDomainState, matched := stateMap[status]; !matched {
 		err := fmt.Errorf("task %s happens to be in an unexpected state %s",
 			domainName, status)