From 59ab5d6ea38b00a95e4be415662e4184d3b3e839 Mon Sep 17 00:00:00 2001
From: Shuo Wu
Date: Thu, 21 Mar 2024 23:24:30 -0700
Subject: [PATCH] Process: Add timeout and force killing for process deletion

Longhorn 8091

Signed-off-by: Shuo Wu
---
Review notes:
- The timeout check used After(now), which holds only while the deadline is
  still in the future, so a repeated stop request inside the grace period
  killed the process at once and a request after the deadline did nothing.
  It now uses Before(now).
- Indentation in this patch was reconstructed from a whitespace-collapsed
  copy; if context lines fail to match, use `git apply --ignore-whitespace`.

 pkg/process/process.go | 50 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/pkg/process/process.go b/pkg/process/process.go
index 3ff544795..b1e304f55 100644
--- a/pkg/process/process.go
+++ b/pkg/process/process.go
@@ -22,6 +22,9 @@ const (
 	StateError    = State(types.ProcessStateError)
 )
 
+// DeleteTimeout is the grace period after which a repeated stop request force-kills the process.
+const DeleteTimeout = 30 * time.Second
+
 type Process struct {
 	Name      string
 	Binary    string
@@ -29,12 +32,13 @@ type Process struct {
 	PortCount int32
 	PortArgs  []string
 
-	UUID       string
-	State      State
-	ErrorMsg   string
-	Conditions map[string]bool
-	PortStart  int32
-	PortEnd    int32
+	UUID              string
+	State             State
+	ErrorMsg          string
+	Conditions        map[string]bool
+	PortStart         int32
+	PortEnd           int32
+	DeletionTimestamp *time.Time // set when the process first enters StateStopping
 
 	lock     *sync.RWMutex
 	cmd      Command
@@ -137,15 +141,25 @@ func (p *Process) Stop() {
 }
 
 func (p *Process) StopWithSignal(signal syscall.Signal) {
-	needStop := false
+	needStop, needTimeoutKill := false, false
+	now := time.Now()
+
 	p.lock.Lock()
 	if p.State != StateStopping && p.State != StateStopped && p.State != StateError {
 		p.State = StateStopping
+		p.DeletionTimestamp = &now
 		needStop = true
 	}
+	// Escalate to a force kill only when an earlier stop request has been pending for longer than DeleteTimeout.
+	if p.DeletionTimestamp != nil && !needStop {
+		if p.DeletionTimestamp.Add(DeleteTimeout).Before(now) {
+			logrus.Infof("Process Manager: process %v deletion takes more than %vs, will retry it", p.Name, DeleteTimeout.Seconds())
+			needTimeoutKill = true
+		}
+	}
 	p.lock.Unlock()
 
-	if !needStop {
+	if !needStop && !needTimeoutKill {
 		return
 	}
 	p.UpdateCh <- p
@@ -170,16 +184,20 @@ func (p *Process) StopWithSignal(signal syscall.Signal) {
 		}
 
 		// no need for lock
-		logrus.Infof("Process Manager: trying to stop process %v", p.Name)
-		cmd.StopWithSignal(signal)
-		for i := 0; i < types.WaitCount; i++ {
-			if p.IsStopped() {
-				return
+		if needStop {
+			logrus.Infof("Process Manager: trying to stop process %v", p.Name)
+			cmd.StopWithSignal(signal)
+			for i := 0; i < types.WaitCount; i++ {
+				if p.IsStopped() {
+					return
+				}
+				logrus.Infof("Wait for process %v to shutdown", p.Name)
+				time.Sleep(types.WaitInterval)
 			}
-			logrus.Infof("Wait for process %v to shutdown", p.Name)
-			time.Sleep(types.WaitInterval)
+			logrus.Warnf("Process Manager: cannot graceful stop process %v in %v, will kill the process", p.Name, time.Duration(types.WaitCount)*types.WaitInterval)
+		} else if needTimeoutKill {
+			logrus.Warnf("Process Manager: somehow timeout stopping process %v, will retry killing the process", p.Name)
 		}
-		logrus.Warnf("Process Manager: cannot graceful stop process %v in %v, will kill the process", p.Name, time.Duration(types.WaitCount)*types.WaitInterval)
 		cmd.Kill()
 	}()
 }