From 6020882ac4dbb7aaf163887d3199ae7a21fc55e6 Mon Sep 17 00:00:00 2001 From: ShradhaGupta31 Date: Mon, 5 Jan 2026 14:18:55 +0530 Subject: [PATCH] fix(heci): add AMT v19+ compatibility with dual-port LMS fallback - Add bidirectional TLS/non-TLS fallback logic in executor.go to handle systems with LMS running on either port 16992 or 16993 - Retry write, read and ioctl calls up to 3 times on EINTR (interrupted system call) in linux.go for improved AMT v19+ compatibility - Add fixed delays before LME initialization and a single delayed retry when /dev/mei0 is busy, to prevent MEI resource conflicts - Fix "device or resource busy" errors on newer AMT firmware versions Addresses: https://github.com/device-management-toolkit/rpc-go/issues/1065 Signed-off-by: ShradhaGupta31 --- cmd/rpc/lib.go | 4 ++ internal/rps/executor.go | 49 ++++++++++++-- pkg/heci/linux.go | 137 +++++++++++++++++++++++++++++++-------- 3 files changed, 158 insertions(+), 32 deletions(-) diff --git a/cmd/rpc/lib.go b/cmd/rpc/lib.go index 71b262b5..138a60b6 100644 --- a/cmd/rpc/lib.go +++ b/cmd/rpc/lib.go @@ -68,6 +68,7 @@ func rpcExec(Input *C.char, Output **C.char, ErrOutput **C.char) int { if err != nil { log.Error(AccessErrMsg) captureAndRestoreStderr() + return handleError(err) } @@ -81,6 +82,7 @@ func rpcExec(Input *C.char, Output **C.char, ErrOutput **C.char) int { if err != nil { log.Error(err.Error()) captureAndRestoreStderr() + return utils.InvalidParameterCombination.Code } @@ -106,6 +108,7 @@ func rpcExec(Input *C.char, Output **C.char, ErrOutput **C.char) int { func handleError(err error) int { if customErr, ok := err.(utils.CustomError); ok { log.Error(customErr.Error()) + return customErr.Code } else { errorMsg := err.Error() @@ -114,6 +117,7 @@ func handleError(err error) int { if strings.Contains(errorMsg, "unexpected argument") { return utils.InvalidParameterCombination.Code } + return utils.GenericFailure.Code } } diff --git a/internal/rps/executor.go b/internal/rps/executor.go index b09e03fd..25d6004a 100644 --- a/internal/rps/executor.go 
+++ b/internal/rps/executor.go @@ -9,6 +9,7 @@ import ( "os/signal" "sync" "syscall" + "time" "github.com/device-management-toolkit/rpc-go/v2/internal/lm" "github.com/device-management-toolkit/rpc-go/v2/pkg/utils" @@ -54,15 +55,51 @@ func NewExecutor(config ExecutorConfig) (Executor, error) { // TEST CONNECTION TO SEE IF LMS EXISTS err := client.localManagement.Connect() if err != nil { + log.Debugf("LMS connection failed: %v", err) + if config.LocalTlsEnforced { - return client, utils.LMSConnectionFailed + // Try non-TLS LMS as fallback before LME + nonTlsLMS := lm.NewLMSConnection(utils.LMSAddress, utils.LMSPort, false, lmDataChannel, lmErrorChannel, config.ControlMode, config.SkipAmtCertCheck) + + err = nonTlsLMS.Connect() + if err == nil { + client.localManagement = nonTlsLMS + client.localManagement.Close() // Test connection, close for now + } else { + log.Trace("LMS TLS connection failed. Attempting LME fallback...") + } + } else { + // Try TLS LMS as fallback before LME + tlsLMS := lm.NewLMSConnection(utils.LMSAddress, utils.LMSTLSPort, true, lmDataChannel, lmErrorChannel, config.ControlMode, config.SkipAmtCertCheck) + + err = tlsLMS.Connect() + if err == nil { + client.localManagement = tlsLMS + client.localManagement.Close() // Test connection, close for now + } else { + log.Trace("LMS not running. Using LME Connection\\n") + } } - // client.localManagement.Close() - log.Trace("LMS not running. 
Using LME Connection\n") - client.localManagement = lm.NewLMEConnection(lmDataChannel, lmErrorChannel, client.waitGroup) - client.isLME = true - client.localManagement.Initialize() + if err != nil { + // Wait a moment before attempting LME to allow any MEI operations to complete + time.Sleep(2 * time.Second) + + // Try LME connection + lmeConnection := lm.NewLMEConnection(lmDataChannel, lmErrorChannel, client.waitGroup) + + err = lmeConnection.Initialize() + if err == nil { + client.localManagement = lmeConnection + client.isLME = true + } else { + if config.LocalTlsEnforced { + return client, utils.LMSConnectionFailed + } + + return client, utils.LMSConnectionFailed + } + } } else { log.Trace("Using existing LMS\n") client.localManagement.Close() diff --git a/pkg/heci/linux.go b/pkg/heci/linux.go index 1f61c030..1b7fec83 100644 --- a/pkg/heci/linux.go +++ b/pkg/heci/linux.go @@ -12,6 +12,7 @@ import ( "encoding/binary" "os" "syscall" + "time" "unsafe" log "github.com/sirupsen/logrus" @@ -42,20 +43,65 @@ func NewDriver() *Driver { return &Driver{} } +// ResetMEIDevice attempts to reset MEI device state by waiting for it to become available +func (heci *Driver) ResetMEIDevice() error { + if heci.meiDevice != nil { + heci.meiDevice.Close() + heci.meiDevice = nil + } + + log.Debug("Waiting for MEI device to reset...") + time.Sleep(10 * time.Second) + + return nil +} + func (heci *Driver) Init(useLME, useWD bool) error { var err error - heci.meiDevice, err = os.OpenFile(Device, syscall.O_RDWR, 0) - if err != nil { - if err.Error() == "open /dev/mei0: permission denied" { - log.Error("need administrator privileges") - } else if err.Error() == "open /dev/mei0: no such file or directory" { - log.Error("AMT not found: MEI/driver is missing or the call to the HECI driver failed") - } else { - log.Error("Cannot open MEI Device") + // Close existing connection if switching to LME to ensure clean state + if heci.meiDevice != nil && useLME { + heci.meiDevice.Close() + 
heci.meiDevice = nil + + time.Sleep(3 * time.Second) + } + + // For PTHI/WD, always reopen to ensure fresh connection + // For LME, only open if not already open + if !useLME || heci.meiDevice == nil { + // Close any existing connection for PTHI + if heci.meiDevice != nil && !useLME { + heci.meiDevice.Close() + heci.meiDevice = nil } - return err + // Open MEI device with retry for device busy + for attempt := 1; attempt <= 2; attempt++ { + heci.meiDevice, err = os.OpenFile(Device, syscall.O_RDWR, 0) + if err == nil { + break + } + + if err.Error() == "open /dev/mei0: permission denied" { + log.Error("need administrator privileges") + + return err + } else if err.Error() == "open /dev/mei0: no such file or directory" { + log.Error("AMT not found: MEI/driver is missing or the call to the HECI driver failed") + + return err + } else if err.Error() == "open /dev/mei0: device or resource busy" && attempt == 1 { + log.Debug("MEI device busy, waiting before retry...") + time.Sleep(5 * time.Second) + + continue + } else { + log.Error("Cannot open MEI Device") + + return err + } + } } data := CMEIConnectClientData{} @@ -67,14 +113,7 @@ func (heci *Driver) Init(useLME, useWD bool) error { data.data = MEI_IAMTHIF } - // we try up to 3 times in case the resource/device is still busy from previous call. 
- for i := 0; i < 3; i++ { - err = Ioctl(heci.meiDevice.Fd(), IOCTL_MEI_CONNECT_CLIENT, uintptr(unsafe.Pointer(&data))) - if err == nil { - break - } - } - + err = Ioctl(heci.meiDevice.Fd(), IOCTL_MEI_CONNECT_CLIENT, uintptr(unsafe.Pointer(&data))) if err != nil { return err } @@ -97,30 +136,76 @@ func (heci *Driver) GetBufferSize() uint32 { } func (heci *Driver) SendMessage(buffer []byte, done *uint32) (bytesWritten int, err error) { - size, err := syscall.Write(int(heci.meiDevice.Fd()), buffer) - if err != nil { + // Validate file descriptor before attempting write + if heci.meiDevice == nil { + return 0, syscall.EBADF + } + + // Retry write operations on interrupted system call + for i := 0; i < 3; i++ { + size, err := syscall.Write(int(heci.meiDevice.Fd()), buffer) + if err == nil { + return size, nil + } + + // Retry on interrupted system call + if err == syscall.EINTR { + time.Sleep(50 * time.Millisecond) + + continue + } + return 0, err } - return size, nil + return 0, syscall.EINTR } func (driver *Driver) ReceiveMessage(buffer []byte, done *uint32) (bytesRead int, err error) { - read, err := unix.Read(int(driver.meiDevice.Fd()), buffer) - if err != nil { + // Validate file descriptor before attempting read + if driver.meiDevice == nil { + return 0, syscall.EBADF + } + + // Retry read operations on interrupted system call + for i := 0; i < 3; i++ { + read, err := unix.Read(int(driver.meiDevice.Fd()), buffer) + if err == nil { + return read, nil + } + + // Retry on interrupted system call + if err == syscall.EINTR { + time.Sleep(50 * time.Millisecond) + + continue + } + return 0, err } - return read, nil + return 0, syscall.EINTR } func Ioctl(fd, op, arg uintptr) error { - _, _, ep := syscall.Syscall(syscall.SYS_IOCTL, fd, op, arg) - if ep != 0 { + // Retry IOCTL on interrupted system call (EINTR) + for i := 0; i < 3; i++ { + _, _, ep := syscall.Syscall(syscall.SYS_IOCTL, fd, op, arg) + if ep == 0 { + return nil + } + + // Retry on interrupted system call 
(EINTR = 4) + if ep == syscall.EINTR { + time.Sleep(100 * time.Millisecond) + + continue + } + return ep } - return nil + return syscall.EINTR } func (heci *Driver) Close() {