diff --git a/go/vt/mysqlctl/mysqld.go b/go/vt/mysqlctl/mysqld.go index b6a743cc120..a2d2c5b7d71 100644 --- a/go/vt/mysqlctl/mysqld.go +++ b/go/vt/mysqlctl/mysqld.go @@ -38,6 +38,7 @@ import ( "strconv" "strings" "sync" + "syscall" "time" "github.com/spf13/pflag" @@ -321,7 +322,7 @@ func (mysqld *Mysqld) Start(ctx context.Context, cnf *Mycnf, mysqldArgs ...strin return client.Start(ctx, mysqldArgs...) } - if err := mysqld.startNoWait(ctx, cnf, mysqldArgs...); err != nil { + if err := mysqld.startNoWait(cnf, mysqldArgs...); err != nil { return err } @@ -329,7 +330,7 @@ func (mysqld *Mysqld) Start(ctx context.Context, cnf *Mycnf, mysqldArgs ...strin } // startNoWait is the internal version of Start, and it doesn't wait. -func (mysqld *Mysqld) startNoWait(ctx context.Context, cnf *Mycnf, mysqldArgs ...string) error { +func (mysqld *Mysqld) startNoWait(cnf *Mycnf, mysqldArgs ...string) error { var name string ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix()) @@ -355,6 +356,13 @@ func (mysqld *Mysqld) startNoWait(ctx context.Context, cnf *Mycnf, mysqldArgs .. if err != nil { return err } + // If we're here, and the lockfile still exists for the socket, we have + // to clean that up since we know at this point we need to start MySQL. + // Having this stray lock file present means MySQL fails to start. This + // only happens when running without mysqld_safe. + if err := cleanupLockfile(cnf.SocketFile, ts); err != nil { + return err + } } mysqlBaseDir, err := vtenv.VtMysqlBaseDir() if err != nil { @@ -426,6 +434,46 @@ func (mysqld *Mysqld) startNoWait(ctx context.Context, cnf *Mycnf, mysqldArgs .. return nil } +func cleanupLockfile(socket string, ts string) error { + lockPath := fmt.Sprintf("%s.lock", socket) + pid, err := os.ReadFile(lockPath) + if errors.Is(err, os.ErrNotExist) { + // If there's no lock file, we can early return here, nothing + // to clean up then. + return nil + } else if err != nil { + // Any other errors here are unexpected. + return err + } + p, err := strconv.Atoi(string(pid)) + if err != nil { + log.Errorf("%v: error parsing pid from lock file: %v", ts, err) + return err + } + proc, err := os.FindProcess(p) + if err != nil { + log.Errorf("%v: error finding process: %v", ts, err) + return err + } + err = proc.Signal(syscall.Signal(0)) + if err == nil { + // If the process still exists, it's not safe to + // remove the lock file, so we have to keep it around. + log.Warningf("%v: not removing socket lock file: %v", ts, lockPath) + return nil + } + if !errors.Is(err, os.ErrProcessDone) { + // Any errors except for the process being done + // is unexpected here. + log.Errorf("%v: error checking process: %v", ts, err) + return err + } + + // All good, process is gone and we can safely clean up the lock file. + log.Infof("%v: removing stale socket lock file: %v", ts, lockPath) + return os.Remove(lockPath) +} + // Wait returns nil when mysqld is up and accepting connections. It // will use the dba credentials to try to connect. Use wait() with // different credentials if needed. @@ -642,7 +690,7 @@ func (mysqld *Mysqld) Init(ctx context.Context, cnf *Mycnf, initDBSQLFile string // Start mysqld. We do not use Start, as we have to wait using // the root user. - if err = mysqld.startNoWait(ctx, cnf); err != nil { + if err = mysqld.startNoWait(cnf); err != nil { log.Errorf("failed starting mysqld: %v\n%v", err, readTailOfMysqldErrorLog(cnf.ErrorLogPath)) return err } diff --git a/go/vt/mysqlctl/mysqld_test.go b/go/vt/mysqlctl/mysqld_test.go index 435090008f2..5ee21196ddd 100644 --- a/go/vt/mysqlctl/mysqld_test.go +++ b/go/vt/mysqlctl/mysqld_test.go @@ -17,6 +17,8 @@ limitations under the License. package mysqlctl import ( + "os" + "strconv" "testing" "time" @@ -172,3 +174,22 @@ func TestParseBinlogEntryTimestamp(t *testing.T) { }) } } + +func TestCleanupLockfile(t *testing.T) { + t.Cleanup(func() { + os.Remove("mysql.sock.lock") + }) + ts := "prefix" + // All good if no lockfile exists + assert.NoError(t, cleanupLockfile("mysql.sock", ts)) + + // If lockfile exists, but the process is not found, we clean it up. + os.WriteFile("mysql.sock.lock", []byte("123456789"), 0o600) + assert.NoError(t, cleanupLockfile("mysql.sock", ts)) + assert.NoFileExists(t, "mysql.sock.lock") + + // If the lockfile exists, and the process is found, we don't clean it up. + os.WriteFile("mysql.sock.lock", []byte(strconv.Itoa(os.Getpid())), 0o600) + assert.NoError(t, cleanupLockfile("mysql.sock", ts)) + assert.FileExists(t, "mysql.sock.lock") +}