From c06b527d1f83a194d46c46d445207449593a3a7c Mon Sep 17 00:00:00 2001 From: Renan Rangel Date: Fri, 18 Oct 2024 21:15:36 +0100 Subject: [PATCH] fix releasing the global read lock when mysqlshell backup fails (#17000) Signed-off-by: Renan Rangel Signed-off-by: 'Renan Rangel' --- go/ioutil/writer.go | 39 +++++ go/vt/mysqlctl/fakebackupengine.go | 92 ++++++++++ go/vt/mysqlctl/fakebackupstorage.go | 160 ++++++++++++++++++ go/vt/mysqlctl/fakemysqldaemon.go | 22 ++- go/vt/mysqlctl/mysqlshellbackupengine.go | 16 +- go/vt/mysqlctl/mysqlshellbackupengine_test.go | 120 +++++++++++++ 6 files changed, 443 insertions(+), 6 deletions(-) create mode 100644 go/ioutil/writer.go create mode 100644 go/vt/mysqlctl/fakebackupengine.go create mode 100644 go/vt/mysqlctl/fakebackupstorage.go diff --git a/go/ioutil/writer.go b/go/ioutil/writer.go new file mode 100644 index 00000000000..80ad87428bc --- /dev/null +++ b/go/ioutil/writer.go @@ -0,0 +1,39 @@ +/* +Copyright 2022 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* +MeteredWriteCloser and MeteredWriter are respectively, time-and-byte-tracking +wrappers around WriteCloser and Writer. +*/ + +package ioutil + +import ( + "bytes" +) + +// BytesBufferWriter implements io.WriteCloser using an in-memory buffer. +type BytesBufferWriter struct { + *bytes.Buffer +} + +func (m BytesBufferWriter) Close() error { + return nil +} + +func NewBytesBufferWriter() BytesBufferWriter { + return BytesBufferWriter{bytes.NewBuffer(nil)} +} diff --git a/go/vt/mysqlctl/fakebackupengine.go b/go/vt/mysqlctl/fakebackupengine.go new file mode 100644 index 00000000000..c0fce435d35 --- /dev/null +++ b/go/vt/mysqlctl/fakebackupengine.go @@ -0,0 +1,92 @@ +/* +Copyright 2022 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mysqlctl + +import ( + "context" + "time" + + "vitess.io/vitess/go/vt/mysqlctl/backupstorage" +) + +type FakeBackupEngine struct { + ExecuteBackupCalls []FakeBackupEngineExecuteBackupCall + ExecuteBackupDuration time.Duration + ExecuteBackupReturn FakeBackupEngineExecuteBackupReturn + ExecuteRestoreCalls []FakeBackupEngineExecuteRestoreCall + ExecuteRestoreDuration time.Duration + ExecuteRestoreReturn FakeBackupEngineExecuteRestoreReturn + ShouldDrainForBackupCalls int + ShouldDrainForBackupReturn bool +} + +type FakeBackupEngineExecuteBackupCall struct { + BackupParams BackupParams + BackupHandle backupstorage.BackupHandle +} + +type FakeBackupEngineExecuteBackupReturn struct { + Ok bool + Err error +} + +type FakeBackupEngineExecuteRestoreCall struct { + BackupHandle backupstorage.BackupHandle + RestoreParams RestoreParams +} + +type FakeBackupEngineExecuteRestoreReturn struct { + Manifest *BackupManifest + Err error +} + +func (be *FakeBackupEngine) ExecuteBackup( + ctx context.Context, + params BackupParams, + bh backupstorage.BackupHandle, +) (bool, error) { + be.ExecuteBackupCalls = append(be.ExecuteBackupCalls, FakeBackupEngineExecuteBackupCall{params, bh}) + + if be.ExecuteBackupDuration > 0 { + time.Sleep(be.ExecuteBackupDuration) + } + + return be.ExecuteBackupReturn.Ok, be.ExecuteBackupReturn.Err +} + +func (be *FakeBackupEngine) ExecuteRestore( + ctx context.Context, params RestoreParams, + bh backupstorage.BackupHandle, +) (*BackupManifest, error) { + be.ExecuteRestoreCalls = append(be.ExecuteRestoreCalls, FakeBackupEngineExecuteRestoreCall{bh, params}) + + // mark restore as in progress + if err := createStateFile(params.Cnf); err != nil { + return nil, err + } + + if be.ExecuteRestoreDuration > 0 { + time.Sleep(be.ExecuteRestoreDuration) + } + + return be.ExecuteRestoreReturn.Manifest, be.ExecuteRestoreReturn.Err +} + +func (be *FakeBackupEngine) ShouldDrainForBackup() bool { + be.ShouldDrainForBackupCalls = be.ShouldDrainForBackupCalls + 1 + return be.ShouldDrainForBackupReturn +} diff --git a/go/vt/mysqlctl/fakebackupstorage.go b/go/vt/mysqlctl/fakebackupstorage.go new file mode 100644 index 00000000000..a3adff0dbd0 --- /dev/null +++ b/go/vt/mysqlctl/fakebackupstorage.go @@ -0,0 +1,160 @@ +/* +Copyright 2022 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package mysqlctl + +import ( + "context" + "fmt" + "io" + + "vitess.io/vitess/go/vt/concurrency" + "vitess.io/vitess/go/vt/mysqlctl/backupstorage" +) + +type FakeBackupHandle struct { + Dir string + NameV string + ReadOnly bool + Errors concurrency.AllErrorRecorder + + AbortBackupCalls []context.Context + AbortBackupReturn error + AddFileCalls []FakeBackupHandleAddFileCall + AddFileReturn FakeBackupHandleAddFileReturn + EndBackupCalls []context.Context + EndBackupReturn error + ReadFileCalls []FakeBackupHandleReadFileCall + ReadFileReturnF func(ctx context.Context, filename string) (io.ReadCloser, error) +} + +type FakeBackupHandleAddFileCall struct { + Ctx context.Context + Filename string + Filesize int64 +} + +type FakeBackupHandleAddFileReturn struct { + WriteCloser io.WriteCloser + Err error +} + +type FakeBackupHandleReadFileCall struct { + Ctx context.Context + Filename string +} + +func (fbh *FakeBackupHandle) RecordError(err error) { + fbh.Errors.RecordError(err) +} + +func (fbh *FakeBackupHandle) HasErrors() bool { + return fbh.Errors.HasErrors() +} + +func (fbh *FakeBackupHandle) Error() error { + return fbh.Errors.Error() +} + +func (fbh *FakeBackupHandle) Directory() string { + return fbh.Dir +} + +func (fbh *FakeBackupHandle) Name() string { + return fbh.NameV +} + +func (fbh *FakeBackupHandle) AddFile(ctx context.Context, filename string, filesize int64) (io.WriteCloser, error) { + fbh.AddFileCalls = append(fbh.AddFileCalls, FakeBackupHandleAddFileCall{ctx, filename, filesize}) + return fbh.AddFileReturn.WriteCloser, fbh.AddFileReturn.Err +} + +func (fbh *FakeBackupHandle) EndBackup(ctx context.Context) error { + fbh.EndBackupCalls = append(fbh.EndBackupCalls, ctx) + return fbh.EndBackupReturn +} + +func (fbh *FakeBackupHandle) AbortBackup(ctx context.Context) error { + fbh.AbortBackupCalls = append(fbh.AbortBackupCalls, ctx) + return fbh.AbortBackupReturn +} + +func (fbh *FakeBackupHandle) ReadFile(ctx context.Context, filename string) (io.ReadCloser, error) { + fbh.ReadFileCalls = append(fbh.ReadFileCalls, FakeBackupHandleReadFileCall{ctx, filename}) + if fbh.ReadFileReturnF == nil { + return nil, fmt.Errorf("FakeBackupHandle has not defined a ReadFileReturnF") + } + return fbh.ReadFileReturnF(ctx, filename) +} + +type FakeBackupStorage struct { + CloseCalls int + CloseReturn error + ListBackupsCalls []FakeBackupStorageListBackupsCall + ListBackupsReturn FakeBackupStorageListBackupsReturn + RemoveBackupCalls []FakeBackupStorageRemoveBackupCall + RemoveBackupReturn error + RemoveBackupReturne error + StartBackupCalls []FakeBackupStorageStartBackupCall + StartBackupReturn FakeBackupStorageStartBackupReturn +} + +type FakeBackupStorageListBackupsCall struct { + Ctx context.Context + Dir string +} + +type FakeBackupStorageListBackupsReturn struct { + BackupHandles []backupstorage.BackupHandle + Err error +} + +type FakeBackupStorageRemoveBackupCall struct { + Ctx context.Context + Dir string + Name string +} + +type FakeBackupStorageStartBackupCall struct { + Ctx context.Context + Dir string + Name string +} + +type FakeBackupStorageStartBackupReturn struct { + BackupHandle backupstorage.BackupHandle + Err error +} + +func (fbs *FakeBackupStorage) ListBackups(ctx context.Context, dir string) ([]backupstorage.BackupHandle, error) { + fbs.ListBackupsCalls = append(fbs.ListBackupsCalls, FakeBackupStorageListBackupsCall{ctx, dir}) + return fbs.ListBackupsReturn.BackupHandles, fbs.ListBackupsReturn.Err +} + +func (fbs *FakeBackupStorage) StartBackup(ctx context.Context, dir, name string) (backupstorage.BackupHandle, error) { + fbs.StartBackupCalls = append(fbs.StartBackupCalls, FakeBackupStorageStartBackupCall{ctx, dir, name}) + return fbs.StartBackupReturn.BackupHandle, fbs.StartBackupReturn.Err +} + +func (fbs *FakeBackupStorage) RemoveBackup(ctx context.Context, dir, name string) error { + fbs.RemoveBackupCalls = append(fbs.RemoveBackupCalls, FakeBackupStorageRemoveBackupCall{ctx, dir, name}) + return fbs.RemoveBackupReturn +} + +func (fbs *FakeBackupStorage) Close() error { + fbs.CloseCalls = fbs.CloseCalls + 1 + return fbs.CloseReturn +} diff --git a/go/vt/mysqlctl/fakemysqldaemon.go b/go/vt/mysqlctl/fakemysqldaemon.go index dab1f6b07f2..5d087bc36ec 100644 --- a/go/vt/mysqlctl/fakemysqldaemon.go +++ b/go/vt/mysqlctl/fakemysqldaemon.go @@ -168,8 +168,12 @@ type FakeMysqlDaemon struct { // SemiSyncReplicaEnabled represents the state of rpl_semi_sync_slave_enabled. SemiSyncReplicaEnabled bool - // TimeoutHook is a func that can be called at the beginning of any method to fake a timeout. - // all a test needs to do is make it { return context.DeadlineExceeded } + // GlobalReadLock is used to test if a lock has been acquired already or not + GlobalReadLock bool + + // TimeoutHook is a func that can be called at the beginning of + // any method to fake a timeout. + // All a test needs to do is make it { return context.DeadlineExceeded }. TimeoutHook func() error // Version is the version that will be returned by GetVersionString. @@ -684,10 +688,20 @@ func (fmd *FakeMysqlDaemon) GetVersionComment(ctx context.Context) string { // AcquireGlobalReadLock is part of the MysqlDaemon interface. func (fmd *FakeMysqlDaemon) AcquireGlobalReadLock(ctx context.Context) error { - return errors.New("not implemented") + if fmd.GlobalReadLock { + return errors.New("lock already acquired") + } + + fmd.GlobalReadLock = true + return nil } // ReleaseGlobalReadLock is part of the MysqlDaemon interface. func (fmd *FakeMysqlDaemon) ReleaseGlobalReadLock(ctx context.Context) error { - return errors.New("not implemented") + if fmd.GlobalReadLock { + fmd.GlobalReadLock = false + return nil + } + + return errors.New("no read locks acquired yet") } diff --git a/go/vt/mysqlctl/mysqlshellbackupengine.go b/go/vt/mysqlctl/mysqlshellbackupengine.go index 02c82b66e25..ad2b2a7d84d 100644 --- a/go/vt/mysqlctl/mysqlshellbackupengine.go +++ b/go/vt/mysqlctl/mysqlshellbackupengine.go @@ -53,6 +53,8 @@ var ( // disable redo logging and double write buffer mysqlShellSpeedUpRestore = false + mysqlShellBackupBinaryName = "mysqlsh" + // use when checking if we need to create the directory on the local filesystem or not. knownObjectStoreParams = []string{"s3BucketName", "osBucketName", "azureContainerName"} @@ -104,8 +106,8 @@ type MySQLShellBackupEngine struct { } const ( - mysqlShellBackupBinaryName = "mysqlsh" mysqlShellBackupEngineName = "mysqlshell" + mysqlShellLockMessage = "Global read lock has been released" ) func (be *MySQLShellBackupEngine) ExecuteBackup(ctx context.Context, params BackupParams, bh backupstorage.BackupHandle) (result bool, finalErr error) { @@ -139,6 +141,11 @@ func (be *MySQLShellBackupEngine) ExecuteBackup(ctx context.Context, params Back } lockAcquired := time.Now() // we will report how long we hold the lock for + // we need to release the global read lock in case the backup fails to start and + // the lock wasn't released by releaseReadLock() yet. context might be expired, + // so we pass a new one. + defer func() { _ = params.Mysqld.ReleaseGlobalReadLock(context.Background()) }() + posBeforeBackup, err := params.Mysqld.PrimaryPosition() if err != nil { return false, vterrors.Wrap(err, "failed to fetch position") @@ -171,6 +178,7 @@ func (be *MySQLShellBackupEngine) ExecuteBackup(ctx context.Context, params Back // Get exit status. if err := cmd.Wait(); err != nil { + pipeWriter.Close() // make sure we close the writer so the goroutines above will complete. return false, vterrors.Wrap(err, mysqlShellBackupEngineName+" failed") } @@ -471,7 +479,7 @@ func releaseReadLock(ctx context.Context, reader io.Reader, params BackupParams, if !released { - if !strings.Contains(line, "Global read lock has been released") { + if !strings.Contains(line, mysqlShellLockMessage) { continue } released = true @@ -489,6 +497,10 @@ func releaseReadLock(ctx context.Context, reader io.Reader, params BackupParams, if err := scanner.Err(); err != nil { params.Logger.Errorf("error reading from reader: %v", err) } + + if !released { + params.Logger.Errorf("could not release global lock earlier") + } } func cleanupMySQL(ctx context.Context, params RestoreParams, shouldDeleteUsers bool) error { diff --git a/go/vt/mysqlctl/mysqlshellbackupengine_test.go b/go/vt/mysqlctl/mysqlshellbackupengine_test.go index 8713e5e5a9b..5400491c2fc 100644 --- a/go/vt/mysqlctl/mysqlshellbackupengine_test.go +++ b/go/vt/mysqlctl/mysqlshellbackupengine_test.go @@ -18,13 +18,16 @@ package mysqlctl import ( "context" + "encoding/json" "fmt" + "os" "path" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "vitess.io/vitess/go/ioutil" "vitess.io/vitess/go/mysql/fakesqldb" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/logutil" @@ -276,3 +279,120 @@ func TestCleanupMySQL(t *testing.T) { } } + +// this is a helper to write files in a temporary directory +func generateTestFile(t *testing.T, name, contents string) { + f, err := os.OpenFile(name, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0700) + require.NoError(t, err) + defer f.Close() + _, err = f.WriteString(contents) + require.NoError(t, err) + require.NoError(t, f.Close()) +} + +// This tests if we are properly releasing the global read lock we acquire +// during ExecuteBackup(), even if the backup didn't succeed. +func TestMySQLShellBackupEngine_ExecuteBackup_ReleaseLock(t *testing.T) { + originalLocation := mysqlShellBackupLocation + originalBinary := mysqlShellBackupBinaryName + mysqlShellBackupLocation = "logical" + mysqlShellBackupBinaryName = path.Join(t.TempDir(), "test.sh") + + defer func() { // restore the original values. + mysqlShellBackupLocation = originalLocation + mysqlShellBackupBinaryName = originalBinary + }() + + logger := logutil.NewMemoryLogger() + fakedb := fakesqldb.New(t) + defer fakedb.Close() + mysql := NewFakeMysqlDaemon(fakedb) + defer mysql.Close() + + be := &MySQLShellBackupEngine{} + params := BackupParams{ + TabletAlias: "test", + Logger: logger, + Mysqld: mysql, + } + bs := FakeBackupStorage{ + StartBackupReturn: FakeBackupStorageStartBackupReturn{}, + } + + t.Run("lock released if we see the mysqlsh lock being acquired", func(t *testing.T) { + logger.Clear() + manifestBuffer := ioutil.NewBytesBufferWriter() + bs.StartBackupReturn.BackupHandle = &FakeBackupHandle{ + Dir: t.TempDir(), + AddFileReturn: FakeBackupHandleAddFileReturn{WriteCloser: manifestBuffer}, + } + + // this simulates mysql shell completing without any issues. + generateTestFile(t, mysqlShellBackupBinaryName, fmt.Sprintf("#!/bin/bash\n>&2 echo %s", mysqlShellLockMessage)) + + bh, err := bs.StartBackup(context.Background(), t.TempDir(), t.Name()) + require.NoError(t, err) + + _, err = be.ExecuteBackup(context.Background(), params, bh) + require.NoError(t, err) + require.False(t, mysql.GlobalReadLock) // lock must be released. + + // check the manifest is valid. + var manifest MySQLShellBackupManifest + err = json.Unmarshal(manifestBuffer.Bytes(), &manifest) + require.NoError(t, err) + + require.Equal(t, mysqlShellBackupEngineName, manifest.BackupMethod) + + // did we notice the lock was release and did we release it ours as well? + require.Contains(t, logger.String(), "global read lock released after", + "failed to release the global lock after mysqlsh") + }) + + t.Run("lock released if when we don't see mysqlsh released it", func(t *testing.T) { + mysql.GlobalReadLock = false // clear lock status. + logger.Clear() + manifestBuffer := ioutil.NewBytesBufferWriter() + bs.StartBackupReturn.BackupHandle = &FakeBackupHandle{ + Dir: t.TempDir(), + AddFileReturn: FakeBackupHandleAddFileReturn{WriteCloser: manifestBuffer}, + } + + // this simulates mysqlshell completing, but we don't see the message that is released its lock. + generateTestFile(t, mysqlShellBackupBinaryName, "#!/bin/bash\nexit 0") + + bh, err := bs.StartBackup(context.Background(), t.TempDir(), t.Name()) + require.NoError(t, err) + + // in this case the backup was successful, but even if we didn't see mysqlsh release its lock + // we make sure it is released at the end. + _, err = be.ExecuteBackup(context.Background(), params, bh) + require.NoError(t, err) + require.False(t, mysql.GlobalReadLock) // lock must be released. + + // make sure we are at least logging the lock wasn't able to be released earlier. + require.Contains(t, logger.String(), "could not release global lock earlier", + "failed to log error message when unable to release lock during backup") + }) + + t.Run("lock released when backup fails", func(t *testing.T) { + mysql.GlobalReadLock = false // clear lock status. + logger.Clear() + manifestBuffer := ioutil.NewBytesBufferWriter() + bs.StartBackupReturn.BackupHandle = &FakeBackupHandle{ + Dir: t.TempDir(), + AddFileReturn: FakeBackupHandleAddFileReturn{WriteCloser: manifestBuffer}, + } + + // this simulates the backup process failing. + generateTestFile(t, mysqlShellBackupBinaryName, "#!/bin/bash\nexit 1") + + bh, err := bs.StartBackup(context.Background(), t.TempDir(), t.Name()) + require.NoError(t, err) + + _, err = be.ExecuteBackup(context.Background(), params, bh) + require.ErrorContains(t, err, "mysqlshell failed") + require.False(t, mysql.GlobalReadLock) // lock must be released. + }) + +}