Skip to content

Commit

Permalink
fix(csi): Invalid mount point with volume detached
Browse files Browse the repository at this point in the history
Volume detached because only one replica failed and not remount the
`globalmount`. We need to handle it at NodePublishVolume because
os.ReadDir will be passed when it becomes a read-only file system
after an unexpected volume detaching.

Use an empty file to test the file system

Ref: 4814, 5801

Signed-off-by: James Lu <james.lu@suse.com>
(cherry picked from commit a9d6289)

# Conflicts:
#	csi/node_server.go
#	csi/util.go
  • Loading branch information
mantissahz authored and mergify[bot] committed Jul 6, 2023
1 parent ee45988 commit 87bc581
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 0 deletions.
4 changes: 4 additions & 0 deletions csi/node_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,11 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
if volume.State != string(longhorn.VolumeStateAttached) || volume.Controllers[0].Endpoint == "" {
logrus.Debugf("volume %v hasn't been attached yet, try unmounting potential mount point %v", volumeID, targetPath)
if err := unmount(targetPath, mounter); err != nil {
<<<<<<< HEAD
logrus.Debugf("failed to unmount error: %v", err)
=======
log.WithError(err).Warnf("Failed to unmount: %v", targetPath)
>>>>>>> a9d62894 (fix(csi): Invalid mount point with volume detached)
}
return nil, status.Errorf(codes.InvalidArgument, "volume %s hasn't been attached yet", volumeID)
}
Expand Down
46 changes: 46 additions & 0 deletions csi/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path"
"strconv"
"strings"
"time"
Expand All @@ -29,6 +31,8 @@ const (
defaultStaleReplicaTimeout = 2880

defaultForceUmountTimeout = 30 * time.Second

tempTestMountPointValidStatusFile = ".longhorn-volume-mount-point-test.tmp"
)

// NewForcedParamsExec creates a osExecutor that allows for adding additional params to later occurring Run calls
Expand Down Expand Up @@ -215,6 +219,42 @@ func parseJSONRecurringJobs(jsonRecurringJobs string) ([]longhornclient.Recurrin
return recurringJobs, nil
}

func syncMountPointDirectory(targetPath string) error {
d, err := os.OpenFile(targetPath, os.O_SYNC, 0750)
if err != nil {
return err
}
defer d.Close()

if _, err := d.Readdirnames(1); err != nil {
if !errors.Is(err, io.EOF) {
return err
}
}

// it would not always return `Input/Output Error` or `read-only file system` errors if we only use ReadDir() or Readdirnames() without an I/O operation
// an I/O operation will make the targetPath mount point invalid immediately
tempFile := path.Join(targetPath, tempTestMountPointValidStatusFile)
f, err := os.OpenFile(tempFile, os.O_CREATE|os.O_RDWR|os.O_SYNC|os.O_TRUNC, 0666)
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
logrus.WithError(err).Warnf("Failed to close %v", tempFile)
}
if err := os.Remove(tempFile); err != nil {
logrus.WithError(err).Warnf("Failed to remove %v", tempFile)
}
}()

if _, err := f.WriteString(tempFile); err != nil {
return err
}

return nil
}

// ensureMountPoint evaluates whether a path is a valid mountPoint
// in case the targetPath does not exists it will create a path and return false
// in case where the mount point exists but is corrupt, the mount point will be cleaned up and a error is returned
Expand All @@ -228,9 +268,15 @@ func ensureMountPoint(targetPath string, mounter mount.Interface) (bool, error)

IsCorruptedMnt := mount.IsCorruptedMnt(err)
if !IsCorruptedMnt {
<<<<<<< HEAD
logrus.Debugf("mount point %v try reading dir to make sure it's healthy", targetPath)
if _, err := os.ReadDir(targetPath); err != nil {
logrus.Debugf("mount point %v was identified as corrupt by ReadDir", targetPath)
=======
logrus.Debugf("Mount point %v try opening and syncing dir to make sure it's healthy", targetPath)
if err := syncMountPointDirectory(targetPath); err != nil {
logrus.WithError(err).Warnf("Mount point %v was identified as corrupt by opening and syncing", targetPath)
>>>>>>> a9d62894 (fix(csi): Invalid mount point with volume detached)
IsCorruptedMnt = true
}
}
Expand Down

0 comments on commit 87bc581

Please sign in to comment.