Skip to content

Commit

Permalink
fix(rwx): remount hangs due to service cleanup
Browse files Browse the repository at this point in the history
longhorn/longhorn-8999

Signed-off-by: Chin-Ya Huang <chin-ya.huang@suse.com>
  • Loading branch information
c3y1huang authored and derekbit committed Jul 17, 2024
1 parent 2abe098 commit b615732
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 24 deletions.
40 changes: 40 additions & 0 deletions controller/setting_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,14 @@ func (sc *SettingController) syncDangerZoneSettingsForManagedComponents(settingN
if err := sc.updateCNI(sc.ds.AreAllRWXVolumesDetached); err != nil {
return err
}

// Clean up the share manager Services and Endpoints so that the
// correct ones are created again when switching between the
// cluster network and the storage network.
if err := sc.cleanupShareManagerServiceAndEndpoints(); err != nil {
return err
}
}
return nil
}
Expand Down Expand Up @@ -914,6 +922,38 @@ func (sc *SettingController) updateKubernetesClusterAutoscalerEnabled() error {
return nil
}

// cleanupShareManagerServiceAndEndpoints deletes, for every ShareManager
// returned by the datastore, the Service and the Endpoints object that carry
// the ShareManager's name in the ShareManager's namespace.
//
// A NotFound error from either delete is ignored. Any other error stops the
// loop immediately and is returned, wrapped with a message naming the
// storage-network-for-RWX-volume setting.
func (sc *SettingController) cleanupShareManagerServiceAndEndpoints() (err error) {
	// err is a named result on purpose: the deferred wrapper below can only
	// change what the caller receives if it writes to the result variable.
	defer func() {
		if err != nil {
			err = errors.Wrapf(err, "failed to cleanup share manager service and endpoints for %s setting update", types.SettingNameStorageNetworkForRWXVolumeEnabled)
		}
	}()

	shareManagers, err := sc.ds.ListShareManagers()
	if err != nil {
		return err
	}

	for _, shareManager := range shareManagers {
		log := sc.logger.WithField("shareManager", shareManager.Name)

		log.WithField("service", shareManager.Name).Infof("Deleting Service for %v setting update", types.SettingNameStorageNetworkForRWXVolumeEnabled)
		// Use distinctly named locals so a tolerated NotFound never lingers
		// in (or shadows) the named result.
		if svcErr := sc.ds.DeleteService(shareManager.Namespace, shareManager.Name); svcErr != nil && !apierrors.IsNotFound(svcErr) {
			return svcErr
		}

		log.WithField("endpoint", shareManager.Name).Infof("Deleting Endpoint for %v setting update", types.SettingNameStorageNetworkForRWXVolumeEnabled)
		if epErr := sc.ds.DeleteKubernetesEndpoint(shareManager.Namespace, shareManager.Name); epErr != nil && !apierrors.IsNotFound(epErr) {
			return epErr
		}
	}

	return nil
}

// updateCNI deletes all system-managed data plane components immediately with the updated CNI annotation.
func (sc *SettingController) updateCNI(fnCheckVolumeDetached func() (bool, error)) error {
storageNetwork, err := sc.ds.GetSettingWithAutoFillingRO(types.SettingNameStorageNetwork)
Expand Down
24 changes: 0 additions & 24 deletions controller/share_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,21 +617,6 @@ func (c *ShareManagerController) syncShareManagerVolume(sm *longhorn.ShareManage
return nil
}

// cleanupShareManagerService looks up the Service named after the given
// share manager in the share manager's namespace and deletes it.
// It returns nil when the lookup reports NotFound, the lookup error for any
// other lookup failure, and otherwise the result of the delete call.
func (c *ShareManagerController) cleanupShareManagerService(shareManager *longhorn.ShareManager) error {
	smLogger := getLoggerForShareManager(c.logger, shareManager)

	svc, lookupErr := c.ds.GetService(shareManager.Namespace, shareManager.Name)
	switch {
	case lookupErr == nil:
		// Service exists; continue to the delete below.
	case apierrors.IsNotFound(lookupErr):
		// Nothing to clean up.
		return nil
	default:
		return lookupErr
	}

	smLogger.Infof("Cleaning up share manager service %v", svc.Name)
	return c.ds.DeleteService(shareManager.Namespace, svc.Name)
}

func (c *ShareManagerController) cleanupShareManagerPod(sm *longhorn.ShareManager) error {
log := getLoggerForShareManager(c.logger, sm)
podName := types.GetShareManagerPodNameFromShareManagerName(sm.Name)
Expand Down Expand Up @@ -677,15 +662,6 @@ func (c *ShareManagerController) syncShareManagerPod(sm *longhorn.ShareManager)
if err != nil {
return
}

// Perform cleanup of the share manager Service
// This is to allow the creation of the correct Service
// and Endpoint when switching between cluster network
// and storage network.
err = c.cleanupShareManagerService(sm)
if err != nil {
return
}
}
}()

Expand Down
1 change: 1 addition & 0 deletions csi/node_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,7 @@ func (ns *NodeServer) getMounter(volume *longhornclient.Volume, volumeCapability
} else {
logrus.Warnf("Volume %v with unsupported filesystem %v, use default fs creation params", volume.Name, fsType)
}

return mounter, nil
}

Expand Down
5 changes: 5 additions & 0 deletions datastore/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,11 @@ func (s *DataStore) CreateKubernetesEndpoint(endpoint *corev1.Endpoints) (*corev
return s.kubeClient.CoreV1().Endpoints(endpoint.Namespace).Create(context.TODO(), endpoint, metav1.CreateOptions{})
}

// DeleteKubernetesEndpoint deletes the Kubernetes Endpoints object with the
// given name from the given namespace. The namespace is the one supplied by
// the caller, not the DataStore's own namespace. The error from the client
// call is returned unchanged.
func (s *DataStore) DeleteKubernetesEndpoint(namespace, name string) error {
return s.kubeClient.CoreV1().Endpoints(namespace).Delete(context.TODO(), name, metav1.DeleteOptions{})
}

// UpdateKubernetesEndpoint updates the Kubernetes Endpoint of the given name in the Longhorn namespace.
func (s *DataStore) UpdateKubernetesEndpoint(endpoint *corev1.Endpoints) (*corev1.Endpoints, error) {
return s.kubeClient.CoreV1().Endpoints(s.namespace).Update(context.TODO(), endpoint, metav1.UpdateOptions{})
Expand Down

0 comments on commit b615732

Please sign in to comment.