diff --git a/app/cmd/start.go b/app/cmd/start.go index c3f4bd105..7c33f89d2 100644 --- a/app/cmd/start.go +++ b/app/cmd/start.go @@ -24,11 +24,13 @@ import ( healthpb "google.golang.org/grpc/health/grpc_health_v1" "google.golang.org/grpc/keepalive" "google.golang.org/grpc/reflection" + "k8s.io/mount-utils" commonTypes "github.com/longhorn/go-common-libs/types" helpernvme "github.com/longhorn/go-spdk-helper/pkg/nvme" helpertypes "github.com/longhorn/go-spdk-helper/pkg/types" helperutil "github.com/longhorn/go-spdk-helper/pkg/util" + engineutil "github.com/longhorn/longhorn-engine/pkg/util" spdk "github.com/longhorn/longhorn-spdk-engine/pkg/spdk" spdkutil "github.com/longhorn/longhorn-spdk-engine/pkg/util" rpc "github.com/longhorn/types/pkg/generated/imrpc" @@ -159,6 +161,34 @@ func cleanupStaledNvmeAndDmDevices() error { return nil } +func unfreezeFilesystems() error { + // We do not need to switch to the host mount namespace to get mount points here. Usually, longhorn-engine runs in a + // container that has / bind mounted to /host with at least HostToContainer (rslave) propagation. + // - If it does not, we likely can't do a namespace swap anyway, since we don't have access to /host/proc. + // - If it does, we just need to know where in the container we can access the mount points to unfreeze the file + // system. + mounter := mount.New("") + mountPoints, err := mounter.List() + if err != nil { + return errors.Wrap(err, "failed to list mount points while starting up") + } + + for _, mountPoint := range mountPoints { + if strings.Contains(mountPoint.Device, engineutil.DevicePathPrefix) { + // We do not actually expect any filesystems to be frozen. This is a best effort attempt to unfreeze them + // if somehow instance manager crashed at the wrong moment during a snapshot. + unfroze, err := engineutil.UnfreezeFilesystem(mountPoint.Path, nil) + if err != nil { + logrus.WithError(err).Warnf("Failed to unfreeze filesystem mounted at %v", mountPoint) + } + if unfroze { + logrus.Warnf("Unfroze filesystem mounted at %v", mountPoint) + } + } + } + return nil +} + func start(c *cli.Context) (err error) { listen := c.String("listen") logsDir := c.String("logs-dir") @@ -183,6 +213,10 @@ func start(c *cli.Context) (err error) { if err := cleanupStaledNvmeAndDmDevices(); err != nil { return err } + } else { + if err := unfreezeFilesystems(); err != nil { + return err + } } // setup tls config diff --git a/go.mod b/go.mod index f5f55b4d2..fd467e84e 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/longhorn/backupstore v0.0.0-20240509144945-3bce6e69af15 github.com/longhorn/go-common-libs v0.0.0-20240514074907-351459694cbf github.com/longhorn/go-spdk-helper v0.0.0-20240514082311-4069f4804017 - github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240509154612-5fdc92a2d526 + github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240514224711-e39b7f0313b2 github.com/longhorn/longhorn-spdk-engine v0.0.0-20240516014845-759dfa872eae github.com/longhorn/types v0.0.0-20240510221052-ab949bbedea3 github.com/pkg/errors v0.9.1 @@ -43,6 +43,7 @@ require ( github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect + github.com/longhorn/go-iscsi-helper v0.0.0-20240427164656-e9439c0018ce // indirect github.com/longhorn/nsfilelock v0.0.0-20200723175406-fa7c83ad0003 // indirect github.com/longhorn/sparse-tools v0.0.0-20240427164751-a7b9f1b2c8a8 // indirect github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect diff --git a/go.sum b/go.sum index 886c4453d..75dede89e 100644 --- a/go.sum +++ b/go.sum @@ -69,10 +69,12 @@ github.com/longhorn/backupstore v0.0.0-20240509144945-3bce6e69af15 h1:D838/RWPKm github.com/longhorn/backupstore v0.0.0-20240509144945-3bce6e69af15/go.mod h1:n210xpMUVrSn/W4Za/9BZhyXLCTVfAOq5lNdLrRSyz8= github.com/longhorn/go-common-libs v0.0.0-20240514074907-351459694cbf h1:3mjybe5dP3uJF2daMVv/U/IGNcp1dsrzHl4b6w8icPU= github.com/longhorn/go-common-libs v0.0.0-20240514074907-351459694cbf/go.mod h1:gFXUEciTv/03ncyA8CNrfVkbikvSWNqCYwwsTC3lFGg= +github.com/longhorn/go-iscsi-helper v0.0.0-20240427164656-e9439c0018ce h1:PxKniE9F6IZ2DMKfmxDsbqeAxQI1TZhnw7/HOBMs1Is= +github.com/longhorn/go-iscsi-helper v0.0.0-20240427164656-e9439c0018ce/go.mod h1:d9t3gtE+UPjescbCFluXd4xBc8OQT/JrC2cdkk2IXWQ= github.com/longhorn/go-spdk-helper v0.0.0-20240514082311-4069f4804017 h1:Cl8mEO7uP2T9ZDbqV3BK2xhrBVyjelY0WV9LDGdqb/Y= github.com/longhorn/go-spdk-helper v0.0.0-20240514082311-4069f4804017/go.mod h1:SgKStbsHMJqVjf/uDpS7KYQakOp2jHBRSbRnMk6iNqo= -github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240509154612-5fdc92a2d526 h1:JvSWv3PBV89p6TsM2DQhkm/FB4L95U9l2nrPomzT9KA= -github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240509154612-5fdc92a2d526/go.mod h1:SvLrPprzk/iXFG3G4wDbWXD1kePlxg12Oui96FLBHAg= +github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240514224711-e39b7f0313b2 h1:mG7wUWZrJ5arEZWaghm73ywH09LUZo77kGUaHN7nzyg= +github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240514224711-e39b7f0313b2/go.mod h1:G+iGNlJm1RKuKXqz4Igh0b5miiRblld2C9GL9hs4ROk= github.com/longhorn/longhorn-spdk-engine v0.0.0-20240516014845-759dfa872eae h1:07uvZWinlp6Zwvuhacv9Lv3NzRM46UKPu0xIAXP7VEA= github.com/longhorn/longhorn-spdk-engine v0.0.0-20240516014845-759dfa872eae/go.mod h1:i8arwLcEawuN9rdHmo3doVhg9fQFPh0q+GJaKR2FOxk= github.com/longhorn/nsfilelock v0.0.0-20200723175406-fa7c83ad0003 h1:Jw9uANsGcHTxp6HcC++/vN17LfeuDmozHI2j6DoZf5E= diff --git a/pkg/client/proxy_snapshot.go b/pkg/client/proxy_snapshot.go index 87b1350c8..6d36f3a96 100644 --- a/pkg/client/proxy_snapshot.go +++ b/pkg/client/proxy_snapshot.go @@ -12,7 +12,7 @@ import ( ) func (c *ProxyClient) VolumeSnapshot(dataEngine, engineName, volumeName, serviceAddress, - volumeSnapshotName string, labels map[string]string) (snapshotName string, err error) { + volumeSnapshotName string, labels map[string]string, freezeFilesystem bool) (snapshotName string, err error) { input := map[string]string{ "engineName": engineName, "volumeName": volumeName, @@ -55,8 +55,9 @@ func (c *ProxyClient) VolumeSnapshot(dataEngine, engineName, volumeName, service VolumeName: volumeName, }, SnapshotVolume: &enginerpc.VolumeSnapshotRequest{ - Name: volumeSnapshotName, - Labels: labels, + Name: volumeSnapshotName, + Labels: labels, + FreezeFilesystem: freezeFilesystem, }, } recv, err := c.service.VolumeSnapshot(getContextWithGRPCTimeout(c.ctx), req) diff --git a/pkg/proxy/snapshot.go b/pkg/proxy/snapshot.go index 53b97ba0a..1acc2e600 100644 --- a/pkg/proxy/snapshot.go +++ b/pkg/proxy/snapshot.go @@ -41,7 +41,7 @@ func (ops V1DataEngineProxyOps) VolumeSnapshot(ctx context.Context, req *rpc.Eng } defer c.Close() - recv, err := c.VolumeSnapshot(req.SnapshotVolume.Name, req.SnapshotVolume.Labels) + recv, err := c.VolumeSnapshot(req.SnapshotVolume.Name, req.SnapshotVolume.Labels, req.SnapshotVolume.FreezeFilesystem) if err != nil { return nil, err } diff --git a/vendor/github.com/longhorn/go-iscsi-helper/LICENSE b/vendor/github.com/longhorn/go-iscsi-helper/LICENSE new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/longhorn/go-iscsi-helper/iscsi/initiator.go b/vendor/github.com/longhorn/go-iscsi-helper/iscsi/initiator.go new file mode 100644 index 000000000..abe4c77b0 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/iscsi/initiator.go @@ -0,0 +1,367 @@ +package iscsi + +import ( + "bufio" + "fmt" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + lhns "github.com/longhorn/go-common-libs/ns" + lhtypes "github.com/longhorn/go-common-libs/types" +) + +var ( + DeviceWaitRetryCounts = 10 + DeviceWaitRetryInterval = 1 * time.Second + + ScsiNodesDirs = []string{ + "/etc/iscsi/nodes/", + "/var/lib/iscsi/nodes/", + } +) + +const ( + iscsiBinary = "iscsiadm" + scanModeManual = "manual" + scanModeAuto = "auto" + ScanTimeout = 10 * time.Second +) + +func CheckForInitiatorExistence(nsexec *lhns.Executor) error { + opts := []string{ + "--version", + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +func UpdateScsiDeviceTimeout(devName string, timeout int64, nsexec *lhns.Executor) error { + deviceTimeoutFile := filepath.Join("/sys/block", devName, "device", "timeout") + return lhns.WriteFile(deviceTimeoutFile, fmt.Sprint(timeout)) +} + +func UpdateIscsiDeviceAbortTimeout(target string, timeout int64, nsexec *lhns.Executor) error { + opts := []string{ + "-m", "node", + "-T", target, + "-o", "update", + "-n", "node.session.err_timeo.abort_timeout", + "-v", strconv.FormatInt(timeout, 10), + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +func DiscoverTarget(ip, target string, nsexec *lhns.Executor) error { + opts := []string{ + "-m", "discovery", + "-t", "sendtargets", + "-p", ip, + } + output, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return err + } + // Sometime iscsiadm won't return error but showing e.g.: + // iscsiadm: Could not stat /etc/iscsi/nodes//,3260,-1/default to + // delete node: No such file or directory\n\niscsiadm: Could not + // add/update [tcp:[hw=,ip=,net_if=,iscsi_if=default] 172.18.0.5,3260,1 + // iqn.2019-10.io.longhorn:vol9]\n172.18.0.5:3260,1 + // iqn.2019-10.io.longhorn:vol9\n" + if strings.Contains(output, "Could not") { + return fmt.Errorf("cannot discover target: %s", output) + } + if !strings.Contains(output, target) { + return fmt.Errorf("cannot find target %s in discovered targets %s", target, output) + } + return nil +} + +func DeleteDiscoveredTarget(ip, target string, nsexec *lhns.Executor) error { + opts := []string{ + "-m", "node", + "-o", "delete", + "-T", target, + } + if ip != "" { + opts = append(opts, "-p", ip) + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +func IsTargetDiscovered(ip, target string, nsexec *lhns.Executor) bool { + opts := []string{ + "-m", "node", + "-T", target, + } + if ip != "" { + opts = append(opts, "-p", ip) + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err == nil +} + +func LoginTarget(ip, target string, nsexec *lhns.Executor) error { + opts := []string{ + "-m", "node", + "-T", target, + "-p", ip, + "--login", + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return err + } + + scanMode, err := getIscsiNodeSessionScanMode(ip, target, nsexec) + if err != nil { + return errors.Wrap(err, "Failed to get node.session.scan mode") + } + + if scanMode == scanModeManual { + logrus.Infof("Manually rescan LUNs of the target %v:%v", target, ip) + if err := manualScanSession(ip, target, nsexec); err != nil { + return errors.Wrapf(err, "failed to manually rescan iscsi session of target %v:%v", target, ip) + } + } else { + logrus.Infof("default: automatically rescan all LUNs of all iscsi sessions") + } + + return nil +} + +// LogoutTarget will logout all sessions if ip == "" +func LogoutTarget(ip, target string, nsexec *lhns.Executor) error { + opts := []string{ + "-m", "node", + "-T", target, + "--logout", + } + if ip != "" { + opts = append(opts, "-p", ip) + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +func GetDevice(ip, target string, lun int, nsexec *lhns.Executor) (*lhtypes.BlockDeviceInfo, error) { + var err error + + var dev *lhtypes.BlockDeviceInfo + for i := 0; i < DeviceWaitRetryCounts; i++ { + dev, err = findScsiDevice(ip, target, lun, nsexec) + if err == nil { + break + } + time.Sleep(DeviceWaitRetryInterval) + } + if err != nil { + return nil, err + } + return dev, nil +} + +// IsTargetLoggedIn check all portals if ip == "" +func IsTargetLoggedIn(ip, target string, nsexec *lhns.Executor) bool { + opts := []string{ + "-m", "session", + } + + output, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return false + } + /* It will looks like: + tcp: [463] 172.17.0.2:3260,1 iqn.2019-10.io.longhorn:test-volume + or: + tcp: [463] 172.17.0.2:3260,1 iqn.2019-10.io.longhorn:test-volume (non-flash) + */ + found := false + scanner := bufio.NewScanner(strings.NewReader(output)) + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, ip+":") { + if strings.HasSuffix(line, " "+target) || + strings.Contains(scanner.Text(), " "+target+" ") { + found = true + break + } + } + } + + return found +} + +func manualScanSession(ip, target string, nsexec *lhns.Executor) error { + opts := []string{ + "-m", "node", + "-T", target, + "-p", ip, + "--rescan", + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, ScanTimeout) + return err +} + +func getIscsiNodeSessionScanMode(ip, target string, nsexec *lhns.Executor) (string, error) { + opts := []string{ + "-m", "node", + "-T", target, + "-p", ip, + "-o", "show", + } + output, err := nsexec.Execute(nil, iscsiBinary, opts, ScanTimeout) + if err != nil { + return "", err + } + if strings.Contains(output, "node.session.scan = manual") { + return scanModeManual, nil + } + return scanModeAuto, nil +} + +func findScsiDevice(ip, target string, lun int, nsexec *lhns.Executor) (*lhtypes.BlockDeviceInfo, error) { + name := "" + + opts := []string{ + "-m", "session", + "-P", "3", + } + output, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return nil, err + } + /* + Now we got something like this in output, and need to parse it + Target: iqn.2019-10.io.longhorn:for.all (non-flash) + Current Portal: 172.17.0.2:3260,1 + Persistent Portal: 172.17.0.2:3260,1 + ... + Attached SCSI devices: + ... + scsi12 Channel 00 Id 0 Lun: 0 + scsi12 Channel 00 Id 0 Lun: 1 + Attached scsi disk sdb State: running + ... + Target: ... + */ + scanner := bufio.NewScanner(strings.NewReader(output)) + targetLine := "Target: " + target + ipLine := " " + ip + ":" + lunLine := "Lun: " + strconv.Itoa(lun) + diskPrefix := "Attached scsi disk" + stateLine := "State:" + + inTarget := false + inIP := false + inLun := false + for scanner.Scan() { + /* Target line can be: + Target: iqn.2019-10.io.longhorn:for.all (non-flash) + or: + Target: iqn.2019-10.io.longhorn:for.all + */ + if !inTarget && + (strings.Contains(scanner.Text(), targetLine+" ") || + strings.HasSuffix(scanner.Text(), targetLine)) { + inTarget = true + continue + } + if inTarget && strings.Contains(scanner.Text(), ipLine) { + inIP = true + continue + } + if inIP && strings.Contains(scanner.Text(), lunLine) { + inLun = true + continue + } + // The line we need + if inLun { + line := scanner.Text() + if !strings.Contains(line, diskPrefix) { + return nil, fmt.Errorf("invalid output format, cannot find disk in: %s\n %s", line, output) + } + line = strings.TrimSpace(strings.Split(line, stateLine)[0]) + line = strings.TrimPrefix(line, diskPrefix) + name = strings.TrimSpace(line) + break + } + } + + if name == "" { + return nil, fmt.Errorf("cannot find iSCSI device") + } + + // TODO: replace with namespace joiner + // now that we know the device is mapped, we can get it's (major:minor) + devices, err := lhns.GetSystemBlockDevices() + if err != nil { + return nil, err + } + + dev, known := devices[name] + if !known { + return nil, fmt.Errorf("cannot find kernel device for iSCSI device: %s", name) + } + + return &dev, nil +} + +func CleanupScsiNodes(target string) error { + for _, dir := range ScsiNodesDirs { + if _, err := lhns.GetFileInfo(dir); err != nil { + continue + } + + targetDir := filepath.Join(dir, target) + if _, err := lhns.GetFileInfo(targetDir); err != nil { + continue + } + + // Remove all empty files in the directory + emptyFilePaths, err := lhns.GetEmptyFiles(targetDir) + if err != nil { + return err + } + + for _, emptyFilePath := range emptyFilePaths { + err := lhns.DeletePath(emptyFilePath) + if err != nil { + return errors.Wrapf(err, "failed to clean up empty iSCSI node file %v", emptyFilePath) + } + } + + // Try to remove the upper level directory containing empty files. + // We don't mind if it fails. + dirContainEmptyFiles := make(map[string]bool) + for _, emptyFilePath := range emptyFilePaths { + dirContainEmptyFiles[filepath.Dir(emptyFilePath)] = true + } + for dir := range dirContainEmptyFiles { + err := lhns.DeleteDirectory(dir) + if err != nil { + logrus.WithError(err).Warnf("Failed to clean up iSCSI node directory %v", dir) + } + } + } + return nil +} + +func RescanTarget(ip, target string, nsexec *lhns.Executor) error { + opts := []string{ + "-m", "node", + "-T", target, + "-R", + } + if ip != "" { + opts = append(opts, "-p", ip) + } + _, err := nsexec.Execute(nil, iscsiBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} diff --git a/vendor/github.com/longhorn/go-iscsi-helper/iscsi/target.go b/vendor/github.com/longhorn/go-iscsi-helper/iscsi/target.go new file mode 100644 index 000000000..995101cd5 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/iscsi/target.go @@ -0,0 +1,405 @@ +package iscsi + +import ( + "bufio" + "fmt" + "io" + "os" + "os/exec" + "strconv" + "strings" + "time" + + "github.com/pkg/errors" + + lhexec "github.com/longhorn/go-common-libs/exec" + lhtypes "github.com/longhorn/go-common-libs/types" +) + +var ( + TgtdRetryCounts = 5 + TgtdRetryInterval = 1 * time.Second +) + +const ( + tgtBinary = "tgtadm" + + maxTargetID = 4095 + + logFile = "/var/log/tgtd.log" +) + +// CreateTarget will create a iSCSI target using the name specified. If name is +// unspecified, a name will be generated. Notice the name must comply with iSCSI +// name format. +func CreateTarget(tid int, name string) error { + opts := []string{ + "--lld", "iscsi", + "--op", "new", + "--mode", "target", + "--tid", strconv.Itoa(tid), + "-T", name, + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// DeleteTarget will remove a iSCSI target specified by tid +func DeleteTarget(tid int) error { + opts := []string{ + "--lld", "iscsi", + "--op", "delete", + "--mode", "target", + "--tid", strconv.Itoa(tid), + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// AddLunBackedByFile will add a LUN in an existing target, which backing by +// specified file. +func AddLunBackedByFile(tid int, lun int, backingFile string) error { + opts := []string{ + "--lld", "iscsi", + "--op", "new", + "--mode", "logicalunit", + "--tid", strconv.Itoa(tid), + "--lun", strconv.Itoa(lun), + "-b", backingFile, + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// AddLun will add a LUN in an existing target, which backing by +// specified file, using AIO backing-store +func AddLun(tid int, lun int, backingFile string, bstype string, bsopts string) error { + if !CheckTargetForBackingStore(bstype) { + return fmt.Errorf("backing-store %s is not supported", bstype) + } + opts := []string{ + "--lld", "iscsi", + "--op", "new", + "--mode", "logicalunit", + "--tid", strconv.Itoa(tid), + "--lun", strconv.Itoa(lun), + "-b", backingFile, + "--bstype", bstype, + } + if bsopts != "" { + opts = append(opts, "--bsopts", bsopts) + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// UpdateLun will update parameters for the LUN +func UpdateLun(tid int, lun int, params map[string]string) error { + opts := []string{ + "--lld", "iscsi", + "--op", "update", + "--mode", "logicalunit", + "--tid", strconv.Itoa(tid), + "--lun", strconv.Itoa(lun), + } + if len(params) != 0 { + paramStr := "" + for k, v := range params { + paramStr += fmt.Sprintf("%s=%s,", k, v) + } + opts = append(opts, "--params", strings.TrimSuffix(paramStr, ",")) + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// SetLunThinProvisioning will set param thin_provisioning to true for the LUN +func SetLunThinProvisioning(tid int, lun int) error { + return UpdateLun(tid, lun, map[string]string{"thin_provisioning": "1"}) +} + +// DisableWriteCache will set param write-cache to false for the LUN +func DisableWriteCache(tid int, lun int) error { + // Mode page 8 is the caching mode page + // Refer to "Caching Mode page (08h)" in SCSI Commands Reference Manual for more information. + // https://www.seagate.com/files/staticfiles/support/docs/manual/Interface%20manuals/100293068j.pdf + // https://github.com/fujita/tgt/blob/master/scripts/tgt-admin#L418 + return UpdateLun(tid, lun, map[string]string{"mode_page": "8:0:18:0x10:0:0xff:0xff:0:0:0xff:0xff:0xff:0xff:0x80:0x14:0:0:0:0:0:0"}) +} + +// DeleteLun will remove a LUN from an target +func DeleteLun(tid int, lun int) error { + opts := []string{ + "--lld", "iscsi", + "--op", "delete", + "--mode", "logicalunit", + "--tid", strconv.Itoa(tid), + "--lun", strconv.Itoa(lun), + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// ExpandLun will update the size for the LUN. +// This is valid only for the customized tgt https://github.com/rancher/tgt/ +func ExpandLun(tid, lun int, size int64) error { + opts := []string{ + "--lld", "iscsi", + "--op", "update", + "--mode", "logicalunit", + "--tid", strconv.Itoa(tid), + "--lun", strconv.Itoa(lun), + "--params", fmt.Sprintf("bsopts=size=%d", size), + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// BindInitiator will add permission to allow certain initiator(s) to connect to +// certain target. "ALL" is a special initiator which is the wildcard +func BindInitiator(tid int, initiator string) error { + opts := []string{ + "--lld", "iscsi", + "--op", "bind", + "--mode", "target", + "--tid", strconv.Itoa(tid), + "-I", initiator, + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// UnbindInitiator will remove permission to allow certain initiator(s) to connect to +// certain target. +func UnbindInitiator(tid int, initiator string) error { + opts := []string{ + "--lld", "iscsi", + "--op", "unbind", + "--mode", "target", + "--tid", strconv.Itoa(tid), + "-I", initiator, + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +// StartDaemon will start tgtd daemon, prepare for further commands +func StartDaemon(debug bool) error { + if CheckTargetForBackingStore("rdwr") { + fmt.Fprintf(os.Stderr, "go-iscsi-helper: tgtd is already running\n") + return nil + } + + logf, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + go startDaemon(logf, debug) + + // Wait until daemon is up + daemonIsRunning := false + for i := 0; i < TgtdRetryCounts; i++ { + if CheckTargetForBackingStore("rdwr") { + daemonIsRunning = true + break + } + time.Sleep(TgtdRetryInterval) + } + if !daemonIsRunning { + return fmt.Errorf("failed to start tgtd daemon") + } + return nil +} + +func startDaemon(logf *os.File, debug bool) { + defer logf.Close() + + opts := []string{ + "-f", + } + if debug { + opts = append(opts, "-d", "1") + } + cmd := exec.Command("tgtd", opts...) + mw := io.MultiWriter(os.Stderr, logf) + cmd.Stdout = mw + cmd.Stderr = mw + if err := cmd.Run(); err != nil { + if CheckTargetForBackingStore("rdwr") { + fmt.Fprintf(mw, "go-iscsi-helper: tgtd is already running\n") + return + } + fmt.Fprintf(mw, "go-iscsi-helper: command failed: %v\n", err) + panic(err) + } + fmt.Fprintln(mw, "go-iscsi-helper: done") +} + +func CheckTargetForBackingStore(name string) bool { + opts := []string{ + "--lld", "iscsi", + "--op", "show", + "--mode", "system", + } + output, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return false + } + return strings.Contains(output, " "+name) +} + +// GetTargetTid If returned TID is -1, then target doesn't exist, but we won't +// return error +func GetTargetTid(name string) (int, error) { + opts := []string{ + "--lld", "iscsi", + "--op", "show", + "--mode", "target", + } + output, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return -1, err + } + /* Output will looks like: + Target 1: iqn.2016-08.com.example:a + System information: + ... + Target 2: iqn.2016-08.com.example:b + System information: + ... + */ + tid := -1 + scanner := bufio.NewScanner(strings.NewReader(output)) + for scanner.Scan() { + if strings.HasSuffix(scanner.Text(), " "+name) { + tidString := strings.Fields(strings.Split(scanner.Text(), ":")[0])[1] + tid, err = strconv.Atoi(tidString) + if err != nil { + return -1, errors.Wrapf(err, "BUG: Failed to parse %s", tidString) + } + break + } + } + return tid, nil +} + +func ShutdownTgtd() error { + opts := []string{ + "--op", "delete", + "--mode", "system", + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +func GetTargetConnections(tid int) (map[string][]string, error) { + opts := []string{ + "--lld", "iscsi", + "--op", "show", + "--mode", "conn", + "--tid", strconv.Itoa(tid), + } + output, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return nil, err + } + /* Output will looks like: + Session: 11 + Connection: 0 + Initiator: iqn.2016-08.com.example:a + IP Address: 192.168.0.1 + Session: 12 + Connection: 1 + Initiator: iqn.2016-08.com.example:a + IP Address: 192.168.0.2 + ... + */ + res := map[string][]string{} + currentSIDString := "" + currentCIDStringList := []string{} + scanner := bufio.NewScanner(strings.NewReader(output)) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if strings.HasPrefix(line, "Session: ") { + if currentSIDString != "" { + res[currentSIDString] = currentCIDStringList + } + sidFields := strings.Split(line, ": ") + if len(sidFields) != 2 { + return nil, fmt.Errorf("failed to parse and get session id from line %v", line) + } + sidString := sidFields[1] + if _, err := strconv.Atoi(sidString); err != nil { + return nil, err + } + currentSIDString = sidString + currentCIDStringList = []string{} + } + if strings.HasPrefix(line, "Connection: ") { + cidFields := strings.Split(line, ": ") + if len(cidFields) != 2 { + return nil, fmt.Errorf("failed to parse and get connection id from line %v", line) + } + cidString := cidFields[1] + if _, err := strconv.Atoi(cidString); err != nil { + return nil, err + } + currentCIDStringList = append(currentCIDStringList, cidString) + } + } + if len(currentCIDStringList) != 0 { + res[currentSIDString] = currentCIDStringList + } + return res, nil +} + +func CloseConnection(tid int, sid, cid string) error { + opts := []string{ + "--lld", "iscsi", + "--op", "delete", + "--mode", "conn", + "--tid", strconv.Itoa(tid), + "--sid", sid, + "--cid", cid, + } + _, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + return err +} + +func FindNextAvailableTargetID() (int, error) { + existingTids := map[int]struct{}{} + opts := []string{ + "--lld", "iscsi", + "--op", "show", + "--mode", "target", + } + output, err := lhexec.NewExecutor().Execute(nil, tgtBinary, opts, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return -1, err + } + /* Output will looks like: + Target 1: iqn.2016-08.com.example:a + System information: + ... + Target 2: iqn.2016-08.com.example:b + System information: + ... + */ + scanner := bufio.NewScanner(strings.NewReader(output)) + for scanner.Scan() { + if strings.HasPrefix(scanner.Text(), "Target ") { + tidString := strings.Fields(strings.Split(scanner.Text(), ":")[0])[1] + tid, err := strconv.Atoi(tidString) + if err != nil { + return -1, errors.Wrapf(err, "BUG: Failed to parse %s", tidString) + } + existingTids[tid] = struct{}{} + } + } + for i := 1; i < maxTargetID; i++ { + if _, exists := existingTids[i]; !exists { + return i, nil + } + } + return -1, fmt.Errorf("cannot find an available target ID") +} diff --git a/vendor/github.com/longhorn/go-iscsi-helper/iscsidev/iscsi.go b/vendor/github.com/longhorn/go-iscsi-helper/iscsidev/iscsi.go new file mode 100644 index 000000000..06e7c34f0 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/iscsidev/iscsi.go @@ -0,0 +1,379 @@ +package iscsidev + +import ( + "fmt" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/longhorn/go-iscsi-helper/iscsi" + "github.com/longhorn/go-iscsi-helper/types" + "github.com/longhorn/go-iscsi-helper/util" + + lhns "github.com/longhorn/go-common-libs/ns" + lhtypes "github.com/longhorn/go-common-libs/types" +) + +var ( + LockFile = "/var/run/longhorn-iscsi.lock" + LockTimeout = 120 * time.Second + + TargetLunID = 1 + + RetryCounts = 5 + RetryIntervalSCSI = 3 * time.Second + RetryIntervalTargetID = 500 * time.Millisecond +) + +type ScsiDeviceParameters struct { + ScsiTimeout int64 +} + +type IscsiDeviceParameters struct { + IscsiAbortTimeout int64 +} + +type Device struct { + Target string + KernelDevice *lhtypes.BlockDeviceInfo + + ScsiDeviceParameters + IscsiDeviceParameters + + BackingFile string + BSType string + BSOpts string + + targetID int + + nsexec *lhns.Executor +} + +func NewDevice(name, backingFile, bsType, bsOpts string, scsiTimeout, iscsiAbortTimeout int64) (*Device, error) { + namespaces := []lhtypes.Namespace{lhtypes.NamespaceMnt, lhtypes.NamespaceNet} + nsexec, err := lhns.NewNamespaceExecutor(util.ISCSIdProcess, lhtypes.HostProcDirectory, namespaces) + if err != nil { + return nil, err + } + + dev := &Device{ + Target: GetTargetName(name), + ScsiDeviceParameters: ScsiDeviceParameters{ + ScsiTimeout: scsiTimeout, + }, + IscsiDeviceParameters: IscsiDeviceParameters{ + IscsiAbortTimeout: iscsiAbortTimeout, + }, + BackingFile: backingFile, + BSType: bsType, + BSOpts: bsOpts, + nsexec: nsexec, + } + return dev, nil +} + +func Volume2ISCSIName(name string) string { + return strings.Replace(name, "_", ":", -1) +} + +func GetTargetName(volumeName string) string { + return "iqn.2019-10.io.longhorn:" + Volume2ISCSIName(volumeName) +} + +func (dev *Device) ReloadTargetID() error { + tid, err := iscsi.GetTargetTid(dev.Target) + if err != nil { + return err + } + dev.targetID = tid + return nil +} + +func (dev *Device) CreateTarget() (err error) { + // Start tgtd daemon if it's not already running + if err := iscsi.StartDaemon(false); err != nil { + return err + } + + tid := 0 + for i := 0; i < RetryCounts; i++ { + if tid, err = iscsi.FindNextAvailableTargetID(); err != nil { + return err + } + logrus.Infof("go-iscsi-helper: found available target id %v", tid) + err = iscsi.CreateTarget(tid, dev.Target) + if err == nil { + dev.targetID = tid + break + } + logrus.Infof("go-iscsi-helper: failed to use target id %v, retrying with a new target ID: err %v", tid, err) + time.Sleep(RetryIntervalTargetID) + continue + } + if err != nil { + return err + } + + if err := iscsi.AddLun(dev.targetID, TargetLunID, dev.BackingFile, dev.BSType, dev.BSOpts); err != nil { + return err + } + // Cannot modify the parameters for the LUNs during the adding stage + if err := iscsi.SetLunThinProvisioning(dev.targetID, TargetLunID); err != nil { + return err + } + // Longhorn reads and writes data with direct io rather than buffer io, so + // the write cache is actually disabled in the implementation. + // Explicitly disable the write cache for meeting the SCSI specification. + if err := iscsi.DisableWriteCache(dev.targetID, TargetLunID); err != nil { + return err + } + if err := iscsi.BindInitiator(dev.targetID, "ALL"); err != nil { + return err + } + return nil +} + +func (dev *Device) StartInitator() error { + lock := lhns.NewLock(LockFile, LockTimeout) + if err := lock.Lock(); err != nil { + return errors.Wrap(err, "failed to lock") + } + defer lock.Unlock() + + if err := iscsi.CheckForInitiatorExistence(dev.nsexec); err != nil { + return err + } + + localIP, err := util.GetIPToHost() + if err != nil { + return err + } + + // Setup initiator + for i := 0; i < RetryCounts; i++ { + err := iscsi.DiscoverTarget(localIP, dev.Target, dev.nsexec) + if iscsi.IsTargetDiscovered(localIP, dev.Target, dev.nsexec) { + break + } + + logrus.WithError(err).Warnf("Failed to discover") + // This is a trick to recover from the case. Remove the + // empty entries in /etc/iscsi/nodes/. If one of the entry + // is empty it will triggered the issue. + if err := iscsi.CleanupScsiNodes(dev.Target); err != nil { + logrus.WithError(err).Warnf("Failed to clean up nodes for %v", dev.Target) + } else { + logrus.Warnf("Nodes cleaned up for %v", dev.Target) + } + + time.Sleep(RetryIntervalSCSI) + } + if err := iscsi.UpdateIscsiDeviceAbortTimeout(dev.Target, dev.IscsiAbortTimeout, dev.nsexec); err != nil { + return err + } + if err := iscsi.LoginTarget(localIP, dev.Target, dev.nsexec); err != nil { + return err + } + if dev.KernelDevice, err = iscsi.GetDevice(localIP, dev.Target, TargetLunID, dev.nsexec); err != nil { + return err + } + if err := iscsi.UpdateScsiDeviceTimeout(dev.KernelDevice.Name, dev.ScsiTimeout, dev.nsexec); err != nil { + return err + } + + return nil +} + +// ReloadInitiator does nothing for the iSCSI initiator/target except for +// updating the timeout. It is mainly responsible for initializing the struct +// field `dev.KernelDevice`. +func (dev *Device) ReloadInitiator() error { + lock := lhns.NewLock(LockFile, LockTimeout) + if err := lock.Lock(); err != nil { + return errors.Wrap(err, "failed to lock") + } + defer lock.Unlock() + + if err := iscsi.CheckForInitiatorExistence(dev.nsexec); err != nil { + return err + } + + localIP, err := util.GetIPToHost() + if err != nil { + return err + } + + if err := iscsi.DiscoverTarget(localIP, dev.Target, dev.nsexec); err != nil { + return err + } + + if !iscsi.IsTargetDiscovered(localIP, dev.Target, dev.nsexec) { + return fmt.Errorf("failed to discover target %v for the initiator", dev.Target) + } + + if err := iscsi.UpdateIscsiDeviceAbortTimeout(dev.Target, dev.IscsiAbortTimeout, dev.nsexec); err != nil { + return err + } + if dev.KernelDevice, err = iscsi.GetDevice(localIP, dev.Target, TargetLunID, dev.nsexec); err != nil { + return err + } + + return iscsi.UpdateScsiDeviceTimeout(dev.KernelDevice.Name, dev.ScsiTimeout, dev.nsexec) +} + +func (dev *Device) StopInitiator() error { + lock := lhns.NewLock(LockFile, LockTimeout) + if err := lock.Lock(); err != nil { + return errors.Wrap(err, "failed to lock") + } + defer lock.Unlock() + + if err := LogoutTarget(dev.Target, dev.nsexec); err != nil { + return errors.Wrapf(err, "failed to logout target") + } + return nil +} + +func (dev *Device) RefreshInitiator() error { + lock := lhns.NewLock(LockFile, LockTimeout) + if err := lock.Lock(); err != nil { + return errors.Wrap(err, "failed to lock") + } + defer lock.Unlock() + + if err := iscsi.CheckForInitiatorExistence(dev.nsexec); err != nil { + return err + } + + ip, err := util.GetIPToHost() + if err != nil { + return err + } + + return iscsi.RescanTarget(ip, dev.Target, dev.nsexec) +} + +func LogoutTarget(target string, nsexec *lhns.Executor) error { + if err := iscsi.CheckForInitiatorExistence(nsexec); err != nil { + return err + } + if iscsi.IsTargetLoggedIn("", target, nsexec) { + var err error + loggingOut := false + + logrus.Infof("Shutting down iSCSI device for target %v", target) + for i := 0; i < RetryCounts; i++ { + // New IP may be different from the IP in the previous record. + // https://github.com/longhorn/longhorn/issues/1920 + err = iscsi.LogoutTarget("", target, nsexec) + // Ignore Not Found error + if err == nil || strings.Contains(err.Error(), "exit status 21") { + err = nil + break + } + // The timeout for response may return in the future, + // check session to know if it's logged out or not + if strings.Contains(err.Error(), "Timeout executing: ") { + loggingOut = true + break + } + time.Sleep(RetryIntervalSCSI) + } + // Wait for device to logout + if loggingOut { + logrus.Infof("Logging out iSCSI device timeout, waiting for logout complete") + for i := 0; i < RetryCounts; i++ { + if !iscsi.IsTargetLoggedIn("", target, nsexec) { + err = nil + break + } + time.Sleep(RetryIntervalSCSI) + } + } + if err != nil { + return errors.Wrapf(err, "failed to logout target") + } + /* + * Immediately delete target after logout may result in error: + * + * "Could not execute operation on all records: encountered + * iSCSI database failure" in iscsiadm + * + * This happenes especially there are other iscsiadm db + * operations go on at the same time. + * Retry to workaround this issue. Also treat "exit status + * 21"(no record found) as valid result + */ + for i := 0; i < RetryCounts; i++ { + if !iscsi.IsTargetDiscovered("", target, nsexec) { + err = nil + break + } + + err = iscsi.DeleteDiscoveredTarget("", target, nsexec) + // Ignore Not Found error + if err == nil || strings.Contains(err.Error(), "exit status 21") { + err = nil + break + } + time.Sleep(RetryIntervalSCSI) + } + if err != nil { + return err + } + } + return nil +} + +func (dev *Device) DeleteTarget() error { + if tid, err := iscsi.GetTargetTid(dev.Target); err == nil && tid != -1 { + if tid != dev.targetID && dev.targetID != 0 { + logrus.Errorf("BUG: Invalid TID %v found for %v, was %v", tid, dev.Target, dev.targetID) + } + + logrus.Infof("Shutting down iSCSI target %v", dev.Target) + + // UnbindInitiator can return tgtadmSuccess, tgtadmAclNoexist or tgtadmNoTarget + // Target is deleted in the last step, so tgtadmNoTarget error should not occur here. + // Just ignore tgtadmAclNoexist and continue working on the remaining tasks. + if err := iscsi.UnbindInitiator(tid, "ALL"); err != nil { + if !strings.Contains(err.Error(), types.TgtadmAclNoexist) { + return err + } + logrus.WithError(err).Warnf("failed to unbind initiator target id %v", tid) + } + + sessionConnectionsMap, err := iscsi.GetTargetConnections(tid) + if err != nil { + return err + } + for sid, cidList := range sessionConnectionsMap { + for _, cid := range cidList { + if err := iscsi.CloseConnection(tid, sid, cid); err != nil { + return err + } + } + } + + if err := iscsi.DeleteLun(tid, TargetLunID); err != nil { + return err + } + + if err := iscsi.DeleteTarget(tid); err != nil { + return err + } + } + return nil +} + +func (dev *Device) UpdateScsiBackingStore(bsType, bsOpts string) error { + dev.BSType = bsType + dev.BSOpts = bsOpts + return nil +} + +func (dev *Device) ExpandTarget(size int64) error { + return iscsi.ExpandLun(dev.targetID, TargetLunID, size) +} diff --git a/vendor/github.com/longhorn/go-iscsi-helper/longhorndev/dev.go b/vendor/github.com/longhorn/go-iscsi-helper/longhorndev/dev.go new file mode 100644 index 000000000..35170eac2 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/longhorndev/dev.go @@ -0,0 +1,465 @@ +package longhorndev + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "sync" + "time" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/longhorn/go-iscsi-helper/iscsidev" + "github.com/longhorn/go-iscsi-helper/types" + "github.com/longhorn/go-iscsi-helper/util" +) + +const ( + SocketDirectory = "/var/run" + DevPath = "/dev/longhorn/" + + WaitInterval = time.Second + WaitCount = 30 +) + +type LonghornDevice struct { + *sync.RWMutex + name string //VolumeName + size int64 + frontend string + endpoint string + scsiTimeout int64 + iscsiAbortTimeout int64 + iscsiTargetRequestTimeout int64 + + scsiDevice *iscsidev.Device +} + +type DeviceService interface { + GetFrontend() string + SetFrontend(frontend string) error + UnsetFrontendCheck() error + UnsetFrontend() + GetEndpoint() string + Enabled() bool + + InitDevice() error + Start() error + Shutdown() error + PrepareUpgrade() error + FinishUpgrade() error + Expand(size int64) error +} + +type DeviceCreator interface { + NewDevice(name string, size int64, frontend string) (DeviceService, error) +} + +type LonghornDeviceCreator struct{} + +func (ldc *LonghornDeviceCreator) NewDevice(name string, size int64, frontend string, scsiTimeout, iscsiAbortTimeout, iscsiTargetRequestTimeout int64) (DeviceService, error) { + if name == "" || size == 0 { + return nil, fmt.Errorf("invalid parameter for creating Longhorn device") + } + dev := &LonghornDevice{ + RWMutex: &sync.RWMutex{}, + name: name, + size: size, + scsiTimeout: scsiTimeout, + iscsiAbortTimeout: iscsiAbortTimeout, + iscsiTargetRequestTimeout: iscsiTargetRequestTimeout, + } + if err := dev.SetFrontend(frontend); err != nil { + return nil, err + } + return dev, nil +} + +func (d *LonghornDevice) InitDevice() error { + d.Lock() + defer d.Unlock() + + if d.scsiDevice != nil { + return nil + } + + if err := d.initScsiDevice(); err != nil { + return err + } + + // Try to cleanup possible leftovers. + return d.shutdownFrontend() +} + +// call with lock hold +func (d *LonghornDevice) initScsiDevice() error { + bsOpts := fmt.Sprintf("size=%v;request_timeout=%v", d.size, d.iscsiTargetRequestTimeout) + scsiDev, err := iscsidev.NewDevice(d.name, d.GetSocketPath(), "longhorn", bsOpts, d.scsiTimeout, d.iscsiAbortTimeout) + if err != nil { + return err + } + d.scsiDevice = scsiDev + + return nil +} + +func (d *LonghornDevice) Start() error { + stopCh := make(chan struct{}) + if err := <-d.WaitForSocket(stopCh); err != nil { + return err + } + + return d.startScsiDevice(true) +} + +func (d *LonghornDevice) startScsiDevice(startScsiDevice bool) (err error) { + d.Lock() + defer d.Unlock() + + switch d.frontend { + case types.FrontendTGTBlockDev: + // If iSCSI device is not started here, e.g., device upgrade, + // d.scsiDevice.KernelDevice is nil. + if startScsiDevice { + if d.scsiDevice == nil { + return fmt.Errorf("there is no iSCSI device during the frontend %v starts", d.frontend) + } + if err := d.scsiDevice.CreateTarget(); err != nil { + return err + } + if err := d.scsiDevice.StartInitator(); err != nil { + return err + } + if err := d.createDev(); err != nil { + return err + } + logrus.Infof("device %v: iSCSI device %s created", d.name, d.scsiDevice.KernelDevice.Name) + } else { + if err := d.scsiDevice.ReloadTargetID(); err != nil { + return err + } + if err := d.scsiDevice.ReloadInitiator(); err != nil { + return err + } + logrus.Infof("device %v: iSCSI device %s reloaded the target and the initiator", d.name, d.scsiDevice.KernelDevice.Name) + } + + d.endpoint = d.getDev() + + case types.FrontendTGTISCSI: + if startScsiDevice { + if d.scsiDevice == nil { + return fmt.Errorf("there is no iSCSI device during the frontend %v starts", d.frontend) + } + if err := d.scsiDevice.CreateTarget(); err != nil { + return err + } + logrus.Infof("device %v: iSCSI target %s created", d.name, d.scsiDevice.Target) + } else { + if err := d.scsiDevice.ReloadTargetID(); err != nil { + return err + } + logrus.Infof("device %v: iSCSI target %s reloaded the target ID", d.name, d.scsiDevice.Target) + } + + d.endpoint = d.scsiDevice.Target + + default: + return fmt.Errorf("unknown frontend %v", d.frontend) + } + + logrus.Debugf("device %v: frontend start succeed", d.name) + return nil +} + +func (d *LonghornDevice) Shutdown() error { + d.Lock() + defer d.Unlock() + + if d.scsiDevice == nil { + return nil + } + + if err := d.shutdownFrontend(); err != nil { + return err + } + + d.scsiDevice = nil + d.endpoint = "" + + return nil +} + +// call with lock hold +func (d *LonghornDevice) shutdownFrontend() error { + switch d.frontend { + case types.FrontendTGTBlockDev: + dev := d.getDev() + if err := util.RemoveDevice(dev); err != nil { + return errors.Wrapf(err, "device %v: failed to remove device %s", d.name, dev) + } + if err := d.scsiDevice.StopInitiator(); err != nil { + return errors.Wrapf(err, "device %v: failed to stop iSCSI device", d.name) + } + if err := d.scsiDevice.DeleteTarget(); err != nil { + return errors.Wrapf(err, "device %v: failed to delete target %v", d.name, d.scsiDevice.Target) + } + logrus.Infof("device %v: iSCSI device %v shutdown", d.name, dev) + case types.FrontendTGTISCSI: + if err := d.scsiDevice.DeleteTarget(); err != nil { + return errors.Wrapf(err, "device %v: failed to delete target %v", d.name, d.scsiDevice.Target) + } + logrus.Infof("device %v: iSCSI target %v ", d.name, d.scsiDevice.Target) + case "": + logrus.Infof("device %v: skip shutdown frontend since it's not enabled", d.name) + default: + return fmt.Errorf("device %v: unknown frontend %v", d.name, d.frontend) + } + + return nil +} + +func (d *LonghornDevice) WaitForSocket(stopCh chan struct{}) chan error { + errCh := make(chan error) + go func(errCh chan error, stopCh chan struct{}) { + socket := d.GetSocketPath() + timeout := time.After(time.Duration(WaitCount) * WaitInterval) + ticker := time.NewTicker(WaitInterval) + defer ticker.Stop() + tick := ticker.C + for { + select { + case <-timeout: + errCh <- fmt.Errorf("device %v: wait for socket %v timed out", d.name, socket) + case <-tick: + if _, err := os.Stat(socket); err == nil { + errCh <- nil + return + } + logrus.Infof("device %v: waiting for socket %v to show up", d.name, socket) + case <-stopCh: + logrus.Infof("device %v: stop wait for socket routine", d.name) + return + } + } + }(errCh, stopCh) + + return errCh +} + +func (d *LonghornDevice) GetSocketPath() string { + return filepath.Join(SocketDirectory, "longhorn-"+d.name+".sock") +} + +// call with lock hold +func (d *LonghornDevice) getDev() string { + return filepath.Join(DevPath, d.name) +} + +// call with lock hold +func (d *LonghornDevice) createDev() error { + if _, err := os.Stat(DevPath); os.IsNotExist(err) { + if err := os.MkdirAll(DevPath, 0755); err != nil { + logrus.Fatalf("device %v: cannot create directory %v", d.name, DevPath) + } + } + + dev := d.getDev() + if _, err := os.Stat(dev); err == nil { + logrus.Warnf("Device %s already exists, clean it up", dev) + if err := util.RemoveDevice(dev); err != nil { + return errors.Wrapf(err, "cannot clean up block device file %v", dev) + } + } + + if err := util.DuplicateDevice(d.scsiDevice.KernelDevice, dev); err != nil { + return err + } + + logrus.Debugf("device %v: Device %s is ready", d.name, dev) + + return nil +} + +func (d *LonghornDevice) PrepareUpgrade() error { + if d.frontend == "" { + return nil + } + + if err := util.RemoveFile(d.GetSocketPath()); err != nil { + return errors.Wrapf(err, "failed to remove socket %v", d.GetSocketPath()) + } + return nil +} + +func (d *LonghornDevice) FinishUpgrade() (err error) { + if d.frontend == "" { + return nil + } + + stopCh := make(chan struct{}) + socketError := d.WaitForSocket(stopCh) + select { + case err = <-socketError: + if err != nil { + logrus.Errorf("error waiting for the socket %v", err) + err = errors.Wrapf(err, "error waiting for the socket") + } + break + default: + } + close(stopCh) + close(socketError) + + if err != nil { + return err + } + + // TODO: Need to fix `ReloadSocketConnection` since it doesn't work for frontend `FrontendTGTISCSI`. + if err := d.ReloadSocketConnection(); err != nil { + return err + } + + d.Lock() + if err := d.initScsiDevice(); err != nil { + d.Unlock() + return err + } + d.Unlock() + + return d.startScsiDevice(false) +} + +func (d *LonghornDevice) ReloadSocketConnection() error { + d.RLock() + dev := d.getDev() + d.RUnlock() + + cmd := exec.Command("sg_raw", dev, "a6", "00", "00", "00", "00", "00") + if err := cmd.Run(); err != nil { + return errors.Wrapf(err, "failed to reload socket connection at %v", dev) + } + logrus.Infof("Reloaded completed for device %v", dev) + return nil +} + +func (d *LonghornDevice) SetFrontend(frontend string) error { + if frontend != types.FrontendTGTBlockDev && frontend != types.FrontendTGTISCSI && frontend != "" { + return fmt.Errorf("invalid frontend %v", frontend) + } + + d.Lock() + defer d.Unlock() + if d.frontend != "" { + if d.frontend != frontend { + return fmt.Errorf("engine frontend %v is already up and cannot be set to %v", d.frontend, frontend) + } + if d.scsiDevice != nil { + logrus.Infof("Engine frontend %v is already up", frontend) + return nil + } + // d.scsiDevice == nil + return fmt.Errorf("engine frontend had been set to %v, but its frontend cannot be started before engine manager shutdown its frontend", frontend) + } + + if d.scsiDevice != nil { + return fmt.Errorf("BUG: engine launcher frontend is empty but scsi device hasn't been cleanup in frontend start") + } + + d.frontend = frontend + + return nil +} + +func (d *LonghornDevice) UnsetFrontendCheck() error { + d.Lock() + defer d.Unlock() + + if d.scsiDevice == nil { + d.frontend = "" + logrus.Debugf("Engine frontend is already down") + return nil + } + + if d.frontend == "" { + return fmt.Errorf("BUG: engine launcher frontend is empty but scsi device hasn't been cleanup in frontend shutdown") + } + return nil +} + +func (d *LonghornDevice) UnsetFrontend() { + d.Lock() + defer d.Unlock() + + d.frontend = "" +} + +func (d *LonghornDevice) Enabled() bool { + d.RLock() + defer d.RUnlock() + return d.scsiDevice != nil +} + +func (d *LonghornDevice) GetEndpoint() string { + d.RLock() + defer d.RUnlock() + return d.endpoint +} + +func (d *LonghornDevice) GetFrontend() string { + d.RLock() + defer d.RUnlock() + return d.frontend +} + +func (d *LonghornDevice) Expand(size int64) (err error) { + d.Lock() + defer d.Unlock() + + if d.size > size { + return fmt.Errorf("device %v: cannot expand the device from size %v to a smaller size %v", d.name, d.size, size) + } else if d.size == size { + return nil + } + + defer func() { + if err == nil { + d.size = size + } + }() + + if d.scsiDevice == nil { + logrus.Info("Device: No need to do anything for the expansion since the frontend is shutdown") + return nil + } + if err := d.scsiDevice.UpdateScsiBackingStore("longhorn", fmt.Sprintf("size=%v", size)); err != nil { + return err + } + + switch d.frontend { + case types.FrontendTGTBlockDev: + logrus.Infof("Device %v: Expanding frontend %v target %v", d.name, d.frontend, d.scsiDevice.Target) + if err := d.scsiDevice.ExpandTarget(size); err != nil { + return fmt.Errorf("device %v: fail to expand target %v: %v", d.name, d.scsiDevice.Target, err) + } + logrus.Infof("Device %v: Refreshing/Rescanning frontend %v initiator for the expansion", d.name, d.frontend) + if err := d.scsiDevice.RefreshInitiator(); err != nil { + return fmt.Errorf("device %v: fail to refresh iSCSI initiator: %v", d.name, err) + } + logrus.Infof("Device %v: Expanded frontend %v size to %d", d.name, d.frontend, size) + case types.FrontendTGTISCSI: + logrus.Infof("Device %v: Frontend is expanding the target %v", d.name, d.scsiDevice.Target) + if err := d.scsiDevice.ExpandTarget(size); err != nil { + return fmt.Errorf("device %v: fail to expand target %v: %v", d.name, d.scsiDevice.Target, err) + } + logrus.Infof("Device %v: Expanded frontend %v size to %d, users need to refresh/rescan the initiator by themselves", d.name, d.frontend, size) + case "": + logrus.Infof("Device %v: skip expansion since the frontend not enabled", d.name) + default: + return fmt.Errorf("failed to expand device %v: unknown frontend %v", d.name, d.frontend) + } + + return nil +} diff --git a/vendor/github.com/longhorn/go-iscsi-helper/types/tgtadm_errors.go b/vendor/github.com/longhorn/go-iscsi-helper/types/tgtadm_errors.go new file mode 100644 index 000000000..1b7215c12 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/types/tgtadm_errors.go @@ -0,0 +1,30 @@ +package types + +// errors are from tgt/usr/tgtadm_error.h and tgt/usr/tgtadm.c +const ( + TgtadmSuccess = "success" + TgtadmUnknown = "unknown error" + TgtadmNomem = "out of memory" + TgtadmNoDriver = "can't find the driver" + TgtadmNoTarget = "can't find the target" + TgtadmNoLun = "can't find the logical unit" + TgtadmNoSession = "can't find the session" + TgtadmNoConnection = "can't find the connection" + TgtadmNoBinding = "can't find the binding" + TgtadmTargetExist = "this target already exists" + TgtadmBindingExist = "this binding already exists" + TgtadmLunExist = "this logical unit number already exists" + TgtadmAclExist = "this access control rule already exists" + TgtadmAclNoexist = "this access control rule does not exist" + TgtadmUserExist = "this account already exists" + TgtadmNoUser = "can't find the account" + TgtadmTooManyUser = "too many accounts" + TgtadmInvalidRequest = "invalid request" + TgtadmOutAccountExist = "this target already has an outgoing account" + TgtadmTargetActive = "this target is still active" + TgtadmLunActive = "this logical unit is still active" + TgtadmDriverActive = "this driver is busy" + TgtadmUnsupportedOperation = "this operation isn't supported" + TgtadmUnknownParam = "unknown parameter" + TgtadmPreventRemoval = "this device has Prevent Removal set" +) diff --git a/vendor/github.com/longhorn/go-iscsi-helper/types/types.go b/vendor/github.com/longhorn/go-iscsi-helper/types/types.go new file mode 100644 index 000000000..d85a3c436 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/types/types.go @@ -0,0 +1,6 @@ +package types + +const ( + FrontendTGTBlockDev = "tgt-blockdev" + FrontendTGTISCSI = "tgt-iscsi" +) diff --git a/vendor/github.com/longhorn/go-iscsi-helper/util/process.go b/vendor/github.com/longhorn/go-iscsi-helper/util/process.go new file mode 100644 index 000000000..54412f519 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/util/process.go @@ -0,0 +1,18 @@ +package util + +import ( + "fmt" + + lhproc "github.com/longhorn/go-common-libs/proc" +) + +const ISCSIdProcess = "iscsid" + +func GetISCSIdNamespaceDirectory(procDir string) (string, error) { + pids, err := lhproc.GetProcessPIDs(ISCSIdProcess, procDir) + if err != nil { + return "", err + } + + return lhproc.GetNamespaceDirectory(procDir, fmt.Sprint(pids[0])), nil +} diff --git a/vendor/github.com/longhorn/go-iscsi-helper/util/util.go b/vendor/github.com/longhorn/go-iscsi-helper/util/util.go new file mode 100644 index 000000000..a51a2b707 --- /dev/null +++ b/vendor/github.com/longhorn/go-iscsi-helper/util/util.go @@ -0,0 +1,121 @@ +package util + +import ( + "fmt" + "net" + "os" + + "strings" + "time" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" + + lhtypes "github.com/longhorn/go-common-libs/types" +) + +func getIPFromAddrs(addrs []net.Addr) string { + for _, addr := range addrs { + if ip, ok := addr.(*net.IPNet); ok && ip.IP.IsGlobalUnicast() { + return strings.Split(ip.IP.String(), "/")[0] + } + } + return "" +} + +func GetIPToHost() (string, error) { + ifaces, err := net.Interfaces() + if err != nil { + return "", err + } + // TODO: This is a workaround, we want to get the interface IP connect + // to the host, it's likely eth1 with one network attached to the host. + for _, iface := range ifaces { + if iface.Name == "eth1" { + addrs, err := iface.Addrs() + if err != nil { + return "", err + } + ip := getIPFromAddrs(addrs) + if ip != "" { + return ip, nil + } + } + } + // And there is no eth1, so get the first real ip + addrs, err := net.InterfaceAddrs() + if err != nil { + return "", err + } + ip := getIPFromAddrs(addrs) + if ip != "" { + return ip, nil + } + return "", fmt.Errorf("cannot find IP connect to the host") +} + +func RemoveFile(file string) error { + if _, err := os.Stat(file); os.IsNotExist(err) { + // file doesn't exist + return nil + } + + if err := remove(file); err != nil { + return errors.Wrapf(err, "failed to remove file %v", file) + } + + return nil +} + +func RemoveDevice(dev string) error { + if _, err := os.Stat(dev); err == nil { + if err := remove(dev); err != nil { + return errors.Wrapf(err, "failed to removing device %s", dev) + } + } + return nil +} + +func DuplicateDevice(dev *lhtypes.BlockDeviceInfo, dest string) error { + if err := mknod(dest, dev.Major, dev.Minor); err != nil { + return errors.Wrapf(err, "cannot create device node %s for device %s", dest, dev.Name) + } + if err := os.Chmod(dest, 0660); err != nil { + return errors.Wrapf(err, "cannot change permission of the device %s", dest) + } + // We use the group 6 by default because this is common group for disks + // See more at https://github.com/longhorn/longhorn/issues/8088#issuecomment-1982300242 + if err := os.Chown(dest, 0, 6); err != nil { + return errors.Wrapf(err, "cannot change ownership of the device %s", dest) + } + return nil +} + +func mknod(device string, major, minor int) error { + var fileMode os.FileMode = 0660 + fileMode |= unix.S_IFBLK + dev := int(unix.Mkdev(uint32(major), uint32(minor))) + + logrus.Infof("Creating device %s %d:%d", device, major, minor) + return unix.Mknod(device, uint32(fileMode), dev) +} + +func removeAsync(path string, done chan<- error) { + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + logrus.Errorf("Unable to remove: %v", path) + done <- err + } + done <- nil +} + +func remove(path string) error { + done := make(chan error) + go removeAsync(path, done) + select { + case err := <-done: + return err + case <-time.After(30 * time.Second): + return fmt.Errorf("timeout trying to delete %s", path) + } +} diff --git a/vendor/github.com/longhorn/longhorn-engine/pkg/controller/client/controller_client.go b/vendor/github.com/longhorn/longhorn-engine/pkg/controller/client/controller_client.go index 6c8808fd3..40dbf8982 100644 --- a/vendor/github.com/longhorn/longhorn-engine/pkg/controller/client/controller_client.go +++ b/vendor/github.com/longhorn/longhorn-engine/pkg/controller/client/controller_client.go @@ -149,14 +149,15 @@ func (c *ControllerClient) VolumeStart(size, currentSize int64, replicas ...stri return nil } -func (c *ControllerClient) VolumeSnapshot(name string, labels map[string]string) (string, error) { +func (c *ControllerClient) VolumeSnapshot(name string, labels map[string]string, freezeFilesystem bool) (string, error) { controllerServiceClient := c.getControllerServiceClient() ctx, cancel := context.WithTimeout(context.Background(), GRPCServiceTimeout) defer cancel() reply, err := controllerServiceClient.VolumeSnapshot(ctx, &enginerpc.VolumeSnapshotRequest{ - Name: name, - Labels: labels, + Name: name, + Labels: labels, + FreezeFilesystem: freezeFilesystem, }) if err != nil { return "", errors.Wrapf(err, "failed to create snapshot %v for volume %v", name, c.serviceURL) diff --git a/vendor/github.com/longhorn/longhorn-engine/pkg/util/fsfreeze.go b/vendor/github.com/longhorn/longhorn-engine/pkg/util/fsfreeze.go new file mode 100644 index 000000000..64028365a --- /dev/null +++ b/vendor/github.com/longhorn/longhorn-engine/pkg/util/fsfreeze.go @@ -0,0 +1,153 @@ +package util + +import ( + "io/fs" + "path/filepath" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "k8s.io/mount-utils" + + lhexec "github.com/longhorn/go-common-libs/exec" + "github.com/longhorn/go-common-libs/types" + "github.com/longhorn/go-iscsi-helper/longhorndev" +) + +const ( + binaryFsfreeze = "fsfreeze" + notFrozenErrorSubstring = "Invalid argument" + freezePointDirectory = "/var/lib/longhorn/freeze" // We expect this to be INSIDE the container namespace. + DevicePathPrefix = longhorndev.DevPath + + // If the block device is functioning and the filesystem is frozen, fsfreeze -u immediately returns successfully. + // If the block device is NOT functioning, fsfreeze does not return until I/O errors occur (which can take a long + // time). In certain situations (e.g. when it is executed during an instance-manager shutdown that has already + // stopped the associated replica so that I/Os will eventually time out), waiting can impede the shutdown sequence. + unfreezeTimeout = 5 * time.Second +) + +// GetFreezePointFromDevicePath returns the absolute path to the canonical location we will try to mount a filesystem to +// before freezeing it. +func GetFreezePointFromDevicePath(devicePath string) string { + if devicePath == "" { + return "" + } + return GetFreezePointFromVolumeName(filepath.Base(devicePath)) +} + +// GetFreezePointFromVolumeName returns the absolute path to the canonical location we will try to mount a filesystem to +// before freezeing it. +func GetFreezePointFromVolumeName(volumeName string) string { + if volumeName == "" { + return "" + } + return filepath.Join(freezePointDirectory, volumeName) +} + +// GetDevicePathFromVolumeName mirrors longhorndev.getDev. It returns the device path that go-iscsi-helper will use. +func GetDevicePathFromVolumeName(volumeName string) string { + if volumeName == "" { + return "" + } + return filepath.Join(longhorndev.DevPath, volumeName) +} + +// FreezeFilesystem attempts to freeze the filesystem mounted at freezePoint. +func FreezeFilesystem(freezePoint string, exec lhexec.ExecuteInterface) error { + if exec == nil { + exec = lhexec.NewExecutor() + } + + // fsfreeze cannot be cancelled. Once it is started, we must wait for it to complete. If we do not, unfreeze will + // wait for it anyway. + _, err := exec.Execute([]string{}, binaryFsfreeze, []string{"-f", freezePoint}, types.ExecuteNoTimeout) + if err != nil { + return err + } + return nil +} + +// UnfreezeFilesystem attempts to unfreeze the filesystem mounted at freezePoint. It returns true if it +// successfully unfreezes a filesystem, false if there is no need to unfreeze a filesystem, and an error otherwise. +func UnfreezeFilesystem(freezePoint string, exec lhexec.ExecuteInterface) (bool, error) { + if exec == nil { + exec = lhexec.NewExecutor() + } + + _, err := exec.Execute([]string{}, binaryFsfreeze, []string{"-u", freezePoint}, unfreezeTimeout) + if err == nil { + return true, nil + } + if strings.Contains(err.Error(), notFrozenErrorSubstring) { + return false, nil + } + // It the error message is related to a timeout, there is a decent chance the unfreeze will eventually be + // successful. While we stop waiting for the unfreeze to complete, the unfreeze process itself cannot be killed. + // This usually indicates the kernel is locked up waiting for I/O errors to be returned for an iSCSI device that can + // no longer be reached. + return false, err +} + +// UnfreezeAndUnmountFilesystem attempts to unfreeze the filesystem mounted at freezePoint. +func UnfreezeAndUnmountFilesystem(freezePoint string, exec lhexec.ExecuteInterface, + mounter mount.Interface) (bool, error) { + if exec == nil { + exec = lhexec.NewExecutor() + } + if mounter == nil { + mounter = mount.New("") + } + + unfroze, err := UnfreezeFilesystem(freezePoint, exec) + if err != nil { + return unfroze, err + } + return unfroze, mount.CleanupMountPoint(freezePoint, mounter, false) +} + +// UnfreezeFilesystemForDevice attempts to identify a mountPoint for the Longhorn volume and unfreeze it. Under normal +// conditions, it will not find a filesystem, and if it finds a filesystem, it will not be frozen. +// UnfreezeFilesystemForDevice does not return an error if there is nothing to do. UnfreezeFilesystemForDevice is only +// relevant for volumes run with a tgt-blockdev frontend, as only these volumes have a Longhorn device on the node to +// format and mount. +func UnfreezeFilesystemForDevice(devicePath string) error { + // We do not need to switch to the host mount namespace to get mount points here. Usually, longhorn-engine runs in a + // container that has / bind mounted to /host with at least HostToContainer (rslave) propagation. + // - If it does not, we likely can't do a namespace swap anyway, since we don't have access to /host/proc. + // - If it does, we just need to know where in the container we can access the mount points to unfreeze. + mounter := mount.New("") + freezePoint := GetFreezePointFromDevicePath(devicePath) + + // First, try to unfreeze and unmount the expected mount point. + freezePointIsMountPoint, err := mounter.IsMountPoint(freezePoint) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + logrus.WithError(err).Warnf("Failed to determine if %s is a mount point while deciding whether or not to unfreeze", freezePoint) + } + if freezePointIsMountPoint { + unfroze, err := UnfreezeAndUnmountFilesystem(freezePoint, nil, mounter) + if unfroze { + logrus.Warnf("Unfroze filesystem mounted at %v", freezePoint) + } + return err + } + + // If a filesystem is not mounted at the expected mount point, try any other mount point of the device. + mountPoints, err := mounter.List() + if err != nil { + return errors.Wrap(err, "failed to list mount points while deciding whether or not unfreeze") + } + for _, mountPoint := range mountPoints { + if mountPoint.Device == devicePath { + // This one is not ours to unmount. + unfroze, err := UnfreezeFilesystem(mountPoint.Path, nil) + if unfroze { + logrus.Warnf("Unfroze filesystem mounted at %v", freezePoint) + } + return err + } + } + + return nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 85ce5b5ac..0757e4223 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -153,6 +153,13 @@ github.com/longhorn/go-common-libs/sync github.com/longhorn/go-common-libs/sys github.com/longhorn/go-common-libs/types github.com/longhorn/go-common-libs/utils +# github.com/longhorn/go-iscsi-helper v0.0.0-20240427164656-e9439c0018ce +## explicit; go 1.22 +github.com/longhorn/go-iscsi-helper/iscsi +github.com/longhorn/go-iscsi-helper/iscsidev +github.com/longhorn/go-iscsi-helper/longhorndev +github.com/longhorn/go-iscsi-helper/types +github.com/longhorn/go-iscsi-helper/util # github.com/longhorn/go-spdk-helper v0.0.0-20240514082311-4069f4804017 ## explicit; go 1.22.0 github.com/longhorn/go-spdk-helper/pkg/jsonrpc @@ -162,7 +169,7 @@ github.com/longhorn/go-spdk-helper/pkg/spdk/setup github.com/longhorn/go-spdk-helper/pkg/spdk/types github.com/longhorn/go-spdk-helper/pkg/types github.com/longhorn/go-spdk-helper/pkg/util -# github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240509154612-5fdc92a2d526 +# github.com/longhorn/longhorn-engine v1.7.0-dev.0.20240514224711-e39b7f0313b2 ## explicit; go 1.22.2 github.com/longhorn/longhorn-engine/pkg/backingfile github.com/longhorn/longhorn-engine/pkg/controller/client