Skip to content

Commit 700905d

Browse files
authored
Merge pull request #318 from SiaFoundation/nate/fix-force-remove
Fix force remove
2 parents 0b73159 + e198745 commit 700905d

File tree

9 files changed

+469
-278
lines changed

9 files changed

+469
-278
lines changed

host/metrics/types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ type (
8484
Storage struct {
8585
TotalSectors uint64 `json:"totalSectors"`
8686
PhysicalSectors uint64 `json:"physicalSectors"`
87+
LostSectors uint64 `json:"lostSectors"`
8788
ContractSectors uint64 `json:"contractSectors"`
8889
TempSectors uint64 `json:"tempSectors"`
8990

host/storage/persist.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
package storage
22

33
import (
4+
"context"
45
"errors"
56

67
"go.sia.tech/core/types"
78
)
89

910
type (
11+
// MigrateFunc is a callback function that is called for each sector that
12+
// needs to be migrated If the function returns an error, the sector should
13+
// be skipped and migration should continue.
14+
MigrateFunc func(location SectorLocation) error
1015

1116
// A VolumeStore stores and retrieves information about storage volumes.
1217
VolumeStore interface {
@@ -23,7 +28,7 @@ type (
2328
// RemoveVolume removes a storage volume from the volume store. If there
2429
// are used sectors in the volume, ErrVolumeNotEmpty is returned. If
2530
// force is true, the volume is removed even if it is not empty.
26-
RemoveVolume(volumeID int64) error
31+
RemoveVolume(volumeID int64, force bool) error
2732
// GrowVolume grows a storage volume's metadata to maxSectors. If the
2833
// number of sectors in the volume is already greater than maxSectors,
2934
// nil is returned.
@@ -39,9 +44,9 @@ type (
3944

4045
// MigrateSectors returns a new location for each occupied sector of a
4146
// volume starting at min. The sector data should be copied to the new
42-
// location and synced to disk during migrateFn. Iteration is stopped if
43-
// migrateFn returns an error.
44-
MigrateSectors(volumeID int64, min uint64, migrateFn func(SectorLocation) error) error
47+
// location and synced to disk during migrateFn. If migrateFn returns an
48+
// error, migration will continue, but that sector is not migrated.
49+
MigrateSectors(ctx context.Context, volumeID int64, min uint64, migrateFn MigrateFunc) (migrated, failed int, err error)
4550
// StoreSector calls fn with an empty location in a writable volume. If
4651
// the sector root already exists, fn is called with the existing
4752
// location and exists is true. Unless exists is true, The sector must
@@ -74,6 +79,9 @@ type (
7479
)
7580

7681
var (
82+
// ErrMigrationFailed is returned when a volume fails to migrate all
83+
// of its sectors.
84+
ErrMigrationFailed = errors.New("migration failed")
7785
// ErrNotEnoughStorage is returned when there is not enough storage space to
7886
// store a sector.
7987
ErrNotEnoughStorage = errors.New("not enough storage")

host/storage/storage.go

Lines changed: 79 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ func (vm *VolumeManager) loadVolumes() error {
156156
// migrateSector migrates a sector to a new location. The sector is read from
157157
// its current location and written to its new location. The volume is
158158
// immediately synced after the sector is written.
159-
func (vm *VolumeManager) migrateSector(loc SectorLocation, log *zap.Logger) error {
159+
func (vm *VolumeManager) migrateSector(loc SectorLocation) error {
160160
// read the sector from the old location
161161
sector, err := vm.Read(loc.Root)
162162
if err != nil {
@@ -269,14 +269,8 @@ func (vm *VolumeManager) shrinkVolume(ctx context.Context, id int64, volume *vol
269269

270270
// migrate any sectors outside of the target range.
271271
var migrated int
272-
err := vm.vs.MigrateSectors(id, newMaxSectors, func(newLoc SectorLocation) error {
273-
select {
274-
case <-ctx.Done():
275-
return ctx.Err()
276-
default:
277-
}
278-
279-
if err := vm.migrateSector(newLoc, log.Named("migrate")); err != nil {
272+
migrated, failed, err := vm.vs.MigrateSectors(ctx, id, newMaxSectors, func(newLoc SectorLocation) error {
273+
if err := vm.migrateSector(newLoc); err != nil {
280274
return err
281275
}
282276
migrated++
@@ -285,9 +279,11 @@ func (vm *VolumeManager) shrinkVolume(ctx context.Context, id int64, volume *vol
285279
vm.a.Register(a)
286280
return nil
287281
})
288-
log.Info("migrated sectors", zap.Int("count", migrated))
282+
log.Info("migrated sectors", zap.Int("migrated", migrated), zap.Int("failed", failed))
289283
if err != nil {
290284
return err
285+
} else if failed > 0 {
286+
return ErrMigrationFailed
291287
}
292288

293289
for current := oldMaxSectors; current > newMaxSectors; {
@@ -337,73 +333,6 @@ func (vm *VolumeManager) volumeStats(id int64) VolumeStats {
337333
return v.Stats()
338334
}
339335

340-
func (vm *VolumeManager) migrateForRemoval(ctx context.Context, id int64, localPath string, force bool, log *zap.Logger) (int, error) {
341-
ctx, cancel, err := vm.tg.AddContext(ctx)
342-
if err != nil {
343-
return 0, err
344-
}
345-
defer cancel()
346-
347-
// add an alert for the migration
348-
a := alerts.Alert{
349-
ID: frand.Entropy256(),
350-
Message: "Migrating sectors",
351-
Severity: alerts.SeverityInfo,
352-
Data: map[string]interface{}{
353-
"volumeID": id,
354-
"migrated": 0,
355-
"force": force,
356-
},
357-
Timestamp: time.Now(),
358-
}
359-
vm.a.Register(a)
360-
// dismiss the alert when the function returns. It is the caller's
361-
// responsibility to register a completion alert
362-
defer vm.a.Dismiss(a.ID)
363-
364-
// migrate sectors to other volumes
365-
var migrated, failed int
366-
err = vm.vs.MigrateSectors(id, 0, func(newLoc SectorLocation) error {
367-
select {
368-
case <-ctx.Done():
369-
return ctx.Err()
370-
default:
371-
}
372-
373-
if err := vm.migrateSector(newLoc, log.Named("migrate")); err != nil {
374-
log.Error("failed to migrate sector", zap.Stringer("sectorRoot", newLoc.Root), zap.Error(err))
375-
if force {
376-
failed++
377-
a.Data["failed"] = failed
378-
return nil
379-
}
380-
return err
381-
}
382-
migrated++
383-
// update the alert
384-
a.Data["migrated"] = migrated
385-
vm.a.Register(a)
386-
return nil
387-
})
388-
if err != nil {
389-
return migrated, fmt.Errorf("failed to migrate sector data: %w", err)
390-
} else if err := vm.vs.RemoveVolume(id); err != nil {
391-
return migrated, fmt.Errorf("failed to remove volume: %w", err)
392-
}
393-
394-
vm.mu.Lock()
395-
defer vm.mu.Unlock()
396-
// close the volume
397-
vm.volumes[id].Close()
398-
// delete the volume from memory
399-
delete(vm.volumes, id)
400-
// remove the volume file, ignore error if the file does not exist
401-
if err := os.Remove(localPath); err != nil && !errors.Is(err, os.ErrNotExist) {
402-
return migrated, fmt.Errorf("failed to remove volume file: %w", err)
403-
}
404-
return migrated, nil
405-
}
406-
407336
// Close gracefully shutsdown the volume manager.
408337
func (vm *VolumeManager) Close() error {
409338
// wait for all operations to stop
@@ -592,7 +521,7 @@ func (vm *VolumeManager) SetReadOnly(id int64, readOnly bool) error {
592521

593522
// RemoveVolume removes a volume from the manager.
594523
func (vm *VolumeManager) RemoveVolume(ctx context.Context, id int64, force bool, result chan<- error) error {
595-
log := vm.log.Named("remove").With(zap.Int64("volumeID", id))
524+
log := vm.log.Named("remove").With(zap.Int64("volumeID", id), zap.Bool("force", force))
596525
done, err := vm.tg.Add()
597526
if err != nil {
598527
return err
@@ -604,7 +533,10 @@ func (vm *VolumeManager) RemoveVolume(ctx context.Context, id int64, force bool,
604533
vm.mu.Unlock()
605534
if !ok {
606535
return fmt.Errorf("volume %v not found", id)
607-
} else if err := vol.SetStatus(VolumeStatusRemoving); err != nil {
536+
}
537+
538+
oldStatus := vol.Status()
539+
if err := vol.SetStatus(VolumeStatusRemoving); err != nil {
608540
return fmt.Errorf("failed to set volume status: %w", err)
609541
}
610542

@@ -618,38 +550,83 @@ func (vm *VolumeManager) RemoveVolume(ctx context.Context, id int64, force bool,
618550
return fmt.Errorf("failed to set volume %v to read-only: %w", id, err)
619551
}
620552

553+
alert := alerts.Alert{
554+
ID: frand.Entropy256(),
555+
Message: "Removing volume",
556+
Severity: alerts.SeverityInfo,
557+
Data: map[string]interface{}{
558+
"volumeID": id,
559+
"sectors": stat.TotalSectors,
560+
"used": stat.UsedSectors,
561+
"migrated": 0,
562+
"failed": 0,
563+
},
564+
Timestamp: time.Now(),
565+
}
566+
621567
go func() {
622-
start := time.Now()
623-
defer vol.SetStatus(VolumeStatusReady)
568+
defer vol.SetStatus(oldStatus)
624569

625-
migrated, err := vm.migrateForRemoval(ctx, id, stat.LocalPath, force, log)
570+
var migrated, failed int
571+
572+
updateRemovalAlert := func(message string, severity alerts.Severity, err error) {
573+
alert.Message = message
574+
alert.Severity = severity
575+
alert.Data["migrated"] = migrated
576+
alert.Data["failed"] = failed
577+
if err != nil {
578+
alert.Data["error"] = err.Error()
579+
}
580+
vm.a.Register(alert)
581+
}
582+
583+
migrated, failed, err := vm.vs.MigrateSectors(ctx, id, 0, func(newLoc SectorLocation) error {
584+
err := vm.migrateSector(newLoc)
585+
if err != nil {
586+
failed++
587+
} else {
588+
migrated++
589+
}
590+
updateRemovalAlert("Removing volume", alerts.SeverityInfo, nil) // error is ignored during migration
591+
return err
592+
})
626593
if err != nil {
627594
log.Error("failed to migrate sectors", zap.Error(err))
595+
// update the alert
596+
updateRemovalAlert("Failed to remove volume", alerts.SeverityError, err)
597+
result <- err
598+
return
599+
} else if !force && failed > 0 {
600+
updateRemovalAlert("Failed to remove volume", alerts.SeverityError, ErrMigrationFailed)
601+
result <- ErrMigrationFailed
602+
return
628603
}
629604

630-
alert := alerts.Alert{
631-
ID: frand.Entropy256(),
632-
Data: map[string]interface{}{
633-
"volumeID": id,
634-
"elapsed": time.Since(start),
635-
"migratedSectors": migrated,
636-
},
637-
Timestamp: time.Now(),
638-
}
639-
if err != nil {
640-
alert.Message = "Volume removal failed"
641-
alert.Severity = alerts.SeverityError
642-
alert.Data["error"] = err.Error()
643-
} else {
644-
alert.Message = "Volume removed"
645-
alert.Severity = alerts.SeverityInfo
605+
// close the volume and remove it from memory
606+
if err := vol.Close(); err != nil {
607+
log.Error("failed to close volume", zap.Error(err))
608+
updateRemovalAlert("Failed to remove volume", alerts.SeverityError, err)
609+
result <- err
610+
return
611+
} else if err := os.Remove(stat.LocalPath); err != nil && !errors.Is(err, os.ErrNotExist) {
612+
log.Error("failed to remove volume file", zap.Error(err))
613+
updateRemovalAlert("Failed to remove volume", alerts.SeverityError, err)
614+
result <- err
615+
return
646616
}
647-
vm.a.Register(alert)
617+
delete(vm.volumes, id)
648618

649-
select {
650-
case result <- err:
651-
default:
619+
// remove the volume from the volume store
620+
if err := vm.vs.RemoveVolume(id, force); err != nil {
621+
log.Error("failed to remove volume", zap.Error(err))
622+
// update the alert
623+
updateRemovalAlert("Failed to remove volume", alerts.SeverityError, err)
624+
result <- err
625+
return
652626
}
627+
628+
updateRemovalAlert("Volume removed", alerts.SeverityInfo, nil)
629+
result <- nil
653630
}()
654631

655632
return nil

0 commit comments

Comments
 (0)