Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 1885365: daemon: properly handle unit enable/disables #2145

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pkg/daemon/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ const (
// InitialNodeAnnotationsBakPath defines the path of InitialNodeAnnotationsFilePath when the initial bootstrap is done. We leave it around for debugging and reconciling.
InitialNodeAnnotationsBakPath = "/etc/machine-config-daemon/node-annotation.json.bak"

// IgnitionSystemdPresetFile is where Ignition writes initial enabled/disabled systemd unit configs
// This should be removed on boot after MCO takes over, so if any of these are deleted we can go back
// to initial system settings
IgnitionSystemdPresetFile = "/etc/systemd/system-preset/20-ignition.preset"

// EtcPivotFile is used by the `pivot` command
// For more information, see https://github.com/openshift/pivot/pull/25/commits/c77788a35d7ee4058d1410e89e6c7937bca89f6c#diff-04c6e90faac2675aa89e2176d2eec7d8R44
EtcPivotFile = "/etc/pivot/image-pullspec"
Expand Down
15 changes: 13 additions & 2 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,6 @@ const (
pathIgnition = "/usr/lib/dracut/modules.d/30ignition/ignition"
// pathSystemd is the path systemd modifiable units, services, etc.. reside
pathSystemd = "/etc/systemd/system"
// wantsPathSystemd is the path where enabled units should be linked
wantsPathSystemd = "/etc/systemd/system/multi-user.target.wants/"
// pathDevNull is the systems path to and endless blackhole
pathDevNull = "/dev/null"
// currentConfigPath is where we store the current config on disk to validate
Expand Down Expand Up @@ -457,6 +455,9 @@ func (dn *Daemon) syncNode(key string) error {
if err := upgradeHackFor44AndBelow(); err != nil {
return err
}
if err := removeIgnitionArtifacts(); err != nil {
return err
}
if err := dn.checkStateOnFirstRun(); err != nil {
return err
}
Expand Down Expand Up @@ -964,6 +965,16 @@ func upgradeHackFor44AndBelow() error {
return nil
}

// Remove artifacts used by ignition, that the MCO should no longer
// use since the machine is up.
// Currently removes the systemd preset file written by Ignition.
func removeIgnitionArtifacts() error {
if err := os.Remove(constants.IgnitionSystemdPresetFile); err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "failed to remove Ignition-written systemd preset file")
}
return nil
}

// checkStateOnFirstRun is a core entrypoint for our state machine.
// It determines whether we're in our desired state, or if we're
// transitioning between states, and whether or not we need to update
Expand Down
103 changes: 62 additions & 41 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1398,8 +1398,12 @@ func (dn *Daemon) deleteStaleData(oldIgnConfig, newIgnConfig *ign3types.Config)
}
path := filepath.Join(pathSystemd, u.Name)
if _, ok := newUnitSet[path]; !ok {
if err := dn.disableUnit(u); err != nil {
glog.Warningf("Unable to disable %s: %s", u.Name, err)
// since the unit doesn't exist anymore within the MachineConfig,
// look to restore defaults here, so that symlinks are removed first
// if the system has the service disabled
// writeUnits() will catch units that still have references in other MCs
if err := dn.presetUnit(u); err != nil {
glog.Infof("Did not restore preset for %s (may not exist): %s", u.Name, err)
}
if _, err := os.Stat(noOrigFileStampName(path)); err == nil {
if err := os.Remove(noOrigFileStampName(path)); err != nil {
Expand Down Expand Up @@ -1429,43 +1433,43 @@ func (dn *Daemon) deleteStaleData(oldIgnConfig, newIgnConfig *ign3types.Config)
return nil
}

// enableUnit enables a systemd unit via symlink
func (dn *Daemon) enableUnit(unit ign3types.Unit) error {
// The link location
wantsPath := filepath.Join(wantsPathSystemd, unit.Name)
// sanity check that we don't return an error when the link already exists
if _, err := os.Stat(wantsPath); err == nil {
glog.Infof("%s already exists. Not making a new symlink", wantsPath)
return nil
}
// The originating file to link
servicePath := filepath.Join(pathSystemd, unit.Name)
err := renameio.Symlink(servicePath, wantsPath)
// enableUnits enables a set of systemd units via systemctl, if any fail all fails.
func (dn *Daemon) enableUnits(units []string) error {
args := append([]string{"enable"}, units...)
stdouterr, err := exec.Command("systemctl", args...).CombinedOutput()
if err != nil {
return err
return fmt.Errorf("error enabling unit: %s", stdouterr)
}
glog.Infof("Enabled %s", unit.Name)
glog.V(2).Infof("Symlinked %s to %s", servicePath, wantsPath)
glog.Infof("Enabled systemd units: %v", units)
return nil
}

// disableUnit disables a systemd unit via symlink removal
func (dn *Daemon) disableUnit(unit ign3types.Unit) error {
// The link location
wantsPath := filepath.Join(wantsPathSystemd, unit.Name)
// sanity check so we don't return an error when the unit was already disabled
if _, err := os.Stat(wantsPath); err != nil {
glog.Infof("%s was not present. No need to remove", wantsPath)
return nil
// disableUnits disables a set of systemd units via systemctl, if any fail all fails.
func (dn *Daemon) disableUnits(units []string) error {
args := append([]string{"disable"}, units...)
stdouterr, err := exec.Command("systemctl", args...).CombinedOutput()
if err != nil {
return fmt.Errorf("error disabling unit: %s", stdouterr)
}
glog.V(2).Infof("Disabling unit at %s", wantsPath)
glog.Infof("Disabled systemd units %v", units)
return nil
}

return os.Remove(wantsPath)
// presetUnit resets a systemd unit to its preset via systemctl
func (dn *Daemon) presetUnit(unit ign3types.Unit) error {
args := []string{"preset", unit.Name}
stdouterr, err := exec.Command("systemctl", args...).CombinedOutput()
if err != nil {
return fmt.Errorf("error running preset on unit: %s", stdouterr)
}
glog.Infof("Preset systemd unit %s", unit.Name)
return nil
}

// writeUnits writes the systemd units to disk
func (dn *Daemon) writeUnits(units []ign3types.Unit) error {

var enabledUnits []string
var disabledUnits []string
for _, u := range units {
// write the dropin to disk
for i := range u.Dropins {
Expand Down Expand Up @@ -1520,23 +1524,40 @@ func (dn *Daemon) writeUnits(units []ign3types.Unit) error {
}

// if the unit doesn't note if it should be enabled or disabled then
// skip all linking.
// if the unit should be enabled, then enable it.
// otherwise the unit is disabled. run disableUnit to ensure the unit is
// disabled. even if the unit wasn't previously enabled the result will
// be fine as disableUnit is idempotent.
// honour system presets. This to account for an edge case where you
// deleted a MachineConfig that enabled/disabled the unit to revert,
// but the unit itself is referenced in other MCs. deleteStaleData() will
// catch fully deleted units.
// if the unit should be enabled/disabled, then enable/disable it.
// this is a no-op if the system wasn't change this iteration
// Also, enable and disable as one command, as if any operation fails
// we'd bubble up the error anyways, and we save a lot of time doing this.
// Presets must be done individually as we don't consider a failed preset
// as an error, but it would cause other presets that would have succeeded
// to not go through.

if u.Enabled != nil {
if *u.Enabled {
if err := dn.enableUnit(u); err != nil {
return err
}
glog.V(2).Infof("Enabled systemd unit %q", u.Name)
enabledUnits = append(enabledUnits, u.Name)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it'd probably be better to do a "diff" here - only enable/disable units that changed between the two configs.

Otherwise I worry that this may cause unexpected side effects.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this doesn't change any existing behaviour. Today we re-write all enable/disables anyways (via the hardlink to multi-user.target.wants) just like how we re-write any file/unit again. I'm not sure if we want to make the exception

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but I suspect part of the problem here is that going via systemctl may touch the DBus API doing a bunch of synchronous requests; that's going to slow things down.

Another angle is to try SYSTEMD_OFFLINE=1 in the environment?

Of course if we implemented #1190 this issue would also go away because we'd only be enabling in the new root, so shouldn't be talking to the running systemd either.

} else {
if err := dn.disableUnit(u); err != nil {
return err
}
glog.V(2).Infof("Disabled systemd unit %q", u.Name)
disabledUnits = append(disabledUnits, u.Name)
}
} else {
if err := dn.presetUnit(u); err != nil {
// Don't fail here, since a unit may have a dropin referencing a nonexisting actual unit
glog.Infof("Could not reset unit preset for %s, skipping. (Error msg: %v)", u.Name, err)
}
}
}

if len(enabledUnits) > 0 {
if err := dn.enableUnits(enabledUnits); err != nil {
return err
}
}
if len(disabledUnits) > 0 {
if err := dn.disableUnits(disabledUnits); err != nil {
return err
}
}
return nil
Expand Down