Skip to content

Commit

Permalink
Merge branch 'main' into james/flare-shipping
Browse files Browse the repository at this point in the history
  • Loading branch information
James-Pickett authored Oct 6, 2023
2 parents 1621350 + 29bc19e commit d540887
Show file tree
Hide file tree
Showing 16 changed files with 664 additions and 134 deletions.
4 changes: 4 additions & 0 deletions cmd/launcher/extension.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ func commonRunnerOptions(logger log.Logger, k types.Knapsack) []runtime.OsqueryI
kolidelog.WithKeyValue("osqlevel", "stdout"),
)

// Only enable watchdog internally for now
enableWatchdog := k.UpdateChannel() == "nightly"

return []runtime.OsqueryInstanceOption{
runtime.WithKnapsack(k),
runtime.WithOsquerydBinary(k.OsquerydPath()),
Expand All @@ -223,6 +226,7 @@ func commonRunnerOptions(logger log.Logger, k types.Knapsack) []runtime.OsqueryI
runtime.WithAutoloadedExtensions(k.AutoloadedExtensions()...),
runtime.WithUpdateDirectory(k.UpdateDirectory()),
runtime.WithUpdateChannel(k.UpdateChannel()),
runtime.WithEnableWatchdog(enableWatchdog),
}
}

Expand Down
26 changes: 19 additions & 7 deletions cmd/launcher/launcher.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package main

import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
Expand All @@ -9,7 +10,6 @@ import (
"net"
"net/http"
"os"
"os/exec"
"os/signal"
"path/filepath"
"runtime"
Expand Down Expand Up @@ -49,6 +49,7 @@ import (
"github.com/kolide/launcher/pkg/log/logshipper"
"github.com/kolide/launcher/pkg/log/teelogger"
"github.com/kolide/launcher/pkg/osquery"
"github.com/kolide/launcher/pkg/osquery/runsimple"
osqueryInstanceHistory "github.com/kolide/launcher/pkg/osquery/runtime/history"
"github.com/kolide/launcher/pkg/rungroup"
"github.com/kolide/launcher/pkg/service"
Expand Down Expand Up @@ -480,23 +481,34 @@ func writePidFile(path string) error {
// runOsqueryVersionCheck execs the osqueryd binary in the background when we're running
// on darwin. Operating on our theory that some startup delay issues for osquery might
// be due to the notarization check taking too long, we execute the binary here ahead
// of time in the hopes of getting the check out of the way.
// of time in the hopes of getting the check out of the way. This is expected to be called
// from a goroutine, and thus does not return an error.
func runOsqueryVersionCheck(ctx context.Context, logger log.Logger, osquerydPath string) {
if runtime.GOOS != "darwin" {
return
}

logger = log.With(logger, "component", "osquery-version-check")

var output bytes.Buffer

osq, err := runsimple.NewOsqueryProcess(osquerydPath, runsimple.WithStdout(&output))
if err != nil {
level.Error(logger).Log("msg", "unable to create process", "err", err)
return
}

// This has a somewhat long timeout, in case there's a notarization fetch
versionCtx, versionCancel := context.WithTimeout(ctx, 30*time.Second)
defer versionCancel()

versionCmd := exec.CommandContext(versionCtx, osquerydPath, "--version")

startTime := time.Now().UnixMilli()
out, err := versionCmd.CombinedOutput()

osqErr := osq.RunVersion(versionCtx)
executionTimeMs := time.Now().UnixMilli() - startTime
outTrimmed := strings.TrimSpace(string(out))
outTrimmed := strings.TrimSpace(output.String())

if err != nil {
if osqErr != nil {
level.Error(logger).Log("msg", "could not check osqueryd version", "output", outTrimmed, "err", err, "execution_time_ms", executionTimeMs)
return
}
Expand Down
16 changes: 9 additions & 7 deletions cmd/launcher/svc_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,21 @@ func runWindowsSvc(args []string) error {
os.Exit(1)
}

// Create a local logger. This logs to a known path, and aims to help diagnostics
if opts.RootDirectory != "" {
logger = teelogger.New(logger, locallogger.NewKitLogger(filepath.Join(opts.RootDirectory, "debug.json")))
locallogger.CleanUpRenamedDebugLogs(opts.RootDirectory, logger)
}

// Now that we've parsed the options, let's set a filter on our logger
// Now that we've parsed the options, let's set a filter on our eventLog logger.
// We don't want to set this on the teelogger below because we want debug logs to always
// go to debug.json.
if opts.Debug {
logger = level.NewFilter(logger, level.AllowDebug())
} else {
logger = level.NewFilter(logger, level.AllowInfo())
}

// Create a local logger. This logs to a known path, and aims to help diagnostics
if opts.RootDirectory != "" {
logger = teelogger.New(logger, locallogger.NewKitLogger(filepath.Join(opts.RootDirectory, "debug.json")))
locallogger.CleanUpRenamedDebugLogs(opts.RootDirectory, logger)
}

// Use the FindNewest mechanism to delete old
// updates. We do this here, as windows will pick up
// the update in main, which does not delete. Note
Expand Down
3 changes: 2 additions & 1 deletion docs/architecture/2018-06-15_request_enrollment_details.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

## Status

Accepted (June 15, 2018)
Accepted, June 15, 2018
Updated, 2023-09, [Revisiting enrollment details](2023-09-29_request_enrollment_details_updates.md)

## Context

Expand Down
48 changes: 48 additions & 0 deletions docs/architecture/2023-09-29_request_enrollment_details_updates.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Revisiting enrollment details

This continues the work started in [Initial Host Details](2018-06-15_request_enrollment_details.md)

## Authors

- seph ([@directionless](https://github.com/directionless))

## Status

Accepted (2023-09)

## Context

Over the last several years, roughly since late 2021, we've seen
occasional problems starting up. Internally, we've called this _The
Monterey Bug_, and there is some information and links in [GitHub
Issue #1211](https://github.com/kolide/launcher/issues/1211).

We have never managed to diagnose this. We have reproduced small parts
of it, but nothing that holds up overall.

Our leading theories are that it is somehow related to runtime
complexity, osquery startup time, and thrift socket contention. There
may be multiple related and unrelated issues at play.

There is additional complexity that stems from the original
implementation of
[`getEnrollmentDetails`](https://github.com/kolide/launcher/blob/ab411f07d1d147b963809df2e1fdb04cb574d1a3/pkg/osquery/extension.go#L934). Because
it uses the osquery socket, it cannot run until osquery is started. But
simultaneously launcher is trying to register extensions, and osquery
is trying to enroll.

## Decision

To both simplify startup ordering _and_ reduce socket contention, we
can gather enrollment details via execing osquery. Semantically, this
is a fairly simple change -- we can use the same query, and the same
osquery.

## Consequences

We incur an exec call during startup. But in return, we can gain
several benefits:
- Decouple enrollment details from the main osquery startup.
- Reduce contention on the socket during early startup
- Enrollment no longer has a circular dependency
- Enables future work to completely pull enrollment into launcher
40 changes: 40 additions & 0 deletions ee/desktop/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ type DesktopUsersProcessesRunner struct {
knapsack types.Knapsack
// runnerServer is a local server that desktop processes call to monitor parent
runnerServer *runnerserver.RunnerServer
// osVersion is the version of the OS cached in new
osVersion string
}

// processRecord is used to track spawned desktop processes.
Expand Down Expand Up @@ -189,6 +191,14 @@ func New(k types.Knapsack, opts ...desktopUsersProcessesRunnerOption) (*DesktopU
}
}()

if runtime.GOOS == "darwin" {
osversion, err := osversion()
if err != nil {
level.Error(runner.logger).Log("msg", "getting os version", "err", err)
}
runner.osVersion = osversion
}

setInstance(runner)
return runner, nil
}
Expand All @@ -200,6 +210,8 @@ func (r *DesktopUsersProcessesRunner) Execute() error {
defer updateTicker.Stop()
menuRefreshTicker := time.NewTicker(r.menuRefreshInterval)
defer menuRefreshTicker.Stop()
osUpdateCheckTicker := time.NewTicker(1 * time.Minute)
defer osUpdateCheckTicker.Stop()

for {
// Check immediately on each iteration, avoiding the initial ticker delay
Expand All @@ -213,6 +225,9 @@ func (r *DesktopUsersProcessesRunner) Execute() error {
case <-menuRefreshTicker.C:
r.refreshMenu()
continue
case <-osUpdateCheckTicker.C:
r.checkOsUpdate()
continue
case <-r.interrupt:
level.Debug(r.logger).Log("msg", "interrupt received, exiting desktop execute loop")
return nil
Expand Down Expand Up @@ -801,3 +816,28 @@ func removeFilesWithPrefix(folderPath, prefix string) error {
return os.Remove(path)
})
}

// checkOsUpdate compares the OS version reported by osversion with the one
// cached on the runner and, when they differ, kills the desktop processes.
// It is a no-op on platforms other than darwin.
//
// On darwin, the desktop sometimes disappears after an OS update even though
// the process is still there, so we restart desktop by killing the process
// and letting the runner restart it.
func (r *DesktopUsersProcessesRunner) checkOsUpdate() {
	if runtime.GOOS != "darwin" {
		return
	}

	osVersion, err := osversion()
	if err != nil {
		level.Error(r.logger).Log("msg", "getting os version", "err", err)
		return
	}

	if osVersion == r.osVersion {
		return
	}

	// An empty cached version means there is no baseline to compare against
	// (for example, the earlier lookup failed). Record the current version
	// rather than treating it as an OS update and restarting desktop needlessly.
	if r.osVersion == "" {
		r.osVersion = osVersion
		return
	}

	level.Debug(r.logger).Log(
		"msg", "os version changed, restarting desktop",
		"old", r.osVersion,
		"new", osVersion,
	)
	r.osVersion = osVersion
	r.killDesktopProcesses()
}
6 changes: 6 additions & 0 deletions ee/desktop/runner/runner_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"fmt"
"os/exec"
"os/user"

"golang.org/x/sys/unix"
)

// For notifications to work, we must run in the user context with launchctl asuser.
Expand Down Expand Up @@ -50,3 +52,7 @@ func (r *DesktopUsersProcessesRunner) runAsUser(_ context.Context, uid string, c

return cmd.Start()
}

// osversion returns the value of the "kern.osrelease" sysctl, along with any
// error encountered while reading it.
func osversion() (string, error) {
	release, err := unix.Sysctl("kern.osrelease")
	return release, err
}
5 changes: 5 additions & 0 deletions ee/desktop/runner/runner_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package runner

import (
"context"
"errors"
"fmt"
"os"
"os/exec"
Expand Down Expand Up @@ -251,3 +252,7 @@ func (r *DesktopUsersProcessesRunner) getXauthority(ctx context.Context, uid str

return ""
}

// osversion is a placeholder on linux: it always returns an empty version
// string together with a "not implemented" error.
func osversion() (string, error) {
	const version = ""
	return version, errors.New("not implemented")
}
5 changes: 5 additions & 0 deletions ee/desktop/runner/runner_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package runner

import (
"context"
"errors"
"fmt"
"os/exec"
"syscall"
Expand Down Expand Up @@ -55,3 +56,7 @@ func processAccessToken(pid int32) (syscall.Token, error) {

return token, err
}

// osversion is a placeholder on windows: no version lookup is performed, and
// callers always receive an empty string and a "not implemented" error.
func osversion() (string, error) {
	err := errors.New("not implemented")
	return "", err
}
2 changes: 1 addition & 1 deletion ee/localserver/krypto-ec-middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func (e *kryptoEcMiddleware) Wrap(next http.Handler) http.Handler {
challengeBox, err := extractChallenge(r)
if err != nil {
traces.SetError(span, err)
level.Debug(e.logger).Log("msg", "failed to extract box from request", "err", err)
level.Debug(e.logger).Log("msg", "failed to extract box from request", "err", err, "path", r.URL.Path, "query_params", r.URL.RawQuery)
w.WriteHeader(http.StatusUnauthorized)
return
}
Expand Down
Loading

0 comments on commit d540887

Please sign in to comment.