Skip to content

Commit

Permalink
Make log-validator take glob patterns to monitor for log files (#7172)
Browse files Browse the repository at this point in the history
To simplify deployment of the log validator, this allows wildcards
(using go's filepath.Glob) to be included in the file paths.

In order to detect new files, a new background goroutine polls the glob
patterns every minute for matches.

Because the "monitor" function is running in its own goroutine, a lock
is needed to ensure it's not trying to add new tailers while shutdown is
happening.
  • Loading branch information
mcpherrinm committed Nov 27, 2023
1 parent a31429f commit 32adaf1
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 58 deletions.
7 changes: 1 addition & 6 deletions cmd/log-validator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,9 @@ func main() {
defer oTelShutdown(context.Background())
logger.Info(cmd.VersionString())

v := validator.New(logger, stats)
v := validator.New(config.Files, logger, stats)
defer v.Shutdown()

for _, filename := range config.Files {
err := v.TailValidateFile(filename)
cmd.FailOnError(err, "failed to tail file")
}

cmd.WaitForSignal()
}

Expand Down
2 changes: 1 addition & 1 deletion log/validator/tail_logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"github.com/letsencrypt/boulder/log"
)

// tailLogger is an adapter to the hpcloud/tail module's logging interface.
// tailLogger is an adapter to the nxadm/tail module's logging interface.
type tailLogger struct {
log.Logger
}
Expand Down
144 changes: 103 additions & 41 deletions log/validator/validator.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package validator

import (
"context"
"encoding/base64"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"time"

"github.com/nxadm/tail"
Expand All @@ -17,73 +20,132 @@ import (
var errInvalidChecksum = errors.New("invalid checksum length")

type Validator struct {
tailers []*tail.Tail
// mu guards patterns and tailers to prevent Shutdown racing monitor
mu sync.Mutex

// patterns is the list of glob patterns to monitor with filepath.Glob for logs
patterns []string

// tailers is a map of filenames to the tailer which are currently being tailed
tailers map[string]*tail.Tail

// monitorCancel cancels the monitor's context, so it exits
monitorCancel context.CancelFunc

lineCounter *prometheus.CounterVec
log log.Logger
}

func New(logger log.Logger, stats prometheus.Registerer) *Validator {
// New Validator monitoring paths, which is a list of file globs.
func New(patterns []string, logger log.Logger, stats prometheus.Registerer) *Validator {
lineCounter := prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "log_lines",
Help: "A counter of log lines processed, with status",
}, []string{"filename", "status"})
stats.MustRegister(lineCounter)

return &Validator{log: logger, lineCounter: lineCounter}
}
monitorContext, monitorCancel := context.WithCancel(context.Background())

// TailValidateFile takes a filename, and starts tailing it.
// An error is returned if the file couldn't be opened.
func (v *Validator) TailValidateFile(filename string) error {
t, err := tail.TailFile(filename, tail.Config{
ReOpen: true,
MustExist: false, // sometimes files won't exist, so we must tolerate that
Follow: true,
Logger: tailLogger{v.log},
CompleteLines: true,
})
if err != nil {
return err
v := &Validator{
patterns: patterns,
tailers: map[string]*tail.Tail{},
log: logger,
monitorCancel: monitorCancel,
lineCounter: lineCounter,
}

go func() {
// Emit no more than 1 error line per second. This prevents consuming large
// amounts of disk space in case there is problem that causes all log lines to
// be invalid.
outputLimiter := time.NewTicker(time.Second)
defer outputLimiter.Stop()
go v.monitor(monitorContext)

return v
}

// pollPaths expands v.patterns and calls v.tailValidateFile on each resulting file
func (v *Validator) pollPaths() {
v.mu.Lock()
defer v.mu.Unlock()
for _, pattern := range v.patterns {
paths, err := filepath.Glob(pattern)
if err != nil {
v.log.Err(err.Error())
}

for line := range t.Lines {
if line.Err != nil {
v.log.Errf("error while tailing %s: %s", t.Filename, line.Err)
for _, path := range paths {
if _, ok := v.tailers[path]; ok {
// We are already tailing this file
continue
}
err := lineValid(line.Text)

t, err := tail.TailFile(path, tail.Config{
ReOpen: true,
MustExist: false, // sometimes files won't exist, so we must tolerate that
Follow: true,
Logger: tailLogger{v.log},
CompleteLines: true,
})
if err != nil {
if errors.Is(err, errInvalidChecksum) {
v.lineCounter.WithLabelValues(t.Filename, "invalid checksum length").Inc()
} else {
v.lineCounter.WithLabelValues(t.Filename, "bad").Inc()
}
select {
case <-outputLimiter.C:
v.log.Errf("%s: %s %q", t.Filename, err, line.Text)
default:
}
} else {
v.lineCounter.WithLabelValues(t.Filename, "ok").Inc()
// TailFile shouldn't error when MustExist is false
v.log.Errf("unexpected error from TailFile: %v", err)
}

go v.tailValidate(path, t.Lines)

v.tailers[path] = t
}
}
}

// Monitor calls v.pollPaths every minute until its context is cancelled
func (v *Validator) monitor(ctx context.Context) {
for {
v.pollPaths()

// Wait a minute, unless cancelled
timer := time.NewTimer(time.Minute)
select {
case <-ctx.Done():
return
case <-timer.C:
}
}()
}
}

v.tailers = append(v.tailers, t)
func (v *Validator) tailValidate(filename string, lines chan *tail.Line) {
// Emit no more than 1 error line per second. This prevents consuming large
// amounts of disk space in case there is problem that causes all log lines to
// be invalid.
outputLimiter := time.NewTicker(time.Second)
defer outputLimiter.Stop()

return nil
for line := range lines {
if line.Err != nil {
v.log.Errf("error while tailing %s: %s", filename, line.Err)
continue
}
err := lineValid(line.Text)
if err != nil {
if errors.Is(err, errInvalidChecksum) {
v.lineCounter.WithLabelValues(filename, "invalid checksum length").Inc()
} else {
v.lineCounter.WithLabelValues(filename, "bad").Inc()
}
select {
case <-outputLimiter.C:
v.log.Errf("%s: %s %q", filename, err, line.Text)
default:
}
} else {
v.lineCounter.WithLabelValues(filename, "ok").Inc()
}
}
}

// Shutdown should be called before process shutdown
func (v *Validator) Shutdown() {
v.mu.Lock()
defer v.mu.Unlock()

v.monitorCancel()

for _, t := range v.tailers {
// The tail module seems to have a race condition that will generate
// errors like this on shutdown:
Expand Down
12 changes: 2 additions & 10 deletions test/config-next/log-validator.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,8 @@
"files": [
"/var/log/akamai-purger.log",
"/var/log/bad-key-revoker.log",
"/var/log/boulder-ca.log",
"/var/log/boulder-observer.log",
"/var/log/boulder-publisher.log",
"/var/log/boulder-ra.log",
"/var/log/boulder-remoteva.log",
"/var/log/boulder-sa.log",
"/var/log/boulder-va.log",
"/var/log/boulder-wfe2.log",
"/var/log/crl-storer.log",
"/var/log/crl-updater.log",
"/var/log/boulder-*.log",
"/var/log/crl-*.log",
"/var/log/nonce-service.log",
"/var/log/ocsp-responder.log"
]
Expand Down

0 comments on commit 32adaf1

Please sign in to comment.