From 0de28ef0e45877a63edacbeb0d60c551e7efbe5c Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Wed, 15 Nov 2023 10:51:18 +0530 Subject: [PATCH 01/25] initial commit --- x-pack/filebeat/input/websocket/config.go | 5 +++++ x-pack/filebeat/input/websocket/input.go | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 x-pack/filebeat/input/websocket/config.go create mode 100644 x-pack/filebeat/input/websocket/input.go diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go new file mode 100644 index 000000000000..3151c8136ffe --- /dev/null +++ b/x-pack/filebeat/input/websocket/config.go @@ -0,0 +1,5 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package websocket diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go new file mode 100644 index 000000000000..3151c8136ffe --- /dev/null +++ b/x-pack/filebeat/input/websocket/input.go @@ -0,0 +1,5 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package websocket From 1b8980ea3778fc52a02b318a57730cf34e5387c7 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Mon, 29 Jan 2024 18:04:04 +0530 Subject: [PATCH 02/25] working version of websocket input with cel engine --- .../input/default-inputs/inputs_other.go | 2 + x-pack/filebeat/input/websocket/cel.go | 69 ++++ x-pack/filebeat/input/websocket/config.go | 123 +++++++ x-pack/filebeat/input/websocket/input.go | 343 ++++++++++++++++++ .../filebeat/input/websocket/input_manager.go | 90 +++++ x-pack/filebeat/input/websocket/metrics.go | 51 +++ 6 files changed, 678 insertions(+) create mode 100644 x-pack/filebeat/input/websocket/cel.go create mode 100644 x-pack/filebeat/input/websocket/input_manager.go create mode 100644 x-pack/filebeat/input/websocket/metrics.go diff --git a/x-pack/filebeat/input/default-inputs/inputs_other.go b/x-pack/filebeat/input/default-inputs/inputs_other.go index d396d4635a1d..5b55cecc56e8 100644 --- a/x-pack/filebeat/input/default-inputs/inputs_other.go +++ b/x-pack/filebeat/input/default-inputs/inputs_other.go @@ -22,6 +22,7 @@ import ( "github.com/elastic/beats/v7/x-pack/filebeat/input/lumberjack" "github.com/elastic/beats/v7/x-pack/filebeat/input/o365audit" "github.com/elastic/beats/v7/x-pack/filebeat/input/shipper" + "github.com/elastic/beats/v7/x-pack/filebeat/input/websocket" "github.com/elastic/elastic-agent-libs/logp" ) @@ -39,5 +40,6 @@ func xpackInputs(info beat.Info, log *logp.Logger, store beater.StateStore) []v2 awscloudwatch.Plugin(), lumberjack.Plugin(), shipper.Plugin(log, store), + websocket.Plugin(log, store), } } diff --git a/x-pack/filebeat/input/websocket/cel.go b/x-pack/filebeat/input/websocket/cel.go new file mode 100644 index 000000000000..fb8f980d1fd6 --- /dev/null +++ b/x-pack/filebeat/input/websocket/cel.go @@ -0,0 +1,69 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. 
Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package websocket + +import ( + "compress/gzip" + "context" + "fmt" + "io" + + "github.com/elastic/beats/v7/libbeat/version" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/useragent" + "github.com/elastic/mito/lib" + "github.com/google/cel-go/cel" + "github.com/google/cel-go/checker/decls" +) + +var ( + // mimetypes holds supported MIME type mappings. + mimetypes = map[string]interface{}{ + "application/gzip": func(r io.Reader) (io.Reader, error) { return gzip.NewReader(r) }, + "application/x-ndjson": lib.NDJSON, + "application/zip": lib.Zip, + "text/csv; header=absent": lib.CSVNoHeader, + "text/csv; header=present": lib.CSVHeader, + "text/csv;header=absent": lib.CSVNoHeader, + "text/csv;header=present": lib.CSVHeader, + } +) + +// The Filebeat user-agent is provided to the program as useragent. +var userAgent = useragent.UserAgent("Filebeat", version.GetDefaultVersion(), version.Commit(), version.BuildTime().String()) + +func newProgram(ctx context.Context, src, root string, log *logp.Logger) (cel.Program, error) { + opts := []cel.EnvOption{ + cel.Declarations(decls.NewVar(root, decls.Dyn)), + cel.OptionalTypes(cel.OptionalTypesVersion(lib.OptionalTypesVersion)), + lib.Collections(), + lib.Crypto(), + lib.JSON(nil), + lib.Strings(), + lib.Time(), + lib.Try(), + lib.File(mimetypes), + lib.MIME(mimetypes), + lib.Globals(map[string]interface{}{ + "useragent": userAgent, + }), + } + + env, err := cel.NewEnv(opts...) 
+ if err != nil { + return nil, fmt.Errorf("failed to create env: %w", err) + } + + ast, iss := env.Compile(src) + if iss.Err() != nil { + return nil, fmt.Errorf("failed compilation: %w", iss.Err()) + } + + prg, err := env.Program(ast) + if err != nil { + return nil, fmt.Errorf("failed program instantiation: %w", err) + } + return prg, nil +} diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index 3151c8136ffe..e9dee50ad6e2 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -3,3 +3,126 @@ // you may not use this file except in compliance with the Elastic License. package websocket + +import ( + "errors" + "net/url" + "time" + + "gopkg.in/natefinch/lumberjack.v2" +) + +const defaultMaxExecutions = 1000 + +type config struct { + // Program is the CEL program to be run for each polling. + Program string `config:"program"` + // MaxExecutions is the maximum number of times a single + // periodic CEL execution loop may repeat due to a true + // "want_more" field. If it is nil a sensible default is + // used. + MaxExecutions *int `config:"max_executions"` + // Regexps is the set of regular expression to be made + // available to the program. + Regexps map[string]string `config:"regexp"` + // State is the initial state to be provided to the + // program. If it has a cursor field, that field will + // be overwritten by any stored cursor, but will be + // available if no stored cursor exists. + State map[string]interface{} `config:"state"` + // Auth is the authentication config for connection + Auth authConfig `config:"auth"` + // Resource + Resource *ResourceConfig `config:"resource" validate:"required"` +} + +type ResourceConfig struct { + URL *urlConfig `config:"url" validate:"required"` + Retry retryConfig `config:"retry"` + Tracer *lumberjack.Logger `config:"tracer"` +} + +type authConfig struct { + // Api-Key to use for authentication. 
+ ApiKey *apiKeyConfig `config:"api_key"` + // Baerer token to use for authentication. + BearerToken string `config:"bearer_token"` + // Basic auth token to use for authentication. + BasicToken string `config:"basic_token"` +} + +type apiKeyConfig struct { + // Api-Key to use for authentication. + Header string `config:"header"` + Value string `config:"value"` +} +type urlConfig struct { + *url.URL +} + +func (u *urlConfig) Unpack(in string) error { + parsed, err := url.Parse(in) + if err != nil { + return err + } + u.URL = parsed + return nil +} + +type retryConfig struct { + MaxAttempts *int `config:"max_attempts"` + WaitMin *time.Duration `config:"wait_min"` + WaitMax *time.Duration `config:"wait_max"` +} + +func (c retryConfig) Validate() error { + switch { + case c.MaxAttempts != nil && *c.MaxAttempts <= 0: + return errors.New("max_attempts must be greater than zero") + case c.WaitMin != nil && *c.WaitMin <= 0: + return errors.New("wait_min must be greater than zero") + case c.WaitMax != nil && *c.WaitMax <= 0: + return errors.New("wait_max must be greater than zero") + } + return nil +} + +func defaultConfig() config { + maxExecutions := defaultMaxExecutions + maxAttempts := 5 + waitMin := time.Second + waitMax := time.Minute + + return config{ + MaxExecutions: &maxExecutions, + + Resource: &ResourceConfig{ + Retry: retryConfig{ + MaxAttempts: &maxAttempts, + WaitMin: &waitMin, + WaitMax: &waitMax, + }, + }, + } +} + +func (c retryConfig) getMaxAttempts() int { + if c.MaxAttempts == nil { + return 0 + } + return *c.MaxAttempts +} + +func (c retryConfig) getWaitMin() time.Duration { + if c.WaitMin == nil { + return 0 + } + return *c.WaitMin +} + +func (c retryConfig) getWaitMax() time.Duration { + if c.WaitMax == nil { + return 0 + } + return *c.WaitMax +} diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 3151c8136ffe..062a8c7c457a 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ 
b/x-pack/filebeat/input/websocket/input.go @@ -3,3 +3,346 @@ // you may not use this file except in compliance with the Elastic License. package websocket + +import ( + "context" + "errors" + "fmt" + "reflect" + "strings" + "time" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + "github.com/elastic/beats/v7/libbeat/beat" + "github.com/elastic/beats/v7/libbeat/feature" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/go-concert/ctxtool" + "github.com/google/cel-go/cel" + "github.com/gorilla/websocket" + "google.golang.org/protobuf/types/known/structpb" +) + +type input struct { + time func() time.Time + config config +} + +const ( + inputName string = "websocket" + root string = "state" +) + +func Plugin(log *logp.Logger, store inputcursor.StateStore) v2.Plugin { + return v2.Plugin{ + Name: inputName, + Stability: feature.Experimental, + Deprecated: false, + Info: "Websocket Input", + Doc: "Collect data from websocket api endpoints", + Manager: NewInputManager(log, store), + } +} + +func (input) Name() string { return inputName } + +func (input) Test(src inputcursor.Source, _ v2.TestContext) error { + cfg := src.(*source).cfg + if !wantClient(cfg) { + return nil + } + // return test(cfg.Resource.URL.URL) + return nil +} + +// Run starts the input and blocks until it ends completes. It will return on +// context cancellation or type invalidity errors, any other error will be retried. +func (input) Run(env v2.Context, src inputcursor.Source, crsr inputcursor.Cursor, pub inputcursor.Publisher) error { + var cursor map[string]interface{} + if !crsr.IsNew() { // Allow the user to bootstrap the program if needed. 
+ err := crsr.Unpack(&cursor) + if err != nil { + return err + } + } + //return nil + return input{}.run(env, src.(*source), cursor, pub) +} + +func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, pub inputcursor.Publisher) error { + cfg := src.cfg + log := env.Logger.With("input_url", cfg.Resource.URL) + + metrics := newInputMetrics(env.ID) + defer metrics.Close() + metrics.resource.Set(cfg.Resource.URL.String()) + + ctx := ctxtool.FromCanceller(env.Cancelation) + prg, err := newProgram(ctx, cfg.Program, root, log) + if err != nil { + return err + } + var state map[string]interface{} + if cfg.State == nil { + state = make(map[string]interface{}) + } else { + state = cfg.State + } + if cursor != nil { + state["cursor"] = cursor + } + + // websocket client + headers := formHeader(cfg) + url := cfg.Resource.URL.String() + c, _, err := websocket.DefaultDialer.Dial(url, headers) + if err != nil { + log.Errorw("failed to establish websocket connection", "error", err) + return err + } + defer c.Close() + + done := make(chan struct{}) + go func() { + defer close(done) + for { + _, message, err := c.ReadMessage() + if err != nil { + log.Errorw("failed to read websocket data", "error", err) + return + } + state["response"] = message + log.Debugw("received websocket message", logp.Namespace("websocket"), string(message)) + err = i.processAndPublishData(ctx, metrics, prg, state, cursor, pub, log) + if err != nil { + log.Errorw("failed to process and publish data", "error", err) + return + } + } + }() + <-done + + return nil +} + +// processAndPublishData processes the data in state, updates the cursor and publishes it to the publisher. +// the CEL program here only executes a single time, since the websocket connection is persistent and events are received and processed in real time. 
+func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics, prg cel.Program, + state map[string]interface{}, cursor map[string]interface{}, pub inputcursor.Publisher, log *logp.Logger) error { + goodCursor := cursor + start := i.now().In(time.UTC) + state, err := evalWith(ctx, prg, state, start) + log.Debugw("response state", logp.Namespace("websocket"), "state") + if err != nil { + switch { + case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded): + return err + } + log.Errorw("failed evaluation", "error", err) + } + metrics.celProcessingTime.Update(time.Since(start).Nanoseconds()) + log.Debugw("cel state", logp.Namespace("websocket"), "state") + + e, ok := state["events"] + if !ok { + log.Errorw("unexpected missing events from evaluation") + } + var events []interface{} + switch e := e.(type) { + case []interface{}: + if len(e) == 0 { + return nil + } + events = e + case map[string]interface{}: + if e == nil { + return nil + } + log.Debugw("single event object returned by evaluation", "event", e) + events = []interface{}{e} + default: + return fmt.Errorf("unexpected type returned for evaluation events: %T", e) + } + + metrics.eventsReceived.Add(uint64(len(events))) + + // Drop events from state. If we fail during the publication, + // we will reprocess these events. + delete(state, "events") + + // Get cursors if they exist. + var ( + cursors []interface{} + singleCursor bool + ) + if c, ok := state["cursor"]; ok { + cursors, ok = c.([]interface{}) + if ok { + if len(cursors) != len(events) { + log.Errorw("unexpected cursor list length", "cursors", len(cursors), "events", len(events)) + // But try to continue. + if len(cursors) < len(events) { + cursors = nil + } + } + } else { + cursors = []interface{}{c} + singleCursor = true + } + } + // Drop old cursor from state. This will be replaced with + // the current cursor object below; it is an array now. 
+ delete(state, "cursor") + + //start = time.Now() + var hadPublicationError bool + for i, e := range events { + event, ok := e.(map[string]interface{}) + if !ok { + return fmt.Errorf("unexpected type returned for evaluation events: %T", e) + } + var pubCursor interface{} + if cursors != nil { + if singleCursor { + // Only set the cursor for publication at the last event + // when a single cursor object has been provided. + if i == len(events)-1 { + goodCursor = cursor + cursor, ok = cursors[0].(map[string]interface{}) + if !ok { + return fmt.Errorf("unexpected type returned for evaluation cursor element: %T", cursors[0]) + } + pubCursor = cursor + } + } else { + goodCursor = cursor + cursor, ok = cursors[i].(map[string]interface{}) + if !ok { + return fmt.Errorf("unexpected type returned for evaluation cursor element: %T", cursors[i]) + } + pubCursor = cursor + } + } + fmt.Printf("PUBLISH CURSOR: %v\n", pubCursor) + // Publish the event. + err = pub.Publish(beat.Event{ + Timestamp: time.Now(), + Fields: event, + }, pubCursor) + if err != nil { + hadPublicationError = true + log.Errorw("error publishing event", "error", err) + cursors = nil // We are lost, so retry with this event's cursor, + continue // but continue with the events that we have without + // advancing the cursor. This allows us to potentially publish the + // events we have now, with a fallback to the last guaranteed + // correctly published cursor. + } + metrics.eventsPublished.Add(1) + + err = ctx.Err() + if err != nil { + return err + } + } + + // Advance the cursor to the final state if there was no error during + // publications. This is needed to transition to the next set of events. + if !hadPublicationError { + goodCursor = cursor + } + + // Replace the last known good cursor. 
+ state["cursor"] = goodCursor + + switch { + case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded): + log.Infof("input stopped because context was cancelled with: %v", err) + err = nil + } + return err +} + +func evalWith(ctx context.Context, prg cel.Program, state map[string]interface{}, now time.Time) (map[string]interface{}, error) { + out, _, err := prg.ContextEval(ctx, map[string]interface{}{ + // Replace global program "now" with current time. This is necessary + // as the lib.Time now global is static at program instantiation time + // which will persist over multiple evaluations. The lib.Time behaviour + // is correct for mito where CEL program instances live for only a + // single evaluation. Rather than incurring the cost of creating a new + // cel.Program for each evaluation, shadow lib.Time's now with a new + // value for each eval. We retain the lib.Time now global for + // compatibility between CEL programs developed in mito with programs + // run in the input. + "now": now, + root: state, + }) + if e := ctx.Err(); e != nil { + err = e + } + if err != nil { + state["events"] = errorMessage(fmt.Sprintf("failed eval: %v", err)) + clearWantMore(state) + return state, fmt.Errorf("failed eval: %w", err) + } + + v, err := out.ConvertToNative(reflect.TypeOf((*structpb.Struct)(nil))) + if err != nil { + state["events"] = errorMessage(fmt.Sprintf("failed proto conversion: %v", err)) + clearWantMore(state) + return state, fmt.Errorf("failed proto conversion: %w", err) + } + switch v := v.(type) { + case *structpb.Struct: + return v.AsMap(), nil + default: + // This should never happen. + errMsg := fmt.Sprintf("unexpected native conversion type: %T", v) + state["events"] = errorMessage(errMsg) + clearWantMore(state) + return state, errors.New(errMsg) + } +} + +// now is time.Now with a modifiable time source. 
+func (i input) now() time.Time { + if i.time == nil { + return time.Now() + } + return i.time() +} + +// clearWantMore sets the state to not request additional work in a periodic evaluation. +// It leaves state intact if there is no "want_more" element, and sets the element to false +// if there is. This is necessary instead of just doing delete(state, "want_more") as +// client CEL code may expect the want_more field to be present. +func clearWantMore(state map[string]interface{}) { + if _, ok := state["want_more"]; ok { + state["want_more"] = false + } +} + +func errorMessage(msg string) map[string]interface{} { + return map[string]interface{}{"error": map[string]interface{}{"message": msg}} +} + +func wantClient(cfg config) bool { + switch scheme, _, _ := strings.Cut(cfg.Resource.URL.Scheme, "+"); scheme { + case "ws", "wss": + return true + default: + return false + } +} + +func formHeader(cfg config) map[string][]string { + header := make(map[string][]string) + if cfg.Auth.ApiKey != nil { + header[cfg.Auth.ApiKey.Header] = []string{cfg.Auth.ApiKey.Value} + } else if cfg.Auth.BearerToken != "" { + header["Authorization"] = []string{"Bearer " + cfg.Auth.BearerToken} + } else if cfg.Auth.BasicToken != "" { + header["Authorization"] = []string{"Basic " + cfg.Auth.BasicToken} + } + return header +} diff --git a/x-pack/filebeat/input/websocket/input_manager.go b/x-pack/filebeat/input/websocket/input_manager.go new file mode 100644 index 000000000000..836d2d10e06c --- /dev/null +++ b/x-pack/filebeat/input/websocket/input_manager.go @@ -0,0 +1,90 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package websocket + +import ( + "github.com/elastic/go-concert/unison" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + conf "github.com/elastic/elastic-agent-libs/config" + "github.com/elastic/elastic-agent-libs/logp" +) + +// inputManager wraps one stateless input manager +// and one cursor input manager. It will create one or the other +// based on the config that is passed. +type InputManager struct { + cursor *inputcursor.InputManager +} + +var _ v2.InputManager = InputManager{} + +func NewInputManager(log *logp.Logger, store inputcursor.StateStore) InputManager { + return InputManager{ + cursor: &inputcursor.InputManager{ + Logger: log, + StateStore: store, + Type: inputName, + Configure: cursorConfigure, + }, + } +} + +func cursorConfigure(cfg *conf.C) ([]inputcursor.Source, inputcursor.Input, error) { + src := &source{cfg: defaultConfig()} + if err := cfg.Unpack(&src.cfg); err != nil { + return nil, nil, err + } + + if src.cfg.Program == "" { + // set default program + src.cfg.Program = ` + bytes(state.response).decode_json().as(inner_body,{ + "events": { + "message": inner_body.encode_json(), + }, + "cursor": { + "last_event_ts": ( + inner_body.size() > 0 ? + ( + has(state.cursor) && has(state.cursor.last_event_ts) && + inner_body.ts < state.cursor.last_event_ts ? + state.cursor.last_event_ts + : + inner_body.ts + ) + : + ( + has(state.cursor) && has(state.cursor.last_event_ts) ? + state.cursor.last_event_ts + : + null + ) + ) + } + }) + ` + } + return []inputcursor.Source{src}, input{}, nil +} + +type source struct{ cfg config } + +func (s *source) Name() string { return s.cfg.Resource.URL.String() } + +// Init initializes both wrapped input managers. +func (m InputManager) Init(grp unison.Group, mode v2.Mode) error { + return m.cursor.Init(grp, mode) +} + +// Create creates a cursor input manager. 
+func (m InputManager) Create(cfg *conf.C) (v2.Input, error) { + config := defaultConfig() + if err := cfg.Unpack(&config); err != nil { + return nil, err + } + return m.cursor.Create(cfg) +} diff --git a/x-pack/filebeat/input/websocket/metrics.go b/x-pack/filebeat/input/websocket/metrics.go new file mode 100644 index 000000000000..61aa130c3c83 --- /dev/null +++ b/x-pack/filebeat/input/websocket/metrics.go @@ -0,0 +1,51 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package websocket + +import ( + "github.com/elastic/beats/v7/libbeat/monitoring/inputmon" + "github.com/elastic/elastic-agent-libs/monitoring" + "github.com/elastic/elastic-agent-libs/monitoring/adapter" + + "github.com/rcrowley/go-metrics" +) + +// inputMetrics handles the input's metric reporting. +type inputMetrics struct { + unregister func() + resource *monitoring.String // URL-ish of input resource + errorsTotal *monitoring.Uint // number of errors encountered + decodeErrorsTotal *monitoring.Uint // number of decode errors encountered + //sourceLagTime *monitoring.Uint // time between the event's timestamp and the time it was received + //executions *monitoring.Uint // times the CEL program has been executed + receivedBytesTotal *monitoring.Uint // number of bytes received + eventsReceived *monitoring.Uint // number of events received + eventsPublished *monitoring.Uint // number of events published + celProcessingTime metrics.Sample // histogram of the elapsed successful cel program processing times in nanoseconds +} + +func newInputMetrics(id string) *inputMetrics { + reg, unreg := inputmon.NewInputRegistry(inputName, id, nil) + out := &inputMetrics{ + unregister: unreg, + resource: monitoring.NewString(reg, "resource"), + errorsTotal: monitoring.NewUint(reg, "errors_total"), + decodeErrorsTotal: 
monitoring.NewUint(reg, "decode_errors_total"), + //sourceLagTime: monitoring.NewUint(reg, "source_lag_time"), + //executions: monitoring.NewUint(reg, "cel_executions"), + receivedBytesTotal: monitoring.NewUint(reg, "received_bytes_total"), + eventsReceived: monitoring.NewUint(reg, "events_received_total"), + eventsPublished: monitoring.NewUint(reg, "events_published_total"), + celProcessingTime: metrics.NewUniformSample(1024), + } + _ = adapter.NewGoMetrics(reg, "cel_processing_time", adapter.Accept). + Register("histogram", metrics.NewHistogram(out.celProcessingTime)) + + return out +} + +func (m *inputMetrics) Close() { + m.unregister() +} From 9819004b48d5308202c585d06696a9247612ad1c Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Mon, 29 Jan 2024 18:22:10 +0530 Subject: [PATCH 03/25] updated go mod and NOTICE --- NOTICE.txt | 64 +++++++++++++++++++++++++++--------------------------- go.mod | 2 +- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/NOTICE.txt b/NOTICE.txt index c803ff33e8ea..1e75ed1bdbc8 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -18421,6 +18421,38 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- +Dependency : github.com/gorilla/websocket +Version: v1.4.2 +Licence type (autodetected): BSD-2-Clause +-------------------------------------------------------------------------------- + +Contents of probable licence file $GOMODCACHE/github.com/gorilla/websocket@v1.4.2/LICENSE: + +Copyright (c) 2013 The Gorilla WebSocket Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + -------------------------------------------------------------------------------- Dependency : github.com/h2non/filetype Version: v1.1.1 @@ -41504,38 +41536,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- -Dependency : github.com/gorilla/websocket -Version: v1.4.2 -Licence type (autodetected): BSD-2-Clause --------------------------------------------------------------------------------- - -Contents of probable licence file $GOMODCACHE/github.com/gorilla/websocket@v1.4.2/LICENSE: - -Copyright (c) 2013 The Gorilla WebSocket Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -------------------------------------------------------------------------------- Dependency : github.com/hashicorp/cronexpr Version: v1.1.0 diff --git a/go.mod b/go.mod index a7044889fac4..faf9dba39511 100644 --- a/go.mod +++ b/go.mod @@ -212,6 +212,7 @@ require ( github.com/googleapis/gax-go/v2 v2.12.0 github.com/gorilla/handlers v1.5.1 github.com/gorilla/mux v1.8.0 + github.com/gorilla/websocket v1.4.2 github.com/icholy/digest v0.1.22 github.com/lestrrat-go/jwx/v2 v2.0.11 github.com/otiai10/copy v1.12.0 @@ -297,7 +298,6 @@ require ( github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/googleapis/enterprise-certificate-proxy v0.2.4 // indirect github.com/googleapis/gnostic v0.5.5 // indirect - github.com/gorilla/websocket v1.4.2 // indirect github.com/hashicorp/cronexpr v1.1.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect From f2056a067fc317c091f63f617843b43c8c196ce6 Mon Sep 17 00:00:00 2001 From: Shourie 
Ganguly Date: Tue, 30 Jan 2024 19:39:31 +0530 Subject: [PATCH 04/25] added regex support to cel engine, added all metrics params to relevant code blocks --- x-pack/filebeat/input/websocket/cel.go | 41 +++++++++++++-- x-pack/filebeat/input/websocket/config.go | 36 ++++++++----- x-pack/filebeat/input/websocket/input.go | 50 ++++++++++++++++--- .../filebeat/input/websocket/input_manager.go | 19 ------- x-pack/filebeat/input/websocket/metrics.go | 28 +++++------ 5 files changed, 113 insertions(+), 61 deletions(-) diff --git a/x-pack/filebeat/input/websocket/cel.go b/x-pack/filebeat/input/websocket/cel.go index fb8f980d1fd6..63ccc843fc45 100644 --- a/x-pack/filebeat/input/websocket/cel.go +++ b/x-pack/filebeat/input/websocket/cel.go @@ -9,11 +9,13 @@ import ( "context" "fmt" "io" + "regexp" "github.com/elastic/beats/v7/libbeat/version" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/useragent" "github.com/elastic/mito/lib" + "github.com/google/cel-go/cel" "github.com/google/cel-go/checker/decls" ) @@ -31,10 +33,25 @@ var ( } ) +func regexpsFromConfig(cfg config) (map[string]*regexp.Regexp, error) { + if len(cfg.Regexps) == 0 { + return nil, nil + } + patterns := make(map[string]*regexp.Regexp) + for name, expr := range cfg.Regexps { + var err error + patterns[name], err = regexp.Compile(expr) + if err != nil { + return nil, err + } + } + return patterns, nil +} + // The Filebeat user-agent is provided to the program as useragent. 
var userAgent = useragent.UserAgent("Filebeat", version.GetDefaultVersion(), version.Commit(), version.BuildTime().String()) -func newProgram(ctx context.Context, src, root string, log *logp.Logger) (cel.Program, error) { +func newProgram(ctx context.Context, src, root string, patterns map[string]*regexp.Regexp, log *logp.Logger) (cel.Program, *cel.Ast, error) { opts := []cel.EnvOption{ cel.Declarations(decls.NewVar(root, decls.Dyn)), cel.OptionalTypes(cel.OptionalTypesVersion(lib.OptionalTypesVersion)), @@ -44,8 +61,10 @@ func newProgram(ctx context.Context, src, root string, log *logp.Logger) (cel.Pr lib.Strings(), lib.Time(), lib.Try(), + lib.Debug(debug(log)), lib.File(mimetypes), lib.MIME(mimetypes), + lib.Regexp(patterns), lib.Globals(map[string]interface{}{ "useragent": userAgent, }), @@ -53,17 +72,29 @@ func newProgram(ctx context.Context, src, root string, log *logp.Logger) (cel.Pr env, err := cel.NewEnv(opts...) if err != nil { - return nil, fmt.Errorf("failed to create env: %w", err) + return nil, nil, fmt.Errorf("failed to create env: %w", err) } ast, iss := env.Compile(src) if iss.Err() != nil { - return nil, fmt.Errorf("failed compilation: %w", iss.Err()) + return nil, nil, fmt.Errorf("failed compilation: %w", iss.Err()) } prg, err := env.Program(ast) if err != nil { - return nil, fmt.Errorf("failed program instantiation: %w", err) + return nil, nil, fmt.Errorf("failed program instantiation: %w", err) + } + return prg, ast, nil +} + +func debug(log *logp.Logger) func(string, any) { + log = log.Named("websocket_debug") + return func(tag string, value any) { + level := "DEBUG" + if _, ok := value.(error); ok { + level = "ERROR" + } + + log.Debugw(level, "tag", tag, "value", value) } - return prg, nil } diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index e9dee50ad6e2..9d79bdec610d 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -5,25 +5,21 @@ 
package websocket import ( + "context" "errors" + "fmt" "net/url" + "regexp" "time" + "github.com/elastic/elastic-agent-libs/logp" + "gopkg.in/natefinch/lumberjack.v2" ) -const defaultMaxExecutions = 1000 - type config struct { // Program is the CEL program to be run for each polling. - Program string `config:"program"` - // MaxExecutions is the maximum number of times a single - // periodic CEL execution loop may repeat due to a true - // "want_more" field. If it is nil a sensible default is - // used. - MaxExecutions *int `config:"max_executions"` - // Regexps is the set of regular expression to be made - // available to the program. + Program string `config:"program"` Regexps map[string]string `config:"regexp"` // State is the initial state to be provided to the // program. If it has a cursor field, that field will @@ -69,6 +65,23 @@ func (u *urlConfig) Unpack(in string) error { return nil } +func (c config) Validate() error { + _, err := regexpsFromConfig(c) + if err != nil { + return fmt.Errorf("failed to check regular expressions: %w", err) + } + + var patterns map[string]*regexp.Regexp + if len(c.Regexps) != 0 { + patterns = map[string]*regexp.Regexp{".": nil} + } + _, _, err = newProgram(context.Background(), c.Program, root, patterns, logp.L().Named("input.cel")) + if err != nil { + return fmt.Errorf("failed to check program: %w", err) + } + return nil +} + type retryConfig struct { MaxAttempts *int `config:"max_attempts"` WaitMin *time.Duration `config:"wait_min"` @@ -88,14 +101,11 @@ func (c retryConfig) Validate() error { } func defaultConfig() config { - maxExecutions := defaultMaxExecutions maxAttempts := 5 waitMin := time.Second waitMax := time.Minute return config{ - MaxExecutions: &maxExecutions, - Resource: &ResourceConfig{ Retry: retryConfig{ MaxAttempts: &maxAttempts, diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 062a8c7c457a..dde53dfed42f 100644 --- 
a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -18,6 +18,8 @@ import ( "github.com/elastic/beats/v7/libbeat/feature" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/go-concert/ctxtool" + "github.com/elastic/mito/lib" + "github.com/google/cel-go/cel" "github.com/gorilla/websocket" "google.golang.org/protobuf/types/known/structpb" @@ -55,7 +57,7 @@ func (input) Test(src inputcursor.Source, _ v2.TestContext) error { return nil } -// Run starts the input and blocks until it ends completes. It will return on +// Run starts the input and blocks as long as websocket connections are alive. It will return on // context cancellation or type invalidity errors, any other error will be retried. func (input) Run(env v2.Context, src inputcursor.Source, crsr inputcursor.Cursor, pub inputcursor.Publisher) error { var cursor map[string]interface{} @@ -76,10 +78,19 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p metrics := newInputMetrics(env.ID) defer metrics.Close() metrics.resource.Set(cfg.Resource.URL.String()) + metrics.errorsTotal.Set(0) ctx := ctxtool.FromCanceller(env.Cancelation) - prg, err := newProgram(ctx, cfg.Program, root, log) + + patterns, err := regexpsFromConfig(cfg) if err != nil { + metrics.errorsTotal.Inc() + return err + } + + prg, ast, err := newProgram(ctx, cfg.Program, root, patterns, log) + if err != nil { + metrics.errorsTotal.Inc() return err } var state map[string]interface{} @@ -97,6 +108,7 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p url := cfg.Resource.URL.String() c, _, err := websocket.DefaultDialer.Dial(url, headers) if err != nil { + metrics.errorsTotal.Inc() log.Errorw("failed to establish websocket connection", "error", err) return err } @@ -108,35 +120,52 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p for { _, message, err := c.ReadMessage() if err != nil { + 
metrics.errorsTotal.Inc() + if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) { + log.Debugw("websocket connection closed", "error", err) + } log.Errorw("failed to read websocket data", "error", err) + return } + metrics.receivedBytesTotal.Add(uint64(len(message))) state["response"] = message log.Debugw("received websocket message", logp.Namespace("websocket"), string(message)) - err = i.processAndPublishData(ctx, metrics, prg, state, cursor, pub, log) + err = i.processAndPublishData(ctx, metrics, prg, ast, state, cursor, pub, log) if err != nil { + metrics.errorsTotal.Inc() log.Errorw("failed to process and publish data", "error", err) return } } }() - <-done - return nil + // blocks until done is closed or context is cancelled + for { + select { + case <-done: + return nil + case <-ctx.Done(): + return ctx.Err() + } + } } // processAndPublishData processes the data in state, updates the cursor and publishes it to the publisher. // the CEL program here only executes a single time, since the websocket connection is persistent and events are received and processed in real time. 
-func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics, prg cel.Program, +func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics, prg cel.Program, ast *cel.Ast, state map[string]interface{}, cursor map[string]interface{}, pub inputcursor.Publisher, log *logp.Logger) error { goodCursor := cursor start := i.now().In(time.UTC) - state, err := evalWith(ctx, prg, state, start) + state, err := evalWith(ctx, prg, ast, state, start) log.Debugw("response state", logp.Namespace("websocket"), "state") if err != nil { + metrics.celEvalErrors.Add(1) switch { case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded): return err + default: + metrics.errorsTotal.Inc() } log.Errorw("failed evaluation", "error", err) } @@ -231,6 +260,7 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics }, pubCursor) if err != nil { hadPublicationError = true + metrics.errorsTotal.Inc() log.Errorw("error publishing event", "error", err) cursors = nil // We are lost, so retry with this event's cursor, continue // but continue with the events that we have without @@ -257,13 +287,14 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics switch { case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded): + metrics.errorsTotal.Inc() log.Infof("input stopped because context was cancelled with: %v", err) err = nil } return err } -func evalWith(ctx context.Context, prg cel.Program, state map[string]interface{}, now time.Time) (map[string]interface{}, error) { +func evalWith(ctx context.Context, prg cel.Program, ast *cel.Ast, state map[string]interface{}, now time.Time) (map[string]interface{}, error) { out, _, err := prg.ContextEval(ctx, map[string]interface{}{ // Replace global program "now" with current time. 
This is necessary // as the lib.Time now global is static at program instantiation time @@ -277,6 +308,9 @@ func evalWith(ctx context.Context, prg cel.Program, state map[string]interface{} "now": now, root: state, }) + if err != nil { + err = lib.DecoratedError{AST: ast, Err: err} + } if e := ctx.Err(); e != nil { err = e } diff --git a/x-pack/filebeat/input/websocket/input_manager.go b/x-pack/filebeat/input/websocket/input_manager.go index 836d2d10e06c..10ac9946faf9 100644 --- a/x-pack/filebeat/input/websocket/input_manager.go +++ b/x-pack/filebeat/input/websocket/input_manager.go @@ -45,25 +45,6 @@ func cursorConfigure(cfg *conf.C) ([]inputcursor.Source, inputcursor.Input, erro bytes(state.response).decode_json().as(inner_body,{ "events": { "message": inner_body.encode_json(), - }, - "cursor": { - "last_event_ts": ( - inner_body.size() > 0 ? - ( - has(state.cursor) && has(state.cursor.last_event_ts) && - inner_body.ts < state.cursor.last_event_ts ? - state.cursor.last_event_ts - : - inner_body.ts - ) - : - ( - has(state.cursor) && has(state.cursor.last_event_ts) ? - state.cursor.last_event_ts - : - null - ) - ) } }) ` diff --git a/x-pack/filebeat/input/websocket/metrics.go b/x-pack/filebeat/input/websocket/metrics.go index 61aa130c3c83..166c8218046b 100644 --- a/x-pack/filebeat/input/websocket/metrics.go +++ b/x-pack/filebeat/input/websocket/metrics.go @@ -14,27 +14,23 @@ import ( // inputMetrics handles the input's metric reporting. 
type inputMetrics struct { - unregister func() - resource *monitoring.String // URL-ish of input resource - errorsTotal *monitoring.Uint // number of errors encountered - decodeErrorsTotal *monitoring.Uint // number of decode errors encountered - //sourceLagTime *monitoring.Uint // time between the event's timestamp and the time it was received - //executions *monitoring.Uint // times the CEL program has been executed - receivedBytesTotal *monitoring.Uint // number of bytes received - eventsReceived *monitoring.Uint // number of events received - eventsPublished *monitoring.Uint // number of events published - celProcessingTime metrics.Sample // histogram of the elapsed successful cel program processing times in nanoseconds + unregister func() + resource *monitoring.String // URL-ish of input resource + celEvalErrors *monitoring.Uint // number of errors encountered during cel program evaluation + errorsTotal *monitoring.Uint // number of errors encountered + receivedBytesTotal *monitoring.Uint // number of bytes received + eventsReceived *monitoring.Uint // number of events received + eventsPublished *monitoring.Uint // number of events published + celProcessingTime metrics.Sample // histogram of the elapsed successful cel program processing times in nanoseconds } func newInputMetrics(id string) *inputMetrics { reg, unreg := inputmon.NewInputRegistry(inputName, id, nil) out := &inputMetrics{ - unregister: unreg, - resource: monitoring.NewString(reg, "resource"), - errorsTotal: monitoring.NewUint(reg, "errors_total"), - decodeErrorsTotal: monitoring.NewUint(reg, "decode_errors_total"), - //sourceLagTime: monitoring.NewUint(reg, "source_lag_time"), - //executions: monitoring.NewUint(reg, "cel_executions"), + unregister: unreg, + resource: monitoring.NewString(reg, "resource"), + celEvalErrors: monitoring.NewUint(reg, "cel_eval_errors"), + errorsTotal: monitoring.NewUint(reg, "errors_total"), receivedBytesTotal: monitoring.NewUint(reg, "received_bytes_total"), 
eventsReceived: monitoring.NewUint(reg, "events_received_total"), eventsPublished: monitoring.NewUint(reg, "events_published_total"), From de1b707e9924f55862c59948b3516f3549e40d19 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 30 Jan 2024 19:43:28 +0530 Subject: [PATCH 05/25] removed unused config from input struct --- x-pack/filebeat/input/websocket/input.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index dde53dfed42f..90ee9230a6ee 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -26,8 +26,7 @@ import ( ) type input struct { - time func() time.Time - config config + time func() time.Time } const ( @@ -252,7 +251,6 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics pubCursor = cursor } } - fmt.Printf("PUBLISH CURSOR: %v\n", pubCursor) // Publish the event. err = pub.Publish(beat.Event{ Timestamp: time.Now(), From 1f6da1ac39991aab747362c2b595d37ee0691182 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 30 Jan 2024 20:18:51 +0530 Subject: [PATCH 06/25] added redactor --- x-pack/filebeat/input/websocket/config.go | 26 +++-- x-pack/filebeat/input/websocket/input.go | 5 +- x-pack/filebeat/input/websocket/redact.go | 113 ++++++++++++++++++++++ 3 files changed, 136 insertions(+), 8 deletions(-) create mode 100644 x-pack/filebeat/input/websocket/redact.go diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index 9d79bdec610d..f5e6ad848740 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -13,13 +13,13 @@ import ( "time" "github.com/elastic/elastic-agent-libs/logp" - - "gopkg.in/natefinch/lumberjack.v2" ) type config struct { // Program is the CEL program to be run for each polling. 
- Program string `config:"program"` + Program string `config:"program"` + // Regexps is the set of regular expression to be made + // available to the program. Regexps map[string]string `config:"regexp"` // State is the initial state to be provided to the // program. If it has a cursor field, that field will @@ -30,12 +30,22 @@ type config struct { Auth authConfig `config:"auth"` // Resource Resource *ResourceConfig `config:"resource" validate:"required"` + // Redact is the debug log state redaction configuration. + Redact *redact `config:"redact"` } type ResourceConfig struct { - URL *urlConfig `config:"url" validate:"required"` - Retry retryConfig `config:"retry"` - Tracer *lumberjack.Logger `config:"tracer"` + URL *urlConfig `config:"url" validate:"required"` + Retry retryConfig `config:"retry"` +} + +type redact struct { + // Fields indicates which fields to apply redaction to prior + // to logging. + Fields []string `config:"fields"` + // Delete indicates that fields should be completely deleted + // before logging rather than redaction with a "*". 
+ Delete bool `config:"delete"` } type authConfig struct { @@ -66,6 +76,10 @@ func (u *urlConfig) Unpack(in string) error { } func (c config) Validate() error { + if c.Redact == nil { + logp.L().Named("input.websocket").Warn("missing recommended 'redact' configuration: " + + "see documentation for details: https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-websocket.html#_redact") + } _, err := regexpsFromConfig(c) if err != nil { return fmt.Errorf("failed to check regular expressions: %w", err) diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 90ee9230a6ee..cad4f7f78f23 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -27,6 +27,7 @@ import ( type input struct { time func() time.Time + cfg config } const ( @@ -72,6 +73,7 @@ func (input) Run(env v2.Context, src inputcursor.Source, crsr inputcursor.Cursor func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, pub inputcursor.Publisher) error { cfg := src.cfg + i.cfg = cfg log := env.Logger.With("input_url", cfg.Resource.URL) metrics := newInputMetrics(env.ID) @@ -157,7 +159,7 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics goodCursor := cursor start := i.now().In(time.UTC) state, err := evalWith(ctx, prg, ast, state, start) - log.Debugw("response state", logp.Namespace("websocket"), "state") + log.Debugw("cel engine eval state", logp.Namespace("websocket"), "state", redactor{state: state, cfg: i.cfg.Redact}) if err != nil { metrics.celEvalErrors.Add(1) switch { @@ -169,7 +171,6 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics log.Errorw("failed evaluation", "error", err) } metrics.celProcessingTime.Update(time.Since(start).Nanoseconds()) - log.Debugw("cel state", logp.Namespace("websocket"), "state") e, ok := state["events"] if !ok { diff --git a/x-pack/filebeat/input/websocket/redact.go 
b/x-pack/filebeat/input/websocket/redact.go new file mode 100644 index 000000000000..1450527340a0 --- /dev/null +++ b/x-pack/filebeat/input/websocket/redact.go @@ -0,0 +1,113 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package websocket + +import ( + "strings" + + "github.com/elastic/elastic-agent-libs/mapstr" +) + +// redactor implements lazy field redaction of sets of a mapstr.M. +type redactor struct { + state mapstr.M + cfg *redact +} + +// String renders the JSON corresponding to r.state after applying redaction +// operations. +func (r redactor) String() string { + if r.cfg == nil || len(r.cfg.Fields) == 0 { + return r.state.String() + } + c := make(mapstr.M, len(r.state)) + cloneMap(c, r.state) + for _, mask := range r.cfg.Fields { + if r.cfg.Delete { + walkMap(c, mask, func(parent mapstr.M, key string) { + delete(parent, key) + }) + continue + } + walkMap(c, mask, func(parent mapstr.M, key string) { + parent[key] = "*" + }) + } + return c.String() +} + +// cloneMap is an enhanced version of mapstr.M.Clone that handles cloning arrays +// within objects. Nested arrays are not handled. 
+func cloneMap(dst, src mapstr.M) { + for k, v := range src { + switch v := v.(type) { + case mapstr.M: + d := make(mapstr.M, len(v)) + dst[k] = d + cloneMap(d, v) + case map[string]interface{}: + d := make(map[string]interface{}, len(v)) + dst[k] = d + cloneMap(d, v) + case []mapstr.M: + a := make([]mapstr.M, 0, len(v)) + for _, m := range v { + d := make(mapstr.M, len(m)) + cloneMap(d, m) + a = append(a, d) + } + dst[k] = a + case []map[string]interface{}: + a := make([]map[string]interface{}, 0, len(v)) + for _, m := range v { + d := make(map[string]interface{}, len(m)) + cloneMap(d, m) + a = append(a, d) + } + dst[k] = a + default: + dst[k] = v + } + } +} + +// walkMap walks to all ends of the provided path in m and applies fn to the +// final element of each walk. Nested arrays are not handled. +// +//nolint:typecheck // We can ignore typecheck here since all variables are being used. +func walkMap(m mapstr.M, path string, fn func(parent mapstr.M, key string)) { + key, rest, more := strings.Cut(path, ".") + v, ok := m[key] + if !ok { + return + } + if !more { + fn(m, key) + return + } + switch v := v.(type) { + case mapstr.M: + walkMap(v, rest, fn) + case map[string]interface{}: + walkMap(v, rest, fn) + case []mapstr.M: + for _, m := range v { + walkMap(m, rest, fn) + } + case []map[string]interface{}: + for _, m := range v { + walkMap(m, rest, fn) + } + case []interface{}: + for _, v := range v { + switch m := v.(type) { + case mapstr.M: + walkMap(m, rest, fn) + case map[string]interface{}: + walkMap(m, rest, fn) + } + } + } +} From 85ad3e6255fa3e9d7bd43e19f4920def30b7e1f1 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 30 Jan 2024 20:20:29 +0530 Subject: [PATCH 07/25] removed cel references in logs --- x-pack/filebeat/input/websocket/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index f5e6ad848740..6f3367e1f6cb 100644 --- 
a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -89,7 +89,7 @@ func (c config) Validate() error { if len(c.Regexps) != 0 { patterns = map[string]*regexp.Regexp{".": nil} } - _, _, err = newProgram(context.Background(), c.Program, root, patterns, logp.L().Named("input.cel")) + _, _, err = newProgram(context.Background(), c.Program, root, patterns, logp.L().Named("input.websocket")) if err != nil { return fmt.Errorf("failed to check program: %w", err) } From 8e7ad350f8e30aaa38bb2962db0c77171b2bf138 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Thu, 1 Feb 2024 16:13:27 +0530 Subject: [PATCH 08/25] addressed draft PR suggestions and added more metrics --- x-pack/filebeat/input/websocket/input.go | 19 ++++++---- x-pack/filebeat/input/websocket/metrics.go | 40 +++++++++++++--------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index cad4f7f78f23..6ac4a5d350d8 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -51,7 +51,7 @@ func (input) Name() string { return inputName } func (input) Test(src inputcursor.Source, _ v2.TestContext) error { cfg := src.(*source).cfg if !wantClient(cfg) { - return nil + return fmt.Errorf("unsupported scheme: %s", cfg.Resource.URL.Scheme) } // return test(cfg.Resource.URL.URL) return nil @@ -67,7 +67,6 @@ func (input) Run(env v2.Context, src inputcursor.Source, crsr inputcursor.Cursor return err } } - //return nil return input{}.run(env, src.(*source), cursor, pub) } @@ -193,6 +192,8 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics return fmt.Errorf("unexpected type returned for evaluation events: %T", e) } + // We have a non-empty batch of events to process. + metrics.batchesReceived.Add(1) metrics.eventsReceived.Add(uint64(len(events))) // Drop events from state. 
If we fail during the publication, @@ -223,7 +224,7 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics // the current cursor object below; it is an array now. delete(state, "cursor") - //start = time.Now() + start = time.Now() var hadPublicationError bool for i, e := range events { event, ok := e.(map[string]interface{}) @@ -267,6 +268,9 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics // events we have now, with a fallback to the last guaranteed // correctly published cursor. } + if i == 0 { + metrics.batchesPublished.Add(1) + } metrics.eventsPublished.Add(1) err = ctx.Err() @@ -274,6 +278,8 @@ func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics return err } } + // calculate batch processing time + metrics.batchProcessingTime.Update(time.Since(start).Nanoseconds()) // Advance the cursor to the final state if there was no error during // publications. This is needed to transition to the next set of events. @@ -370,11 +376,12 @@ func wantClient(cfg config) bool { func formHeader(cfg config) map[string][]string { header := make(map[string][]string) - if cfg.Auth.ApiKey != nil { + switch { + case cfg.Auth.ApiKey != nil: header[cfg.Auth.ApiKey.Header] = []string{cfg.Auth.ApiKey.Value} - } else if cfg.Auth.BearerToken != "" { + case cfg.Auth.BearerToken != "": header["Authorization"] = []string{"Bearer " + cfg.Auth.BearerToken} - } else if cfg.Auth.BasicToken != "" { + case cfg.Auth.BasicToken != "": header["Authorization"] = []string{"Basic " + cfg.Auth.BasicToken} } return header diff --git a/x-pack/filebeat/input/websocket/metrics.go b/x-pack/filebeat/input/websocket/metrics.go index 166c8218046b..8b84ddd5d0df 100644 --- a/x-pack/filebeat/input/websocket/metrics.go +++ b/x-pack/filebeat/input/websocket/metrics.go @@ -14,30 +14,38 @@ import ( // inputMetrics handles the input's metric reporting. 
type inputMetrics struct { - unregister func() - resource *monitoring.String // URL-ish of input resource - celEvalErrors *monitoring.Uint // number of errors encountered during cel program evaluation - errorsTotal *monitoring.Uint // number of errors encountered - receivedBytesTotal *monitoring.Uint // number of bytes received - eventsReceived *monitoring.Uint // number of events received - eventsPublished *monitoring.Uint // number of events published - celProcessingTime metrics.Sample // histogram of the elapsed successful cel program processing times in nanoseconds + unregister func() + resource *monitoring.String // URL-ish of input resource + celEvalErrors *monitoring.Uint // number of errors encountered during cel program evaluation + batchesReceived *monitoring.Uint // number of event arrays received + errorsTotal *monitoring.Uint // number of errors encountered + receivedBytesTotal *monitoring.Uint // number of bytes received + eventsReceived *monitoring.Uint // number of events received + batchesPublished *monitoring.Uint // number of event arrays published + eventsPublished *monitoring.Uint // number of events published + celProcessingTime metrics.Sample // histogram of the elapsed successful cel program processing times in nanoseconds + batchProcessingTime metrics.Sample // histogram of the elapsed successful batch processing times in nanoseconds (time of receipt to time of ACK for non-empty batches). 
} func newInputMetrics(id string) *inputMetrics { reg, unreg := inputmon.NewInputRegistry(inputName, id, nil) out := &inputMetrics{ - unregister: unreg, - resource: monitoring.NewString(reg, "resource"), - celEvalErrors: monitoring.NewUint(reg, "cel_eval_errors"), - errorsTotal: monitoring.NewUint(reg, "errors_total"), - receivedBytesTotal: monitoring.NewUint(reg, "received_bytes_total"), - eventsReceived: monitoring.NewUint(reg, "events_received_total"), - eventsPublished: monitoring.NewUint(reg, "events_published_total"), - celProcessingTime: metrics.NewUniformSample(1024), + unregister: unreg, + resource: monitoring.NewString(reg, "resource"), + celEvalErrors: monitoring.NewUint(reg, "cel_eval_errors"), + batchesReceived: monitoring.NewUint(reg, "batches_received"), + errorsTotal: monitoring.NewUint(reg, "errors_total"), + receivedBytesTotal: monitoring.NewUint(reg, "received_bytes_total"), + eventsReceived: monitoring.NewUint(reg, "events_received_total"), + batchesPublished: monitoring.NewUint(reg, "batches_published_total"), + eventsPublished: monitoring.NewUint(reg, "events_published_total"), + celProcessingTime: metrics.NewUniformSample(1024), + batchProcessingTime: metrics.NewUniformSample(1024), } _ = adapter.NewGoMetrics(reg, "cel_processing_time", adapter.Accept). Register("histogram", metrics.NewHistogram(out.celProcessingTime)) + _ = adapter.NewGoMetrics(reg, "batch_processing_time", adapter.Accept). 
+ Register("histogram", metrics.NewHistogram(out.batchProcessingTime)) return out } From 4768d4aa38bbf4039b5a385ee6dd9879f271dfbc Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 00:53:06 +0530 Subject: [PATCH 09/25] added tests --- x-pack/filebeat/input/websocket/input.go | 10 +- x-pack/filebeat/input/websocket/input_test.go | 391 ++++++++++++++++++ .../filebeat/input/websocket/redact_test.go | 148 +++++++ 3 files changed, 546 insertions(+), 3 deletions(-) create mode 100644 x-pack/filebeat/input/websocket/input_test.go create mode 100644 x-pack/filebeat/input/websocket/redact_test.go diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 6ac4a5d350d8..28f7f7fc8145 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -53,7 +53,6 @@ func (input) Test(src inputcursor.Source, _ v2.TestContext) error { if !wantClient(cfg) { return fmt.Errorf("unsupported scheme: %s", cfg.Resource.URL.Scheme) } - // return test(cfg.Resource.URL.URL) return nil } @@ -115,6 +114,8 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p defer c.Close() done := make(chan struct{}) + errChan := make(chan error) + go func() { defer close(done) for { @@ -125,7 +126,7 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p log.Debugw("websocket connection closed", "error", err) } log.Errorw("failed to read websocket data", "error", err) - + errChan <- err return } metrics.receivedBytesTotal.Add(uint64(len(message))) @@ -135,14 +136,17 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p if err != nil { metrics.errorsTotal.Inc() log.Errorw("failed to process and publish data", "error", err) + errChan <- err return } } }() - // blocks until done is closed or context is cancelled + // blocks until done is closed , context is cancelled or an error is received for { select { + case err := 
<-errChan: + return err case <-done: return nil case <-ctx.Done(): diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go new file mode 100644 index 000000000000..2b264fc46d02 --- /dev/null +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -0,0 +1,391 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package websocket + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "reflect" + "sync" + "testing" + "time" + + v2 "github.com/elastic/beats/v7/filebeat/input/v2" + inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" + "github.com/elastic/beats/v7/libbeat/beat" + conf "github.com/elastic/elastic-agent-libs/config" + "github.com/elastic/elastic-agent-libs/logp" + "github.com/elastic/elastic-agent-libs/mapstr" + "github.com/google/go-cmp/cmp" + + "github.com/gorilla/websocket" +) + +// WebSocketHandler is a type for handling WebSocket messages. 
+type WebSocketHandler func(*testing.T, *websocket.Conn, []string) + +var inputTests = []struct { + name string + server func(*testing.T, WebSocketHandler, map[string]interface{}, []string) + handler WebSocketHandler + config map[string]interface{} + response []string + time func() time.Time + persistCursor map[string]interface{} + want []map[string]interface{} + wantCursor []map[string]interface{} + wantErr error +}{ + { + name: "single_event", + server: newWebSocketTestServer(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + }, + response: []string{` + { + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + "ts": "2017-08-17T14:54:12.949180-07:00", + "data": "2017-08-17T14:54:12.949180-07:00 example sendmail[30641]:v7HLqYbx029423: to=/dev/null, ctladdr= (8/0),delay=00:00:00, xdelay=00:00:00, mailer=*file*, tls_verify=NONE, pri=35342,dsn=2.0.0, stat=Sent", + "sm": { + "tls": { + "verify": "NONE" + }, + "stat": "Sent", + "qid": "v7HLqYbx029423", + "dsn": "2.0.0", + "mailer": "*file*", + "to": [ + "/dev/null" + ], + "ctladdr": " (8/0)", + "delay": "00:00:00", + "xdelay": "00:00:00", + "pri": 35342 + }, + "id": "ZeYGULpZmL5N0151HN1OyA" + }`}, + want: []map[string]interface{}{ + { + "pps": map[string]interface{}{ + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071", + }, + "ts": "2017-08-17T14:54:12.949180-07:00", + "data": "2017-08-17T14:54:12.949180-07:00 example sendmail[30641]:v7HLqYbx029423: to=/dev/null, ctladdr= (8/0),delay=00:00:00, xdelay=00:00:00, mailer=*file*, tls_verify=NONE, pri=35342,dsn=2.0.0, stat=Sent", + "sm": map[string]interface{}{ + "tls": map[string]interface{}{ + "verify": "NONE", + }, + "stat": "Sent", + "qid": "v7HLqYbx029423", + "dsn": "2.0.0", + "mailer": "*file*", + "to": []interface{}{ + "/dev/null", + }, + "ctladdr": " (8/0)", + "delay": "00:00:00", + "xdelay": 
"00:00:00", + "pri": float64(35342), + }, + "id": "ZeYGULpZmL5N0151HN1OyA", + }, + }, + }, + { + name: "multiple_events", + server: newWebSocketTestServer(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + }, + response: []string{` + { + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + "ts": "2017-08-17T14:54:12.949180-07:00", + "data": "2017-08-17T14:54:12.949180-07:00 example sendmail[30641]:v7HLqYbx029423: to=/dev/null, ctladdr= (8/0),delay=00:00:00, xdelay=00:00:00, mailer=*file*, tls_verify=NONE, pri=35342,dsn=2.0.0, stat=Sent", + "sm": { + "tls": { + "verify": "NONE" + }, + "stat": "Sent", + "qid": "v7HLqYbx029423", + "dsn": "2.0.0", + "mailer": "*file*", + "to": [ + "/dev/null" + ], + "ctladdr": " (8/0)", + "delay": "00:00:00", + "xdelay": "00:00:00", + "pri": 35342 + }, + "id": "ZeYGULpZmL5N0151HN1OyA" + }`, + `{ + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + "ts": "2017-08-17T14:54:12.949180-07:00", + "data": "2017-08-17T14:54:12.949180-07:00 example sendmail[30641]:v7HLqYbx029423: to=/dev/null, ctladdr= (8/0),delay=00:00:00, xdelay=00:00:00, mailer=*file*, tls_verify=NONE, pri=35342,dsn=2.0.0, stat=Sent", + "sm": { + "tls": { + "verify": "NONE" + }, + "stat": "Sent", + "qid": "v7HLqYbx029423", + "dsn": "2.0.0", + "mailer": "*file*", + "to": [ + "/dev/null" + ], + "ctladdr": " (8/0)", + "delay": "00:00:00", + "xdelay": "00:00:00", + "pri": 35342 + }, + "id": "ZeYGULpZmL5N0151HN1OyX" + }`}, + want: []map[string]interface{}{ + { + "pps": map[string]interface{}{ + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071", + }, + "ts": "2017-08-17T14:54:12.949180-07:00", + "data": "2017-08-17T14:54:12.949180-07:00 example sendmail[30641]:v7HLqYbx029423: to=/dev/null, ctladdr= (8/0),delay=00:00:00, xdelay=00:00:00, mailer=*file*, tls_verify=NONE, 
pri=35342,dsn=2.0.0, stat=Sent", + "sm": map[string]interface{}{ + "tls": map[string]interface{}{ + "verify": "NONE", + }, + "stat": "Sent", + "qid": "v7HLqYbx029423", + "dsn": "2.0.0", + "mailer": "*file*", + "to": []interface{}{ + "/dev/null", + }, + "ctladdr": " (8/0)", + "delay": "00:00:00", + "xdelay": "00:00:00", + "pri": float64(35342), + }, + "id": "ZeYGULpZmL5N0151HN1OyA", + }, + { + "pps": map[string]interface{}{ + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071", + }, + "ts": "2017-08-17T14:54:12.949180-07:00", + "data": "2017-08-17T14:54:12.949180-07:00 example sendmail[30641]:v7HLqYbx029423: to=/dev/null, ctladdr= (8/0),delay=00:00:00, xdelay=00:00:00, mailer=*file*, tls_verify=NONE, pri=35342,dsn=2.0.0, stat=Sent", + "sm": map[string]interface{}{ + "tls": map[string]interface{}{ + "verify": "NONE", + }, + "stat": "Sent", + "qid": "v7HLqYbx029423", + "dsn": "2.0.0", + "mailer": "*file*", + "to": []interface{}{ + "/dev/null", + }, + "ctladdr": " (8/0)", + "delay": "00:00:00", + "xdelay": "00:00:00", + "pri": float64(35342), + }, + "id": "ZeYGULpZmL5N0151HN1OyX", + }, + }, + }, + { + name: "bad_cursor", + server: newWebSocketTestServer(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + "cursor":["What's next?"], + })`, + }, + response: []string{` + { + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + }`}, + wantErr: fmt.Errorf("unexpected type returned for evaluation cursor element: %T", "What's next?"), + }, +} + +func TestInput(t *testing.T) { + // tests will ignore context cancelled errors, since they are expected + ctxCancelledError := fmt.Errorf("context canceled") + logp.TestingSetup() + for _, test := range inputTests { + t.Run(test.name, func(t *testing.T) { + if test.server != nil { + test.server(t, test.handler, test.config, test.response) + } + + cfg := 
conf.MustNewConfigFrom(test.config) + + conf := defaultConfig() + conf.Redact = &redact{} // Make sure we pass the redact requirement. + err := cfg.Unpack(&conf) + if err != nil { + t.Fatalf("unexpected error unpacking config: %v", err) + } + + name := input{}.Name() + if name != "websocket" { + t.Errorf(`unexpected input name: got:%q want:"websocket"`, name) + } + src := &source{conf} + err = input{}.Test(src, v2.TestContext{}) + if err != nil { + t.Fatalf("unexpected error running test: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 10000*time.Second) + defer cancel() + + v2Ctx := v2.Context{ + Logger: logp.NewLogger("websocket_test"), + ID: "test_id:" + test.name, + Cancelation: ctx, + } + var client publisher + client.done = func() { + if len(client.published) >= len(test.want) { + cancel() + } + } + + err = input{test.time, conf}.run(v2Ctx, src, test.persistCursor, &client) + if (fmt.Sprint(err) != fmt.Sprint(ctxCancelledError)) && (fmt.Sprint(err) != fmt.Sprint(test.wantErr)) { + t.Errorf("unexpected error from running input: got:%v want:%v", err, test.wantErr) + } + if test.wantErr != nil { + return + } + + if len(client.published) < len(test.want) { + t.Errorf("unexpected number of published events: got:%d want at least:%d", len(client.published), len(test.want)) + test.want = test.want[:len(client.published)] + } + client.published = client.published[:len(test.want)] + for i, got := range client.published { + if !reflect.DeepEqual(got.Fields, mapstr.M(test.want[i])) { + t.Errorf("unexpected result for event %d: got:- want:+\n%s", i, cmp.Diff(got.Fields, mapstr.M(test.want[i]))) + } + } + + switch { + case len(test.wantCursor) == 0 && len(client.cursors) == 0: + return + case len(test.wantCursor) == 0: + t.Errorf("unexpected cursors: %v", client.cursors) + return + } + if len(client.cursors) < len(test.wantCursor) { + t.Errorf("unexpected number of cursors events: got:%d want at least:%d", len(client.cursors), 
len(test.wantCursor)) + test.wantCursor = test.wantCursor[:len(client.published)] + } + client.published = client.published[:len(test.want)] + for i, got := range client.cursors { + if !reflect.DeepEqual(mapstr.M(got), mapstr.M(test.wantCursor[i])) { + t.Errorf("unexpected cursor for event %d: got:- want:+\n%s", i, cmp.Diff(got, test.wantCursor[i])) + } + } + }) + } +} + +var _ inputcursor.Publisher = (*publisher)(nil) + +type publisher struct { + done func() + mu sync.Mutex + published []beat.Event + cursors []map[string]interface{} +} + +func (p *publisher) Publish(e beat.Event, cursor interface{}) error { + p.mu.Lock() + p.published = append(p.published, e) + if cursor != nil { + c, ok := cursor.(map[string]interface{}) + if !ok { + return fmt.Errorf("invalid cursor type for testing: %T", cursor) + } + p.cursors = append(p.cursors, c) + } + p.done() + p.mu.Unlock() + return nil +} + +func newV2Context() (v2.Context, func()) { + ctx, cancel := context.WithCancel(context.Background()) + return v2.Context{ + Logger: logp.NewLogger("websocket_test"), + ID: "test_id:", + Cancelation: ctx, + }, cancel +} + +func newWebSocketTestServer(serve func(http.Handler) *httptest.Server) func(*testing.T, WebSocketHandler, map[string]interface{}, []string) { + return func(t *testing.T, handler WebSocketHandler, config map[string]interface{}, response []string) { + server := serve(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + upgrader := websocket.Upgrader{ + CheckOrigin: func(r *http.Request) bool { + return true + }, + } + + conn, err := upgrader.Upgrade(w, r, nil) + if err != nil { + t.Fatalf("error upgrading connection to WebSocket: %v", err) + return + } + + handler(t, conn, response) + })) + + config["resource.url"] = "ws" + server.URL[4:] + t.Cleanup(server.Close) + } +} + +// defaultHandler is a default handler for WebSocket connections. 
+func defaultHandler(t *testing.T, conn *websocket.Conn, response []string) { + for _, r := range response { + err := conn.WriteMessage(websocket.TextMessage, []byte(r)) + if err != nil { + t.Fatalf("error writing message to WebSocket: %v", err) + } + } +} diff --git a/x-pack/filebeat/input/websocket/redact_test.go b/x-pack/filebeat/input/websocket/redact_test.go new file mode 100644 index 000000000000..89798c246838 --- /dev/null +++ b/x-pack/filebeat/input/websocket/redact_test.go @@ -0,0 +1,148 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package websocket + +import ( + "fmt" + "testing" + + "github.com/elastic/elastic-agent-libs/mapstr" + + "github.com/google/go-cmp/cmp" +) + +var redactorTests = []struct { + name string + state mapstr.M + cfg *redact + + wantOrig string + wantRedact string +}{ + { + name: "nil_redact", + state: mapstr.M{ + "auth": mapstr.M{ + "user": "fred", + "pass": "top_secret", + }, + "other": "data", + }, + cfg: nil, + wantOrig: `{"auth":{"pass":"top_secret","user":"fred"},"other":"data"}`, + wantRedact: `{"auth":{"pass":"top_secret","user":"fred"},"other":"data"}`, + }, + { + name: "auth_no_delete", + state: mapstr.M{ + "auth": mapstr.M{ + "user": "fred", + "pass": "top_secret", + }, + "other": "data", + }, + cfg: &redact{ + Fields: []string{"auth"}, + Delete: false, + }, + wantOrig: `{"auth":{"pass":"top_secret","user":"fred"},"other":"data"}`, + wantRedact: `{"auth":"*","other":"data"}`, + }, + { + name: "auth_delete", + state: mapstr.M{ + "auth": mapstr.M{ + "user": "fred", + "pass": "top_secret", + }, + "other": "data", + }, + cfg: &redact{ + Fields: []string{"auth"}, + Delete: true, + }, + wantOrig: `{"auth":{"pass":"top_secret","user":"fred"},"other":"data"}`, + wantRedact: `{"other":"data"}`, + }, + { + name: "pass_no_delete", + state: 
mapstr.M{ + "auth": mapstr.M{ + "user": "fred", + "pass": "top_secret", + }, + "other": "data", + }, + cfg: &redact{ + Fields: []string{"auth.pass"}, + Delete: false, + }, + wantOrig: `{"auth":{"pass":"top_secret","user":"fred"},"other":"data"}`, + wantRedact: `{"auth":{"pass":"*","user":"fred"},"other":"data"}`, + }, + { + name: "pass_delete", + state: mapstr.M{ + "auth": mapstr.M{ + "user": "fred", + "pass": "top_secret", + }, + "other": "data", + }, + cfg: &redact{ + Fields: []string{"auth.pass"}, + Delete: true, + }, + wantOrig: `{"auth":{"pass":"top_secret","user":"fred"},"other":"data"}`, + wantRedact: `{"auth":{"user":"fred"},"other":"data"}`, + }, + { + name: "multi_cursor_no_delete", + state: mapstr.M{ + "cursor": []mapstr.M{ + {"key": "val_one", "other": "data"}, + {"key": "val_two", "other": "data"}, + }, + "other": "data", + }, + cfg: &redact{ + Fields: []string{"cursor.key"}, + Delete: false, + }, + wantOrig: `{"cursor":[{"key":"val_one","other":"data"},{"key":"val_two","other":"data"}],"other":"data"}`, + wantRedact: `{"cursor":[{"key":"*","other":"data"},{"key":"*","other":"data"}],"other":"data"}`, + }, + { + name: "multi_cursor_delete", + state: mapstr.M{ + "cursor": []mapstr.M{ + {"key": "val_one", "other": "data"}, + {"key": "val_two", "other": "data"}, + }, + "other": "data", + }, + cfg: &redact{ + Fields: []string{"cursor.key"}, + Delete: true, + }, + wantOrig: `{"cursor":[{"key":"val_one","other":"data"},{"key":"val_two","other":"data"}],"other":"data"}`, + wantRedact: `{"cursor":[{"other":"data"},{"other":"data"}],"other":"data"}`, + }, +} + +func TestRedactor(t *testing.T) { + for _, test := range redactorTests { + t.Run(test.name, func(t *testing.T) { + got := fmt.Sprint(redactor{state: test.state, cfg: test.cfg}) + orig := fmt.Sprint(test.state) + if orig != test.wantOrig { + t.Errorf("unexpected original state after redaction:\n--- got\n--- want\n%s", cmp.Diff(orig, test.wantOrig)) + } + if got != test.wantRedact { + t.Errorf("unexpected 
redaction:\n--- got\n--- want\n%s", cmp.Diff(got, test.wantRedact)) + } + }) + } +} From 8aaed7174539a5db00f9c10158f694561a47e68f Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 01:37:44 +0530 Subject: [PATCH 10/25] added retry function --- x-pack/filebeat/input/websocket/input.go | 48 ++++++++++++++++++- x-pack/filebeat/input/websocket/input_test.go | 2 +- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 28f7f7fc8145..a4b78aa6918c 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -8,6 +8,8 @@ import ( "context" "errors" "fmt" + "math" + "math/rand" "reflect" "strings" "time" @@ -123,7 +125,15 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p if err != nil { metrics.errorsTotal.Inc() if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) { - log.Debugw("websocket connection closed", "error", err) + log.Debugw("websocket connection closed, attempting to reconnect", "error", err) + c, err = connectWebSocketWithRetry(log, url, cfg.Resource.Retry) + if err != nil { + log.Errorw("failed to reconnect websocket", "error", err) + errChan <- err + return + } + log.Debugw("reconnected to websocket") + continue } log.Errorw("failed to read websocket data", "error", err) errChan <- err @@ -390,3 +400,39 @@ func formHeader(cfg config) map[string][]string { } return header } + +func connectWebSocketWithRetry(log *logp.Logger, url string, config retryConfig) (*websocket.Conn, error) { + var conn *websocket.Conn + var err error + + for attempt := 1; attempt <= *config.MaxAttempts; attempt++ { + conn, _, err = websocket.DefaultDialer.Dial(url, nil) + if err == nil { + return conn, nil + } + + log.Debugw("Attempt %d: WebSocket connection failed. 
Retrying...\n", attempt) + + waitTime := calculateWaitTime(config.WaitMin, config.WaitMax, attempt) + time.Sleep(waitTime) + } + + return nil, fmt.Errorf("failed to establish WebSocket connection after %d attempts", *config.MaxAttempts) +} + +func calculateWaitTime(waitMin, waitMax *time.Duration, attempt int) time.Duration { + if waitMin == nil || waitMax == nil { + return 0 + } + + // calculate exponential backoff with jitter + base := float64(*waitMin) + maxJitter := float64(*waitMax - *waitMin) + + backoff := base * math.Pow(2, float64(attempt-1)) + jitter := rand.Float64() * maxJitter + + waitTime := time.Duration(backoff + jitter) + + return waitTime +} diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go index 2b264fc46d02..338fa796f903 100644 --- a/x-pack/filebeat/input/websocket/input_test.go +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -269,7 +269,7 @@ func TestInput(t *testing.T) { t.Fatalf("unexpected error running test: %v", err) } - ctx, cancel := context.WithTimeout(context.Background(), 10000*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() v2Ctx := v2.Context{ From 9347ff6f269755b4ab3157e28648e9828021827f Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 01:46:40 +0530 Subject: [PATCH 11/25] updated changelog --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 5389ca6551b4..f1edd79b2512 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -188,6 +188,7 @@ Setting environmental variable ELASTIC_NETINFO:false in Elastic Agent pod will d - Relax TCP/UDP metric polling expectations to improve metric collection. {pull}37714[37714] - Add support for PEM-based Okta auth in HTTPJSON. {pull}37772[37772] - Prevent complete loss of long request trace data. 
{issue}37826[37826] {pull}37836[37836] +- Added experimental version of the Websocket Input. {pull}37774[37774] *Auditbeat* From f7b2d19594f484cc0746b7d88ddde026decf61a1 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 02:03:43 +0530 Subject: [PATCH 12/25] updated tests --- x-pack/filebeat/input/websocket/input_test.go | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go index 338fa796f903..ec7defb2aa0b 100644 --- a/x-pack/filebeat/input/websocket/input_test.go +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -20,8 +20,8 @@ import ( conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" - "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp" "github.com/gorilla/websocket" ) @@ -37,7 +37,6 @@ var inputTests = []struct { time func() time.Time persistCursor map[string]interface{} want []map[string]interface{} - wantCursor []map[string]interface{} wantErr error }{ { @@ -302,24 +301,6 @@ func TestInput(t *testing.T) { t.Errorf("unexpected result for event %d: got:- want:+\n%s", i, cmp.Diff(got.Fields, mapstr.M(test.want[i]))) } } - - switch { - case len(test.wantCursor) == 0 && len(client.cursors) == 0: - return - case len(test.wantCursor) == 0: - t.Errorf("unexpected cursors: %v", client.cursors) - return - } - if len(client.cursors) < len(test.wantCursor) { - t.Errorf("unexpected number of cursors events: got:%d want at least:%d", len(client.cursors), len(test.wantCursor)) - test.wantCursor = test.wantCursor[:len(client.published)] - } - client.published = client.published[:len(test.want)] - for i, got := range client.cursors { - if !reflect.DeepEqual(mapstr.M(got), mapstr.M(test.wantCursor[i])) { - t.Errorf("unexpected cursor for event %d: got:- want:+\n%s", i, cmp.Diff(got, test.wantCursor[i])) - } - } }) } } From 
b61850128ac8f3518568ec4e94d5090486aa726a Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 13:55:28 +0530 Subject: [PATCH 13/25] addressed PR suggestions, removed auto retry mechanism for the moment --- x-pack/filebeat/input/websocket/cel.go | 6 +- x-pack/filebeat/input/websocket/config.go | 66 +++---------- x-pack/filebeat/input/websocket/input.go | 94 ++++--------------- x-pack/filebeat/input/websocket/input_test.go | 15 +-- x-pack/filebeat/input/websocket/redact.go | 2 - 5 files changed, 36 insertions(+), 147 deletions(-) diff --git a/x-pack/filebeat/input/websocket/cel.go b/x-pack/filebeat/input/websocket/cel.go index 63ccc843fc45..594fe1f61f97 100644 --- a/x-pack/filebeat/input/websocket/cel.go +++ b/x-pack/filebeat/input/websocket/cel.go @@ -11,13 +11,13 @@ import ( "io" "regexp" + "github.com/google/cel-go/cel" + "github.com/google/cel-go/checker/decls" + "github.com/elastic/beats/v7/libbeat/version" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/useragent" "github.com/elastic/mito/lib" - - "github.com/google/cel-go/cel" - "github.com/google/cel-go/checker/decls" ) var ( diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index 6f3367e1f6cb..a7789034d3c2 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -6,11 +6,10 @@ package websocket import ( "context" - "errors" "fmt" "net/url" "regexp" - "time" + "strings" "github.com/elastic/elastic-agent-libs/logp" ) @@ -35,8 +34,7 @@ type config struct { } type ResourceConfig struct { - URL *urlConfig `config:"url" validate:"required"` - Retry retryConfig `config:"retry"` + URL *urlConfig `config:"url" validate:"required"` } type redact struct { @@ -93,60 +91,22 @@ func (c config) Validate() error { if err != nil { return fmt.Errorf("failed to check program: %w", err) } - return nil -} - -type retryConfig struct { - MaxAttempts *int `config:"max_attempts"` - 
WaitMin *time.Duration `config:"wait_min"` - WaitMax *time.Duration `config:"wait_max"` -} - -func (c retryConfig) Validate() error { - switch { - case c.MaxAttempts != nil && *c.MaxAttempts <= 0: - return errors.New("max_attempts must be greater than zero") - case c.WaitMin != nil && *c.WaitMin <= 0: - return errors.New("wait_min must be greater than zero") - case c.WaitMax != nil && *c.WaitMax <= 0: - return errors.New("wait_max must be greater than zero") + err = checkURLScheme(c.Resource.URL) + if err != nil { + return err } return nil } -func defaultConfig() config { - maxAttempts := 5 - waitMin := time.Second - waitMax := time.Minute - - return config{ - Resource: &ResourceConfig{ - Retry: retryConfig{ - MaxAttempts: &maxAttempts, - WaitMin: &waitMin, - WaitMax: &waitMax, - }, - }, +func checkURLScheme(url *urlConfig) error { + switch scheme, _, _ := strings.Cut(url.Scheme, "+"); scheme { + case "ws", "wss": + return nil + default: + return fmt.Errorf("unsupported scheme: %s", url.Scheme) } } -func (c retryConfig) getMaxAttempts() int { - if c.MaxAttempts == nil { - return 0 - } - return *c.MaxAttempts -} - -func (c retryConfig) getWaitMin() time.Duration { - if c.WaitMin == nil { - return 0 - } - return *c.WaitMin -} - -func (c retryConfig) getWaitMax() time.Duration { - if c.WaitMax == nil { - return 0 - } - return *c.WaitMax +func defaultConfig() config { + return config{} } diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index a4b78aa6918c..eca2a94fe415 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -8,10 +8,7 @@ import ( "context" "errors" "fmt" - "math" - "math/rand" "reflect" - "strings" "time" v2 "github.com/elastic/beats/v7/filebeat/input/v2" @@ -51,10 +48,6 @@ func Plugin(log *logp.Logger, store inputcursor.StateStore) v2.Plugin { func (input) Name() string { return inputName } func (input) Test(src inputcursor.Source, _ v2.TestContext) error { 
- cfg := src.(*source).cfg - if !wantClient(cfg) { - return fmt.Errorf("unsupported scheme: %s", cfg.Resource.URL.Scheme) - } return nil } @@ -107,7 +100,11 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p // websocket client headers := formHeader(cfg) url := cfg.Resource.URL.String() - c, _, err := websocket.DefaultDialer.Dial(url, headers) + c, resp, err := websocket.DefaultDialer.Dial(url, headers) + if resp != nil && resp.Body != nil { + log.Debugw("websocket connection response", "body", resp.Body) + resp.Body.Close() + } if err != nil { metrics.errorsTotal.Inc() log.Errorw("failed to establish websocket connection", "error", err) @@ -115,8 +112,7 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p } defer c.Close() - done := make(chan struct{}) - errChan := make(chan error) + done := make(chan error) go func() { defer close(done) @@ -125,18 +121,11 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p if err != nil { metrics.errorsTotal.Inc() if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) { - log.Debugw("websocket connection closed, attempting to reconnect", "error", err) - c, err = connectWebSocketWithRetry(log, url, cfg.Resource.Retry) - if err != nil { - log.Errorw("failed to reconnect websocket", "error", err) - errChan <- err - return - } - log.Debugw("reconnected to websocket") - continue + log.Errorw("websocket connection closed", "error", err) + } else { + log.Errorw("failed to read websocket data", "error", err) } - log.Errorw("failed to read websocket data", "error", err) - errChan <- err + done <- err return } metrics.receivedBytesTotal.Add(uint64(len(message))) @@ -146,22 +135,18 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p if err != nil { metrics.errorsTotal.Inc() log.Errorw("failed to process and publish data", "error", err) - errChan <- err + done <- err return } } }() - 
// blocks until done is closed , context is cancelled or an error is received - for { - select { - case err := <-errChan: - return err - case <-done: - return nil - case <-ctx.Done(): - return ctx.Err() - } + // blocks until done is closed, context is cancelled or an error is received + select { + case err := <-done: + return err + case <-ctx.Done(): + return ctx.Err() } } @@ -379,15 +364,6 @@ func errorMessage(msg string) map[string]interface{} { return map[string]interface{}{"error": map[string]interface{}{"message": msg}} } -func wantClient(cfg config) bool { - switch scheme, _, _ := strings.Cut(cfg.Resource.URL.Scheme, "+"); scheme { - case "ws", "wss": - return true - default: - return false - } -} - func formHeader(cfg config) map[string][]string { header := make(map[string][]string) switch { @@ -400,39 +376,3 @@ func formHeader(cfg config) map[string][]string { } return header } - -func connectWebSocketWithRetry(log *logp.Logger, url string, config retryConfig) (*websocket.Conn, error) { - var conn *websocket.Conn - var err error - - for attempt := 1; attempt <= *config.MaxAttempts; attempt++ { - conn, _, err = websocket.DefaultDialer.Dial(url, nil) - if err == nil { - return conn, nil - } - - log.Debugw("Attempt %d: WebSocket connection failed. 
Retrying...\n", attempt) - - waitTime := calculateWaitTime(config.WaitMin, config.WaitMax, attempt) - time.Sleep(waitTime) - } - - return nil, fmt.Errorf("failed to establish WebSocket connection after %d attempts", *config.MaxAttempts) -} - -func calculateWaitTime(waitMin, waitMax *time.Duration, attempt int) time.Duration { - if waitMin == nil || waitMax == nil { - return 0 - } - - // calculate exponential backoff with jitter - base := float64(*waitMin) - maxJitter := float64(*waitMax - *waitMin) - - backoff := base * math.Pow(2, float64(attempt-1)) - jitter := rand.Float64() * maxJitter - - waitTime := time.Duration(backoff + jitter) - - return waitTime -} diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go index ec7defb2aa0b..03a102d4e503 100644 --- a/x-pack/filebeat/input/websocket/input_test.go +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -14,15 +14,15 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" + "github.com/gorilla/websocket" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" "github.com/elastic/beats/v7/libbeat/beat" conf "github.com/elastic/elastic-agent-libs/config" "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/elastic-agent-libs/mapstr" - - "github.com/google/go-cmp/cmp" - "github.com/gorilla/websocket" ) // WebSocketHandler is a type for handling WebSocket messages. 
@@ -329,15 +329,6 @@ func (p *publisher) Publish(e beat.Event, cursor interface{}) error { return nil } -func newV2Context() (v2.Context, func()) { - ctx, cancel := context.WithCancel(context.Background()) - return v2.Context{ - Logger: logp.NewLogger("websocket_test"), - ID: "test_id:", - Cancelation: ctx, - }, cancel -} - func newWebSocketTestServer(serve func(http.Handler) *httptest.Server) func(*testing.T, WebSocketHandler, map[string]interface{}, []string) { return func(t *testing.T, handler WebSocketHandler, config map[string]interface{}, response []string) { server := serve(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/x-pack/filebeat/input/websocket/redact.go b/x-pack/filebeat/input/websocket/redact.go index 1450527340a0..86583f0691c1 100644 --- a/x-pack/filebeat/input/websocket/redact.go +++ b/x-pack/filebeat/input/websocket/redact.go @@ -75,8 +75,6 @@ func cloneMap(dst, src mapstr.M) { // walkMap walks to all ends of the provided path in m and applies fn to the // final element of each walk. Nested arrays are not handled. -// -//nolint:typecheck // We can ignore typecheck here since all variables are being used. 
func walkMap(m mapstr.M, path string, fn func(parent mapstr.M, key string)) { key, rest, more := strings.Cut(path, ".") v, ok := m[key] From 1e5b205ab6f6e0006fb1ddb30c2d3c89c0230097 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 13:56:50 +0530 Subject: [PATCH 14/25] addressed PR suggestions, removed auto retry mechanism for the moment --- x-pack/filebeat/input/websocket/input.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index eca2a94fe415..4cb7a8652cd0 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -11,6 +11,10 @@ import ( "reflect" "time" + "github.com/google/cel-go/cel" + "github.com/gorilla/websocket" + "google.golang.org/protobuf/types/known/structpb" + v2 "github.com/elastic/beats/v7/filebeat/input/v2" inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" "github.com/elastic/beats/v7/libbeat/beat" @@ -18,10 +22,6 @@ import ( "github.com/elastic/elastic-agent-libs/logp" "github.com/elastic/go-concert/ctxtool" "github.com/elastic/mito/lib" - - "github.com/google/cel-go/cel" - "github.com/gorilla/websocket" - "google.golang.org/protobuf/types/known/structpb" ) type input struct { From e9aa0b3a860ee633f8723ff58ff98a0438aa87dc Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 18:05:09 +0530 Subject: [PATCH 15/25] added documentation and updated codeowners --- .github/CODEOWNERS | 1 + .../docs/inputs/input-websocket.asciidoc | 265 ++++++++++++++++++ x-pack/filebeat/input/websocket/input.go | 2 +- x-pack/filebeat/input/websocket/metrics.go | 2 +- 4 files changed, 268 insertions(+), 2 deletions(-) create mode 100644 x-pack/filebeat/docs/inputs/input-websocket.asciidoc diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d3e40d854f57..5d02dbba1d47 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -121,6 +121,7 @@ CHANGELOG* 
/x-pack/filebeat/input/lumberjack/ @elastic/security-service-integrations /x-pack/filebeat/input/netflow/ @elastic/sec-deployment-and-devices /x-pack/filebeat/input/o365audit/ @elastic/security-service-integrations +/x-pack/filebeat/input/websocket/ @elastic/security-service-integrations /x-pack/filebeat/module/activemq @elastic/obs-infraobs-integrations /x-pack/filebeat/module/aws @elastic/obs-cloud-monitoring /x-pack/filebeat/module/awsfargate @elastic/obs-cloud-monitoring diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc new file mode 100644 index 000000000000..dc4851eadee0 --- /dev/null +++ b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -0,0 +1,265 @@ +[role="xpack"] + +:type: websocket +:mito_version: v1.8.0 +:mito_docs: https://pkg.go.dev/github.com/elastic/mito@{mito_version} + +[id="{beatname_lc}-input-{type}"] +=== Websocket Input +beta[] + +The `websocket` input reads messages from a websocket server or api endpoint. This input uses the `CEL engine` and the `mito` library internally to parse and process the messages. Having support for `CEL` allows you to parse and process the messages in a more flexible way. It has many similarities with the `cel` input as to how the `CEL` programs are written but deviates in the way the messages are read and processed. The `websocket` input is a `streaming` input and can only be used to read messages from a websocket server or api endpoint. + +This input supports: + +* Auth +** Basic +** Bearer +** API Key + +NOTE: The `websocket` input as of now does not support XML messages. Auto-reconnects are also not supported at the moment so reconnection will occur on input restart. + +==== Execution + +The execution environment provided for the input includes the functions, macros, and global variables provided by the mito library. +A single JSON object is provided as an input accessible through a `state` variable.
`state` contains a `response` map field and may contain arbitrary other fields configured via the input's `state` configuration. If the CEL program saves cursor states between executions of the program, the configured `state.cursor` value will be replaced by the saved cursor prior to execution. + +On start the `state` will be something like this: + +["source","json",subs="attributes"] +---- +{ + "response": { ... }, + "cursor": { ... }, + ... +} +---- +The `websocket` input creates a `response` field in the state map and attaches the websocket message to this field. All `CEL` programs written should act on this `response` field. Additional fields may be present at the root of the object and if the program tolerates it, the cursor value may be absent. Only the cursor is persisted over restarts, but all fields in state are retained between iterations of the processing loop except for the produced events array, see below. + +If the cursor is present the program should perform and process requests based on its value. +If cursor is not present the program must have alternative logic to determine what requests to make. + +After completion of a program's execution it should return a single object with a structure looking like this: + +["source","json",subs="attributes"] +---- +{ + "events": [ <1> + {...}, + ... + ], + "cursor": [ <2> + {...}, + ... + ], + "want_more": false <3> +} +---- + +<1> The `events` field must be present, but may be empty or null. If it is not empty, it must only have objects as elements. +The field could be an array or a single object that will be treated as an array with a single element. This depends completely on the websocket server or api endpoint. The `events` field is the array of events to be published to the output. Each event must be a JSON object.
+ +<2> If `cursor` is present it must either be a single object or an array with the same length as events; each element _i_ of the `cursor` will be the details for obtaining the events at and beyond event _i_ in the `events` array. If the `cursor` is a single object, it will be the details for obtaining events after the last event in the `events` array and will only be retained on successful publication of all the events in the `events` array. + +<3> Unlike in the `cel` input, the `want_more` field is always false. This is because the `websocket` input is a streaming input and will always be ready to receive more messages from the websocket server or api endpoint, however the `CEL` program will only be executed once for each message received. + +Example configuration: + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +# Read and process simple websocket messages from a local websocket server +- type: websocket + resource.url: ws://localhost:443/v1/stream + program: | + bytes(state.response).decode_json().as(inner_body,{ + "events": { + "message": inner_body.encode_json(), + } + }) +---- + +==== Debug state logging + +The Websocket input will log the complete state after evaluation when logging at the DEBUG level. +This will include any sensitive or secret information kept in the `state` object, and so DEBUG level logging should not be used in production when sensitive information is retained in the `state` object. See <<websocket-state-redact,`redact`>> configuration parameters for settings to exclude sensitive fields from DEBUG logs. + +==== Authentication +The Websocket input supports authentication via Basic token authentication, Bearer token authentication and authentication via Api-Key. Unlike REST inputs Basic Authentication contains a basic auth token, Bearer Authentication contains a bearer token and Api-Key contains an api key. These token/key values are added to the request headers and are not exposed to the `state` object.
For the Api-Key authentication, the header and value are both configurable since different servers might have different api-key header conventions. + +Example configurations with authentication: + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: websocket + auth.basic_token: "dXNlcjpwYXNzd29yZA==" + resource.url: wss://localhost:443/_stream +---- + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: websocket + auth.bearer_token: "dXNlcjpwYXNzd29yZA==" + resource.url: wss://localhost:443/_stream +---- + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: websocket + auth.api_key: + header: "x-api-key" + value: "dXNlcjpwYXNzd29yZA==" + resource.url: wss://localhost:443/_stream +---- + +[[input-state-websocket]] +==== Input state + +The `websocket` input keeps a runtime state between every message received. This state can be accessed by the CEL program and may contain arbitrary objects. +The state must contain a `response` map and may contain any object the user wishes to store in it. All objects are stored at runtime, except `cursor`, which has values that are persisted between restarts. + +==== Configuration options + +The `websocket` input supports the following configuration options plus the +<<{beatname_lc}-input-{type}-common-options>> described later. + +[[program-websocket]] +[float] +==== `program` + +The CEL program that is executed on each message received. This field should ideally be present but if not the default program given below is used. + +["source","yaml",subs="attributes"] +---- + program: | + bytes(state.response).decode_json().as(inner_body,{ + "events": { + "message": inner_body.encode_json(), + } + }) +---- + +[[state-websocket]] +[float] +==== `state` + +`state` is an optional object that is passed to the CEL program on the first execution. It is available to the executing program as the `state` variable. Except for the `state.cursor` field, `state` does not persist over restarts. 
+ +[[cursor-websocket]] +[float] +==== `state.cursor` + +The cursor is an object available as `state.cursor` where arbitrary values may be stored. Cursor state is kept between input restarts and updated after each event of a request has been published. When a cursor is used the CEL program must either create a cursor state for each event that is returned by the program, or a single cursor that reflects the cursor for completion of the full set of events. + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +# Read and process simple websocket messages from a local websocket server +- type: websocket + resource.url: ws://localhost:443/v1/stream + program: | + bytes(state.response).as(body, { + "events": [body.decode_json().with({ + "last_requested_at": has(state.cursor) && has(state.cursor.last_requested_at) ? + state.cursor.last_requested_at + : + now + })], + "cursor": {"last_requested_at": now} + }) +---- + +[[regexp-websocket]] +[float] +==== `regexp` + +A set of named regular expressions that may be used during a CEL program's execution using the `regexp` extension library. The syntax used for the regular expressions is https://github.com/google/re2/wiki/Syntax[RE2]. + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: websocket + # Define two regular expressions, 'products' and 'solutions' for use during CEL program execution. + regexp: + products: '(?i)(Elasticsearch|Beats|Logstash|Kibana)' + solutions: '(?i)(Search|Observability|Security)' +---- + +[[websocket-state-redact]] +[float] +==== `redact` + +During debug level logging, the `state` object and the resulting evaluation result are included in logs. This may result in leaking of secrets. In order to prevent this, fields may be redacted or deleted from the logged `state`. The `redact` configuration allows users to configure this field redaction behaviour. For safety reasons if the `redact` configuration is missing a warning is logged. 
+ +If no redaction is required, an empty `redact.fields` configuration should be used to silence the logged warning. + +["source","yaml",subs="attributes"] +---- +- type: websocket + redact: + fields: ~ +---- + +As an example, if a user-constructed Basic Authentication request is used in a CEL program the password can be redacted like so: + +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: websocket + resource.url: ws://localhost:443/_stream + state: + user: user@domain.tld + password: P@$$W0₹D + redact: + fields: + - password + delete: true +---- + +Note that fields under the `auth` configuration hierarchy are not exposed to the `state` and so do not need to be redacted. For this reason it is preferable to use these for authentication over the request construction shown above where possible. + +[float] +==== `redact.fields` + +This specifies fields in the `state` to be redacted prior to debug logging. Fields listed in this array will be either replaced with a `*` or deleted entirely from messages sent to debug logs. + +[float] +==== `redact.delete` + +This specifies whether fields should be replaced with a `*` or deleted entirely from messages sent to debug logs. If delete is `true`, fields will be deleted rather than replaced. + +[float] +=== Metrics + +This input exposes metrics under the <<http-endpoint, HTTP monitoring endpoint>>. +These metrics are exposed under the `/inputs` path. They can be used to +observe the activity of the input. + +[options="header"] +|======= +| Metric | Description +| `resource` | URL or path of the input resource. +| `cel_eval_errors` | Number of errors encountered during cel program evaluation. +| `errors_total` | Number of errors encountered over the life cycle of the input. +| `batches_received_total` | Number of event arrays received. +| `batches_published_total` | Number of event arrays published. +| `received_bytes_total` | Number of bytes received over the life cycle of the input. +| `events_received_total` | Number of events received. 
+| `events_published_total` | Number of events published. +| `cel_processing_time` | Histogram of the elapsed successful CEL program processing times in nanoseconds. +| `batch_processing_time` | Histogram of the elapsed successful batch processing times in nanoseconds (time of receipt to time of ACK for non-empty batches). +|======= + +==== Developer tools + +A stand-alone CEL environment that implements the majority of the websocket input's Common Expression Language functionality is available in the https://github.com/elastic/mito[Elastic Mito] repository. This tool may be used to help develop CEL programs to be used by the input. Installation is available from source by running `go install github.com/elastic/mito/cmd/mito@latest` and requires a Go toolchain. + +[id="{beatname_lc}-input-{type}-common-options"] +include::../../../../filebeat/docs/inputs/input-common-options.asciidoc[] + +NOTE: The `websocket` input is currently in beta and might have bugs and other issues. Please report any issues on the https://github.com/elastic/beats[Github] repository. 
+ +:type!: \ No newline at end of file diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 4cb7a8652cd0..661aaa2595e5 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -37,7 +37,7 @@ const ( func Plugin(log *logp.Logger, store inputcursor.StateStore) v2.Plugin { return v2.Plugin{ Name: inputName, - Stability: feature.Experimental, + Stability: feature.Beta, Deprecated: false, Info: "Websocket Input", Doc: "Collect data from websocket api endpoints", diff --git a/x-pack/filebeat/input/websocket/metrics.go b/x-pack/filebeat/input/websocket/metrics.go index 8b84ddd5d0df..96d847e3f01b 100644 --- a/x-pack/filebeat/input/websocket/metrics.go +++ b/x-pack/filebeat/input/websocket/metrics.go @@ -33,7 +33,7 @@ func newInputMetrics(id string) *inputMetrics { unregister: unreg, resource: monitoring.NewString(reg, "resource"), celEvalErrors: monitoring.NewUint(reg, "cel_eval_errors"), - batchesReceived: monitoring.NewUint(reg, "batches_received"), + batchesReceived: monitoring.NewUint(reg, "batches_received_total"), errorsTotal: monitoring.NewUint(reg, "errors_total"), receivedBytesTotal: monitoring.NewUint(reg, "received_bytes_total"), eventsReceived: monitoring.NewUint(reg, "events_received_total"), From a71731bc4e0f8013f592c538a89ca4ee0308afc9 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 18:12:40 +0530 Subject: [PATCH 16/25] updated experimental tags --- x-pack/filebeat/docs/inputs/input-websocket.asciidoc | 4 ++-- x-pack/filebeat/input/websocket/input.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc index dc4851eadee0..ac176c810a7f 100644 --- a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -6,7 +6,7 @@ [id="{beatname_lc}-input-{type}"] === Websocket 
Input -beta[] +experimental[] The `websocket` input reads messages from a websocket server or api endpoint. This input uses the `CEL engine` and the `mito` library interally to parse and process the messages. Having support for `CEL` allows you to parse and process the messages in a more flexible way. It has many similarities with the `cel` input as to how the `CEL` programs are written but deviates in the way the messages are read and processed. The `websocket` input is a `streaming` input and can only be used to read messages from a websocket server or api endpoint. @@ -260,6 +260,6 @@ A stand-alone CEL environment that implements the majority of the websocket inpu [id="{beatname_lc}-input-{type}-common-options"] include::../../../../filebeat/docs/inputs/input-common-options.asciidoc[] -NOTE: The `websocket` input is currently in beta and might have bugs and other issues. Please report any issues on the https://github.com/elastic/beats[Github] repository. +NOTE: The `websocket` input is currently tagged as experimental and might have bugs and other issues. Please report any issues on the https://github.com/elastic/beats[Github] repository. 
:type!: \ No newline at end of file diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 661aaa2595e5..4cb7a8652cd0 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -37,7 +37,7 @@ const ( func Plugin(log *logp.Logger, store inputcursor.StateStore) v2.Plugin { return v2.Plugin{ Name: inputName, - Stability: feature.Beta, + Stability: feature.Experimental, Deprecated: false, Info: "Websocket Input", Doc: "Collect data from websocket api endpoints", From 129c5bd0c8109cf93db0a2c00f5975ecef03c0fa Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Tue, 6 Feb 2024 23:30:27 +0530 Subject: [PATCH 17/25] added a new test, cleaned up some code and logic --- x-pack/filebeat/input/websocket/config.go | 12 ++--- .../filebeat/input/websocket/input_manager.go | 4 +- x-pack/filebeat/input/websocket/input_test.go | 46 ++++++++++++++++++- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index a7789034d3c2..55ad3a0dcb56 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -87,9 +87,11 @@ func (c config) Validate() error { if len(c.Regexps) != 0 { patterns = map[string]*regexp.Regexp{".": nil} } - _, _, err = newProgram(context.Background(), c.Program, root, patterns, logp.L().Named("input.websocket")) - if err != nil { - return fmt.Errorf("failed to check program: %w", err) + if c.Program != "" { + _, _, err = newProgram(context.Background(), c.Program, root, patterns, logp.L().Named("input.websocket")) + if err != nil { + return fmt.Errorf("failed to check program: %w", err) + } } err = checkURLScheme(c.Resource.URL) if err != nil { @@ -106,7 +108,3 @@ func checkURLScheme(url *urlConfig) error { return fmt.Errorf("unsupported scheme: %s", url.Scheme) } } - -func defaultConfig() config { - return config{} -} diff --git 
a/x-pack/filebeat/input/websocket/input_manager.go b/x-pack/filebeat/input/websocket/input_manager.go index 10ac9946faf9..b6c521eaaa72 100644 --- a/x-pack/filebeat/input/websocket/input_manager.go +++ b/x-pack/filebeat/input/websocket/input_manager.go @@ -34,7 +34,7 @@ func NewInputManager(log *logp.Logger, store inputcursor.StateStore) InputManage } func cursorConfigure(cfg *conf.C) ([]inputcursor.Source, inputcursor.Input, error) { - src := &source{cfg: defaultConfig()} + src := &source{cfg: config{}} if err := cfg.Unpack(&src.cfg); err != nil { return nil, nil, err } @@ -63,7 +63,7 @@ func (m InputManager) Init(grp unison.Group, mode v2.Mode) error { // Create creates a cursor input manager. func (m InputManager) Create(cfg *conf.C) (v2.Input, error) { - config := defaultConfig() + config := config{} if err := cfg.Unpack(&config); err != nil { return nil, err } diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go index 03a102d4e503..1d9d9287b677 100644 --- a/x-pack/filebeat/input/websocket/input_test.go +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -16,6 +16,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/gorilla/websocket" + "github.com/stretchr/testify/assert" v2 "github.com/elastic/beats/v7/filebeat/input/v2" inputcursor "github.com/elastic/beats/v7/filebeat/input/v2/input-cursor" @@ -237,6 +238,18 @@ var inputTests = []struct { }`}, wantErr: fmt.Errorf("unexpected type returned for evaluation cursor element: %T", "What's next?"), }, + { + name: "invalid_url_scheme", + server: invalidWebSocketTestServer(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + }, + wantErr: fmt.Errorf("unsupported scheme: http accessing config"), + }, } func TestInput(t *testing.T) { @@ -251,10 +264,14 @@ func TestInput(t *testing.T) { cfg := conf.MustNewConfigFrom(test.config) - 
conf := defaultConfig() + conf := config{} conf.Redact = &redact{} // Make sure we pass the redact requirement. err := cfg.Unpack(&conf) if err != nil { + if test.wantErr != nil { + assert.EqualError(t, err, test.wantErr.Error()) + return + } t.Fatalf("unexpected error unpacking config: %v", err) } @@ -346,8 +363,33 @@ func newWebSocketTestServer(serve func(http.Handler) *httptest.Server) func(*tes handler(t, conn, response) })) + // only set the resource URL if it is not already set + if config["resource.url"] == nil { + config["resource.url"] = "ws" + server.URL[4:] + } + t.Cleanup(server.Close) + } +} + +// invalidWebSocketTestServer returns a function that creates a WebSocket server with an invalid URL scheme. +func invalidWebSocketTestServer(serve func(http.Handler) *httptest.Server) func(*testing.T, WebSocketHandler, map[string]interface{}, []string) { + return func(t *testing.T, handler WebSocketHandler, config map[string]interface{}, response []string) { + server := serve(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + upgrader := websocket.Upgrader{ + CheckOrigin: func(r *http.Request) bool { + return true + }, + } - config["resource.url"] = "ws" + server.URL[4:] + conn, err := upgrader.Upgrade(w, r, nil) + if err != nil { + t.Fatalf("error upgrading connection to WebSocket: %v", err) + return + } + + handler(t, conn, response) + })) + config["resource.url"] = server.URL t.Cleanup(server.Close) } } From 82b54bc5447c5e93a6f4e6fea4ad3db7f5341043 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Thu, 8 Feb 2024 09:03:53 +0530 Subject: [PATCH 18/25] added cursor condition check test and updated filebeat-options asciidoc --- filebeat/docs/filebeat-options.asciidoc | 3 ++ x-pack/filebeat/input/websocket/input_test.go | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/filebeat/docs/filebeat-options.asciidoc b/filebeat/docs/filebeat-options.asciidoc index faff00e7e3d2..0787e3660bf8 100644 --- 
a/filebeat/docs/filebeat-options.asciidoc +++ b/filebeat/docs/filebeat-options.asciidoc @@ -91,6 +91,7 @@ You can configure {beatname_uc} to use the following inputs: * <<{beatname_lc}-input-tcp>> * <<{beatname_lc}-input-udp>> * <<{beatname_lc}-input-gcs>> +* <<{beatname_lc}-input-websocket>> include::multiline.asciidoc[] @@ -145,3 +146,5 @@ include::inputs/input-udp.asciidoc[] include::inputs/input-unix.asciidoc[] include::../../x-pack/filebeat/docs/inputs/input-gcs.asciidoc[] + +include::../../x-pack/filebeat/docs/inputs/input-websocket.asciidoc[] diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go index 1d9d9287b677..ab6289ff6a9b 100644 --- a/x-pack/filebeat/input/websocket/input_test.go +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -250,6 +250,47 @@ var inputTests = []struct { }, wantErr: fmt.Errorf("unsupported scheme: http accessing config"), }, + { + name: "cursor_condition_check", + server: newWebSocketTestServer(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": has(state.cursor) && inner_body.ts > state.cursor.last_updated ? 
[inner_body] : [], + })`, + "state": map[string]interface{}{ + "cursor": map[string]int{ + "last_updated": 1502908200, + }, + }, + }, + response: []string{` + { + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + "ts": 1502908200 + }`, + `{ + "pps": { + "agent": "example.proofpoint-1.com", + "cid": "mmeng_vxciml" + }, + "ts": 1503081000 + }`, + }, + want: []map[string]interface{}{ + { + "pps": map[string]interface{}{ + "agent": "example.proofpoint-1.com", + "cid": "mmeng_vxciml", + }, + "ts": float64(1503081000), + }, + }, + }, } func TestInput(t *testing.T) { From c550da9327aaf14586cfe5f0d9555108d6a4c2a3 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Thu, 8 Feb 2024 16:27:29 +0530 Subject: [PATCH 19/25] added auth tests, removed api-key config and updated it to custom auth to be more generic, added context in Dial method --- .../docs/inputs/input-websocket.asciidoc | 18 ++- x-pack/filebeat/input/websocket/config.go | 8 +- x-pack/filebeat/input/websocket/input.go | 6 +- x-pack/filebeat/input/websocket/input_test.go | 153 +++++++++++++++++- 4 files changed, 173 insertions(+), 12 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc index ac176c810a7f..952f1c1ebc58 100644 --- a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -15,7 +15,7 @@ This input supports: * Auth ** Basic ** Bearer -** API Key +** Custom NOTE: The `websocket` input as of now does not support XML messages. Auto-reconnects are also not supported at the moment so reconnection will occur on input restart. @@ -24,7 +24,7 @@ NOTE: The `websocket` input as of now does not support XML messages. Auto-reconn The execution environment provided for the input includes includes the functions, macros, and global variables provided by the mito library. A single JSON object is provided as an input accessible through a `state` variable. 
`state` contains a `response` map field and may contain arbitrary other fields configured via the input's `state` configuration. If the CEL program saves cursor states between executions of the program, the configured `state.cursor` value will be replaced by the saved cursor prior to execution. -On start the `state` is will be something like this: +On start the `state` will be something like this: ["source","json",subs="attributes"] ---- @@ -85,7 +85,7 @@ The Websocket input will log the complete state after evaluation when logging at This will include any sensitive or secret information kept in the `state` object, and so DEBUG level logging should not be used in production when sensitive information is retained in the `state` object. See <<websocket-state-redact,`redact`>> configuration parameters for settings to exclude sensitive fields from DEBUG logs. ==== Authentication -The Websocket input supports authentication via Basic token authentication, Bearer token authentication and authentication via Api-Key. Unlike REST inputs Basic Authentication contains a basic auth token, Bearer Authentication contains a bearer token and Api-Key contains an api key. These token/key values are are added to the request headers and are not exposed to the `state` object. For the Api-Key authentication, the header and value are both configurable since different servers might have different api-key header conventions. +The Websocket input supports authentication via Basic token authentication, Bearer token authentication and authentication via a custom auth config. Unlike REST inputs, Basic Authentication contains a basic auth token, Bearer Authentication contains a bearer token and custom auth contains any combination of custom header and value. These token/key values are added to the request headers and are not exposed to the `state` object. The custom auth configuration is useful for constructing requests that require custom headers and values for authentication. 
The basic and bearer token configurations will always use the `Authorization` header and prepend the token with `Basic` or `Bearer` respectively. Example configurations with authentication: @@ -109,12 +109,22 @@ filebeat.inputs: ---- filebeat.inputs: - type: websocket - auth.api_key: + auth.custom: header: "x-api-key" value: "dXNlcjpwYXNzd29yZA==" resource.url: wss://localhost:443/_stream ---- +["source","yaml",subs="attributes"] +---- +filebeat.inputs: +- type: websocket + auth.custom: + header: "Auth" + value: "Bearer dXNlcjpwYXNzd29yZA==" + resource.url: wss://localhost:443/_stream +---- + [[input-state-websocket]] ==== Input state diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index 55ad3a0dcb56..c9e6afb6e03d 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -47,16 +47,16 @@ type redact struct { } type authConfig struct { - // Api-Key to use for authentication. - ApiKey *apiKeyConfig `config:"api_key"` + // Custom auth config to use for authentication. + CustomAuth *customAuthConfig `config:"custom"` // Baerer token to use for authentication. BearerToken string `config:"bearer_token"` // Basic auth token to use for authentication. BasicToken string `config:"basic_token"` } -type apiKeyConfig struct { - // Api-Key to use for authentication. +type customAuthConfig struct { + // Custom auth config to use for authentication. 
Header string `config:"header"` Value string `config:"value"` } diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 4cb7a8652cd0..0533eeea96e9 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -100,7 +100,7 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p // websocket client headers := formHeader(cfg) url := cfg.Resource.URL.String() - c, resp, err := websocket.DefaultDialer.Dial(url, headers) + c, resp, err := websocket.DefaultDialer.DialContext(ctx, url, headers) if resp != nil && resp.Body != nil { log.Debugw("websocket connection response", "body", resp.Body) resp.Body.Close() @@ -367,8 +367,8 @@ func errorMessage(msg string) map[string]interface{} { func formHeader(cfg config) map[string][]string { header := make(map[string][]string) switch { - case cfg.Auth.ApiKey != nil: - header[cfg.Auth.ApiKey.Header] = []string{cfg.Auth.ApiKey.Value} + case cfg.Auth.CustomAuth != nil: + header[cfg.Auth.CustomAuth.Header] = []string{cfg.Auth.CustomAuth.Value} case cfg.Auth.BearerToken != "": header["Authorization"] = []string{"Bearer " + cfg.Auth.BearerToken} case cfg.Auth.BasicToken != "": diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go index ab6289ff6a9b..bfc6b9b35bdc 100644 --- a/x-pack/filebeat/input/websocket/input_test.go +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -26,6 +26,13 @@ import ( "github.com/elastic/elastic-agent-libs/mapstr" ) +const ( + basicToken = "dXNlcjpwYXNz" + bearerToken = "BXNlcjpwYVVz" + customHeader = "X-Api-Key" + customValue = "my-api-key" +) + // WebSocketHandler is a type for handling WebSocket messages. 
type WebSocketHandler func(*testing.T, *websocket.Conn, []string) @@ -291,6 +298,105 @@ var inputTests = []struct { }, }, }, + { + name: "auth_basic_token", + server: webSocketTestServerWithAuth(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + "auth": map[string]interface{}{ + "basic_token": basicToken, + }, + }, + response: []string{` + { + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + "ts": 1502908200 + }`, + }, + want: []map[string]interface{}{ + { + "pps": map[string]interface{}{ + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071", + }, + "ts": float64(1502908200), + }, + }, + }, + { + name: "auth_bearer_token", + server: webSocketTestServerWithAuth(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + "auth": map[string]interface{}{ + "bearer_token": bearerToken, + }, + }, + response: []string{` + { + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + "ts": 1502908200 + }`, + }, + want: []map[string]interface{}{ + { + "pps": map[string]interface{}{ + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071", + }, + "ts": float64(1502908200), + }, + }, + }, + { + name: "auth_custom", + server: webSocketTestServerWithAuth(httptest.NewServer), + handler: defaultHandler, + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + "auth": map[string]interface{}{ + "custom": map[string]interface{}{ + "header": customHeader, + "value": customValue, + }, + }, + }, + response: []string{` + { + "pps": { + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071" + }, + "ts": 1502908200 + }`, + }, + want: []map[string]interface{}{ + { + "pps": 
map[string]interface{}{ + "agent": "example.proofpoint.com", + "cid": "mmeng_uivm071", + }, + "ts": float64(1502908200), + }, + }, + }, } func TestInput(t *testing.T) { @@ -326,7 +432,7 @@ func TestInput(t *testing.T) { t.Fatalf("unexpected error running test: %v", err) } - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 1000*time.Second) defer cancel() v2Ctx := v2.Context{ @@ -435,6 +541,51 @@ func invalidWebSocketTestServer(serve func(http.Handler) *httptest.Server) func( } } +// webSocketTestServerWithAuth returns a function that creates a WebSocket server with authentication. This does not however simulate a TLS connection. +func webSocketTestServerWithAuth(serve func(http.Handler) *httptest.Server) func(*testing.T, WebSocketHandler, map[string]interface{}, []string) { + return func(t *testing.T, handler WebSocketHandler, config map[string]interface{}, response []string) { + server := serve(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + upgrader := websocket.Upgrader{ + CheckOrigin: func(r *http.Request) bool { + // check for auth token + authToken := r.Header.Get("Authorization") + if authToken == "" { + authToken = r.Header.Get(customHeader) + if authToken == "" { + return false + } + } + + switch { + case authToken == "Bearer "+bearerToken: + return true + case authToken == "Basic "+basicToken: + return true + case authToken == customValue: + return true + default: + return false + + } + }, + } + + conn, err := upgrader.Upgrade(w, r, nil) + if err != nil { + t.Fatalf("error upgrading connection to WebSocket: %v", err) + return + } + + handler(t, conn, response) + })) + // only set the resource URL if it is not already set + if config["resource.url"] == nil { + config["resource.url"] = "ws" + server.URL[4:] + } + t.Cleanup(server.Close) + } +} + // defaultHandler is a default handler for WebSocket connections. 
func defaultHandler(t *testing.T, conn *websocket.Conn, response []string) { for _, r := range response { From 8a615095f6dc9c2659b2674721cd7ed911a1f46b Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Fri, 9 Feb 2024 18:10:14 +0530 Subject: [PATCH 20/25] addressed PR suggestions and added config tests --- .../docs/inputs/input-websocket.asciidoc | 69 +++++----- x-pack/filebeat/input/websocket/cel.go | 1 - x-pack/filebeat/input/websocket/config.go | 11 +- .../filebeat/input/websocket/config_test.go | 121 ++++++++++++++++++ x-pack/filebeat/input/websocket/input.go | 6 +- .../filebeat/input/websocket/input_manager.go | 2 +- x-pack/filebeat/input/websocket/input_test.go | 10 +- x-pack/filebeat/input/websocket/metrics.go | 8 +- .../filebeat/input/websocket/redact_test.go | 4 +- 9 files changed, 172 insertions(+), 60 deletions(-) create mode 100644 x-pack/filebeat/input/websocket/config_test.go diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc index 952f1c1ebc58..0b4da231f997 100644 --- a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -34,10 +34,9 @@ On start the `state` will be something like this: ... } ---- -The `websocket` input creates a `response` field in the state map and attaches the websocket message to this field. All `CEL` programs written should act on this `response` field. Additional fields may be present at the root of the object and if the program tolerates it, the cursor value may be absent.Only the cursor is persisted over restarts, but all fields in state are retained between iterations of the processing loop except for the produced events array, see below. +The `websocket` input creates a `response` field in the state map and attaches the websocket message to this field. All `CEL` programs written should act on this `response` field. 
Additional fields may be present at the root of the object and if the program tolerates it, the cursor value may be absent. Only the cursor is persisted over restarts, but all fields in state are retained between iterations of the processing loop except for the produced events array, see below. -If the cursor is present the program should perform and process requests based on its value. -If cursor is not present the program must have alternative logic to determine what requests to make. +If the cursor is present the program should process or filter out responses based on its value. If cursor is not present all responses should be processed as per the program's logic. After completion of a program's execution it should return a single object with a structure looking like this: @@ -51,8 +50,7 @@ After completion of a program's execution it should return a single object with "cursor": [ <2> {...}, ... - ], - "want_more": false <3> + ] } ---- @@ -61,7 +59,6 @@ The field could be an array or a single object that will be treated as an array <2> If `cursor` is present it must be either be a single object or an array with the same length as events; each element _i_ of the `cursor` will be the details for obtaining the events at and beyond event _i_ in the `events` array. If the `cursor` is a single object, it will be the details for obtaining events after the last event in the `events` array and will only be retained on successful publication of all the events in the `events` array. -<3> Unlike in the `cel` input, the `want_more` field is always false. This is because the `websocket` input is a streaming input and will always be ready to receive more messages from the websocket server or api endpoint, however the `CEL` program will only be executed once for each message received. 
Example configuration: @@ -70,18 +67,18 @@ Example configuration: filebeat.inputs: # Read and process simple websocket messages from a local websocket server - type: websocket - resource.url: ws://localhost:443/v1/stream + url: ws://localhost:443/v1/stream program: | - bytes(state.response).decode_json().as(inner_body,{ - "events": { - "message": inner_body.encode_json(), - } - }) + bytes(state.response).decode_json().as(inner_body,{ + "events": { + "message": inner_body.encode_json(), + } +}) ---- ==== Debug state logging -The Websocket input will log the complete state after evaluation when logging at the DEBUG level. +The Websocket input will log the complete state when logging at the DEBUG level. This will include any sensitive or secret information kept in the `state` object, and so DEBUG level logging should not be used in production when sensitive information is retained in the `state` object. See <> configuration parameters for settings to exclude sensitive fields from DEBUG logs. 
==== Authentication @@ -94,7 +91,7 @@ Example configurations with authentication: filebeat.inputs: - type: websocket auth.basic_token: "dXNlcjpwYXNzd29yZA==" - resource.url: wss://localhost:443/_stream + url: wss://localhost:443/_stream ---- ["source","yaml",subs="attributes"] @@ -102,7 +99,7 @@ filebeat.inputs: filebeat.inputs: - type: websocket auth.bearer_token: "dXNlcjpwYXNzd29yZA==" - resource.url: wss://localhost:443/_stream + url: wss://localhost:443/_stream ---- ["source","yaml",subs="attributes"] @@ -112,7 +109,7 @@ filebeat.inputs: auth.custom: header: "x-api-key" value: "dXNlcjpwYXNzd29yZA==" - resource.url: wss://localhost:443/_stream + url: wss://localhost:443/_stream ---- ["source","yaml",subs="attributes"] @@ -122,7 +119,7 @@ filebeat.inputs: auth.custom: header: "Auth" value: "Bearer dXNlcjpwYXNzd29yZA==" - resource.url: wss://localhost:443/_stream + url: wss://localhost:443/_stream ---- [[input-state-websocket]] @@ -144,12 +141,12 @@ The CEL program that is executed on each message received. This field should ide ["source","yaml",subs="attributes"] ---- - program: | - bytes(state.response).decode_json().as(inner_body,{ - "events": { - "message": inner_body.encode_json(), - } - }) +program: | + bytes(state.response).decode_json().as(inner_body,{ + "events": { + "message": inner_body.encode_json(), + } +}) ---- [[state-websocket]] @@ -162,24 +159,24 @@ The CEL program that is executed on each message received. This field should ide [float] ==== `state.cursor` -The cursor is an object available as `state.cursor` where arbitrary values may be stored. Cursor state is kept between input restarts and updated after each event of a request has been published. When a cursor is used the CEL program must either create a cursor state for each event that is returned by the program, or a single cursor that reflect the cursor for completion of the full set of events. +The cursor is an object available as `state.cursor` where arbitrary values may be stored. 
Cursor state is kept between input restarts and updated after each event of a request has been published. When a cursor is used the CEL program must either create a cursor state for each event that is returned by the program, or a single cursor that reflects the cursor for completion of the full set of events. ["source","yaml",subs="attributes"] ---- filebeat.inputs: # Read and process simple websocket messages from a local websocket server - type: websocket - resource.url: ws://localhost:443/v1/stream + url: ws://localhost:443/v1/stream program: | - bytes(state.response).as(body, { - "events": [body.decode_json().with({ - "last_requested_at": has(state.cursor) && has(state.cursor.last_requested_at) ? - state.cursor.last_requested_at - : - now - })], - "cursor": {"last_requested_at": now} - }) + bytes(state.response).as(body, { + "events": [body.decode_json().with({ + "last_requested_at": has(state.cursor) && has(state.cursor.last_requested_at) ? + state.cursor.last_requested_at + : + now + })], + "cursor": {"last_requested_at": now} +}) ---- [[regexp-websocket]] @@ -219,7 +216,7 @@ As an example, if a user-constructed Basic Authentication request is used in a C ---- filebeat.inputs: - type: websocket - resource.url: ws://localhost:443/_stream + url: ws://localhost:443/_stream state: user: user@domain.tld password: P@$$W0₹D @@ -251,7 +248,7 @@ observe the activity of the input. [options="header"] |======= | Metric | Description -| `resource` | URL or path of the input resource. +| `url` | URL of the input resource. | `cel_eval_errors` | Number of errors encountered during cel program evaluation. | `errors_total` | Number of errors encountered over the life cycle of the input. | `batches_received_total` | Number of event arrays received. 
diff --git a/x-pack/filebeat/input/websocket/cel.go b/x-pack/filebeat/input/websocket/cel.go index 594fe1f61f97..11c2e7ad8f13 100644 --- a/x-pack/filebeat/input/websocket/cel.go +++ b/x-pack/filebeat/input/websocket/cel.go @@ -62,7 +62,6 @@ func newProgram(ctx context.Context, src, root string, patterns map[string]*rege lib.Time(), lib.Try(), lib.Debug(debug(log)), - lib.File(mimetypes), lib.MIME(mimetypes), lib.Regexp(patterns), lib.Globals(map[string]interface{}{ diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index c9e6afb6e03d..a3b53d119b34 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -9,7 +9,6 @@ import ( "fmt" "net/url" "regexp" - "strings" "github.com/elastic/elastic-agent-libs/logp" ) @@ -28,15 +27,11 @@ type config struct { // Auth is the authentication config for connection Auth authConfig `config:"auth"` // Resource - Resource *ResourceConfig `config:"resource" validate:"required"` + URL *urlConfig `config:"url" validate:"required"` // Redact is the debug log state redaction configuration. Redact *redact `config:"redact"` } -type ResourceConfig struct { - URL *urlConfig `config:"url" validate:"required"` -} - type redact struct { // Fields indicates which fields to apply redaction to prior // to logging. 
@@ -93,7 +88,7 @@ func (c config) Validate() error { return fmt.Errorf("failed to check program: %w", err) } } - err = checkURLScheme(c.Resource.URL) + err = checkURLScheme(c.URL) if err != nil { return err } @@ -101,7 +96,7 @@ func (c config) Validate() error { } func checkURLScheme(url *urlConfig) error { - switch scheme, _, _ := strings.Cut(url.Scheme, "+"); scheme { + switch url.Scheme { case "ws", "wss": return nil default: diff --git a/x-pack/filebeat/input/websocket/config_test.go b/x-pack/filebeat/input/websocket/config_test.go new file mode 100644 index 000000000000..3cb9c910cde0 --- /dev/null +++ b/x-pack/filebeat/input/websocket/config_test.go @@ -0,0 +1,121 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package websocket + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + + conf "github.com/elastic/elastic-agent-libs/config" + "github.com/elastic/elastic-agent-libs/logp" +) + +var configTests = []struct { + name string + config map[string]interface{} + wantErr error +}{ + { + name: "invalid_url_scheme", + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + "url": "http://localhost:8080", + }, + wantErr: fmt.Errorf("unsupported scheme: http accessing config"), + }, + { + name: "missing_url", + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + }, + wantErr: fmt.Errorf("missing required field accessing 'url'"), + }, + { + name: "invalid_program", + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": has(state.cursor) && inner_body.ts > state.cursor.last_updated ? 
+ [inner_body] + : + null, + })`, + "url": "wss://localhost:443/v1/stream", + }, + wantErr: fmt.Errorf("failed to check program: failed compilation: ERROR: :3:79: found no matching overload for '_?_:_' applied to '(bool, list(dyn), null)'\n | \"events\": has(state.cursor) && inner_body.ts > state.cursor.last_updated ? \n | ..............................................................................^ accessing config"), + }, + { + name: "invalid_regexps", + config: map[string]interface{}{ + "regexp": map[string]interface{}{ + "products": "(?i)(xq>)d+)", + "solutions": "(?)(Sws>(d+)", + }, + "url": "wss://localhost:443/v1/stream", + }, + wantErr: fmt.Errorf("failed to check regular expressions: error parsing regexp: unexpected ): `(?i)(xq>)d+)` accessing config"), + }, + { + name: "valid_regexps", + config: map[string]interface{}{ + "regexp": map[string]interface{}{ + "products": "(?i)(Elasticsearch|Beats|Logstash|Kibana)", + "solutions": "(?i)(Search|Observability|Security)", + }, + "url": "wss://localhost:443/v1/stream", + }, + }, + { + name: "valid_config", + config: map[string]interface{}{ + "program": ` + bytes(state.response).decode_json().as(inner_body,{ + "events": [inner_body], + })`, + "url": "wss://localhost:443/v1/stream", + "regexp": map[string]interface{}{ + "products": "(?i)(Elasticsearch|Beats|Logstash|Kibana)", + "solutions": "(?i)(Search|Observability|Security)", + }, + "state": map[string]interface{}{ + "cursor": map[string]int{ + "last_updated": 1502908200, + }, + }, + }, + }, +} + +func TestConfig(t *testing.T) { + logp.TestingSetup() + for _, test := range configTests { + t.Run(test.name, func(t *testing.T) { + cfg := conf.MustNewConfigFrom(test.config) + conf := config{} + // Make sure we pass the redact requirement. 
+ conf.Redact = &redact{} + err := cfg.Unpack(&conf) + + switch { + case err == nil && test.wantErr != nil: + t.Fatalf("expected error unpacking config: %v", test.wantErr) + case err != nil && test.wantErr == nil: + t.Fatalf("unexpected error unpacking config: %v", err) + case err != nil && test.wantErr != nil: + assert.EqualError(t, err, test.wantErr.Error()) + default: + // no error + } + }) + } +} diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 0533eeea96e9..1235a63cf2cf 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -67,11 +67,11 @@ func (input) Run(env v2.Context, src inputcursor.Source, crsr inputcursor.Cursor func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, pub inputcursor.Publisher) error { cfg := src.cfg i.cfg = cfg - log := env.Logger.With("input_url", cfg.Resource.URL) + log := env.Logger.With("input_url", cfg.URL) metrics := newInputMetrics(env.ID) defer metrics.Close() - metrics.resource.Set(cfg.Resource.URL.String()) + metrics.url.Set(cfg.URL.String()) metrics.errorsTotal.Set(0) ctx := ctxtool.FromCanceller(env.Cancelation) @@ -99,7 +99,7 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p // websocket client headers := formHeader(cfg) - url := cfg.Resource.URL.String() + url := cfg.URL.String() c, resp, err := websocket.DefaultDialer.DialContext(ctx, url, headers) if resp != nil && resp.Body != nil { log.Debugw("websocket connection response", "body", resp.Body) diff --git a/x-pack/filebeat/input/websocket/input_manager.go b/x-pack/filebeat/input/websocket/input_manager.go index b6c521eaaa72..49fca0b0a82c 100644 --- a/x-pack/filebeat/input/websocket/input_manager.go +++ b/x-pack/filebeat/input/websocket/input_manager.go @@ -54,7 +54,7 @@ func cursorConfigure(cfg *conf.C) ([]inputcursor.Source, inputcursor.Input, erro type source struct{ cfg config } -func (s *source) Name() 
string { return s.cfg.Resource.URL.String() } +func (s *source) Name() string { return s.cfg.URL.String() } // Init initializes both wrapped input managers. func (m InputManager) Init(grp unison.Group, mode v2.Mode) error { diff --git a/x-pack/filebeat/input/websocket/input_test.go b/x-pack/filebeat/input/websocket/input_test.go index bfc6b9b35bdc..fc98a2f0b46a 100644 --- a/x-pack/filebeat/input/websocket/input_test.go +++ b/x-pack/filebeat/input/websocket/input_test.go @@ -511,8 +511,8 @@ func newWebSocketTestServer(serve func(http.Handler) *httptest.Server) func(*tes handler(t, conn, response) })) // only set the resource URL if it is not already set - if config["resource.url"] == nil { - config["resource.url"] = "ws" + server.URL[4:] + if config["url"] == nil { + config["url"] = "ws" + server.URL[4:] } t.Cleanup(server.Close) } @@ -536,7 +536,7 @@ func invalidWebSocketTestServer(serve func(http.Handler) *httptest.Server) func( handler(t, conn, response) })) - config["resource.url"] = server.URL + config["url"] = server.URL t.Cleanup(server.Close) } } @@ -579,8 +579,8 @@ func webSocketTestServerWithAuth(serve func(http.Handler) *httptest.Server) func handler(t, conn, response) })) // only set the resource URL if it is not already set - if config["resource.url"] == nil { - config["resource.url"] = "ws" + server.URL[4:] + if config["url"] == nil { + config["url"] = "ws" + server.URL[4:] } t.Cleanup(server.Close) } diff --git a/x-pack/filebeat/input/websocket/metrics.go b/x-pack/filebeat/input/websocket/metrics.go index 96d847e3f01b..34e6a9620f93 100644 --- a/x-pack/filebeat/input/websocket/metrics.go +++ b/x-pack/filebeat/input/websocket/metrics.go @@ -5,17 +5,17 @@ package websocket import ( + "github.com/rcrowley/go-metrics" + "github.com/elastic/beats/v7/libbeat/monitoring/inputmon" "github.com/elastic/elastic-agent-libs/monitoring" "github.com/elastic/elastic-agent-libs/monitoring/adapter" - - "github.com/rcrowley/go-metrics" ) // inputMetrics handles the 
input's metric reporting. type inputMetrics struct { unregister func() - resource *monitoring.String // URL-ish of input resource + url *monitoring.String // URL of the input resource celEvalErrors *monitoring.Uint // number of errors encountered during cel program evaluation batchesReceived *monitoring.Uint // number of event arrays received errorsTotal *monitoring.Uint // number of errors encountered @@ -31,7 +31,7 @@ func newInputMetrics(id string) *inputMetrics { reg, unreg := inputmon.NewInputRegistry(inputName, id, nil) out := &inputMetrics{ unregister: unreg, - resource: monitoring.NewString(reg, "resource"), + url: monitoring.NewString(reg, "url"), celEvalErrors: monitoring.NewUint(reg, "cel_eval_errors"), batchesReceived: monitoring.NewUint(reg, "batches_received_total"), errorsTotal: monitoring.NewUint(reg, "errors_total"), diff --git a/x-pack/filebeat/input/websocket/redact_test.go b/x-pack/filebeat/input/websocket/redact_test.go index 89798c246838..c66db60d97b0 100644 --- a/x-pack/filebeat/input/websocket/redact_test.go +++ b/x-pack/filebeat/input/websocket/redact_test.go @@ -8,9 +8,9 @@ import ( "fmt" "testing" - "github.com/elastic/elastic-agent-libs/mapstr" - "github.com/google/go-cmp/cmp" + + "github.com/elastic/elastic-agent-libs/mapstr" ) var redactorTests = []struct { From d60d73b98d80efc7e330341265bc3fe86693eda6 Mon Sep 17 00:00:00 2001 From: ShourieG <105607378+ShourieG@users.noreply.github.com> Date: Mon, 12 Feb 2024 06:48:15 +0530 Subject: [PATCH 21/25] Update x-pack/filebeat/docs/inputs/input-websocket.asciidoc Co-authored-by: Dan Kortschak <90160302+efd6@users.noreply.github.com> --- x-pack/filebeat/docs/inputs/input-websocket.asciidoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc index 0b4da231f997..9f2b42a0c452 100644 --- a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc +++ 
b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -69,11 +69,11 @@ filebeat.inputs: - type: websocket url: ws://localhost:443/v1/stream program: | - bytes(state.response).decode_json().as(inner_body,{ - "events": { - "message": inner_body.encode_json(), - } -}) + bytes(state.response).decode_json().as(inner_body,{ + "events": { + "message": inner_body.encode_json(), + } + }) ---- ==== Debug state logging From b3a7e20a5e7d265f42820a9d3678648ef82ff917 Mon Sep 17 00:00:00 2001 From: ShourieG <105607378+ShourieG@users.noreply.github.com> Date: Mon, 12 Feb 2024 06:48:25 +0530 Subject: [PATCH 22/25] Update x-pack/filebeat/docs/inputs/input-websocket.asciidoc Co-authored-by: Dan Kortschak <90160302+efd6@users.noreply.github.com> --- x-pack/filebeat/docs/inputs/input-websocket.asciidoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc index 9f2b42a0c452..4db261e39bb1 100644 --- a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -142,11 +142,11 @@ The CEL program that is executed on each message received. 
This field should ide ["source","yaml",subs="attributes"] ---- program: | - bytes(state.response).decode_json().as(inner_body,{ - "events": { - "message": inner_body.encode_json(), - } -}) + bytes(state.response).decode_json().as(inner_body,{ + "events": { + "message": inner_body.encode_json(), + } + }) ---- [[state-websocket]] From 341ad87cdb659958e4e2748eb09a766bae2da621 Mon Sep 17 00:00:00 2001 From: ShourieG <105607378+ShourieG@users.noreply.github.com> Date: Mon, 12 Feb 2024 06:48:33 +0530 Subject: [PATCH 23/25] Update x-pack/filebeat/docs/inputs/input-websocket.asciidoc Co-authored-by: Dan Kortschak <90160302+efd6@users.noreply.github.com> --- .../docs/inputs/input-websocket.asciidoc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc index 4db261e39bb1..619c176e72a0 100644 --- a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -168,15 +168,15 @@ filebeat.inputs: - type: websocket url: ws://localhost:443/v1/stream program: | - bytes(state.response).as(body, { - "events": [body.decode_json().with({ - "last_requested_at": has(state.cursor) && has(state.cursor.last_requested_at) ? - state.cursor.last_requested_at - : - now - })], - "cursor": {"last_requested_at": now} -}) + bytes(state.response).as(body, { + "events": [body.decode_json().with({ + "last_requested_at": has(state.cursor) && has(state.cursor.last_requested_at) ? 
+ state.cursor.last_requested_at + : + now + })], + "cursor": {"last_requested_at": now} + }) ---- [[regexp-websocket]] From dd850d2c93a244fe33867df13dc8908c659a179b Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Mon, 12 Feb 2024 07:30:27 +0530 Subject: [PATCH 24/25] added debug log for cel state before cel eval, updated the docs accordingly --- x-pack/filebeat/docs/inputs/input-websocket.asciidoc | 2 +- x-pack/filebeat/input/websocket/input.go | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc index 619c176e72a0..8ee2da2b42ad 100644 --- a/x-pack/filebeat/docs/inputs/input-websocket.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-websocket.asciidoc @@ -78,7 +78,7 @@ filebeat.inputs: ==== Debug state logging -The Websocket input will log the complete state when logging at the DEBUG level. +The Websocket input will log the complete state when logging at the DEBUG level before and after CEL evaluation. This will include any sensitive or secret information kept in the `state` object, and so DEBUG level logging should not be used in production when sensitive information is retained in the `state` object. See <> configuration parameters for settings to exclude sensitive fields from DEBUG logs. 
==== Authentication diff --git a/x-pack/filebeat/input/websocket/input.go b/x-pack/filebeat/input/websocket/input.go index 1235a63cf2cf..c48ce177931c 100644 --- a/x-pack/filebeat/input/websocket/input.go +++ b/x-pack/filebeat/input/websocket/input.go @@ -155,9 +155,10 @@ func (i input) run(env v2.Context, src *source, cursor map[string]interface{}, p func (i *input) processAndPublishData(ctx context.Context, metrics *inputMetrics, prg cel.Program, ast *cel.Ast, state map[string]interface{}, cursor map[string]interface{}, pub inputcursor.Publisher, log *logp.Logger) error { goodCursor := cursor + log.Debugw("cel engine state before eval", logp.Namespace("websocket"), "state", redactor{state: state, cfg: i.cfg.Redact}) start := i.now().In(time.UTC) state, err := evalWith(ctx, prg, ast, state, start) - log.Debugw("cel engine eval state", logp.Namespace("websocket"), "state", redactor{state: state, cfg: i.cfg.Redact}) + log.Debugw("cel engine state after eval", logp.Namespace("websocket"), "state", redactor{state: state, cfg: i.cfg.Redact}) if err != nil { metrics.celEvalErrors.Add(1) switch { From c68ad525cb71030a87e4dc7f85d0a61dca64cb50 Mon Sep 17 00:00:00 2001 From: Shourie Ganguly Date: Mon, 12 Feb 2024 10:09:36 +0530 Subject: [PATCH 25/25] updated URL config docs --- x-pack/filebeat/input/websocket/config.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/websocket/config.go b/x-pack/filebeat/input/websocket/config.go index a3b53d119b34..1a961f3c1625 100644 --- a/x-pack/filebeat/input/websocket/config.go +++ b/x-pack/filebeat/input/websocket/config.go @@ -24,9 +24,9 @@ type config struct { // be overwritten by any stored cursor, but will be // available if no stored cursor exists. State map[string]interface{} `config:"state"` - // Auth is the authentication config for connection + // Auth is the authentication config for connection. 
Auth authConfig `config:"auth"` - // Resource + // URL is the websocket url to connect to. URL *urlConfig `config:"url" validate:"required"` // Redact is the debug log state redaction configuration. Redact *redact `config:"redact"`