From 6d264611e256a5b15398409d410c76887daaec5f Mon Sep 17 00:00:00 2001 From: Sean Cunningham Date: Mon, 8 Sep 2025 12:56:25 -0400 Subject: [PATCH] Support extractions in compiler. Replace node scope plugin with default. Node scope plugin moved to machine. --- pkg/ast/ast.go | 10 +++-- pkg/ast/ast_log.go | 90 +++++++++++++++++++++++++++------------- pkg/ast/ast_test.go | 4 ++ pkg/compiler/compiler.go | 4 +- pkg/compiler/plugin.go | 8 ++-- pkg/parser/parse.go | 9 ++++ pkg/parser/tree.go | 44 ++++++++++++++++++-- pkg/schema/schema.go | 1 + pkg/testdata/rules.go | 26 ++++++++++++ 9 files changed, 155 insertions(+), 41 deletions(-) diff --git a/pkg/ast/ast.go b/pkg/ast/ast.go index 0ce8f59..f39a47e 100644 --- a/pkg/ast/ast.go +++ b/pkg/ast/ast.go @@ -66,13 +66,17 @@ type AstNegateOptsT struct { Absolute bool `json:"absolute"` } +type AstExtractT struct { + Name string `json:"name"` + JqValue string `json:"jq_value,omitempty"` + RegexValue string `json:"regex_value,omitempty"` +} + type AstFieldT struct { Field string `json:"field"` - StrValue string `json:"str_value"` - JsonValue string `json:"json_value"` - RegexValue string `json:"regex_value"` TermValue match.TermT `json:"term_value"` NegateOpts *AstNegateOptsT `json:"negate_opts"` + Extracts []AstExtractT `json:"extracts"` } type AstEventT struct { diff --git a/pkg/ast/ast_log.go b/pkg/ast/ast_log.go index fbaf406..53e5125 100644 --- a/pkg/ast/ast_log.go +++ b/pkg/ast/ast_log.go @@ -13,13 +13,17 @@ import ( var ( ErrSeqPosConditions = errors.New("sequences require two or more positive conditions") ErrMissingScalar = errors.New("missing string, jq, or regex condition") + ErrExtractTerm = errors.New("invalid extract (must have name and one of jq or regex)") + ErrNegateCount = errors.New("negate fields cannot have count > 1") + ErrExtractNegate = errors.New("negate fields cannot have extracts") ) type AstLogMatcherT struct { - Event AstEventT - Match []AstFieldT - Negate []AstFieldT - Window time.Duration + Event AstEventT + Match []AstFieldT + Negate []AstFieldT + Correlations []string + Window time.Duration } func validateLogSeq(n *parser.NodeT, matches int) error { @@ -86,15 +90,7 @@ func (b *builderT) buildLogMatcherNode(parserNode *parser.NodeT, machineAddress // Count match fields and remember values for _, field := range match.Match.Fields { - if field.Count > 1 { - for i := 0; i < field.Count; i++ { - if term, err = newMatchTerm(field); err != nil { - zlog.Error().Err(err).Msg("Invalid match field term") - return nil, parserNode.WrapError(err) - } - matchFields = append(matchFields, term) - } - } else { + for range max(field.Count, 1) { if term, err = newMatchTerm(field); err != nil { zlog.Error().Err(err).Msg("Invalid match field term") return nil, parserNode.WrapError(err) @@ -106,20 +102,17 @@ func (b *builderT) buildLogMatcherNode(parserNode *parser.NodeT, machineAddress // Count negate fields and remember values for _, field := range match.Negate.Fields { if field.Count > 1 { - for range field.Count { - if term, err = newNegateTerm(field, uint32(len(match.Negate.Fields))); err != nil { - zlog.Error().Err(err).Msg("Invalid negate field term") - return nil, parserNode.WrapError(err) - } - negateFields = append(negateFields, term) - } - } else { - if term, err = newNegateTerm(field, uint32(len(match.Negate.Fields))); err != nil { - zlog.Error().Err(err).Msg("Invalid negate field term") - return nil, parserNode.WrapError(err) - } - negateFields = append(negateFields, term) + err = ErrNegateCount + zlog.Error().Err(err).Int("count", field.Count).Msg("Negate field with count > 1") + return nil, parserNode.WrapError(err) + } + if term, err = newNegateTerm(field, uint32(len(match.Negate.Fields))); err != nil { + zlog.Error().Err(err).Msg("Invalid negate field term") + return nil, parserNode.WrapError(err) + } + negateFields = append(negateFields, term) + } } @@ -153,9 +146,10 @@ func (b *builderT) doBuildLogMatcherNode(parserNode *parser.NodeT, machineAddres Origin: parserNode.Metadata.Event.Origin, Source: parserNode.Metadata.Event.Source, }, - Match: matchFields, - Negate: negateFields, - Window: parserNode.Metadata.Window, + Match: matchFields, + Negate: negateFields, + Window: parserNode.Metadata.Window, + Correlations: parserNode.Metadata.Correlations, } return matchNode, nil @@ -171,6 +165,15 @@ func newMatchTerm(field parser.FieldT) (AstFieldT, error) { Field: field.Field, } + if len(field.Extract) > 0 { + extracts, err := extractTerms(field.Extract) + if err != nil { + return AstFieldT{}, err + } + + t.Extracts = extracts + } + if field.StrValue != "" { t.TermValue = match.TermT{ Type: match.TermRaw, @@ -208,6 +211,11 @@ func newNegateTerm(field parser.FieldT, anchors uint32) (AstFieldT, error) { err error ) + if len(field.Extract) > 0 { + log.Error().Msg("Negate terms cannot have extracts") + return AstFieldT{}, ErrExtractNegate + } + if t, err = newMatchTerm(field); err != nil { return AstFieldT{}, err } @@ -228,3 +236,27 @@ func newNegateTerm(field parser.FieldT, anchors uint32) (AstFieldT, error) { return t, nil } + +func extractTerms(terms []parser.ExtractT) ([]AstExtractT, error) { + var extracts []AstExtractT + for _, term := range terms { + var ( + cnt int + e = AstExtractT{Name: term.Name} + ) + + if term.RegexValue != "" { + cnt++ + e.RegexValue = term.RegexValue + } + if term.JqValue != "" { + cnt++ + e.JqValue = term.JqValue + } + if cnt != 1 { + return nil, ErrExtractTerm + } + extracts = append(extracts, e) + } + return extracts, nil +} diff --git a/pkg/ast/ast_test.go b/pkg/ast/ast_test.go index f8da6c5..d87fd7d 100644 --- a/pkg/ast/ast_test.go +++ b/pkg/ast/ast_test.go @@ -65,6 +65,10 @@ func TestAstSuccess(t *testing.T) { rule: testdata.TestSuccessNegateOptions2, expectedNodeTypes: []string{"machine_seq", "log_seq", "log_set", "log_set"}, }, + "Success_Extract1": { + rule: testdata.TestSuccessSimpleExtraction, + expectedNodeTypes: []string{"machine_seq", "log_seq"}, + }, } for name, test := range tests { diff --git a/pkg/compiler/compiler.go b/pkg/compiler/compiler.go index de66dbf..b07db66 100644 --- a/pkg/compiler/compiler.go +++ b/pkg/compiler/compiler.go @@ -17,7 +17,7 @@ var ( ) var ( - defaultPlugin = &NodePlugin{} + defaultPlugin = NewDefaultPlugin() defaultRuntime = &NoopRuntime{} ) @@ -77,7 +77,7 @@ func WithPlugin(scope string, plugin PluginI) CompilerOptT { func parseOpts(opts []CompilerOptT) compilerOptsT { o := compilerOptsT{ - plugins: map[string]PluginI{"node": defaultPlugin}, + plugins: map[string]PluginI{schema.ScopeDefault: defaultPlugin}, runtime: defaultRuntime, } for _, opt := range opts { diff --git a/pkg/compiler/plugin.go b/pkg/compiler/plugin.go index f1c04d4..8d01d55 100644 --- a/pkg/compiler/plugin.go +++ b/pkg/compiler/plugin.go @@ -6,13 +6,13 @@ import ( "github.com/rs/zerolog/log" ) -type NodePlugin struct{} +type DefaultPlugin struct{} -func NewNodePlugin() *NodePlugin { - return &NodePlugin{} +func NewDefaultPlugin() *DefaultPlugin { + return &DefaultPlugin{} } -func (p *NodePlugin) Compile(runtime RuntimeI, node *ast.AstNodeT) (ObjsT, error) { +func (p *DefaultPlugin) Compile(runtime RuntimeI, node *ast.AstNodeT) (ObjsT, error) { var ( objs = make(ObjsT, 0) diff --git a/pkg/parser/parse.go b/pkg/parser/parse.go index f3dd396..30c72bc 100644 --- a/pkg/parser/parse.go +++ b/pkg/parser/parse.go @@ -101,6 +101,7 @@ type ParseTermT struct { Set *ParseSetT `yaml:"set,omitempty"` Sequence *ParseSequenceT `yaml:"sequence,omitempty"` NegateOpts *ParseNegateOptsT `yaml:",inline,omitempty"` + Extract []ParseExtractT `yaml:"extract,omitempty"` } type ParseSetT struct { @@ -111,6 +112,12 @@ type ParseSetT struct { Negate []ParseTermT `yaml:"negate,omitempty"` } +type ParseExtractT struct { + Name string `yaml:"name"` + JqValue string `yaml:"jq,omitempty"` + RegexValue string `yaml:"regex,omitempty"` +} + func (o *ParseTermT) UnmarshalYAML(unmarshal func(any) error) error { var str string if err := unmarshal(&str); err == nil { @@ -126,6 +133,7 @@ func (o *ParseTermT) UnmarshalYAML(unmarshal func(any) error) error { Set *ParseSetT `yaml:"set,omitempty"` Sequence *ParseSequenceT `yaml:"sequence,omitempty"` NegateOpts *ParseNegateOptsT `yaml:",inline,omitempty"` + Extract []ParseExtractT `yaml:"extract,omitempty"` } if err := unmarshal(&temp); err != nil { return err @@ -138,6 +146,7 @@ func (o *ParseTermT) UnmarshalYAML(unmarshal func(any) error) error { o.Set = temp.Set o.Sequence = temp.Sequence o.NegateOpts = temp.NegateOpts + o.Extract = temp.Extract return nil } diff --git a/pkg/parser/tree.go b/pkg/parser/tree.go index abf430a..54f15ae 100644 --- a/pkg/parser/tree.go +++ b/pkg/parser/tree.go @@ -33,11 +33,13 @@ var ( ErrInvalidCreId = errors.New("invalid cre id") ErrInvalidRuleId = errors.New("invalid rule id (must be base58)") ErrInvalidRuleHash = errors.New("invalid rule hash (must be base58)") + ErrExtractName = errors.New("invalid extract name (alphanumeric and underscores only)") ) var ( - validCreIdRegex = regexp.MustCompile(`^[A-Za-z0-9-]{4,}$`) - validBase58IdRegex = regexp.MustCompile(`^[1-9A-Za-z]{12,}$`) + validCreIdRegex = regexp.MustCompile(`^[A-Za-z0-9-]{4,}$`) + validBase58IdRegex = regexp.MustCompile(`^[1-9A-Za-z]{12,}$`) + validateExtractName = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`) ) type TreeT struct { @@ -74,6 +76,12 @@ type NegateOptsT struct { Absolute bool `json:"absolute"` } +type ExtractT struct { + Name string `json:"name"` + JqValue string `json:"jq_value,omitempty"` + RegexValue string `json:"regex_value,omitempty"` +} + type FieldT struct { Field string `json:"field"` StrValue string `json:"value"` @@ -81,6 +89,7 @@ type FieldT struct { RegexValue string `json:"regex_value"` Count int `json:"count"` NegateOpts *NegateOptsT `json:"negate"` + Extract []ExtractT `json:"extract,omitempty"` } type TermsT struct { @@ -108,6 +117,10 @@ func isValidCreId(s string) bool { return validCreIdRegex.MatchString(s) } +func isValidExtractName(s string) bool { + return validateExtractName.MatchString(s) +} + func initNode(ruleId, ruleHash string, creId string, yn *yaml.Node) (*NodeT, error) { if ruleId == "" { @@ -481,6 +494,23 @@ func nodeFromTerm(parent *NodeT, termsT map[string]ParseTermT, term ParseTermT, return node, nil } +func extractTerms(terms []ParseExtractT) ([]ExtractT, error) { + var extracts []ExtractT + for _, term := range terms { + + if !isValidExtractName(term.Name) { + return nil, ErrExtractName + } + + extracts = append(extracts, ExtractT{ + Name: term.Name, + JqValue: term.JqValue, + RegexValue: term.RegexValue, + }) + } + return extracts, nil +} + func negateOpts(term ParseTermT) (*NegateOptsT, error) { var ( opts = &NegateOptsT{} @@ -581,22 +611,30 @@ func buildPosNegChildren(node *NodeT, termsT map[string]ParseTermT, matches, neg func parseValue(term ParseTermT, negate bool) (*MatcherT, error) { var ( + err error matcher = &MatcherT{} ) switch negate { case false: + var extracts []ExtractT + if len(term.Extract) > 0 { + if extracts, err = extractTerms(term.Extract); err != nil { + return nil, err + } + } + matcher.Match.Fields = append(matcher.Match.Fields, FieldT{ Field: term.Field, StrValue: term.StrValue, JqValue: term.JqValue, RegexValue: term.RegexValue, Count: term.Count, + Extract: extracts, }) case true: var ( - err error opts *NegateOptsT ) diff --git a/pkg/schema/schema.go b/pkg/schema/schema.go index 1d2dbd0..291bf67 100644 --- a/pkg/schema/schema.go +++ b/pkg/schema/schema.go @@ -4,6 +4,7 @@ const ( ScopeOrganization = "organization" ScopeCluster = "cluster" ScopeNode = "node" + ScopeDefault = "default" ) type NodeTypeT string diff --git a/pkg/testdata/rules.go b/pkg/testdata/rules.go index ba4ec9d..27831b1 100644 --- a/pkg/testdata/rules.go +++ b/pkg/testdata/rules.go @@ -336,6 +336,32 @@ terms: - value: "Killing" ` +var TestSuccessSimpleExtraction = ` +rules: + - cre: + id: TestSuccessSimpleExtraction + metadata: + id: "J7uRQTGpGMyL1iFpssnBeS" + hash: "rdJLgqYgkEp8jg8Qks1qiq" + generation: 1 + rule: + sequence: + window: 30s + event: + source: log + correlations: + - corr1 + order: + - value: "term1" + extract: + - name: "corr1" + jq: ".field1" + - value: "term2" + extract: + - name: "corr1" + jq: ".field1" +` + /* Failure cases */ var TestFailTypo = ` # Line 1 starts here rules: