diff --git a/.github/workflows/tinygo.yml b/.github/workflows/tinygo.yml index 43ba052b9..b714ce1b2 100644 --- a/.github/workflows/tinygo.yml +++ b/.github/workflows/tinygo.yml @@ -47,3 +47,6 @@ jobs: - name: Tests run: tinygo test ./... + + - name: Tests memoize + run: tinygo test -tags=memoize_builders ./... diff --git a/README.md b/README.md index e57e5a53f..8edb6ddc6 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,9 @@ have compatibility guarantees across minor versions - use with care. the operator with `plugins.RegisterOperator` to reduce binary size / startup overhead. * `coraza.rule.multiphase_valuation` - enables evaluation of rule variables in the phases that they are ready, not only the phase the rule is defined for. +* `memoize_builders` - enables memoization of builders for regex and aho-corasick +dictionaries to reduce memory consumption in deployments that launch several coraza +instances. For more context check [this issue](https://github.com/corazawaf/coraza-caddy/issues/76) ## E2E Testing diff --git a/go.mod b/go.mod index c779790a6..59da50095 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9 github.com/tidwall/gjson v1.14.4 golang.org/x/net v0.11.0 + golang.org/x/sync v0.1.0 rsc.io/binaryregexp v0.2.0 ) diff --git a/go.sum b/go.sum index bf2237977..12f51696a 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,7 @@ golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index 7616ccaca..14016b957 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -15,6 +15,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/memoize" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -456,7 +457,12 @@ func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) e var re *regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] - re = regexp.MustCompile(key) + + if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { + return err + } else { + re = vare.(*regexp.Regexp) + } } if multiphaseEvaluation { @@ -521,7 +527,11 @@ func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error { var re *regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] - re = regexp.MustCompile(key) + if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { + return err + } else { + re = vare.(*regexp.Regexp) + } } // Prevent sigsev if r == nil { diff --git a/internal/memoize/README.md b/internal/memoize/README.md new file mode 100644 index 000000000..5ebe09aba --- /dev/null +++ b/internal/memoize/README.md @@ -0,0 +1,17 @@ +# Memoize + +Memoize allows to cache certain expensive function calls and +cache the result. The main advantage in Coraza is to memoize +the regexes and aho-corasick dictionaries when the connects +spins up more than one WAF in the same process and hence same +regexes are being compiled over and over. + +Currently it is opt-in under the `memoize_builders` build tag +as under a misuse (e.g. using after build time) it could lead +to a memory leak as currently the cache is global. + +**Important:** Connectors with *live reload* functionality (e.g. Caddy) +could lead to memory leaks which might or might not be negligible in +most of the cases as usually config changes in a WAF are about a few +rules, this is old objects will be still alive in memory until the program +stops. diff --git a/internal/memoize/noop.go b/internal/memoize/noop.go new file mode 100644 index 000000000..959408aff --- /dev/null +++ b/internal/memoize/noop.go @@ -0,0 +1,10 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !memoize_builders + +package memoize + +func Do(_ string, fn func() (interface{}, error)) (interface{}, error) { + return fn() +} diff --git a/internal/memoize/nosync.go b/internal/memoize/nosync.go new file mode 100644 index 000000000..d238244d9 --- /dev/null +++ b/internal/memoize/nosync.go @@ -0,0 +1,36 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build tinygo && memoize_builders + +package memoize + +import "sync" + +var doer = makeDoer(new(sync.Map)) + +// Do executes and returns the results of the given function, unless there was a cached +// value of the same key. Only one execution is in-flight for a given key at a time. +// The boolean return value indicates whether v was previously stored. +func Do(key string, fn func() (interface{}, error)) (interface{}, error) { + value, err, _ := doer(key, fn) + return value, err +} + +// makeDoer returns a function that executes and returns the results of the given function +func makeDoer(cache *sync.Map) func(string, func() (interface{}, error)) (interface{}, error, bool) { + return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { + // Check cache + value, found := cache.Load(key) + if found { + return value, nil, true + } + + data, err := fn() + if err == nil { + cache.Store(key, data) + } + + return data, err, false + } +} diff --git a/internal/memoize/nosync_test.go b/internal/memoize/nosync_test.go new file mode 100644 index 000000000..c942a522c --- /dev/null +++ b/internal/memoize/nosync_test.go @@ -0,0 +1,167 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "errors" + "sync" + "testing" +) + +func TestDo(t *testing.T) { + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err := Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // Second call on same key SHOULD be cached + result, err = Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err = Do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } +} + +func TestSuccessCall(t *testing.T) { + do := makeDoer(new(sync.Map)) + + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err, cached := do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call on same key SHOULD be cached + result, err, cached = do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err, cached = do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} + +func TestFailedCall(t *testing.T) { + do := makeDoer(new(sync.Map)) + + calls := 0 + + // This function will fail IFF it has not been called before. + twoForTheMoney := func() (interface{}, error) { + calls++ + + if calls == 1 { + return calls, errors.New("Try again") + } else { + return calls, nil + } + } + + // First call should fail, and not be cached + result, err, cached := do("key1", twoForTheMoney) + if err == nil { + t.Fatalf("expected error") + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call should succeed, and not be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Third call should succeed, and be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} diff --git a/internal/memoize/sync.go b/internal/memoize/sync.go new file mode 100644 index 000000000..a8d5c9610 --- /dev/null +++ b/internal/memoize/sync.go @@ -0,0 +1,47 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "sync" + + "golang.org/x/sync/singleflight" +) + +var doer = makeDoer(new(sync.Map), new(singleflight.Group)) + +// Do executes and returns the results of the given function, unless there was a cached +// value of the same key. Only one execution is in-flight for a given key at a time. +// The boolean return value indicates whether v was previously stored. +func Do(key string, fn func() (interface{}, error)) (interface{}, error) { + value, err, _ := doer(key, fn) + return value, err +} + +// makeDoer returns a function that executes and returns the results of the given function +func makeDoer(cache *sync.Map, group *singleflight.Group) func(string, func() (interface{}, error)) (interface{}, error, bool) { + return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { + // Check cache + value, found := cache.Load(key) + if found { + return value, nil, true + } + + // Combine memoized function with a cache store + value, err, _ := group.Do(key, func() (interface{}, error) { + data, innerErr := fn() + if innerErr == nil { + cache.Store(key, data) + } + + return data, innerErr + }) + + return value, err, false + } +} diff --git a/internal/memoize/sync_test.go b/internal/memoize/sync_test.go new file mode 100644 index 000000000..d995d1d41 --- /dev/null +++ b/internal/memoize/sync_test.go @@ -0,0 +1,169 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "errors" + "sync" + "testing" + + "golang.org/x/sync/singleflight" +) + +func TestDo(t *testing.T) { + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err := Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // Second call on same key SHOULD be cached + result, err = Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err = Do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } +} + +func TestSuccessCall(t *testing.T) { + do := makeDoer(new(sync.Map), &singleflight.Group{}) + + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err, cached := do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call on same key SHOULD be cached + result, err, cached = do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err, cached = do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} + +func TestFailedCall(t *testing.T) { + do := makeDoer(new(sync.Map), &singleflight.Group{}) + + calls := 0 + + // This function will fail IFF it has not been called before. + twoForTheMoney := func() (interface{}, error) { + calls++ + + if calls == 1 { + return calls, errors.New("Try again") + } else { + return calls, nil + } + } + + // First call should fail, and not be cached + result, err, cached := do("key1", twoForTheMoney) + if err == nil { + t.Fatalf("expected error") + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call should succeed, and not be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Third call should succeed, and be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} diff --git a/internal/operators/from_file.go b/internal/operators/from_file.go index cc4a7bacd..4d011c1af 100644 --- a/internal/operators/from_file.go +++ b/internal/operators/from_file.go @@ -10,15 +10,15 @@ import ( "path" ) -var errEmptyPaths = errors.New("empty paths") +var errEmptyDirs = errors.New("empty dirs") -func loadFromFile(filepath string, paths []string, root fs.FS) ([]byte, error) { +func loadFromFile(filepath string, dirs []string, root fs.FS) ([]byte, error) { if path.IsAbs(filepath) { return fs.ReadFile(root, filepath) } - if len(paths) == 0 { - return nil, errEmptyPaths + if len(dirs) == 0 { + return nil, errEmptyDirs } // handling files by operators is hard because we must know the paths where we can @@ -29,7 +29,7 @@ func loadFromFile(filepath string, paths []string, root fs.FS) ([]byte, error) { err error ) - for _, p := range paths { + for _, p := range dirs { absFilepath := path.Join(p, filepath) content, err = fs.ReadFile(root, absFilepath) if err != nil { diff --git a/internal/operators/from_file_test.go b/internal/operators/from_file_test.go index 228f3983b..b5c642725 100644 --- a/internal/operators/from_file_test.go +++ b/internal/operators/from_file_test.go @@ -29,7 +29,7 @@ func getTestFile(t *testing.T) (string, string) { func TestLoadFromFileNoPaths(t *testing.T) { _, err := loadFromFile("non-existing-file", nil, io.OSFS{}) if err == nil { - t.Errorf("expected error: %s", errEmptyPaths.Error()) + t.Errorf("expected error: %s", errEmptyDirs.Error()) } } diff --git a/internal/operators/pm.go b/internal/operators/pm.go index df7ada7ee..8c78cc35b 100644 --- a/internal/operators/pm.go +++ b/internal/operators/pm.go @@ -11,6 +11,7 @@ import ( ahocorasick "github.com/petar-dambovaliev/aho-corasick" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) type pm struct { @@ -31,8 +32,9 @@ func newPM(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { DFA: true, }) + m, _ := memoize.Do(data, func() (interface{}, error) { return builder.Build(dict), nil }) // TODO this operator is supposed to support snort data syntax: "@pm A|42|C|44|F" - return &pm{matcher: builder.Build(dict)}, nil + return &pm{matcher: m.(ahocorasick.AhoCorasick)}, nil } func (o *pm) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/pm_from_dataset.go b/internal/operators/pm_from_dataset.go index 9e3cc5385..fc4d57594 100644 --- a/internal/operators/pm_from_dataset.go +++ b/internal/operators/pm_from_dataset.go @@ -11,6 +11,7 @@ import ( ahocorasick "github.com/petar-dambovaliev/aho-corasick" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) func newPMFromDataset(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { @@ -26,7 +27,9 @@ func newPMFromDataset(options plugintypes.OperatorOptions) (plugintypes.Operator DFA: true, }) - return &pm{matcher: builder.Build(dataset)}, nil + m, _ := memoize.Do(data, func() (interface{}, error) { return builder.Build(dataset), nil }) + + return &pm{matcher: m.(ahocorasick.AhoCorasick)}, nil } func init() { diff --git a/internal/operators/pm_from_file.go b/internal/operators/pm_from_file.go index d15b934e0..035a448e4 100644 --- a/internal/operators/pm_from_file.go +++ b/internal/operators/pm_from_file.go @@ -13,12 +13,13 @@ import ( ahocorasick "github.com/petar-dambovaliev/aho-corasick" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) func newPMFromFile(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { - path := options.Arguments + filepath := options.Arguments - data, err := loadFromFile(path, options.Path, options.Root) + data, err := loadFromFile(filepath, options.Path, options.Root) if err != nil { return nil, err } @@ -44,7 +45,9 @@ func newPMFromFile(options plugintypes.OperatorOptions) (plugintypes.Operator, e DFA: false, }) - return &pm{matcher: builder.Build(lines)}, nil + m, _ := memoize.Do(strings.Join(options.Path, ",")+filepath, func() (interface{}, error) { return builder.Build(lines), nil }) + + return &pm{matcher: m.(ahocorasick.AhoCorasick)}, nil } func init() { diff --git a/internal/operators/restpath.go b/internal/operators/restpath.go index 526cb35bf..f1e4a8911 100644 --- a/internal/operators/restpath.go +++ b/internal/operators/restpath.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`) @@ -30,11 +31,12 @@ func newRESTPath(options plugintypes.OperatorOptions) (plugintypes.Operator, err for _, token := range rePathTokenRe.FindAllStringSubmatch(data, -1) { data = strings.Replace(data, token[0], fmt.Sprintf("(?P<%s>.*)", token[1]), 1) } - re, err := regexp.Compile(data) + + re, err := memoize.Do(data, func() (interface{}, error) { return regexp.Compile(data) }) if err != nil { return nil, err } - return &restpath{re: re}, nil + return &restpath{re: re.(*regexp.Regexp)}, nil } func (o *restpath) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/rx.go b/internal/operators/rx.go index 147890e1c..e801c9f72 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -14,6 +14,7 @@ import ( "rsc.io/binaryregexp" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) type rx struct { @@ -35,15 +36,14 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { return newBinaryRX(options) } - re, err := regexp.Compile(data) + re, err := memoize.Do(data, func() (interface{}, error) { return regexp.Compile(data) }) if err != nil { return nil, err } - return &rx{re: re}, nil + return &rx{re: re.(*regexp.Regexp)}, nil } func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool { - if tx.Capturing() { match := o.re.FindStringSubmatch(value) if len(match) == 0 { @@ -72,15 +72,14 @@ var _ plugintypes.Operator = (*binaryRX)(nil) func newBinaryRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { data := options.Arguments - re, err := binaryregexp.Compile(data) + re, err := memoize.Do(data, func() (interface{}, error) { return binaryregexp.Compile(data) }) if err != nil { return nil, err } - return &binaryRX{re: re}, nil + return &binaryRX{re: re.(*binaryregexp.Regexp)}, nil } func (o *binaryRX) Evaluate(tx plugintypes.TransactionState, value string) bool { - if tx.Capturing() { match := o.re.FindStringSubmatch(value) if len(match) == 0 { diff --git a/internal/operators/validate_nid.go b/internal/operators/validate_nid.go index ce6192a2b..383f160b6 100644 --- a/internal/operators/validate_nid.go +++ b/internal/operators/validate_nid.go @@ -12,6 +12,7 @@ import ( "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) type validateNidFunction = func(input string) bool @@ -39,11 +40,13 @@ func newValidateNID(options plugintypes.OperatorOptions) (plugintypes.Operator, default: return nil, fmt.Errorf("invalid @validateNid argument") } - re, err := regexp.Compile(expr) + + re, err := memoize.Do(expr, func() (interface{}, error) { return regexp.Compile(expr) }) if err != nil { return nil, err } - return &validateNid{fn: fn, re: re}, nil + + return &validateNid{fn: fn, re: re.(*regexp.Regexp)}, nil } func (o *validateNid) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/seclang/directives.go b/internal/seclang/directives.go index 8b0628f0a..7a158c349 100644 --- a/internal/seclang/directives.go +++ b/internal/seclang/directives.go @@ -16,6 +16,7 @@ import ( "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/internal/auditlog" "github.com/corazawaf/coraza/v3/internal/corazawaf" + "github.com/corazawaf/coraza/v3/internal/memoize" utils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" ) @@ -731,9 +732,13 @@ func directiveSecAuditLogRelevantStatus(options *DirectiveOptions) error { return errEmptyOptions } - var err error - options.WAF.AuditLogRelevantStatus, err = regexp.Compile(options.Opts) - return err + re, err := memoize.Do(options.Opts, func() (interface{}, error) { return regexp.Compile(options.Opts) }) + if err != nil { + return err + } + + options.WAF.AuditLogRelevantStatus = re.(*regexp.Regexp) + return nil } // Description: Defines which parts of each transaction are going to be recorded diff --git a/magefile.go b/magefile.go index 840fdb16b..960f0e418 100644 --- a/magefile.go +++ b/magefile.go @@ -110,12 +110,23 @@ func Test() error { if err := sh.RunV("go", "test", "./..."); err != nil { return err } + + if err := sh.RunV("go", "test", "-tags=memoize_builders", "./..."); err != nil { + return err + } + if err := sh.RunV("go", "test", "./examples/http-server"); err != nil { return err } + if err := sh.RunV("go", "test", "./testing/coreruleset"); err != nil { return err } + + if err := sh.RunV("go", "test", "-tags=memoize_builders", "./testing/coreruleset"); err != nil { + return err + } + // Execute FTW tests with multiphase evaluation enabled as well if err := sh.RunV("go", "test", "-tags=coraza.rule.multiphase_evaluation", "./testing/coreruleset"); err != nil { return err