From 4f30afeca2b6cf48f8cd73acd80b072721172782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Carlos=20Ch=C3=A1vez?= Date: Sun, 6 Aug 2023 09:49:50 +0200 Subject: [PATCH] chore: adds memoize implementation for regexes and ahocorasick (#836) * chore: adds memoize implementation for regexes. Currently we create and allocate memory for every regex we compile, however there are cases where you compile the same regex over and over e.g. https://github.com/corazawaf/coraza-caddy/issues/76. Here we implement the memoize pattern to be able to reuse the regex and reduce the memory consumption. * docs: adds comments to code. * chore: simplify the memoize package by using sync.Map. * feat: extends memoize to ahocorasick and allow impl for tinygo but not synced as no concurrency. * tests: covers memoize_builders in tinygo. * chore: fixes nosync for tinygo. * docs: updates docs. --------- Co-authored-by: Juan Pablo Tosso --- .github/workflows/tinygo.yml | 3 + README.md | 3 + go.mod | 1 + go.sum | 1 + internal/corazawaf/rule.go | 14 ++- internal/memoize/README.md | 17 +++ internal/memoize/noop.go | 10 ++ internal/memoize/nosync.go | 36 ++++++ internal/memoize/nosync_test.go | 167 +++++++++++++++++++++++++ internal/memoize/sync.go | 47 +++++++ internal/memoize/sync_test.go | 169 ++++++++++++++++++++++++++ internal/operators/from_file.go | 10 +- internal/operators/from_file_test.go | 2 +- internal/operators/pm.go | 4 +- internal/operators/pm_from_dataset.go | 5 +- internal/operators/pm_from_file.go | 9 +- internal/operators/restpath.go | 6 +- internal/operators/rx.go | 11 +- internal/operators/validate_nid.go | 7 +- internal/seclang/directives.go | 11 +- magefile.go | 11 ++ 21 files changed, 518 insertions(+), 26 deletions(-) create mode 100644 internal/memoize/README.md create mode 100644 internal/memoize/noop.go create mode 100644 internal/memoize/nosync.go create mode 100644 internal/memoize/nosync_test.go create mode 100644 internal/memoize/sync.go create mode 100644 internal/memoize/sync_test.go diff --git a/.github/workflows/tinygo.yml b/.github/workflows/tinygo.yml index 43ba052b9..b714ce1b2 100644 --- a/.github/workflows/tinygo.yml +++ b/.github/workflows/tinygo.yml @@ -47,3 +47,6 @@ jobs: - name: Tests run: tinygo test ./... + + - name: Tests memoize + run: tinygo test -tags=memoize_builders ./... diff --git a/README.md b/README.md index e57e5a53f..8edb6ddc6 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,9 @@ have compatibility guarantees across minor versions - use with care. the operator with `plugins.RegisterOperator` to reduce binary size / startup overhead. * `coraza.rule.multiphase_valuation` - enables evaluation of rule variables in the phases that they are ready, not only the phase the rule is defined for. +* `memoize_builders` - enables memoization of builders for regex and aho-corasick +dictionaries to reduce memory consumption in deployments that launch several coraza +instances. For more context check [this issue](https://github.com/corazawaf/coraza-caddy/issues/76) ## E2E Testing diff --git a/go.mod b/go.mod index c779790a6..59da50095 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9 github.com/tidwall/gjson v1.14.4 golang.org/x/net v0.11.0 + golang.org/x/sync v0.1.0 rsc.io/binaryregexp v0.2.0 ) diff --git a/go.sum b/go.sum index bf2237977..12f51696a 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,7 @@ golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go index 7616ccaca..14016b957 100644 --- a/internal/corazawaf/rule.go +++ b/internal/corazawaf/rule.go @@ -15,6 +15,7 @@ import ( "github.com/corazawaf/coraza/v3/experimental/plugins/macro" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" "github.com/corazawaf/coraza/v3/internal/corazarules" + "github.com/corazawaf/coraza/v3/internal/memoize" "github.com/corazawaf/coraza/v3/types" "github.com/corazawaf/coraza/v3/types/variables" ) @@ -456,7 +457,12 @@ func (r *Rule) AddVariable(v variables.RuleVariable, key string, iscount bool) e var re *regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] - re = regexp.MustCompile(key) + + if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { + return err + } else { + re = vare.(*regexp.Regexp) + } } if multiphaseEvaluation { @@ -521,7 +527,11 @@ func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error { var re *regexp.Regexp if len(key) > 2 && key[0] == '/' && key[len(key)-1] == '/' { key = key[1 : len(key)-1] - re = regexp.MustCompile(key) + if vare, err := memoize.Do(key, func() (interface{}, error) { return regexp.Compile(key) }); err != nil { + return err + } else { + re = vare.(*regexp.Regexp) + } } // Prevent sigsev if r == nil { diff --git a/internal/memoize/README.md b/internal/memoize/README.md new file mode 100644 index 000000000..5ebe09aba --- /dev/null +++ b/internal/memoize/README.md @@ -0,0 +1,17 @@ +# Memoize + +Memoize allows to cache certain expensive function calls and +cache the result. The main advantage in Coraza is to memoize +the regexes and aho-corasick dictionaries when the connects +spins up more than one WAF in the same process and hence same +regexes are being compiled over and over. + +Currently it is opt-in under the `memoize_builders` build tag +as under a misuse (e.g. using after build time) it could lead +to a memory leak as currently the cache is global. + +**Important:** Connectors with *live reload* functionality (e.g. Caddy) +could lead to memory leaks which might or might not be negligible in +most of the cases as usually config changes in a WAF are about a few +rules, this is old objects will be still alive in memory until the program +stops. diff --git a/internal/memoize/noop.go b/internal/memoize/noop.go new file mode 100644 index 000000000..959408aff --- /dev/null +++ b/internal/memoize/noop.go @@ -0,0 +1,10 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !memoize_builders + +package memoize + +func Do(_ string, fn func() (interface{}, error)) (interface{}, error) { + return fn() +} diff --git a/internal/memoize/nosync.go b/internal/memoize/nosync.go new file mode 100644 index 000000000..d238244d9 --- /dev/null +++ b/internal/memoize/nosync.go @@ -0,0 +1,36 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build tinygo && memoize_builders + +package memoize + +import "sync" + +var doer = makeDoer(new(sync.Map)) + +// Do executes and returns the results of the given function, unless there was a cached +// value of the same key. Only one execution is in-flight for a given key at a time. +// The boolean return value indicates whether v was previously stored. +func Do(key string, fn func() (interface{}, error)) (interface{}, error) { + value, err, _ := doer(key, fn) + return value, err +} + +// makeDoer returns a function that executes and returns the results of the given function +func makeDoer(cache *sync.Map) func(string, func() (interface{}, error)) (interface{}, error, bool) { + return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { + // Check cache + value, found := cache.Load(key) + if found { + return value, nil, true + } + + data, err := fn() + if err == nil { + cache.Store(key, data) + } + + return data, err, false + } +} diff --git a/internal/memoize/nosync_test.go b/internal/memoize/nosync_test.go new file mode 100644 index 000000000..c942a522c --- /dev/null +++ b/internal/memoize/nosync_test.go @@ -0,0 +1,167 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "errors" + "sync" + "testing" +) + +func TestDo(t *testing.T) { + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err := Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // Second call on same key SHOULD be cached + result, err = Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err = Do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } +} + +func TestSuccessCall(t *testing.T) { + do := makeDoer(new(sync.Map)) + + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err, cached := do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call on same key SHOULD be cached + result, err, cached = do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err, cached = do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} + +func TestFailedCall(t *testing.T) { + do := makeDoer(new(sync.Map)) + + calls := 0 + + // This function will fail IFF it has not been called before. + twoForTheMoney := func() (interface{}, error) { + calls++ + + if calls == 1 { + return calls, errors.New("Try again") + } else { + return calls, nil + } + } + + // First call should fail, and not be cached + result, err, cached := do("key1", twoForTheMoney) + if err == nil { + t.Fatalf("expected error") + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call should succeed, and not be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Third call should succeed, and be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} diff --git a/internal/memoize/sync.go b/internal/memoize/sync.go new file mode 100644 index 000000000..a8d5c9610 --- /dev/null +++ b/internal/memoize/sync.go @@ -0,0 +1,47 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "sync" + + "golang.org/x/sync/singleflight" +) + +var doer = makeDoer(new(sync.Map), new(singleflight.Group)) + +// Do executes and returns the results of the given function, unless there was a cached +// value of the same key. Only one execution is in-flight for a given key at a time. +// The boolean return value indicates whether v was previously stored. +func Do(key string, fn func() (interface{}, error)) (interface{}, error) { + value, err, _ := doer(key, fn) + return value, err +} + +// makeDoer returns a function that executes and returns the results of the given function +func makeDoer(cache *sync.Map, group *singleflight.Group) func(string, func() (interface{}, error)) (interface{}, error, bool) { + return func(key string, fn func() (interface{}, error)) (interface{}, error, bool) { + // Check cache + value, found := cache.Load(key) + if found { + return value, nil, true + } + + // Combine memoized function with a cache store + value, err, _ := group.Do(key, func() (interface{}, error) { + data, innerErr := fn() + if innerErr == nil { + cache.Store(key, data) + } + + return data, innerErr + }) + + return value, err, false + } +} diff --git a/internal/memoize/sync_test.go b/internal/memoize/sync_test.go new file mode 100644 index 000000000..d995d1d41 --- /dev/null +++ b/internal/memoize/sync_test.go @@ -0,0 +1,169 @@ +// Copyright 2023 Juan Pablo Tosso and the OWASP Coraza contributors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !tinygo && memoize_builders + +// https://github.com/kofalt/go-memoize/blob/master/memoize.go + +package memoize + +import ( + "errors" + "sync" + "testing" + + "golang.org/x/sync/singleflight" +) + +func TestDo(t *testing.T) { + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err := Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // Second call on same key SHOULD be cached + result, err = Do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err = Do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } +} + +func TestSuccessCall(t *testing.T) { + do := makeDoer(new(sync.Map), &singleflight.Group{}) + + expensiveCalls := 0 + + // Function tracks how many times its been called + expensive := func() (interface{}, error) { + expensiveCalls++ + return expensiveCalls, nil + } + + // First call SHOULD NOT be cached + result, err, cached := do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call on same key SHOULD be cached + result, err, cached = do("key1", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // First call on a new key SHOULD NOT be cached + result, err, cached = do("key2", expensive) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} + +func TestFailedCall(t *testing.T) { + do := makeDoer(new(sync.Map), &singleflight.Group{}) + + calls := 0 + + // This function will fail IFF it has not been called before. + twoForTheMoney := func() (interface{}, error) { + calls++ + + if calls == 1 { + return calls, errors.New("Try again") + } else { + return calls, nil + } + } + + // First call should fail, and not be cached + result, err, cached := do("key1", twoForTheMoney) + if err == nil { + t.Fatalf("expected error") + } + + if want, have := 1, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Second call should succeed, and not be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := false, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } + + // Third call should succeed, and be cached + result, err, cached = do("key1", twoForTheMoney) + if err != nil { + t.Fatalf("unexpected error: %s", err.Error()) + } + + if want, have := 2, result.(int); want != have { + t.Fatalf("unexpected value, want %d, have %d", want, have) + } + + if want, have := true, cached; want != have { + t.Fatalf("unexpected caching, want %t, have %t", want, have) + } +} diff --git a/internal/operators/from_file.go b/internal/operators/from_file.go index cc4a7bacd..4d011c1af 100644 --- a/internal/operators/from_file.go +++ b/internal/operators/from_file.go @@ -10,15 +10,15 @@ import ( "path" ) -var errEmptyPaths = errors.New("empty paths") +var errEmptyDirs = errors.New("empty dirs") -func loadFromFile(filepath string, paths []string, root fs.FS) ([]byte, error) { +func loadFromFile(filepath string, dirs []string, root fs.FS) ([]byte, error) { if path.IsAbs(filepath) { return fs.ReadFile(root, filepath) } - if len(paths) == 0 { - return nil, errEmptyPaths + if len(dirs) == 0 { + return nil, errEmptyDirs } // handling files by operators is hard because we must know the paths where we can @@ -29,7 +29,7 @@ func loadFromFile(filepath string, paths []string, root fs.FS) ([]byte, error) { err error ) - for _, p := range paths { + for _, p := range dirs { absFilepath := path.Join(p, filepath) content, err = fs.ReadFile(root, absFilepath) if err != nil { diff --git a/internal/operators/from_file_test.go b/internal/operators/from_file_test.go index 228f3983b..b5c642725 100644 --- a/internal/operators/from_file_test.go +++ b/internal/operators/from_file_test.go @@ -29,7 +29,7 @@ func getTestFile(t *testing.T) (string, string) { func TestLoadFromFileNoPaths(t *testing.T) { _, err := loadFromFile("non-existing-file", nil, io.OSFS{}) if err == nil { - t.Errorf("expected error: %s", errEmptyPaths.Error()) + t.Errorf("expected error: %s", errEmptyDirs.Error()) } } diff --git a/internal/operators/pm.go b/internal/operators/pm.go index df7ada7ee..8c78cc35b 100644 --- a/internal/operators/pm.go +++ b/internal/operators/pm.go @@ -11,6 +11,7 @@ import ( ahocorasick "github.com/petar-dambovaliev/aho-corasick" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) type pm struct { @@ -31,8 +32,9 @@ func newPM(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { DFA: true, }) + m, _ := memoize.Do(data, func() (interface{}, error) { return builder.Build(dict), nil }) // TODO this operator is supposed to support snort data syntax: "@pm A|42|C|44|F" - return &pm{matcher: builder.Build(dict)}, nil + return &pm{matcher: m.(ahocorasick.AhoCorasick)}, nil } func (o *pm) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/pm_from_dataset.go b/internal/operators/pm_from_dataset.go index 9e3cc5385..fc4d57594 100644 --- a/internal/operators/pm_from_dataset.go +++ b/internal/operators/pm_from_dataset.go @@ -11,6 +11,7 @@ import ( ahocorasick "github.com/petar-dambovaliev/aho-corasick" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) func newPMFromDataset(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { @@ -26,7 +27,9 @@ func newPMFromDataset(options plugintypes.OperatorOptions) (plugintypes.Operator DFA: true, }) - return &pm{matcher: builder.Build(dataset)}, nil + m, _ := memoize.Do(data, func() (interface{}, error) { return builder.Build(dataset), nil }) + + return &pm{matcher: m.(ahocorasick.AhoCorasick)}, nil } func init() { diff --git a/internal/operators/pm_from_file.go b/internal/operators/pm_from_file.go index d15b934e0..035a448e4 100644 --- a/internal/operators/pm_from_file.go +++ b/internal/operators/pm_from_file.go @@ -13,12 +13,13 @@ import ( ahocorasick "github.com/petar-dambovaliev/aho-corasick" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) func newPMFromFile(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { - path := options.Arguments + filepath := options.Arguments - data, err := loadFromFile(path, options.Path, options.Root) + data, err := loadFromFile(filepath, options.Path, options.Root) if err != nil { return nil, err } @@ -44,7 +45,9 @@ func newPMFromFile(options plugintypes.OperatorOptions) (plugintypes.Operator, e DFA: false, }) - return &pm{matcher: builder.Build(lines)}, nil + m, _ := memoize.Do(strings.Join(options.Path, ",")+filepath, func() (interface{}, error) { return builder.Build(lines), nil }) + + return &pm{matcher: m.(ahocorasick.AhoCorasick)}, nil } func init() { diff --git a/internal/operators/restpath.go b/internal/operators/restpath.go index 526cb35bf..f1e4a8911 100644 --- a/internal/operators/restpath.go +++ b/internal/operators/restpath.go @@ -11,6 +11,7 @@ import ( "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) var rePathTokenRe = regexp.MustCompile(`\{([^\}]+)\}`) @@ -30,11 +31,12 @@ func newRESTPath(options plugintypes.OperatorOptions) (plugintypes.Operator, err for _, token := range rePathTokenRe.FindAllStringSubmatch(data, -1) { data = strings.Replace(data, token[0], fmt.Sprintf("(?P<%s>.*)", token[1]), 1) } - re, err := regexp.Compile(data) + + re, err := memoize.Do(data, func() (interface{}, error) { return regexp.Compile(data) }) if err != nil { return nil, err } - return &restpath{re: re}, nil + return &restpath{re: re.(*regexp.Regexp)}, nil } func (o *restpath) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/operators/rx.go b/internal/operators/rx.go index 147890e1c..e801c9f72 100644 --- a/internal/operators/rx.go +++ b/internal/operators/rx.go @@ -14,6 +14,7 @@ import ( "rsc.io/binaryregexp" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) type rx struct { @@ -35,15 +36,14 @@ func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { return newBinaryRX(options) } - re, err := regexp.Compile(data) + re, err := memoize.Do(data, func() (interface{}, error) { return regexp.Compile(data) }) if err != nil { return nil, err } - return &rx{re: re}, nil + return &rx{re: re.(*regexp.Regexp)}, nil } func (o *rx) Evaluate(tx plugintypes.TransactionState, value string) bool { - if tx.Capturing() { match := o.re.FindStringSubmatch(value) if len(match) == 0 { @@ -72,15 +72,14 @@ var _ plugintypes.Operator = (*binaryRX)(nil) func newBinaryRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) { data := options.Arguments - re, err := binaryregexp.Compile(data) + re, err := memoize.Do(data, func() (interface{}, error) { return binaryregexp.Compile(data) }) if err != nil { return nil, err } - return &binaryRX{re: re}, nil + return &binaryRX{re: re.(*binaryregexp.Regexp)}, nil } func (o *binaryRX) Evaluate(tx plugintypes.TransactionState, value string) bool { - if tx.Capturing() { match := o.re.FindStringSubmatch(value) if len(match) == 0 { diff --git a/internal/operators/validate_nid.go b/internal/operators/validate_nid.go index ce6192a2b..383f160b6 100644 --- a/internal/operators/validate_nid.go +++ b/internal/operators/validate_nid.go @@ -12,6 +12,7 @@ import ( "strings" "github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes" + "github.com/corazawaf/coraza/v3/internal/memoize" ) type validateNidFunction = func(input string) bool @@ -39,11 +40,13 @@ func newValidateNID(options plugintypes.OperatorOptions) (plugintypes.Operator, default: return nil, fmt.Errorf("invalid @validateNid argument") } - re, err := regexp.Compile(expr) + + re, err := memoize.Do(expr, func() (interface{}, error) { return regexp.Compile(expr) }) if err != nil { return nil, err } - return &validateNid{fn: fn, re: re}, nil + + return &validateNid{fn: fn, re: re.(*regexp.Regexp)}, nil } func (o *validateNid) Evaluate(tx plugintypes.TransactionState, value string) bool { diff --git a/internal/seclang/directives.go b/internal/seclang/directives.go index 8b0628f0a..7a158c349 100644 --- a/internal/seclang/directives.go +++ b/internal/seclang/directives.go @@ -16,6 +16,7 @@ import ( "github.com/corazawaf/coraza/v3/debuglog" "github.com/corazawaf/coraza/v3/internal/auditlog" "github.com/corazawaf/coraza/v3/internal/corazawaf" + "github.com/corazawaf/coraza/v3/internal/memoize" utils "github.com/corazawaf/coraza/v3/internal/strings" "github.com/corazawaf/coraza/v3/types" ) @@ -731,9 +732,13 @@ func directiveSecAuditLogRelevantStatus(options *DirectiveOptions) error { return errEmptyOptions } - var err error - options.WAF.AuditLogRelevantStatus, err = regexp.Compile(options.Opts) - return err + re, err := memoize.Do(options.Opts, func() (interface{}, error) { return regexp.Compile(options.Opts) }) + if err != nil { + return err + } + + options.WAF.AuditLogRelevantStatus = re.(*regexp.Regexp) + return nil } // Description: Defines which parts of each transaction are going to be recorded diff --git a/magefile.go b/magefile.go index 840fdb16b..960f0e418 100644 --- a/magefile.go +++ b/magefile.go @@ -110,12 +110,23 @@ func Test() error { if err := sh.RunV("go", "test", "./..."); err != nil { return err } + + if err := sh.RunV("go", "test", "-tags=memoize_builders", "./..."); err != nil { + return err + } + if err := sh.RunV("go", "test", "./examples/http-server"); err != nil { return err } + if err := sh.RunV("go", "test", "./testing/coreruleset"); err != nil { return err } + + if err := sh.RunV("go", "test", "-tags=memoize_builders", "./testing/coreruleset"); err != nil { + return err + } + // Execute FTW tests with multiphase evaluation enabled as well if err := sh.RunV("go", "test", "-tags=coraza.rule.multiphase_evaluation", "./testing/coreruleset"); err != nil { return err