Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: removes multiline from default regex modifiers [breaking] #876

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ dictionaries to reduce memory consumption in deployments that launch several cor
instances. For more context check [this issue](https://github.com/corazawaf/coraza-caddy/issues/76)
* `no_fs_access` - indicates that the target environment has no access to FS in order to not leverage OS' filesystem related functionality e.g. file body buffers.
* `coraza.rule.case_sensitive_args_keys` - enables case-sensitive matching for ARGS keys, aligning Coraza behavior with RFC 3986 specification. It will be enabled by default in the next major version.
* `coraza.rule.no_regex_multiline` - disables enabling by default regexes multiline modifiers in `@rx` operator. It aligns with CRS expected behavior, reduces false positives and might improve performances. No multiline regexes by default will be enabled in the next major version. For more context check [this PR](https://github.com/corazawaf/coraza/pull/876)

## E2E Testing

Expand Down
8 changes: 8 additions & 0 deletions internal/operators/multilineregex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build coraza.rule.no_regex_multiline

package operators

var shouldNotUseMultilineRegexesOperatorByDefault = true
8 changes: 8 additions & 0 deletions internal/operators/multilineregex_default.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright 2024 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build !coraza.rule.no_regex_multiline

package operators

var shouldNotUseMultilineRegexesOperatorByDefault = false
17 changes: 13 additions & 4 deletions internal/operators/rx.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,19 @@ type rx struct {
var _ plugintypes.Operator = (*rx)(nil)

func newRX(options plugintypes.OperatorOptions) (plugintypes.Operator, error) {
// (?sm) enables multiline and dotall mode, required by some CRS rules and matching ModSec behavior, see
// - https://stackoverflow.com/a/27680233
// - https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
data := fmt.Sprintf("(?sm)%s", options.Arguments)
var data string
if shouldNotUseMultilineRegexesOperatorByDefault {
// (?s) enables dotall mode, required by some CRS rules and matching ModSec behavior, see
// - https://github.com/google/re2/wiki/Syntax
// - Flag usage: https://groups.google.com/g/golang-nuts/c/jiVdamGFU9E
data = fmt.Sprintf("(?s)%s", options.Arguments)
} else {
// TODO: deprecate multiline modifier set by default in Coraza v4
// CRS rules will explicitly set the multiline modifier when needed
// Having it enabled by default can lead to false positives and less performance
// See https://github.com/corazawaf/coraza/pull/876
data = fmt.Sprintf("(?sm)%s", options.Arguments)
}

if matchesArbitraryBytes(data) {
// Use binary regex matcher if expression matches non-utf8 bytes. The binary matcher does
Expand Down
119 changes: 119 additions & 0 deletions internal/operators/rx_no_multiline_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build coraza.rule.no_regex_multiline

package operators

import (
"fmt"
"testing"

"github.com/corazawaf/coraza/v3/experimental/plugins/plugintypes"
"github.com/corazawaf/coraza/v3/internal/corazawaf"
)

func TestRx(t *testing.T) {
tests := []struct {
pattern string
input string
want bool
}{
{
pattern: "som(.*)ta",
input: "somedata",
want: true,
},
{
pattern: "som(.*)ta",
input: "notdata",
want: false,
},
{
pattern: "ハロー",
input: "ハローワールド",
want: true,
},
{
pattern: "ハロー",
input: "グッバイワールド",
want: false,
},
{
pattern: `\xac\xed\x00\x05`,
input: "\xac\xed\x00\x05t\x00\x04test",
want: true,
},
{
pattern: `\xac\xed\x00\x05`,
input: "\xac\xed\x00t\x00\x04test",
want: false,
},
{
// Requires dotall
pattern: `hello.*world`,
input: "hello\nworld",
want: true,
},
{
// Requires multiline disabled by default
pattern: `^hello.*world`,
input: "test\nhello\nworld",
want: false,
},
{
// Makes sure multiline can be enabled by the user
pattern: `(?m)^hello.*world`,
input: "test\nhello\nworld",
want: true,
},
{
// Makes sure, (?s) passed by the user does not
// break the regex.
pattern: `(?s)hello.*world`,
input: "hello\nworld",
want: true,
},
{
// Make sure user flags are also applied
pattern: `(?i)hello.*world`,
input: "testHELLO\nworld",
want: true,
},
{
// The so called DOLLAR_ENDONLY modifier in PCRE2 is meant to tweak the meaning of dollar '$'
// so that it matches only at the very end of the string (see: https://www.pcre.org/current/doc/html/pcre2pattern.html#SEC6)
// It seems that re2 already behaves like that by default.
pattern: `123$`,
input: "123\n",
want: false,
},
{
// Dollar endonly match
pattern: `123$`,
input: "test123",
want: true,
},
}

for _, tc := range tests {
tt := tc
t.Run(fmt.Sprintf("%s/%s", tt.pattern, tt.input), func(t *testing.T) {

opts := plugintypes.OperatorOptions{
Arguments: tt.pattern,
}
rx, err := newRX(opts)
if err != nil {
t.Error(err)
}
waf := corazawaf.NewWAF()
tx := waf.NewTransaction()
tx.Capture = true
res := rx.Evaluate(tx, tt.input)
if res != tt.want {
t.Errorf("want %v, got %v", tt.want, res)
}
})
}
}
2 changes: 2 additions & 0 deletions internal/operators/rx_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

//go:build !coraza.rule.no_regex_multiline

package operators

import (
Expand Down
12 changes: 11 additions & 1 deletion magefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ func Test() error {
return err
}

// Execute FTW tests with coraza.rule.no_regex_multiline as well
if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "./testing/coreruleset"); err != nil {
return err
}

if err := sh.RunV("go", "test", "-tags=coraza.rule.no_regex_multiline", "-run=^TestRx", "./..."); err != nil {
return err
}

if err := sh.RunV("go", "test", "-tags=coraza.rule.case_sensitive_args_keys", "-run=^TestCaseSensitive", "./..."); err != nil {
return err
}
Expand Down Expand Up @@ -174,7 +183,7 @@ func Coverage() error {
if err := sh.RunV("go", "test", tagsCmd, "-coverprofile=build/coverage-ftw.txt", "-covermode=atomic", "-coverpkg=./...", "./testing/coreruleset"); err != nil {
return err
}
// we run tinygo tag only if memoize_builders is is not enabled
// we run tinygo tag only if memoize_builders is not enabled
if !strings.Contains(tags, "memoize_builders") {
if tagsCmd != "" {
tagsCmd += ",tinygo"
Expand Down Expand Up @@ -271,6 +280,7 @@ func combinations(tags []string) []string {
func TagsMatrix() error {
tags := []string{
"coraza.rule.case_sensitive_args_keys",
"coraza.rule.no_regex_multiline",
"memoize_builders",
"coraza.rule.multiphase_valuation",
"no_fs_access",
Expand Down
Loading