Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add quantitative testing #355

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
go-version: ^1.19
go-version: ^1.21
cache: true

- name: Run Go Tests
Expand Down
91 changes: 91 additions & 0 deletions cmd/quantitative.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2023 OWASP ModSecurity Core Rule Set Project
// SPDX-License-Identifier: Apache-2.0

package cmd

import (
"github.com/coreruleset/go-ftw/internal/quantitative"
"github.com/coreruleset/go-ftw/output"
"github.com/spf13/cobra"
"os"
)

// NewQuantitativeCmd
// Returns a new cobra command for running quantitative tests
func NewQuantitativeCmd() *cobra.Command {
runCmd := &cobra.Command{
Use: "quantitative",
Short: "Run Quantitative Tests",
fzipi marked this conversation as resolved.
Show resolved Hide resolved
Long: `Run all quantitative tests`,
RunE: runQuantitativeE,
}

runCmd.Flags().BoolP("markdown", "m", false, "Markdown table output mode")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
runCmd.Flags().IntP("fast", "x", 0, "Process 1 in every X lines of input ('fast run' mode)")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
runCmd.Flags().IntP("lines", "l", 0, "Number of lines of input to process before stopping")
runCmd.Flags().IntP("paranoia-level", "P", 1, "Paranoia level used to run the quantitative tests")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
runCmd.Flags().IntP("number", "n", 0, "Number is the payload line from the corpus to exclusively send")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
runCmd.Flags().StringP("payload", "p", "", "Payload is a string you want to test using quantitative tests. Will not use the corpus.")
runCmd.Flags().IntP("rule", "r", 0, "Rule ID of interest: only show false positives for specified rule ID")
runCmd.Flags().StringP("corpus", "c", "leipzig", "Corpus to use for the quantitative tests")
runCmd.Flags().StringP("corpus-lang", "L", "eng", "Corpus language to use for the quantitative tests.")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
runCmd.Flags().StringP("corpus-size", "s", "100K", "Corpus size to use for the quantitative tests. Most corpus will have a size like \"100K\", \"1M\", etc.")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
runCmd.Flags().StringP("corpus-year", "y", "2023", "Corpus year to use for the quantitative tests. Most corpus will have a year like \"2023\", \"2022\", etc.")
runCmd.Flags().StringP("corpus-source", "S", "news", "Corpus source to use for the quantitative tests. Most corpus will have a source like \"news\", \"web\", \"wikipedia\", etc.")
runCmd.Flags().StringP("directory", "d", ".", "Directory where the CRS rules are stored")
runCmd.Flags().StringP("file", "f", "", "output file path for quantitative tests. Prints to standard output by default.")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
runCmd.Flags().StringP("output", "o", "normal", "output type for quantitative tests. \"normal\" is the default.")
fzipi marked this conversation as resolved.
Show resolved Hide resolved

return runCmd
}

// runQuantitativeE is the RunE handler of the "quantitative" command.
//
// It reads the command's flags, prepares the output destination (standard
// output, or the file given with --file) and hands the collected parameters
// to quantitative.RunQuantitativeTests.
//
// It returns an error when the output file cannot be created or closed, or
// when the quantitative run itself fails.
func runQuantitativeE(cmd *cobra.Command, _ []string) (err error) {
	// From here on any error is a runtime failure, not a usage mistake.
	cmd.SilenceUsage = true

	// The lookup errors are deliberately ignored: every flag read below is
	// registered in NewQuantitativeCmd with the same name and type.
	flags := cmd.Flags()
	corpus, _ := flags.GetString("corpus")
	corpusSize, _ := flags.GetString("corpus-size")
	corpusLang, _ := flags.GetString("corpus-lang")
	corpusYear, _ := flags.GetString("corpus-year")
	corpusSource, _ := flags.GetString("corpus-source")
	directory, _ := flags.GetString("directory")
	fast, _ := flags.GetInt("fast")
	lines, _ := flags.GetInt("lines")
	markdown, _ := flags.GetBool("markdown")
	outputFilename, _ := flags.GetString("file")
	paranoiaLevel, _ := flags.GetInt("paranoia-level")
	payload, _ := flags.GetString("payload")
	number, _ := flags.GetInt("number")
	rule, _ := flags.GetInt("rule")
	wantedOutput, _ := flags.GetString("output")

	// Results go to standard output unless an output file was requested.
	outputFile := os.Stdout
	if outputFilename != "" {
		// os.Create opens the file for writing (creating or truncating it).
		// os.Open would return a read-only handle and every write would fail.
		f, createErr := os.Create(outputFilename)
		if createErr != nil {
			return createErr
		}
		// Close the file once the run is over and surface a close error
		// (for example a failed flush) unless the run already failed.
		defer func() {
			if closeErr := f.Close(); err == nil {
				err = closeErr
			}
		}()
		outputFile = f
	}
	out := output.NewOutput(wantedOutput, outputFile)

	params := quantitative.QuantitativeParams{
		Corpus:        corpus,
		CorpusSize:    corpusSize,
		CorpusYear:    corpusYear,
		CorpusLang:    corpusLang,
		CorpusSource:  corpusSource,
		Directory:     directory,
		Fast:          fast,
		Lines:         lines,
		Markdown:      markdown,
		ParanoiaLevel: paranoiaLevel,
		Number:        number,
		Payload:       payload,
		Rule:          rule,
	}

	return quantitative.RunQuantitativeTests(params, out)
}
61 changes: 61 additions & 0 deletions cmd/quantitative_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2023 OWASP ModSecurity Core Rule Set Project
// SPDX-License-Identifier: Apache-2.0

package cmd

import (
"context"
"github.com/spf13/cobra"
"github.com/stretchr/testify/suite"
"io/fs"
"os"
"path"
"testing"
)

// Fixture contents written into the fake CRS directory built by SetupTest.
var (
	// crsSetupFileContents is the body of the fake crs-setup.conf.example file.
	crsSetupFileContents = `# CRS Setup Configuration File`
	// emptyRulesFile is the body of the fake rules file.
	emptyRulesFile = `# Empty Rules File`
)

// quantitativeCmdTestSuite is the testify suite exercising the
// "quantitative" command.
type quantitativeCmdTestSuite struct {
	suite.Suite

	// tempDir is the per-test directory holding the fake CRS files.
	tempDir string
	// rootCmd is the root command the quantitative command is attached to.
	rootCmd *cobra.Command
}

func TestQuantitativeTestSuite(t *testing.T) {
suite.Run(t, new(quantitativeCmdTestSuite))
}

func (s *quantitativeCmdTestSuite) SetupTest() {
s.rootCmd = NewRootCommand()
s.tempDir = s.T().TempDir()

err := os.MkdirAll(path.Join(s.tempDir, "rules"), fs.ModePerm)
s.Require().NoError(err)
fakeCRSSetupConf, err := os.Create(path.Join(s.tempDir, "crs-setup.conf.example"))
fzipi marked this conversation as resolved.
Show resolved Hide resolved
s.Require().NoError(err)
n, err := fakeCRSSetupConf.WriteString(crsSetupFileContents)
s.Require().NoError(err)
s.Equal(len(crsSetupFileContents), n)
err = fakeCRSSetupConf.Close()
s.Require().NoError(err)
fakeRulesFile, err := os.Create(path.Join(s.tempDir, "rules", "Rules1.conf"))
s.Require().NoError(err)
n, err = fakeRulesFile.WriteString(emptyRulesFile)
s.Require().NoError(err)
s.Equal(len(emptyRulesFile), n)
s.rootCmd.AddCommand(NewQuantitativeCmd())
}

// TearDownTest deletes the temporary directory used by the test and fails
// the test if the removal reports an error.
func (s *quantitativeCmdTestSuite) TearDownTest() {
	s.Require().NoError(os.RemoveAll(s.tempDir))
}

func (s *quantitativeCmdTestSuite) TestQuantitativeCommand() {
s.rootCmd.SetArgs([]string{"quantitative", "-d", s.tempDir})
cmd, err := s.rootCmd.ExecuteContextC(context.Background())
s.Require().NoError(err, "quantitative command should not return an error")
fzipi marked this conversation as resolved.
Show resolved Hide resolved
s.Equal("quantitative", cmd.Name(), "quantitative command should have the name 'quantitative'")
s.Require().NoError(err)
}
1 change: 1 addition & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ func Execute(version string) error {
rootCmd := NewRootCommand()
rootCmd.AddCommand(NewCheckCommand())
rootCmd.AddCommand(NewRunCommand())
rootCmd.AddCommand(NewQuantitativeCmd())
rootCmd.Version = version

return rootCmd.ExecuteContext(context.Background())
Expand Down
76 changes: 76 additions & 0 deletions experimental/corpus/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Package corpus provides functionality for creating and managing corpora.
//
// A corpus is a collection of text documents that are used for training and testing machine learning models.
// The documents in a corpus are typically sentences or paragraphs of text.
//
// The corpus package provides an interface for working with corpora, as well as a set of built-in corpora
// that can be used for detecting which text will generate false positives in WAF rules.
//
// The Corpus interface includes methods for getting the URL of the corpus, fetching the file from the
// remote URL, getting an iterator for the corpus, and getting the payload for a line returned by the
// corpus iterator. Each corpus has a size, year, source, and language.
// The Iterator interface includes methods for getting the next sentence from the corpus and for checking
// whether there is another sentence in the corpus.
// Each corpus needs its own implementation of the Corpus interface. As this is an experimental package,
// this interface is subject to change.
package corpus

// CorpusFile contains the cache directory and the path of a cached corpus file.
type CorpusFile struct {
	// CacheDir is the directory where files are cached.
	CacheDir string
	// FilePath is the path to the cached file.
	FilePath string
}

// Corpus is the interface that needs to be implemented for getting the payload from a corpus
fzipi marked this conversation as resolved.
Show resolved Hide resolved
type Corpus interface {
// URL returns the URL of the corpus
URL() string

// WithURL sets the URL of the corpus
WithURL(url string) Corpus

// GetCorpusFile gets the file from the remote url.
fzipi marked this conversation as resolved.
Show resolved Hide resolved
// It returns the local file path were the corpus is stored.
fzipi marked this conversation as resolved.
Show resolved Hide resolved
GetCorpusFile() CorpusFile
fzipi marked this conversation as resolved.
Show resolved Hide resolved

// GetIterator returns an iterator for the corpus
GetIterator(c CorpusFile) Iterator

// GetPayload returns the payload given a line from the Corpus Iterator
GetPayload(line string) string
fzipi marked this conversation as resolved.
Show resolved Hide resolved

// Size returns the size of the corpus
Size() string
fzipi marked this conversation as resolved.
Show resolved Hide resolved
// WithSize sets the size of the corpus
// Most corpus will have a size like "100K", "1M", etc., related to the amount of sentences in the corpus
fzipi marked this conversation as resolved.
Show resolved Hide resolved
WithSize(size string) Corpus

// Year returns the year of the corpus
Year() string
fzipi marked this conversation as resolved.
Show resolved Hide resolved
// WithYear sets the year of the corpus
// Most corpus will have a year like "2023", "2022", etc.
fzipi marked this conversation as resolved.
Show resolved Hide resolved
WithYear(year string) Corpus

// Source returns the source of the corpus
Source() string
fzipi marked this conversation as resolved.
Show resolved Hide resolved
// WithSource sets the source of the corpus
// Most corpus will have a source like "news", "web", "wikipedia", etc.
fzipi marked this conversation as resolved.
Show resolved Hide resolved
WithSource(source string) Corpus

// Lang returns the language of the corpus
fzipi marked this conversation as resolved.
Show resolved Hide resolved
Lang() string
fzipi marked this conversation as resolved.
Show resolved Hide resolved
// WithLanguage sets the language of the corpus
// Most corpus will have a language like "eng", "de", etc.
fzipi marked this conversation as resolved.
Show resolved Hide resolved
WithLanguage(lang string) Corpus
}

// Iterator is an interface for iterating over a corpus.
type Iterator interface {
	// Next returns the next sentence from the corpus.
	Next() string

	// HasNext returns true if there is another sentence in the corpus,
	// false otherwise.
	HasNext() bool
}
43 changes: 41 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ go 1.21

require (
github.com/Masterminds/sprig v2.22.0+incompatible
github.com/corazawaf/coraza/v3 v3.2.1
github.com/coreruleset/ftw-tests-schema/v2 v2.1.0
github.com/go-logr/zerologr v1.2.3
github.com/goccy/go-yaml v1.9.2
github.com/google/uuid v1.6.0
github.com/hashicorp/go-getter v1.7.6
github.com/icza/backscanner v0.0.0-20240328210400-b40c3a86dec5
github.com/knadh/koanf/parsers/yaml v0.1.0
github.com/knadh/koanf/providers/env v0.1.0
Expand All @@ -26,33 +28,70 @@ require (
)

require (
cloud.google.com/go v0.112.1 // indirect
cloud.google.com/go/compute/metadata v0.3.0 // indirect
cloud.google.com/go/iam v1.1.6 // indirect
cloud.google.com/go/storage v1.38.0 // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver v1.5.0 // indirect
github.com/antchfx/htmlquery v1.3.2 // indirect
github.com/antchfx/xpath v1.3.1 // indirect
github.com/aws/aws-sdk-go v1.44.122 // indirect
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect
github.com/corazawaf/libinjection-go v0.2.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fatih/color v1.17.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.2 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-safetemp v1.0.0 // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.13 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/klauspost/compress v1.17.4 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/go-testing-interface v1.14.1 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/petar-dambovaliev/aho-corasick v0.0.0-20240411101913-e07a1f0e8eb4 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/tidwall/gjson v1.17.3 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tidwall/pretty v1.2.1 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/metric v1.24.0 // indirect
go.opentelemetry.io/otel/trace v1.24.0 // indirect
golang.org/x/crypto v0.27.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.25.0 // indirect
golang.org/x/text v0.18.0 // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
google.golang.org/api v0.169.0 // indirect
google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240827150818-7e3bb234dfed // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240827150818-7e3bb234dfed // indirect
google.golang.org/grpc v1.66.1 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
rsc.io/binaryregexp v0.2.0 // indirect
)
Loading