diff --git a/README.md b/README.md index eff3b79..450d65b 100644 --- a/README.md +++ b/README.md @@ -455,6 +455,12 @@ You can see an example of how to implement the `corpus.Corpus` interface in the To run quantitative tests, you just need to pass the `quantitative` flag to `ftw`. +The corpus will be downloaded and cached locally for future use. You can also specify the size of the corpus, +the language, the source, and the year of the corpus. The bare minimum parameter that you must specify is the +directory where the CRS rules are stored. + +Here is the help for the `quantitative` command: + ```bash ❯ ./go-ftw quantitative -h Run all quantitative tests @@ -486,24 +492,36 @@ Global Flags: --trace trace output: really, really verbose ``` + + ### Example of running quantitative tests This will run with the default leipzig corpus and size of 10K payloads. ```bash ❯ ./go-ftw quantitative -d ../coreruleset -s 10K Running quantitative tests -Run 10000 payloads in 16.009683458s -Total False positive ratio: 47/10000 = 0.0047 -False positives per rule: map[932235:4 932270:2 932290:35 932380:2 933160:1 942100:1 942230:1 942360:1] +Run 10000 payloads in 18.482979709s +Total False positive ratio: 408/10000 = 0.0408 +False positives per rule: + Rule 920220: 198 false positives + Rule 920221: 198 false positives + Rule 932235: 4 false positives + Rule 932270: 2 false positives + Rule 932380: 2 false positives + Rule 933160: 1 false positives + Rule 942100: 1 false positives + Rule 942230: 1 false positives + Rule 942360: 1 false positives ``` This will run with the default leipzig corpus and size of 10K payloads, but only for the rule 920350. 
```bash ❯ ./go-ftw quantitative -d ../coreruleset -s 10K -r 932270 Running quantitative tests -Run 10000 payloads in 15.782435916s +Run 10000 payloads in 15.218343083s Total False positive ratio: 2/10000 = 0.0002 -False positives per rule: map[932270:2] +False positives per rule: + Rule 932270: 2 false positives ``` If you add `--debug` to the command, you will see the payloads that cause false positives. @@ -522,6 +540,28 @@ Running quantitative tests 12:32PM DBG **> rule 932235 => Matched Data: () last found within ARGS:payload: Consolidated Edison () last issued its earnings results on Thursday, November 3rd. ``` +The default language for the corpus is English, but you can change it to German using the `-L` flag. +```bash +❯ ./go-ftw quantitative -d ../coreruleset -s 10K -L deu +Running quantitative tests +4:18PM INF Downloading corpus file from https://downloads.wortschatz-leipzig.de/corpora/deu_news_2023_10K.tar.gz +Moved /Users/fzipitria/.ftw/extracted/deu_news_2023_10K/deu_news_2023_10K-sentences.txt to /Users/fzipitria/.ftw/deu_news_2023_10K-sentences.txt +Run 10000 payloads in 25.169846084s +Total False positive ratio: 44/10000 = 0.0044 +False positives per rule: + Rule 920220: 19 false positives + Rule 920221: 19 false positives + Rule 932125: 1 false positives + Rule 932290: 5 false positives +``` + +Results can also be shown in JSON format, to be processed by other tools. 
+```bash +❯ ./go-ftw quantitative -d ../coreruleset -s 10K -o json + +{"count":10000,"falsePositives":408,"falsePositivesPerRule":{"920220":198,"920221":198,"932235":4,"932270":2,"932380":2,"933160":1,"942100":1,"942230":1,"942360":1},"totalTime":15031086083}% +``` + ### Future work for quantitative tests This feature will enable us to compare between two different versions of CRS (or any two rules) and see, for example, diff --git a/internal/quantitative/stats_test.go b/internal/quantitative/stats_test.go index 8ff9f45..470a3ee 100644 --- a/internal/quantitative/stats_test.go +++ b/internal/quantitative/stats_test.go @@ -4,128 +4,128 @@ package quantitative import ( - "bytes" - "testing" - "time" + "bytes" + "testing" + "time" - "github.com/stretchr/testify/suite" + "github.com/stretchr/testify/suite" - "github.com/coreruleset/go-ftw/output" + "github.com/coreruleset/go-ftw/output" ) type statsTestSuite struct { - suite.Suite + suite.Suite } func TestStatsTestSuite(t *testing.T) { - suite.Run(t, new(statsTestSuite)) + suite.Run(t, new(statsTestSuite)) } func (s *statsTestSuite) TestNewQuantitativeStats() { - tests := []struct { - name string - want *QuantitativeRunStats - }{ - { - name: "Test 1", - want: &QuantitativeRunStats{ - count_: 0, - falsePositives: 0, - falsePositivesPerRule: make(map[int]int), - totalTime: 0, - }, - }, - } - for _, tt := range tests { - s.Run(tt.name, func() { - got := NewQuantitativeStats() - s.Require().Equal(got, tt.want) - }) - } + tests := []struct { + name string + want *QuantitativeRunStats + }{ + { + name: "Test 1", + want: &QuantitativeRunStats{ + count_: 0, + falsePositives: 0, + falsePositivesPerRule: make(map[int]int), + totalTime: 0, + }, + }, + } + for _, tt := range tests { + s.Run(tt.name, func() { + got := NewQuantitativeStats() + s.Require().Equal(got, tt.want) + }) + } } func (s *statsTestSuite) TestQuantitativeRunStats_MarshalJSON() { - type fields struct { - count_ int - totalTime time.Duration - falsePositives int - 
falsePositivesPerRule map[int]int - } - tests := []struct { - name string - fields fields - want []byte - wantErr bool - }{ - { - name: "Test 1", - fields: fields{ - count_: 1, - totalTime: 1, - falsePositives: 1, - falsePositivesPerRule: map[int]int{920010: 1}, - }, - want: []byte(`{"count":1,"falsePositives":1,"falsePositivesPerRule":{"920010":1},"totalTime":1}`), - wantErr: false, - }, - { - name: "Test 2", - fields: fields{ - count_: 2, - totalTime: 2, - falsePositives: 2, - falsePositivesPerRule: map[int]int{933100: 2}, - }, - want: []byte(`{"count":2,"falsePositives":2,"falsePositivesPerRule":{"933100":2},"totalTime":2}`), - wantErr: false, - }, - } - for _, tt := range tests { - s.Run(tt.name, func() { - q := &QuantitativeRunStats{ - count_: tt.fields.count_, - totalTime: tt.fields.totalTime, - falsePositives: tt.fields.falsePositives, - falsePositivesPerRule: tt.fields.falsePositivesPerRule, - } - got, err := q.MarshalJSON() - s.Require().NoError(err) - s.Require().Equal(got, tt.want) - }) - } + type fields struct { + count_ int + totalTime time.Duration + falsePositives int + falsePositivesPerRule map[int]int + } + tests := []struct { + name string + fields fields + want []byte + wantErr bool + }{ + { + name: "Test 1", + fields: fields{ + count_: 1, + totalTime: 1, + falsePositives: 1, + falsePositivesPerRule: map[int]int{920010: 1}, + }, + want: []byte(`{"count":1,"falsePositives":1,"falsePositivesPerRule":{"920010":1},"totalTime":1}`), + wantErr: false, + }, + { + name: "Test 2", + fields: fields{ + count_: 2, + totalTime: 2, + falsePositives: 2, + falsePositivesPerRule: map[int]int{933100: 2}, + }, + want: []byte(`{"count":2,"falsePositives":2,"falsePositivesPerRule":{"933100":2},"totalTime":2}`), + wantErr: false, + }, + } + for _, tt := range tests { + s.Run(tt.name, func() { + q := &QuantitativeRunStats{ + count_: tt.fields.count_, + totalTime: tt.fields.totalTime, + falsePositives: tt.fields.falsePositives, + falsePositivesPerRule: 
tt.fields.falsePositivesPerRule, + } + got, err := q.MarshalJSON() + s.Require().NoError(err) + s.Require().Equal(got, tt.want) + }) + } } func (s *statsTestSuite) TestQuantitativeRunStats_functions() { - q := NewQuantitativeStats() + q := NewQuantitativeStats() - q.incrementRun() - s.Require().Equal(q.Count(), 1) + q.incrementRun() + s.Require().Equal(q.Count(), 1) - q.addFalsePositive(920100) - s.Require().Equal(q.FalsePositives(), 1) + q.addFalsePositive(920100) + s.Require().Equal(q.FalsePositives(), 1) - q.incrementRun() - s.Require().Equal(q.Count(), 2) + q.incrementRun() + s.Require().Equal(q.Count(), 2) - q.addFalsePositive(920200) - s.Require().Equal(q.FalsePositives(), 2) + q.addFalsePositive(920200) + s.Require().Equal(q.FalsePositives(), 2) - duration := time.Duration(5000) - q.SetTotalTime(duration) - s.Require().Equal(q.TotalTime(), duration) + duration := time.Duration(5000) + q.SetTotalTime(duration) + s.Require().Equal(q.TotalTime(), duration) } func (s *statsTestSuite) TestQuantitativeRunStats_printSummary() { - var b bytes.Buffer - out := output.NewOutput("plain", &b) - q := NewQuantitativeStats() + var b bytes.Buffer + out := output.NewOutput("plain", &b) + q := NewQuantitativeStats() - q.incrementRun() - s.Require().Equal(q.Count(), 1) + q.incrementRun() + s.Require().Equal(q.Count(), 1) - q.addFalsePositive(920100) - s.Require().Equal(q.FalsePositives(), 1) + q.addFalsePositive(920100) + s.Require().Equal(q.FalsePositives(), 1) - q.printSummary(out) - s.Require().Equal("Run 1 payloads in 0s\nTotal False positive ratio: 1/1 = 1.0000\nFalse positives per rule id:\n 920100: 1 false positives\n", b.String()) + q.printSummary(out) + s.Require().Equal("Run 1 payloads in 0s\nTotal False positive ratio: 1/1 = 1.0000\nFalse positives per rule id:\n 920100: 1 false positives\n", b.String()) }