From 617804bcbc5b5c807ab405a452fdb29dcb8c2818 Mon Sep 17 00:00:00 2001 From: Federico Nafria Date: Sat, 6 Sep 2025 17:30:17 +0200 Subject: [PATCH 1/7] chore: Improve release workflow and enable version embedding --- .github/workflows/ci-and-release.yml | 17 +++++++++++------ cmd/patt/main.go | 4 +++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-and-release.yml b/.github/workflows/ci-and-release.yml index a5a1e87..814a680 100644 --- a/.github/workflows/ci-and-release.yml +++ b/.github/workflows/ci-and-release.yml @@ -2,7 +2,7 @@ name: CI and Release on: push: - branches: + branches: - '**' tags: - 'v*' @@ -24,7 +24,10 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: '1.21' + go-version: '1.24' + + - name: Tidy modules + run: go mod tidy - name: Run tests run: go test -v ./... @@ -46,7 +49,10 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: '1.21' + go-version: '1.24' + + - name: Tidy modules + run: go mod tidy - name: Run tests run: go test -v ./... @@ -82,7 +88,7 @@ jobs: sha256sum * > checksums.txt - name: Create GitHub Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 with: files: | dist/* @@ -90,5 +96,4 @@ jobs: draft: false prerelease: false env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/cmd/patt/main.go b/cmd/patt/main.go index 7655e07..6d14a82 100644 --- a/cmd/patt/main.go +++ b/cmd/patt/main.go @@ -7,6 +7,8 @@ import ( "patt" ) +var version = "dev" + func main() { ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, os.Kill) defer stop() @@ -18,4 +20,4 @@ func exitIfErr(err error) { _, _ = os.Stderr.WriteString(err.Error() + "\n") os.Exit(1) } -} +} \ No newline at end of file From af9882a0f269b19058f1c6279d445d1491ff684b Mon Sep 17 00:00:00 2001 From: Federico Nafria Date: Fri, 12 Sep 2025 14:52:21 +0200 Subject: [PATCH 2/7] feat: add cpu profiling --- cli.go | 13 +++++++++++++ params.go | 6 ++++++ params_test.go | 10 ++++++++++ 3 files changed, 29 insertions(+) diff --git a/cli.go b/cli.go index b7b0601..eb09a1d 100644 --- a/cli.go +++ b/cli.go @@ -7,6 +7,7 @@ import ( "fmt" "io" "os" + "runtime/pprof" "slices" ) @@ -16,6 +17,18 @@ func RunCLI(ctx context.Context, args []string, stdin io.Reader, stdout io.Write return fmt.Errorf("bad parameters: %w", err) } + if params.CPUProfile != "" { + f, err := os.Create(params.CPUProfile) + if err != nil { + return fmt.Errorf("could not create CPU profile: %w", err) + } + defer f.Close() + if err := pprof.StartCPUProfile(f); err != nil { + return fmt.Errorf("could not start CPU profile: %w", err) + } + defer pprof.StopCPUProfile() + } + replacer, err := replacer(params) if err != nil { return fmt.Errorf("cannot parse template: %w", err) diff --git a/params.go b/params.go index 28dbe04..12d9d73 100644 --- a/params.go +++ b/params.go @@ -11,10 +11,12 @@ type CLIParams struct { ReplaceTemplate string InputFiles []string Keep bool + CPUProfile string } // ParseCLIParams parses flags + positional args // +// // patt [flags] search_pattern [[more_search ...] replace_pattern] // [-- file1 [file2 ...]] // @@ -49,6 +51,10 @@ func ParseCLIParams(argsWithFlags []string) (CLIParams, error) { } cmd.Flags().BoolVarP(&out.Keep, "keep", "k", false, "print non‑matching lines") + cmd.Flags().StringVar(&out.CPUProfile, "cpu-profile", "", "write cpu profile to file") + if err := cmd.Flags().MarkHidden("cpu-profile"); err != nil { + return out, err + } if err := cmd.ParseFlags(argsWithFlags); err != nil { return out, err diff --git a/params_test.go b/params_test.go index a7d7270..51c9f16 100644 --- a/params_test.go +++ b/params_test.go @@ -79,6 +79,16 @@ func TestParseCLIParams_NoErrors(t *testing.T) { ReplaceTemplate: "template", }, }, + { + name: "cpu profile flag", + args: []string{"--cpu-profile=cpu.pprof", "pattern", "replacement", "--", "input.txt"}, + want: CLIParams{ + SearchPatterns: []string{"pattern"}, + ReplaceTemplate: "replacement", + InputFiles: []string{"input.txt"}, + CPUProfile: "cpu.pprof", + }, + }, } for _, tt := range tests { From 53f2c6273bda48427260934b0f246b935f68863f Mon Sep 17 00:00:00 2001 From: Federico Nafria Date: Fri, 12 Sep 2025 14:53:15 +0200 Subject: [PATCH 3/7] refactor: remove old profiling cmd --- cmd/profiling/main.go | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 cmd/profiling/main.go diff --git a/cmd/profiling/main.go b/cmd/profiling/main.go deleted file mode 100644 index f824836..0000000 --- a/cmd/profiling/main.go +++ /dev/null @@ -1,42 +0,0 @@ -package main - -import ( - "context" - "log" - "os" - "patt" - "runtime" - "runtime/pprof" -) - -func main() { - - // CPU profiling - f, err := os.Create("cpu.prof") - if err != nil { - log.Fatal(err) - } - defer f.Close() - - if err := pprof.StartCPUProfile(f); err != nil { - log.Fatal(err) - } - defer pprof.StopCPUProfile() - - args := []string{"patt", "[<_> <_>] [error] <_>", "", "./testdata/Apache_500MB.log"} - err = patt.RunCLI(context.Background(), args, os.Stdin, os.Stdout) - if err != nil { - log.Fatal(err) - } - - f, err = os.Create("mem.prof") - if err != nil { - log.Fatal(err) - } - defer f.Close() - - runtime.GC() // get up-to-date statistics - if err := pprof.WriteHeapProfile(f); err != nil { - log.Fatal(err) - } -} From fcce597d94cd8aa763f9377f4bd0b75486f55ed8 Mon Sep 17 00:00:00 2001 From: Federico Nafria Date: Fri, 12 Sep 2025 14:58:47 +0200 Subject: [PATCH 4/7] perf: optimize pattern matching --- pattern.go | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/pattern.go b/pattern.go index 46f2ed2..fd54310 100644 --- a/pattern.go +++ b/pattern.go @@ -1,6 +1,7 @@ package patt import ( + "bytes" "fmt" "patt/pattern" @@ -19,10 +20,16 @@ type LinesMatcher interface { } type PatternMatcher struct { - filter pattern.Matcher + filter pattern.Matcher + hardLiteral []byte } func (m PatternMatcher) Match(b []byte) bool { + if m.hardLiteral != nil { + if bytes.Index(b, m.hardLiteral) == -1 { + return false + } + } return m.filter.Test(b) } @@ -31,7 +38,18 @@ func NewFilter(stringPattern string) (LineReplacer, error) { if err != nil { return nil, err } - matcher := PatternMatcher{filter: *filter} + literals, err := pattern.ParseLiterals(stringPattern) + if err != nil { + return nil, err + } + var hardLiteral []byte + for _, l := range literals { + if len(l) > len(hardLiteral) { + hardLiteral = l + } + } + + matcher := PatternMatcher{filter: *filter, hardLiteral: hardLiteral} replacer := matchFilter{PatternMatcher: &matcher} return replacer, nil } @@ -58,8 +76,18 @@ func NewReplacer(stringPattern, stringReplaceTemplate string) (*Replacer, error) if err != nil { return nil, err } + lits, err := pattern.ParseLiterals(stringPattern) + if err != nil { + return nil, err + } + var hardLiteral []byte + for _, l := range lits { + if len(l) > len(hardLiteral) { + hardLiteral = l + } + } return &Replacer{ - PatternMatcher: &PatternMatcher{filter: *filter}, + PatternMatcher: &PatternMatcher{filter: *filter, hardLiteral: hardLiteral}, literals: literals, positions: positions, }, nil From 8f4abb2128445da280392f8a5b28a696f84e6500 Mon Sep 17 00:00:00 2001 From: Federico Nafria Date: Fri, 12 Sep 2025 15:20:41 +0200 Subject: [PATCH 5/7] refactor: move optimization to Matcher --- pattern.go | 36 ++++-------------------------------- pattern/pattern.go | 33 ++++++++++++++++++++++++++++----- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/pattern.go b/pattern.go index fd54310..ceb7f1f 100644 --- a/pattern.go +++ b/pattern.go @@ -1,7 +1,6 @@ package patt import ( - "bytes" "fmt" "patt/pattern" @@ -20,16 +19,10 @@ type LinesMatcher interface { } type PatternMatcher struct { - filter pattern.Matcher - hardLiteral []byte + filter pattern.Matcher } func (m PatternMatcher) Match(b []byte) bool { - if m.hardLiteral != nil { - if bytes.Index(b, m.hardLiteral) == -1 { - return false - } - } return m.filter.Test(b) } @@ -38,18 +31,7 @@ func NewFilter(stringPattern string) (LineReplacer, error) { if err != nil { return nil, err } - literals, err := pattern.ParseLiterals(stringPattern) - if err != nil { - return nil, err - } - var hardLiteral []byte - for _, l := range literals { - if len(l) > len(hardLiteral) { - hardLiteral = l - } - } - - matcher := PatternMatcher{filter: *filter, hardLiteral: hardLiteral} + matcher := PatternMatcher{filter: *filter} replacer := matchFilter{PatternMatcher: &matcher} return replacer, nil } @@ -76,18 +58,8 @@ func NewReplacer(stringPattern, stringReplaceTemplate string) (*Replacer, error) if err != nil { return nil, err } - lits, err := pattern.ParseLiterals(stringPattern) - if err != nil { - return nil, err - } - var hardLiteral []byte - for _, l := range lits { - if len(l) > len(hardLiteral) { - hardLiteral = l - } - } return &Replacer{ - PatternMatcher: &PatternMatcher{filter: *filter, hardLiteral: hardLiteral}, + PatternMatcher: &PatternMatcher{filter: *filter}, literals: literals, positions: positions, }, nil @@ -175,4 +147,4 @@ func (m *MultiReplacer) Match(line []byte) bool { func (m *MultiReplacer) Replace(line []byte) []byte { return m.replacers[m.lastMatchedIx].Replace(line) -} +} \ No newline at end of file diff --git a/pattern/pattern.go b/pattern/pattern.go index 5e5aad9..05b98fe 100644 --- a/pattern/pattern.go +++ b/pattern/pattern.go @@ -12,8 +12,9 @@ var ( ) type Matcher struct { - e expr - names []string + e expr + names []string + hardLiteral []byte } func New(in string) (*Matcher, error) { @@ -24,9 +25,18 @@ func New(in string) (*Matcher, error) { if err := e.validate(); err != nil { return nil, err } + var hardLiteral []byte + for _, n := range e { + if l, ok := n.(literals); ok { + if len(l) > len(hardLiteral) { + hardLiteral = l + } + } + } return &Matcher{ - e: e, - names: e.captures(), + e: e, + names: e.captures(), + hardLiteral: hardLiteral, }, nil } @@ -41,7 +51,15 @@ func ParseLineFilter(in []byte) (*Matcher, error) { if err = e.validateNoConsecutiveCaptures(); err != nil { return nil, err } - return &Matcher{e: e}, nil + var hardLiteral []byte + for _, n := range e { + if l, ok := n.(literals); ok { + if len(l) > len(hardLiteral) { + hardLiteral = l + } + } + } + return &Matcher{e: e, hardLiteral: hardLiteral}, nil } func ParseLiterals(in string) ([][]byte, error) { @@ -137,6 +155,11 @@ func (m *Matcher) Names() []string { } func (m *Matcher) Test(in []byte) bool { + if m.hardLiteral != nil { + if bytes.Index(in, m.hardLiteral) == -1 { + return false + } + } if len(in) == 0 || len(m.e) == 0 { // An empty line can only match an empty pattern. return len(in) == 0 && len(m.e) == 0 From cad6cc8266027ccddb867e3b9fdcbbf0c477c67b Mon Sep 17 00:00:00 2001 From: Federico Nafria Date: Mon, 15 Sep 2025 17:00:46 +0200 Subject: [PATCH 6/7] feat(pattern): Optimize pattern matching This commit optimizes the pattern matching by adding a fast path for the `hardLiteral`. If the `hardLiteral` (the longest literal in the pattern) is not found in the input, the pattern matching is skipped. This avoids unnecessary calls to `bytes.Index` and improves the performance of the `Test` function. --- pattern/pattern.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pattern/pattern.go b/pattern/pattern.go index 05b98fe..0dea553 100644 --- a/pattern/pattern.go +++ b/pattern/pattern.go @@ -155,15 +155,11 @@ func (m *Matcher) Names() []string { } func (m *Matcher) Test(in []byte) bool { - if m.hardLiteral != nil { - if bytes.Index(in, m.hardLiteral) == -1 { + if len(m.hardLiteral) > 0 { + if !bytes.Contains(in, m.hardLiteral) { return false } } - if len(in) == 0 || len(m.e) == 0 { - // An empty line can only match an empty pattern. - return len(in) == 0 && len(m.e) == 0 - } var off int for i := range m.e { lit, ok := m.e[i].(literals) @@ -181,6 +177,10 @@ func (m *Matcher) Test(in []byte) bool { } off += j + len(lit) } + if len(in) == 0 || len(m.e) == 0 { + // An empty line can only match an empty pattern. + return len(in) == 0 && len(m.e) == 0 + } // If we end up on a literal, we only consider the test successful if // the remaining input is empty. Otherwise, if we end up on a capture, // the remainder (the captured text) must not be empty. From 6191d373e2c883878edfc0ea905833f1ccfb7ac1 Mon Sep 17 00:00:00 2001 From: Federico Nafria Date: Mon, 15 Sep 2025 17:12:56 +0200 Subject: [PATCH 7/7] refactor(pattern): Rename hardLiteral to longestLiteral This commit renames the `hardLiteral` field to `longestLiteral` to better reflect its purpose. This field holds the longest literal in the pattern and is used to quickly discard non-matching lines. --- pattern/pattern.go | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pattern/pattern.go b/pattern/pattern.go index 0dea553..01c7e92 100644 --- a/pattern/pattern.go +++ b/pattern/pattern.go @@ -14,7 +14,7 @@ var ( type Matcher struct { e expr names []string - hardLiteral []byte + longestLiteral []byte } func New(in string) (*Matcher, error) { @@ -25,18 +25,18 @@ func New(in string) (*Matcher, error) { if err := e.validate(); err != nil { return nil, err } - var hardLiteral []byte + var longestLiteral []byte for _, n := range e { if l, ok := n.(literals); ok { - if len(l) > len(hardLiteral) { - hardLiteral = l + if len(l) > len(longestLiteral) { + longestLiteral = l } } } return &Matcher{ e: e, names: e.captures(), - hardLiteral: hardLiteral, + longestLiteral: longestLiteral, }, nil } @@ -51,15 +51,15 @@ func ParseLineFilter(in []byte) (*Matcher, error) { if err = e.validateNoConsecutiveCaptures(); err != nil { return nil, err } - var hardLiteral []byte + var longestLiteral []byte for _, n := range e { if l, ok := n.(literals); ok { - if len(l) > len(hardLiteral) { - hardLiteral = l + if len(l) > len(longestLiteral) { + longestLiteral = l } } } - return &Matcher{e: e, hardLiteral: hardLiteral}, nil + return &Matcher{e: e, longestLiteral: longestLiteral}, nil } func ParseLiterals(in string) ([][]byte, error) { @@ -155,8 +155,8 @@ func (m *Matcher) Names() []string { } func (m *Matcher) Test(in []byte) bool { - if len(m.hardLiteral) > 0 { - if !bytes.Contains(in, m.hardLiteral) { + if len(m.longestLiteral) > 0 { + if !bytes.Contains(in, m.longestLiteral) { return false } } @@ -193,4 +193,4 @@ func (m *Matcher) Test(in []byte) bool { _, reqRem := m.e[len(m.e)-1].(capture) hasRem := off != len(in) return reqRem == hasRem -} +} \ No newline at end of file