Skip to content

Commit

Permalink
merged changes in standard regexp from go1.7.5
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-alrux committed Feb 10, 2017
1 parent 806477a commit 6a3a4a5
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 132 deletions.
5 changes: 5 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ matrix:
fast_finish: true
allow_failures:
- go: tip
- go: 1.6.4
- go: 1.6.3
- go: 1.6.2
- go: 1.6.1
- go: 1.6
- go: 1.5.4
- go: 1.5.3
- go: 1.5.2
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,14 @@ This is an extension of the standard [Go](http://golang.org) package with the sa

[![Build Status](https://travis-ci.org/agext/regexp.svg?branch=master)](https://travis-ci.org/agext/regexp)

v1.1 Stable: Guaranteed no breaking changes to the API in future v1.x releases. No known bugs or performance issues introduced by the added code. Probably safe to use in production, though provided on "AS IS" basis.
v1.2 Stable: Guaranteed no breaking changes to the API in future v1.x releases. No known bugs or performance issues introduced by the added code. Probably safe to use in production, though provided on "AS IS" basis.

This package is being actively maintained. If you encounter any problems or have any suggestions for improvement, please [open an issue](https://github.com/agext/regexp/issues). Pull requests are welcome.

**Note on failing test for older versions (and possibly tip)**

The `TestFoldConstants` in syntax/parse_test.go depends on the standard `unicode/utf8` package. This test fails when this package is used with a Go version in which the unicode/utf8 package handles different folding ranges (new ranges are added from time to time, as unicode/utf8 is refined). The behavior of agext/regexp will be the one you expect from your Go version, based on its unicode/utf8 package, so it is safe to ignore this failing test.

## Overview

[![GoDoc](https://godoc.org/github.com/agext/regexp?status.png)](https://godoc.org/github.com/agext/regexp)
Expand Down
5 changes: 1 addition & 4 deletions backtrack.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ type bitState struct {

end int
cap []int
input input
jobs []job
visited []uint32
}
Expand Down Expand Up @@ -146,7 +145,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
// Optimization: rather than push and pop,
// code that is going to Push and continue
// the loop simply updates ip, p, and arg
// and jumps to CheckAndLoop. We have to
// and jumps to CheckAndLoop. We have to
// do the ShouldVisit check that Push
// would have, but we avoid the stack
// manipulation.
Expand Down Expand Up @@ -254,7 +253,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {

}
panic("bad arg in InstCapture")
continue

case syntax.InstEmptyWidth:
if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 {
Expand Down Expand Up @@ -299,7 +297,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
// Otherwise, continue on in hope of a longer match.
continue
}
panic("unreachable")
}

return m.matched
Expand Down
14 changes: 1 addition & 13 deletions exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ type queue struct {
// A entry is an entry on a queue.
// It holds both the instruction pc and the actual thread.
// Some queue entries are just place holders so that the machine
// knows it has considered that pc. Such entries have t == nil.
// knows it has considered that pc. Such entries have t == nil.
type entry struct {
pc uint32
t *thread
Expand Down Expand Up @@ -107,14 +107,6 @@ func (m *machine) alloc(i *syntax.Inst) *thread {
return t
}

// free returns t to the free pool.
func (m *machine) free(t *thread) {
m.inputBytes.str = nil
m.inputString.str = ""
m.inputReader.r = nil
m.pool = append(m.pool, t)
}

// match runs the machine over the input starting at pos.
// It reports whether a match was found.
// If so, m.matchcap holds the submatch information.
Expand Down Expand Up @@ -192,7 +184,6 @@ func (m *machine) match(i input, pos int) bool {
func (m *machine) clear(q *queue) {
for _, d := range q.dense {
if d.t != nil {
// m.free(d.t)
m.pool = append(m.pool, d.t)
}
}
Expand All @@ -213,7 +204,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
continue
}
if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
// m.free(t)
m.pool = append(m.pool, t)
continue
}
Expand All @@ -232,7 +222,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
// First-match mode: cut off all lower-priority threads.
for _, d := range runq.dense[j+1:] {
if d.t != nil {
// m.free(d.t)
m.pool = append(m.pool, d.t)
}
}
Expand All @@ -253,7 +242,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
}
if t != nil {
// m.free(t)
m.pool = append(m.pool, t)
}
}
Expand Down
83 changes: 39 additions & 44 deletions exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (
// considered during RE2's exhaustive tests, which run all possible
// regexps over a given set of atoms and operators, up to a given
// complexity, over all possible strings over a given alphabet,
// up to a given size. Rather than try to link with RE2, we read a
// up to a given size. Rather than try to link with RE2, we read a
// log file containing the test cases and the expected matches.
// The log file, re2-exhaustive.txt, is generated by running 'make log'
// in the open source RE2 distribution https://github.com/google/re2/.
Expand All @@ -41,21 +41,21 @@ import (
// -;0-3 0-1 1-2 2-3
//
// The stanza begins by defining a set of strings, quoted
// using Go double-quote syntax, one per line. Then the
// using Go double-quote syntax, one per line. Then the
// regexps section gives a sequence of regexps to run on
// the strings. In the block that follows a regexp, each line
// the strings. In the block that follows a regexp, each line
// gives the semicolon-separated match results of running
// the regexp on the corresponding string.
// Each match result is either a single -, meaning no match, or a
// space-separated sequence of pairs giving the match and
// submatch indices. An unmatched subexpression formats
// submatch indices. An unmatched subexpression formats
// its pair as a single - (not illustrated above). For now
// each regexp run produces two match results, one for a
// ``full match'' that restricts the regexp to matching the entire
// string or nothing, and one for a ``partial match'' that gives
// the leftmost first match found in the string.
//
// Lines beginning with # are comments. Lines beginning with
// Lines beginning with # are comments. Lines beginning with
// a capital letter are test names printed during RE2's test suite
// and are echoed into t but otherwise ignored.
//
Expand Down Expand Up @@ -155,9 +155,9 @@ func testRE2(t *testing.T, file string) {
if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
// RE2's \B considers every byte position,
// so it sees 'not word boundary' in the
// middle of UTF-8 sequences. This package
// middle of UTF-8 sequences. This package
// only considers the positions between runes,
// so it disagrees. Skip those cases.
// so it disagrees. Skip those cases.
continue
}
res := strings.Split(line, ";")
Expand Down Expand Up @@ -409,7 +409,7 @@ Reading:
// h REG_MULTIREF multiple digit backref
// i REG_ICASE ignore case
// j REG_SPAN . matches \n
// k REG_ESCAPE \ to ecape [...] delimiter
// k REG_ESCAPE \ to escape [...] delimiter
// l REG_LEFT implicit ^...
// m REG_MINIMAL minimal match
// n REG_NEWLINE explicit \n match
Expand Down Expand Up @@ -658,47 +658,42 @@ func makeText(n int) []byte {
return text
}

func benchmark(b *testing.B, re string, n int) {
r := MustCompile(re)
t := makeText(n)
b.ResetTimer()
b.SetBytes(int64(n))
for i := 0; i < b.N; i++ {
if r.Match(t) {
b.Fatal("match!")
func BenchmarkMatch(b *testing.B) {
for _, data := range benchData {
r := MustCompile(data.re)
for _, size := range benchSizes {
t := makeText(size.n)
b.Run(data.name+"/"+size.name, func(b *testing.B) {
b.SetBytes(int64(size.n))
for i := 0; i < b.N; i++ {
if r.Match(t) {
b.Fatal("match!")
}
}
})
}
}
}

const (
easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
)
var benchData = []struct{ name, re string }{
{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
}

func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) }
func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) }
func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) }
func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) }
func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) }
func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) }
func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) }
func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) }
func BenchmarkMatchEasy1_1M(b *testing.B) { benchmark(b, easy1, 1<<20) }
func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) }
func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 32<<0) }
func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) }
func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) }
func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) }
func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) }
func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) }
func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) }
func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) }
var benchSizes = []struct {
name string
n int
}{
{"32", 32},
{"1K", 1 << 10},
{"32K", 32 << 10},
{"1M", 1 << 20},
{"32M", 32 << 20},
}

func TestLongest(t *testing.T) {
re, err := Compile(`a(|b)`)
Expand Down
4 changes: 1 addition & 3 deletions onepass_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

Expand Down Expand Up @@ -133,8 +133,6 @@ func TestMergeRuneSet(t *testing.T) {
}
}

const noStr = `!`

var onePass = &onePassProg{}

var onePassTests = []struct {
Expand Down
Loading

0 comments on commit 6a3a4a5

Please sign in to comment.