merged changes in standard regexp from go1.7.5

agext · Feb 10, 2017 · 6a3a4a5 · 6a3a4a5
1 parent 806477a
commit 6a3a4a5
Show file tree

Hide file tree

Showing 8 changed files with 125 additions and 132 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -41,6 +41,11 @@ matrix:
   fast_finish: true
   allow_failures:
     - go: tip
+    - go: 1.6.4
+    - go: 1.6.3
+    - go: 1.6.2
+    - go: 1.6.1
+    - go: 1.6
     - go: 1.5.4
     - go: 1.5.3
     - go: 1.5.2

diff --git a/README.md b/README.md
@@ -6,10 +6,14 @@ This is an extension of the standard [Go](http://golang.org) package with the sa
 
 [![Build Status](https://travis-ci.org/agext/regexp.svg?branch=master)](https://travis-ci.org/agext/regexp)
 
-v1.1 Stable: Guaranteed no breaking changes to the API in future v1.x releases. No known bugs or performance issues introduced by the added code. Probably safe to use in production, though provided on "AS IS" basis.
+v1.2 Stable: Guaranteed no breaking changes to the API in future v1.x releases. No known bugs or performance issues introduced by the added code. Probably safe to use in production, though provided on "AS IS" basis.
 
 This package is being actively maintained. If you encounter any problems or have any suggestions for improvement, please [open an issue](https://github.com/agext/regexp/issues). Pull requests are welcome.
 
+**Note on failing test for older versions (and possibly tip)**
+
+The `TestFoldConstants` test in `syntax/parse_test.go` depends on the standard `unicode` package. The test fails when this package is used with a Go version whose `unicode` package defines different case-folding ranges (new ranges are added from time to time, as the Unicode tables are updated). The behavior of agext/regexp will be the one you expect from your Go version, based on its `unicode` package, so it is safe to ignore this failing test.
+
 ## Overview
 
 [![GoDoc](https://godoc.org/github.com/agext/regexp?status.png)](https://godoc.org/github.com/agext/regexp)

diff --git a/backtrack.go b/backtrack.go
@@ -36,7 +36,6 @@ type bitState struct {
 
 	end     int
 	cap     []int
-	input   input
 	jobs    []job
 	visited []uint32
 }
@@ -146,7 +145,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
 		// Optimization: rather than push and pop,
 		// code that is going to Push and continue
 		// the loop simply updates ip, p, and arg
-		// and jumps to CheckAndLoop.  We have to
+		// and jumps to CheckAndLoop. We have to
 		// do the ShouldVisit check that Push
 		// would have, but we avoid the stack
 		// manipulation.
@@ -254,7 +253,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
 
 			}
 			panic("bad arg in InstCapture")
-			continue
 
 		case syntax.InstEmptyWidth:
 			if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 {
@@ -299,7 +297,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool {
 			// Otherwise, continue on in hope of a longer match.
 			continue
 		}
-		panic("unreachable")
 	}
 
 	return m.matched

diff --git a/exec.go b/exec.go
@@ -19,7 +19,7 @@ type queue struct {
 // An entry is an entry on a queue.
 // It holds both the instruction pc and the actual thread.
 // Some queue entries are just place holders so that the machine
-// knows it has considered that pc.  Such entries have t == nil.
+// knows it has considered that pc. Such entries have t == nil.
 type entry struct {
 	pc uint32
 	t  *thread
@@ -107,14 +107,6 @@ func (m *machine) alloc(i *syntax.Inst) *thread {
 	return t
 }
 
-// free returns t to the free pool.
-func (m *machine) free(t *thread) {
-	m.inputBytes.str = nil
-	m.inputString.str = ""
-	m.inputReader.r = nil
-	m.pool = append(m.pool, t)
-}
-
 // match runs the machine over the input starting at pos.
 // It reports whether a match was found.
 // If so, m.matchcap holds the submatch information.
@@ -192,7 +184,6 @@ func (m *machine) match(i input, pos int) bool {
 func (m *machine) clear(q *queue) {
 	for _, d := range q.dense {
 		if d.t != nil {
-			// m.free(d.t)
 			m.pool = append(m.pool, d.t)
 		}
 	}
@@ -213,7 +204,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
 			continue
 		}
 		if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] {
-			// m.free(t)
 			m.pool = append(m.pool, t)
 			continue
 		}
@@ -232,7 +222,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
 				// First-match mode: cut off all lower-priority threads.
 				for _, d := range runq.dense[j+1:] {
 					if d.t != nil {
-						// m.free(d.t)
 						m.pool = append(m.pool, d.t)
 					}
 				}
@@ -253,7 +242,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy
 			t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t)
 		}
 		if t != nil {
-			// m.free(t)
 			m.pool = append(m.pool, t)
 		}
 	}

diff --git a/exec_test.go b/exec_test.go
@@ -22,7 +22,7 @@ import (
 // considered during RE2's exhaustive tests, which run all possible
 // regexps over a given set of atoms and operators, up to a given
 // complexity, over all possible strings over a given alphabet,
-// up to a given size.  Rather than try to link with RE2, we read a
+// up to a given size. Rather than try to link with RE2, we read a
 // log file containing the test cases and the expected matches.
 // The log file, re2-exhaustive.txt, is generated by running 'make log'
 // in the open source RE2 distribution https://github.com/google/re2/.
@@ -41,21 +41,21 @@ import (
 //	-;0-3 0-1 1-2 2-3
 //
 // The stanza begins by defining a set of strings, quoted
-// using Go double-quote syntax, one per line.  Then the
+// using Go double-quote syntax, one per line. Then the
 // regexps section gives a sequence of regexps to run on
-// the strings.  In the block that follows a regexp, each line
+// the strings. In the block that follows a regexp, each line
 // gives the semicolon-separated match results of running
 // the regexp on the corresponding string.
 // Each match result is either a single -, meaning no match, or a
 // space-separated sequence of pairs giving the match and
-// submatch indices.  An unmatched subexpression formats
+// submatch indices. An unmatched subexpression formats
 // its pair as a single - (not illustrated above).  For now
 // each regexp run produces two match results, one for a
 // ``full match'' that restricts the regexp to matching the entire
 // string or nothing, and one for a ``partial match'' that gives
 // the leftmost first match found in the string.
 //
-// Lines beginning with # are comments.  Lines beginning with
+// Lines beginning with # are comments. Lines beginning with
 // a capital letter are test names printed during RE2's test suite
 // and are echoed into t but otherwise ignored.
 //
@@ -155,9 +155,9 @@ func testRE2(t *testing.T, file string) {
 			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
 				// RE2's \B considers every byte position,
 				// so it sees 'not word boundary' in the
-				// middle of UTF-8 sequences.  This package
+				// middle of UTF-8 sequences. This package
 				// only considers the positions between runes,
-				// so it disagrees.  Skip those cases.
+				// so it disagrees. Skip those cases.
 				continue
 			}
 			res := strings.Split(line, ";")
@@ -409,7 +409,7 @@ Reading:
 		//     h	REG_MULTIREF		multiple digit backref
 		//     i	REG_ICASE		ignore case
 		//     j	REG_SPAN		. matches \n
-		//     k	REG_ESCAPE		\ to ecape [...] delimiter
+		//     k	REG_ESCAPE		\ to escape [...] delimiter
 		//     l	REG_LEFT		implicit ^...
 		//     m	REG_MINIMAL		minimal match
 		//     n	REG_NEWLINE		explicit \n match
@@ -658,47 +658,42 @@ func makeText(n int) []byte {
 	return text
 }
 
-func benchmark(b *testing.B, re string, n int) {
-	r := MustCompile(re)
-	t := makeText(n)
-	b.ResetTimer()
-	b.SetBytes(int64(n))
-	for i := 0; i < b.N; i++ {
-		if r.Match(t) {
-			b.Fatal("match!")
+func BenchmarkMatch(b *testing.B) {
+	for _, data := range benchData {
+		r := MustCompile(data.re)
+		for _, size := range benchSizes {
+			t := makeText(size.n)
+			b.Run(data.name+"/"+size.name, func(b *testing.B) {
+				b.SetBytes(int64(size.n))
+				for i := 0; i < b.N; i++ {
+					if r.Match(t) {
+						b.Fatal("match!")
+					}
+				}
+			})
 		}
 	}
 }
 
-const (
-	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
-	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
-	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
-	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
-	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
-		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
-)
+var benchData = []struct{ name, re string }{
+	{"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
+	{"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"},
+	{"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"},
+	{"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
+	{"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"},
+	{"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"},
+}
 
-func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
-func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
-func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
-func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
-func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
-func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
-func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
-func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
-func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
-func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
-func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 32<<0) }
-func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
-func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
-func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
-func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
-func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
-func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
-func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
-func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
-func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }
+var benchSizes = []struct {
+	name string
+	n    int
+}{
+	{"32", 32},
+	{"1K", 1 << 10},
+	{"32K", 32 << 10},
+	{"1M", 1 << 20},
+	{"32M", 32 << 20},
+}
 
 func TestLongest(t *testing.T) {
 	re, err := Compile(`a(|b)`)

diff --git a/onepass_test.go b/onepass_test.go
@@ -1,4 +1,4 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
+// Copyright 2014 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
@@ -133,8 +133,6 @@ func TestMergeRuneSet(t *testing.T) {
 	}
 }
 
-const noStr = `!`
-
 var onePass = &onePassProg{}
 
 var onePassTests = []struct {