diff --git a/.travis.yml b/.travis.yml index 1ec686c..a1bf28d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,6 +41,11 @@ matrix: fast_finish: true allow_failures: - go: tip + - go: 1.6.4 + - go: 1.6.3 + - go: 1.6.2 + - go: 1.6.1 + - go: 1.6 - go: 1.5.4 - go: 1.5.3 - go: 1.5.2 diff --git a/README.md b/README.md index fa03695..6fb7986 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,14 @@ This is an extension of the standard [Go](http://golang.org) package with the sa [![Build Status](https://travis-ci.org/agext/regexp.svg?branch=master)](https://travis-ci.org/agext/regexp) -v1.1 Stable: Guaranteed no breaking changes to the API in future v1.x releases. No known bugs or performance issues introduced by the added code. Probably safe to use in production, though provided on "AS IS" basis. +v1.2 Stable: Guaranteed no breaking changes to the API in future v1.x releases. No known bugs or performance issues introduced by the added code. Probably safe to use in production, though provided on "AS IS" basis. This package is being actively maintained. If you encounter any problems or have any suggestions for improvement, please [open an issue](https://github.com/agext/regexp/issues). Pull requests are welcome. +**Note on failing test for older versions (and possibly tip)** + +The `TestFoldConstants` in syntax/parse_test.go depends on the standard `unicode` package. This test fails when this package is used with a Go version in which the unicode package handles different folding ranges (new ranges are added from time to time, as unicode is refined). The behavior of agext/regexp will be the one you expect from your Go version, based on its unicode package, so it is safe to ignore this failing test. 
+ ## Overview [![GoDoc](https://godoc.org/github.com/agext/regexp?status.png)](https://godoc.org/github.com/agext/regexp) diff --git a/backtrack.go b/backtrack.go index d83886e..fdfd188 100644 --- a/backtrack.go +++ b/backtrack.go @@ -36,7 +36,6 @@ type bitState struct { end int cap []int - input input jobs []job visited []uint32 } @@ -146,7 +145,7 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { // Optimization: rather than push and pop, // code that is going to Push and continue // the loop simply updates ip, p, and arg - // and jumps to CheckAndLoop. We have to + // and jumps to CheckAndLoop. We have to // do the ShouldVisit check that Push // would have, but we avoid the stack // manipulation. @@ -254,7 +253,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { } panic("bad arg in InstCapture") - continue case syntax.InstEmptyWidth: if syntax.EmptyOp(inst.Arg)&^i.context(pos) != 0 { @@ -299,7 +297,6 @@ func (m *machine) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { // Otherwise, continue on in hope of a longer match. continue } - panic("unreachable") } return m.matched diff --git a/exec.go b/exec.go index 564f494..7733e92 100644 --- a/exec.go +++ b/exec.go @@ -19,7 +19,7 @@ type queue struct { // A entry is an entry on a queue. // It holds both the instruction pc and the actual thread. // Some queue entries are just place holders so that the machine -// knows it has considered that pc. Such entries have t == nil. +// knows it has considered that pc. Such entries have t == nil. type entry struct { pc uint32 t *thread @@ -107,14 +107,6 @@ func (m *machine) alloc(i *syntax.Inst) *thread { return t } -// free returns t to the free pool. -func (m *machine) free(t *thread) { - m.inputBytes.str = nil - m.inputString.str = "" - m.inputReader.r = nil - m.pool = append(m.pool, t) -} - // match runs the machine over the input starting at pos. // It reports whether a match was found. 
// If so, m.matchcap holds the submatch information. @@ -192,7 +184,6 @@ func (m *machine) match(i input, pos int) bool { func (m *machine) clear(q *queue) { for _, d := range q.dense { if d.t != nil { - // m.free(d.t) m.pool = append(m.pool, d.t) } } @@ -213,7 +204,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy continue } if longest && m.matched && len(t.cap) > 0 && m.matchcap[0] < t.cap[0] { - // m.free(t) m.pool = append(m.pool, t) continue } @@ -232,7 +222,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy // First-match mode: cut off all lower-priority threads. for _, d := range runq.dense[j+1:] { if d.t != nil { - // m.free(d.t) m.pool = append(m.pool, d.t) } } @@ -253,7 +242,6 @@ func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond sy t = m.add(nextq, i.Out, nextPos, t.cap, nextCond, t) } if t != nil { - // m.free(t) m.pool = append(m.pool, t) } } diff --git a/exec_test.go b/exec_test.go index 5ce179b..83fd905 100644 --- a/exec_test.go +++ b/exec_test.go @@ -22,7 +22,7 @@ import ( // considered during RE2's exhaustive tests, which run all possible // regexps over a given set of atoms and operators, up to a given // complexity, over all possible strings over a given alphabet, -// up to a given size. Rather than try to link with RE2, we read a +// up to a given size. Rather than try to link with RE2, we read a // log file containing the test cases and the expected matches. // The log file, re2-exhaustive.txt, is generated by running 'make log' // in the open source RE2 distribution https://github.com/google/re2/. @@ -41,21 +41,21 @@ import ( // -;0-3 0-1 1-2 2-3 // // The stanza begins by defining a set of strings, quoted -// using Go double-quote syntax, one per line. Then the +// using Go double-quote syntax, one per line. Then the // regexps section gives a sequence of regexps to run on -// the strings. 
In the block that follows a regexp, each line +// the strings. In the block that follows a regexp, each line // gives the semicolon-separated match results of running // the regexp on the corresponding string. // Each match result is either a single -, meaning no match, or a // space-separated sequence of pairs giving the match and -// submatch indices. An unmatched subexpression formats +// submatch indices. An unmatched subexpression formats // its pair as a single - (not illustrated above). For now // each regexp run produces two match results, one for a // ``full match'' that restricts the regexp to matching the entire // string or nothing, and one for a ``partial match'' that gives // the leftmost first match found in the string. // -// Lines beginning with # are comments. Lines beginning with +// Lines beginning with # are comments. Lines beginning with // a capital letter are test names printed during RE2's test suite // and are echoed into t but otherwise ignored. // @@ -155,9 +155,9 @@ func testRE2(t *testing.T, file string) { if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) { // RE2's \B considers every byte position, // so it sees 'not word boundary' in the - // middle of UTF-8 sequences. This package + // middle of UTF-8 sequences. This package // only considers the positions between runes, - // so it disagrees. Skip those cases. + // so it disagrees. Skip those cases. continue } res := strings.Split(line, ";") @@ -409,7 +409,7 @@ Reading: // h REG_MULTIREF multiple digit backref // i REG_ICASE ignore case // j REG_SPAN . matches \n - // k REG_ESCAPE \ to ecape [...] delimiter + // k REG_ESCAPE \ to escape [...] delimiter // l REG_LEFT implicit ^... 
// m REG_MINIMAL minimal match // n REG_NEWLINE explicit \n match @@ -658,47 +658,42 @@ func makeText(n int) []byte { return text } -func benchmark(b *testing.B, re string, n int) { - r := MustCompile(re) - t := makeText(n) - b.ResetTimer() - b.SetBytes(int64(n)) - for i := 0; i < b.N; i++ { - if r.Match(t) { - b.Fatal("match!") +func BenchmarkMatch(b *testing.B) { + for _, data := range benchData { + r := MustCompile(data.re) + for _, size := range benchSizes { + t := makeText(size.n) + b.Run(data.name+"/"+size.name, func(b *testing.B) { + b.SetBytes(int64(size.n)) + for i := 0; i < b.N; i++ { + if r.Match(t) { + b.Fatal("match!") + } + } + }) } } } -const ( - easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" - medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" - parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" + - "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" -) +var benchData = []struct{ name, re string }{ + {"Easy0", "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"}, + {"Easy0i", "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"}, + {"Easy1", "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"}, + {"Medium", "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"}, + {"Hard", "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"}, + {"Hard1", "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"}, +} -func BenchmarkMatchEasy0_32(b *testing.B) { benchmark(b, easy0, 32<<0) } -func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) } -func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) } -func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) } -func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) } -func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) } -func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) } -func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) } -func BenchmarkMatchEasy1_1M(b *testing.B) { 
benchmark(b, easy1, 1<<20) } -func BenchmarkMatchEasy1_32M(b *testing.B) { benchmark(b, easy1, 32<<20) } -func BenchmarkMatchMedium_32(b *testing.B) { benchmark(b, medium, 32<<0) } -func BenchmarkMatchMedium_1K(b *testing.B) { benchmark(b, medium, 1<<10) } -func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) } -func BenchmarkMatchMedium_1M(b *testing.B) { benchmark(b, medium, 1<<20) } -func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) } -func BenchmarkMatchHard_32(b *testing.B) { benchmark(b, hard, 32<<0) } -func BenchmarkMatchHard_1K(b *testing.B) { benchmark(b, hard, 1<<10) } -func BenchmarkMatchHard_32K(b *testing.B) { benchmark(b, hard, 32<<10) } -func BenchmarkMatchHard_1M(b *testing.B) { benchmark(b, hard, 1<<20) } -func BenchmarkMatchHard_32M(b *testing.B) { benchmark(b, hard, 32<<20) } +var benchSizes = []struct { + name string + n int +}{ + {"32", 32}, + {"1K", 1 << 10}, + {"32K", 32 << 10}, + {"1M", 1 << 20}, + {"32M", 32 << 20}, +} func TestLongest(t *testing.T) { re, err := Compile(`a(|b)`) diff --git a/onepass_test.go b/onepass_test.go index f6b4d27..4e1f2c0 100644 --- a/onepass_test.go +++ b/onepass_test.go @@ -1,4 +1,4 @@ -// Copyright 2014 The Go Authors. All rights reserved. +// Copyright 2014 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -133,8 +133,6 @@ func TestMergeRuneSet(t *testing.T) { } } -const noStr = `!` - var onePass = &onePassProg{} var onePassTests = []struct { diff --git a/regexp.go b/regexp.go index 3aaa8c4..9f2172c 100644 --- a/regexp.go +++ b/regexp.go @@ -22,14 +22,14 @@ // All characters are UTF-8-encoded code points. // // There are 16 methods of Regexp that match a regular expression and identify -// the matched text. Their names are matched by this regular expression: +// the matched text. 
Their names are matched by this regular expression: // // Find(All)?(String)?(Submatch|Named)?(Index)? // // If 'All' is present, the routine matches successive non-overlapping -// matches of the entire expression. Empty matches abutting a preceding -// match are ignored. The return value is a slice containing the successive -// return values of the corresponding non-'All' routine. These routines take +// matches of the entire expression. Empty matches abutting a preceding +// match are ignored. The return value is a slice containing the successive +// return values of the corresponding non-'All' routine. These routines take // an extra integer argument, n; if n >= 0, the function returns at most n // matches/submatches. // @@ -49,9 +49,9 @@ // // If 'Index' is present, matches and submatches are identified by byte index // pairs within the input string: result[2*n:2*n+1] identifies the indexes of -// the nth submatch. The pair for n==0 identifies the match of the entire -// expression. If 'Index' is not present, the match is identified by the -// text of the match/submatch. If an index is negative, it means that +// the nth submatch. The pair for n==0 identifies the match of the entire +// expression. If 'Index' is not present, the match is identified by the +// text of the match/submatch. If an index is negative, it means that // subexpression did not match any string in the input. // // There is also a subset of the methods that can be applied to text read @@ -59,7 +59,7 @@ // // MatchReader, FindReaderIndex, FindReaderSubmatchIndex // -// This set may grow. Note that regular expression matches may need to +// This set may grow. Note that regular expression matches may need to // examine text beyond the text returned by a match, so the methods that // match text from a RuneReader may read arbitrarily far into the input // before returning. 
@@ -70,21 +70,28 @@ package regexp import ( "bytes" - "github.com/agext/regexp/syntax" "io" "strconv" "strings" "sync" "unicode" "unicode/utf8" -) -var debug = false + "github.com/agext/regexp/syntax" +) // Regexp is the representation of a compiled regular expression. // A Regexp is safe for concurrent use by multiple goroutines. type Regexp struct { // read-only after Compile + regexpRO + + // cache of machines for running regexp + mu sync.Mutex + machine []*machine +} + +type regexpRO struct { expr string // as passed to Compile prog *syntax.Prog // compiled program onepass *onePassProg // onepass program or nil @@ -98,10 +105,6 @@ type Regexp struct { subexpNames []string namedSubexp map[string]int longest bool - - // cache of machines for running regexp - mu sync.Mutex - machine []*machine } // String returns the source text used to compile the regular expression. @@ -114,10 +117,11 @@ func (re *Regexp) String() string { // When using a Regexp in multiple goroutines, giving each goroutine // its own copy helps to avoid lock contention. func (re *Regexp) Copy() *Regexp { - r := *re - r.mu = sync.Mutex{} - r.machine = nil - return &r + // It is not safe to copy Regexp by value + // since it contains a sync.Mutex. 
+ return &Regexp{ + regexpRO: re.regexpRO, + } } // Compile parses a regular expression and returns, if successful, @@ -179,14 +183,16 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { return nil, err } regexp := &Regexp{ - expr: expr, - prog: prog, - onepass: compileOnePass(prog), - numSubexp: maxCap, - subexpNames: capNames, - namedSubexp: make(map[string]int, len(capNames)), - cond: prog.StartCond(), - longest: longest, + regexpRO: regexpRO{ + expr: expr, + prog: prog, + onepass: compileOnePass(prog), + numSubexp: maxCap, + subexpNames: capNames, + namedSubexp: make(map[string]int, len(capNames)), + cond: prog.StartCond(), + longest: longest, + }, } for i, name := range capNames { regexp.namedSubexp[name] = 2 * i @@ -268,10 +274,10 @@ func (re *Regexp) NumSubexp() int { } // SubexpNames returns the names of the parenthesized subexpressions -// in this Regexp. The name for the first sub-expression is names[1], +// in this Regexp. The name for the first sub-expression is names[1], // so that if m is a match slice, the name for m[i] is SubexpNames()[i]. // Since the Regexp as a whole cannot be named, names[0] is always -// the empty string. The slice should not be modified. +// the empty string. The slice should not be modified. func (re *Regexp) SubexpNames() []string { return re.subexpNames } @@ -404,7 +410,7 @@ func (i *inputReader) context(pos int) syntax.EmptyOp { } // LiteralPrefix returns a literal string that must begin any match -// of the regular expression re. It returns the boolean true if the +// of the regular expression re. It returns the boolean true if the // literal string comprises the entire regular expression. func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { return re.prefix, re.prefixComplete @@ -427,7 +433,7 @@ func (re *Regexp) Match(b []byte) bool { } // MatchReader checks whether a textual regular expression matches the text -// read by the RuneReader. 
More complicated queries need to use Compile and +// read by the RuneReader. More complicated queries need to use Compile and // the full Regexp interface. func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { re, err := Compile(pattern) @@ -438,7 +444,7 @@ func MatchReader(pattern string, r io.RuneReader) (matched bool, err error) { } // MatchString checks whether a textual regular expression -// matches a string. More complicated queries need +// matches a string. More complicated queries need // to use Compile and the full Regexp interface. func MatchString(pattern string, s string) (matched bool, err error) { re, err := Compile(pattern) @@ -449,7 +455,7 @@ func MatchString(pattern string, s string) (matched bool, err error) { } // Match checks whether a textual regular expression -// matches a byte slice. More complicated queries need +// matches a byte slice. More complicated queries need // to use Compile and the full Regexp interface. func Match(pattern string, b []byte) (matched bool, err error) { re, err := Compile(pattern) @@ -460,7 +466,7 @@ func Match(pattern string, b []byte) (matched bool, err error) { } // ReplaceAllString returns a copy of src, replacing matches of the Regexp -// with the replacement string repl. Inside repl, $ signs are interpreted as +// with the replacement string repl. Inside repl, $ signs are interpreted as // in Expand, so for instance $1 represents the text of the first submatch. func (re *Regexp) ReplaceAllString(src, repl string) string { n := 2 @@ -474,7 +480,7 @@ func (re *Regexp) ReplaceAllString(src, repl string) string { } // ReplaceAllLiteralString returns a copy of src, replacing matches of the Regexp -// with the replacement string repl. The replacement repl is substituted directly, +// with the replacement string repl. The replacement repl is substituted directly, // without using Expand. 
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { return string(re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { @@ -484,7 +490,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { // ReplaceAllStringFunc returns a copy of src in which all matches of the // Regexp have been replaced by the return value of function repl applied -// to the matched substring. The replacement returned by repl is substituted +// to the matched substring. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { b := re.replaceAll(nil, src, 2, func(dst []byte, match []int) []byte { @@ -540,7 +546,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst searchPos += width } else if searchPos+1 > a[1] { // This clause is only needed at the end of the input - // string. In that case, DecodeRuneInString returns width=0. + // string. In that case, DecodeRuneInString returns width=0. searchPos++ } else { searchPos = a[1] @@ -558,7 +564,7 @@ func (re *Regexp) replaceAll(bsrc []byte, src string, nmatch int, repl func(dst } // ReplaceAll returns a copy of src, replacing matches of the Regexp -// with the replacement text repl. Inside repl, $ signs are interpreted as +// with the replacement text repl. Inside repl, $ signs are interpreted as // in Expand, so for instance $1 represents the text of the first submatch. func (re *Regexp) ReplaceAll(src, repl []byte) []byte { n := 2 @@ -576,7 +582,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte { } // ReplaceAllLiteral returns a copy of src, replacing matches of the Regexp -// with the replacement bytes repl. The replacement repl is substituted directly, +// with the replacement bytes repl. The replacement repl is substituted directly, // without using Expand. 
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { @@ -586,7 +592,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { // ReplaceAllFunc returns a copy of src in which all matches of the // Regexp have been replaced by the return value of function repl applied -// to the matched byte slice. The replacement returned by repl is substituted +// to the matched byte slice. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { return re.replaceAll(src, "", 2, func(dst []byte, match []int) []byte { @@ -602,7 +608,7 @@ func special(b byte) bool { // QuoteMeta returns a string that quotes all regular expression metacharacters // inside the argument text; the returned string is a regular expression matching -// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. +// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. func QuoteMeta(s string) string { b := make([]byte, 2*len(s)) @@ -694,7 +700,7 @@ func (re *Regexp) Find(b []byte) []byte { } // FindIndex returns a two-element slice of integers defining the location of -// the leftmost match in b of the regular expression. The match itself is at +// the leftmost match in b of the regular expression. The match itself is at // b[loc[0]:loc[1]]. // A return value of nil indicates no match. func (re *Regexp) FindIndex(b []byte) (loc []int) { @@ -706,9 +712,9 @@ func (re *Regexp) FindIndex(b []byte) (loc []int) { } // FindString returns a string holding the text of the leftmost match in s of the regular -// expression. If there is no match, the return value is an empty string, +// expression. If there is no match, the return value is an empty string, // but it will also be empty if the regular expression successfully matches -// an empty string. 
Use FindStringIndex or FindStringSubmatch if it is +// an empty string. Use FindStringIndex or FindStringSubmatch if it is // necessary to distinguish these cases. func (re *Regexp) FindString(s string) string { a := re.doExecute(nil, nil, s, 0, 2) @@ -719,7 +725,7 @@ func (re *Regexp) FindString(s string) string { } // FindStringIndex returns a two-element slice of integers defining the -// location of the leftmost match in s of the regular expression. The match +// location of the leftmost match in s of the regular expression. The match // itself is at s[loc[0]:loc[1]]. // A return value of nil indicates no match. func (re *Regexp) FindStringIndex(s string) (loc []int) { @@ -732,7 +738,7 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) { // FindReaderIndex returns a two-element slice of integers defining the // location of the leftmost match of the regular expression in text read from -// the RuneReader. The match text was found in the input stream at +// the RuneReader. The match text was found in the input stream at // byte offset loc[0] through loc[1]-1. // A return value of nil indicates no match. func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { @@ -764,14 +770,14 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { // Expand appends template to dst and returns the result; during the // append, Expand replaces variables in the template with corresponding -// matches drawn from src. The match slice should have been returned by +// matches drawn from src. The match slice should have been returned by // FindSubmatchIndex. // // In the template, a variable is denoted by a substring of the form // $name or ${name}, where name is a non-empty sequence of letters, -// digits, and underscores. A purely numeric name like $1 refers to +// digits, and underscores. A purely numeric name like $1 refers to // the submatch with the corresponding index; other names refer to -// capturing parentheses named with the (?P...) syntax. 
A +// capturing parentheses named with the (?P...) syntax. A // reference to an out of range or unmatched index or a name that is not // present in the regular expression is replaced with an empty slice. // @@ -930,7 +936,7 @@ func (re *Regexp) FindStringSubmatchIndex(s string) []int { // FindReaderSubmatchIndex returns a slice holding the index pairs // identifying the leftmost match of the regular expression of text read by // the RuneReader, and the matches, if any, of its subexpressions, as defined -// by the 'Submatch' and 'Index' descriptions in the package comment. A +// by the 'Submatch' and 'Index' descriptions in the package comment. A // return value of nil indicates no match. func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { return re.pad(re.doExecute(r, nil, "", 0, re.prog.NumCap)) diff --git a/syntax/parse.go b/syntax/parse.go index f38bbf6..7b8be55 100644 --- a/syntax/parse.go +++ b/syntax/parse.go @@ -1,4 +1,4 @@ -// Copyright 2011 The Go Authors. All rights reserved. +// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. @@ -141,9 +141,9 @@ func (p *parser) push(re *Regexp) *Regexp { } // maybeConcat implements incremental concatenation -// of literal runes into string nodes. The parser calls this +// of literal runes into string nodes. The parser calls this // before each push, so only the top fragment of the stack -// might need processing. Since this is called before a push, +// might need processing. Since this is called before a push, // the topmost literal is no longer subject to operators like * // (Otherwise ab* would turn into (ab)*.) // If r >= 0 and there's a node left over, maybeConcat uses it @@ -600,7 +600,7 @@ func (p *parser) leadingString(re *Regexp) ([]rune, Flags) { } // removeLeadingString removes the first n leading runes -// from the beginning of re. It returns the replacement for re. 
+// from the beginning of re. It returns the replacement for re. func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp { if re.Op == OpConcat && len(re.Sub) > 0 { // Removing a leading string in a concatenation @@ -957,11 +957,11 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { // Perl 5.10 gave in and implemented the Python version too, // but they claim that the last two are the preferred forms. // PCRE and languages based on it (specifically, PHP and Ruby) - // support all three as well. EcmaScript 4 uses only the Python form. + // support all three as well. EcmaScript 4 uses only the Python form. // // In both the open source world (via Code Search) and the // Google source tree, (?Pname) is the dominant form, - // so that's the one we implement. One is enough. + // so that's the one we implement. One is enough. if len(t) > 4 && t[2] == 'P' && t[3] == '<' { // Pull out name. end := strings.IndexRune(t, '>') @@ -989,7 +989,7 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) { return t[end+1:], nil } - // Non-capturing group. Might also twiddle Perl flags. + // Non-capturing group. Might also twiddle Perl flags. var c rune t = t[2:] // skip (? flags := p.flags @@ -1257,7 +1257,7 @@ Switch: if c < utf8.RuneSelf && !isalnum(c) { // Escaped non-word characters are always themselves. // PCRE is not quite so rigorous: it accepts things like - // \q, but we don't. We once rejected \_, but too many + // \q, but we don't. We once rejected \_, but too many // programs and people insist on using it, so allow \_. return c, t, nil } @@ -1292,7 +1292,7 @@ Switch: if c == '{' { // Any number of digits in braces. // Perl accepts any text at all; it ignores all text - // after the first non-hex digit. We require only hex digits, + // after the first non-hex digit. We require only hex digits, // and at least one. nhex := 0 r = 0 @@ -1333,10 +1333,10 @@ Switch: } return x*16 + y, t, nil - // C escapes. 
There is no case 'b', to avoid misparsing + // C escapes. There is no case 'b', to avoid misparsing // the Perl word-boundary \b as the C backspace \b - // when in POSIX mode. In Perl, /\b/ means word-boundary - // but /[\b]/ means backspace. We don't support that. + // when in POSIX mode. In Perl, /\b/ means word-boundary + // but /[\b]/ means backspace. We don't support that. // If you want a backspace, embed a literal backspace // character or use \x08. case 'a': @@ -1377,7 +1377,7 @@ type charGroup struct { } // parsePerlClassEscape parses a leading Perl character class escape like \d -// from the beginning of s. If one is present, it appends the characters to r +// from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) { if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' { @@ -1391,7 +1391,7 @@ func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest stri } // parseNamedClass parses a leading POSIX named character class like [:alnum:] -// from the beginning of s. If one is present, it appends the characters to r +// from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err error) { if len(s) < 2 || s[0] != '[' || s[1] != ':' { @@ -1454,7 +1454,7 @@ func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) { } // parseUnicodeClass parses a leading Unicode character class like \p{Han} -// from the beginning of s. If one is present, it appends the characters to r +// from the beginning of s. If one is present, it appends the characters to r // and returns the new slice r and the remainder of the string. 
func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err error) { if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' { @@ -1692,7 +1692,7 @@ const ( // minimum and maximum runes involved in folding. // checked during test. minFold = 0x0041 - maxFold = 0x118df + maxFold = 0x1e943 ) // appendFoldedRange returns the result of appending the range lo-hi @@ -1718,7 +1718,7 @@ func appendFoldedRange(r []rune, lo, hi rune) []rune { hi = maxFold } - // Brute force. Depend on appendRange to coalesce ranges on the fly. + // Brute force. Depend on appendRange to coalesce ranges on the fly. for c := lo; c <= hi; c++ { r = appendRange(r, c, c) f := unicode.SimpleFold(c)