Skip to content

Commit

Permalink
Merge pull request #2 from opsidian/memoize_refactor
Browse files Browse the repository at this point in the history
Move Memoize to History.Memoize, generate the parser only once for sepby combinators
  • Loading branch information
bandesz committed Aug 13, 2017
2 parents 52c1dcc + e9bdac2 commit 5ba0ad2
Show file tree
Hide file tree
Showing 9 changed files with 249 additions and 191 deletions.
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
## 1.0.3

DEPRECATED:

* combinator.Memoize will be removed in version 2. Use the Memoize method on the history object instead.

IMPROVEMENTS:

* You don't need to pass arbitrary names for memoization anymore, but you must make sure to call Memoize
only once for each of your parsers.

CHANGES:

* History.GetParserIndex was removed as only the original combinator.Memoize needed it

## 1.0.2

IMPROVEMENTS:
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ Combinators are special parsers as they are combining other parsers to process m

#### Memoization and handling left-recursion

IMPORTANT: make sure you only call the Memoize generator function once for a specific parser as it generates an internal parser index for every call.

Depending on the language you define, a parser can attempt to match at the same reader position multiple times. You can cache the parser results with the provided Memoize wrapper.

Also, if your language contains left-recursion, you need to use Memoize for every parser that is directly or indirectly part of the recursion, as Memoize is responsible for curtailing these calls.
Expand All @@ -102,7 +104,7 @@ The following code will wrap the integer parser with a memoizer:

```
h := parser.NewHistory()
cachedParser := combinator.Memoize("INTEGER", h, terminal.Integer())
cachedParser := h.Memoize(terminal.Integer())
```

The history object will store the result cache and also track left recursion counts and curtailing parsers, so you should only create it once.
Expand Down
30 changes: 8 additions & 22 deletions combinator/memoize.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,16 @@
package combinator

import (
"github.com/opsidian/parsley/data"
"github.com/opsidian/parsley/parser"
"github.com/opsidian/parsley/reader"
)

// Memoize handles result cache and curtailing left recursion
// The name has to be unique for a grammar.
func Memoize(name string, h *parser.History, p parser.Parser) parser.Func {
parserIndex := h.GetParserIndex(name)
return parser.Func(func(leftRecCtx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
cp, rs, err, found := h.GetResults(parserIndex, r.Cursor().Pos(), leftRecCtx)
if found {
return cp, rs, err
}

if leftRecCtx.Get(parserIndex) > r.Remaining()+1 {
return data.NewIntSet(parserIndex), nil, nil
}

cp, rs, err = p.Parse(leftRecCtx.Inc(parserIndex), r)
leftRecCtx = leftRecCtx.Filter(cp)

h.RegisterResults(parserIndex, r.Cursor().Pos(), cp, rs, err, leftRecCtx)
// memoizer is the single method the deprecated Memoize function needs from
// the history object. Accepting this interface lets a test double be passed
// in place of a real history.
type memoizer interface {
Memoize(p parser.Parser) parser.Func
}

return cp, rs, err
})
// Memoize handles result cache and curtailing left recursion by delegating
// to h.Memoize(p) and returning its result unchanged.
//
// The name argument is ignored; it is kept only so existing callers continue
// to compile.
//
// Deprecated: please use the Memoize method on the history object
func Memoize(name string, h memoizer, p parser.Parser) parser.Func {
return h.Memoize(p)
}
131 changes: 18 additions & 113 deletions combinator/memoize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,134 +7,39 @@
package combinator_test

import (
"fmt"
"testing"

"github.com/opsidian/parsley"
"github.com/opsidian/parsley/ast"
"github.com/opsidian/parsley/ast/builder"
"github.com/opsidian/parsley/combinator"
"github.com/opsidian/parsley/data"
"github.com/opsidian/parsley/parser"
"github.com/opsidian/parsley/reader"
"github.com/opsidian/parsley/test"
"github.com/opsidian/parsley/text/terminal"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
)

// Let's define a left-recursive language where we need to curtail left-recursion
// and also cache previous parser matches with Memoize.
// Grammar: S -> A, A -> a | Ab
func ExampleMemoize() {
h := parser.NewHistory()
// memoizeFunc is the stub parser function handed back by historyMock.Memoize.
// It ignores its arguments and returns an empty int set with nil results and
// a nil error.
var memoizeFunc = parser.Func(func(leftRecCtx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
return data.EmptyIntSet(), nil, nil
})

concat := ast.InterpreterFunc(func(ctx interface{}, nodes []ast.Node) (interface{}, error) {
var res string
for _, node := range nodes {
val, _ := node.Value(ctx)
if runeVal, ok := val.(rune); ok {
res += string(runeVal)
} else {
res += val.(string)
}
}
return res, nil
})

var a parser.Func
a = combinator.Memoize("A", h, combinator.Any("a or ab",
terminal.Rune('a', "CHAR"),
combinator.Seq(builder.All("AB", concat),
&a,
terminal.Rune('b', "CHAR"),
),
))
s := combinator.Seq(builder.Select(0), &a, parser.End())

value, _ := parsley.EvaluateText([]byte("abbbbbbbb"), true, s, nil)
fmt.Printf("%T %v\n", value, value)
// Output: string abbbbbbbb
// historyMock is a testify mock that stands in for the history object passed
// to combinator.Memoize.
type historyMock struct {
mock.Mock
}

func TestMemoizeShouldIncreaseLeftRecCtx(t *testing.T) {
r := test.NewReader(0, 2, false, false)
h := parser.NewHistory()
leftRecCtx := parser.EmptyLeftRecCtx()
parserIndex := h.GetParserIndex("p1")
assert.Equal(t, leftRecCtx.Get(parserIndex), 0)

p := parser.Func(func(leftRecCtx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
assert.Equal(t, leftRecCtx.Get(parserIndex), 1)
return parser.NoCurtailingParsers(), nil, nil
})
combinator.Memoize("p1", h, p).Parse(leftRecCtx, r)
// Memoize records the call (with p as its argument) on the embedded mock and
// always returns memoizeFunc, regardless of any value configured via Return.
func (h *historyMock) Memoize(p parser.Parser) parser.Func {
h.Called(p)
return memoizeFunc
}

func TestMemoizeShouldReturnParserResult(t *testing.T) {
r := test.NewReader(0, 2, false, false)
h := parser.NewHistory()

node := ast.NewTerminalNode("a", test.NewPosition(1), "a")
expectedCP := data.NewIntSet(1)
expectedRS := parser.NewResult(node, r).AsSet()
expectedErr := parser.NewError(test.NewPosition(1), "ERR1")

p := parser.Func(func(ctx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
return expectedCP, expectedRS, expectedErr
})
cp, rs, err := combinator.Memoize("p1", h, p).Parse(parser.EmptyLeftRecCtx(), r)
assert.Equal(t, expectedCP, cp)
assert.Equal(t, expectedRS, rs)
assert.Equal(t, expectedErr, err)
// parserMock is an empty stub type whose Parse method gives the test a parser
// value to pass to combinator.Memoize.
type parserMock struct {
}

func TestMemoizeShouldRememberResult(t *testing.T) {
r := test.NewReader(0, 2, false, false)
h := parser.NewHistory()

node := ast.NewTerminalNode("a", test.NewPosition(1), "a")
expectedCP := data.NewIntSet(1)
expectedRS := parser.NewResult(node, r).AsSet()
expectedErr := parser.NewError(test.NewPosition(1), "ERR1")

called := false
p := parser.Func(func(ctx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
called = true
return expectedCP, expectedRS, expectedErr
})
combinator.Memoize("p1", h, p).Parse(parser.EmptyLeftRecCtx(), r)

called = false

cp, rs, err := combinator.Memoize("p1", h, p).Parse(parser.EmptyLeftRecCtx(), r)

assert.False(t, called, "The parser function should not have been called")
assert.Equal(t, expectedCP, cp)
assert.Equal(t, expectedRS, rs)
assert.Equal(t, expectedErr, err)

combinator.Memoize("p1", h, p).Parse(parser.EmptyLeftRecCtx(), test.NewReader(1, 1, false, true))
assert.True(t, called, "The parser function should have been called for the new position")
// Parse ignores its arguments and returns an empty int set, nil results and a
// nil error.
func (p parserMock) Parse(leftRecCtx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
return data.EmptyIntSet(), nil, nil
}

// The parser calls should be curtailed if the parser was already called more times than the remaining token count plus one
func TestMemoizeShouldCurtailResult(t *testing.T) {
r := test.NewReader(0, 1, false, false)
h := parser.NewHistory()
ctx := data.NewIntMap(map[int]int{
h.GetParserIndex("p1"): 3,
h.GetParserIndex("p2"): 2,
})

called := false
p := parser.Func(func(ctx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
called = true
return parser.NoCurtailingParsers(), nil, parser.NewError(test.NewPosition(1), "ERR1")
})
expectedCP := data.NewIntSet(h.GetParserIndex("p1"))
cp, rs, err := combinator.Memoize("p1", h, p).Parse(ctx, r)
assert.False(t, called, "The call tree should have been curtailed")
assert.Equal(t, expectedCP, cp)
assert.Nil(t, rs)
assert.Nil(t, err)
// TestMemoizeShouldCallHistoryMethod verifies that the deprecated
// combinator.Memoize forwards the parser to the history's Memoize method.
func TestMemoizeShouldCallHistoryMethod(t *testing.T) {
h := new(historyMock)
p := new(parserMock)
// Expect exactly the parser instance created above to be forwarded.
h.On("Memoize", p).Return(memoizeFunc)
// The name argument ("x") is arbitrary: the wrapper does not use it.
combinator.Memoize("x", h, p)
h.AssertExpectations(t)
}
24 changes: 15 additions & 9 deletions combinator/sep_by.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,34 @@ import (
// SepBy applies the given value parser zero or more times separated by the separator parser
// It simply uses the Seq, SeqTry, Many and Memoize combinators.
func SepBy(token string, h *parser.History, valueP parser.Parser, sepP parser.Parser, interpreter ast.Interpreter) parser.Parser {
return sepBy{token, h, valueP, sepP, interpreter, 0}
return newSepBy(token, h, valueP, sepP, interpreter, 0)
}

// SepBy1 applies the given value parser one or more times separated by the separator parser
// It simply uses the Seq, SeqTry, Many and Memoize combinators.
func SepBy1(token string, h *parser.History, valueP parser.Parser, sepP parser.Parser, interpreter ast.Interpreter) parser.Parser {
return sepBy{token, h, valueP, sepP, interpreter, 1}
return newSepBy(token, h, valueP, sepP, interpreter, 1)
}

type sepBy struct {
token string
h *parser.History
valueP parser.Parser
sepP parser.Parser
interpreter ast.Interpreter
min int
p parser.Parser
}

// newSepBy assembles the sepBy parser for the given token.
//
// The composed parser is built here, once, and stored in the p field, so the
// two h.Memoize calls below happen a single time per sepBy instance rather
// than on every Parse call. min is forwarded to SeqTry unchanged.
func newSepBy(token string, h *parser.History, valueP parser.Parser, sepP parser.Parser, interpreter ast.Interpreter, min int) sepBy {
	s := sepBy{token: token, interpreter: interpreter}

	// A separator followed by a value, memoized.
	sepThenValue := Seq(builder.All("SEP_VALUE", nil), sepP, valueP)
	memoSepValue := h.Memoize(sepThenValue)

	// Any number of "separator value" pairs, memoized as well.
	repeated := Many(builder.Flatten(token, nil), memoSepValue)
	memoRepeated := h.Memoize(repeated)

	// SeqTry receives a copy of s taken before the p field is assigned.
	s.p = SeqTry(s, min, valueP, memoRepeated)
	return s
}

func (s sepBy) Parse(leftRecCtx data.IntMap, r reader.Reader) (data.IntSet, parser.ResultSet, parser.Error) {
sepValue := Memoize(s.token+"_MSB", s.h, Seq(builder.All("SEP_VALUE", nil), s.sepP, s.valueP))
sepValueMany := Memoize(s.token+"_MSB*", s.h, Many(builder.Flatten(s.token, nil), sepValue))
return SeqTry(s, s.min, s.valueP, sepValueMany).Parse(leftRecCtx, r)
return s.p.Parse(leftRecCtx, r)
}

func (s sepBy) BuildNode(nodes []ast.Node) ast.Node {
Expand Down
2 changes: 1 addition & 1 deletion examples/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func main() {
terminal.Rune('}', "}"),
)

value = combinator.Memoize("VALUE", h, combinator.Any("value",
value = h.Memoize(combinator.Any("value",
terminal.String(),
terminal.Integer(),
terminal.Float(),
Expand Down
43 changes: 27 additions & 16 deletions parser/history.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
package parser

import (
"sync/atomic"

"github.com/opsidian/parsley/data"
"github.com/opsidian/parsley/reader"
)

type storedResult struct {
Expand All @@ -19,37 +22,23 @@ type storedResult struct {

// History records information about parser calls
type History struct {
parserCount int
parsers map[string]int
parserIndex int32
results map[int]map[int]storedResult
}

// NewHistory creates a history instance
func NewHistory() *History {
return &History{
parserCount: 0,
parsers: make(map[string]int),
parserIndex: 0,
results: make(map[int]map[int]storedResult),
}
}

// Reset deletes the collected data
func (h *History) Reset() {
h.parserCount = 0
h.results = make(map[int]map[int]storedResult)
}

// GetParserIndex maps the given parser to an integer index
func (h *History) GetParserIndex(parser string) (parserIndex int) {
parserIndex, ok := h.parsers[parser]
if !ok {
parserIndex = h.parserCount
h.parsers[parser] = parserIndex
h.parserCount++
}
return
}

// RegisterResults registers a parser result for a certain position
func (h *History) RegisterResults(parserIndex int, pos int, curtailingParsers data.IntSet, resultSet ResultSet, err Error, leftRecCtx data.IntMap) {
if _, ok := h.results[parserIndex]; !ok {
Expand All @@ -73,3 +62,25 @@ func (h *History) GetResults(parserIndex int, pos int, leftRecCtx data.IntMap) (

return storedResult.curtailingParsers, storedResult.resultSet, storedResult.err, true
}

// Memoize wraps p in a parser function that caches results per reader
// position and curtails left-recursive calls.
//
// Every call reserves a new internal parser index, so Memoize should be
// called only once for a given parser and the returned function reused.
func (h *History) Memoize(p Parser) Func {
	idx := int(atomic.AddInt32(&h.parserIndex, 1))

	memoized := func(leftRecCtx data.IntMap, r reader.Reader) (data.IntSet, ResultSet, Error) {
		// Reuse a stored result for this parser and position when the history has one.
		if cp, rs, err, ok := h.GetResults(idx, r.Cursor().Pos(), leftRecCtx); ok {
			return cp, rs, err
		}

		// Curtail the call once this parser's count in the left-recursion
		// context exceeds the remaining input plus one.
		if leftRecCtx.Get(idx) > r.Remaining()+1 {
			return data.NewIntSet(idx), nil, nil
		}

		cp, rs, err := p.Parse(leftRecCtx.Inc(idx), r)

		// Store the result together with the context filtered by the
		// curtailing parsers the wrapped parser reported.
		filteredCtx := leftRecCtx.Filter(cp)
		h.RegisterResults(idx, r.Cursor().Pos(), cp, rs, err, filteredCtx)

		return cp, rs, err
	}

	return Func(memoized)
}
Loading

0 comments on commit 5ba0ad2

Please sign in to comment.