From dd5ed2d9b18ac3527684c42429c066fd28b5ac72 Mon Sep 17 00:00:00 2001 From: Tim Shannon Date: Wed, 2 Aug 2017 21:52:19 -0500 Subject: [PATCH 1/5] First pass on index selection --- README.md | 10 ++- index.go | 23 +----- query.go | 236 +++++++++++++++++++++++++++++++++++------------------- 3 files changed, 160 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 7bf0895..4838dbf 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,9 @@ Optionally, you can implement the `Storer` interface, to specify your own indexe struct tag. ## Queries -Queries are chain-able constructs that filters out any data that doesn't match it's criteria. There will be no -"query optimiser". The first field listed in the query will be the index that the query starts at (if one exists). +Queries are chain-able constructs that filters out any data that doesn't match it's criteria. An index will be used if +the `.Index()` chain is called, otherwise bolthold will try all possible indexes simultaneously and return the data +from the quickest query, and canceling the rest. Queries will look like this: ```Go @@ -64,6 +65,7 @@ Fields must be exported, and thus always need to start with an upper-case letter * Limit - `Where("field").Eq(value).Limit(10)` * SortBy - `Where("field").Eq(value).SortBy("field1", "field2")` * Reverse - `Where("field").Eq(value).SortBy("field").Reverse()` +* Index - `Where("field").Eq(value).Index("indexName")` If you want to run a query's criteria against the Key value, you can use the `bolthold.Key` constant: @@ -236,7 +238,7 @@ err = store.Insert("key", &Item{ That's it! -Bolthold is still very much alpha software at this point, but there is currently over 80% coverage in unit tests, and -it's backed by BoltDB which is a very solid and well built piece of software, so I encourage you to give it a try. +Bolthold currently has over 80% coverage in unit tests, and it's backed by BoltDB which is a very solid and well built +piece of software, so I encourage you to give it a try. If you end up using BoltHold, I'd love to hear about it. diff --git a/index.go b/index.go index 501c1d4..f4147fa 100644 --- a/index.go +++ b/index.go @@ -206,7 +206,7 @@ func newIterator(tx *bolt.Tx, typeName string, query *Query) *iterator { return iter } - iBucket := findIndexBucket(tx, typeName, query) + iBucket := tx.Bucket(indexBucketName(typeName, query.index)) if iBucket == nil { // bad index, filter through entire store @@ -300,27 +300,6 @@ func seekCursor(cursor *bolt.Cursor, criteria []*Criterion) (key, value []byte) return cursor.First() } -// findIndexBucket returns the index bucket from the query, and if not found, tries to find the next available index -func findIndexBucket(tx *bolt.Tx, typeName string, query *Query) *bolt.Bucket { - iBucket := tx.Bucket(indexBucketName(typeName, query.index)) - if iBucket != nil { - return iBucket - } - - for field := range query.fieldCriteria { - if field == query.index { - continue - } - - iBucket = tx.Bucket(indexBucketName(typeName, field)) - if iBucket != nil { - query.index = field - return iBucket - } - } - return nil -} - // Next returns the next key value that matches the iterators criteria // If no more kv's are available the return nil, if there is an error, they return nil // and iterator.Error() will return the error diff --git a/query.go b/query.go index 3fbce82..bcc3cab 100644 --- a/query.go +++ b/query.go @@ -10,6 +10,7 @@ import ( "regexp" "sort" "strings" + "sync" "unicode" "github.com/boltdb/bolt" @@ -98,7 +99,6 @@ func Where(field string) *Criterion { return &Criterion{ query: &Query{ - index: field, currentField: field, fieldCriteria: make(map[string][]*Criterion), }, @@ -177,6 +177,12 @@ func (q *Query) Reverse() *Query { return q } +// Index specifies the index to use when running this query +func (q *Query) Index(indexName string) *Query { + q.index = indexName + return q +} + // Or creates another separate query that gets unioned with any other results in the query // Or will panic if the query passed in contains a limit or skip value, as they are only // allowed on top level queries @@ -519,8 +525,11 @@ type record struct { } func runQuery(tx *bolt.Tx, dataType interface{}, query *Query, retrievedKeys keyList, skip int, action func(r *record) error) error { - storer := newStorer(dataType) + if query.index == "" { + return runQueryIndexSelect(tx, dataType, query, action) + } + storer := newStorer(dataType) tp := dataType for reflect.TypeOf(tp).Kind() == reflect.Ptr { @@ -530,87 +539,7 @@ func runQuery(tx *bolt.Tx, dataType interface{}, query *Query, retrievedKeys key query.dataType = reflect.TypeOf(tp) if len(query.sort) > 0 { - // Validate sort fields - for _, field := range query.sort { - _, found := query.dataType.FieldByName(field) - if !found { - return fmt.Errorf("The field %s does not exist in the type %s", field, query.dataType) - } - } - - // Run query without sort, skip or limit - // apply sort, skip and limit to entire dataset - qCopy := *query - qCopy.sort = nil - qCopy.limit = 0 - qCopy.skip = 0 - - var records []*record - err := runQuery(tx, dataType, &qCopy, nil, 0, - func(r *record) error { - records = append(records, r) - - return nil - }) - - if err != nil { - return err - } - - sort.Slice(records, func(i, j int) bool { - for _, field := range query.sort { - value := records[i].value.Elem().FieldByName(field).Interface() - other := records[j].value.Elem().FieldByName(field).Interface() - - if query.reverse { - value, other = other, value - } - - cmp, cerr := compare(value, other) - if cerr != nil { - // if for some reason there is an error on compare, fallback to a lexicographic compare - valS := fmt.Sprintf("%s", value) - otherS := fmt.Sprintf("%s", other) - if valS < otherS { - return true - } else if valS == otherS { - continue - } - return false - } - - if cmp == -1 { - return true - } else if cmp == 0 { - continue - } - return false - } - return false - }) - - // apply skip and limit - limit := query.limit - skip := query.skip - - if skip > len(records) { - records = records[0:0] - } else { - records = records[skip:] - } - - if limit > 0 && limit <= len(records) { - records = records[:limit] - } - - for i := range records { - err = action(records[i]) - if err != nil { - return err - } - } - - return nil + return runQuerySort(tx, dataType, query, action) } iter := newIterator(tx, storer.Type(), query) @@ -693,6 +622,147 @@ func runQuery(tx *bolt.Tx, dataType interface{}, query *Query, retrievedKeys key return nil } +// runQuerySort runs the query without sort, skip, or limit, then applies them to the entire result set +func runQuerySort(tx *bolt.Tx, dataType interface{}, query *Query, action func(r *record) error) error { + // Validate sort fields + for _, field := range query.sort { + _, found := query.dataType.FieldByName(field) + if !found { + return fmt.Errorf("The field %s does not exist in the type %s", field, query.dataType) + } + } + + // Run query without sort, skip or limit + // apply sort, skip and limit to entire dataset + qCopy := *query + qCopy.sort = nil + qCopy.limit = 0 + qCopy.skip = 0 + + var records []*record + err := runQuery(tx, dataType, &qCopy, nil, 0, + func(r *record) error { + records = append(records, r) + + return nil + }) + + if err != nil { + return err + } + + sort.Slice(records, func(i, j int) bool { + for _, field := range query.sort { + value := records[i].value.Elem().FieldByName(field).Interface() + other := records[j].value.Elem().FieldByName(field).Interface() + + if query.reverse { + value, other = other, value + } + + cmp, cerr := compare(value, other) + if cerr != nil { + // if for some reason there is an error on compare, fallback to a lexicographic compare + valS := fmt.Sprintf("%s", value) + otherS := fmt.Sprintf("%s", other) + if valS < otherS { + return true + } else if valS == otherS { + continue + } + return false + } + + if cmp == -1 { + return true + } else if cmp == 0 { + continue + } + return false + } + return false + }) + + // apply skip and limit + limit := query.limit + skip := query.skip + + if skip > len(records) { + records = records[0:0] + } else { + records = records[skip:] + } + + if limit > 0 && limit <= len(records) { + records = records[:limit] + } + + for i := range records { + err = action(records[i]) + if err != nil { + return err + } + } + + return nil + +} + +// runQueryIndexSelect selects the fastest index by running the query against all defined indexes and using +// the one that returns first +func runQueryIndexSelect(tx *bolt.Tx, dataType interface{}, query *Query, action func(r *record) error) error { + indexes := newStorer(dataType).Indexes() + + var once sync.Once + var err error + + var records []*record + + done := make(chan error) + for indexName := range indexes { + go func(index string) { + qCopy := *query + qCopy.index = index + + var indexRecords []*record + + err := runQuery(tx, dataType, &qCopy, nil, 0, + func(r *record) error { + records = append(records, r) + + return nil + }) + + if err != nil { + done <- err + return + } + + once.Do(func() { + records = indexRecords + }) + done <- nil + }(indexName) + rErr := <-done + if rErr != nil { + err = rErr + } + } + + if err != nil { + return err + } + + for i := range records { + err = action(records[i]) + if err != nil { + return err + } + } + + return nil +} + func findQuery(tx *bolt.Tx, result interface{}, query *Query) error { if query == nil { query = &Query{} From b1fdccca5ce2a5cb0ed723cded585f483255d860 Mon Sep 17 00:00:00 2001 From: Tim Shannon Date: Tue, 26 Sep 2017 17:46:51 -0500 Subject: [PATCH 2/5] Fixed a few issues * Added check for invalid index * Made index selector smarter about only running indexes for fields actually in the query --- find_test.go | 18 ++++++++++++++++++ index.go | 5 ++++- query.go | 28 +++++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/find_test.go b/find_test.go index 7de7bf0..a693408 100644 --- a/find_test.go +++ b/find_test.go @@ -453,6 +453,11 @@ var tests = []test{ query: bolthold.Where("Category").In("animal", "vehicle"), result: []int{0, 1, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16}, }, + test{ + name: "Equal Field With Specific Index", + query: bolthold.Where("Category").Eq("vehicle").Index("Category"), + result: []int{0, 1, 3, 6, 11}, + }, } func insertTestData(t *testing.T, store *bolthold.Store) { @@ -582,6 +587,19 @@ func TestFindOnInvalidFieldName(t *testing.T) { }) } +func TestFindOnInvalidIndex(t *testing.T) { + testWrap(t, func(store *bolthold.Store, t *testing.T) { + insertTestData(t, store) + var result []ItemTest + + err := store.Find(&result, bolthold.Where("Name").Eq("test").Index("BadIndex")) + if err == nil { + t.Fatalf("Find query against a bad index name didn't return an error!") + } + + }) +} + func TestQueryStringPrint(t *testing.T) { q := bolthold.Where("FirstField").Eq("first value").And("SecondField").Gt("Second Value").And("ThirdField"). Lt("Third Value").And("FourthField").Ge("FourthValue").And("FifthField").Le("FifthValue").And("SixthField"). diff --git a/index.go b/index.go index f4147fa..92684ac 100644 --- a/index.go +++ b/index.go @@ -206,7 +206,10 @@ func newIterator(tx *bolt.Tx, typeName string, query *Query) *iterator { return iter } - iBucket := tx.Bucket(indexBucketName(typeName, query.index)) + var iBucket *bolt.Bucket + if !query.badIndex { + iBucket = tx.Bucket(indexBucketName(typeName, query.index)) + } if iBucket == nil { // bad index, filter through entire store diff --git a/query.go b/query.go index bcc3cab..a0465ac 100644 --- a/query.go +++ b/query.go @@ -525,11 +525,15 @@ type record struct { } func runQuery(tx *bolt.Tx, dataType interface{}, query *Query, retrievedKeys keyList, skip int, action func(r *record) error) error { - if query.index == "" { + if query.index == "" && !query.badIndex { return runQueryIndexSelect(tx, dataType, query, action) } storer := newStorer(dataType) + if tx.Bucket(indexBucketName(storer.Type(), query.index)) == nil { + return fmt.Errorf("The index %s does not exist", query.index) + } + tp := dataType for reflect.TypeOf(tp).Kind() == reflect.Ptr { @@ -713,13 +717,29 @@ func runQuerySort(tx *bolt.Tx, dataType interface{}, query *Query, action func(r func runQueryIndexSelect(tx *bolt.Tx, dataType interface{}, query *Query, action func(r *record) error) error { indexes := newStorer(dataType).Indexes() + // find overlap between query fields and indexes + var found []string + for indexName := range indexes { + for field := range query.fieldCriteria { + if field == indexName { + found = append(found, indexName) + } + } + } + + if len(found) == 0 { + // no indexes overlap with selected fields, force iterator to scan the entire dataset + found = append(found, "") + query.badIndex = true + } + var once sync.Once var err error var records []*record done := make(chan error) - for indexName := range indexes { + for _, indexName := range found { go func(index string) { qCopy := *query qCopy.index = index @@ -728,7 +748,7 @@ func runQueryIndexSelect(tx *bolt.Tx, dataType interface{}, query *Query, action err := runQuery(tx, dataType, &qCopy, nil, 0, func(r *record) error { - records = append(records, r) + indexRecords = append(indexRecords, r) return nil }) @@ -749,6 +769,8 @@ func runQueryIndexSelect(tx *bolt.Tx, dataType interface{}, query *Query, action } } + close(done) + if err != nil { return err } From 8019bdfeff16fd6f2b6c8ebd08c5ddc304a86b26 Mon Sep 17 00:00:00 2001 From: Tim Shannon Date: Thu, 28 Sep 2017 17:40:27 -0500 Subject: [PATCH 3/5] Fixed bug with query managed currentRow --- compare.go | 6 +++--- find_test.go | 13 +++++++++++++ index.go | 24 +++++++++++++++++------- query.go | 41 ++++++++++++++++++++++++----------------- 4 files changed, 57 insertions(+), 27 deletions(-) diff --git a/compare.go b/compare.go index ada7e6f..abe1dd8 100644 --- a/compare.go +++ b/compare.go @@ -31,7 +31,7 @@ type Comparer interface { Compare(other interface{}) (int, error) } -func (c *Criterion) compare(rowValue, criterionValue interface{}) (int, error) { +func (c *Criterion) compare(rowValue, criterionValue interface{}, currentRow interface{}) (int, error) { if rowValue == nil || criterionValue == nil { if rowValue == criterionValue { return 0, nil @@ -40,10 +40,10 @@ func (c *Criterion) compare(rowValue, criterionValue interface{}) (int, error) { } if _, ok := criterionValue.(Field); ok { - fVal := reflect.ValueOf(c.query.currentRow).Elem().FieldByName(string(criterionValue.(Field))) + fVal := reflect.ValueOf(currentRow).Elem().FieldByName(string(criterionValue.(Field))) if !fVal.IsValid() { return 0, fmt.Errorf("The field %s does not exist in the type %s", criterionValue, - reflect.TypeOf(c.query.currentRow)) + reflect.TypeOf(currentRow)) } criterionValue = fVal.Interface() diff --git a/find_test.go b/find_test.go index a693408..d1c135e 100644 --- a/find_test.go +++ b/find_test.go @@ -432,6 +432,19 @@ var tests = []test{ }), result: []int{2, 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 16}, }, + test{ + name: "Issue #8 - Function Field on a specific index", + query: bolthold.Where("Category").MatchFunc(func(ra *bolthold.RecordAccess) (bool, error) { + field := ra.Field() + _, ok := field.(string) + if !ok { + return false, fmt.Errorf("Field not a string, it's a %T!", field) + } + + return !strings.HasPrefix(field.(string), "veh"), nil + }).Index("Category"), + result: []int{2, 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 16}, + }, test{ name: "Find item with max ID in each category - sub aggregate query", query: bolthold.Where("ID").MatchFunc(func(ra *bolthold.RecordAccess) (bool, error) { diff --git a/index.go b/index.go index 92684ac..c3ad456 100644 --- a/index.go +++ b/index.go @@ -6,6 +6,7 @@ package bolthold import ( "bytes" + "reflect" "sort" "github.com/boltdb/bolt" @@ -171,10 +172,11 @@ func newIterator(tx *bolt.Tx, typeName string, query *Query) *iterator { return iter } + criteria := query.fieldCriteria[query.index] + // Key field - if query.index == Key { + if query.index == Key && !query.badIndex { iter.indexCursor = tx.Bucket([]byte(typeName)).Cursor() - criteria := query.fieldCriteria[Key] iter.nextKeys = func(prepCursor bool, cursor *bolt.Cursor) ([][]byte, error) { var nKeys [][]byte @@ -191,7 +193,14 @@ func newIterator(tx *bolt.Tx, typeName string, query *Query) *iterator { return nKeys, nil } - ok, err := matchesAllCriteria(criteria, k, true) + val := reflect.New(query.dataType) + v := iter.dataBucket.Get(k) + err := decode(v, val.Interface()) + if err != nil { + return nil, err + } + + ok, err := matchesAllCriteria(criteria, k, true, val.Interface()) if err != nil { return nil, err } @@ -211,8 +220,8 @@ func newIterator(tx *bolt.Tx, typeName string, query *Query) *iterator { iBucket = tx.Bucket(indexBucketName(typeName, query.index)) } - if iBucket == nil { - // bad index, filter through entire store + if iBucket == nil || hasMatchFunc(criteria) { + // bad index or matches Function on indexed field, filter through entire store query.badIndex = true iter.indexCursor = tx.Bucket([]byte(typeName)).Cursor() @@ -245,7 +254,6 @@ func newIterator(tx *bolt.Tx, typeName string, query *Query) *iterator { iter.nextKeys = func(prepCursor bool, cursor *bolt.Cursor) ([][]byte, error) { var nKeys [][]byte - criteria := query.fieldCriteria[query.index] for len(nKeys) < iteratorKeyMinCacheSize { var k, v []byte @@ -258,7 +266,9 @@ func newIterator(tx *bolt.Tx, typeName string, query *Query) *iterator { if k == nil { return nKeys, nil } - ok, err := matchesAllCriteria(criteria, k, true) + + // no currentRow on indexes as it refers to multiple rows + ok, err := matchesAllCriteria(criteria, k, true, nil) if err != nil { return nil, err } diff --git a/query.go b/query.go index a0465ac..abf7b75 100644 --- a/query.go +++ b/query.go @@ -44,10 +44,9 @@ type Query struct { fieldCriteria map[string][]*Criterion ors []*Query - badIndex bool - currentRow interface{} - dataType reflect.Type - tx *bolt.Tx + badIndex bool + dataType reflect.Type + tx *bolt.Tx limit int skip int @@ -80,6 +79,15 @@ type Criterion struct { inValues []interface{} } +func hasMatchFunc(criteria []*Criterion) bool { + for _, c := range criteria { + if c.operator == fn { + return true + } + } + return false +} + // Field allows for referencing a field in structure being compared type Field string @@ -194,7 +202,7 @@ func (q *Query) Or(query *Query) *Query { return q } -func (q *Query) matchesAllFields(key []byte, value reflect.Value) (bool, error) { +func (q *Query) matchesAllFields(key []byte, value reflect.Value, currentRow interface{}) (bool, error) { if q.IsEmpty() { return true, nil } @@ -206,7 +214,7 @@ func (q *Query) matchesAllFields(key []byte, value reflect.Value) (bool, error) } if field == Key { - ok, err := matchesAllCriteria(criteria, key, true) + ok, err := matchesAllCriteria(criteria, key, true, currentRow) if err != nil { return false, err } @@ -223,7 +231,7 @@ func (q *Query) matchesAllFields(key []byte, value reflect.Value) (bool, error) return false, fmt.Errorf("The field %s does not exist in the type %s", field, value) } - ok, err := matchesAllCriteria(criteria, fVal.Interface(), false) + ok, err := matchesAllCriteria(criteria, fVal.Interface(), false, currentRow) if err != nil { return false, err } @@ -317,7 +325,7 @@ func (r *RecordAccess) Field() interface{} { func (r *RecordAccess) Record() interface{} { if r.record == nil { // I don't like this, but it's better than not having access to Record for all other query types - panic("Current Record doesn't exists, because this criteria was executed on an index") + panic("Current Record doesn't exists, because this criteria was executed on an index that refers to multiple records") } return r.record } @@ -344,7 +352,7 @@ func (c *Criterion) MatchFunc(match MatchFunc) *Query { } // test if the criterion passes with the passed in value -func (c *Criterion) test(testValue interface{}, encoded bool) (bool, error) { +func (c *Criterion) test(testValue interface{}, encoded bool, currentRow interface{}) (bool, error) { var value interface{} if encoded { if len(testValue.([]byte)) != 0 { @@ -393,7 +401,7 @@ func (c *Criterion) test(testValue interface{}, encoded bool) (bool, error) { switch c.operator { case in: for i := range c.inValues { - result, err := c.compare(value, c.inValues[i]) + result, err := c.compare(value, c.inValues[i], currentRow) if err != nil { return false, err } @@ -408,14 +416,14 @@ func (c *Criterion) test(testValue interface{}, encoded bool) (bool, error) { case fn: return c.value.(MatchFunc)(&RecordAccess{ field: value, - record: c.query.currentRow, + record: currentRow, tx: c.query.tx, }) case isnil: return reflect.ValueOf(value).IsNil(), nil default: //comparison operators - result, err := c.compare(value, c.value) + result, err := c.compare(value, c.value, currentRow) if err != nil { return false, err } @@ -439,9 +447,9 @@ func (c *Criterion) test(testValue interface{}, encoded bool) (bool, error) { } } -func matchesAllCriteria(criteria []*Criterion, value interface{}, encoded bool) (bool, error) { +func matchesAllCriteria(criteria []*Criterion, value interface{}, encoded bool, currentRow interface{}) (bool, error) { for i := range criteria { - ok, err := criteria[i].test(value, encoded) + ok, err := criteria[i].test(value, encoded, currentRow) if err != nil { return false, err } @@ -530,7 +538,7 @@ func runQuery(tx *bolt.Tx, dataType interface{}, query *Query, retrievedKeys key } storer := newStorer(dataType) - if tx.Bucket(indexBucketName(storer.Type(), query.index)) == nil { + if query.index != "" && tx.Bucket(indexBucketName(storer.Type(), query.index)) == nil { return fmt.Errorf("The index %s does not exist", query.index) } @@ -567,10 +575,9 @@ func runQuery(tx *bolt.Tx, dataType interface{}, query *Query, retrievedKeys key return err } - query.currentRow = val.Interface() query.tx = tx - ok, err := query.matchesAllFields(k, val) + ok, err := query.matchesAllFields(k, val, val.Interface()) if err != nil { return err } From 18e7592f2e9250813f7a78b51a42ec295bc4c2d0 Mon Sep 17 00:00:00 2001 From: Tim Shannon Date: Fri, 29 Sep 2017 16:30:00 -0500 Subject: [PATCH 4/5] Removed threaded query optimiser. Caused too many issues, and made query performance non-deterministic. Better to just let users specify which index they want to use. --- README.md | 3 +- find_test.go | 18 ------------ query.go | 78 ---------------------------------------------------- 3 files changed, 1 insertion(+), 98 deletions(-) diff --git a/README.md b/README.md index 4838dbf..72e77e1 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,7 @@ struct tag. ## Queries Queries are chain-able constructs that filters out any data that doesn't match it's criteria. An index will be used if -the `.Index()` chain is called, otherwise bolthold will try all possible indexes simultaneously and return the data -from the quickest query, and canceling the rest. +the `.Index()` chain is called, otherwise bolthold won't use any index. Queries will look like this: ```Go diff --git a/find_test.go b/find_test.go index d1c135e..257a1de 100644 --- a/find_test.go +++ b/find_test.go @@ -834,24 +834,6 @@ func TestKeyMatchFunc(t *testing.T) { }) } -func TestRecordOnIndexMatchFunc(t *testing.T) { - testWrap(t, func(store *bolthold.Store, t *testing.T) { - insertTestData(t, store) - - defer func() { - if r := recover(); r == nil { - t.Fatalf("Running matchFunc against an Index did not panic when trying to access the Record!") - } - }() - - var result []ItemTest - _ = store.Find(&result, bolthold.Where("Category").MatchFunc(func(ra *bolthold.RecordAccess) (bool, error) { - ra.Record() - return false, nil - })) - }) -} - func TestKeyStructTag(t *testing.T) { testWrap(t, func(store *bolthold.Store, t *testing.T) { type KeyTest struct { diff --git a/query.go b/query.go index abf7b75..82c18ea 100644 --- a/query.go +++ b/query.go @@ -10,7 +10,6 @@ import ( "regexp" "sort" "strings" - "sync" "unicode" "github.com/boltdb/bolt" @@ -533,10 +532,6 @@ type record struct { } func runQuery(tx *bolt.Tx, dataType interface{}, query *Query, retrievedKeys keyList, skip int, action func(r *record) error) error { - if query.index == "" && !query.badIndex { - return runQueryIndexSelect(tx, dataType, query, action) - } - storer := newStorer(dataType) if query.index != "" && tx.Bucket(indexBucketName(storer.Type(), query.index)) == nil { return fmt.Errorf("The index %s does not exist", query.index) @@ -719,79 +714,6 @@ func runQuerySort(tx *bolt.Tx, dataType interface{}, query *Query, action func(r } -// runQueryIndexSelect selects the fastest index by running the query against all defined indexes and using -// the one that returns first -func runQueryIndexSelect(tx *bolt.Tx, dataType interface{}, query *Query, action func(r *record) error) error { - indexes := newStorer(dataType).Indexes() - - // find overlap between query fields and indexes - var found []string - for indexName := range indexes { - for field := range query.fieldCriteria { - if field == indexName { - found = append(found, indexName) - } - } - } - - if len(found) == 0 { - // no indexes overlap with selected fields, force iterator to scan the entire dataset - found = append(found, "") - query.badIndex = true - } - - var once sync.Once - var err error - - var records []*record - - done := make(chan error) - for _, indexName := range found { - go func(index string) { - qCopy := *query - qCopy.index = index - - var indexRecords []*record - - err := runQuery(tx, dataType, &qCopy, nil, 0, - func(r *record) error { - indexRecords = append(indexRecords, r) - - return nil - }) - - if err != nil { - done <- err - return - } - - once.Do(func() { - records = indexRecords - }) - done <- nil - }(indexName) - rErr := <-done - if rErr != nil { - err = rErr - } - } - - close(done) - - if err != nil { - return err - } - - for i := range records { - err = action(records[i]) - if err != nil { - return err - } - } - - return nil -} - func findQuery(tx *bolt.Tx, result interface{}, query *Query) error { if query == nil { query = &Query{} From d79ac27e6e2de855c82230c1b6f89c3ac3494313 Mon Sep 17 00:00:00 2001 From: Tim Shannon Date: Fri, 29 Sep 2017 17:09:40 -0500 Subject: [PATCH 5/5] Added more tests The change in index handling behavior made my tests not cover all code paths anymore, so I added some additional tests to make sure those all still work. --- aggregate_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ find_test.go | 23 ++++++++++++++++++++++- query.go | 25 +------------------------ 3 files changed, 67 insertions(+), 25 deletions(-) diff --git a/aggregate_test.go b/aggregate_test.go index bf15d21..5c70ca8 100644 --- a/aggregate_test.go +++ b/aggregate_test.go @@ -475,3 +475,47 @@ func TestFindAggregateBadSumFieldPanic(t *testing.T) { result[0].Sum("BadField") }) } + +func TestFindAggregateBadGroupField(t *testing.T) { + testWrap(t, func(store *bolthold.Store, t *testing.T) { + insertTestData(t, store) + + _, err := store.FindAggregate(&ItemTest{}, nil, "BadField") + if err == nil { + t.Fatalf("FindAggregate didn't fail when grouped by a bad field.") + } + + }) +} + +func TestFindAggregateWithNoResult(t *testing.T) { + testWrap(t, func(store *bolthold.Store, t *testing.T) { + insertTestData(t, store) + + result, err := store.FindAggregate(&ItemTest{}, bolthold.Where("Name").Eq("Never going to match on this"), "Category") + if err != nil { + t.Fatalf("FindAggregate failed when the query produced no results") + } + + if len(result) != 0 { + t.Fatalf("Incorrect result. Wanted 0 got %d", len(result)) + } + + }) +} + +func TestFindAggregateWithNoGroupBy(t *testing.T) { + testWrap(t, func(store *bolthold.Store, t *testing.T) { + insertTestData(t, store) + + result, err := store.FindAggregate(&ItemTest{}, nil) + if err != nil { + t.Fatalf("FindAggregate failed when there was no groupBy ") + } + + if len(result) != 1 { + t.Fatalf("Incorrect result. Wanted 1 got %d", len(result)) + } + + }) +} diff --git a/find_test.go b/find_test.go index 257a1de..108d290 100644 --- a/find_test.go +++ b/find_test.go @@ -282,6 +282,16 @@ var tests = []test{ query: bolthold.Where("ID").In(5, 8, 3), result: []int{6, 7, 4, 13, 3}, }, + test{ + name: "In on data from other index", + query: bolthold.Where("ID").In(5, 8, 3).Index("Category"), + result: []int{6, 7, 4, 13, 3}, + }, + test{ + name: "In on index", + query: bolthold.Where("Category").In("food", "animal").Index("Category"), + result: []int{2, 4, 5, 7, 8, 9, 10, 12, 13, 14, 15, 16}, + }, test{ name: "Regular Expression", query: bolthold.Where("Name").RegExp(regexp.MustCompile("ea")), @@ -471,6 +481,16 @@ var tests = []test{ query: bolthold.Where("Category").Eq("vehicle").Index("Category"), result: []int{0, 1, 3, 6, 11}, }, + test{ + name: "Key test after lead field", + query: bolthold.Where("Category").Eq("food").And(bolthold.Key).Gt(testData[10].Key), + result: []int{12, 15}, + }, + test{ + name: "Key test after lead index", + query: bolthold.Where("Category").Eq("food").Index("Category").And(bolthold.Key).Gt(testData[10].Key), + result: []int{12, 15}, + }, } func insertTestData(t *testing.T, store *bolthold.Store) { @@ -617,7 +637,7 @@ func TestQueryStringPrint(t *testing.T) { q := bolthold.Where("FirstField").Eq("first value").And("SecondField").Gt("Second Value").And("ThirdField"). Lt("Third Value").And("FourthField").Ge("FourthValue").And("FifthField").Le("FifthValue").And("SixthField"). Ne("Sixth Value").Or(bolthold.Where("FirstField").In("val1", "val2", "val3").And("SecondField").IsNil(). - And("ThirdField").RegExp(regexp.MustCompile("test")).And("FirstField"). + And("ThirdField").RegExp(regexp.MustCompile("test")).Index("IndexName").And("FirstField"). MatchFunc(func(ra *bolthold.RecordAccess) (bool, error) { return true, nil })) @@ -633,6 +653,7 @@ func TestQueryStringPrint(t *testing.T) { "FirstField matches the function", "SecondField is nil", "ThirdField matches the regular expression test", + "Using Index [IndexName]", } // map order isn't guaranteed, check if all needed lines exist diff --git a/query.go b/query.go index 82c18ea..2f4dd41 100644 --- a/query.go +++ b/query.go @@ -322,10 +322,6 @@ func (r *RecordAccess) Field() interface{} { // Record is the complete record for a given row in bolthold func (r *RecordAccess) Record() interface{} { - if r.record == nil { - // I don't like this, but it's better than not having access to Record for all other query types - panic("Current Record doesn't exists, because this criteria was executed on an index that refers to multiple records") - } return r.record } @@ -355,26 +351,7 @@ func (c *Criterion) test(testValue interface{}, encoded bool, currentRow interfa var value interface{} if encoded { if len(testValue.([]byte)) != 0 { - if c.operator == fn { - // with matchFunc, their is no value type to assume a type from, so we need to get it - // from the current field being tested - // This is not possible with Keys, so defining a matchFunc query on a Key panics - - fieldType, ok := c.query.dataType.FieldByName(c.query.index) - if !ok { - return false, fmt.Errorf("current row does not contain the field %s which has been indexed", - c.query.index) - } - value = reflect.New(fieldType.Type).Interface() - err := decode(testValue.([]byte), value) - if err != nil { - return false, err - } - - // in order to decode, we needed a pointer, but matchFunc is expecting the original - // type, so we need to get an interface of the actual element, not the pointer - value = reflect.ValueOf(value).Elem().Interface() - } else if c.operator == in { + if c.operator == in { // value is a slice of values, use c.inValues value = reflect.New(reflect.TypeOf(c.inValues[0])).Interface() err := decode(testValue.([]byte), value)