diff --git a/README.md b/README.md index 2faef8b..c843293 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,9 @@ The nature of the search depends on the value of `type`: - For `"and"` and `"or"`, SewerRat searches on a combination of other filters. The search clause should contain the `children` property, which is an array of other search clauses. A file is only considered to be a match if it matches all (`"and"`) or any (`"or"`) of the individual clauses in `children`. +- For `"not"`, SewerRat negates the filter. + The search clause should contain the `child` property, which contains the search clause to be negated. + A file is only considered to be a match if it does not match the clause in `child`. The API returns a request body that contains a JSON object with the following properties: diff --git a/database_test.go b/database_test.go index a163f07..95a1e0e 100644 --- a/database_test.go +++ b/database_test.go @@ -753,7 +753,7 @@ func TestQueryTokens(t *testing.T) { } }) - t.Run("partial text", func(t *testing.T) { + t.Run("text with field", func(t *testing.T) { res, err := queryTokens(dbconn, &searchClause{ Type: "text", Text: "lamb", Field: "variants" }, nil, 0) if err != nil { t.Fatalf(err.Error()) @@ -763,7 +763,7 @@ func TestQueryTokens(t *testing.T) { } }) - t.Run("text with field", func(t *testing.T) { + t.Run("partial test", func(t *testing.T) { res, err := queryTokens(dbconn, &searchClause{ Type: "text", Text: "%ar%", Partial: true }, nil, 0) if err != nil { t.Fatalf(err.Error()) @@ -773,6 +773,112 @@ func TestQueryTokens(t *testing.T) { } }) + t.Run("not (simple)", func(t *testing.T) { + res, err := queryTokens( + dbconn, + &searchClause{ + Type: "not", + Child: &searchClause{ Type: "text", Text: "yuru" }, + }, + nil, + 0, + ) + if err != nil { + t.Fatalf(err.Error()) + } + if len(res) != 2 || res[0].Path != filepath.Join(to_add, "stuff/other.json") || res[1].Path != filepath.Join(to_add, "metadata.json") { + t.Fatalf("search results are not as 
expected %v", res) + } + }) + + t.Run("not (complex)", func(t *testing.T) { + res, err := queryTokens( + dbconn, + &searchClause{ + Type: "not", + Child: &searchClause{ + Type: "or", + Children: []*searchClause{ + &searchClause{ Type: "text", Text: "yuru" }, + &searchClause{ Type: "text", Text: "lamb" }, + }, + }, + }, + nil, + 0, + ) + if err != nil { + t.Fatalf(err.Error()) + } + if len(res) != 0 { + t.Fatalf("search results are not as expected %v", res) + } + }) + + t.Run("not (partial)", func(t *testing.T) { + res, err := queryTokens( + dbconn, + &searchClause{ + Type: "not", + Child: &searchClause{ Type: "text", Text: "%ar%", Partial: true }, + }, + nil, + 0, + ) + if err != nil { + t.Fatalf(err.Error()) + } + if len(res) != 1 || res[0].Path != filepath.Join(to_add, "whee/other.json") { + t.Fatalf("search results are not as expected %v", res) + } + }) + + t.Run("not (nested)", func(t *testing.T) { + res, err := queryTokens( + dbconn, + &searchClause{ + Type: "and", + Children: []*searchClause{ + &searchClause{ Type: "text", Text: "hoshino" }, + &searchClause{ + Type: "not", + Child: &searchClause{ Type: "text", Text: "lamb" }, + }, + }, + }, + nil, + 0, + ) + if err != nil { + t.Fatalf(err.Error()) + } + if len(res) != 1 || res[0].Path != filepath.Join(to_add, "stuff/metadata.json") { + t.Fatalf("search results are not as expected %v", res) + } + }) + + t.Run("and (simple)", func(t *testing.T) { + res, err := queryTokens( + dbconn, + &searchClause{ + Type: "and", + Children: []*searchClause{ + &searchClause{ Type: "text", Text: "yuru" }, + &searchClause{ Type: "text", Text: "non" }, + }, + }, + nil, + 0, + ) + if err != nil { + t.Fatalf(err.Error()) + } + if len(res) != 1 || res[0].Path != filepath.Join(to_add, "whee/other.json") { + t.Fatalf("search results are not as expected %v", res) + } + }) + + t.Run("and (simple)", func(t *testing.T) { res, err := queryTokens( dbconn, diff --git a/html/index.html b/html/index.html index 161ca5b..8ffbba7 100644 --- 
a/html/index.html +++ b/html/index.html @@ -225,17 +225,22 @@

SewerRat search

Free text search on the metadata. Leave empty to skip this filter. For simple use cases, just enter one or more search terms, and we'll search for metadata files that match all the terms.

- Advanced users can use the AND and OR keywords to assemble complex queries. + Advanced users can use the AND and OR keywords to perform binary logical operations. (Make sure to use all-caps for these keywords.) This can be combined with parentheses to control precedence, e.g., (a b OR c d) AND (e f); otherwise, AND takes precedence over OR. Note that any sequence of adjacent search terms is implicitly combined with AND, i.e., the query above can be expanded as ((a AND b) OR (c AND d)) AND (e AND f).

+ On a similar note, the NOT keyword can be used for unary negation. + This should be put before any search terms, e.g., (NOT a b) AND (c d). + If there are no parentheses, any NOT will take precedence over the other boolean operations, + i.e., the above query is the same as NOT a b AND c d. +

Even more advanced users can prefix any sequence of search terms with the name of a metadata field, to only search for matches within that field of the metadata file, e.g., (title: prostate cancer) AND (genome: GRCh38 OR genome: GRCm38). - Note that this does not extend to the AND and OR keywords, + Note that this does not extend to the AND, OR and NOT keywords, i.e., title:foo OR bar will not limit the search for bar to the title field.

Extremely advanced users can attach a % wildcard to any term to enable a partial search, diff --git a/html/parseQuery.js b/html/parseQuery.js index 1eb70b3..ea72f6b 100644 --- a/html/parseQuery.js +++ b/html/parseQuery.js @@ -3,6 +3,7 @@ function parseQuery(message, at=0, open_par=false) { let words = []; let clauses = []; let operations = []; + let negation = false; function add_text_clause(at) { let new_component = { type: "text" }; @@ -30,6 +31,12 @@ function parseQuery(message, at=0, open_par=false) { if (new_component.text.match("%")) { new_component.partial = true; } + + if (negation) { + new_component = { "type": "not", "child": new_component }; + negation = false; + } + clauses.push(new_component); words = []; return; @@ -57,14 +64,20 @@ function parseQuery(message, at=0, open_par=false) { if (c == "(") { if (word == "AND" || word == "OR") { add_operation(i); + } else if (word == "NOT") { + negation = true; + word = ""; } else if (word != "" || words.length > 0) { throw new Error("search clauses must be separated by AND or OR at position " + String(i)); } let nested = parseQuery(message, i + 1, true); i = nested.at; clauses.push(nested.metadata); + negation = false; continue; - } else if (c == ")") { + } + + if (c == ")") { if (!open_par) { throw new Error("unmatched closing parenthesis at position " + String(i)) } @@ -76,6 +89,9 @@ function parseQuery(message, at=0, open_par=false) { if (is_whitespace) { if (word == "AND" || word == "OR") { add_operation(i); + } else if (word == "NOT") { + negation = true; + word = ""; } else if (word.length) { words.push(word) word = ""; diff --git a/query.go b/query.go index e5d2fed..a496c9e 100644 --- a/query.go +++ b/query.go @@ -26,6 +26,9 @@ type searchClause struct { // Only relevant for type = and/or. Children []*searchClause `json:"children"` + + // Only relevant for type = not. 
+ Child *searchClause `json:"child"` } func escapeWildcards(input string) (string, string, error) { @@ -107,6 +110,17 @@ func sanitizeQuery(original *searchClause, deftok, wildtok *unicodeTokenizer) (* } } + if original.Type == "not" { + if original.Child == nil { + return nil, fmt.Errorf("search clause of type %q should have non-empty 'child'", original.Type) + } + san, err := sanitizeQuery(original.Child, deftok, wildtok) + if err != nil { + return nil, err + } + return &searchClause { Type: original.Type, Child: san }, nil + } + if original.Type == "text" { var tokens []string var err error @@ -191,6 +205,11 @@ func assembleFilter(query *searchClause) (string, []interface{}) { return filter, []interface{}{ query.Time } } + if query.Type == "not" { + curfilt, curpar := assembleFilter(query.Child) + return "NOT " + curfilt, curpar + } + if query.Type == "and" { collected := []string{} parameters := []interface{}{} diff --git a/query_test.go b/query_test.go index 26c93e1..4b94725 100644 --- a/query_test.go +++ b/query_test.go @@ -168,6 +168,23 @@ func TestSanitizeQuery(t *testing.T) { } }) + t.Run("not", func(t *testing.T) { + query := &searchClause { + Type: "not", + Child: &searchClause{ Type: "text", Text: "whee blah" }, + } + + san, err := sanitizeQuery(query, deftok, wildtok) + if err != nil { + t.Fatalf(err.Error()) + } + + // Now the nested OR is collapsed, but the AND is retained. + if san == nil || san.Type != "not" || san.Child == nil || san.Child.Type != "and" { + t.Fatalf("unexpected result from sanitization %v", san) + } + }) + t.Run("text", func(t *testing.T) { { // Single token.