Skip to content

Commit

Permalink
Support negation of a search clause in the query endpoint.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Mar 5, 2024
1 parent 806883a commit 92dddbb
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 5 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ The nature of the search depends on the value of `type`:
- For `"and"` and `"or"`, SewerRat searches on a combination of other filters.
The search clause should contain the `children` property, which is an array of other search clauses.
A file is only considered to be a match if it matches all (`"and"`) or any (`"or"`) of the individual clauses in `children`.
- For `"not"`, SewerRat negates the filter.
The search clause should contain the `child` property, which contains the search clause to be negated.
A file is only considered to be a match if it does not match the clause in `child`.

The API returns a response body that contains a JSON object with the following properties:

Expand Down
110 changes: 108 additions & 2 deletions database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ func TestQueryTokens(t *testing.T) {
}
})

t.Run("partial text", func(t *testing.T) {
t.Run("text with field", func(t *testing.T) {
res, err := queryTokens(dbconn, &searchClause{ Type: "text", Text: "lamb", Field: "variants" }, nil, 0)
if err != nil {
t.Fatalf(err.Error())
Expand All @@ -763,7 +763,7 @@ func TestQueryTokens(t *testing.T) {
}
})

t.Run("text with field", func(t *testing.T) {
t.Run("partial test", func(t *testing.T) {
res, err := queryTokens(dbconn, &searchClause{ Type: "text", Text: "%ar%", Partial: true }, nil, 0)
if err != nil {
t.Fatalf(err.Error())
Expand All @@ -773,6 +773,112 @@ func TestQueryTokens(t *testing.T) {
}
})

// Negation of a single text clause ("yuru"): exactly two results are expected,
// stuff/other.json followed by metadata.json (both under to_add).
t.Run("not (simple)", func(t *testing.T) {
res, err := queryTokens(
dbconn,
&searchClause{
Type: "not",
Child: &searchClause{ Type: "text", Text: "yuru" },
},
nil,
0,
)
if err != nil {
t.Fatalf(err.Error())
}
if len(res) != 2 || res[0].Path != filepath.Join(to_add, "stuff/other.json") || res[1].Path != filepath.Join(to_add, "metadata.json") {
t.Fatalf("search results are not as expected %v", res)
}
})

// Negation of a compound clause: NOT ("yuru" OR "lamb") is expected to return
// no results at all.
t.Run("not (complex)", func(t *testing.T) {
res, err := queryTokens(
dbconn,
&searchClause{
Type: "not",
Child: &searchClause{
Type: "or",
Children: []*searchClause{
&searchClause{ Type: "text", Text: "yuru" },
&searchClause{ Type: "text", Text: "lamb" },
},
},
},
nil,
0,
)
if err != nil {
t.Fatalf(err.Error())
}
if len(res) != 0 {
t.Fatalf("search results are not as expected %v", res)
}
})

// Negation of a partial (wildcard) text clause: NOT "%ar%" is expected to
// return only whee/other.json.
t.Run("not (partial)", func(t *testing.T) {
res, err := queryTokens(
dbconn,
&searchClause{
Type: "not",
Child: &searchClause{ Type: "text", Text: "%ar%", Partial: true },
},
nil,
0,
)
if err != nil {
t.Fatalf(err.Error())
}
if len(res) != 1 || res[0].Path != filepath.Join(to_add, "whee/other.json") {
t.Fatalf("search results are not as expected %v", res)
}
})

// A negation nested inside an AND: "hoshino" AND NOT "lamb" is expected to
// return only stuff/metadata.json.
t.Run("not (nested)", func(t *testing.T) {
res, err := queryTokens(
dbconn,
&searchClause{
Type: "and",
Children: []*searchClause{
&searchClause{ Type: "text", Text: "hoshino" },
&searchClause{
Type: "not",
Child: &searchClause{ Type: "text", Text: "lamb" },
},
},
},
nil,
0,
)
if err != nil {
t.Fatalf(err.Error())
}
if len(res) != 1 || res[0].Path != filepath.Join(to_add, "stuff/metadata.json") {
t.Fatalf("search results are not as expected %v", res)
}
})

t.Run("and (simple)", func(t *testing.T) {
res, err := queryTokens(
dbconn,
&searchClause{
Type: "and",
Children: []*searchClause{
&searchClause{ Type: "text", Text: "yuru" },
&searchClause{ Type: "text", Text: "non" },
},
},
nil,
0,
)
if err != nil {
t.Fatalf(err.Error())
}
if len(res) != 1 || res[0].Path != filepath.Join(to_add, "whee/other.json") {
t.Fatalf("search results are not as expected %v", res)
}
})


t.Run("and (simple)", func(t *testing.T) {
res, err := queryTokens(
dbconn,
Expand Down
9 changes: 7 additions & 2 deletions html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -225,17 +225,22 @@ <h1>SewerRat search</h1>
Free text search on the metadata. Leave empty to skip this filter.
For simple use cases, just enter one or more search terms, and we'll search for metadata files that match all the terms.
<br><br>
Advanced users can use the <code>AND</code> and <code>OR</code> keywords to assemble complex queries.
Advanced users can use the <code>AND</code> and <code>OR</code> keywords to perform binary logical operations.
(Make sure to use all-caps for these keywords.)
This can be combined with parentheses to control precedence, e.g., <code>(a b OR c d) AND (e f)</code>;
otherwise, <code>AND</code> takes precedence over <code>OR</code>.
Note that any sequence of adjacent search terms is implicitly joined by <code>AND</code>,
i.e., the query above can be expanded as <code>((a AND b) OR (c AND d)) AND (e AND f)</code>.
<br><br>
On a similar note, the <code>NOT</code> keyword can be used for unary negation.
This should be put before any search terms, e.g., <code>(NOT a b) AND (c d)</code>.
If there are no parentheses, any <code>NOT</code> will take precedence over the other boolean operations,
i.e., the above query is the same as <code>NOT a b AND c d</code>.
<br><br>
Even more advanced users can prefix any sequence of search terms with the name of a metadata field,
to only search for matches within that field of the metadata file, e.g.,
<code>(title: prostate cancer) AND (genome: GRCh38 OR genome: GRCm38)</code>.
Note that this does not extend to the <code>AND</code> and <code>OR</code> keywords,
Note that this does not extend to the <code>AND</code>, <code>OR</code> and <code>NOT</code> keywords,
i.e., <code>title:foo OR bar</code> will not limit the search for <code>bar</code> to the <code>title</code> field.
<br><br>
Extremely advanced users can attach a <code>%</code> wildcard to any term to enable a partial search,
Expand Down
18 changes: 17 additions & 1 deletion html/parseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ function parseQuery(message, at=0, open_par=false) {
let words = [];
let clauses = [];
let operations = [];
let negation = false;

function add_text_clause(at) {
let new_component = { type: "text" };
Expand Down Expand Up @@ -30,6 +31,12 @@ function parseQuery(message, at=0, open_par=false) {
if (new_component.text.match("%")) {
new_component.partial = true;
}

if (negation) {
new_component = { "type": "not", "child": new_component };
negation = false;
}

clauses.push(new_component);
words = [];
return;
Expand Down Expand Up @@ -57,14 +64,20 @@ function parseQuery(message, at=0, open_par=false) {
if (c == "(") {
if (word == "AND" || word == "OR") {
add_operation(i);
} else if (word == "NOT") {
negation = true;
word = "";
} else if (word != "" || words.length > 0) {
throw new Error("search clauses must be separated by AND or OR at position " + String(i));
}
let nested = parseQuery(message, i + 1, true);
i = nested.at;
clauses.push(nested.metadata);
negation = false;
continue;
} else if (c == ")") {
}

if (c == ")") {
if (!open_par) {
throw new Error("unmatched closing parenthesis at position " + String(i))
}
Expand All @@ -76,6 +89,9 @@ function parseQuery(message, at=0, open_par=false) {
if (is_whitespace) {
if (word == "AND" || word == "OR") {
add_operation(i);
} else if (word == "NOT") {
negation = true;
word = "";
} else if (word.length) {
words.push(word)
word = "";
Expand Down
19 changes: 19 additions & 0 deletions query.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ type searchClause struct {

// Only relevant for type = and/or.
Children []*searchClause `json:"children"`

// Only relevant for type = not.
Child *searchClause `json:"child"`
}

func escapeWildcards(input string) (string, string, error) {
Expand Down Expand Up @@ -107,6 +110,17 @@ func sanitizeQuery(original *searchClause, deftok, wildtok *unicodeTokenizer) (*
}
}

// A "not" clause must carry a child clause. The child is sanitized recursively
// and wrapped in a fresh clause of the same type; any error from sanitizing
// the child is returned unchanged.
if original.Type == "not" {
if original.Child == nil {
return nil, fmt.Errorf("search clause of type %q should have non-empty 'child'", original.Type)
}
san, err := sanitizeQuery(original.Child, deftok, wildtok)
if err != nil {
return nil, err
}
return &searchClause { Type: original.Type, Child: san }, nil
}

if original.Type == "text" {
var tokens []string
var err error
Expand Down Expand Up @@ -191,6 +205,11 @@ func assembleFilter(query *searchClause) (string, []interface{}) {
return filter, []interface{}{ query.Time }
}

// A "not" clause is rendered by prefixing the child's filter with SQL NOT;
// the child's parameters are passed through unchanged.
// NOTE(review): the child filter is not wrapped in parentheses here, so this
// relies on assembleFilter returning a self-contained expression for every
// clause type (e.g., already-parenthesized and/or filters) — confirm against
// the other branches.
if query.Type == "not" {
curfilt, curpar := assembleFilter(query.Child)
return "NOT " + curfilt, curpar
}

if query.Type == "and" {
collected := []string{}
parameters := []interface{}{}
Expand Down
17 changes: 17 additions & 0 deletions query_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,23 @@ func TestSanitizeQuery(t *testing.T) {
}
})

// Checks that sanitizing a "not" clause keeps the negation and sanitizes its child.
t.Run("not", func(t *testing.T) {
query := &searchClause {
Type: "not",
Child: &searchClause{ Type: "text", Text: "whee blah" },
}

san, err := sanitizeQuery(query, deftok, wildtok)
if err != nil {
t.Fatalf(err.Error())
}

// The NOT wrapper is retained, and its two-word text child comes back as an AND clause
// (presumably one text clause per token; the AND's children are not inspected here).
if san == nil || san.Type != "not" || san.Child == nil || san.Child.Type != "and" {
t.Fatalf("unexpected result from sanitization %v", san)
}
})

t.Run("text", func(t *testing.T) {
{
// Single token.
Expand Down

0 comments on commit 92dddbb

Please sign in to comment.