Skip to content

Commit 955cae6

Browse files
committed
Handle maybe UIDs separately from known good matches
In some edge cases, a maybe query can potentially break search results because Dovecot's internal fallback querying does not have the necessary contextual information to properly query (i.e. if the original text was decoded from an attachment). Fixed by handling the one "maybe" query flatcurve does (queries on non-indexed headers) separately from the "main" full query that will return definite results. Fixes GitHub Issue #38
1 parent 0bdd265 commit 955cae6

File tree

5 files changed

+91
-25
lines changed

5 files changed

+91
-25
lines changed

docs/data/events.data.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export default {
3737
fields: {
3838
count: "The number of messages matched",
3939
mailbox: "The mailbox name",
40-
maybe: "Are the results uncertain?",
40+
maybe_uids: "The list of maybe UIDs returned by the query (these UIDs need to have their contents directly searched by Dovecot core)",
4141
query: "The query text sent to Xapian",
4242
uids: "The list of UIDs returned by the query"
4343
},

src/fts-backend-flatcurve-xapian.cpp

Lines changed: 71 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "fts-flatcurve-config.h"
1616
extern "C" {
1717
#include "lib.h"
18+
#include "array.h"
1819
#include "file-create-locked.h"
1920
#include "hash.h"
2021
#include "mail-storage-private.h"
@@ -148,8 +149,13 @@ struct flatcurve_xapian {
148149
bool closing:1;
149150
};
150151

152+
struct flatcurve_fts_query_xapian_maybe {
153+
Xapian::Query *query;
154+
};
155+
151156
struct flatcurve_fts_query_xapian {
152157
Xapian::Query *query;
158+
ARRAY(struct flatcurve_fts_query_xapian_maybe) maybe_queries;
153159
};
154160

155161
struct flatcurve_xapian_db_iter {
@@ -185,6 +191,7 @@ struct fts_flatcurve_xapian_query_iter {
185191
Xapian::Enquire *enquire;
186192
Xapian::MSetIterator i;
187193
struct fts_flatcurve_xapian_query_result *result;
194+
int curr_query;
188195
};
189196

190197
static void
@@ -1465,6 +1472,8 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
14651472
const char *term)
14661473
{
14671474
const char *hdr;
1475+
bool maybe = FALSE;
1476+
struct flatcurve_fts_query_xapian_maybe *mquery;
14681477
Xapian::Query::op op = Xapian::Query::OP_INVALID;
14691478
Xapian::Query *oldq, q;
14701479
struct flatcurve_fts_query_xapian *x = query->xapian;
@@ -1529,7 +1538,7 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
15291538
* appears in the general pool of header
15301539
* terms for the message, not to a specific
15311540
* header, so this is only a maybe match. */
1532-
query->maybe = TRUE;
1541+
maybe = TRUE;
15331542
}
15341543
} else {
15351544
hdr = t_str_lcase(arg->hdr_field_name);
@@ -1545,9 +1554,17 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
15451554
q = Xapian::Query(Xapian::Query::OP_AND_NOT,
15461555
Xapian::Query::MatchAll, q);
15471556

1548-
if (x->query == NULL)
1557+
if (maybe) {
1558+
/* Maybe searches are not added to the "master search" query; they
1559+
* will be run independently. Matches will be placed in the maybe
1560+
* results array. */
1561+
if (!array_is_created(&x->maybe_queries))
1562+
p_array_init(&x->maybe_queries, query->pool, 4);
1563+
mquery = array_append_space(&x->maybe_queries);
1564+
mquery->query = new Xapian::Query(std_move(q));
1565+
} else if (x->query == NULL) {
15491566
x->query = new Xapian::Query(std_move(q));
1550-
else {
1567+
} else {
15511568
oldq = x->query;
15521569
x->query = new Xapian::Query(op, *(x->query), q);
15531570
delete(oldq);
@@ -1642,29 +1659,59 @@ fts_flatcurve_xapian_query_iter_init(struct flatcurve_fts_query *query)
16421659
struct fts_flatcurve_xapian_query_iter *iter;
16431660

16441661
iter = p_new(query->pool, struct fts_flatcurve_xapian_query_iter, 1);
1662+
/* -1: "Master" query
1663+
* >= 0: Current index of maybe_queries */
1664+
iter->curr_query = -1;
16451665
iter->query = query;
16461666
iter->result = p_new(query->pool,
16471667
struct fts_flatcurve_xapian_query_result, 1);
16481668

16491669
return iter;
16501670
}
16511671

1672+
static void
1673+
fts_flatcurve_xapian_query_iter_clear_current(struct fts_flatcurve_xapian_query_iter *iter)
1674+
{
1675+
iter->i.~MSetIterator();
1676+
delete(iter->enquire);
1677+
iter->enquire = NULL;
1678+
}
1679+
16521680
struct fts_flatcurve_xapian_query_result *
16531681
fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *iter)
16541682
{
16551683
Xapian::MSet m;
1684+
const struct flatcurve_fts_query_xapian_maybe *mquery;
16561685
enum flatcurve_xapian_db_opts opts =
16571686
ENUM_EMPTY(flatcurve_xapian_db_opts);
1687+
Xapian::Query *q = NULL;
16581688

16591689
if (iter->enquire == NULL) {
1660-
if ((iter->query->xapian->query == NULL) ||
1690+
/* Master query. */
1691+
if (iter->curr_query == -1) {
1692+
if (iter->query->xapian->query == NULL)
1693+
++iter->curr_query;
1694+
else
1695+
q = iter->query->xapian->query;
1696+
}
1697+
1698+
/* Maybe queries. */
1699+
if ((iter->curr_query >= 0) &&
1700+
(array_is_created(&iter->query->xapian->maybe_queries)) &&
1701+
(array_count(&iter->query->xapian->maybe_queries) > iter->curr_query)) {
1702+
mquery = array_idx(&iter->query->xapian->maybe_queries,
1703+
iter->curr_query);
1704+
q = mquery->query;
1705+
}
1706+
1707+
if ((q == NULL) ||
16611708
((iter->db = fts_flatcurve_xapian_read_db(iter->query->backend, opts)) == NULL))
16621709
return NULL;
16631710

16641711
iter->enquire = new Xapian::Enquire(*iter->db);
16651712
iter->enquire->set_docid_order(
16661713
Xapian::Enquire::DONT_CARE);
1667-
iter->enquire->set_query(*iter->query->xapian->query);
1714+
iter->enquire->set_query(*q);
16681715

16691716
try {
16701717
m = iter->enquire->get_mset(0,
@@ -1685,9 +1732,13 @@ fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *ite
16851732
iter->i = m.begin();
16861733
}
16871734

1688-
if (iter->i == m.end())
1689-
return NULL;
1735+
if (iter->i == m.end()) {
1736+
fts_flatcurve_xapian_query_iter_clear_current(iter);
1737+
++iter->curr_query;
1738+
return fts_flatcurve_xapian_query_iter_next(iter);
1739+
}
16901740

1741+
iter->result->maybe = (iter->curr_query >= 0);
16911742
iter->result->score = iter->i.get_weight();
16921743
/* MSet docid can be an "interleaved" docid generated by
16931744
* Xapian::Database when handling multiple DBs at once. Instead, we
@@ -1707,8 +1758,7 @@ fts_flatcurve_xapian_query_iter_deinit(struct fts_flatcurve_xapian_query_iter **
17071758
/* Need to explicitly call dtor, or else MSet doesn't release memory
17081759
* allocated internally. */
17091760
*_iter = NULL;
1710-
iter->i.~MSetIterator();
1711-
delete(iter->enquire);
1761+
fts_flatcurve_xapian_query_iter_clear_current(iter);
17121762
p_free(iter->query->pool, iter->result);
17131763
p_free(iter->query->pool, iter);
17141764
}
@@ -1723,7 +1773,10 @@ bool fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
17231773
if ((iter = fts_flatcurve_xapian_query_iter_init(query)) == NULL)
17241774
return FALSE;
17251775
while ((result = fts_flatcurve_xapian_query_iter_next(iter)) != NULL) {
1726-
seq_range_array_add(&r->uids, result->uid);
1776+
if (result->maybe)
1777+
seq_range_array_add(&r->maybe_uids, result->uid);
1778+
else
1779+
seq_range_array_add(&r->uids, result->uid);
17271780
score = array_append_space(&r->scores);
17281781
score->score = (float)result->score;
17291782
score->uid = result->uid;
@@ -1734,7 +1787,15 @@ bool fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
17341787

17351788
void fts_flatcurve_xapian_destroy_query(struct flatcurve_fts_query *query)
17361789
{
1790+
struct flatcurve_fts_query_xapian_maybe *mquery;
1791+
17371792
delete(query->xapian->query);
1793+
if (array_is_created(&query->xapian->maybe_queries)) {
1794+
array_foreach_modifiable(&query->xapian->maybe_queries, mquery) {
1795+
delete(mquery->query);
1796+
}
1797+
array_free(&query->xapian->maybe_queries);
1798+
}
17381799
}
17391800

17401801
const char *fts_flatcurve_xapian_library_version()

src/fts-backend-flatcurve-xapian.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
struct fts_flatcurve_xapian_query_result {
88
double score;
99
uint32_t uid;
10+
11+
bool maybe:1;
1012
};
1113

1214
struct fts_flatcurve_xapian_db_check {

src/fts-backend-flatcurve.c

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -548,10 +548,10 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
548548
ARRAY(struct fts_result) box_results;
549549
struct flatcurve_fts_result *fresult;
550550
unsigned int i;
551+
const char *m_debug = "", *u_debug = "";
551552
struct flatcurve_fts_query *query;
552553
struct fts_result *r;
553554
int ret = 0;
554-
const char *u;
555555

556556
/* Create query */
557557
query = fts_backend_flatcurve_create_query(backend, result->pool);
@@ -565,6 +565,7 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
565565
r->box = boxes[i];
566566

567567
fresult = p_new(result->pool, struct flatcurve_fts_result, 1);
568+
p_array_init(&fresult->maybe_uids, result->pool, 32);
568569
p_array_init(&fresult->scores, result->pool, 32);
569570
p_array_init(&fresult->uids, result->pool, 32);
570571

@@ -575,29 +576,31 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
575576
break;
576577
}
577578

578-
if ((query->maybe) ||
579-
((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) != 0))
580-
r->maybe_uids = fresult->uids;
581-
else
582-
r->definite_uids = fresult->uids;
579+
r->definite_uids = fresult->uids;
580+
r->maybe_uids = fresult->maybe_uids;
583581
r->scores = fresult->scores;
584582

585583
/* This was an empty query - skip output of debug info. */
586584
if (!str_len(query->qtext))
587585
continue;
588586

589-
u = str_c(fts_backend_flatcurve_seq_range_string(&fresult->uids,
590-
query->pool));
587+
if (array_not_empty(&fresult->maybe_uids))
588+
m_debug = str_c(fts_backend_flatcurve_seq_range_string(
589+
&fresult->maybe_uids, query->pool));
590+
if (array_not_empty(&fresult->uids))
591+
u_debug = str_c(fts_backend_flatcurve_seq_range_string(
592+
&fresult->uids, query->pool));
593+
591594
e_debug(event_create_passthrough(backend->event)->
592595
set_name("fts_flatcurve_query")->
593596
add_int("count", array_count(&fresult->uids))->
594597
add_str("mailbox", r->box->vname)->
595-
add_str("maybe", query->maybe ? "yes" : "no")->
598+
add_str("maybe_uids", m_debug)->
596599
add_str("query", str_c(query->qtext))->
597-
add_str("uids", u)->event(), "Query (%s) "
598-
"%smatches=%d uids=%s", str_c(query->qtext),
599-
query->maybe ? "maybe_" : "",
600-
array_count(&fresult->uids), u);
600+
add_str("uids", u_debug)->event(), "Query (%s) "
601+
"matches=%d uids=%s maybe_matches=%d maybe_uids=%s",
602+
str_c(query->qtext), array_count(&fresult->uids), u_debug,
603+
array_count(&fresult->maybe_uids), m_debug);
601604
}
602605

603606
if (ret == 0) {

src/fts-backend-flatcurve.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ struct flatcurve_fts_query {
5050
pool_t pool;
5151

5252
bool match_all:1;
53-
bool maybe:1;
5453
};
5554

5655
struct flatcurve_fts_result {
5756
ARRAY_TYPE(fts_score_map) scores;
57+
ARRAY_TYPE(seq_range) maybe_uids;
5858
ARRAY_TYPE(seq_range) uids;
5959
};
6060

0 commit comments

Comments
 (0)