Skip to content

Commit c503715

Browse files
committed
Handle maybe UIDs separately from known good matches
In some edge cases, a maybe query can potentially break search results because Dovecot's internal fallback querying does not have the necessary contextual information to properly query (i.e. if the original text was decoded from an attachment). Fixed by handling the one "maybe" query flatcurve does (queries on non-indexed headers) separately from the "main" full query that will return definite results. Fixes GitHub Issue #38
1 parent 1e40cd6 commit c503715

File tree

5 files changed

+104
-28
lines changed

5 files changed

+104
-28
lines changed

docs/data/events.data.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export default {
3737
fields: {
3838
count: "The number of messages matched",
3939
mailbox: "The mailbox name",
40-
maybe: "Are the results uncertain?",
40+
maybe_uids: "The list of maybe UIDs returned by the query (these UIDs need to have their contents directly searched by Dovecot core)",
4141
query: "The query text sent to Xapian",
4242
uids: "The list of UIDs returned by the query"
4343
},

src/fts-backend-flatcurve-xapian.cpp

Lines changed: 83 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "fts-flatcurve-config.h"
1616
extern "C" {
1717
#include "lib.h"
18+
#include "array.h"
1819
#include "file-create-locked.h"
1920
#include "hash.h"
2021
#include "mail-storage-private.h"
@@ -148,8 +149,17 @@ struct flatcurve_xapian {
148149
bool closing:1;
149150
};
150151

152+
struct flatcurve_fts_query_xapian_maybe {
153+
Xapian::Query *query;
154+
};
155+
151156
struct flatcurve_fts_query_xapian {
152157
Xapian::Query *query;
158+
ARRAY(struct flatcurve_fts_query_xapian_maybe) maybe_queries;
159+
160+
bool and_search:1;
161+
bool maybe:1;
162+
bool start:1;
153163
};
154164

155165
struct flatcurve_xapian_db_iter {
@@ -185,6 +195,7 @@ struct fts_flatcurve_xapian_query_iter {
185195
Xapian::Enquire *enquire;
186196
Xapian::MSetIterator i;
187197
struct fts_flatcurve_xapian_query_result *result;
198+
int curr_query;
188199
};
189200

190201
static void
@@ -1465,19 +1476,22 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
14651476
const char *term)
14661477
{
14671478
const char *hdr;
1479+
bool maybe_or = FALSE;
1480+
struct flatcurve_fts_query_xapian_maybe *mquery;
14681481
Xapian::Query::op op = Xapian::Query::OP_INVALID;
14691482
Xapian::Query *oldq, q;
14701483
struct flatcurve_fts_query_xapian *x = query->xapian;
14711484

1472-
if (x->query != NULL) {
1473-
if ((query->flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0) {
1485+
if (x->start) {
1486+
if (x->and_search) {
14741487
op = Xapian::Query::OP_AND;
14751488
str_append(query->qtext, " AND ");
14761489
} else {
14771490
op = Xapian::Query::OP_OR;
14781491
str_append(query->qtext, " OR ");
14791492
}
14801493
}
1494+
x->start = TRUE;
14811495

14821496
if (arg->match_not)
14831497
str_append(query->qtext, "NOT ");
@@ -1529,7 +1543,10 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
15291543
* appears in the general pool of header
15301544
* terms for the message, not to a specific
15311545
* header, so this is only a maybe match. */
1532-
query->maybe = TRUE;
1546+
if (x->and_search)
1547+
x->maybe = TRUE;
1548+
else
1549+
maybe_or = TRUE;
15331550
}
15341551
} else {
15351552
hdr = t_str_lcase(arg->hdr_field_name);
@@ -1545,9 +1562,17 @@ fts_flatcurve_build_query_arg_term(struct flatcurve_fts_query *query,
15451562
q = Xapian::Query(Xapian::Query::OP_AND_NOT,
15461563
Xapian::Query::MatchAll, q);
15471564

1548-
if (x->query == NULL)
1565+
if (maybe_or) {
1566+
/* Maybe searches are not added to the "master search" query if this
1567+
* is an OR search; they will be run independently. Matches will be
1568+
* placed in the maybe results array. */
1569+
if (!array_is_created(&x->maybe_queries))
1570+
p_array_init(&x->maybe_queries, query->pool, 4);
1571+
mquery = array_append_space(&x->maybe_queries);
1572+
mquery->query = new Xapian::Query(std_move(q));
1573+
} else if (x->query == NULL) {
15491574
x->query = new Xapian::Query(std_move(q));
1550-
else {
1575+
} else {
15511576
oldq = x->query;
15521577
x->query = new Xapian::Query(op, *(x->query), q);
15531578
delete(oldq);
@@ -1631,6 +1656,8 @@ void fts_flatcurve_xapian_build_query(struct flatcurve_fts_query *query)
16311656
return;
16321657
}
16331658

1659+
x->and_search = ((query->flags & FTS_LOOKUP_FLAG_AND_ARGS) != 0);
1660+
16341661
for (; args != NULL ; args = args->next) {
16351662
fts_flatcurve_build_query_arg(query, args);
16361663
}
@@ -1642,29 +1669,59 @@ fts_flatcurve_xapian_query_iter_init(struct flatcurve_fts_query *query)
16421669
struct fts_flatcurve_xapian_query_iter *iter;
16431670

16441671
iter = p_new(query->pool, struct fts_flatcurve_xapian_query_iter, 1);
1672+
/* -1: "Master" query
1673+
* >= 0: Current index of maybe_queries */
1674+
iter->curr_query = -1;
16451675
iter->query = query;
16461676
iter->result = p_new(query->pool,
16471677
struct fts_flatcurve_xapian_query_result, 1);
16481678

16491679
return iter;
16501680
}
16511681

1682+
static void
1683+
fts_flatcurve_xapian_query_iter_clear_current(struct fts_flatcurve_xapian_query_iter *iter)
1684+
{
1685+
iter->i.~MSetIterator();
1686+
delete(iter->enquire);
1687+
iter->enquire = NULL;
1688+
}
1689+
16521690
struct fts_flatcurve_xapian_query_result *
16531691
fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *iter)
16541692
{
16551693
Xapian::MSet m;
1694+
const struct flatcurve_fts_query_xapian_maybe *mquery;
16561695
enum flatcurve_xapian_db_opts opts =
16571696
ENUM_EMPTY(flatcurve_xapian_db_opts);
1697+
Xapian::Query *q = NULL;
16581698

16591699
if (iter->enquire == NULL) {
1660-
if ((iter->query->xapian->query == NULL) ||
1700+
/* Master query. */
1701+
if (iter->curr_query == -1) {
1702+
if (iter->query->xapian->query == NULL)
1703+
++iter->curr_query;
1704+
else
1705+
q = iter->query->xapian->query;
1706+
}
1707+
1708+
/* Maybe queries. */
1709+
if ((iter->curr_query >= 0) &&
1710+
(array_is_created(&iter->query->xapian->maybe_queries)) &&
1711+
(array_count(&iter->query->xapian->maybe_queries) > iter->curr_query)) {
1712+
mquery = array_idx(&iter->query->xapian->maybe_queries,
1713+
iter->curr_query);
1714+
q = mquery->query;
1715+
}
1716+
1717+
if ((q == NULL) ||
16611718
((iter->db = fts_flatcurve_xapian_read_db(iter->query->backend, opts)) == NULL))
16621719
return NULL;
16631720

16641721
iter->enquire = new Xapian::Enquire(*iter->db);
16651722
iter->enquire->set_docid_order(
16661723
Xapian::Enquire::DONT_CARE);
1667-
iter->enquire->set_query(*iter->query->xapian->query);
1724+
iter->enquire->set_query(*q);
16681725

16691726
try {
16701727
m = iter->enquire->get_mset(0,
@@ -1685,9 +1742,13 @@ fts_flatcurve_xapian_query_iter_next(struct fts_flatcurve_xapian_query_iter *ite
16851742
iter->i = m.begin();
16861743
}
16871744

1688-
if (iter->i == m.end())
1689-
return NULL;
1745+
if (iter->i == m.end()) {
1746+
fts_flatcurve_xapian_query_iter_clear_current(iter);
1747+
++iter->curr_query;
1748+
return fts_flatcurve_xapian_query_iter_next(iter);
1749+
}
16901750

1751+
iter->result->maybe = (iter->curr_query >= 0);
16911752
iter->result->score = iter->i.get_weight();
16921753
/* MSet docid can be an "interleaved" docid generated by
16931754
* Xapian::Database when handling multiple DBs at once. Instead, we
@@ -1707,8 +1768,7 @@ fts_flatcurve_xapian_query_iter_deinit(struct fts_flatcurve_xapian_query_iter **
17071768
/* Need to explicitly call dtor, or else MSet doesn't release memory
17081769
* allocated internally. */
17091770
*_iter = NULL;
1710-
iter->i.~MSetIterator();
1711-
delete(iter->enquire);
1771+
fts_flatcurve_xapian_query_iter_clear_current(iter);
17121772
p_free(iter->query->pool, iter->result);
17131773
p_free(iter->query->pool, iter);
17141774
}
@@ -1723,7 +1783,10 @@ bool fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
17231783
if ((iter = fts_flatcurve_xapian_query_iter_init(query)) == NULL)
17241784
return FALSE;
17251785
while ((result = fts_flatcurve_xapian_query_iter_next(iter)) != NULL) {
1726-
seq_range_array_add(&r->uids, result->uid);
1786+
if (result->maybe || query->xapian->maybe)
1787+
seq_range_array_add(&r->maybe_uids, result->uid);
1788+
else
1789+
seq_range_array_add(&r->uids, result->uid);
17271790
score = array_append_space(&r->scores);
17281791
score->score = (float)result->score;
17291792
score->uid = result->uid;
@@ -1734,7 +1797,15 @@ bool fts_flatcurve_xapian_run_query(struct flatcurve_fts_query *query,
17341797

17351798
void fts_flatcurve_xapian_destroy_query(struct flatcurve_fts_query *query)
17361799
{
1800+
struct flatcurve_fts_query_xapian_maybe *mquery;
1801+
17371802
delete(query->xapian->query);
1803+
if (array_is_created(&query->xapian->maybe_queries)) {
1804+
array_foreach_modifiable(&query->xapian->maybe_queries, mquery) {
1805+
delete(mquery->query);
1806+
}
1807+
array_free(&query->xapian->maybe_queries);
1808+
}
17381809
}
17391810

17401811
const char *fts_flatcurve_xapian_library_version()

src/fts-backend-flatcurve-xapian.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
struct fts_flatcurve_xapian_query_result {
88
double score;
99
uint32_t uid;
10+
11+
bool maybe:1;
1012
};
1113

1214
struct fts_flatcurve_xapian_db_check {

src/fts-backend-flatcurve.c

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -548,10 +548,10 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
548548
ARRAY(struct fts_result) box_results;
549549
struct flatcurve_fts_result *fresult;
550550
unsigned int i;
551+
const char *m_debug = "", *u_debug = "";
551552
struct flatcurve_fts_query *query;
552553
struct fts_result *r;
553554
int ret = 0;
554-
const char *u;
555555

556556
/* Create query */
557557
query = fts_backend_flatcurve_create_query(backend, result->pool);
@@ -565,6 +565,7 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
565565
r->box = boxes[i];
566566

567567
fresult = p_new(result->pool, struct flatcurve_fts_result, 1);
568+
p_array_init(&fresult->maybe_uids, result->pool, 32);
568569
p_array_init(&fresult->scores, result->pool, 32);
569570
p_array_init(&fresult->uids, result->pool, 32);
570571

@@ -575,29 +576,31 @@ fts_backend_flatcurve_lookup_multi(struct fts_backend *_backend,
575576
break;
576577
}
577578

578-
if ((query->maybe) ||
579-
((flags & FTS_LOOKUP_FLAG_NO_AUTO_FUZZY) != 0))
580-
r->maybe_uids = fresult->uids;
581-
else
582-
r->definite_uids = fresult->uids;
579+
r->definite_uids = fresult->uids;
580+
r->maybe_uids = fresult->maybe_uids;
583581
r->scores = fresult->scores;
584582

585583
/* This was an empty query - skip output of debug info. */
586584
if (!str_len(query->qtext))
587585
continue;
588586

589-
u = str_c(fts_backend_flatcurve_seq_range_string(&fresult->uids,
590-
query->pool));
587+
if (array_not_empty(&fresult->maybe_uids))
588+
m_debug = str_c(fts_backend_flatcurve_seq_range_string(
589+
&fresult->maybe_uids, query->pool));
590+
if (array_not_empty(&fresult->uids))
591+
u_debug = str_c(fts_backend_flatcurve_seq_range_string(
592+
&fresult->uids, query->pool));
593+
591594
e_debug(event_create_passthrough(backend->event)->
592595
set_name("fts_flatcurve_query")->
593-
add_int("count", array_count(&fresult->uids))->
596+
add_int("count", seq_range_count(&fresult->uids))->
594597
add_str("mailbox", r->box->vname)->
595-
add_str("maybe", query->maybe ? "yes" : "no")->
598+
add_str("maybe_uids", m_debug)->
596599
add_str("query", str_c(query->qtext))->
597-
add_str("uids", u)->event(), "Query (%s) "
598-
"%smatches=%d uids=%s", str_c(query->qtext),
599-
query->maybe ? "maybe_" : "",
600-
array_count(&fresult->uids), u);
600+
add_str("uids", u_debug)->event(), "Query (%s) "
601+
"matches=%d uids=%s maybe_matches=%d maybe_uids=%s",
602+
str_c(query->qtext), seq_range_count(&fresult->uids), u_debug,
603+
seq_range_count(&fresult->maybe_uids), m_debug);
601604
}
602605

603606
if (ret == 0) {

src/fts-backend-flatcurve.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,11 @@ struct flatcurve_fts_query {
5050
pool_t pool;
5151

5252
bool match_all:1;
53-
bool maybe:1;
5453
};
5554

5655
struct flatcurve_fts_result {
5756
ARRAY_TYPE(fts_score_map) scores;
57+
ARRAY_TYPE(seq_range) maybe_uids;
5858
ARRAY_TYPE(seq_range) uids;
5959
};
6060

0 commit comments

Comments
 (0)