Skip to content

Commit

Permalink
mango: fix $beginsWith range
Browse files Browse the repository at this point in the history
In the initial implementation of $beginsWith, the range calculation
for view indexes mistakenly appends an integer segment with the default
size of 8 bits, which gets truncated to FF, rather than building a
binary with an extra 3 bytes at the end.

Additionally, ICU defines the maximum sortable code point as
`U+FFFF`. This is a more correct suffix when calculating the
key range and is supported by older ICU versions (required
for e.g. CentOS 7).

This commit fixes the range calculation by correctly appending
the `U+FFFF` code point to the prefix. Additionally, we use the
Erlang `utf8` binary segment type to ensure that the result
is a valid UTF-8 string.
  • Loading branch information
willholley committed Nov 2, 2023
1 parent e9d703c commit 5296a02
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/mango/src/mango_idx_view.erl
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ range(_, _, LCmp, Low, HCmp, High) ->
% beginsWith requires both a high and low bound
range({[{<<"$beginsWith">>, Arg}]}, LCmp, Low, HCmp, High) ->
{LCmp0, Low0, HCmp0, High0} = range({[{<<"$gte">>, Arg}]}, LCmp, Low, HCmp, High),
range({[{<<"$lte">>, <<Arg/binary, 16#10FFFF>>}]}, LCmp0, Low0, HCmp0, High0);
range({[{<<"$lte">>, <<Arg/binary, 16#FFFF/utf8>>}]}, LCmp0, Low0, HCmp0, High0);
range({[{<<"$lt">>, Arg}]}, LCmp, Low, HCmp, High) ->
case range_pos(Low, Arg, High) of
min ->
Expand Down
10 changes: 5 additions & 5 deletions src/mango/test/25-beginswith-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,15 @@ def test_json_range(self):

self.assertEqual(mrargs["start_key"], ["A"])
end_key_bytes = to_utf8_bytes(mrargs["end_key"])
self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbd", b"<MAX>"])
self.assertEqual(end_key_bytes, [b"A\xef\xbf\xbf", b"<MAX>"])

def test_compound_key(self):
selector = {"name": "Eddie", "location": {"$beginsWith": "A"}}
mrargs = self.get_mrargs(selector)

self.assertEqual(mrargs["start_key"], ["Eddie", "A"])
end_key_bytes = to_utf8_bytes(mrargs["end_key"])
self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbd", b"<MAX>"])
self.assertEqual(end_key_bytes, [b"Eddie", b"A\xef\xbf\xbf", b"<MAX>"])

docs = self.db.find(selector)
self.assertEqual(len(docs), 1)
Expand All @@ -74,12 +74,12 @@ def test_sort(self):
{
"sort": ["location"],
"start_key": [b"A"],
"end_key": [b"A\xef\xbf\xbd", b"<MAX>"],
"end_key": [b"A\xef\xbf\xbf", b"<MAX>"],
"direction": "fwd",
},
{
"sort": [{"location": "desc"}],
"start_key": [b"A\xef\xbf\xbd", b"<MAX>"],
"start_key": [b"A\xef\xbf\xbf", b"<MAX>"],
"end_key": [b"A"],
"direction": "rev",
},
Expand All @@ -97,7 +97,7 @@ def test_all_docs_range(self):

self.assertEqual(mrargs["start_key"], "a")
end_key_bytes = to_utf8_bytes(mrargs["end_key"])
self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbd"])
self.assertEqual(end_key_bytes, [b"a", b"\xef\xbf\xbf"])

def test_no_index(self):
selector = {"foo": {"$beginsWith": "a"}}
Expand Down

0 comments on commit 5296a02

Please sign in to comment.