Skip to content

Commit

Permalink
Optimize Query::between for integers and timestamps (#7785)
Browse files Browse the repository at this point in the history
* Add benchmark test for QueryRange<Timestamp>
  • Loading branch information
jedelbo authored Jun 11, 2024
1 parent b9b3d89 commit d318021
Show file tree
Hide file tree
Showing 16 changed files with 188 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Enhancements
* <New feature description> (PR [#????](https://github.com/realm/realm-core/pull/????))
* Performance has been improved for range queries on integers and timestamps. The improvement only applies when the query is built with the "BETWEEN" operator in MQL or with the Query::between() method. (PR [#7785](https://github.com/realm/realm-core/pull/7785))
* None.

### Fixed
Expand Down
25 changes: 25 additions & 0 deletions src/realm/array_integer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ Mixed ArrayInteger::get_any(size_t ndx) const
return Mixed(get(ndx));
}

/// Returns the index of the first element in [start, end) whose value lies in
/// the closed interval [from, to], or realm::not_found when there is none.
size_t ArrayInteger::find_first_in_range(int64_t from, int64_t to, size_t start, size_t end) const
{
    // Fast reject: if [m_lbound, m_ubound] does not overlap [from, to] no
    // element can match, so the scan is skipped entirely.
    // NOTE(review): m_lbound/m_ubound are presumably the value bounds of the
    // array's current bit width - confirm against Array.
    if (m_ubound < from || m_lbound > to)
        return realm::not_found;

    for (size_t ndx = start; ndx < end; ++ndx) {
        const auto candidate = get(ndx);
        if (candidate >= from && candidate <= to)
            return ndx;
    }
    return realm::not_found;
}

Mixed ArrayIntNull::get_any(size_t ndx) const
{
return Mixed(get(ndx));
Expand Down Expand Up @@ -177,6 +190,18 @@ size_t ArrayIntNull::find_first(value_type value, size_t begin, size_t end) cons
return find_first<Equal>(value, begin, end);
}

/// Returns the index of the first non-null element in [start, end) whose value
/// lies in the closed interval [from, to], or realm::not_found when there is
/// none. Null elements never match.
size_t ArrayIntNull::find_first_in_range(int64_t from, int64_t to, size_t start, size_t end) const
{
    // Fast reject: nothing can match when [m_lbound, m_ubound] and [from, to]
    // do not overlap.
    if (m_ubound < from || m_lbound > to)
        return realm::not_found;

    size_t pos = start;
    while (pos < end) {
        const auto item = get(pos);
        // 'item' is empty for null entries; those are skipped.
        const bool hit = item && from <= *item && *item <= to;
        if (hit)
            return pos;
        ++pos;
    }
    return realm::not_found;
}

void ArrayIntNull::get_chunk(size_t ndx, value_type res[8]) const noexcept
{
// FIXME: Optimize this
Expand Down
3 changes: 3 additions & 0 deletions src/realm/array_integer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ class ArrayInteger : public Array, public ArrayPayload {
}
template <class cond>
bool find(value_type value, size_t start, size_t end, QueryStateBase* state) const;

size_t find_first_in_range(int64_t from, int64_t to, size_t start, size_t end) const;
};

class ArrayIntNull : public Array, public ArrayPayload {
Expand Down Expand Up @@ -138,6 +140,7 @@ class ArrayIntNull : public Array, public ArrayPayload {


size_t find_first(value_type value, size_t begin = 0, size_t end = npos) const;
size_t find_first_in_range(int64_t from, int64_t to, size_t start, size_t end) const;

protected:
void avoid_null_collision(int64_t value);
Expand Down
17 changes: 17 additions & 0 deletions src/realm/array_timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,23 @@ size_t ArrayTimestamp::find_first<NotEqual>(Timestamp value, size_t begin, size_
return not_found;
}

/// Returns the index of the first timestamp in [start, end) that lies in the
/// closed interval [from, to], or not_found when there is none.
///
/// The seconds array is used as a coarse filter; the nanoseconds are only
/// inspected for candidates whose seconds equal one of the interval's bounds.
size_t ArrayTimestamp::find_first_in_range(Timestamp from, Timestamp to, size_t start, size_t end) const
{
    const auto from_sec = from.get_seconds();
    const auto to_sec = to.get_seconds();

    for (size_t pos = start; pos < end; ++pos) {
        // Jump to the next entry whose seconds part is within [from_sec, to_sec].
        pos = m_seconds.find_first_in_range(from_sec, to_sec, pos, end);
        if (pos == realm::not_found)
            break;

        util::Optional<int64_t> secs = m_seconds.get(pos);
        const int32_t nano = int32_t(m_nanoseconds.get(pos));

        // Nanoseconds only matter when the seconds sit exactly on a bound.
        const bool above_lower = from_sec < *secs || from.get_nanoseconds() <= nano;
        const bool below_upper = to_sec > *secs || nano <= to.get_nanoseconds();
        if (above_lower && below_upper)
            return pos;
    }
    return not_found;
}


void ArrayTimestamp::verify() const
{
#ifdef REALM_DEBUG
Expand Down
1 change: 1 addition & 0 deletions src/realm/array_timestamp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class ArrayTimestamp : public ArrayPayload, private Array {
size_t find_first(Timestamp value, size_t begin, size_t end) const noexcept;

size_t find_first(Timestamp value, size_t begin, size_t end) const noexcept;
size_t find_first_in_range(Timestamp from, Timestamp to, size_t start, size_t end) const;

void verify() const;

Expand Down
9 changes: 5 additions & 4 deletions src/realm/exec/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,11 @@ void print_row(Table& table, size_t r)
std::cout << "\n";
}

} // anonymous namespace

bool is_null(const char* v)
namespace realm {
template <>
bool is_null(const char* const& v)
{
if (v[0] == 0)
return true;
Expand All @@ -132,9 +135,7 @@ bool is_null(const char* v)

return false;
}

} // anonymous namespace

} // namespace realm

Importer::Importer()
: Quiet(false)
Expand Down
6 changes: 6 additions & 0 deletions src/realm/null.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ struct null {
}
};

/// Generic fallback for the is_null() customization point: a value of a type
/// without an explicit specialization is reported as "not null". Types that
/// can represent null provide their own explicit specialization.
template <typename T>
inline bool is_null(const T&)
{
return false;
}

template <class OS>
OS& operator<<(OS& os, const null&)
{
Expand Down
21 changes: 20 additions & 1 deletion src/realm/parser/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,10 +640,29 @@ Query BetweenNode::visit(ParserDriver* drv)

auto& min(limits->elements.at(0));
auto& max(limits->elements.at(1));
Query q(drv->m_base_table);

auto tmp = prop->visit(drv);
const ObjPropertyBase* obj_prop = dynamic_cast<const ObjPropertyBase*>(tmp.get());
if (obj_prop) {
if (tmp->get_type() == type_Int) {
auto min_val = min->visit(drv, type_Int);
auto max_val = max->visit(drv, type_Int);
q.between(obj_prop->column_key(), min_val->get_mixed().get_int(), max_val->get_mixed().get_int());
return q;
}
if (tmp->get_type() == type_Timestamp) {
auto min_val = min->visit(drv, type_Timestamp);
auto max_val = max->visit(drv, type_Timestamp);
q.between(obj_prop->column_key(), min_val->get_mixed().get_timestamp(),
max_val->get_mixed().get_timestamp());
return q;
}
}

RelationalNode cmp1(prop, CompareType::GREATER_EQUAL, min);
RelationalNode cmp2(prop, CompareType::LESS_EQUAL, max);

Query q(drv->m_base_table);
q.and_query(cmp1.visit(drv));
q.and_query(cmp2.visit(drv));

Expand Down
15 changes: 11 additions & 4 deletions src/realm/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,10 +624,12 @@ Query& Query::less(ColKey column_key, int64_t value)
}
/// Adds the condition `from <= column <= to` on an integer column.
///
/// A single BetweenNode evaluates both bounds in one pass over each leaf. The
/// former `group(); greater_equal(); less_equal(); end_group();` pair is no
/// longer added, because it filtered the same range a second time and cancelled
/// the benefit of the dedicated node.
///
/// @param column_key  Integer column to filter on; its nullability selects the
///                    leaf type used by the node.
/// @param from        Inclusive lower bound.
/// @param to          Inclusive upper bound.
/// @return            *this, to allow chaining.
Query& Query::between(ColKey column_key, int64_t from, int64_t to)
{
    std::unique_ptr<realm::ParentNode> node;
    if (column_key.is_nullable()) {
        node.reset(new BetweenNode<ArrayIntNull>(from, to, column_key));
    }
    else {
        node.reset(new BetweenNode<ArrayInteger>(from, to, column_key));
    }
    add_node(std::move(node));
    return *this;
}
Query& Query::equal(ColKey column_key, bool value)
Expand Down Expand Up @@ -737,6 +739,11 @@ Query& Query::less(ColKey column_key, Timestamp value)
return add_condition<Less>(column_key, value);
}

/// Adds the condition `from <= column <= to` on a timestamp column by
/// appending a BetweenNode<ArrayTimestamp> to the query.
/// Returns *this to allow chaining.
Query& Query::between(ColKey column_key, Timestamp from, Timestamp to)
{
    std::unique_ptr<realm::ParentNode> node{new BetweenNode<ArrayTimestamp>(from, to, column_key)};
    add_node(std::move(node));
    return *this;
}
// ------------- ObjectId
Query& Query::greater(ColKey column_key, ObjectId value)
{
Expand Down
1 change: 1 addition & 0 deletions src/realm/query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ class Query final {
Query& greater_equal(ColKey column_key, Timestamp value);
Query& less_equal(ColKey column_key, Timestamp value);
Query& less(ColKey column_key, Timestamp value);
Query& between(ColKey column_key, Timestamp from, Timestamp to);

// Conditions: ObjectId
Query& equal(ColKey column_key, ObjectId value);
Expand Down
60 changes: 60 additions & 0 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,66 @@ class IntegerNodeBase : public ColumnNodeBase {
};


/// Query node that matches rows whose value in the condition column lies
/// between two bounds. The actual comparison is delegated to
/// LeafType::find_first_in_range(), so LeafType must provide that method.
template <class LeafType>
class BetweenNode : public ColumnNodeBase {
public:
    /// Value type of the bounds: the leaf's value type with any Optional removed.
    using TConditionValue = typename util::RemoveOptional<typename LeafType::value_type>::type;

    /// @param from        Lower bound of the range.
    /// @param to          Upper bound of the range.
    /// @param column_key  Column the condition applies to.
    /// @throws InvalidArgument if either bound is null.
    BetweenNode(TConditionValue from, TConditionValue to, ColKey column_key)
        : ColumnNodeBase(column_key)
        , m_from(std::move(from))
        , m_to(std::move(to))
    {
        // Validate the stored members, not the parameters: the parameters
        // have been moved from in the initializer list above.
        if (is_null(m_from) || is_null(m_to))
            throw InvalidArgument("'from' or 'to' must not be null");
    }

    /// Copies the bounds; the leaf cache is not copied and is rebuilt by
    /// cluster_changed().
    BetweenNode(const BetweenNode& other)
        : ColumnNodeBase(other)
        , m_from(other.m_from)
        , m_to(other.m_to)
    {
    }

    /// Re-creates the cached leaf and binds it to the condition column of the
    /// current cluster.
    void cluster_changed() override
    {
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
        m_cluster->init_leaf(this->m_condition_column_key, &*m_leaf);
    }

    void init(bool will_query_ranges) override
    {
        ColumnNodeBase::init(will_query_ranges);

        // NOTE(review): presumably the relative per-row cost estimate used
        // when ordering query nodes - confirm against ParentNode.
        m_dT = .25;
    }

    /// Returns the first index in [start, end) of the cached leaf that is
    /// within the range, as reported by LeafType::find_first_in_range().
    size_t find_first_local(size_t start, size_t end) override
    {
        return m_leaf->find_first_in_range(m_from, m_to, start, end);
    }

    /// Serialises the condition as "<column> between {<from>, <to>}".
    std::string describe(util::serializer::SerialisationState& state) const override
    {
        return state.describe_column(ParentNode::m_table, ColumnNodeBase::m_condition_column_key) + " between {" +
               util::serializer::print_value(this->m_from) + ", " + util::serializer::print_value(this->m_to) + "}";
    }

    std::unique_ptr<ParentNode> clone() const override
    {
        return std::unique_ptr<ParentNode>(new BetweenNode(*this));
    }

private:
    // Search values:
    TConditionValue m_from;
    TConditionValue m_to;

    // Leaf cache
    std::optional<LeafType> m_leaf;
};


template <class LeafType, class TConditionFunction>
class IntegerNode : public IntegerNodeBase<LeafType> {
using BaseType = IntegerNodeBase<LeafType>;
Expand Down
6 changes: 6 additions & 0 deletions src/realm/timestamp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,12 @@ inline std::basic_ostream<C, T>& operator<<(std::basic_ostream<C, T>& out, const
}
// LCOV_EXCL_STOP

/// is_null() specialization for Timestamp: forwards to Timestamp::is_null().
template <>
inline bool is_null(const Timestamp& t)
{
return t.is_null();
}

} // namespace realm

namespace std {
Expand Down
5 changes: 3 additions & 2 deletions test/benchmark-common-tasks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,8 +634,8 @@ struct BenchmarkRangeForType : public BenchmarkWithType<Type> {
}
else {
TableView results = Base::m_table->where()
.greater(Base::m_col, Base::needles[i - 1].template get<underlying_type>())
.less(Base::m_col, Base::needles[i].template get<underlying_type>())
.between(Base::m_col, Base::needles[i - 1].template get<underlying_type>(),
Base::needles[i].template get<underlying_type>())
.find_all();
static_cast<void>(results);
}
Expand Down Expand Up @@ -2741,6 +2741,7 @@ int benchmark_common_tasks_main()
BENCH(BenchmarkParsedIn<Prop<ObjectId>, 5>);

BENCH(BenchmarkRangeForType<Prop<Int>>);
BENCH(BenchmarkRangeForType<Prop<Timestamp>>);
BENCH(BenchmarkCreateIndexForType<NullableIndexed<String>>);
BENCH(BenchmarkCreateIndexForType<NullableIndexed<Int>>);
BENCH(BenchmarkCreateIndexForType<NullableIndexed<Timestamp>>);
Expand Down
22 changes: 22 additions & 0 deletions test/test_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3899,6 +3899,28 @@ TEST(Query_SortDates)
CHECK_EQUAL(tv[2].get<Timestamp>(col_date), Timestamp(3000, 0));
}

// Exercises Query::between() on a nullable Timestamp column, including bounds
// that differ only in their nanosecond part and a round trip through the
// textual query description.
TEST(Query_DateRange)
{
Table table;
auto col_date = table.add_column(type_Timestamp, "date", true);

// For each second in [100, 110): five timestamps with nanoseconds 0..4,
// followed by one object whose date is left unset.
for (int64_t sec = 100; sec < 110; sec++) {
for (int nano = 0; nano < 5; nano++) {
table.create_object().set(col_date, Timestamp(sec, nano));
}
table.create_object();
}

// Degenerate range: only (100, 1) matches.
CHECK_EQUAL(table.where().between(col_date, Timestamp(100, 1), Timestamp(100, 1)).count(), 1);
// Same second, nanoseconds 1..4.
CHECK_EQUAL(table.where().between(col_date, Timestamp(100, 1), Timestamp(100, 4)).count(), 4);
// Upper bound beyond the stored nanoseconds: only (100, 4) matches.
CHECK_EQUAL(table.where().between(col_date, Timestamp(100, 4), Timestamp(100, 7)).count(), 1);
// Range crossing a second boundary: (100, 4) and (101, 0).
CHECK_EQUAL(table.where().between(col_date, Timestamp(100, 4), Timestamp(101, 0)).count(), 2);
// Two full seconds (102 and 103), five entries each.
auto q = table.where().between(col_date, Timestamp(102, 0), Timestamp(103, 10));
CHECK_EQUAL(q.count(), 10);
// The description must parse back into a query with the same result.
auto d = q.get_description();
q = table.query(d);
CHECK_EQUAL(q.count(), 10);
}

TEST(Query_SortBools)
{
Expand Down
9 changes: 7 additions & 2 deletions test/test_query2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,21 +192,26 @@ TEST(Query_FindAll2)
CHECK_EQUAL(6, tv2[0].get<Int>(col_id));
}

TEST(Query_FindAllBetween)
TEST_TYPES(Query_FindAllBetween, std::true_type, std::false_type)
{
Table ttt;
auto col_id = ttt.add_column(type_Int, "id");
auto col_int = ttt.add_column(type_Int, "1");
auto col_int = ttt.add_column(type_Int, "1", TEST_TYPE::value);
ttt.add_column(type_String, "2");

ttt.create_object().set_all(0, 1, "a");
ttt.create_object().set_all(1, 2, "a");
ttt.create_object().set_all(2, 3, "X");
ttt.create_object().set(col_id, 7); // null or 0
ttt.create_object().set_all(3, 4, "a");
ttt.create_object().set_all(4, 5, "a");
ttt.create_object().set_all(5, 11, "X");
ttt.create_object().set_all(6, 3, "X");

Query q1 = ttt.where().between(col_int, 100, 200);
TableView tv1 = q1.find_all();
CHECK_EQUAL(tv1.size(), 0);

Query q2 = ttt.where().between(col_int, 3, 5);
TableView tv2 = q2.find_all();
CHECK_EQUAL(tv2.size(), 4);
Expand Down
28 changes: 0 additions & 28 deletions test/test_shared.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3732,34 +3732,6 @@ TEST(Shared_GetCommitSize)
}
}

/*
#include <valgrind/callgrind.h>
TEST(Shared_TimestampQuery)
{
Table table;
auto col_date = table.add_column(type_Timestamp, "date", true);

Random random(random_int<unsigned long>()); // Seed from slow global generator

for (int i = 0; i < 10000; i++) {
auto ndx = table.add_empty_row();
int seconds = random.draw_int_max(3600 * 24 * 10);
table.set_timestamp(col_date, ndx, Timestamp(seconds, 0));
}

Query q = table.column<Timestamp>(col_date) > Timestamp(3600 * 24 * 5, 3);
auto start = std::chrono::steady_clock::now();
CALLGRIND_START_INSTRUMENTATION;
auto cnt = q.count();
CALLGRIND_STOP_INSTRUMENTATION;
auto end = std::chrono::steady_clock::now();

std::cout << "Time: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() << " us"
<< std::endl;
CHECK_GREATER(cnt, 50000);
}
*/

TEST_IF(Shared_LargeFile, TEST_DURATION > 0 && !REALM_ANDROID)
{
SHARED_GROUP_TEST_PATH(path);
Expand Down

0 comments on commit d318021

Please sign in to comment.