Skip to content

Commit

Permalink
Optimize sort followed by limit
Browse files Browse the repository at this point in the history
If the limit is much smaller than the total size of the TableView, then
it is faster to make a sorted insert into a vector that is kept at the
limit size.
  • Loading branch information
jedelbo committed Sep 1, 2023
1 parent 8b4b08d commit 25e7ba8
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/realm/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1398,9 +1398,12 @@ TableView Query::find_all(size_t limit) const

TableView ret(*this, limit);
if (m_ordering) {
// apply_descriptor_ordering will call do_sync
ret.apply_descriptor_ordering(*m_ordering);
}
ret.do_sync();
else {
ret.do_sync();
}
return ret;
}

Expand Down
24 changes: 23 additions & 1 deletion src/realm/sort_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,29 @@ BaseDescriptor::Sorter SortDescriptor::sorter(Table const& table, const IndexPai

void SortDescriptor::execute(IndexPairs& v, const Sorter& predicate, const BaseDescriptor* next) const
{
std::sort(v.begin(), v.end(), std::ref(predicate));
// No cap on the result unless the descriptor following this sort is a limit.
size_t limit = size_t(-1);
// `next` is null when this sort is the last step (see the `if (next)` guard
// further down), so it must be tested before it is dereferenced.
if (next && next->get_type() == DescriptorType::Limit) {
    limit = static_cast<const LimitDescriptor*>(next)->get_limit();
}
// Measurements show that if the limit is smaller than size / 16, it is
// quicker to do sorted inserts into a smaller vector capped at the limit
if (limit < (v.size() >> 4)) {
IndexPairs buffer;
buffer.reserve(limit + 1);
for (auto& elem : v) {
auto it = std::lower_bound(buffer.begin(), buffer.end(), elem, predicate);
buffer.insert(it, elem);
if (buffer.size() > limit) {
buffer.pop_back();
}
}
v.m_removed_by_limit += v.size() - limit;
v.erase(v.begin() + limit, v.end());
std::move(buffer.begin(), buffer.end(), v.begin());
}
else {
std::sort(v.begin(), v.end(), std::ref(predicate));
}

// not doing this on the last step is an optimisation
if (next) {
Expand Down
36 changes: 36 additions & 0 deletions test/test_table_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <sstream>
#include <ostream>
#include <cwchar>
#include <chrono>

#include <realm.hpp>

Expand All @@ -32,9 +33,13 @@
#include "test.hpp"
#include "test_table_helper.hpp"

using namespace std::chrono;

using namespace realm;
using namespace test_util;

extern unsigned int unit_test_random_seed;

// Test independence and thread-safety
// -----------------------------------
//
Expand Down Expand Up @@ -2833,4 +2838,35 @@ TEST(TableView_CopyKeyValues)
CHECK_EQUAL(yet_another_view.get_key(0), ObjKey(0));
}

// Verifies that a sort immediately followed by a limit leaves exactly the
// `limit` smallest values in the view, in ascending order. With 10000 rows
// and a limit of 100 the limit is below size / 16, the threshold at which
// SortDescriptor::execute switches to its sorted-insert path.
TEST(TableView_SortFollowedByLimit)
{
    constexpr int limit = 100;
    Table table;
    auto col = table.add_column(type_Int, "first");

    // The distinct values 0..9999, shuffled reproducibly from the unit test seed.
    std::vector<int> values(10000);
    std::iota(values.begin(), values.end(), 0);
    std::shuffle(values.begin(), values.end(), std::mt19937(unit_test_random_seed));

    for (auto i : values) {
        table.create_object().set(col, i);
    }

    auto tv = table.where().find_all();
    DescriptorOrdering ordering;
    ordering.append_sort(SortDescriptor({{col}}));
    ordering.append_limit(limit);

    // Timing is kept only as a hook for manual measurement (see the
    // commented-out print below).
    auto t1 = steady_clock::now();
    tv.apply_descriptor_ordering(ordering);
    auto t2 = steady_clock::now();

    // steady_clock never goes backwards, but two readings may compare equal on
    // a coarse clock, so a strict `>` could fail spuriously.
    CHECK(t2 >= t1);
    // std::cout << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;

    // Since the inserted values are exactly 0..9999, the first `limit` entries
    // of the sorted view must be 0, 1, ..., limit - 1.
    CHECK_EQUAL(tv.size(), limit);
    for (int i = 0; i < limit; i++) {
        CHECK_EQUAL(tv.get_object(i).get<Int>(col), i);
    }
}

#endif // TEST_TABLE_VIEW

0 comments on commit 25e7ba8

Please sign in to comment.