Skip to content

Commit

Permalink
Optimize sort followed by limit
Browse files Browse the repository at this point in the history
If the limit is much smaller than the total size of the TableView, then
it is faster to make a sorted insert into a vector that is kept at the
limit size.
  • Loading branch information
jedelbo committed Sep 1, 2023
1 parent 8b4b08d commit aaf922e
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/realm/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1398,9 +1398,12 @@ TableView Query::find_all(size_t limit) const

TableView ret(*this, limit);
if (m_ordering) {
// apply_descriptor_ordering will call do_sync
ret.apply_descriptor_ordering(*m_ordering);
}
ret.do_sync();
else {
ret.do_sync();
}
return ret;
}

Expand Down
27 changes: 26 additions & 1 deletion src/realm/sort_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,33 @@ BaseDescriptor::Sorter SortDescriptor::sorter(Table const& table, const IndexPai

void SortDescriptor::execute(IndexPairs& v, const Sorter& predicate, const BaseDescriptor* next) const
{
std::sort(v.begin(), v.end(), std::ref(predicate));
size_t limit = size_t(-1);
if (next->get_type() == DescriptorType::Limit) {
limit = static_cast<const LimitDescriptor*>(next)->get_limit();
}
// Measurements show that if limit is smaller than size / 16, then
// it is quicker to make a sorted insert into a smaller vector
if (limit < (v.size() >> 4)) {
IndexPairs buffer;
buffer.reserve(limit + 1);
for (auto& elem : v) {
auto it = std::lower_bound(buffer.begin(), buffer.end(), elem, predicate);
buffer.insert(it, elem);
if (buffer.size() > limit) {
buffer.pop_back();
}
}
v.m_removed_by_limit += v.size() - limit;
v.erase(v.begin() + limit, v.end());
std::move(buffer.begin(), buffer.end(), v.begin());
}
else {
std::sort(v.begin(), v.end(), std::ref(predicate));
}

/*
std::sort(v.begin(), v.end(), std::ref(predicate));
*/
// not doing this on the last step is an optimisation
if (next) {
const size_t v_size = v.size();
Expand Down
36 changes: 36 additions & 0 deletions test/test_table_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <sstream>
#include <ostream>
#include <cwchar>
#include <chrono>

#include <realm.hpp>

Expand All @@ -32,9 +33,13 @@
#include "test.hpp"
#include "test_table_helper.hpp"

using namespace std::chrono;

using namespace realm;
using namespace test_util;

extern unsigned int unit_test_random_seed;

// Test independence and thread-safety
// -----------------------------------
//
Expand Down Expand Up @@ -2833,4 +2838,35 @@ TEST(TableView_CopyKeyValues)
CHECK_EQUAL(yet_another_view.get_key(0), ObjKey(0));
}

// Checks that applying a sort descriptor immediately followed by a limit
// descriptor leaves exactly `limit` objects in the view, and that they are the
// smallest values in ascending order. The values 0..row_count-1 are inserted in
// shuffled order (seeded by unit_test_random_seed), so the expected value at
// view position i is simply i.
TEST(TableView_SortFollowedByLimit)
{
    // The limit is deliberately far below the row count.
    constexpr size_t limit = 100;
    constexpr size_t row_count = 10000;

    Table table;
    auto col = table.add_column(type_Int, "first");

    std::vector<int> values(row_count);
    std::iota(values.begin(), values.end(), 0);
    std::shuffle(values.begin(), values.end(), std::mt19937(unit_test_random_seed));

    for (auto value : values) {
        table.create_object().set(col, value);
    }

    auto tv = table.where().find_all();
    DescriptorOrdering ordering;
    ordering.append_sort(SortDescriptor({{col}}));
    ordering.append_limit(limit);

    // Timing hook for manual measurements; enable the print below to see it.
    auto t1 = steady_clock::now();
    tv.apply_descriptor_ordering(ordering);
    auto t2 = steady_clock::now();

    // steady_clock never goes backwards, but two consecutive readings may be
    // equal, so only a non-strict comparison is safe to assert here.
    CHECK(t2 >= t1);
    // std::cout << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;

    // Unsigned on both sides: tv.size() is compared against a size_t limit.
    CHECK_EQUAL(tv.size(), limit);
    for (size_t i = 0; i < limit; ++i) {
        CHECK_EQUAL(tv.get_object(i).get<Int>(col), Int(i));
    }
}

#endif // TEST_TABLE_VIEW

0 comments on commit aaf922e

Please sign in to comment.