Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize sort followed by limit #6941

Merged
merged 2 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/realm/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1398,9 +1398,12 @@ TableView Query::find_all(size_t limit) const

TableView ret(*this, limit);
if (m_ordering) {
// apply_descriptor_ordering will call do_sync
ret.apply_descriptor_ordering(*m_ordering);
}
ret.do_sync();
else {
ret.do_sync();
}
return ret;
}

Expand Down
24 changes: 23 additions & 1 deletion src/realm/sort_descriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,29 @@ BaseDescriptor::Sorter SortDescriptor::sorter(Table const& table, const IndexPai

void SortDescriptor::execute(IndexPairs& v, const Sorter& predicate, const BaseDescriptor* next) const
{
std::sort(v.begin(), v.end(), std::ref(predicate));
size_t limit = size_t(-1);
if (next && next->get_type() == DescriptorType::Limit) {
limit = static_cast<const LimitDescriptor*>(next)->get_limit();
}
// Measurements show that if limit is smaller than size / 16, then
// it is quicker to make a sorted insert into a smaller vector
if (limit < (v.size() >> 4)) {
IndexPairs buffer;
buffer.reserve(limit + 1);
for (auto& elem : v) {
auto it = std::lower_bound(buffer.begin(), buffer.end(), elem, std::ref(predicate));
buffer.insert(it, elem);
if (buffer.size() > limit) {
buffer.pop_back();
}
}
v.m_removed_by_limit += v.size() - limit;
v.erase(v.begin() + limit, v.end());
std::move(buffer.begin(), buffer.end(), v.begin());
}
else {
std::sort(v.begin(), v.end(), std::ref(predicate));
}

// not doing this on the last step is an optimisation
if (next) {
Expand Down
45 changes: 44 additions & 1 deletion test/benchmark-common-tasks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,49 @@ struct BenchmarkSortIntDictionary : Benchmark {
std::vector<size_t> m_indices;
};

struct BenchmarkSortThenLimit : Benchmark {
const char* name() const
{
return "SortThenLimit";
}

void before_all(DBRef group)
{
WriteTransaction tr(group);
TableRef t = tr.add_table(name());
m_col = t->add_column(type_Int, "first");

std::vector<int> values(10000);
std::iota(values.begin(), values.end(), 0);
std::shuffle(values.begin(), values.end(), std::mt19937(std::random_device()()));

for (auto i : values) {
t->create_object().set(m_col, i);
}
tr.commit();
}

void after_all(DBRef db)
{
WriteTransaction tr(db);
tr.get_group().remove_table(name());
tr.commit();
}

void operator()(DBRef db)
{
realm::ReadTransaction tr(db);
auto tv = tr.get_group().get_table(name())->where().find_all();
DescriptorOrdering ordering;
ordering.append_sort(SortDescriptor({{m_col}}));
ordering.append_limit(100);

tv.apply_descriptor_ordering(ordering);
}

ColKey m_col;
};

struct BenchmarkInsert : BenchmarkWithStringsTable {
const char* name() const
{
Expand Down Expand Up @@ -2249,7 +2292,6 @@ int benchmark_common_tasks_main()

#define BENCH(B) run_benchmark<B>(results)
#define BENCH2(B, mode) run_benchmark<B>(results, mode)

BENCH2(BenchmarkEmptyCommit, true);
BENCH2(BenchmarkEmptyCommit, false);
BENCH2(BenchmarkNonInitiatorOpen, true);
Expand All @@ -2267,6 +2309,7 @@ int benchmark_common_tasks_main()
BENCH(BenchmarkSortInt);
BENCH(BenchmarkSortIntList);
BENCH(BenchmarkSortIntDictionary);
BENCH(BenchmarkSortThenLimit);

BENCH(BenchmarkUnorderedTableViewClear);
BENCH(BenchmarkUnorderedTableViewClearIndexed);
Expand Down
36 changes: 36 additions & 0 deletions test/test_table_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <sstream>
#include <ostream>
#include <cwchar>
#include <chrono>

#include <realm.hpp>

Expand All @@ -32,9 +33,13 @@
#include "test.hpp"
#include "test_table_helper.hpp"

using namespace std::chrono;

using namespace realm;
using namespace test_util;

extern unsigned int unit_test_random_seed;

// Test independence and thread-safety
// -----------------------------------
//
Expand Down Expand Up @@ -2833,4 +2838,35 @@ TEST(TableView_CopyKeyValues)
CHECK_EQUAL(yet_another_view.get_key(0), ObjKey(0));
}

// Checks that applying a sort descriptor followed by a limit descriptor leaves
// exactly `limit` objects in the view, holding the smallest values in ascending order.
TEST(TableView_SortFollowedByLimit)
{
    constexpr int limit = 100;
    Table table;
    auto col = table.add_column(type_Int, "first");

    // Insert the values 0..9999 in a shuffled order; the shuffle is seeded from
    // unit_test_random_seed.
    std::vector<int> values(10000);
    std::iota(values.begin(), values.end(), 0);
    std::shuffle(values.begin(), values.end(), std::mt19937(unit_test_random_seed));

    for (auto i : values) {
        table.create_object().set(col, i);
    }

    auto tv = table.where().find_all();
    DescriptorOrdering ordering;
    ordering.append_sort(SortDescriptor({{col}}));
    ordering.append_limit(limit);

    auto t1 = steady_clock::now();
    tv.apply_descriptor_ordering(ordering);
    auto t2 = steady_clock::now();

    // steady_clock only guarantees that time never goes backwards; two readings may
    // compare equal on a coarse-grained clock, so do not require a strictly positive
    // elapsed time (that would make the test spuriously fail).
    CHECK(t2 >= t1);
    // std::cout << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;

    // Since the inserted values are exactly 0..9999, the first `limit` sorted
    // entries must be 0..limit-1.
    CHECK_EQUAL(tv.size(), limit);
    for (int i = 0; i < limit; i++) {
        CHECK_EQUAL(tv.get_object(i).get<Int>(col), i);
    }
}

#endif // TEST_TABLE_VIEW
Loading