Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize size of Node and Array #7992

Merged
merged 3 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/realm/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,10 +532,10 @@ class Array : public Node, public ArrayParent {
Getter m_getter = nullptr; // cached to avoid indirection
const VTable* m_vtable = nullptr;

uint_least8_t m_width = 0; // Size of an element (meaning depend on type of array).
int64_t m_lbound; // min number that can be stored with current m_width
int64_t m_ubound; // max number that can be stored with current m_width

uint8_t m_width = 0; // Size of an element (meaning depend on type of array).
bool m_is_inner_bptree_node; // This array is an inner node of B+-tree.
bool m_has_refs; // Elements whose first bit is zero are refs to subarrays.
bool m_context_flag; // Meaning depends on context.
Expand Down
142 changes: 72 additions & 70 deletions src/realm/index_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,39 +930,38 @@ void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_da
m_array->insert(ins_pos + 1, ref);
}


void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
void StringIndex::new_node(const NodeChange& nc)
{
NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
switch (nc.type) {
case NodeChange::change_None:
return;
break;
Comment on lines 937 to +938
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears that this case should never happen. Should we have a REALM_UNREACHABLE here?

case NodeChange::change_InsertBefore: {
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
new_node.node_add_key(nc.ref1);
new_node.node_add_key(get_ref());
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
return;
break;
}
case NodeChange::change_InsertAfter: {
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
new_node.node_add_key(get_ref());
new_node.node_add_key(nc.ref1);
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
return;
break;
}
case NodeChange::change_Split: {
StringIndex new_node(inner_node_tag(), m_array->get_alloc());
new_node.node_add_key(nc.ref1);
new_node.node_add_key(nc.ref2);
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
return;
break;
}
}
REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
m_array->init_from_ref(new_node.get_ref());
m_array->update_parent();
}

void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
{
    // Run the insertion; the returned NodeChange tells us whether any
    // follow-up work is required.
    const auto change = do_insert(obj_key, key, offset, index_data, value);
    if (change.type == NodeChange::change_None) {
        return; // nothing further to do
    }
    // Every other change type is handed over to new_node().
    new_node(change);
}


Expand Down Expand Up @@ -1155,58 +1154,61 @@ bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, Strin
throw LogicError(ErrorCodes::LimitExceeded,
util::format("String of length %1 exceeds maximum string length of %2.", len, max));
}

// Get subnode table
Allocator& alloc = m_array->get_alloc();
Array keys(alloc);
get_child(*m_array, 0, keys);
REALM_ASSERT(m_array->size() == keys.size() + 1);
size_t ins_pos_refs; // first entry in refs points to offsets

// If we are keeping the complete string in the index
// we want to know if this is the last part
bool is_at_string_end = offset + 4 >= index_data.size();
{
// Get subnode table
Array keys(alloc);
get_child(*m_array, 0, keys);
REALM_ASSERT(m_array->size() == keys.size() + 1);

size_t ins_pos = keys.lower_bound_int(key);
size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
// If we are keeping the complete string in the index
// we want to know if this is the last part
bool is_at_string_end = offset + 4 >= index_data.size();

if (ins_pos == keys.size()) {
if (noextend)
return false;
size_t ins_pos = keys.lower_bound_int(key);
ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets

// When key is outside current range, we can just add it
keys.add(key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->add(shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->add(subindex.get_ref());
if (ins_pos == keys.size()) {
if (noextend)
return false;

// When key is outside current range, we can just add it
keys.add(key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->add(shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->add(subindex.get_ref());
}
return true;
}
return true;
}

key_type k = key_type(keys.get(ins_pos));
key_type k = key_type(keys.get(ins_pos));

// If key is not present we add it at the correct location
if (k != key) {
if (noextend)
return false;
// If key is not present we add it at the correct location
if (k != key) {
if (noextend)
return false;

keys.insert(ins_pos, key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->insert(ins_pos_refs, shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->insert(ins_pos_refs, subindex.get_ref());
keys.insert(ins_pos, key);
if (!m_target_column.full_word() || is_at_string_end) {
int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
m_array->insert(ins_pos_refs, shifted);
}
else {
// create subindex for rest of string
StringIndex subindex(m_target_column, m_array->get_alloc());
subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
m_array->insert(ins_pos_refs, subindex.get_ref());
}
return true;
}
return true;
}

// This leaf already has a slot for the key
Expand Down Expand Up @@ -1266,28 +1268,28 @@ bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, Strin
IntegerColumn sub(alloc, ref); // Throws
sub.set_parent(m_array.get(), ins_pos_refs);

IntegerColumn::const_iterator it_end = sub.cend();
IntegerColumn::const_iterator lower = it_end;
IntegerColumn::const_iterator lower = sub.cend();

auto value_exists_in_list = [&]() {
if (m_target_column.full_word()) {
lower = sub.cbegin();
return reconstruct_string(offset, key, index_data) == value.get_string();
}
bool value_exists_in_list = false;
if (m_target_column.full_word()) {
lower = sub.cbegin();
value_exists_in_list = reconstruct_string(offset, key, index_data) == value.get_string();
}
else {
SortedListComparator slc(m_target_column);
IntegerColumn::const_iterator it_end = lower;
lower = slc.find_start_of_unsorted(value, sub);

if (lower != it_end) {
Mixed lower_value = get(ObjKey(*lower));
if (lower_value == value) {
return true;
value_exists_in_list = true;
}
}
return false;
};
}

// If we found the value in this list, add the duplicate to the list.
if (value_exists_in_list()) {
if (value_exists_in_list) {
insert_to_existing_list_at_lower(obj_key, value, sub, lower);
}
else {
Expand Down
1 change: 1 addition & 0 deletions src/realm/index_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ class StringIndex : public SearchIndex {
// B-Tree functions
void TreeInsert(ObjKey obj_key, key_type, size_t offset, StringData index_data, const Mixed& value);
NodeChange do_insert(ObjKey, key_type, size_t offset, StringData index_data, const Mixed& value);
void new_node(const NodeChange&);
/// Returns true if there is room or it can join existing entries
bool leaf_insert(ObjKey obj_key, key_type, size_t offset, StringData index_data, const Mixed& value,
bool noextend = false);
Expand Down
8 changes: 2 additions & 6 deletions src/realm/node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,7 @@ class Node : public NodeHeader {
void set_parent(ArrayParent* parent, size_t ndx_in_parent) noexcept
{
m_parent = parent;
m_ndx_in_parent = ndx_in_parent;
}
void set_ndx_in_parent(size_t ndx) noexcept
{
m_ndx_in_parent = ndx;
m_ndx_in_parent = unsigned(ndx_in_parent);
}

void clear_missing_parent_update()
Expand Down Expand Up @@ -339,7 +335,7 @@ class Node : public NodeHeader {
private:
friend class NodeTree;
ArrayParent* m_parent = nullptr;
size_t m_ndx_in_parent = 0; // Ignored if m_parent is null.
unsigned m_ndx_in_parent = 0; // Ignored if m_parent is null.
bool m_missing_parent_update = false;

void do_copy_on_write(size_t minimum_size = 0);
Expand Down
6 changes: 0 additions & 6 deletions src/realm/search_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ class SearchIndex {
bool is_attached() const noexcept;
void set_parent(ArrayParent* parent, size_t ndx_in_parent) noexcept;
size_t get_ndx_in_parent() const noexcept;
void set_ndx_in_parent(size_t ndx_in_parent) noexcept;
void update_from_parent() noexcept;
void refresh_accessor_tree(const ClusterColumn& target_column);
ref_type get_ref() const noexcept;
Expand Down Expand Up @@ -174,11 +173,6 @@ inline size_t SearchIndex::get_ndx_in_parent() const noexcept
return m_root_array->get_ndx_in_parent();
}

/// Forward the given index-in-parent to the root array by calling
/// set_ndx_in_parent() on m_root_array.
inline void SearchIndex::set_ndx_in_parent(size_t ndx_in_parent) noexcept
{
m_root_array->set_ndx_in_parent(ndx_in_parent);
}

inline void SearchIndex::update_from_parent() noexcept
{
m_root_array->update_from_parent();
Expand Down
37 changes: 9 additions & 28 deletions src/realm/spec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,13 @@ void Spec::init(MemRef mem) noexcept
{
m_top.init_from_mem(mem);
size_t top_size = m_top.size();
REALM_ASSERT(top_size > s_attributes_ndx && top_size <= s_spec_max_size);
// Since Core6 we will always have the column keys array
REALM_ASSERT(top_size == s_spec_max_size);

m_types.init_from_ref(m_top.get_as_ref(s_types_ndx));
m_names.init_from_ref(m_top.get_as_ref(s_names_ndx));
m_attr.init_from_ref(m_top.get_as_ref(s_attributes_ndx));

while (m_top.size() < s_spec_max_size) {
m_top.add(0);
}

// Enumkeys array is only there when there are StringEnum columns
if (auto ref = m_top.get_as_ref(s_enum_keys_ndx)) {
m_enumkeys.init_from_ref(ref);
Expand All @@ -59,34 +56,22 @@ void Spec::init(MemRef mem) noexcept
m_enumkeys.detach();
}

if (m_top.get_as_ref(s_col_keys_ndx) == 0) {
// This is an upgrade - create column key array
MemRef mem_ref = Array::create_empty_array(Array::type_Normal, false, m_top.get_alloc()); // Throws
m_keys.init_from_mem(mem_ref);
m_keys.update_parent();
size_t num_cols = m_types.size();
for (size_t i = 0; i < num_cols; i++) {
m_keys.add(i);
}
}
else {
m_keys.init_from_parent();
}

m_keys.init_from_parent();
Copy link
Contributor

@ironage ironage Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

consider adding an assertion that m_top.get_as_ref(s_col_keys_ndx) is not zero (not blocking)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, we don't check the other entries and I am sure we will have an assertion down the line if the ref is zero.


update_internals();
}

void Spec::update_internals() noexcept
{
m_num_public_columns = 0;
size_t n = m_types.size();
for (size_t i = 0; i < n; ++i) {
if (ColumnType(int(m_types.get(i))) == col_type_BackLink) {
// Now we have no more public columns
m_num_public_columns = n;
// We normally have fewer backlink columns than public columns, so quicker to go backwards
for (size_t i = n; i; --i) {
if (ColumnType(int(m_types.get(i - 1))) != col_type_BackLink) {
// Now we have no more backlink columns. The rest must be public
return;
}
m_num_public_columns++;
m_num_public_columns--;
}
}

Expand Down Expand Up @@ -207,8 +192,6 @@ void Spec::insert_column(size_t column_ndx, ColKey col_key, ColumnType type, Str
if (m_enumkeys.is_attached() && type != col_type_BackLink) {
m_enumkeys.insert(column_ndx, 0);
}

update_internals();
}

void Spec::erase_column(size_t column_ndx)
Expand Down Expand Up @@ -246,8 +229,6 @@ void Spec::erase_column(size_t column_ndx)
m_types.erase(column_ndx); // Throws
m_attr.erase(column_ndx); // Throws
m_keys.erase(column_ndx);

update_internals();
}

void Spec::upgrade_string_to_enum(size_t column_ndx, ref_type keys_ref)
Expand Down
6 changes: 0 additions & 6 deletions src/realm/spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ class Spec {
void destroy() noexcept;

size_t get_ndx_in_parent() const noexcept;
void set_ndx_in_parent(size_t) noexcept;

void verify() const;

Expand Down Expand Up @@ -177,11 +176,6 @@ inline size_t Spec::get_ndx_in_parent() const noexcept
return m_top.get_ndx_in_parent();
}

/// Forward the given index-in-parent to m_top by calling its
/// set_ndx_in_parent().
inline void Spec::set_ndx_in_parent(size_t ndx) noexcept
{
m_top.set_ndx_in_parent(ndx);
}

inline ref_type Spec::get_ref() const noexcept
{
return m_top.get_ref();
Expand Down
8 changes: 0 additions & 8 deletions src/realm/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -838,8 +838,6 @@ class Table {

util::Logger* get_logger() const noexcept;

void set_ndx_in_parent(size_t ndx_in_parent) noexcept;

/// Refresh the part of the accessor tree that is rooted at this
/// table.
void refresh_accessor_tree();
Expand Down Expand Up @@ -1340,12 +1338,6 @@ inline bool Table::is_link_type(ColumnType col_type) noexcept
return col_type == col_type_Link;
}

/// Forward the given index-in-parent to m_top by calling its
/// set_ndx_in_parent().
inline void Table::set_ndx_in_parent(size_t ndx_in_parent) noexcept
{
// m_top must be attached before its index-in-parent is updated.
REALM_ASSERT(m_top.is_attached());
m_top.set_ndx_in_parent(ndx_in_parent);
}

inline size_t Table::colkey2spec_ndx(ColKey key) const
{
auto leaf_idx = key.get_index();
Expand Down
Loading