Skip to content

Commit

Permalink
shard the node store to reduce memory usage
Browse files Browse the repository at this point in the history
It looks like PR #499 adopts 36 bits as the max size of an OSM ID.

The NodeStore currently uses a full 64 bits for these IDs. This PR
changes it to shard the nodes across 16 collections (selected by the
top 4 bits of the 36-bit ID) and then store only the low 32 bits of
the ID in the collection itself.

This reduces memory usage for the NodeStore by 25%, without much
impact on runtime.

The CompactNodeStore is still much better, as it has no overhead and
constant-time lookups -- but I'm often lazy and don't use a renumbered
PBF file.
  • Loading branch information
cldellow committed Nov 5, 2023
1 parent fcec727 commit cbd06b1
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 15 deletions.
4 changes: 4 additions & 0 deletions include/geom.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ typedef boost::variant<Point,Linestring,MultiLinestring,MultiPolygon> Geometry;
// Entry type for the spatial index: a Box paired with an unsigned integer
// (presumably an id/offset of the indexed object -- TODO confirm against users).
typedef std::pair<Box, uint> IndexValue;
// Boost.Geometry R-tree over IndexValue, using the quadratic split strategy
// with at most 16 entries per node.
typedef boost::geometry::index::rtree< IndexValue, boost::geometry::index::quadratic<16> > RTree;

// Every OSM node ID fits in a 36-bit integer. The ID space is split across
// NODE_SHARDS (16) collections, each of which keeps its members as 32-bit integers.
#define NODE_SHARDS 16
using ShardedNodeID = uint32_t;
using NodeID = uint64_t;
using WayID = uint64_t;

Expand Down
58 changes: 47 additions & 11 deletions include/osm_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,34 @@ class NodeStore

public:
using element_t = std::pair<NodeID, LatpLon>;
using internal_element_t = std::pair<ShardedNodeID, LatpLon>;
using map_t = std::deque<element_t, mmap_allocator<element_t>>;

// @brief Drop all stored nodes and recreate one empty collection per shard.
// Acquires the store mutex itself, so it must not be called while the
// (non-recursive) mutex is already held by the calling thread.
void reopen()
{
	std::lock_guard<std::mutex> lock(mutex);

	// Empty every existing collection explicitly before releasing our reference to it.
	for (auto &shard : mLatpLons)
		shard->clear();

	mLatpLons.clear();
	for (size_t i = 0; i < NODE_SHARDS; i++) {
		mLatpLons.push_back(std::make_unique<map_t>());
	}
}

// @brief Lookup a latp/lon pair
// @param i OSM ID of a node
// @return Latp/lon pair
// @exception NotFound
LatpLon at(NodeID i) const {
auto iter = std::lower_bound(mLatpLons->begin(), mLatpLons->end(), i, [](auto const &e, auto i) {
auto shard = mLatpLons[shardPart(i)];
auto id = idPart(i);

auto iter = std::lower_bound(shard->begin(), shard->end(), id, [](auto const &e, auto i) {
return e.first < i;
});

if(iter == mLatpLons->end() || iter->first != i)
if(iter == shard->end() || iter->first != id)
throw std::out_of_range("Could not find node with id " + std::to_string(i));

return iter->second;
Expand All @@ -105,7 +115,11 @@ class NodeStore
// @brief Return the number of stored items
// @return Total number of nodes, summed across every shard
size_t size() const {
	std::lock_guard<std::mutex> lock(mutex);

	size_t total = 0;
	for (auto const &shard : mLatpLons)
		total += shard->size();

	return total;
}

// @brief Insert a latp/lon pair.
Expand All @@ -114,27 +128,49 @@ class NodeStore
// @invariant The OSM ID i must be larger than previously inserted OSM IDs of nodes
// (though unnecessarily for current impl, future impl may impose that)
// The node is appended to the shard selected by shardPart(i); only idPart(i)
// is stored alongside the coordinate.
// This overload does not take the store mutex.
// NOTE(review): concurrent callers presumably need external synchronisation -- confirm.
// @exception std::out_of_range if the ID maps to a shard that does not exist
void insert_back(NodeID i, LatpLon coord) {
	// vector::at() turns an out-of-range shard index into an exception instead of
	// undefined behaviour (e.g. an ID wider than 36 bits).
	mLatpLons.at(shardPart(i))->push_back(std::make_pair(idPart(i), coord));
}

void insert_back(std::vector<element_t> const &element) {
uint32_t newEntries[NODE_SHARDS] = {};

// Before taking the lock, do a pass to find out how much
// to grow each backing collection
for (auto it = element.begin(); it != element.end(); it++) {
newEntries[shardPart(it->first)]++;
}

std::lock_guard<std::mutex> lock(mutex);
auto i = mLatpLons->size();
mLatpLons->resize(i + element.size());
std::copy(element.begin(), element.end(), mLatpLons->begin() + i);
for (auto i = 0; i < NODE_SHARDS; i++) {
if (newEntries[i] == 0) continue;
auto size = mLatpLons[i]->size();
mLatpLons[i]->resize(size + newEntries[i]);
}

for (auto it = element.begin(); it != element.end(); it++) {
insert_back(it->first, it->second);
}
}

// @brief Make the store empty
// Delegates to reopen(), which acquires the store mutex itself. The mutex is a
// plain std::mutex (non-recursive), so it must not be locked here as well:
// doing so would make the calling thread block on its own lock.
void clear() {
	reopen();
}

// @brief Sort the stored nodes by ID, one shard at a time
// @param threadNum Thread count handed to the underlying parallel sort
void sort(unsigned int threadNum);

private:
// Guards mLatpLons; mutable so that const accessors such as size() can lock it.
mutable std::mutex mutex;
// One backing collection per shard, indexed by shardPart(id).
std::vector<std::shared_ptr<map_t>> mLatpLons;

// Index of the shard that holds this node: the ID with its low 32 bits shifted out.
uint32_t shardPart(NodeID id) const {
	return static_cast<uint32_t>(id >> 32);
}

// Portion of the ID that is stored inside a shard: the low 32 bits
// (the upper bits are dropped by the narrowing conversion).
uint32_t idPart(NodeID id) const {
	const uint32_t lowBits = static_cast<uint32_t>(id);
	return lowBits;
}
};

class CompactNodeStore
Expand Down
10 changes: 6 additions & 4 deletions src/osm_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,12 @@ void void_mmap_allocator::destroy(void *p)

// Sorts every shard by its stored ID key so that lookups can binary-search it.
// Shards are sorted one after another while the store mutex is held.
// @param threadNum Thread count passed to boost::sort::block_indirect_sort
void NodeStore::sort(unsigned int threadNum) {
	std::lock_guard<std::mutex> lock(mutex);
	// Iterate over the shards that actually exist rather than indexing
	// NODE_SHARDS entries unconditionally.
	for (auto &shard : mLatpLons) {
		boost::sort::block_indirect_sort(
			shard->begin(), shard->end(),
			[](auto const &a, auto const &b) { return a.first < b.first; },
			threadNum);
	}
}

void WayStore::sort(unsigned int threadNum) {
Expand Down

0 comments on commit cbd06b1

Please sign in to comment.