Skip to content

Commit

Permalink
remove PossiblyKnownTagValue
Browse files Browse the repository at this point in the history
When I replaced #604 with #626, I botched extracting this part of the
code. I had the trait, which taught kaguya how to serialize
`PossiblyKnownTagValue`, but I missed updating the parameter type
of `Attribute` to actually use it, so it was a no-op.

This PR restores the behaviour of avoiding string copies, but now that
we have protozero's data_view class, we can use that rather than
our own weirdo struct.
  • Loading branch information
cldellow committed Jan 12, 2024
1 parent d749f76 commit b77340c
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 76 deletions.
8 changes: 5 additions & 3 deletions include/attribute_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <boost/functional/hash.hpp>
#include <boost/container/flat_map.hpp>
#include <vector>
#include <protozero/data_view.hpp>
#include "pooled_string.h"
#include "deque_map.h"

Expand Down Expand Up @@ -126,7 +127,7 @@ struct AttributePair {

void ensureStringIsOwned();

static bool isHot(const std::string& keyName, const std::string& value) {
static bool isHot(const std::string& keyName, const protozero::data_view value) {
// Is this pair a candidate for the hot pool?

// Hot pairs are pairs that we think are likely to be re-used, like
Expand All @@ -139,7 +140,8 @@ struct AttributePair {

// Only strings that are IDish are eligible: only lowercase letters.
bool ok = true;
for (const auto& c: value) {
for (size_t i = 0; i < value.size(); i++) {
const auto c = value.data()[i];
if (c != '-' && c != '_' && (c < 'a' || c > 'z'))
return false;
}
Expand Down Expand Up @@ -404,7 +406,7 @@ struct AttributeStore {
void reportSize() const;
void finalize();

void addAttribute(AttributeSet& attributeSet, std::string const &key, const std::string& v, char minzoom);
void addAttribute(AttributeSet& attributeSet, std::string const &key, const protozero::data_view v, char minzoom);
void addAttribute(AttributeSet& attributeSet, std::string const &key, float v, char minzoom);
void addAttribute(AttributeSet& attributeSet, std::string const &key, bool v, char minzoom);

Expand Down
22 changes: 3 additions & 19 deletions include/osm_lua_processing.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,6 @@ extern bool verbose;
class AttributeStore;
class AttributeSet;

// A string, which might be in `currentTags` as a value. If Lua
// code refers to an absent value, it'll fallback to passing
// it as a std::string.
//
// The intent is that Attribute("name", Find("name")) is a common
// pattern, and we ought to avoid marshalling a string back and
// forth from C++ to Lua when possible.
struct PossiblyKnownTagValue {
bool found;
uint32_t index;
std::string fallback;
};

/**
\brief OsmLuaProcessing - converts OSM objects into OutputObjects.
Expand Down Expand Up @@ -183,12 +170,9 @@ class OsmLuaProcessing {
void LayerAsCentroid(const std::string &layerName, kaguya::VariadicArgType nodeSources);

// Set attributes in a vector tile's Attributes table
void Attribute(const std::string &key, const std::string &val);
void AttributeWithMinZoom(const std::string &key, const std::string &val, const char minzoom);
void AttributeNumeric(const std::string &key, const float val);
void AttributeNumericWithMinZoom(const std::string &key, const float val, const char minzoom);
void AttributeBoolean(const std::string &key, const bool val);
void AttributeBooleanWithMinZoom(const std::string &key, const bool val, const char minzoom);
void Attribute(const std::string &key, const protozero::data_view val, const char minzoom);
void AttributeNumeric(const std::string &key, const float val, const char minzoom);
void AttributeBoolean(const std::string &key, const bool val, const char minzoom);
void MinZoom(const double z);
void ZOrder(const double z);

Expand Down
8 changes: 5 additions & 3 deletions include/pooled_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <cstdint>
#include <vector>
#include <string>
#include <protozero/data_view.hpp>

namespace PooledStringNS {
class PooledString {
Expand All @@ -40,9 +41,10 @@ namespace PooledStringNS {
PooledString(const std::string& str);


// Create a std string - only valid so long as the string that is
// pointed to is valid.
PooledString(const std::string* str);
// Wrap a protozero::data_view - only valid so long as the
// data_view that is pointed to is valid; call ensureStringIsOwned
// if you need to persist the string.
PooledString(const protozero::data_view* str);
size_t size() const;
bool operator<(const PooledString& other) const;
bool operator==(const PooledString& other) const;
Expand Down
2 changes: 1 addition & 1 deletion src/attribute_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ void AttributeSet::removePairWithKey(const AttributePairStore& pairStore, uint32
}
}

void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const &key, const std::string& v, char minzoom) {
void AttributeStore::addAttribute(AttributeSet& attributeSet, std::string const &key, const protozero::data_view v, char minzoom) {
PooledString ps(&v);
AttributePair kv(keyStore.key2index(key), ps, minzoom);
bool isHot = AttributePair::isHot(key, v);
Expand Down
49 changes: 14 additions & 35 deletions src/osm_lua_processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ template<> struct kaguya::lua_type_traits<KnownTagKey> {
}
};

template<> struct kaguya::lua_type_traits<PossiblyKnownTagValue> {
typedef PossiblyKnownTagValue get_type;
typedef const PossiblyKnownTagValue& push_type;
template<> struct kaguya::lua_type_traits<protozero::data_view> {
typedef protozero::data_view get_type;
typedef const protozero::data_view& push_type;

static bool strictCheckType(lua_State* l, int index)
{
Expand All @@ -92,32 +92,14 @@ template<> struct kaguya::lua_type_traits<PossiblyKnownTagValue> {
}
static get_type get(lua_State* l, int index)
{
PossiblyKnownTagValue rv = { false, 0 };
size_t size = 0;
const char* buffer = lua_tolstring(l, index, &size);

// For long strings where we might need to do a malloc, see if we
// can instead pass a pointer to a value from this object's tag
// map.
//
// 15 is the threshold where gcc no longer applies the small string
// optimization.
if (size > 15) {
int64_t tagLoc = osmLuaProcessing->currentTags->getValue(buffer, size);

if (tagLoc >= 0) {
rv.found = true;
rv.index = tagLoc;
return rv;
}
}

rv.fallback = std::string(buffer, size);
protozero::data_view rv = { buffer, size };
return rv;
}
static int push(lua_State* l, push_type s)
{
throw std::runtime_error("Lua code doesn't know how to use PossiblyKnownTagValue");
throw std::runtime_error("Lua code doesn't know how to use protozero::data_view");
}
};

Expand Down Expand Up @@ -203,16 +185,16 @@ OsmLuaProcessing::OsmLuaProcessing(
luaState["Layer"] = &rawLayer;
luaState["LayerAsCentroid"] = &rawLayerAsCentroid;
luaState["Attribute"] = kaguya::overload(
[](const std::string &key, const std::string& val) { osmLuaProcessing->Attribute(key, val); },
[](const std::string &key, const std::string& val, const char minzoom) { osmLuaProcessing->AttributeWithMinZoom(key, val, minzoom); }
[](const std::string &key, const protozero::data_view val) { osmLuaProcessing->Attribute(key, val, 0); },
[](const std::string &key, const protozero::data_view val, const char minzoom) { osmLuaProcessing->Attribute(key, val, minzoom); }
);
luaState["AttributeNumeric"] = kaguya::overload(
[](const std::string &key, const float val) { osmLuaProcessing->AttributeNumeric(key, val); },
[](const std::string &key, const float val, const char minzoom) { osmLuaProcessing->AttributeNumericWithMinZoom(key, val, minzoom); }
[](const std::string &key, const float val) { osmLuaProcessing->AttributeNumeric(key, val, 0); },
[](const std::string &key, const float val, const char minzoom) { osmLuaProcessing->AttributeNumeric(key, val, minzoom); }
);
luaState["AttributeBoolean"] = kaguya::overload(
[](const std::string &key, const bool val) { osmLuaProcessing->AttributeBoolean(key, val); },
[](const std::string &key, const bool val, const char minzoom) { osmLuaProcessing->AttributeBooleanWithMinZoom(key, val, minzoom); }
[](const std::string &key, const bool val) { osmLuaProcessing->AttributeBoolean(key, val, 0); },
[](const std::string &key, const bool val, const char minzoom) { osmLuaProcessing->AttributeBoolean(key, val, minzoom); }
);

luaState["MinZoom"] = &rawMinZoom;
Expand Down Expand Up @@ -786,25 +768,22 @@ void OsmLuaProcessing::removeAttributeIfNeeded(const string& key) {
}

// Set attributes in a vector tile's Attributes table
void OsmLuaProcessing::Attribute(const string &key, const string &val) { AttributeWithMinZoom(key,val,0); }
void OsmLuaProcessing::AttributeWithMinZoom(const string &key, const string &val, const char minzoom) {
void OsmLuaProcessing::Attribute(const string &key, const protozero::data_view val, const char minzoom) {
if (val.size()==0) { return; } // don't set empty strings
if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; }
removeAttributeIfNeeded(key);
attributeStore.addAttribute(outputs.back().second, key, val, minzoom);
setVectorLayerMetadata(outputs.back().first.layer, key, 0);
}

void OsmLuaProcessing::AttributeNumeric(const string &key, const float val) { AttributeNumericWithMinZoom(key,val,0); }
void OsmLuaProcessing::AttributeNumericWithMinZoom(const string &key, const float val, const char minzoom) {
void OsmLuaProcessing::AttributeNumeric(const string &key, const float val, const char minzoom) {
if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; }
removeAttributeIfNeeded(key);
attributeStore.addAttribute(outputs.back().second, key, val, minzoom);
setVectorLayerMetadata(outputs.back().first.layer, key, 1);
}

void OsmLuaProcessing::AttributeBoolean(const string &key, const bool val) { AttributeBooleanWithMinZoom(key,val,0); }
void OsmLuaProcessing::AttributeBooleanWithMinZoom(const string &key, const bool val, const char minzoom) {
void OsmLuaProcessing::AttributeBoolean(const string &key, const bool val, const char minzoom) {
if (outputs.size()==0) { ProcessingError("Can't add Attribute if no Layer set"); return; }
removeAttributeIfNeeded(key);
attributeStore.addAttribute(outputs.back().second, key, val, minzoom);
Expand Down
24 changes: 12 additions & 12 deletions src/pooled_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace PooledStringNS {

const uint8_t ShortString = 0b00;
const uint8_t HeapString = 0b10;
const uint8_t StdString = 0b11;
const uint8_t DataViewString = 0b11;

// Each thread has its own string table, we only take a lock
// to push a new table onto the vector.
Expand Down Expand Up @@ -57,16 +57,16 @@ PooledString::PooledString(const std::string& str) {
}
}

PooledString::PooledString(const std::string* str) {
storage[0] = StdString << 6;
PooledString::PooledString(const protozero::data_view* str) {
storage[0] = DataViewString << 6;

*(const std::string**)((void*)(storage + 8)) = str;
*(const protozero::data_view**)((void*)(storage + 8)) = str;
}

bool PooledStringNS::PooledString::operator==(const PooledString& other) const {
// NOTE: We have surprising equality semantics!
//
// If one of the strings is a StdString, it's value equality.
// If one of the strings is a DataViewString, it's value equality.
//
// Else, for short strings, you are equal if the strings are equal.
//
Expand All @@ -76,7 +76,7 @@ bool PooledStringNS::PooledString::operator==(const PooledString& other) const {
uint8_t kind = storage[0] >> 6;
uint8_t otherKind = other.storage[0] >> 6;

if (kind == StdString || otherKind == StdString) {
if (kind == DataViewString || otherKind == DataViewString) {
size_t mySize = size();
if (mySize != other.size())
return false;
Expand All @@ -97,8 +97,8 @@ const char* PooledStringNS::PooledString::data() const {
if (kind == ShortString)
return (char *)(storage + 1);

if (kind == StdString) {
const std::string* str = *(const std::string**)((void*)(storage + 8));
if (kind == DataViewString) {
const protozero::data_view* str = *(const protozero::data_view**)((void*)(storage + 8));
return str->data();
}

Expand All @@ -121,7 +121,7 @@ size_t PooledStringNS::PooledString::size() const {
// Otherwise it's stored in the lower 7 bits of the highest byte.
return storage[0] & 0b01111111;

const std::string* str = *(const std::string**)((void*)(storage + 8));
const protozero::data_view* str = *(const protozero::data_view**)((void*)(storage + 8));
return str->size();
}

Expand All @@ -146,14 +146,14 @@ std::string PooledStringNS::PooledString::toString() const {
return rv;
}

const std::string* str = *(const std::string**)((void*)(storage + 8));
return *str;
const protozero::data_view* str = *(const protozero::data_view**)((void*)(storage + 8));
return std::string(str->data(), str->size());
}

void PooledStringNS::PooledString::ensureStringIsOwned() {
uint8_t kind = storage[0] >> 6;

if (kind != StdString)
if (kind != DataViewString)
return;

*this = PooledString(toString());
Expand Down
6 changes: 4 additions & 2 deletions test/pooled_string.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ MU_TEST(test_pooled_string) {
mu_check(big.toString() != big2.toString());

std::string shortString("short");
protozero::data_view shortStringView = { shortString.data(), shortString.size() };
std::string longString("this is a very long string");
protozero::data_view longStringView = { longString.data(), longString.size() };

PooledString stdShortString(&shortString);
PooledString stdShortString(&shortStringView);
mu_check(stdShortString.size() == 5);
mu_check(stdShortString.toString() == "short");

PooledString stdLongString(&longString);
PooledString stdLongString(&longStringView);
mu_check(stdLongString.size() == 26);
mu_check(stdLongString.toString() == "this is a very long string");

Expand Down
4 changes: 3 additions & 1 deletion test/sorted_way_store.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ class TestNodeStore : public NodeStore {
void insert(const std::vector<std::pair<NodeID, LatpLon>>& elements) override {}

bool contains(size_t shard, NodeID id) const override { return true; }
size_t shard() const override { return 0; }
NodeStore& shard(size_t shard) override { return *this; }
const NodeStore& shard(size_t shard) const override { return *this; }

size_t shards() const override { return 1; }
};

Expand Down

0 comments on commit b77340c

Please sign in to comment.