Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance optimizations for binning #283

Merged
merged 7 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 2.66.0

* Only bin by ID, not by geometry, if --bin-by-id-list is specified
* Do attribute accumulation in overzoom in mvt_value instead of converting to serial_val
* Fix bool values read from flatgeobuf sources (#289)

# 2.65.0

* Improve spatial distribution of --retain-points-multiplier features
Expand Down
63 changes: 30 additions & 33 deletions attribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,36 +88,33 @@ void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribut
set_attribute_accum(attribute_accum, name, type);
}

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
template <class T>
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::string> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
for (size_t i = 0; i < full_keys.size(); i++) {
if (key == full_keys[i]) {
switch (op) {
case op_sum:
full_values[i].s = milo::dtoa_milo(atof(full_values[i].s.c_str()) + atof(val.s.c_str()));
full_values[i].type = mvt_double;
full_values[i] = (full_values[i].to_double() + val.to_double());
return;

case op_product:
full_values[i].s = milo::dtoa_milo(atof(full_values[i].s.c_str()) * atof(val.s.c_str()));
full_values[i].type = mvt_double;
full_values[i] = (full_values[i].to_double() * val.to_double());
return;

case op_max: {
double existing = atof(full_values[i].s.c_str());
double maybe = atof(val.s.c_str());
double existing = full_values[i].to_double();
double maybe = val.to_double();
if (maybe > existing) {
full_values[i].s = val.s.c_str();
full_values[i].type = mvt_double;
full_values[i] = val;
}
return;
}

case op_min: {
double existing = atof(full_values[i].s.c_str());
double maybe = atof(val.s.c_str());
double existing = full_values[i].to_double();
double maybe = val.to_double();
if (maybe < existing) {
full_values[i].s = val.s.c_str();
full_values[i].type = mvt_double;
full_values[i] = val;
}
return;
}
Expand All @@ -126,30 +123,26 @@ void preserve_attribute(attribute_op const &op, std::string const &key, serial_v
auto state = attribute_accum_state.find(key);
if (state == attribute_accum_state.end()) {
accum_state s;
s.sum = atof(full_values[i].s.c_str()) + atof(val.s.c_str());
s.sum = full_values[i].to_double() + val.to_double();
s.count = 2;
attribute_accum_state.insert(std::pair<std::string, accum_state>(key, s));

full_values[i].s = milo::dtoa_milo(s.sum / s.count);
full_values[i].type = mvt_double;
full_values[i] = (s.sum / s.count);
} else {
state->second.sum += atof(val.s.c_str());
state->second.sum += val.to_double();
state->second.count += 1;

full_values[i].s = milo::dtoa_milo(state->second.sum / state->second.count);
full_values[i].type = mvt_double;
full_values[i] = (state->second.sum / state->second.count);
}
return;
}

case op_concat:
full_values[i].s += val.s;
full_values[i].type = mvt_string;
full_values[i].set_string_value(full_values[i].get_string_value() + val.get_string_value());
return;

case op_comma:
full_values[i].s += std::string(",") + val.s;
full_values[i].type = mvt_string;
full_values[i].set_string_value(full_values[i].get_string_value() + "," + val.get_string_value());
return;

case op_count: {
Expand All @@ -159,12 +152,10 @@ void preserve_attribute(attribute_op const &op, std::string const &key, serial_v
s.count = 2;
attribute_accum_state.insert(std::pair<std::string, accum_state>(key, s));

full_values[i].type = mvt_double;
full_values[i].s = std::to_string(s.count);
full_values[i] = (s.count);
} else { // already present, incrementing
state->second.count += 1;
full_values[i].type = mvt_double;
full_values[i].s = std::to_string(state->second.count);
full_values[i] = (state->second.count);
}
return;
}
Expand All @@ -174,13 +165,12 @@ void preserve_attribute(attribute_op const &op, std::string const &key, serial_v

// not found, so we are making a new value

serial_val sv;
T v;
switch (op) {
case op_sum:
case op_max:
case op_min:
sv.s = val.s;
sv.type = mvt_double;
v = val;
break;

case op_count: {
Expand All @@ -190,12 +180,11 @@ void preserve_attribute(attribute_op const &op, std::string const &key, serial_v
s.count = 1;
attribute_accum_state.insert(std::pair<std::string, accum_state>(key, s));

sv.s = std::to_string(s.count);
v = (s.count);
} else { // already present, incrementing
fprintf(stderr, "preserve_attribute: can't happen (count)\n");
exit(EXIT_IMPOSSIBLE);
}
sv.type = mvt_double;
break;
}

Expand All @@ -205,5 +194,13 @@ void preserve_attribute(attribute_op const &op, std::string const &key, serial_v
}

full_keys.push_back(key);
full_values.push_back(sv);
full_values.push_back(v);
}

// Accumulate `val` onto the attribute named `key`, for attributes held as mvt_value.
//
// This overload contains no logic of its own: it forwards every argument
// unchanged to the shared template preserve_attribute1, which applies the
// accumulation operation `op` to the matching entry of full_keys/full_values
// (or appends a new entry when `key` is not yet present) and keeps any
// running mean/count state in attribute_accum_state.
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
}

// Accumulate `val` onto the attribute named `key`, for attributes held as serial_val.
//
// This overload contains no logic of its own: it forwards every argument
// unchanged to the shared template preserve_attribute1, so the serial_val
// and mvt_value overloads run the same accumulation code and differ only in
// the value type the template is instantiated with.
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
}
6 changes: 5 additions & 1 deletion attribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include <vector>
#include <unordered_map>
#include <map>
#include "mvt.hpp"
#include "milo/dtoa_milo.h"

enum attribute_op {
op_sum,
Expand All @@ -25,7 +27,9 @@ struct serial_val;

void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, std::string name, std::string type);
void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, const char *arg, char **argv);
void preserve_attribute(attribute_op const &op, const std::string &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);

extern std::map<std::string, attribute_op> numeric_operations;

Expand Down
53 changes: 25 additions & 28 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,7 +1165,7 @@ static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &
// accumulate :sum:, :min:, :max:, and :count: versions of the specified attribute
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::string> &full_keys, // keys of feature being accumulated onto
std::vector<serial_val> &full_values, // values of features being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
Expand Down Expand Up @@ -1218,49 +1218,45 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
if (op.second == op_count) {
if (starting_from_accumulation) {
// copy our count
full_values.push_back(mvt_value_to_serial_val(val));
full_values.push_back(val);
} else {
// new count of 1
serial_val sv;
sv.type = mvt_double;
sv.s = "1";
full_values.push_back(sv);
full_values.push_back(mvt_value(1));
}
} else {
full_values.push_back(mvt_value_to_serial_val(val));
full_values.push_back(val);
}
} else {
// exists unprefixed, so copy it, and then accumulate on our value
numeric_out_field.emplace(prefixed, full_keys.size());
full_keys.push_back(prefixed);

if (op.second == op_count) {
serial_val sv;
sv.type = mvt_double;
mvt_value v;
if (starting_from_accumulation) {
// sum our count onto the existing 1
sv.s = std::to_string(1 + mvt_value_to_long_long(val));
v = mvt_value(1 + mvt_value_to_long_long(val));
} else {
// sum our 1 onto the existing 1
sv.s = "2";
v = mvt_value(2);
}
full_values.push_back(sv);
full_values.push_back(v);
} else {
full_values.push_back(full_values[out_attr->second]);
preserve_attribute(op.second, prefixed, mvt_value_to_serial_val(val), full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
}
}
} else {
// exists, so accumulate on our value
if (op.second == op_count) {
if (starting_from_accumulation) {
// sum our count onto the existing count
full_values[prefixed_attr->second].s = std::to_string(atoll(full_values[prefixed_attr->second].s.c_str()) + mvt_value_to_long_long(val));
full_values[prefixed_attr->second] = mvt_value(mvt_value_to_long_long(full_values[prefixed_attr->second]) + mvt_value_to_long_long(val));
} else {
full_values[prefixed_attr->second].s = std::to_string(atoll(full_values[prefixed_attr->second].s.c_str()) + 1);
full_values[prefixed_attr->second] = mvt_value(mvt_value_to_long_long(full_values[prefixed_attr->second]) + 1);
}
} else {
preserve_attribute(op.second, prefixed, mvt_value_to_serial_val(val), full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
}
}
}
Expand Down Expand Up @@ -1293,7 +1289,6 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix,
std::unordered_map<std::string, attribute_op> const &attribute_accum,
std::shared_ptr<std::string> const &tile_stringpool,
std::string const &accumulate_numeric) {
// Add geometry to output feature

Expand All @@ -1315,13 +1310,13 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou

if (attribute_accum.size() > 0 || accumulate_numeric.size() > 0) {
// convert the attributes of the output feature
// from mvt_value to serial_val so they can have
// from layer references to a vector so they can have
// attributes from the other features of the
// multiplier cluster accumulated onto them

std::unordered_map<std::string, accum_state> attribute_accum_state;
std::vector<std::string> full_keys;
std::vector<serial_val> full_values;
std::vector<mvt_value> full_values;
std::map<std::string, size_t> numeric_out_field;

for (size_t i = 0; i + 1 < features[0].tags.size(); i += 2) {
Expand All @@ -1330,12 +1325,12 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
if (f != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(features[0].layer->keys[features[0].tags[i]]);
full_values.push_back(mvt_value_to_serial_val(features[0].layer->values[features[0].tags[i + 1]]));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && features[0].layer->values[features[0].tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key);
full_values.push_back(mvt_value_to_serial_val(features[0].layer->values[features[0].tags[i + 1]]));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else {
// otherwise just tag it directly onto the output feature
if (should_keep(features[0].layer->keys[features[0].tags[i]], keep, exclude, exclude_prefix)) {
Expand All @@ -1361,7 +1356,7 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou

auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
serial_val val = mvt_value_to_serial_val(features[i].layer->values[features[i].tags[j + 1]]);
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
Expand All @@ -1379,7 +1374,7 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou

for (size_t i = 0; i < full_keys.size(); i++) {
if (should_keep(full_keys[i], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, full_keys[i], stringified_to_mvt_value(full_values[i].type, full_values[i].s.c_str(), tile_stringpool));
outlayer.tag(outfeature, full_keys[i], full_values[i]);
}
}

Expand Down Expand Up @@ -1570,7 +1565,6 @@ mvt_tile assign_to_bins(mvt_tile &features,
outlayer.name = features.layers[0].name;

std::vector<std::vector<tile_feature>> outfeatures;
std::shared_ptr<std::string> tile_stringpool = std::make_shared<std::string>();

for (auto &e : events) {
if (e.kind == index_event::ENTER) {
Expand Down Expand Up @@ -1629,6 +1623,10 @@ mvt_tile assign_to_bins(mvt_tile &features,

active.insert(std::move(a));
} else if (e.kind == index_event::CHECK) {
if (bin_by_id_list.size() > 0) {
continue; // only bin by id, not geometrically
}

auto const &feature = features.layers[e.layer].features[e.feature];

if (feature.geometry.size() == 0) {
Expand Down Expand Up @@ -1680,7 +1678,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
if (outfeatures[i].size() > 1) {
feature_out(outfeatures[i], outlayer,
keep, exclude, exclude_prefix, attribute_accum,
tile_stringpool, accumulate_numeric);
accumulate_numeric);
mvt_feature &nfeature = outlayer.features.back();
mvt_value val;
val.type = mvt_uint;
Expand Down Expand Up @@ -1715,7 +1713,6 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
std::vector<mvt_layer> const &bins, std::string const &bin_by_id_list,
std::string const &accumulate_numeric) {
mvt_tile outtile;
std::shared_ptr<std::string> tile_stringpool = std::make_shared<std::string>();

for (auto const &tile : tiles) {
for (auto const &layer : tile.tile.layers) {
Expand Down Expand Up @@ -1840,7 +1837,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int

if (flush_multiplier_cluster) {
if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, tile_stringpool, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
pending_tile_features.clear();
}
}
Expand Down Expand Up @@ -1897,7 +1894,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
}

if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, tile_stringpool, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
pending_tile_features.clear();
}

Expand Down
Loading
Loading