-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Spatial join area #1713
base: master
Are you sure you want to change the base?
Spatial join area #1713
Changes from all commits
c7a1ea8
a266201
2cc8504
d784513
f6e1647
1a4ff8f
e3a2644
f38cb1d
4d71c4e
38c9585
ddea6d3
1902d44
31e7054
cba4324
d5c934c
c1dc963
c5284ef
fc1a1fe
ec75e89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
|
||
#include <cmath> | ||
|
||
#include "engine/ExportQueryExecutionTrees.h" | ||
#include "engine/SpatialJoin.h" | ||
#include "util/GeoSparqlHelpers.h" | ||
|
||
|
@@ -37,19 +38,103 @@ | |
: std::nullopt; | ||
}; | ||
|
||
// ____________________________________________________________________________ | ||
std::string SpatialJoinAlgorithms::betweenQuotes( | ||
std::string extractFrom) const { | ||
size_t pos1 = extractFrom.find("\"", 0); | ||
size_t pos2 = extractFrom.find("\"", pos1 + 1); | ||
if (pos1 != std::string::npos && pos2 != std::string::npos) { | ||
return extractFrom.substr(pos1 + 1, pos2 - pos1 - 1); | ||
} else { | ||
return extractFrom; | ||
} | ||
} | ||
|
||
struct DistanceVisitor : public boost::static_visitor<double> { | ||
template <typename Geometry1, typename Geometry2> | ||
double operator()(const Geometry1& geom1, const Geometry2& geom2) const { | ||
return bg::distance(geom1, geom2); | ||
} | ||
}; | ||
|
||
// ____________________________________________________________________________ | ||
Id SpatialJoinAlgorithms::computeDist(const IdTable* idTableLeft, | ||
const IdTable* idTableRight, | ||
size_t rowLeft, size_t rowRight, | ||
ColumnIndex leftPointCol, | ||
ColumnIndex rightPointCol) const { | ||
auto getAreaString = [&](const IdTable* idtable, size_t row, size_t col) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think your algorithm needs the parsing at multiple points |
||
return betweenQuotes(ExportQueryExecutionTrees::idToStringAndType( | ||
qec_->getIndex(), idtable->at(row, col), {}) | ||
.value() | ||
.first); | ||
}; | ||
|
||
auto getAreaOrPointGeometry = | ||
[&](const IdTable* idtable, size_t row, size_t col, | ||
std::optional<GeoPoint> point) -> std::optional<AnyGeometry> { | ||
AnyGeometry geometry; | ||
try { | ||
if (!point) { | ||
boost::geometry::read_wkt(getAreaString(idtable, row, col), geometry); | ||
} else { | ||
geometry = Point(point.value().getLng(), point.value().getLat()); | ||
} | ||
} catch (...) { | ||
return std::nullopt; | ||
} | ||
Comment on lines
+83
to
+85
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does boost geometry have a mode or overload of |
||
return geometry; | ||
}; | ||
|
||
// for now we need to get the data from the disk, but in the future, this | ||
// information will be stored in an ID, just like GeoPoint | ||
auto getAreaPoint = [&](const IdTable* idtable, size_t row, | ||
size_t col) -> std::optional<GeoPoint> { | ||
std::string areastring = getAreaString(idtable, row, col); | ||
|
||
Box areaBox; | ||
try { | ||
areaBox = calculateBoundingBoxOfArea(areastring); | ||
} catch (...) { | ||
// nothing to do. When parsing a point or an area fails, a warning message | ||
// gets printed at another place and the point/area just gets skipped | ||
return std::nullopt; | ||
} | ||
Point p = calculateMidpointOfBox(areaBox); | ||
return GeoPoint(p.get<1>(), p.get<0>()); | ||
}; | ||
|
||
auto point1 = getPoint(idTableLeft, rowLeft, leftPointCol); | ||
auto point2 = getPoint(idTableRight, rowRight, rightPointCol); | ||
if (!point1.has_value() || !point2.has_value()) { | ||
return Id::makeUndefined(); | ||
if (useMidpointForAreas_) { | ||
if (!point1) { | ||
point1 = getAreaPoint(idTableLeft, rowLeft, leftPointCol); | ||
} | ||
|
||
if (!point2) { | ||
point2 = getAreaPoint(idTableRight, rowRight, rightPointCol); | ||
} | ||
|
||
if (!point1.has_value() || !point2.has_value()) { | ||
return Id::makeUndefined(); | ||
} | ||
return Id::makeFromDouble( | ||
ad_utility::detail::wktDistImpl(point1.value(), point2.value())); | ||
} else { | ||
auto geometry1 = | ||
getAreaOrPointGeometry(idTableLeft, rowLeft, leftPointCol, point1); | ||
auto geometry2 = | ||
getAreaOrPointGeometry(idTableRight, rowRight, rightPointCol, point2); | ||
if (!geometry1.has_value() || !geometry2.has_value()) { | ||
return Id::makeUndefined(); | ||
} | ||
// convert to m and return. Note that the 78630 is an approximation, which | ||
// does not provide accurate results for the poles. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What exactly is |
||
return Id::makeFromDouble(boost::apply_visitor(DistanceVisitor(), | ||
geometry1.value(), | ||
geometry2.value()) * | ||
78630); | ||
} | ||
return Id::makeFromDouble( | ||
ad_utility::detail::wktDistImpl(point1.value(), point2.value())); | ||
} | ||
|
||
// ____________________________________________________________________________ | ||
|
@@ -241,7 +326,7 @@ | |
|
||
// ____________________________________________________________________________ | ||
std::vector<Box> SpatialJoinAlgorithms::computeBoundingBox( | ||
const Point& startPoint) const { | ||
const Point& startPoint, double additionalDist) const { | ||
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, | ||
rightJoinCol, rightSelectedCols, numColumns, maxDist, | ||
maxResults] = params_; | ||
|
@@ -254,8 +339,9 @@ | |
auto archaversine = [](double theta) { return std::acos(1 - 2 * theta); }; | ||
|
||
// safety buffer for numerical inaccuracies | ||
double maxDistInMetersBuffer; | ||
if (maxDist.value() < 10) { | ||
double maxDistInMetersBuffer = | ||
static_cast<double>(maxDist.value()) + additionalDist; | ||
if (maxDistInMetersBuffer < 10) { | ||
maxDistInMetersBuffer = 10; | ||
} else if (static_cast<double>(maxDist.value()) < | ||
static_cast<double>(std::numeric_limits<long long>::max()) / | ||
|
@@ -442,16 +528,54 @@ | |
return std::array{northPoleReached, southPoleReached}; | ||
} | ||
|
||
struct BoundingBoxVisitor : public boost::static_visitor<Box> { | ||
template <typename Geometry> | ||
Box operator()(const Geometry& geometry) const { | ||
Box box; | ||
bg::envelope(geometry, box); | ||
return box; | ||
} | ||
}; | ||
|
||
// ____________________________________________________________________________ | ||
Box SpatialJoinAlgorithms::calculateBoundingBoxOfArea( | ||
const std::string& wktString) const { | ||
AnyGeometry geometry; | ||
boost::geometry::read_wkt(wktString, geometry); | ||
return boost::apply_visitor(BoundingBoxVisitor(), geometry); | ||
} | ||
|
||
// ____________________________________________________________________________ | ||
Point SpatialJoinAlgorithms::calculateMidpointOfBox(const Box& box) const { | ||
double lng = (box.min_corner().get<0>() + box.max_corner().get<0>()) / 2.0; | ||
double lat = (box.min_corner().get<1>() + box.max_corner().get<1>()) / 2.0; | ||
return Point(lng, lat); | ||
} | ||
|
||
// ____________________________________________________________________________ | ||
double SpatialJoinAlgorithms::getMaxDistFromMidpointToAnyPointInsideTheBox( | ||
const Box& box, std::optional<Point> midpoint) const { | ||
if (!midpoint) { | ||
midpoint = calculateMidpointOfBox(box); | ||
} | ||
double distLng = | ||
std::abs(box.min_corner().get<0>() - midpoint.value().get<0>()); | ||
double distLat = | ||
std::abs(box.min_corner().get<1>() - midpoint.value().get<1>()); | ||
// convert to meters and return | ||
return (distLng + distLat) * 40075000 / 360; | ||
} | ||
|
||
// ____________________________________________________________________________ | ||
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { | ||
auto printWarning = [alreadyWarned = false, | ||
&spatialJoin = spatialJoin_]() mutable { | ||
if (!alreadyWarned) { | ||
std::string warning = | ||
"The input to a spatial join contained at least one element, " | ||
"that is not a point geometry and is thus skipped. Note that " | ||
"QLever currently only accepts point geometries for the " | ||
"spatial joins"; | ||
"that is not a point or polygon geometry and is thus skipped. Note " | ||
"that QLever currently only accepts point or polygon geometries for " | ||
"the spatial joins"; | ||
AD_LOG_WARN << warning << std::endl; | ||
alreadyWarned = true; | ||
if (spatialJoin.has_value()) { | ||
|
@@ -478,38 +602,67 @@ | |
std::swap(smallerResJoinCol, otherResJoinCol); | ||
} | ||
|
||
// build rtree with one child | ||
bgi::rtree<Value, bgi::quadratic<16>, bgi::indexable<Value>, | ||
bgi::equal_to<Value>, ad_utility::AllocatorWithLimit<Value>> | ||
rtree(bgi::quadratic<16>{}, bgi::indexable<Value>{}, | ||
bgi::equal_to<Value>{}, qec_->getAllocator()); | ||
for (size_t i = 0; i < smallerResult->numRows(); i++) { | ||
// get point of row i | ||
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol); | ||
Box bbox; | ||
|
||
if (!geopoint) { | ||
printWarning(); | ||
continue; | ||
try { | ||
std::string areastring = betweenQuotes( | ||
ExportQueryExecutionTrees::idToStringAndType( | ||
qec_->getIndex(), smallerResult->at(i, smallerResJoinCol), {}) | ||
.value() | ||
.first); | ||
bbox = calculateBoundingBoxOfArea(areastring); | ||
} catch (...) { | ||
printWarning(); | ||
continue; | ||
} | ||
} else { | ||
// create a box with a side length of at most 1mm to approximate the point | ||
Comment on lines
+616
to
+628
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As stated, separate the parsing from the remaining functionality. |
||
bbox = Box(Point(geopoint.value().getLng(), geopoint.value().getLat()), | ||
Point(geopoint.value().getLng() + 0.00000001, | ||
geopoint.value().getLat() + 0.00000001)); | ||
} | ||
|
||
Point p(geopoint.value().getLng(), geopoint.value().getLat()); | ||
|
||
// add every point together with the row number into the rtree | ||
rtree.insert(std::make_pair(std::move(p), i)); | ||
// add every box together with the row number into the rtree | ||
rtree.insert(std::make_pair(std::move(bbox), i)); | ||
} | ||
|
||
// query rtree with the other child | ||
std::vector<Value, ad_utility::AllocatorWithLimit<Value>> results{ | ||
qec_->getAllocator()}; | ||
for (size_t i = 0; i < otherResult->numRows(); i++) { | ||
auto geopoint1 = getPoint(otherResult, i, otherResJoinCol); | ||
auto geopoint = getPoint(otherResult, i, otherResJoinCol); | ||
std::vector<Box> bbox; | ||
|
||
if (!geopoint1) { | ||
printWarning(); | ||
continue; | ||
if (!geopoint) { | ||
try { | ||
std::string areastring = betweenQuotes( | ||
ExportQueryExecutionTrees::idToStringAndType( | ||
qec_->getIndex(), otherResult->at(i, otherResJoinCol), {}) | ||
.value() | ||
.first); | ||
auto areaBox = calculateBoundingBoxOfArea(areastring); | ||
auto midpoint = calculateMidpointOfBox(areaBox); | ||
bbox = computeBoundingBox( | ||
midpoint, | ||
getMaxDistFromMidpointToAnyPointInsideTheBox(areaBox, midpoint)); | ||
} catch (...) { | ||
printWarning(); | ||
continue; | ||
} | ||
} else { | ||
bbox = computeBoundingBox( | ||
Point(geopoint.value().getLng(), geopoint.value().getLat())); | ||
Comment on lines
+645
to
+663
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is an exact code dupliacation that can be extracted. |
||
} | ||
|
||
Point p(geopoint1.value().getLng(), geopoint1.value().getLat()); | ||
|
||
// query the other rtree for every point using the following bounding box | ||
std::vector<Box> bbox = computeBoundingBox(p); | ||
results.clear(); | ||
|
||
ql::ranges::for_each(bbox, [&](const Box& bbox) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function should accept and return
std::string_view
.As is you have two uneccesary string copies.