This is commented and very clean.
The only things missing are some corner-case tests and maybe some cleanup of the parsing of the active dataset clauses.

Signed-off-by: Johannes Kalmbach <johannes.kalmbach@gmail.com>
joka921 committed Jan 9, 2025
1 parent f2524a8 commit 3a574ea
Showing 3 changed files with 32 additions and 14 deletions.
21 changes: 15 additions & 6 deletions src/engine/GroupBy.cpp
@@ -373,6 +373,8 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
}

if (useHashMapOptimization) {
// Helper lambda that calls `computeGroupByForHashMapOptimization` for the
// given `subresults`.
auto computeWithHashMap = [this, &metadataForUnsequentialData,
&groupByCols](auto&& subresults) {
auto doCompute = [&]<int NumCols> {
@@ -383,9 +385,10 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
return ad_utility::callFixedSize(groupByCols.size(), doCompute);
};

// Now call `computeWithHashMap` and return the result. It expects a range
// of results, so if the result is fully materialized, we create an array
// with a single element.
if (subresult->isFullyMaterialized()) {
// `computeWithHashMap` takes a range, so we artificially create one with
// a single input.
return computeWithHashMap(
std::array{std::pair{std::cref(subresult->idTable()),
std::cref(subresult->localVocab())}});
@@ -1513,29 +1516,35 @@ Result GroupBy::computeGroupByForHashMapOptimization(
NUM_GROUP_COLUMNS == 0);
LocalVocab localVocab;

// Initialize aggregation data
// Initialize the data for the aggregates of the GROUP BY operation.
HashMapAggregationData<NUM_GROUP_COLUMNS> aggregationData(
getExecutionContext()->getAllocator(), aggregateAliases,
columnIndices.size());

// Process the input blocks (pairs of `IdTable` and `LocalVocab`) one after
// the other.
ad_utility::Timer lookupTimer{ad_utility::Timer::Stopped};
ad_utility::Timer aggregationTimer{ad_utility::Timer::Stopped};
for (const auto& [inputTableRef, inputLocalVocabRef] : subresults) {
// Also support `std::reference_wrapper` as the input.
const IdTable& inputTable = inputTableRef;
const LocalVocab& inputLocalVocab = inputLocalVocabRef;

// Merge the local vocab of each input block.
//
// NOTE: If the input blocks have very similar or even identical non-empty
// local vocabs, no deduplication is performed.
localVocab.mergeWith(std::span{&inputLocalVocab, 1});
// Initialize evaluation context
// Setup the `EvaluationContext` for this input block.
sparqlExpression::EvaluationContext evaluationContext(
*getExecutionContext(), _subtree->getVariableColumns(), inputTable,
getExecutionContext()->getAllocator(), localVocab, cancellationHandle_,
deadline_);

evaluationContext._groupedVariables = ad_utility::HashSet<Variable>{
_groupByVariables.begin(), _groupByVariables.end()};
evaluationContext._isPartOfGroupBy = true;

// Iterate over the rows of this input block. Process (up to)
// `GROUP_BY_HASH_MAP_BLOCK_SIZE` rows at a time.
for (size_t i = 0; i < inputTable.size();
i += GROUP_BY_HASH_MAP_BLOCK_SIZE) {
checkCancellation();
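The hunk above documents the hash-map GROUP BY path: the input arrives as blocks of (`IdTable`, `LocalVocab`) pairs, each block's local vocab is merged into one output vocab, and the rows of each block are processed in chunks of `GROUP_BY_HASH_MAP_BLOCK_SIZE`. The following is a minimal, self-contained sketch of that control flow only; the placeholder types, the `kBlockSize` constant, and the `processBlockwise` helper are illustrative and not part of QLever's API.

```cpp
#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

// Placeholder stand-ins for QLever's IdTable and LocalVocab. They only serve
// to illustrate the control flow of computeGroupByForHashMapOptimization.
using Row = std::vector<int>;
using Table = std::vector<Row>;
using Vocab = std::vector<std::string>;

// Stands in for GROUP_BY_HASH_MAP_BLOCK_SIZE (the real value may differ).
constexpr std::size_t kBlockSize = std::size_t{1} << 18;

// Process a range of (table, vocab) blocks: merge each block's vocab into a
// single output vocab, then visit the rows of the block in fixed-size chunks.
template <typename Subresults, typename PerChunk>
void processBlockwise(const Subresults& subresults, Vocab& mergedVocab,
                      PerChunk perChunk) {
  for (const auto& [table, vocab] : subresults) {
    // Merge the local vocab of this block (no deduplication, mirroring the
    // NOTE in the diff above).
    mergedVocab.insert(mergedVocab.end(), vocab.begin(), vocab.end());
    // Visit (up to) `kBlockSize` rows at a time.
    for (std::size_t i = 0; i < table.size(); i += kBlockSize) {
      const std::size_t end = std::min(i + kBlockSize, table.size());
      perChunk(table, i, end);
    }
  }
}
```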
22 changes: 14 additions & 8 deletions src/parser/sparqlParser/SparqlQleverVisitor.cpp
@@ -25,7 +25,6 @@
#include "engine/sparqlExpressions/SampleExpression.h"
#include "engine/sparqlExpressions/StdevExpression.h"
#include "engine/sparqlExpressions/UuidExpressions.h"
#include "generated/SparqlAutomaticParser.h"
#include "global/Constants.h"
#include "global/RuntimeParameters.h"
#include "parser/GraphPatternOperation.h"
@@ -1370,7 +1369,6 @@ SparqlFilter Visitor::visit(Parser::FilterRContext* ctx) {
// expression contains unbound variables, because the variables of the FILTER
// might be bound after the filter appears in the query (which is perfectly
// legal).
auto pimpl = visitExpressionPimpl(ctx->constraint());
return SparqlFilter{visitExpressionPimpl(ctx->constraint())};
}

@@ -2429,17 +2427,25 @@ SparqlExpression::Ptr Visitor::visit(Parser::StrReplaceExpressionContext* ctx) {
// ____________________________________________________________________________________
ExpressionPtr Visitor::visitExists(Parser::GroupGraphPatternContext* pattern,
bool negate) {
// The argument of the EXISTS is a completely independent GroupGraphPattern
// (except for the FROM [NAMED] clauses), so we have to back up and restore
// all global state when parsing EXISTS.
auto queryBackup = std::exchange(parsedQuery_, ParsedQuery{});
auto visibleVariablesSoFar = std::move(visibleVariables_);
visibleVariables_.clear();

// Parse the argument of EXISTS.
auto group = visit(pattern);
ParsedQuery query = std::exchange(parsedQuery_, std::move(queryBackup));
query.selectClause().setAsterisk();
query._rootGraphPattern = std::move(group);
query.datasetClauses_ = activeDatasetClauses_;
ParsedQuery argumentOfExists =
std::exchange(parsedQuery_, std::move(queryBackup));
argumentOfExists.selectClause().setAsterisk();
argumentOfExists._rootGraphPattern = std::move(group);

// EXISTS inherits the FROM [NAMED] clauses from the outer query.
argumentOfExists.datasetClauses_ = activeDatasetClauses_;
visibleVariables_ = std::move(visibleVariablesSoFar);
auto exists =
std::make_unique<sparqlExpression::ExistsExpression>(std::move(query));
auto exists = std::make_unique<sparqlExpression::ExistsExpression>(
std::move(argumentOfExists));
if (negate) {
return sparqlExpression::makeUnaryNegateExpression(std::move(exists));
} else {
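The `visitExists` change above revolves around one idea: the argument of EXISTS is parsed as an independent query, so the visitor's global state (`parsedQuery_`, `visibleVariables_`) is swapped out with `std::exchange`, the nested pattern is parsed with fresh state, and the outer state is restored afterwards, while the inner query copies the active FROM [NAMED] clauses. The toy visitor below sketches that save/restore pattern in isolation; its members and the string-based "parsing" are purely illustrative, not QLever's real interfaces.

```cpp
#include <string>
#include <utility>
#include <vector>

// Toy visitor that mimics the backup/restore pattern of `visitExists`.
struct ToyVisitor {
  std::string parsedQuery_;                   // stands in for ParsedQuery
  std::vector<std::string> visibleVariables_;
  std::string activeDatasetClauses_;          // stands in for DatasetClauses

  std::string visitExists(const std::string& pattern) {
    // Back up the global state and start from a clean slate.
    auto queryBackup = std::exchange(parsedQuery_, std::string{});
    auto visibleVariablesSoFar = std::move(visibleVariables_);
    visibleVariables_.clear();

    // "Parse" the argument of EXISTS with the fresh state.
    parsedQuery_ = "parsed(" + pattern + ")";

    // Take the inner result and restore the outer query in a single step.
    std::string argumentOfExists =
        std::exchange(parsedQuery_, std::move(queryBackup));

    // The inner query inherits the FROM [NAMED] clauses of the outer query.
    argumentOfExists += " with datasets " + activeDatasetClauses_;

    // Restore the remaining global state.
    visibleVariables_ = std::move(visibleVariablesSoFar);
    return argumentOfExists;
  }
};
```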
3 changes: 3 additions & 0 deletions src/parser/sparqlParser/SparqlQleverVisitor.h
@@ -78,6 +78,9 @@ class SparqlQleverVisitor {
// query. This may contain duplicates. A variable is added via
// `addVisibleVariable`.
std::vector<Variable> visibleVariables_{};

// The FROM [NAMED] clauses of the query that is currently being parsed.
// Those are currently needed when parsing an EXISTS clause inside the query.
ParsedQuery::DatasetClauses activeDatasetClauses_;
PrefixMap prefixMap_{};
// We need to remember the prologue (prefix declarations) when we encounter it
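The new `activeDatasetClauses_` member holds the FROM [NAMED] clauses of the query currently being parsed so that a nested EXISTS can copy them. The fragment below is a hypothetical sketch of that lifecycle under the assumption that the member is filled while the outer query's dataset clauses are visited and read back from `visitExists`; the type layout and method names are invented for illustration and do not match QLever's actual `ParsedQuery::DatasetClauses`.

```cpp
#include <string>
#include <vector>

// Illustrative stand-in for ParsedQuery::DatasetClauses.
struct DatasetClausesSketch {
  std::vector<std::string> defaultGraphs_;  // FROM <g>
  std::vector<std::string> namedGraphs_;    // FROM NAMED <g>
};

// Minimal visitor fragment: the dataset clauses of the outer query are
// recorded once and copied into every nested EXISTS argument.
struct VisitorFragment {
  DatasetClausesSketch activeDatasetClauses_;

  // Hypothetical hook, called while the outer query's FROM clauses are parsed.
  void recordDatasetClause(const std::string& iri, bool named) {
    auto& target = named ? activeDatasetClauses_.namedGraphs_
                         : activeDatasetClauses_.defaultGraphs_;
    target.push_back(iri);
  }

  // Hypothetical hook, called from `visitExists` (cf. the .cpp diff above).
  DatasetClausesSketch clausesForExists() const {
    return activeDatasetClauses_;
  }
};
```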
