Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph Store HTTP Protocol (GET, POST) back end #1668

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ add_library(engine
CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
Describe.cpp)
Describe.cpp GraphStoreProtocol.cpp)
qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
111 changes: 111 additions & 0 deletions src/engine/GraphStoreProtocol.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Authors: Julian Mundhahs <mundhahj@tf.uni-freiburg.de>

#include "engine/GraphStoreProtocol.h"

#include <boost/beast.hpp>

// ____________________________________________________________________________
// Determine the graph a Graph Store Protocol request operates on from the URL
// query parameters. Exactly one of `graph=<iri>` and `default` must be given;
// a `std::runtime_error` is thrown if neither or both are present.
GraphOrDefault GraphStoreProtocol::extractTargetGraph(
    const ad_utility::url_parser::ParamValueMap& params) {
  // `graph` is looked up without an expected value, `default` with the
  // expected value "" (presumably: the key must be present without a value —
  // confirm against `ad_utility::url_parser::checkParameter`).
  const std::optional<std::string> namedGraph =
      ad_utility::url_parser::checkParameter(params, "graph", std::nullopt);
  const bool wantsDefaultGraph =
      ad_utility::url_parser::checkParameter(params, "default", "").has_value();

  // Neither way of identifying the graph was used.
  if (!namedGraph.has_value() && !wantsDefaultGraph) {
    throw std::runtime_error(
        "No graph IRI specified in the request. Specify one using either the "
        "query parameter `default` or `graph=<iri>`.");
  }
  // Both ways were used at once, which is ambiguous.
  if (namedGraph.has_value() && wantsDefaultGraph) {
    throw std::runtime_error(
        "Only one of `default` and `graph` may be used for graph "
        "identification.");
  }

  if (!namedGraph.has_value()) {
    AD_CORRECTNESS_CHECK(wantsDefaultGraph);
    return DEFAULT{};
  }
  return GraphRef::fromIrirefWithoutBrackets(namedGraph.value());
}

// ____________________________________________________________________________
// Throw an `UnsupportedMediatypeError` whose message names the rejected
// `mediatype` and lists the media types that are accepted (Turtle and
// N-Triples). This function never returns.
void GraphStoreProtocol::throwUnsupportedMediatype(
    const std::string& mediatype) {
  throw UnsupportedMediatypeError(absl::StrCat(
      "Mediatype \"", mediatype,
      "\" is not supported for SPARQL Graph Store HTTP Protocol in QLever. "
      "Supported: ",
      toString(ad_utility::MediaType::turtle), ", ",
      toString(ad_utility::MediaType::ntriples), "."));
}

// ____________________________________________________________________________
void GraphStoreProtocol::throwUnsupportedHTTPMethod(const std::string& method) {
throw std::runtime_error(absl::StrCat(
method,
" in the SPARQL Graph Store HTTP Protocol is not yet implemented "
"in QLever."));
}

// ____________________________________________________________________________
// Parse the request `body` into triples according to `contentType`.
// Turtle and N-Triples bodies are both handled by the same Turtle parser.
// Any other media type is rejected via `throwUnsupportedMediatype`, which
// throws an `UnsupportedMediatypeError`.
std::vector<TurtleTriple> GraphStoreProtocol::parseTriples(
    const std::string& body, const ad_utility::MediaType contentType) {
  using Re2Parser = RdfStringParser<TurtleParser<Tokenizer>>;
  switch (contentType) {
    case ad_utility::MediaType::turtle:
    case ad_utility::MediaType::ntriples: {
      auto parser = Re2Parser();
      parser.setInputStream(body);
      // Return the parsed triples directly instead of assigning them to a
      // previously default-constructed vector.
      return parser.parseAndReturnAllTriples();
    }
    default: {
      // `throwUnsupportedMediatype` is `[[noreturn]]`, so control never falls
      // off the end of this function.
      throwUnsupportedMediatype(toString(contentType));
    }
  }
}

// ____________________________________________________________________________
// Convert the parsed `triples` to `SparqlTripleSimpleWithGraph`s that all
// carry the target `graph`. If `graph` holds a `GraphRef`, its IRI is used as
// the graph of every triple; otherwise the graph stays `std::monostate`.
// The `triples` vector is taken by value and consumed: the components of each
// triple are moved into the result without copying the triple first.
std::vector<SparqlTripleSimpleWithGraph> GraphStoreProtocol::convertTriples(
    const GraphOrDefault& graph, std::vector<TurtleTriple> triples) {
  SparqlTripleSimpleWithGraph::Graph tripleGraph{std::monostate{}};
  if (std::holds_alternative<GraphRef>(graph)) {
    tripleGraph = Iri(std::get<GraphRef>(graph).toStringRepresentation());
  }

  std::vector<SparqlTripleSimpleWithGraph> converted;
  converted.reserve(triples.size());
  // Iterate by reference: we own `triples`, so moving out of its elements is
  // safe and avoids one full copy per triple.
  for (TurtleTriple& triple : triples) {
    // The parsed input must not specify a graph of its own; every triple is
    // expected to carry the special ID of the default graph.
    AD_CORRECTNESS_CHECK(triple.graphIri_.isId() &&
                         triple.graphIri_.getId() ==
                             qlever::specialIds().at(DEFAULT_GRAPH_IRI));

    converted.push_back(SparqlTripleSimpleWithGraph(
        std::move(triple.subject_), std::move(triple.predicate_),
        std::move(triple.object_), tripleGraph));
  }
  return converted;
}

// ____________________________________________________________________________
// Build the `ParsedQuery` for a Graph Store Protocol GET: a CONSTRUCT of
// `?s ?p ?o` over the pattern `?s ?p ?o`. If `graph` holds an IRI, the pattern
// is wrapped in a `GroupGraphPattern` for that IRI; otherwise `graph` must be
// `DEFAULT` and the pattern itself becomes the root graph pattern.
ParsedQuery GraphStoreProtocol::transformGet(const GraphOrDefault& graph) {
  using GraphIri = ad_utility::triple_component::Iri;

  // The pattern that matches all triples.
  parsedQuery::GraphPattern allTriples;
  allTriples._graphPatterns.emplace_back(parsedQuery::BasicGraphPattern{
      {SparqlTriple(Variable("?s"), "?p", Variable("?o"))}});

  ParsedQuery query;
  query._clause = parsedQuery::ConstructClause(
      {{Variable("?s"), Variable("?p"), Variable("?o")}});
  query._rootGraphPattern = {};

  if (const GraphIri* iri = std::get_if<GraphIri>(&graph)) {
    // Named graph: restrict the pattern to that graph.
    query._rootGraphPattern._graphPatterns.emplace_back(
        parsedQuery::GroupGraphPattern{std::move(allTriples), *iri});
  } else {
    AD_CORRECTNESS_CHECK(std::holds_alternative<DEFAULT>(graph));
    query._rootGraphPattern = std::move(allTriples);
  }
  return query;
}
139 changes: 139 additions & 0 deletions src/engine/GraphStoreProtocol.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Copyright 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Authors: Julian Mundhahs <mundhahj@tf.uni-freiburg.de>

#pragma once

#include <gtest/gtest_prod.h>

#include "parser/ParsedQuery.h"
#include "parser/RdfParser.h"
#include "util/http/HttpUtils.h"
#include "util/http/UrlParser.h"

// Exception signalling that the mediatype of a request could not be
// determined. It carries only the message passed to the constructor.
class UnknownMediatypeError : public std::runtime_error {
 public:
  explicit UnknownMediatypeError(std::string_view msg)
      : std::runtime_error(std::string(msg)) {}
};

// Exception signalling that the mediatype of a request is not supported.
// It carries only the message passed to the constructor.
class UnsupportedMediatypeError : public std::runtime_error {
 public:
  explicit UnsupportedMediatypeError(std::string_view msg)
      : std::runtime_error(std::string(msg)) {}
};

// Transform SPARQL Graph Store HTTP Protocol requests to their equivalent
// `ParsedQuery` (a SPARQL Query or a SPARQL Update). All members are static;
// `transformGraphStoreProtocol` is the only public entry point. The private
// helpers are made accessible to the unit tests via `FRIEND_TEST`.
class GraphStoreProtocol {
 private:
  // Extract the mediatype from the `Content-Type` header of `rawRequest`.
  // Throws `UnknownMediatypeError` if the header is missing or empty.
  // Otherwise returns the result of `getMediaTypeFromAcceptHeader` for the
  // header value; `transformPost` treats an empty optional as "unsupported".
  static std::optional<ad_utility::MediaType> extractMediatype(
      const ad_utility::httpUtils::HttpRequest auto& rawRequest) {
    std::string contentTypeString;
    if (rawRequest.find(boost::beast::http::field::content_type) !=
        rawRequest.end()) {
      contentTypeString =
          rawRequest.at(boost::beast::http::field::content_type);
    }
    if (contentTypeString.empty()) {
      // If the mediatype is not given, throw an error.
      // Note: The specs also allow to try to determine the media type from the
      // content.
      throw UnknownMediatypeError("Mediatype empty or not set.");
    }
    // NOTE(review): the `Content-Type` value is parsed with the parser for
    // `Accept` headers — confirm that this is intended.
    return ad_utility::getMediaTypeFromAcceptHeader(contentTypeString);
  }
  FRIEND_TEST(GraphStoreProtocolTest, extractMediatype);

  // Throws the error if a mediatype is not supported.
  [[noreturn]] static void throwUnsupportedMediatype(
      const std::string& mediatype);

  // Throws the error if an HTTP method is not supported.
  [[noreturn]] static void throwUnsupportedHTTPMethod(
      const std::string& method);

  // Parse the triples from the request body according to the content type.
  static std::vector<TurtleTriple> parseTriples(
      const std::string& body, const ad_utility::MediaType contentType);
  FRIEND_TEST(GraphStoreProtocolTest, parseTriples);

  // Transforms the triples from `TurtleTriple` to `SparqlTripleSimpleWithGraph`
  // and sets the correct graph.
  static std::vector<SparqlTripleSimpleWithGraph> convertTriples(
      const GraphOrDefault& graph, std::vector<TurtleTriple> triples);
  FRIEND_TEST(GraphStoreProtocolTest, convertTriples);

  // Transform a SPARQL Graph Store Protocol POST to an equivalent ParsedQuery
  // which is a SPARQL Update. The triples in the request body are parsed
  // according to the `Content-Type` header and placed into `graph`.
  // Throws `UnknownMediatypeError` if no mediatype is given and
  // `UnsupportedMediatypeError` if the mediatype is not supported.
  static ParsedQuery transformPost(
      const ad_utility::httpUtils::HttpRequest auto& rawRequest,
      const GraphOrDefault& graph) {
    using namespace boost::beast::http;
    auto contentType = extractMediatype(rawRequest);
    // A media type is set but not one of the supported ones as per the QLever
    // MediaType code.
    if (!contentType.has_value()) {
      // NOTE(review): the Codecov annotation on this pull request reports
      // that this branch (lines 80-81 of this header) is not covered by tests.
      throwUnsupportedMediatype(rawRequest.at(field::content_type));
    }
    auto triples = parseTriples(rawRequest.body(), contentType.value());
    auto convertedTriples = convertTriples(graph, std::move(triples));
    // Presumably the first member holds the triples to insert and the second
    // (left empty) the triples to delete — confirm against
    // `updateClause::GraphUpdate`.
    updateClause::GraphUpdate up{std::move(convertedTriples), {}};
    ParsedQuery res;
    // Move the update into the clause; copying it would duplicate the whole
    // vector of triples.
    res._clause = parsedQuery::UpdateClause{std::move(up)};
    return res;
  }
  FRIEND_TEST(GraphStoreProtocolTest, transformPost);

  // Transform a SPARQL Graph Store Protocol GET to an equivalent ParsedQuery
  // which is a SPARQL Query.
  static ParsedQuery transformGet(const GraphOrDefault& graph);
  FRIEND_TEST(GraphStoreProtocolTest, transformGet);

 public:
  // Every Graph Store Protocol request has an equivalent SPARQL Query or
  // Update. Transform the Graph Store Protocol request into its equivalent
  // Query or Update. Only GET and POST are implemented; every other HTTP
  // method results in a `std::runtime_error`.
  static ParsedQuery transformGraphStoreProtocol(
      const ad_utility::httpUtils::HttpRequest auto& rawRequest) {
    ad_utility::url_parser::ParsedUrl parsedUrl =
        ad_utility::url_parser::parseRequestTarget(rawRequest.target());
    // We only support passing the target graph as a query parameter (`Indirect
    // Graph Identification`). `Direct Graph Identification` (the URL is the
    // graph) is not supported. See also
    // https://www.w3.org/TR/2013/REC-sparql11-http-rdf-update-20130321/#graph-identification.
    GraphOrDefault graph = extractTargetGraph(parsedUrl.parameters_);

    using enum boost::beast::http::verb;
    auto method = rawRequest.method();
    // Every branch either returns or calls a `[[noreturn]]` function.
    if (method == get) {
      return transformGet(graph);
    } else if (method == put) {
      throwUnsupportedHTTPMethod("PUT");
    } else if (method == delete_) {
      throwUnsupportedHTTPMethod("DELETE");
    } else if (method == post) {
      return transformPost(rawRequest, graph);
    } else if (method == head) {
      throwUnsupportedHTTPMethod("HEAD");
    } else if (method == patch) {
      throwUnsupportedHTTPMethod("PATCH");
    } else {
      throw std::runtime_error(
          absl::StrCat("Unsupported HTTP method \"",
                       std::string_view{rawRequest.method_string()},
                       "\" for the SPARQL Graph Store HTTP Protocol."));
    }
  }

 private:
  // Extract the graph to be acted upon from the URL query parameters
  // (`Indirect Graph Identification`). See
  // https://www.w3.org/TR/2013/REC-sparql11-http-rdf-update-20130321/#indirect-graph-identification
  static GraphOrDefault extractTargetGraph(
      const ad_utility::url_parser::ParamValueMap& params);
  FRIEND_TEST(GraphStoreProtocolTest, extractTargetGraph);
};
42 changes: 9 additions & 33 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <string>
#include <vector>

#include "GraphStoreProtocol.h"
#include "engine/ExecuteUpdate.h"
#include "engine/ExportQueryExecutionTrees.h"
#include "engine/QueryPlanner.h"
Expand Down Expand Up @@ -347,8 +348,8 @@

// We always want to call `checkParameter` with the same first parameter
// (`parameters`), so bind it once here.
auto checkParameter =
std::bind_front(&Server::checkParameter, std::cref(parameters));
auto checkParameter = std::bind_front(&ad_utility::url_parser::checkParameter,
std::cref(parameters));

Check warning on line 352 in src/engine/Server.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/Server.cpp#L351-L352

Added lines #L351 - L352 were not covered by tests

// Check the access token. If an access token is provided and the check fails,
// throw an exception and do not process any part of the query (even if the
Expand Down Expand Up @@ -537,9 +538,11 @@
std::pair<bool, bool> Server::determineResultPinning(
const ad_utility::url_parser::ParamValueMap& params) {
const bool pinSubtrees =
checkParameter(params, "pinsubtrees", "true").has_value();
ad_utility::url_parser::checkParameter(params, "pinsubtrees", "true")
.has_value();
const bool pinResult =
checkParameter(params, "pinresult", "true").has_value();
ad_utility::url_parser::checkParameter(params, "pinresult", "true")
.has_value();
return {pinSubtrees, pinResult};
}

Expand Down Expand Up @@ -729,17 +732,11 @@
}
}

// ____________________________________________________________________________
// Exception type that carries only the message passed to the constructor.
// NOTE(review): presumably thrown when none of the requested media types is
// supported — the throw sites are not visible here, confirm against them.
class NoSupportedMediatypeError : public std::runtime_error {
 public:
  explicit NoSupportedMediatypeError(std::string_view msg)
      : std::runtime_error(std::string(msg)) {}
};

// ____________________________________________________________________________
MediaType Server::determineMediaType(
const ad_utility::url_parser::ParamValueMap& params,
const ad_utility::httpUtils::HttpRequest auto& request) {
using namespace ad_utility::url_parser;
// The following code block determines the media type to be used for the
// result. The media type is either determined by the "Accept:" header of
// the request or by the URL parameter "action=..." (for TSV and CSV export,
Expand Down Expand Up @@ -997,7 +994,7 @@
} catch (const QueryAlreadyInUseError& e) {
responseStatus = http::status::conflict;
exceptionErrorMsg = e.what();
} catch (const NoSupportedMediatypeError& e) {
} catch (const UnknownMediatypeError& e) {

Check warning on line 997 in src/engine/Server.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/Server.cpp#L997

Added line #L997 was not covered by tests
responseStatus = http::status::bad_request;
exceptionErrorMsg = e.what();
} catch (const ad_utility::CancellationException& e) {
Expand Down Expand Up @@ -1118,24 +1115,3 @@
return true;
}
}

// _____________________________________________________________________________
// Look up `key` in `parameters` (via `getParameterCheckAtMostOnce`).
// - Key not found: return `std::nullopt`.
// - Key found and no expected `value` given: return the value of the key.
// - Key found and expected `value` given: return `value` if it equals the
//   value of the key, otherwise `std::nullopt`.
std::optional<std::string> Server::checkParameter(
    const ad_utility::url_parser::ParamValueMap& parameters,
    std::string_view key, std::optional<std::string> value) {
  const auto found =
      ad_utility::url_parser::getParameterCheckAtMostOnce(parameters, key);
  if (!found.has_value()) {
    return std::nullopt;
  }
  const std::string actualValue = found.value();

  // Without an expected value, the mere presence of the key is sufficient and
  // its actual value is reported back.
  if (!value.has_value()) {
    return actualValue;
  }
  // With an expected value, the actual value has to match it exactly.
  if (value.value() != actualValue) {
    return std::nullopt;
  }
  return value;
}
12 changes: 0 additions & 12 deletions src/engine/Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,18 +256,6 @@ class Server {
/// HTTP error response.
bool checkAccessToken(std::optional<std::string_view> accessToken) const;

/// Checks if a URL parameter exists in the request, and it matches the
/// expected `value`. If yes, return the value, otherwise return
/// `std::nullopt`. If `value` is `std::nullopt`, only check if the key
/// exists. We need this because we have parameters like "cmd=stats", where a
/// fixed combination of the key and value determines the kind of action, as
/// well as parameters like "index-decription=...", where the key determines
/// the kind of action. If the key is not found, always return `std::nullopt`.
static std::optional<std::string> checkParameter(
const ad_utility::url_parser::ParamValueMap& parameters,
std::string_view key, std::optional<std::string> value);
FRIEND_TEST(ServerTest, checkParameter);

/// Check if user-provided timeout is authorized with a valid access-token or
/// lower than the server default. Return an empty optional and send a 403
/// Forbidden HTTP response if the change is not allowed. Return the new
Expand Down
4 changes: 4 additions & 0 deletions src/parser/TripleComponent.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ class TripleComponent {
}
[[nodiscard]] Variable& getVariable() { return std::get<Variable>(_variant); }

/// Return true iff this component currently holds an `Id`.
[[nodiscard]] bool isId() const { return std::holds_alternative<Id>(_variant); }
/// Access the stored `Id`. Uses `std::get`, so the component must currently
/// hold an `Id` (check with `isId()` first).
[[nodiscard]] const Id& getId() const { return std::get<Id>(_variant); }
[[nodiscard]] Id& getId() { return std::get<Id>(_variant); }

/// Convert to an RDF literal. `std::strings` will be emitted directly,
/// `int64_t` is converted to a `xsd:integer` literal, and a `double` is
/// converted to a `xsd:double`.
Expand Down
3 changes: 2 additions & 1 deletion src/util/http/MediaTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ using enum MediaType;
// specified in the request. It's "application/sparql-results+json", as
// required by the SPARQL standard.
constexpr std::array SUPPORTED_MEDIA_TYPES{
sparqlJson, sparqlXml, qleverJson, tsv, csv, turtle, octetStream};
sparqlJson, sparqlXml, qleverJson, tsv, csv, turtle, ntriples, octetStream};

// _____________________________________________________________
const ad_utility::HashMap<MediaType, MediaTypeImpl>& getAllMediaTypes() {
Expand All @@ -40,6 +40,7 @@ const ad_utility::HashMap<MediaType, MediaTypeImpl>& getAllMediaTypes() {
add(sparqlXml, "application", "sparql-results+xml", {});
add(qleverJson, "application", "qlever-results+json", {});
add(turtle, "text", "turtle", {".ttl"});
add(ntriples, "application", "n-triples", {".nt"});
add(octetStream, "application", "octet-stream", {});
return t;
}();
Expand Down
1 change: 1 addition & 0 deletions src/util/http/MediaTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ enum class MediaType {
tsv,
csv,
turtle,
ntriples,
octetStream
};

Expand Down
Loading
Loading