From 373329150f49ff6c3641ccaeaa5f6c09767d896a Mon Sep 17 00:00:00 2001
From: Liss Heidrich <31625940+Clueliss@users.noreply.github.com>
Date: Wed, 10 Apr 2024 10:18:05 +0200
Subject: [PATCH] v1.3.0 (#99)

---
 .dockerignore | 8 +-
 .github/workflows/push.yaml | 29 -
 .github/workflows/release.yaml | 85 -
 .gitignore | 5 +-
 CMakeLists.txt | 195 +-
 Dockerfile | 80 +-
 LICENSE | 661 --
 LICENSE-APACHE | 201 +
 thirdparty/csv-parser/LICENSE => LICENSE-MIT | 2 +-
 README.MD | 230 +-
 cmake/boilerplate_init.cmake | 6 +-
 cmake/component-config.cmake.in | 5 +
 cmake/conan_cmake.cmake | 23 +-
 cmake/dummy-config.cmake.in | 5 +
 cmake/install_library.cmake | 79 +
 cmake/main-component-config.cmake.in | 40 +
 cmake/version.hpp.in | 12 -
 conanfile.py | 169 +
 conanfile.txt | 22 -
 execs/CMakeLists.txt | 60 +
 execs/tentris-loader/CMakeLists.txt | 24 +
 .../src/dice/tentris-loader/TentrisLoader.cpp | 151 +
 execs/tentris-server/CMakeLists.txt | 27 +
 .../src/dice/tentris-server/TentrisServer.cpp | 157 +
 execs/tools/CMakeLists.txt | 2 +
 execs/tools/deduplicated-nt/CMakeLists.txt | 34 +
 .../tools/deduplicated_nt/DeduplicatedNT.cpp | 105 +
 execs/tools/rdf2ids/CMakeLists.txt | 35 +
 .../src/dice/tools/rdf2ids/RDF2IDs.cpp | 114 +
 libs/CMakeLists.txt | 7 +
 libs/endpoint/CMakeLists.txt | 42 +
 .../dice/endpoint/ParseSPARQLQueryParam.hpp | 37 +
 .../dice/endpoint/ParseSPARQLUpdateParam.hpp | 41 +
 .../endpoint/SparqlJsonResultSAXWriter.hpp | 154 +
 .../src/dice/endpoint/CountEndpoint.cpp | 51 +
 .../src/dice/endpoint/CountEndpoint.hpp | 33 +
 .../endpoint/src/dice/endpoint/HTTPServer.cpp | 53 +
 .../endpoint/src/dice/endpoint/HTTPServer.hpp | 39 +
 .../src/dice/endpoint/SparqlEndpoint.cpp | 72 +
 .../src/dice/endpoint/SparqlEndpoint.hpp | 33 +
 .../src/dice/endpoint/SparqlQueryCache.cpp | 7 +
 .../src/dice/endpoint/SparqlQueryCache.hpp | 13 +
 .../dice/endpoint/SparqlStreamingEndpoint.cpp | 74 +
 .../dice/endpoint/SparqlStreamingEndpoint.hpp | 32 +
 .../src/dice/endpoint}/SyncedLRUCache.hpp | 108 +-
 libs/node-store/CMakeLists.txt | 36 +
 .../dice/node-store/MetallBNodeBackend.cpp | 17 +
 .../dice/node-store/MetallBNodeBackend.hpp | 28 +
 .../src/dice/node-store/MetallIRIBackend.cpp | 18 +
 .../src/dice/node-store/MetallIRIBackend.hpp | 28 +
 .../dice/node-store/MetallLiteralBackend.cpp | 31 +
 .../dice/node-store/MetallLiteralBackend.hpp | 36 +
 .../dice/node-store/MetallNodeTypeStorage.hpp | 70 +
 .../dice/node-store/MetallVariableBackend.cpp | 24 +
 .../dice/node-store/MetallVariableBackend.hpp | 33 +
 .../PersistentNodeStorageBackend.cpp | 54 +
 .../PersistentNodeStorageBackend.hpp | 35 +
 .../PersistentNodeStorageBackendImpl.cpp | 137 +
 .../PersistentNodeStorageBackendImpl.hpp | 70 +
 .../src/dice/node-store/metall_manager.hpp | 19 +
 libs/rdf-tensor/CMakeLists.txt | 25 +
 libs/rdf-tensor/cmake/version.hpp.in | 13 +
 .../src/dice/rdf-tensor/HypertrieTrait.hpp | 42 +
 .../src/dice/rdf-tensor/NodeWrapper.hpp | 52 +
 libs/rdf-tensor/src/dice/rdf-tensor/Query.hpp | 16 +
 .../src/dice/rdf-tensor/RDFNodeHashes.hpp | 43 +
 .../src/dice/rdf-tensor/RDFTensor.hpp | 14 +
 .../src/dice/rdf-tensor/metall_manager.hpp | 14 +
 libs/sparql2tensor/CMakeLists.txt | 40 +
 .../parser/visitors/PrologueVisitor.hpp | 28 +
 .../parser/visitors/SelectAskQueryVisitor.hpp | 130 +
 .../src/dice/sparql2tensor/SPARQLQuery.cpp | 59 +
 .../src/dice/sparql2tensor/SPARQLQuery.hpp | 45 +
 .../src/dice/sparql2tensor/UPDATEQuery.cpp | 191 +
 .../src/dice/sparql2tensor/UPDATEQuery.hpp | 18 +
 .../parser/exception/SPARQLErrorListener.cpp | 16 +
 .../parser/exception/SPARQLErrorListener.hpp | 26 +
 .../parser/visitors/PrologueVisitor.cpp | 28 +
 .../parser/visitors/SelectAskQueryVisitor.cpp | 480 +
 libs/tentris/CMakeLists.txt | 15 +
 libs/tentris/src/dice/tentris.hpp | 6 +
 libs/triple-store/CMakeLists.txt | 31 +
 .../src/dice/triple-store/TripleStore.cpp | 157 +
 .../src/dice/triple-store/TripleStore.hpp | 96 +
 src/exec/TentrisServer.cpp | 105 -
 src/exec/TentrisTerminal.cpp | 285 -
 src/exec/VersionStrings.hpp | 11 -
 src/exec/config/ExecutableConfig.hpp | 181 -
 src/exec/config/ServerConfig.hpp | 55 -
 src/exec/config/TerminalConfig.hpp | 39 -
 src/exec/tools/IDs2Hypertrie.cpp | 336 -
 src/exec/tools/RDF2IDs.cpp | 58 -
 src/lib/tentris/http/QueryResultState.hpp | 31 -
 src/lib/tentris/http/SparqlEndpoint.hpp | 252 -
 .../AtomicQueryExecutionPackageCache.hpp | 25 -
 src/lib/tentris/store/AtomicTripleStore.hpp | 24 -
 .../tentris/store/QueryExecutionPackage.hpp | 163 -
 .../store/QueryExecutionPackageCache.hpp | 14 -
 src/lib/tentris/store/RDF/SerdParser.hpp | 221 -
 src/lib/tentris/store/RDF/TermStore.hpp | 136 -
 src/lib/tentris/store/SPARQL/ParsedSPARQL.hpp | 434 -
 .../store/SparqlJsonResultSAXWriter.hpp | 143 -
 src/lib/tentris/store/TripleStore.hpp | 149 -
 .../store/config/AtomicTripleStoreConfig.cpp | 47 -
 src/lib/tentris/tensor/BoolHypertrie.hpp | 30 -
 src/lib/tentris/util/FmtHelper.hpp | 70 -
 src/lib/tentris/util/LogHelper.hpp | 154 -
 src/lib/tentris/util/SingletonFactory.hpp | 44 -
 test_fetch_content/CMakeLists.txt | 21 +
 test_fetch_content/example.cpp | 6 +
 test_package/CMakeLists.txt | 17 +
 test_package/conanfile.py | 25 +
 test_package/example.cpp | 6 +
 tests/CMakeLists.txt | 30 -
 tests/TestSPARQLParser.cpp | 160 -
 tests/TestTermStore.cpp | 26 -
 tests/Tests.cpp | 10 -
 tests/queries/DBpedia.txt | 500 -
 tests/queries/sp2b.txt | 13 -
 tests/queries/swdf.txt | 175 -
 .../RapidJSON/include/rapidjson/allocators.h | 284 -
 .../include/rapidjson/cursorstreamwrapper.h | 78 -
 .../RapidJSON/include/rapidjson/document.h | 2737 ------
 .../include/rapidjson/encodedstream.h | 299 -
 .../RapidJSON/include/rapidjson/encodings.h | 716 --
 .../RapidJSON/include/rapidjson/error/en.h | 74 -
 .../RapidJSON/include/rapidjson/error/error.h | 161 -
 .../include/rapidjson/filereadstream.h | 99 -
 .../include/rapidjson/filewritestream.h | 104 -
 thirdparty/RapidJSON/include/rapidjson/fwd.h | 151 -
 .../include/rapidjson/internal/biginteger.h | 290 -
 .../include/rapidjson/internal/clzll.h | 71 -
 .../include/rapidjson/internal/diyfp.h | 257 -
 .../include/rapidjson/internal/dtoa.h | 245 -
 .../include/rapidjson/internal/ieee754.h | 78 -
 .../include/rapidjson/internal/itoa.h | 308 -
 .../include/rapidjson/internal/meta.h | 186 -
 .../include/rapidjson/internal/pow10.h | 55 -
 .../include/rapidjson/internal/regex.h | 739 --
 .../include/rapidjson/internal/stack.h | 232 -
 .../include/rapidjson/internal/strfunc.h | 69 -
 .../include/rapidjson/internal/strtod.h | 290 -
 .../include/rapidjson/internal/swap.h | 46 -
 .../include/rapidjson/istreamwrapper.h | 128 -
 .../include/rapidjson/memorybuffer.h | 70 -
 .../include/rapidjson/memorystream.h | 71 -
 .../include/rapidjson/msinttypes/inttypes.h | 316 -
 .../include/rapidjson/msinttypes/stdint.h | 300 -
 .../include/rapidjson/ostreamwrapper.h | 81 -
 .../RapidJSON/include/rapidjson/pointer.h | 1415 ---
 .../include/rapidjson/prettywriter.h | 277 -
 .../RapidJSON/include/rapidjson/rapidjson.h | 692 --
 .../RapidJSON/include/rapidjson/reader.h | 2244 -----
 .../RapidJSON/include/rapidjson/schema.h | 2496 -----
 .../RapidJSON/include/rapidjson/stream.h | 223 -
 .../include/rapidjson/stringbuffer.h | 121 -
 .../RapidJSON/include/rapidjson/writer.h | 710 --
 thirdparty/RapidJSON/license.txt | 57 -
 thirdparty/csv-parser/csv.hpp | 8355 -----------------
 159 files changed, 4370 insertions(+), 30334 deletions(-)
 delete mode 100644 .github/workflows/push.yaml
 delete mode 100644 .github/workflows/release.yaml
 delete mode 100644 LICENSE
 create mode 100644 LICENSE-APACHE
 rename thirdparty/csv-parser/LICENSE => LICENSE-MIT (92%)
 create mode 100644 cmake/component-config.cmake.in
 create mode 100644 cmake/dummy-config.cmake.in
 create mode 100644 cmake/install_library.cmake
 create mode 100644 cmake/main-component-config.cmake.in
 delete mode 100644 cmake/version.hpp.in
 create mode 100644 conanfile.py
 delete mode 100644 conanfile.txt
 create mode 100644 execs/CMakeLists.txt
 create mode 100644 execs/tentris-loader/CMakeLists.txt
 create mode 100644 execs/tentris-loader/src/dice/tentris-loader/TentrisLoader.cpp
 create mode 100644 execs/tentris-server/CMakeLists.txt
 create mode 100644 execs/tentris-server/src/dice/tentris-server/TentrisServer.cpp
 create mode 100644 execs/tools/CMakeLists.txt
 create mode 100644 execs/tools/deduplicated-nt/CMakeLists.txt
 create mode 100644 execs/tools/deduplicated-nt/src/dice/tools/deduplicated_nt/DeduplicatedNT.cpp
 create mode 100644 execs/tools/rdf2ids/CMakeLists.txt
 create mode 100644 execs/tools/rdf2ids/src/dice/tools/rdf2ids/RDF2IDs.cpp
 create mode 100644 libs/CMakeLists.txt
 create mode 100644 libs/endpoint/CMakeLists.txt
 create mode 100644 libs/endpoint/private-include/dice/endpoint/ParseSPARQLQueryParam.hpp
 create mode 100644 libs/endpoint/private-include/dice/endpoint/ParseSPARQLUpdateParam.hpp
 create mode 100644 libs/endpoint/private-include/dice/endpoint/SparqlJsonResultSAXWriter.hpp
 create mode 100644 libs/endpoint/src/dice/endpoint/CountEndpoint.cpp
 create mode 100644 libs/endpoint/src/dice/endpoint/CountEndpoint.hpp
 create mode 100644 libs/endpoint/src/dice/endpoint/HTTPServer.cpp
 create mode 100644 libs/endpoint/src/dice/endpoint/HTTPServer.hpp
 create mode 100644 libs/endpoint/src/dice/endpoint/SparqlEndpoint.cpp
 create mode 100644 libs/endpoint/src/dice/endpoint/SparqlEndpoint.hpp
 create mode 100644 libs/endpoint/src/dice/endpoint/SparqlQueryCache.cpp
 create mode 100644 libs/endpoint/src/dice/endpoint/SparqlQueryCache.hpp
 create mode 100644 libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.cpp
 create mode 100644 libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.hpp
 rename {src/lib/tentris/util => libs/endpoint/src/dice/endpoint}/SyncedLRUCache.hpp (54%)
 create mode 100644 libs/node-store/CMakeLists.txt
 create mode 100644 libs/node-store/src/dice/node-store/MetallBNodeBackend.cpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallBNodeBackend.hpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallIRIBackend.cpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallIRIBackend.hpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallLiteralBackend.cpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallLiteralBackend.hpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallNodeTypeStorage.hpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallVariableBackend.cpp
 create mode 100644 libs/node-store/src/dice/node-store/MetallVariableBackend.hpp
 create mode 100644 libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.cpp
 create mode 100644 libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.hpp
 create mode 100644 libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.cpp
 create mode 100644 libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.hpp
 create mode 100644 libs/node-store/src/dice/node-store/metall_manager.hpp
 create mode 100644 libs/rdf-tensor/CMakeLists.txt
 create mode 100644 libs/rdf-tensor/cmake/version.hpp.in
 create mode 100644 libs/rdf-tensor/src/dice/rdf-tensor/HypertrieTrait.hpp
 create mode 100644 libs/rdf-tensor/src/dice/rdf-tensor/NodeWrapper.hpp
 create mode 100644 libs/rdf-tensor/src/dice/rdf-tensor/Query.hpp
 create mode 100644 libs/rdf-tensor/src/dice/rdf-tensor/RDFNodeHashes.hpp
 create mode 100644 libs/rdf-tensor/src/dice/rdf-tensor/RDFTensor.hpp
 create mode 100644 libs/rdf-tensor/src/dice/rdf-tensor/metall_manager.hpp
 create mode 100644 libs/sparql2tensor/CMakeLists.txt
 create mode 100644 libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/PrologueVisitor.hpp
 create mode 100644 libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.hpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.cpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.hpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.cpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.hpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.cpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.hpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/PrologueVisitor.cpp
 create mode 100644 libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.cpp
 create mode 100644 libs/tentris/CMakeLists.txt
 create mode 100644 libs/tentris/src/dice/tentris.hpp
 create mode 100644 libs/triple-store/CMakeLists.txt
 create mode 100644 libs/triple-store/src/dice/triple-store/TripleStore.cpp
 create mode 100644 libs/triple-store/src/dice/triple-store/TripleStore.hpp
 delete mode 100644 src/exec/TentrisServer.cpp
 delete mode 100644 src/exec/TentrisTerminal.cpp
 delete mode 100644 src/exec/VersionStrings.hpp
 delete mode 100644 src/exec/config/ExecutableConfig.hpp
 delete mode 100644 src/exec/config/ServerConfig.hpp
 delete mode 100644 src/exec/config/TerminalConfig.hpp
 delete mode 100644 src/exec/tools/IDs2Hypertrie.cpp
 delete mode 100644 src/exec/tools/RDF2IDs.cpp
 delete mode 100644 src/lib/tentris/http/QueryResultState.hpp
 delete mode 100644 src/lib/tentris/http/SparqlEndpoint.hpp
 delete mode 100644 src/lib/tentris/store/AtomicQueryExecutionPackageCache.hpp
 delete mode 100644 src/lib/tentris/store/AtomicTripleStore.hpp
 delete mode 100644 src/lib/tentris/store/QueryExecutionPackage.hpp
 delete mode 100644 src/lib/tentris/store/QueryExecutionPackageCache.hpp
 delete mode 100644 src/lib/tentris/store/RDF/SerdParser.hpp
 delete mode 100644 src/lib/tentris/store/RDF/TermStore.hpp
 delete mode 100644 src/lib/tentris/store/SPARQL/ParsedSPARQL.hpp
 delete mode 100644 src/lib/tentris/store/SparqlJsonResultSAXWriter.hpp
 delete mode 100644 src/lib/tentris/store/TripleStore.hpp
 delete mode 100644 src/lib/tentris/store/config/AtomicTripleStoreConfig.cpp
 delete mode 100644 src/lib/tentris/tensor/BoolHypertrie.hpp
 delete mode 100644 src/lib/tentris/util/FmtHelper.hpp
 delete mode 100644 src/lib/tentris/util/LogHelper.hpp
 delete mode 100644 src/lib/tentris/util/SingletonFactory.hpp
 create mode 100644 test_fetch_content/CMakeLists.txt
 create mode 100644 test_fetch_content/example.cpp
 create mode 100644 test_package/CMakeLists.txt
 create mode 100644 test_package/conanfile.py
 create mode 100644 test_package/example.cpp
 delete mode 100644 tests/CMakeLists.txt
 delete mode 100644 tests/TestSPARQLParser.cpp
 delete mode 100644 tests/TestTermStore.cpp
 delete mode 100644 tests/Tests.cpp
 delete mode 100644 tests/queries/DBpedia.txt
 delete mode 100644 tests/queries/sp2b.txt
 delete mode 100644 tests/queries/swdf.txt
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/allocators.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/cursorstreamwrapper.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/document.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/encodedstream.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/encodings.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/error/en.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/error/error.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/filereadstream.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/filewritestream.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/fwd.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/biginteger.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/clzll.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/diyfp.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/dtoa.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/ieee754.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/itoa.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/meta.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/pow10.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/regex.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/stack.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/strfunc.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/strtod.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/internal/swap.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/istreamwrapper.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/memorybuffer.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/memorystream.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/msinttypes/inttypes.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/msinttypes/stdint.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/ostreamwrapper.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/pointer.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/prettywriter.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/rapidjson.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/reader.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/schema.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/stream.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/stringbuffer.h
 delete mode 100644 thirdparty/RapidJSON/include/rapidjson/writer.h
 delete mode 100644 thirdparty/RapidJSON/license.txt
 delete mode 100644 thirdparty/csv-parser/csv.hpp

diff --git a/.dockerignore b/.dockerignore
index ee7b04fb..ae44934b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,7 +1,11 @@
+
 ## project specific
-/src/lib/tentris/tentris_version.hpp
+**/tentris_version.hpp
+**/build*/**
+**/cmake-*/**
+tests

 # log files
-tentris_*
+tentris.log
 # Created by https://www.toptal.com/developers/gitignore/api/c++,conan,jetbrains+all,cmake
 # Edit at https://www.toptal.com/developers/gitignore?templates=c++,conan,jetbrains+all,cmake
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
deleted file mode 100644
index 7d93dded..00000000
--- a/.github/workflows/push.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-name: push
-
-on:
-  push:
-    branches:
-      - '*'
-
-jobs:
-  main:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-      - name: Cache Docker layers
-        uses: actions/cache@v2
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-      - name: Build image
-        uses: docker/build-push-action@v2
-        with:
-          push: false
-          tags: |
-            tentris:latest
-          cache-from: type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache
-
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
deleted file mode 100644
index 2f900422..00000000
--- a/.github/workflows/release.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-name: release
-
-on:
-  push:
-    tags:
-      - '*'
-
-jobs:
-  main:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v1
-      - name: Get the Ref
-        id: get-ref
-        uses: ankitvgupta/ref-to-tag-action@master
-        with:
-          ref: ${{ github.ref }}
-          head_ref: ${{ github.head_ref }}
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-      - name: Cache Docker layers
-        uses: actions/cache@v2
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-      - name: Login to Docker registry
-        uses: docker/login-action@v1
-        with:
-          username: ${{ secrets.DOCKER_REGISTRY_USER }}
-          password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }}
-      - name: Build and load image
-        id: docker_build
-        uses: docker/build-push-action@v2
-        with:
-          load: true # We can't load and push at the same time...
-          tags: |
-            dicegroup/tentris_server:current_build
-            dicegroup/tentris_server:${{ steps.get-ref.outputs.tag }}
-          cache-from: type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache
-      - name: Push image
-        run: |
-          docker push dicegroup/tentris_server:${{ steps.get-ref.outputs.tag }}
-      - name: Extract binaries
-        run: |
-          container_id=$(docker create dicegroup/tentris_server:current_build)
-          docker cp ${container_id}:/tentris_server ./tentris_server
-          docker cp ${container_id}:/tentris_terminal ./tentris_terminal
-          docker cp ${container_id}:/rdf2ids ./rdf2ids
-          docker cp ${container_id}:/ids2hypertrie ./ids2hypertrie
-          zip benchmarktools_clang11_libstdcxx10.zip rdf2ids ids2hypertrie
-          zip tentris_clang11_libstdcxx10.zip tentris_server tentris_terminal
-      - name: Create Release
-        id: create_release
-        uses: actions/create-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          tag_name: ${{ steps.get-ref.outputs.tag }}
-          release_name: ${{ steps.get-ref.outputs.tag }}
-          draft: true
-          prerelease: false
-      - name: Upload tentris
-        id: upload-tentris-asset
-        uses: actions/upload-release-asset@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ steps.create_release.outputs.upload_url }}
-          asset_path: tentris_clang11_libstdcxx10.zip
-          asset_name: tentris_${{ steps.get-ref.outputs.tag }}_clang11_libstdcxx10.zip
-          asset_content_type: application/zip
-      - name: Upload benchmarktools
-        id: upload-benchmarktools-asset
-        uses: actions/upload-release-asset@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ steps.create_release.outputs.upload_url }}
-          asset_path: benchmarktools_clang11_libstdcxx10.zip
-          asset_name: benchmarktools_${{ steps.get-ref.outputs.tag }}_clang11_libstdcxx10.zip
-          asset_content_type: application/zip
diff --git a/.gitignore b/.gitignore
index ee7b04fb..deb919bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,8 @@
 ## project specific
-/src/lib/tentris/tentris_version.hpp
+**/tentris_version.hpp
+

 # log files
-tentris_*
+tentris.log
 # Created by https://www.toptal.com/developers/gitignore/api/c++,conan,jetbrains+all,cmake
 # Edit at https://www.toptal.com/developers/gitignore?templates=c++,conan,jetbrains+all,cmake
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b762a69b..5eed169d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,194 +1,23 @@
-cmake_minimum_required(VERSION 3.13)
+cmake_minimum_required(VERSION 3.18)
 project(tentris
-        LANGUAGES CXX
-        VERSION 1.1.3)
-set(CMAKE_CXX_STANDARD 20)
-
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/version.hpp.in ${CMAKE_CURRENT_SOURCE_DIR}/src/lib/tentris/tentris_version.hpp)
+        VERSION 1.3.0
+        DESCRIPTION "tensor-based triplestore")

 include(cmake/boilerplate_init.cmake)
 boilerplate_init()

-include(cmake/conan_cmake.cmake)
-install_packages_via_conan("${CMAKE_SOURCE_DIR}/conanfile.txt" "${CONAN_OPTIONS}")
-
-if(DEFINED ${TENTRIS_MARCH})
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=${TENTRIS_MARCH}")
-endif()
-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -fomit-frame-pointer -momit-leaf-frame-pointer")
-else ()
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -fomit-frame-pointer")
+option(CONAN_CMAKE "If this should use conan cmake to fetch dependencies" On)
+if (IS_TOP_LEVEL AND CONAN_CMAKE)
+    include(cmake/conan_cmake.cmake)
+    install_packages_via_conan("${CMAKE_CURRENT_SOURCE_DIR}/conanfile.py" "")
 endif ()

-if (TENTRIS_STATIC)
-    SET(CMAKE_FIND_LIBRARY_SUFFIXES .a)
-endif ()
-
-if (TENTRIS_BUILD_WITH_TCMALLOC)
-    find_library(TCMALLOCMINIMAL tcmalloc_minimal)
-    if (NOT TCMALLOCMINIMAL)
-        find_library(TCMALLOCMINIMAL tcmalloc-minimal)
-    endif()
-    if (NOT TCMALLOCMINIMAL)
-        message(FATAL_ERROR "Neither tcmalloc-minimal nor tcmalloc_minimal was found")
-    endif()
-    message("tcmalloc minimal ${TCMALLOCMINIMAL}")
-    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${TCMALLOCMINIMAL}")
-    if(TENTRIS_STATIC)
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--eh-frame-hdr")
-    endif()
-endif()
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -g -O0")
-
-# Lightweight C++ command line option parser https://github.com/jarro2783/cxxopts
-include(FetchContent)
-FetchContent_Declare(
-        cxxopts
-        GIT_REPOSITORY https://github.com/jarro2783/cxxopts.git
-        GIT_TAG v2.2.1
-        GIT_SHALLOW TRUE
-)
-set(CXXOPTS_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
-set(CXXOPTS_BUILD_TESTS OFF CACHE BOOL "" FORCE)
-set(CXXOPTS_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
-set(CXXOPTS_ENABLE_WARNINGS OFF CACHE BOOL "" FORCE)
-FetchContent_MakeAvailable(cxxopts)
-
-add_library(csv-parser INTERFACE)
-target_include_directories(csv-parser INTERFACE
-        thirdparty/csv-parser
-        )
-
-
-add_library(rapidjson INTERFACE)
-target_include_directories(rapidjson INTERFACE
-        thirdparty/RapidJSON/include
-        )
-
-find_package(tsl-hopscotch-map REQUIRED)
-find_package(fmt REQUIRED)
-find_package(hypertrie REQUIRED)
-find_package(sparql-parser-base REQUIRED)
-find_package(rdf-parser REQUIRED)
-if (TENTRIS_STATIC)
-    SET(Boost_USE_STATIC_LIBS ON)
-endif ()
-
-find_package(Boost REQUIRED COMPONENTS system log_setup log thread)
-if (TENTRIS_STATIC)
-    SET(RESTINIO_USE_BOOST_ASIO=static)
-endif ()
-find_package(restinio REQUIRED)
-find_package(string-view-lite REQUIRED)
-find_package(optional-lite REQUIRED)
-find_package(serd REQUIRED)
-
-# make a library of the code
-add_library(tentris INTERFACE)
-
-include(FetchContent)
-FetchContent_Declare(
-        cppitertools
-        GIT_REPOSITORY https://github.com/ryanhaining/cppitertools.git
-        GIT_TAG v2.1
-        GIT_SHALLOW TRUE)
-
-FetchContent_MakeAvailable(cppitertools)
-
-target_link_libraries(tentris
-        INTERFACE
-        stdc++fs # for #include <filesystem>
-        cxxopts
-        rapidjson
-        sparql-parser-base::sparql-parser-base
-        tsl::hopscotch_map
-        fmt::fmt
-        hypertrie::hypertrie
-        rdf-parser::rdf-parser
-        Boost::Boost
-        serd::serd
-        cppitertools::cppitertools
-        )
-
-target_include_directories(tentris INTERFACE
-        # todo: move code to include/Dice/tentris
-        src/lib/
-        ${Boost_INCLUDE_DIRS}
-        )
-
-# for rt and pthread linkage see:
-# * https://stackoverflow.com/questions/58848694/gcc-whole-archive-recipe-for-static-linking-to-pthread-stopped-working-in-rec
-# * https://stackoverflow.com/questions/35116327/when-g-static-link-pthread-cause-segmentation-fault-why
-if (TENTRIS_STATIC)
-    set(TENTRIS_STATIC_DEF -static)
-    set(TENTRIS_PTHREAD_DEF -Wl,--whole-archive -lrt -lpthread -Wl,--no-whole-archive)
-else()
-    set(TENTRIS_PTHREAD_DEF -lpthread)
+if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt)
+    if (NOT CMAKE_BUILD_TYPE)
+        set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
+    endif ()
 endif ()

-# main executable targets
-add_executable(tentris_server src/exec/TentrisServer.cpp src/exec/config/ServerConfig.hpp src/exec/config/TerminalConfig.hpp)
-
-target_link_libraries(tentris_server
-        PRIVATE
-        ${TENTRIS_STATIC_DEF}
-        ${TENTRIS_PTHREAD_DEF}
-        tentris
-        restinio::restinio
-        nonstd::string-view-lite
-        nonstd::optional-lite
-        nonstd::variant-lite
-        )
-
-set_target_properties(tentris_server PROPERTIES LINK_FLAGS_RELEASE -s)
-
-add_dependencies(tentris_server tentris)
-
-add_executable(tentris_terminal src/exec/TentrisTerminal.cpp src/exec/config/ServerConfig.hpp src/exec/config/TerminalConfig.hpp)
-target_link_libraries(tentris_terminal
-        ${TENTRIS_STATIC_DEF}
-        ${TENTRIS_PTHREAD_DEF}
-        tentris
-        )
-
-set_target_properties(tentris_terminal PROPERTIES LINK_FLAGS_RELEASE -s)
-
-add_dependencies(tentris_terminal tentris)
-
-add_executable(rdf2ids src/exec/tools/RDF2IDs.cpp)
-target_link_libraries(rdf2ids
-        ${TENTRIS_STATIC_DEF}
-        ${TENTRIS_PTHREAD_DEF}
-        tentris
-        )
-
-set_target_properties(rdf2ids PROPERTIES LINK_FLAGS_RELEASE -s)
-add_dependencies(rdf2ids tentris)
-
-
-add_executable(ids2hypertrie src/exec/tools/IDs2Hypertrie.cpp)
-target_link_libraries(ids2hypertrie
-        ${TENTRIS_STATIC_DEF}
-        ${TENTRIS_PTHREAD_DEF}
-        tentris
-        csv-parser
-        )
-
-add_dependencies(ids2hypertrie tentris)
-
-if (CMAKE_BUILD_TYPE MATCHES "Release")
-    set_property(TARGET tentris_server PROPERTY INTERPROCEDURAL_OPTIMIZATION True)
-    set_property(TARGET tentris_terminal PROPERTY INTERPROCEDURAL_OPTIMIZATION True)
-
-    set_property(TARGET ids2hypertrie PROPERTY INTERPROCEDURAL_OPTIMIZATION True)
-    set_property(TARGET rdf2ids PROPERTY INTERPROCEDURAL_OPTIMIZATION True)
-endif ()
-
-option(TENTRIS_BUILD_TESTS "build tests alongside the project" OFF)
-if (TENTRIS_BUILD_TESTS)
-    enable_testing() # enable this to enable testing
-    add_subdirectory(tests)
-endif ()
+add_subdirectory(libs)
diff --git a/Dockerfile b/Dockerfile
index da26be8a..27e54024 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,72 +1,80 @@
-FROM ubuntu:22.04 AS builder
-ARG DEBIAN_FRONTEND=noninteractive
-ARG TENTRIS_MARCH="x86-64"
+FROM alpine:3.17 AS builder
+ARG MARCH="x86-64-v3"

-RUN apt-get -qq update && \
-    apt-get -qq install -y make cmake uuid-dev git openjdk-11-jdk python3-pip python3-setuptools python3-wheel libstdc++-11-dev clang-14 g++-11 pkg-config lld autoconf libtool
-RUN rm /usr/bin/ld && ln -s /usr/bin/lld-14 /usr/bin/ld
-ARG CXX="clang++-14"
-ARG CC="clang-14"
-ENV CXXFLAGS="${CXXFLAGS} -march=${TENTRIS_MARCH}"
-ENV CMAKE_EXE_LINKER_FLAGS="-L/usr/local/lib/x86_64-linux-gnu -L/lib/x86_64-linux-gnu -L/usr/lib/x86_64-linux-gnu -L/usr/local/lib"
+RUN apk update && \
+    apk add \
+        make cmake autoconf automake pkgconfig \
+        gcc g++ gdb \
+        clang15 clang15-dev clang15-libs clang15-extra-tools clang15-static lldb llvm15 llvm15-dev lld \
+        openjdk11-jdk \
+        python3 py3-pip \
+        bash git libtool util-linux-dev linux-headers

-# Compile more recent tcmalloc-minimal with clang-14 + -march
-RUN git clone --quiet --branch gperftools-2.8.1 https://github.com/gperftools/gperftools
+ARG CC="clang"
+ARG CXX="clang++"
+ENV CXXFLAGS="${CXXFLAGS} -march=${MARCH}"
+RUN rm /usr/bin/ld && ln -s /usr/bin/lld /usr/bin/ld # use lld as default linker
+
+
+# Compile more recent tcmalloc-minimal with clang-15 + -march
+RUN git clone --quiet --branch gperftools-2.9.1 --depth 1 https://github.com/gperftools/gperftools
 WORKDIR /gperftools
 RUN ./autogen.sh
-RUN export LDFLAGS="${CMAKE_EXE_LINKER_FLAGS}" && ./configure \
+RUN ./configure \
     --enable-minimal \
     --disable-debugalloc \
     --enable-sized-delete \
     --enable-dynamic-sized-delete-support && \
-    make -j && \
+    make -j$(nproc) && \
     make install
 WORKDIR /

+ENV CONAN_DISABLE_STRICT_MODE=1
+
 # install and configure conan
-RUN pip3 install "conan<2" && \
+RUN pip3 install conan==1.62.0 && \
     conan user && \
     conan profile new --detect default && \
+    conan profile update settings.compiler=clang default && \
     conan profile update settings.compiler.libcxx=libstdc++11 default && \
+    conan profile update settings.compiler.cppstd=20 default && \
     conan profile update env.CXXFLAGS="${CXXFLAGS}" default && \
-    conan profile update env.CMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" default && \
     conan profile update env.CXX="${CXX}" default && \
     conan profile update env.CC="${CC}" default && \
-    conan profile update options.boost:extra_b2_flags="cxxflags=\\\"${CXXFLAGS}\\\"" default
+    conan profile update options.boost:extra_b2_flags="cxxflags=\\\"${CXXFLAGS}\\\"" default && \
+    conan profile update options.boost:header_only=True default && \
+    conan profile update options.restinio:asio=boost default

 # add conan repositories
 RUN conan remote add dice-group https://conan.dice-research.org/artifactory/api/conan/tentris

 # build and cache dependencies via conan
 WORKDIR /conan_cache
-COPY conanfile.txt conanfile.txt
-RUN ln -s /usr/bin/clang-14 /usr/bin/clang # required by meson for building serd
-RUN conan install . --build=missing --profile default > conan_build.log
-
+COPY conanfile.py .
+COPY CMakeLists.txt .
+RUN conan install . --build=* --profile default

 # import project files
 WORKDIR /tentris
-COPY thirdparty thirdparty
-COPY src src
+COPY libs libs
+COPY execs execs
 COPY cmake cmake
-COPY CMakeLists.txt CMakeLists.txt
-COPY conanfile.txt conanfile.txt
+COPY CMakeLists.txt .
+COPY conanfile.py .

 ##build
-WORKDIR /tentris/build
-RUN conan install .. --build=missing
-# todo: should be replaced with toolchain file like https://github.com/ruslo/polly/blob/master/clang-libcxx17-static.cmake
+WORKDIR /tentris/execs/build
 RUN cmake \
-    -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" \
     -DCMAKE_BUILD_TYPE=Release \
-    -DTENTRIS_BUILD_WITH_TCMALLOC=true \
-    -DTENTRIS_STATIC=true \
+    -DWITH_TCMALLOC=true \
+    -DSTATIC=true \
+    -DMARCH=${MARCH} \
     ..
 RUN make -j $(nproc)

+
 FROM scratch
-COPY --from=builder /tentris/build/tentris_server /tentris_server
-COPY --from=builder /tentris/build/tentris_terminal /tentris_terminal
-COPY --from=builder /tentris/build/ids2hypertrie /ids2hypertrie
-COPY --from=builder /tentris/build/rdf2ids /rdf2ids
-COPY LICENSE LICENSE
+COPY --from=builder /tentris/execs/build/tentris-server/tentris_server /tentris_server
+COPY --from=builder /tentris/execs/build/tentris-loader/tentris_loader /tentris_loader
+COPY --from=builder /tentris/execs/build/tools/deduplicated-nt/deduplicated_nt /deduplicated_nt
+COPY --from=builder /tentris/execs/build/tools/rdf2ids/rdf2ids /rdf2ids
 COPY README.MD README.MD

 ENTRYPOINT ["/tentris_server"]
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 2786991f..00000000
--- a/LICENSE
+++ /dev/null
@@ -1,661 +0,0 @@
- - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - TENTRIS: Tensor-based Triple Store - Copyright (C) 2018 - today Alexander Bigerl - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -. diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/thirdparty/csv-parser/LICENSE b/LICENSE-MIT similarity index 92% rename from thirdparty/csv-parser/LICENSE rename to LICENSE-MIT index da835973..5539ba2c 100644 --- a/thirdparty/csv-parser/LICENSE +++ b/LICENSE-MIT @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2017-2019 Vincent La +Copyright (c) 2024 Data Science Group at Paderborn University, Paderborn, Germany Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.MD b/README.MD index 257ffa7e..b3661719 100644 --- a/README.MD +++ b/README.MD @@ -1,228 +1,86 @@ -# Tᴇɴᴛʀɪs: A Tensor-based Triple Store +# Tᴇɴᴛʀɪs: A Tensor-based Triple Store

Tᴇɴᴛʀɪs Logo

Tᴇɴᴛʀɪs is a tensor-based RDF triple store with SPARQL support. It is introduced and described in: -> Alexander Bigerl, Felix Conrads, Charlotte Behning, Mohamed Ahmed Sherif, Muhammad Saleem and Axel-Cyrille Ngonga Ngomo (2020) **Tentris – A Tensor-Based Triple Store.** In: The Semantic Web – ISWC 2020 - -https://tentris.dice-research.org/iswc2020/ - -``` -@InProceedings{bigerl2020tentris, - author = {Bigerl, Alexander and Conrads, Felix and Behning, Charlotte and Sherif, Mohamed Ahmed and Saleem, Muhammad and Ngonga Ngomo, Axel-Cyrille}, - booktitle = {The Semantic Web -- ISWC 2020}, - publisher = {Springer International Publishing}, - title = { {T}entris -- {A} {T}ensor-{B}ased {T}riple {S}tore}, - pages = {56--73}, - url = {https://papers.dice-research.org/2020/ISWC_Tentris/iswc2020_tentris_public.pdf}, - year = 2020, - isbn = {978-3-030-62419-4} -} -``` +> [Alexander Bigerl, Felix Conrads, Charlotte Behning, Mohamed Ahmed Sherif, Muhammad Saleem and Axel-Cyrille Ngonga Ngomo (2020) +**Tentris – A Tensor-Based Triple Store. +** In: The Semantic Web – ISWC 2020](https://tentris.dice-research.org/iswc2020/) and -> Alexander Bigerl, Lixi Conrads, Charlotte Behning, Muhammad Saleem and Axel-Cyrille Ngonga Ngomo (2022) Hashing the Hypertrie: Space- and Time-Efficient Indexing for SPARQL in Tensors. In: The Semantic Web – ISWC 2022 Hashing the Hypertrie: Space- and Time-Efficient Indexing for SPARQL in Tensors - -https://tentris.dice-research.org/iswc2022/ +> [Alexander Bigerl, Lixi Conrads, Charlotte Behning, Muhammad Saleem and Axel-Cyrille Ngonga Ngomo (2022) Hashing the Hypertrie: Space- and Time-Efficient Indexing for SPARQL in Tensors. In: The Semantic Web – ISWC 2022 Hashing the Hypertrie: Space- and Time-Efficient Indexing for SPARQL in Tensors](https://tentris.dice-research.org/iswc2022/) -``` -@InProceedings{bigerl2022hashing-the-hypertrie, - author = {Bigerl, Alexander and Conrads, Lixi and Behning, Charlotte and Sherif, Mohamed Ahmed and Saleem, Muhammad and Ngonga Ngomo, Axel-Cyrille}, - booktitle = {The Semantic Web -- ISWC 2022}, - publisher = {Springer International Publishing}, - title = { {H}ashing the {H}ypertrie: {S}pace- and {T}ime-{E}fficient {I}ndexing for {SPARQL} in {T}ensors}, - url = {https://papers.dice-research.org/2022/ISWC_Hashing_the_Hypertrie/iswc2022_hashing_the_hypertrie_public.pdf}, - year = 2022 -} - -``` +## Get It - +* download [static prebuilt binaries](https://github.com/dice-group/tentris-private/releases) + and [try them out](#running-tentris) +* [build it with docker](#docker) -## Key Features -* fast tensor-based in-memory storage and query processing -* SPARQL Protocol conform HTTP interface -* supports at the moment SPARQL queries with SELECT + opt. DISTINCT + basic graph pattern -* available for Linux x86-64 +## Running Tᴇɴᴛʀɪs -
Current limitations:

-* no persistance
-* SPARQL support limited to SELECT + opt. DISTINCT + basic graph pattern
-* data loading only possible at startup

+#### Bulk-load Data
+
+Provide an N-Triples or Turtle file to build an index. By default, the index is stored in the current directory. The
+path can be changed with the option `--storage`.

-## Get It
-* download [static prebuilt binaries](https://github.com/dice-group/tentris/releases) and [try them out](#running-tentris)
-* pull a [docker image](https://hub.docker.com/repository/docker/dicegroup/tentris_server) and [try them out](#docker)
-* [build it yourself](#build-it-yourself)

+```shell
+tentris_loader --file my_nt_file.nt
+```

-## running Tᴇɴᴛʀɪs
-Tᴇɴᴛʀɪs provides two ways of running it. Either as a HTTP endpoint or as a interactive commandline tool. Make sure you build Tᴇɴᴛʀɪs successfully, before proceeding below.
-### HTTP endpoint
-
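The loader accepts further options, defined in `execs/tentris-loader/src/dice/tentris-loader/TentrisLoader.cpp` later in this patch. An illustrative invocation (the paths are placeholders; `1000000` is the documented default bulk size):

```shell
tentris_loader --file my_nt_file.nt --storage /data/tentris --bulksize 1000000
```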
+
+#### Start HTTP endpoint
+To start Tᴇɴᴛʀɪs as an HTTP endpoint on port 9080, run:
-#### Start
-To start Tᴇɴᴛʀɪs as a HTTP endpoint run
-```
-tentris_server -p 9080 -f my_nt_file.nt
```
-to load the data from the provided `.nt` file and serve SPARQL endpoint at port 9080.
-For more options commandline options see ` tentris_server --help`.
+tentris_server -p 9080
+```
#### Query
-The endpoint may now be queried locally at: `127.0.0.1:9080/sparql?query=*your query*`.
-
-*Notice*: the query string `*your query*` must be URL encoded.
-You can use any online URL encoder like .
-An additional endpoint is provided at `127.0.0.1:9080/stream` using chunk encoded HTTP response. This endpoint should be used for very large responses (>1mio results).
+The SPARQL endpoint may now be queried locally at: `127.0.0.1:9080/sparql?query=*your query*`. You can execute queries
+with the following curl command:
-
-#### Usage Example
-
-Consider the query below against a [SP²Bench](http://dbis.informatik.uni-freiburg.de/forschung/projekte/SP2B/) data set:
-```
-PREFIX rdf:
-PREFIX bench:
-
-SELECT DISTINCT ?article
-WHERE {
- ?article rdf:type bench:Article .
- ?article ?property ?value
-}
-```
-
-To run the query start Tᴇɴᴛʀɪs with:
+```shell
+curl -G \
+--data-urlencode 'query=SELECT * WHERE { ?s ?p ?o . }' \
+'127.0.0.1:9080/sparql'
```
-tentris_server -p 3030 -f sp2b.nt
-```
-You can find a populated sp2b.nt file in [tests/dataset/sp2b.nt](tests/dataset/sp2b.nt).
-
-now, visit the follwing IRI in a browser to send the query to your Tᴇɴᴛʀɪs endpoint:
-
-
-### CLI Endpoint +If you want to type the query in your browser, the query string must be URL encoded. You can use any online URL encoder +like . -
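For example, the query `SELECT * WHERE { ?s ?p ?o . }` becomes the following percent-encoded GET request (hand-encoded here for illustration; any URL encoder produces an equivalent string):

```shell
curl '127.0.0.1:9080/sparql?query=SELECT%20%2A%20WHERE%20%7B%20%3Fs%20%3Fp%20%3Fo%20.%20%7D'
```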
-
-For small experiments it is sometimes more convenient to use a commandline tool for querying an RDF graph.
-Therefore, Tᴇɴᴛʀɪs provides a commandline interface.
-
-To start Tᴇɴᴛʀɪs as a interactive commandline tool, run:
-```
-tentris_terminal -f my_nt_file.nt
-```
+The following endpoints are available:
-After the RDF data from `my_nt_file.nt` is loaded, you type your query and hit ENTER. After the result was printed, you can enter your next query.
-
-For more commandline options see `tentris_terminal --help`.
+
+- HTTP GET `/sparql?query=` for normal queries
+- HTTP GET `/stream?query=` for queries with huge results
+- HTTP GET `/count?query=` as a workaround for count (consumes a select query)
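The same curl pattern shown above works against the other two endpoints; for instance, counting the results of a placeholder SELECT query via `/count`:

```shell
curl -G \
--data-urlencode 'query=SELECT * WHERE { ?s ?p ?o . }' \
'127.0.0.1:9080/count'
```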
- ## Docker -Using the Tᴇɴᴛʀɪs docker image is really easy. Find necessary steps below. -
-* A docker image is available on [docker hub](https://hub.docker.com/r/dicegroup/tentris_server). Get it with +Use the [Dockerfile](./Dockerfile) to build tentris. + +* A docker image is available on [docker hub](https://hub.docker.com/r/dicegroup/tentris_server). Get it with ```shell script + docker build -f Dockerfile . docker pull dicegroup/tentris_server ``` -* To show the available commandline options, run - ```shell script - docker run --rm dicegroup/tentris_server --help - ``` -* Tᴇɴᴛʀɪs uses by default the port 9080, so make sure you forward it, e.g. - ```shell script - docker run --publish=9080:9080 dicegroup/tentris_server - ``` -* To load data, mount its enclosing directory to the container and tell Tᴇɴᴛʀɪs, to load it: - ```shell script - docker run -v /localfolder:/datasets --publish=9080:9080 dicegroup/tentris_server -f /datasets/yourRDFfile.nt - ``` -* By default, Tᴇɴᴛʀɪs writes logs to the `/tentris` in the container. To make logs available outside the container, you can mount them as well: - ```shell script - docker run -v /local-log-dir:/tentris --publish=9080:9080 dicegroup/tentris_server - ``` -* The other command-line tools `tentris_terminal`, `ids2hypertrie` and `ids2hypertrie` are also available in the container. Run them like: - ```shell script - docker run -it dicegroup/tentris_server tentris_terminal - ``` -
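After building or pulling the image, a typical invocation publishes the endpoint port (a sketch: the `tentris` tag is illustrative, 9080 is tentris_server's default port, and the image entrypoint is assumed to start the server as the previous image did):

```shell script
docker build -f Dockerfile -t tentris .
docker run --publish=9080:9080 tentris
```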
-## Build It Yourself +
-To build Tᴇɴᴛʀɪs yourself, you need some experience with building C++ projects. +## Build It Yourself
+Tᴇɴᴛʀɪs is known to build on Ubuntu 22.04 and newer. +Building was tested with Clang 15. As standard library, only libstdc++11 (v12) was tested. For details +refer to the [Dockerfile](./Dockerfile) or github actions. -### Build Tools - -Tᴇɴᴛʀɪs is known to build successfully on Ubuntu 20.04 and newer. -Building was tested with clang 14 and libstdc++-11-dev (gcc's standard library). - -The following packages are required to build Tᴇɴᴛʀɪs: -``` -sudo apt install make cmake uuid-dev git openjdk-11-jdk python3-pip python3-setuptools python3-wheel libstdc++-11-dev clang-14 g++-11 pkg-config lld autoconf libtoolmake cmake uuid-dev git openjdk-11-jdk python3-pip python3-setuptools python3-wheel libstdc++-11-dev clang-14 g++-11 pkg-config lld autoconf libtool -``` -and set the compilers to be used in your current shell: -```bash -export CXX="clang++-14" -export CC="clang-14" -``` -Additionally, a conan v1.x is required: -``` -pip3 install --user "conan<2" -``` - -### Dependencies -Most required dependencies are installed via conan. Therefore, Add the respective remotes: -```shell script -conan remote add dice-group https://conan.dice-research.org/artifactory/api/conan/tentris -``` -### Pull & Build -After you installed all dependencies, you are ready to build Tᴇɴᴛʀɪs. -Make sure you are connected to the internet as Tᴇɴᴛʀɪs downloads things at several points throughout the build processes. - -If you did not so far, clone Tᴇɴᴛʀɪs: -``` -git clone https://github.com/dice-group/tentris.git -``` -Make sure you are in the cloned folder: -``` -cd tentris -``` -Now, make a build directory and enter it. -```shell script -mkdir build -cd build -``` -Get and build the dependencies with conan: -```shell script -conan install .. --build=missing --settings compiler.libcxx="libstdc++11" -``` -Generate the build skripts with CMAKE and run the build: -```shell script -cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release .. -make -j tentris_server tentris_terminal -``` -Now is the time to get yourself a coffee. In about -When you build Tᴇɴᴛʀɪs for the first time, it will take some time. - -The binaries will be located at `tentris/build/bin`. - -### Debug & Test -To compile Tᴇɴᴛʀɪs with debugging symbols, proceed as above but change the cmake command to `cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug .. `. - -To compile the tests, run `cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DTENTRIS_BUILD_TESTS=True .. ` for debugging or `cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release -DTENTRIS_BUILD_TESTS=True .. ` for release. - -
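A manual build might look as follows (a sketch, not verified here: it assumes Clang 15 and a conan 1.x client on the PATH, and uses `execs/` as the CMake top-level project; the bundled CMake scripts download cmake-conan and resolve dependencies automatically):

```shell
export CC=clang-15 CXX=clang++-15
pip3 install --user "conan<2"
cmake -S execs -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build --parallel
```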
+ diff --git a/cmake/boilerplate_init.cmake b/cmake/boilerplate_init.cmake index 675e2811..a70a8beb 100644 --- a/cmake/boilerplate_init.cmake +++ b/cmake/boilerplate_init.cmake @@ -1,6 +1,6 @@ macro(boilerplate_init) ## enforce standard compliance - set(CMAKE_CXX_STANDARD_REQUIRED ON) + set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_CXX_EXTENSIONS OFF) ## C++ compiler flags @@ -24,4 +24,6 @@ macro(boilerplate_init) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE) endif () endif () -endmacro() + + string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}" IS_TOP_LEVEL) +endmacro() \ No newline at end of file diff --git a/cmake/component-config.cmake.in b/cmake/component-config.cmake.in new file mode 100644 index 00000000..d8c788f1 --- /dev/null +++ b/cmake/component-config.cmake.in @@ -0,0 +1,5 @@ +# Dummy config file +# When a dependency is added with add_subdirectory, but searched with find_package + +# Redirect to the directory added with add_subdirectory +add_subdirectory(@PROJECT_SOURCE_DIR@ @PROJECT_BINARY_DIR@) \ No newline at end of file diff --git a/cmake/conan_cmake.cmake b/cmake/conan_cmake.cmake index 1cd79894..fcc6a1eb 100644 --- a/cmake/conan_cmake.cmake +++ b/cmake/conan_cmake.cmake @@ -3,25 +3,24 @@ macro(install_packages_via_conan conanfile conan_options) list(APPEND CMAKE_MODULE_PATH ${CMAKE_BINARY_DIR}) list(APPEND CMAKE_PREFIX_PATH ${CMAKE_BINARY_DIR}) - if (NOT EXISTS "${CMAKE_BINARY_DIR}/conan.cmake") message(STATUS "Downloading conan.cmake from https://github.com/conan-io/cmake-conan") file(DOWNLOAD "https://raw.githubusercontent.com/conan-io/cmake-conan/0.18.1/conan.cmake" "${CMAKE_BINARY_DIR}/conan.cmake" TLS_VERIFY ON) endif () - include(${CMAKE_BINARY_DIR}/conan.cmake) conan_cmake_autodetect(settings) - conan_check(VERSION 1 DETECT_QUIET) - if (CONAN_CMD) - conan_cmake_install(PATH_OR_REFERENCE ${conanfile} - BUILD missing - SETTINGS ${settings} - OPTIONS "${conan_options}" - ENV_HOST "CC=${CMAKE_C_COMPILER};CXX=${CMAKE_CXX_COMPILER}") - else () - message(WARNING "No conan executable was found. Dependency retrieval via conan is disabled. 
System dependencies will be used if available.") - endif () + + if (IS_TOP_LEVEL AND BUILD_TESTING) + set(CONAN_HYPERTRIE_WITH_TEST_DEPS "True") + else() + set(CONAN_HYPERTRIE_WITH_TEST_DEPS "False") + endif() + conan_cmake_install(PATH_OR_REFERENCE ${conanfile} + BUILD missing + SETTINGS ${settings} + OPTIONS "${conan_options}" + GENERATOR "CMakeDeps") endmacro() \ No newline at end of file diff --git a/cmake/dummy-config.cmake.in b/cmake/dummy-config.cmake.in new file mode 100644 index 00000000..d8c788f1 --- /dev/null +++ b/cmake/dummy-config.cmake.in @@ -0,0 +1,5 @@ +# Dummy config file +# When a dependency is added with add_subdirectory, but searched with find_package + +# Redirect to the directory added with add_subdirectory +add_subdirectory(@PROJECT_SOURCE_DIR@ @PROJECT_BINARY_DIR@) \ No newline at end of file diff --git a/cmake/install_library.cmake b/cmake/install_library.cmake new file mode 100644 index 00000000..577cdd18 --- /dev/null +++ b/cmake/install_library.cmake @@ -0,0 +1,79 @@ +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +function(install_component COMPONENT_NAME INCLUDE_PATH) + + target_include_directories( + ${COMPONENT_NAME} PUBLIC $/${PROJECT_NAME}/${COMPONENT_NAME}) + + install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${INCLUDE_PATH}/ + DESTINATION include/${PROJECT_NAME}/${COMPONENT_NAME}/ + FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h") + + install(TARGETS ${COMPONENT_NAME} + EXPORT ${COMPONENT_NAME}-config + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}/${COMPONENT_NAME}/ + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}/${COMPONENT_NAME}/ + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}/${PROJECT_NAME}/${COMPONENT_NAME}/ + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/${COMPONENT_NAME}/ + ) + + install( + EXPORT ${COMPONENT_NAME}-config + FILE ${COMPONENT_NAME}-config.cmake + NAMESPACE ${PROJECT_NAME}:: + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/cmake/${PROJECT_NAME}/${COMPONENT_NAME}/) + + write_basic_package_version_file(${COMPONENT_NAME}-config-version.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMinorVersion) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${COMPONENT_NAME}-config-version.cmake DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/${COMPONENT_NAME}/) +endfunction() + +function(install_interface_component COMPONENT_NAME INCLUDE_PATH) + + target_include_directories( + ${COMPONENT_NAME} INTERFACE $/${PROJECT_NAME}/${COMPONENT_NAME}) + + install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${INCLUDE_PATH}/ + DESTINATION include/${PROJECT_NAME}/${COMPONENT_NAME}/ + FILES_MATCHING PATTERN "*.hpp" PATTERN "*.h") + + install(TARGETS ${COMPONENT_NAME} + EXPORT ${COMPONENT_NAME}-config + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/${COMPONENT_NAME}/ + ) + + install( + EXPORT ${COMPONENT_NAME}-config + FILE ${COMPONENT_NAME}-config.cmake + NAMESPACE ${PROJECT_NAME}:: + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/cmake/${PROJECT_NAME}/${COMPONENT_NAME}/) + + write_basic_package_version_file(${COMPONENT_NAME}-config-version.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMinorVersion) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${COMPONENT_NAME}-config-version.cmake DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/${COMPONENT_NAME}/) +endfunction() + +function(install_package) + + write_basic_package_version_file("${CMAKE_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake" + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion) + + configure_package_config_file( 
+
+            "${PROJECT_SOURCE_DIR}/cmake/main-component-config.cmake.in"
+            "${CMAKE_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
+            INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/cmake/${PROJECT_NAME}/)
+
+    write_basic_package_version_file(
+            "${PROJECT_NAME}-config-version.cmake"
+            VERSION ${PROJECT_VERSION}
+            COMPATIBILITY SameMajorVersion)
+
+    install(FILES "${CMAKE_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
+            "${PROJECT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
+            DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/cmake/${PROJECT_NAME}/)
+endfunction()

diff --git a/cmake/main-component-config.cmake.in b/cmake/main-component-config.cmake.in new file mode 100644 index 00000000..1de60f21 --- /dev/null +++ b/cmake/main-component-config.cmake.in @@ -0,0 +1,40 @@
+# parent package with targets in components
+# components are expected to contain the actual target
+# each component's config should be in an equally named subdirectory, i.e.: ${CMAKE_INSTALL_DATAROOTDIR}/cmake/@PROJECT_NAME@/${component}/${component}-config.cmake
+
+
+file(GLOB @PROJECT_NAME@_available_components LIST_DIRECTORIES true ${CMAKE_CURRENT_LIST_DIR}/*)
+
+# available components are listed here
+set(@PROJECT_NAME@_available_components endpoint node-store rdf-tensor sparql2tensor tentris triple-store)
+
+# check if the user provided components are actually available
+foreach(component ${@PROJECT_NAME@_FIND_COMPONENTS})
+    if(NOT component IN_LIST @PROJECT_NAME@_available_components)
+        message(FATAL_ERROR "Component ${component} does not exist in package @PROJECT_NAME@. Available components are ${@PROJECT_NAME@_available_components}")
+    endif()
+endforeach()
+
+# default component @PROJECT_NAME@::@PROJECT_NAME@ is always included
+include(${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@/@PROJECT_NAME@-config.cmake)
+
+# add transitive dependencies among components
+set(transitive_components "")
+foreach(component ${@PROJECT_NAME@_FIND_COMPONENTS})
+    if(${component} STREQUAL "tentris")
+        list(APPEND transitive_components ${@PROJECT_NAME@_available_components})
+    elseif(${component} STREQUAL "endpoint")
+        list(APPEND transitive_components node-store rdf-tensor sparql2tensor triple-store)
+    elseif(${component} STREQUAL "triple-store")
+        list(APPEND transitive_components node-store rdf-tensor sparql2tensor triple-store)
+    elseif(${component} STREQUAL "node-store" OR ${component} STREQUAL "sparql2tensor")
+        list(APPEND transitive_components rdf-tensor)
+    endif()
+endforeach()
+list(APPEND @PROJECT_NAME@_FIND_COMPONENTS ${transitive_components})
+list(REMOVE_DUPLICATES @PROJECT_NAME@_FIND_COMPONENTS)
+
+# include all listed components
+foreach(component ${@PROJECT_NAME@_FIND_COMPONENTS})
+    include(${CMAKE_CURRENT_LIST_DIR}/${component}/${component}-config.cmake)
+endforeach()
diff --git a/cmake/version.hpp.in b/cmake/version.hpp.in deleted file mode 100644 index d10471eb..00000000 --- a/cmake/version.hpp.in +++ /dev/null @@ -1,12 +0,0 @@
-#ifndef TENTRIS_VERSION_HPP
-#define TENTRIS_VERSION_HPP
-
-#include <array>
-
-namespace Dice::tentris {
-	inline constexpr const char name[] = "@PROJECT_NAME@";
-	inline constexpr const char version[] = "@tentris_VERSION@";
-	inline constexpr std::array version_tuple = {@tentris_VERSION_MAJOR@, @tentris_VERSION_MINOR@, @tentris_VERSION_PATCH@};
-}// namespace Dice::tentris
-
-#endif//TENTRIS_VERSION_HPP
diff --git a/conanfile.py b/conanfile.py new file mode 100644 index 00000000..7c1adc1d --- /dev/null +++ b/conanfile.py @@ -0,0 +1,169 @@
+import os
+import re
+
+from conan import ConanFile
+from conan.tools.cmake import
CMake
+from conan.tools.files import rmdir, load
+
+
+class Recipe(ConanFile):
+    url = "https://tentris.dice-research.org"
+    topics = ("triplestore", "sparql", "rdf", "semantic-web", "tensor")
+    settings = "os", "compiler", "build_type", "arch"
+    options = {
+        "shared": [True, False],
+        "fPIC": [True, False],
+        "with_exec_deps": [True, False],
+    }
+    default_options = {
+        "shared": False,
+        "fPIC": True,
+        "with_exec_deps": False,
+        "restinio/*:asio": "boost",
+    }
+
+    def requirements(self):
+        public_reqs = [
+            "boost/1.84.0",
+            "fmt/8.1.1",
+            "restinio/0.6.17",
+            "expected-lite/0.6.3",  # overrides restinio dependency
+            "hypertrie/0.9.4",
+            "metall/0.21",
+            "rdf4cpp/0.0.8.1",
+            "dice-hash/0.4.0",
+            "robin-hood-hashing/3.11.5",
+            "cxxopts/2.2.1",
+            "sparql-parser-base/0.3.0",
+            "taskflow/3.4.0",
+            "cppitertools/2.1",
+            "spdlog/1.10.0",
+            "rapidjson/cci.20220822",
+        ]
+
+        private_reqs = [
+        ]
+
+        exec_reqs = [
+            "nlohmann_json/3.11.2",
+            "vincentlaucsb-csv-parser/2.1.3",
+        ]
+        for req in public_reqs:
+            self.requires(req)
+        for req in private_reqs:
+            self.requires(req, private=True)
+
+        if self.options.get_safe("with_exec_deps"):
+            for req in exec_reqs:
+                self.requires(req)
+
+    generators = ("cmake_find_package",)
+
+    # Sources are located in the same place as this recipe, copy them to the recipe
+    exports_sources = "libs/*", "CMakeLists.txt", "cmake/*"
+
+    def config_options(self):
+        if self.settings.os == "Windows":
+            del self.options.fPIC
+
+    def set_name(self):
+        if not hasattr(self, 'name') or self.version is None:
+            cmake_file = load(self, os.path.join(self.recipe_folder, "CMakeLists.txt"))
+            self.name = re.search(r"project\(\s*([a-z\-]+)\s+VERSION", cmake_file).group(1)
+
+    def set_version(self):
+        if not hasattr(self, 'version') or self.version is None:
+            cmake_file = load(self, os.path.join(self.recipe_folder, "CMakeLists.txt"))
+            self.version = re.search(r"project\([^)]*VERSION\s+(\d+\.\d+.\d+)[^)]*\)", cmake_file).group(1)
+        if not hasattr(self, 'description') or self.description is None:
+            cmake_file = load(self, os.path.join(self.recipe_folder, "CMakeLists.txt"))
+            self.description = re.search(r"project\([^)]*DESCRIPTION\s+\"([^\"]+)\"[^)]*\)", cmake_file).group(1)
+
+    _cmake = None
+
+    def _configure_cmake(self):
+        if self._cmake:
+            return self._cmake
+        self._cmake = CMake(self)
+        self._cmake.definitions['CONAN_CMAKE'] = False
+        self._cmake.configure()
+        return self._cmake
+
+    def build(self):
+        cmake = self._configure_cmake()
+        cmake.build()
+
+    def package(self):
+        cmake = self._configure_cmake()
+        cmake.install()
+        for dir in ("res", "share"):
+            # conan.tools.files.rmdir takes the conanfile as its first argument
+            rmdir(self, os.path.join(self.package_folder, dir))
+
+    def package_info(self):
+        self.cpp_info.components["global"].set_property("cmake_target_name", "tentris::tentris")
+        self.cpp_info.components["global"].names["cmake_find_package_multi"] = "tentris"
+        self.cpp_info.components["global"].names["cmake_find_package"] = "tentris"
+        self.cpp_info.components["global"].includedirs = [f"include/tentris/tentris"]
+        self.cpp_info.components["global"].libdirs = []
+        self.cpp_info.set_property("cmake_file_name", "tentris")
+        self.cpp_info.components["global"].requires = [
+            "node-store", "rdf-tensor", "sparql2tensor", "triple-store", "endpoint",
+            "boost::boost",
+            "fmt::fmt",
+            "restinio::restinio",
+            "hypertrie::hypertrie",
+            "metall::metall",
+            "rdf4cpp::rdf4cpp",
+            "sparql-parser-base::sparql-parser-base",
+            "dice-hash::dice-hash",
+            "cxxopts::cxxopts",
+            "robin-hood-hashing::robin-hood-hashing",
+            "expected-lite::expected-lite",
+            "taskflow::taskflow",
+            "cppitertools::cppitertools",
+            "spdlog::spdlog",
+        ]
+
+        for component in ["node-store", "rdf-tensor", "sparql2tensor", "triple-store", "endpoint"]:
+            self.cpp_info.components[f"{component}"].names["cmake_find_package_multi"] = f"{component}"
+            self.cpp_info.components[f"{component}"].names["cmake_find_package"] = f"{component}"
+            self.cpp_info.components[f"{component}"].includedirs = [f"include/tentris/{component}"]
+
+        for component in ["node-store", "sparql2tensor", "triple-store", "endpoint"]:
+            self.cpp_info.components[f"{component}"].libdirs = [f"lib/tentris/{component}"]
+            self.cpp_info.components[f"{component}"].libs = [f"{component}"]
+
+        self.cpp_info.components["rdf-tensor"].requires = [
+            "rdf4cpp::rdf4cpp",
+            "hypertrie::hypertrie",
+            "boost::boost",
+            "metall::metall",
+        ]
+
+        self.cpp_info.components["node-store"].requires = [
+            "rdf-tensor",
+        ]
+
+        self.cpp_info.components["sparql2tensor"].requires = [
+            "node-store",
+            "robin-hood-hashing::robin-hood-hashing",
+            "sparql-parser-base::sparql-parser-base",
+        ]
+
+        self.cpp_info.components["triple-store"].requires = [
+            "sparql2tensor",
+            "rdf-tensor",
+        ]
+        self.cpp_info.components["endpoint"].requires = [
+            "rdf-tensor",
+            "restinio::restinio",
+            "taskflow::taskflow",
+            "cppitertools::cppitertools",
+            "spdlog::spdlog",
+            "rapidjson::rapidjson",
+        ]
+        if self.options.get_safe("with_exec_deps"):
+            self.cpp_info.components["global"].requires += [
+                "vincentlaucsb-csv-parser::vincentlaucsb-csv-parser",
+                "nlohmann_json::nlohmann_json"]
diff --git a/conanfile.txt b/conanfile.txt deleted file mode 100644 index f7e4ac3e..00000000 --- a/conanfile.txt +++ /dev/null @@ -1,22 +0,0 @@
-[requires]
-boost/1.82.0
-fmt/7.1.3
-restinio/0.6.12
-hypertrie/0.6.1@dice-group/stable
-rdf-parser/0.13.0@dice-group/stable
-sparql-parser-base/0.2.2@dice-group/stable
-serd/0.30.16
-
-[options]
-restinio:asio=boost
-restinio:with_zlib=True
-boost:shared=False
-*:shared=False
-sparql-parser-base:sparql_version=1.0
-
-
-[generators]
-cmake_find_package
-cmake_paths
-CMakeDeps
-CMakeToolchain
\ No newline at end of file
diff --git a/execs/CMakeLists.txt b/execs/CMakeLists.txt new file mode 100644 index 00000000..f27f482d --- /dev/null +++ b/execs/CMakeLists.txt @@ -0,0 +1,60 @@
+cmake_minimum_required(VERSION 3.18)
+project(tentris-binaries)
+
+include(${CMAKE_SOURCE_DIR}/../cmake/boilerplate_init.cmake)
+
+boilerplate_init()
+if (NOT IS_TOP_LEVEL)
+    message(FATAL_ERROR "Must only be used as CMake top-level project.")
+endif ()
+
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt)
+    if (NOT CMAKE_BUILD_TYPE)
+        set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
+    endif ()
+endif ()
+
+if (DEFINED MARCH)
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=${MARCH} -mtune=${MARCH}")
+endif ()
+
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fomit-frame-pointer -momit-leaf-frame-pointer")
+else ()
+    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fomit-frame-pointer")
+endif ()
+
+option(STATIC "Build tentris executables statically" OFF)
+if (STATIC)
+    SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--eh-frame-hdr -static")
+    SET(CMAKE_FIND_LIBRARY_SUFFIXES .a)
+endif ()
+
+option(WITH_TCMALLOC "Build tentris with tcmalloc."
OFF) +if (WITH_TCMALLOC) + find_library(TCMALLOCMINIMAL tcmalloc_minimal) + if (NOT TCMALLOCMINIMAL) + find_library(TCMALLOCMINIMAL tcmalloc-minimal) + endif () + if (NOT TCMALLOCMINIMAL) + message(FATAL_ERROR "Neither tcmalloc-minimal nor tcmalloc_minimal was found") + endif () + message(STATUS "tcmalloc minimal ${TCMALLOCMINIMAL}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${TCMALLOCMINIMAL}") +endif () + + +# set library options +include(${CMAKE_SOURCE_DIR}/../cmake/conan_cmake.cmake) +install_packages_via_conan("${CMAKE_SOURCE_DIR}/../conanfile.py" "with_exec_deps=True ") + +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/libtentris) + +add_subdirectory(tentris-server) +add_subdirectory(tentris-loader) +add_subdirectory(tools) diff --git a/execs/tentris-loader/CMakeLists.txt b/execs/tentris-loader/CMakeLists.txt new file mode 100644 index 00000000..e401394a --- /dev/null +++ b/execs/tentris-loader/CMakeLists.txt @@ -0,0 +1,24 @@ +add_executable(tentris_loader + src/dice/tentris-loader/TentrisLoader.cpp + ) + +find_package(Threads REQUIRED) +find_package(spdlog REQUIRED) +find_package(cxxopts REQUIRED) +find_package(Metall REQUIRED) + +target_link_libraries(tentris_loader PRIVATE + Threads::Threads + tentris::triple-store + tentris::node-store + spdlog::spdlog + cxxopts::cxxopts + Metall::Metall + ) + +if (CMAKE_BUILD_TYPE MATCHES "Release") + set_target_properties(tentris_loader PROPERTIES LINK_FLAGS_RELEASE -s) + include(CheckIPOSupported) + check_ipo_supported(RESULT result LANGUAGES CXX) # fatal error if IPO is not supported + set_property(TARGET tentris_loader PROPERTY INTERPROCEDURAL_OPTIMIZATION True) +endif () diff --git a/execs/tentris-loader/src/dice/tentris-loader/TentrisLoader.cpp b/execs/tentris-loader/src/dice/tentris-loader/TentrisLoader.cpp new file mode 100644 index 00000000..98318eae --- /dev/null +++ b/execs/tentris-loader/src/dice/tentris-loader/TentrisLoader.cpp @@ -0,0 +1,151 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +int main(int argc, char *argv[]) { + using namespace dice; + namespace fs = std::filesystem; + + std::string version = fmt::format("tentris-loader v{} is using hypertrie v{} and rdf4cpp {}.", dice::tentris::version, hypertrie::version, dice::tentris::rdf4cpp_version); + cxxopts::Options options("tentris-loader", + fmt::format("{}\nA tensor-based triple store.", version)); + options.add_options() // + ("s,storage", "Location where the index is stored.", cxxopts::value()->default_value(fs::current_path().string())) + ("f,file", "A N-Triples or Turtle file.", cxxopts::value()) // + ("b,bulksize", "Bulk-size for loading RDF files. A larger value results in a higher memory consumption during loading RDF data but may result in shorter loading times.", cxxopts::value()->default_value("1000000"))// + ("l,loglevel", fmt::format("Details of logging. 
Available values are: [{}, {}, {}, {}, {}, {}, {}]", // + spdlog::level::to_string_view(spdlog::level::trace), // + spdlog::level::to_string_view(spdlog::level::debug), // + spdlog::level::to_string_view(spdlog::level::info), // + spdlog::level::to_string_view(spdlog::level::warn), // + spdlog::level::to_string_view(spdlog::level::err), // + spdlog::level::to_string_view(spdlog::level::critical), // + spdlog::level::to_string_view(spdlog::level::off)), // + cxxopts::value()->default_value("info")) // + ("logfile", "If log is written to files.", cxxopts::value()->default_value("true")) // + ("logstdout", "If log is written to stdout.", cxxopts::value()->default_value("false")) // + ("logfiledir", "A folder path where to write the logfiles. Default is the current working directory.", cxxopts::value()->default_value(fs::current_path().string())) // + ("v,version", "Version info.") // + ("h,help", "Print this help page.") // + ; + + auto parsed_args = options.parse(argc, argv); + if (parsed_args.count("help")) { + std::cout << options.help() << std::endl; + exit(0); + } else if (parsed_args.count("version")) { + std::cout << version << std::endl; + exit(0); + } + if (not parsed_args.count("file")) { + std::cout << "Please provide an RDF file." << std::endl; + std::cout << options.help() << std::endl; + exit(0); + } + + using metall_manager = rdf_tensor::metall_manager; + + auto const storage_path = fs::absolute(fs::path{parsed_args["storage"].as()}).append("tentris_data"); + if (fs::exists(storage_path)) { + std::cout << "Index storage path (-s,--storage) " << storage_path.string() << " already exists. Please provide a different path."; + exit(0); + } + + // init logger + const auto log_level = spdlog::level::from_str(parsed_args["loglevel"].as()); + spdlog::set_level(log_level); + std::vector> sinks; + if (parsed_args["logfile"].as()) { + // Create a file rotating logger with 5mb size max and 10 rotated files + const auto max_size = 1048576 * 5; + const auto max_files = 10; + auto file_sink = std::make_shared(parsed_args["logfiledir"].as() + "/tentris.log", max_size, max_files); + file_sink->set_level(log_level); + sinks.emplace_back(std::move(file_sink)); + } + if (parsed_args["logstdout"].as()) { + auto console_sink = std::make_shared(); + console_sink->set_level(log_level); + sinks.emplace_back(std::move(console_sink)); + } + auto logger = std::make_shared("tentris logger", sinks.begin(), sinks.end()); + logger->set_level(log_level); + spdlog::set_default_logger(logger); + spdlog::set_pattern("%Y-%m-%dT%T.%e%z | %n | %t | %l | %v"); + spdlog::info(version); + spdlog::flush_every(std::chrono::seconds{5}); + + // init storage + { + metall_manager{metall::create_only, storage_path.c_str()}; + } + metall_manager storage_manager{metall::open_only, storage_path.c_str()}; + // set up node store + { + using namespace rdf4cpp::rdf::storage::node; + using namespace dice::node_store; + auto *nodestore_backend = storage_manager.find_or_construct("node-store")(storage_manager.get_allocator()); + NodeStorage::default_instance( + NodeStorage::new_instance(nodestore_backend)); + } + // setup triple store + auto &ht_context = *storage_manager.find_or_construct("hypertrie-context")(storage_manager.get_allocator()); + auto &rdf_tensor = *storage_manager.find_or_construct("rdf-tensor")(3, rdf_tensor::HypertrieContext_ptr{&ht_context}); + triple_store::TripleStore triplestore{rdf_tensor}; + fs::path ttl_file(parsed_args["file"].as()); + + {// load data + spdlog::info("Loading triples from file {}.", 
fs::absolute(ttl_file).string()); + spdlog::stopwatch loading_time; + spdlog::stopwatch batch_loading_time; + size_t total_processed_entries = 0; + size_t total_inserted_entries = 0; + size_t final_hypertrie_size_after = 0; + + triplestore.load_ttl( + parsed_args["file"].as(), + parsed_args["bulksize"].as(), + [&](size_t processed_entries, + size_t inserted_entries, + size_t hypertrie_size_after) noexcept { + std::chrono::duration batch_duration = batch_loading_time.elapsed(); + spdlog::info("batch: {:>10.3} mio triples processed, {:>10.3} mio triples added, {} elapsed, {:>10.3} mio triples in storage.", + (double(processed_entries) / 1'000'000), + (double(inserted_entries) / 1'000'000), + (batch_duration.count()), + (double(hypertrie_size_after) / 1'000'000)); + total_processed_entries = processed_entries; + total_inserted_entries = inserted_entries; + final_hypertrie_size_after = hypertrie_size_after; + batch_loading_time.reset(); + }, + [](rdf_tensor::parser::ParsingError const &error) noexcept { + std::ostringstream oss; + oss << error; + spdlog::warn(oss.str());// spdlog does not want to use the ostream operator for ParsingError + }); + spdlog::info("loading finished: {} triples processed, {} triples added, {} elapsed, {} triples in storage.", + total_processed_entries, total_inserted_entries, std::chrono::duration(loading_time.elapsed()).count(), final_hypertrie_size_after); + const auto cards = triplestore.get_hypertrie().get_cards({0, 1, 2}); + spdlog::info("Storage stats: {} triples ({} distinct subjects, {} distinct predicates, {} distinct objects)", + triplestore.size(), cards[0], cards[1], cards[2]); + } + + // create snapshot + spdlog::info("Creating snapshot: {}_snapshot", storage_path.string()); + auto snapshot_path = fs::absolute(storage_path.string().append("_snapshot")); + storage_manager.snapshot(snapshot_path.c_str()); + spdlog::info("Finished loading: {}.", ttl_file.string()); +} \ No newline at end of file diff --git a/execs/tentris-server/CMakeLists.txt b/execs/tentris-server/CMakeLists.txt new file mode 100644 index 00000000..b1a0112b --- /dev/null +++ b/execs/tentris-server/CMakeLists.txt @@ -0,0 +1,27 @@ +add_executable(tentris_server + src/dice/tentris-server/TentrisServer.cpp + ) + +find_package(Threads REQUIRED) +find_package(Taskflow REQUIRED) +find_package(spdlog REQUIRED) +find_package(cxxopts REQUIRED) +find_package(restinio REQUIRED) +find_package(Metall REQUIRED) + +target_link_libraries(tentris_server PRIVATE + Threads::Threads + tentris::tentris + restinio::restinio + Taskflow::Taskflow + spdlog::spdlog + cxxopts::cxxopts + Metall::Metall + ) + +if (CMAKE_BUILD_TYPE MATCHES "Release") + set_target_properties(tentris_server PROPERTIES LINK_FLAGS_RELEASE -s) + include(CheckIPOSupported) + check_ipo_supported(RESULT result LANGUAGES CXX) # fatal error if IPO is not supported + set_property(TARGET tentris_server PROPERTY INTERPROCEDURAL_OPTIMIZATION True) +endif () diff --git a/execs/tentris-server/src/dice/tentris-server/TentrisServer.cpp b/execs/tentris-server/src/dice/tentris-server/TentrisServer.cpp new file mode 100644 index 00000000..3f391afa --- /dev/null +++ b/execs/tentris-server/src/dice/tentris-server/TentrisServer.cpp @@ -0,0 +1,157 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +int main(int argc, char *argv[]) { + using namespace dice; + namespace fs = std::filesystem; + + /* + * Parse Commandline Arguments + */ + std::string version = 
fmt::format("tentris-server v{} is based on hypertrie v{} and rdf4cpp {}.", dice::tentris::version, hypertrie::version, dice::tentris::rdf4cpp_version); + + cxxopts::Options options("tentris-server", + fmt::format("{}\nA tensor-based triple store.", version)); + options.add_options() // + ("s,storage", "Location where the index is stored.", cxxopts::value()->default_value(fs::current_path().string())) // + ("t,timeout", "Time out in seconds for answering requests.", cxxopts::value()->default_value("180")) // + ("j,threads", "Number of threads used by the endpoint.", cxxopts::value()->default_value(std::to_string(std::thread::hardware_concurrency()))) // + ("p,port", "Port to be used by the endpoint.", cxxopts::value()->default_value("9080")) // + ("l,loglevel", fmt::format("Details of logging. Available values are: [{}, {}, {}, {}, {}, {}, {}]", // + spdlog::level::to_string_view(spdlog::level::trace), // + spdlog::level::to_string_view(spdlog::level::debug), // + spdlog::level::to_string_view(spdlog::level::info), // + spdlog::level::to_string_view(spdlog::level::warn), // + spdlog::level::to_string_view(spdlog::level::err), // + spdlog::level::to_string_view(spdlog::level::critical), // + spdlog::level::to_string_view(spdlog::level::off)), // + cxxopts::value()->default_value("info")) // + ("logfile", "If log is written to files.", cxxopts::value()->default_value("true")) // + ("logstdout", "If log is written to stdout.", cxxopts::value()->default_value("false")) // + ("logfiledir", "A folder path where to write the logfiles. Default is the current working directory.", cxxopts::value()->default_value(fs::current_path().string()))// + ("v,version", "Version info.") // + ("h,help", "Print this help page.") // + ; + + auto parsed_args = options.parse(argc, argv); + if (parsed_args.count("help")) { + std::cout << options.help() << std::endl; + exit(0); + } else if (parsed_args.count("version")) { + std::cout << version << std::endl; + exit(0); + } + + /* + * Initialize logger + */ + const auto log_level = spdlog::level::from_str(parsed_args["loglevel"].as()); + spdlog::set_level(log_level); + + std::vector> sinks; + + if (parsed_args["logfile"].as()) { + // Create a file rotating logger with 5mb size max and 10 rotated files + const auto max_size = 1048576 * 5; + const auto max_files = 10; + auto file_sink = std::make_shared(parsed_args["logfiledir"].as() + "/tentris.log", max_size, max_files); + file_sink->set_level(log_level); + sinks.emplace_back(std::move(file_sink)); + } + + if (parsed_args["logstdout"].as()) { + auto console_sink = std::make_shared(); + console_sink->set_level(log_level); + sinks.emplace_back(std::move(console_sink)); + } + + auto logger = std::make_shared("tentris logger", sinks.begin(), sinks.end()); + logger->set_level(log_level); + spdlog::set_default_logger(logger); + spdlog::set_pattern("%Y-%m-%dT%T.%e%z | %n | %t | %l | %v"); + spdlog::info(version); + spdlog::flush_every(std::chrono::seconds{5}); + + + /* + * Initialize storage, executor and endpoints + */ + const endpoint::EndpointCfg endpoint_cfg{ + .port = parsed_args["port"].as(), + .threads = parsed_args["threads"].as(), + .timeout_duration = std::chrono::seconds{parsed_args["timeout"].as()}}; + + using metall_manager = rdf_tensor::metall_manager; + + auto const storage_path = fs::absolute(fs::path{parsed_args["storage"].as()}).append("tentris_data"); + if (not metall_manager::consistent(storage_path.c_str())) { + spdlog::info("No index storage or corrupted index storage found at {}. 
Checking for snapshot.", storage_path.string()); + auto const snapshot_path = storage_path.string().append("_snapshot"); + if (metall_manager::consistent(snapshot_path.c_str())) { + spdlog::info("Found snapshot at {}.", snapshot_path); + spdlog::info("Reconstructing index."); + metall_manager storage_manager{metall::open_only, snapshot_path.c_str()}; + storage_manager.snapshot(storage_path.c_str()); + spdlog::info("Reconstructed index at {}.", storage_path.string()); + } else { + spdlog::info("No snapshot found. Exiting."); + std::cout << "No snapshot found. Please create a new index using tentris_loader." << std::endl; + exit(0); + } + } else { + spdlog::info("Existing index storage at {}.", storage_path.string()); + } + metall_manager storage_manager{metall::open_only, storage_path.c_str()}; + + + {// set up node store + using namespace rdf4cpp::rdf::storage::node; + using namespace dice::node_store; + auto *nodestore_backend = storage_manager.find_or_construct<PersistentNodeStorageBackendImpl>("node-store")(storage_manager.get_allocator()); + NodeStorage::default_instance( + NodeStorage::new_instance(nodestore_backend)); + } + + // setup triple store + auto &rdf_tensor = [&storage_manager]() -> rdf_tensor::BoolHypertrie & { + auto [ptr, cnt] = storage_manager.find<rdf_tensor::BoolHypertrie>("rdf-tensor"); + if (cnt != 1UL) { + spdlog::error("Storage is readable but contains no rdf-tensor with index data. Please create a new index using tentris_loader."); + exit(0); + } + return *ptr; + }(); + { + triple_store::TripleStore triplestore{rdf_tensor}; + // initialize task runners + tf::Executor executor(endpoint_cfg.threads); + // setup and configure endpoints + endpoint::HTTPServer http_server{executor, triplestore, endpoint_cfg}; + const auto cards = triplestore.get_hypertrie().get_cards({0, 1, 2}); + spdlog::info("Storage stats: {} triples ({} distinct subjects, {} distinct predicates, {} distinct objects)", + triplestore.size(), cards[0], cards[1], cards[2]); + spdlog::info("SPARQL endpoint serving sparkling linked data treasures on {} threads at http://0.0.0.0:{}/ with {} request timeout.", + endpoint_cfg.threads, endpoint_cfg.port, endpoint_cfg.timeout_duration); + + // start http server + http_server(); + } + + // wrapping up node storage + spdlog::info("Shutdown successful."); + return EXIT_SUCCESS; +} diff --git a/execs/tools/CMakeLists.txt b/execs/tools/CMakeLists.txt new file mode 100644 index 00000000..73788b59 --- /dev/null +++ b/execs/tools/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(rdf2ids) +add_subdirectory(deduplicated-nt) \ No newline at end of file diff --git a/execs/tools/deduplicated-nt/CMakeLists.txt b/execs/tools/deduplicated-nt/CMakeLists.txt new file mode 100644 index 00000000..f64b8579 --- /dev/null +++ b/execs/tools/deduplicated-nt/CMakeLists.txt @@ -0,0 +1,34 @@ +find_package(Threads REQUIRED) +find_package(dice-sparse-map REQUIRED) +find_package(robin_hood REQUIRED) +find_package(dice-hash REQUIRED) +find_package(spdlog REQUIRED) +find_package(cxxopts REQUIRED) +find_package(rdf4cpp REQUIRED) + +add_executable(deduplicated_nt + src/dice/tools/deduplicated_nt/DeduplicatedNT.cpp + ) + +target_link_libraries(deduplicated_nt PRIVATE + Threads::Threads + rdf4cpp::rdf4cpp + dice-sparse-map::dice-sparse-map + robin_hood::robin_hood + dice-hash::dice-hash + spdlog::spdlog + cxxopts::cxxopts + tentris::rdf-tensor + ) + +target_include_directories(deduplicated_nt PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src + ) + + +if (CMAKE_BUILD_TYPE MATCHES "Release") + set_target_properties(deduplicated_nt PROPERTIES LINK_FLAGS_RELEASE -s) +
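Editor's note: the recovery policy above is: prefer the live datastore, fall back to the "_snapshot" copy, otherwise tell the user to run the loader. A condensed sketch of that control flow, assuming Metall's `consistent()`/`snapshot()` semantics as used in the patch; the paths are placeholders:

```cpp
#include <metall/metall.hpp>

#include <iostream>
#include <string>

bool open_or_recover(std::string const &storage_path) {
    using manager = metall::manager;
    if (!manager::consistent(storage_path.c_str())) {
        auto const snapshot_path = storage_path + "_snapshot";
        if (!manager::consistent(snapshot_path.c_str()))
            return false;// nothing to recover from; user must re-run the loader
        // Rebuild the datastore from the snapshot, then open the rebuilt copy.
        manager snapshot{metall::open_only, snapshot_path.c_str()};
        snapshot.snapshot(storage_path.c_str());
    }
    manager storage{metall::open_only, storage_path.c_str()};
    std::cout << "datastore opened\n";
    return true;
}
```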
include(CheckIPOSupported) + check_ipo_supported(RESULT result LANGUAGES CXX) # fatal error if IPO is not supported + set_property(TARGET deduplicated_nt PROPERTY INTERPROCEDURAL_OPTIMIZATION True) +endif () diff --git a/execs/tools/deduplicated-nt/src/dice/tools/deduplicated_nt/DeduplicatedNT.cpp b/execs/tools/deduplicated-nt/src/dice/tools/deduplicated_nt/DeduplicatedNT.cpp new file mode 100644 index 00000000..07221436 --- /dev/null +++ b/execs/tools/deduplicated-nt/src/dice/tools/deduplicated_nt/DeduplicatedNT.cpp @@ -0,0 +1,105 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + using namespace dice; + namespace fs = std::filesystem; + + /* + * Parse Commandline Arguments + */ + std::string version = fmt::format("deduplicate_nt v{} is based on rdf4cpp {}.", dice::tentris::version, dice::tentris::rdf4cpp_version); + + cxxopts::Options options("deduplicate_nt", + fmt::format("{}\nDeduplicates RDF files (Turtle, N-Triples). The result is serialized as N-Triples to stdout; logs are written to stderr.", version)); + options.add_options() // + ("f,file", "Turtle or N-Triples RDF file to process.", cxxopts::value<std::string>()) // + ("m,limit", "Maximum number of result triples. When the limit is reached, the tool quits.", cxxopts::value<size_t>()->default_value(fmt::format("{}", std::numeric_limits<size_t>::max())))// + ("l,loglevel", fmt::format("Details of logging. Available values are: [{}, {}, {}, {}, {}, {}, {}]", // + spdlog::level::to_string_view(spdlog::level::trace), // + spdlog::level::to_string_view(spdlog::level::debug), // + spdlog::level::to_string_view(spdlog::level::info), // + spdlog::level::to_string_view(spdlog::level::warn), // + spdlog::level::to_string_view(spdlog::level::err), // + spdlog::level::to_string_view(spdlog::level::critical), // + spdlog::level::to_string_view(spdlog::level::off)), // + cxxopts::value<std::string>()->default_value("info")) // + ("v,version", "Version info.") // + ("h,help", "Print this help page.") // + ; + + auto parsed_args = options.parse(argc, argv); + if (parsed_args.count("help")) { + std::cerr << options.help() << std::endl; + exit(EXIT_SUCCESS); + } else if (parsed_args.count("version")) { + std::cerr << version << std::endl; + exit(EXIT_SUCCESS); + } + + /* + * Initialize logger + */ + const auto log_level = spdlog::level::from_str(parsed_args["loglevel"].as<std::string>()); + spdlog::set_default_logger(spdlog::stderr_color_mt("deduplicated_nt logger")); + spdlog::set_level(log_level); + spdlog::set_pattern("%Y-%m-%dT%T.%e%z | %n | %t | %l | %v"); + spdlog::info(version); + + auto const limit = parsed_args["limit"].as<size_t>(); + size_t count = 0; + + // write deduplicated N-Triples to std::cout + { + // terminate when the limit is reached + auto terminate_at_limit = [&count, &limit] { + if (++count > limit) { + std::cout.flush(); + spdlog::info("Limit of {} triples reached.", limit); + spdlog::info("Shutdown successful."); + exit(EXIT_SUCCESS); + } + }; + + auto file_path = parsed_args["file"].as<std::string>(); + std::ifstream ifs{file_path}; + + if (!ifs.is_open()) { + throw std::runtime_error{"unable to open provided file " + file_path}; + } + + dice::sparse_map::sparse_set<size_t> deduplication; + for (rdf4cpp::rdf::parser::IStreamQuadIterator qit{ifs}; qit != rdf4cpp::rdf::parser::IStreamQuadIterator{}; ++qit) { + if (qit->has_value()) { + auto const &quad = qit->value(); + auto const hash = hash::dice_hash_templates::dice_hash(std::array{ + quad.subject().backend_handle().raw(),
quad.predicate().backend_handle().raw(), + quad.object().backend_handle().raw()}); + if (not deduplication.contains(hash)) { + terminate_at_limit(); + std::cout << fmt::format("{} {} {} . \n", std::string(quad.subject()), std::string(quad.predicate()), std::string(quad.object())); + deduplication.insert(hash); + } + } else { + std::cerr << qit->error() << '\n'; + } + } + } + + spdlog::info("Shutdown successful."); + return EXIT_SUCCESS; +} diff --git a/execs/tools/rdf2ids/CMakeLists.txt b/execs/tools/rdf2ids/CMakeLists.txt new file mode 100644 index 00000000..9090bd7d --- /dev/null +++ b/execs/tools/rdf2ids/CMakeLists.txt @@ -0,0 +1,35 @@ +find_package(Threads REQUIRED) +find_package(dice-sparse-map REQUIRED) +find_package(robin_hood REQUIRED) +find_package(dice-hash REQUIRED) +find_package(spdlog REQUIRED) +find_package(cxxopts REQUIRED) +find_package(vincentlaucsb-csv-parser REQUIRED) +find_package(rdf4cpp REQUIRED) + +add_executable(rdf2ids + src/dice/tools/rdf2ids/RDF2IDs.cpp + ) + +target_link_libraries(rdf2ids PRIVATE + Threads::Threads + dice-hash::dice-hash + rdf4cpp::rdf4cpp + dice-sparse-map::dice-sparse-map + robin_hood::robin_hood + spdlog::spdlog + cxxopts::cxxopts + vincentlaucsb-csv-parser::vincentlaucsb-csv-parser + tentris::rdf-tensor + ) + +target_include_directories(rdf2ids PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src + ) + +if (CMAKE_BUILD_TYPE MATCHES "Release") + set_target_properties(rdf2ids PROPERTIES LINK_FLAGS_RELEASE -s) + include(CheckIPOSupported) + check_ipo_supported(RESULT result LANGUAGES CXX) # fatal error if IPO is not supported + set_property(TARGET rdf2ids PROPERTY INTERPROCEDURAL_OPTIMIZATION True) +endif () diff --git a/execs/tools/rdf2ids/src/dice/tools/rdf2ids/RDF2IDs.cpp b/execs/tools/rdf2ids/src/dice/tools/rdf2ids/RDF2IDs.cpp new file mode 100644 index 00000000..bf669856 --- /dev/null +++ b/execs/tools/rdf2ids/src/dice/tools/rdf2ids/RDF2IDs.cpp @@ -0,0 +1,114 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + using namespace dice; + namespace fs = std::filesystem; + + /* + * Parse Commandline Arguments + */ + std::string version = fmt::format("rdf2ids v{} is based on rdf4cpp {}.", dice::tentris::version, dice::tentris::rdf4cpp_version); + + cxxopts::Options options("rdf2ids", + fmt::format("{}\nConverts RDF triples to ID triples in TSV format. The result is written to stdout.", version)); + options.add_options() // + ("d,distinct", "Output each ID triple only once.", cxxopts::value<bool>()->default_value("false")) // + ("f,file", "Turtle or N-Triples RDF file to process.", cxxopts::value<std::string>()) // + ("m,limit", "Maximum number of ID triples returned.", cxxopts::value<size_t>()->default_value(fmt::format("{}", std::numeric_limits<size_t>::max())))// + ("l,loglevel", fmt::format("Details of logging.
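Editor's note: both tools share one pattern: hash each triple's three backend IDs once, keep only the hash in a set, and stop at a limit. A condensed, runnable sketch with `std::unordered_set` and a boost-style hash combine standing in for dice-hash and `dice::sparse_map::sparse_set`:

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <unordered_set>
#include <vector>

int main() {
    // Stand-ins for quad.subject()/predicate()/object() backend IDs.
    using id_triple = std::array<uint64_t, 3>;
    std::vector<id_triple> input{{1, 2, 3}, {1, 2, 3}, {4, 5, 6}};

    size_t const limit = 100;
    size_t count = 0;
    std::unordered_set<size_t> seen;// the tools use dice-hash + a sparse set

    for (auto const &triple : input) {
        // Combined hash over all three IDs; dice_hash hashes the std::array in one call.
        size_t h = 0;
        for (auto id : triple)
            h ^= std::hash<uint64_t>{}(id) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
        if (!seen.insert(h).second)
            continue;// duplicate, skip
        if (++count > limit)
            break;// the tools flush and exit(EXIT_SUCCESS) here
        std::cout << triple[0] << '\t' << triple[1] << '\t' << triple[2] << '\n';
    }
}
```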
Available values are: [{}, {}, {}, {}, {}, {}, {}]", // + spdlog::level::to_string_view(spdlog::level::trace), // + spdlog::level::to_string_view(spdlog::level::debug), // + spdlog::level::to_string_view(spdlog::level::info), // + spdlog::level::to_string_view(spdlog::level::warn), // + spdlog::level::to_string_view(spdlog::level::err), // + spdlog::level::to_string_view(spdlog::level::critical), // + spdlog::level::to_string_view(spdlog::level::off)), // + cxxopts::value()->default_value("info")) // + ("v,version", "Version info.") // + ("h,help", "Print this help page.") // + ; + + auto parsed_args = options.parse(argc, argv); + if (parsed_args.count("help")) { + std::cerr << options.help() << std::endl; + exit(EXIT_SUCCESS); + } else if (parsed_args.count("version")) { + std::cerr << version << std::endl; + exit(EXIT_SUCCESS); + } + + /* + * Initialize logger + */ + const auto log_level = spdlog::level::from_str(parsed_args["loglevel"].as()); + spdlog::set_default_logger(spdlog::stderr_color_mt("rdf2ids logger")); + spdlog::set_level(log_level); + spdlog::set_pattern("%Y-%m-%dT%T.%e%z | %n | %t | %l | %v"); + spdlog::info(version); + + auto const limit = parsed_args["limit"].as(); + size_t count = 0; + + // write TSV to std::cout + auto tsv_writer = csv::make_tsv_writer(std::cout); + { + // terminate when the limit is reached + auto terminate_at_limit = [&count, &limit, &tsv_writer] { + if (++count > limit) { + tsv_writer.flush(); + spdlog::info("Limit of {} entries reached.", limit); + spdlog::info("Shutdown successful."); + exit(EXIT_SUCCESS); + } + }; + + auto file_path = parsed_args["file"].as(); + std::ifstream ifs{file_path}; + + if (!ifs.is_open()) { + throw std::runtime_error{"unable to open provided file " + file_path}; + } + + dice::sparse_map::sparse_set deduplication; + bool const deduplicate = parsed_args["distinct"].as(); + for (rdf4cpp::rdf::parser::IStreamQuadIterator qit{ifs}; qit != rdf4cpp::rdf::parser::IStreamQuadIterator{}; ++qit) { + if (qit->has_value()) { + auto const &quad = qit->value(); + std::array const id_triple{ + quad.subject().backend_handle().raw(), + quad.predicate().backend_handle().raw(), + quad.object().backend_handle().raw()}; + if (deduplicate) { + auto const hash = hash::dice_hash_templates::dice_hash(id_triple); + if (not deduplication.contains(hash)) { + terminate_at_limit(); + tsv_writer << id_triple; + deduplication.insert(hash); + } + } else { + terminate_at_limit(); + tsv_writer << id_triple; + } + } else { + std::cerr << qit->error() << '\n'; + } + } + } + + spdlog::info("Shutdown successful."); + return EXIT_SUCCESS; +} diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt new file mode 100644 index 00000000..35f7663c --- /dev/null +++ b/libs/CMakeLists.txt @@ -0,0 +1,7 @@ +add_subdirectory(rdf-tensor) +add_subdirectory(node-store) +add_subdirectory(sparql2tensor) +add_subdirectory(triple-store) +add_subdirectory(endpoint) +add_subdirectory(tentris) +install_package() \ No newline at end of file diff --git a/libs/endpoint/CMakeLists.txt b/libs/endpoint/CMakeLists.txt new file mode 100644 index 00000000..f27e60cf --- /dev/null +++ b/libs/endpoint/CMakeLists.txt @@ -0,0 +1,42 @@ +find_package(Taskflow REQUIRED) +find_package(restinio REQUIRED) +find_package(spdlog REQUIRED) +find_package(cppitertools REQUIRED) +find_package(RapidJSON REQUIRED) + +add_library(endpoint + src/dice/endpoint/HTTPServer.cpp + src/dice/endpoint/SparqlEndpoint.cpp + src/dice/endpoint/CountEndpoint.cpp + src/dice/endpoint/SparqlStreamingEndpoint.cpp + 
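Editor's note: the TSV output above goes through vincentlaucsb's csv-parser writer rather than hand-rolled formatting. A minimal sketch, assuming the single-header `csv.hpp` layout of that library; the values are illustrative:

```cpp
#include <array>
#include <cstdint>
#include <iostream>

#include <csv.hpp>// vincentlaucsb-csv-parser, as required in the CMake file above

int main() {
    // make_tsv_writer wraps any std::ostream; each operator<< emits one row.
    auto tsv_writer = csv::make_tsv_writer(std::cout);

    // A container of stringifiable values is written as one tab-separated row,
    // matching `tsv_writer << id_triple` in RDF2IDs.cpp.
    std::array<uint64_t, 3> const id_triple{42, 7, 1337};
    tsv_writer << id_triple;
}
```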
src/dice/endpoint/SparqlQueryCache.cpp + ) +add_library(tentris::endpoint ALIAS endpoint) + +target_include_directories(endpoint PUBLIC + $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/private-include + ) + +set_target_properties(endpoint PROPERTIES + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR} + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO + ) + +target_link_libraries(endpoint PUBLIC + tentris::triple-store + tentris::node-store + restinio::restinio + Taskflow::Taskflow + PRIVATE + spdlog::spdlog + cppitertools::cppitertools + RapidJSON::RapidJSON + ) + +include(${PROJECT_SOURCE_DIR}/cmake/install_library.cmake) +install_component(endpoint src) \ No newline at end of file diff --git a/libs/endpoint/private-include/dice/endpoint/ParseSPARQLQueryParam.hpp b/libs/endpoint/private-include/dice/endpoint/ParseSPARQLQueryParam.hpp new file mode 100644 index 00000000..09d38f8a --- /dev/null +++ b/libs/endpoint/private-include/dice/endpoint/ParseSPARQLQueryParam.hpp @@ -0,0 +1,37 @@ +#ifndef TENTRIS_PARSESPARQLQUERYPARAM_HPP +#define TENTRIS_PARSESPARQLQUERYPARAM_HPP + +#include + +#include +#include + +#include + +#include + +namespace dice::endpoint { + + inline std::shared_ptr parse_sparql_query_param(restinio::request_handle_t &req, SparqlQueryCache &cache) { + using namespace dice::sparql2tensor; + using namespace restinio; + const auto qp = parse_query(req->header().query()); + if (not qp.has("query")) { + static auto const message = "Query parameter 'query' is missing."; + spdlog::warn("HTTP response {}: {}", status_bad_request(), message); + req->create_response(status_bad_request()).set_body(message).done(); + return {}; + } + std::string sparql_query_str = std::string{qp["query"]}; + SPARQLQuery sparql_query; + try { + return cache[sparql_query_str]; + } catch (std::exception &ex) { + static auto const message = "Value of query parameter 'query' is not parsable."; + spdlog::warn("HTTP response {}: {} (detail: {})", status_bad_request(), message, ex.what()); + req->create_response(status_bad_request()).set_body(message).done(); + return {}; + } + } +}// namespace dice::endpoint +#endif//TENTRIS_PARSESPARQLQUERYPARAM_HPP diff --git a/libs/endpoint/private-include/dice/endpoint/ParseSPARQLUpdateParam.hpp b/libs/endpoint/private-include/dice/endpoint/ParseSPARQLUpdateParam.hpp new file mode 100644 index 00000000..52bdf5c4 --- /dev/null +++ b/libs/endpoint/private-include/dice/endpoint/ParseSPARQLUpdateParam.hpp @@ -0,0 +1,41 @@ +#ifndef TENTRIS_PARSESPARQLUPDATEPARAM_HPP +#define TENTRIS_PARSESPARQLUPDATEPARAM_HPP + +#include + +#include +#include +#include + +#include + + +namespace dice::endpoint { + + inline sparql2tensor::UPDATEDATAQueryData parse_sparql_update_param(restinio::request_handle_t &req) { + using namespace dice::sparql2tensor; + using namespace restinio; + auto content_type = req->header().opt_value_of(http_field::content_type); + auto content_type_value = http_field_parsers::content_type_value_t::try_parse(*content_type); + if (not content_type_value.has_value() or + content_type_value.value().media_type.type != "application" or + content_type_value.value().media_type.subtype != "sparql-update") { + throw std::runtime_error("Expected content-type: application/sparql-update"); + } + std::string sparql_update_str{req->body()}; + try { + auto update_query = UPDATEDATAQueryData::parse(sparql_update_str); + return update_query; + } catch (std::exception &ex) { + static constexpr auto message = "Value of parameter 'update' is not parsable: "; + 
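Editor's note: parse_sparql_query_param above folds three concerns into one helper: read the `query` parameter, consult the LRU cache (whose `operator[]` parses on a miss), and answer 400 itself, signalling "already handled" to the caller with an empty shared_ptr. The same control flow reduced to plain C++, with `QueryCache` and `ParsedQuery` as hypothetical stand-ins for the restinio-facing types:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>

struct ParsedQuery { std::string text; };

// Stand-in for SparqlQueryCache: operator[] parses on a miss and may throw.
struct QueryCache {
    std::map<std::string, std::shared_ptr<ParsedQuery>> entries;
    std::shared_ptr<ParsedQuery> operator[](std::string const &q) {
        if (q.empty()) throw std::runtime_error{"parse error"};
        auto &entry = entries[q];
        if (!entry) entry = std::make_shared<ParsedQuery>(ParsedQuery{q});
        return entry;
    }
};

// An empty shared_ptr means "error response already sent", as in the helper above.
std::shared_ptr<ParsedQuery> parse_query_param(std::optional<std::string> const &param,
                                               QueryCache &cache) {
    if (!param) {
        std::cerr << "400: query parameter 'query' is missing\n";
        return {};
    }
    try {
        return cache[*param];
    } catch (std::exception const &ex) {
        std::cerr << "400: query not parsable: " << ex.what() << '\n';
        return {};
    }
}
```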
throw std::runtime_error{std::string{message} + ex.what()}; + } catch (...) { + static constexpr auto message = "Unknown error"; + throw std::runtime_error{message}; + } + } + +}// namespace dice::endpoint + + +#endif//TENTRIS_PARSESPARQLUPDATEPARAM_HPP diff --git a/libs/endpoint/private-include/dice/endpoint/SparqlJsonResultSAXWriter.hpp b/libs/endpoint/private-include/dice/endpoint/SparqlJsonResultSAXWriter.hpp new file mode 100644 index 00000000..a02c81a3 --- /dev/null +++ b/libs/endpoint/private-include/dice/endpoint/SparqlJsonResultSAXWriter.hpp @@ -0,0 +1,154 @@ +#ifndef TENTRIS_SPARQLJSONRESULTSAXWRITER_HPP +#define TENTRIS_SPARQLJSONRESULTSAXWRITER_HPP + +#include + +#define RAPIDJSON_HAS_STDSTRING 1 + +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace dice::endpoint { + + class SparqlJsonResultSAXWriter { + using Node = rdf4cpp::rdf::Node; + using Literal = rdf4cpp::rdf::Literal; + using IRI = rdf4cpp::rdf::IRI; + using BlankNode = rdf4cpp::rdf::BlankNode; + using Variable = rdf4cpp::rdf::query::Variable; + using Entry = dice::rdf_tensor::Entry; + + std::size_t number_of_solutions_ = 0; + std::size_t number_of_bindings_ = 0; + + size_t buffer_size; + rapidjson::StringBuffer buffer; + rapidjson::Writer writer; + + std::vector variables_; + + inline static auto to_rapidjson(std::string_view view) { + return rapidjson::GenericStringRef(view.data() ? view.data() : "", view.size()); + } + public: + explicit SparqlJsonResultSAXWriter(const std::vector& variables, size_t buffer_size) + : buffer_size(buffer_size), + buffer(nullptr, size_t(buffer_size * 1.3)), + writer(buffer) { + writer.StartObject(); + writer.Key("head"); + for (auto const &var : variables) { + variables_.emplace_back(var.name()); + } + { + writer.StartObject(); + writer.Key("vars"); + { + writer.StartArray(); + for (const auto &var : variables_) + writer.String(to_rapidjson(var)); + writer.EndArray(); + } + writer.EndObject(); + } + writer.Key("results"); + writer.StartObject(); + writer.Key("bindings"); + writer.StartArray(); + } + + void close() { + writer.EndArray(); + writer.EndObject(); + writer.EndObject(); + } + + void add(Entry const &entry) { + + for (size_t i = 0; i < size_t(entry.value()); ++i) { + writer.StartObject(); + for (const auto &[term, var] : iter::zip(entry.key(), variables_)) { + if (term.null()) + continue; + writer.Key(to_rapidjson(var)); + writer.StartObject(); + writer.Key("type"); + if (term.is_iri()) { + writer.String("uri"); + writer.Key("value"); + auto const &identifier = ((IRI) term).identifier(); + writer.String(identifier.data(), identifier.size()); + } else if (term.is_literal()) { + writer.String("literal"); + + auto literal = (Literal) term; + + static const IRI xsd_str{"http://www.w3.org/2001/XMLSchema#string"}; + auto datatype = literal.datatype(); + if (datatype != xsd_str) { + auto const &lang = literal.language_tag(); + if (not lang.empty()) { + writer.Key("xml:lang"); + writer.String(lang.data(), lang.size()); + } else { + writer.Key("datatype"); + writer.String(datatype.identifier().data(), datatype.identifier().size()); + } + } + writer.Key("value"); + writer.String(to_rapidjson(literal.lexical_form())); + + } else if (term.is_blank_node()) { + writer.String("bnode"); + writer.Key("value"); + auto const &identifier = ((BlankNode) term).identifier(); + writer.String(identifier.data(), identifier.size()); + } else { + throw std::runtime_error("Node with incorrect type (none of Literal, BNode, URI) detected."); + } + + 
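Editor's note: SparqlJsonResultSAXWriter streams the W3C "SPARQL 1.1 Query Results JSON" format through RapidJSON's SAX `Writer` instead of building a DOM. A self-contained miniature that emits the same skeleton (head/vars, results/bindings) for one hard-coded solution:

```cpp
#define RAPIDJSON_HAS_STDSTRING 1
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>

#include <iostream>

int main() {
    rapidjson::StringBuffer buffer;
    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);

    writer.StartObject();
    writer.Key("head");
    writer.StartObject();
    writer.Key("vars");
    writer.StartArray();
    writer.String("s");
    writer.EndArray();
    writer.EndObject();

    writer.Key("results");
    writer.StartObject();
    writer.Key("bindings");
    writer.StartArray();

    writer.StartObject();// one solution, as written by add(Entry) above
    writer.Key("s");
    writer.StartObject();
    writer.Key("type");
    writer.String("uri");
    writer.Key("value");
    writer.String("http://example.org/a");
    writer.EndObject();
    writer.EndObject();

    writer.EndArray();
    writer.EndObject();
    writer.EndObject();// mirrors close() above

    std::cout << buffer.GetString() << '\n';
}
```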
writer.EndObject(); + number_of_bindings_++; + } + writer.EndObject(); + } + + number_of_solutions_ += entry.value(); + } + + [[nodiscard]] std::size_t size() const { + return buffer.GetSize(); + } + + [[nodiscard]] std::size_t number_of_written_solutions() const { + return number_of_solutions_; + } + + [[nodiscard]] std::size_t number_of_written_bindings() const { + return number_of_bindings_; + } + + [[nodiscard]] bool full() const { + return buffer.GetSize() > this->buffer_size; + }; + + std::string_view string_view() { + writer.Flush(); + return {buffer.GetString(), buffer.GetSize()}; + } + + void clear() { + this->buffer.Clear(); + } + }; +}// namespace dice::endpoint + +#endif//TENTRIS_SPARQLJSONRESULTSAXWRITER_HPP diff --git a/libs/endpoint/src/dice/endpoint/CountEndpoint.cpp b/libs/endpoint/src/dice/endpoint/CountEndpoint.cpp new file mode 100644 index 00000000..ef0078cb --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/CountEndpoint.cpp @@ -0,0 +1,51 @@ +#include "CountEndpoint.hpp" + +#include + +#include "dice/endpoint/ParseSPARQLQueryParam.hpp" +#include "dice/endpoint/SparqlJsonResultSAXWriter.hpp" + +namespace dice::endpoint { + CountEndpoint::CountEndpoint(tf::Executor &executor, + triple_store::TripleStore &triplestore, + SparqlQueryCache &sparql_query_cache, + std::chrono::seconds timeoutDuration) + : executor_(executor), + triplestore_(triplestore), + sparql_query_cache_(sparql_query_cache), + timeout_duration_(timeoutDuration) {} + + restinio::request_handling_status_t CountEndpoint::operator()( + restinio::request_handle_t req, + [[maybe_unused]] restinio::router::route_params_t params) { + auto timeout = (timeout_duration_.count()) ? std::chrono::steady_clock::now() + this->timeout_duration_ : std::chrono::steady_clock::time_point::max(); + if (executor_.num_topologies() < executor_.num_workers()) { + executor_.silent_async([this, timeout](restinio::request_handle_t req) { + using namespace dice::sparql2tensor; + using namespace restinio; + + std::shared_ptr sparql_query = parse_sparql_query_param(req, this->sparql_query_cache_); + if (not sparql_query) + return; + + try { + size_t count = this->triplestore_.count(*sparql_query, timeout); + + req->create_response(status_ok()) + .set_body(fmt::format("{}", count)) + .done(); + spdlog::info("HTTP response {}: counted {} results", status_ok(), count); + } catch (std::runtime_error const &timeout_exception) { + const auto timeout_message = fmt::format("Request processing timed out after {}.", this->timeout_duration_); + spdlog::warn("HTTP response {}: {}", status_gateway_time_out(), timeout_message); + req->create_response(status_gateway_time_out()).set_body(timeout_message).done(); + } + }, + std::move(req)); + return restinio::request_accepted(); + } else { + spdlog::warn("Handling request was rejected. 
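Editor's note: every endpoint guards its Taskflow pool the same way: accept the request only if a worker is idle, otherwise reject immediately. A small sketch of that backpressure idiom, mirroring the `num_topologies() < num_workers()` check (request IDs and sleep are illustrative):

```cpp
#include <taskflow/taskflow.hpp>

#include <chrono>
#include <iostream>
#include <thread>

int main() {
    tf::Executor executor(2);// plays the role of the endpoint's worker pool

    for (int request = 0; request < 8; ++request) {
        // Accept only if a worker is free; otherwise reject right away,
        // which the endpoints map to restinio::request_rejected().
        if (executor.num_topologies() < executor.num_workers()) {
            executor.silent_async([request] {
                std::this_thread::sleep_for(std::chrono::milliseconds(50));
                std::cout << "handled request " << request << '\n';
            });
        } else {
            std::cout << "rejected request " << request << " (all workers busy)\n";
        }
    }
    executor.wait_for_all();
}
```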
All workers are busy."); + return restinio::request_rejected(); + } + } +}// namespace dice::endpoint \ No newline at end of file diff --git a/libs/endpoint/src/dice/endpoint/CountEndpoint.hpp b/libs/endpoint/src/dice/endpoint/CountEndpoint.hpp new file mode 100644 index 00000000..914f416b --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/CountEndpoint.hpp @@ -0,0 +1,33 @@ +#ifndef TENTRIS_COUNTENDPOINT_HPP +#define TENTRIS_COUNTENDPOINT_HPP + +#include +#include + +#include +#include + +#include + +namespace dice::endpoint { + + class CountEndpoint { + + tf::Executor &executor_; + + triple_store::TripleStore &triplestore_; + + SparqlQueryCache &sparql_query_cache_; + + std::chrono::seconds timeout_duration_; + + public: + CountEndpoint(tf::Executor &executor, triple_store::TripleStore &triplestore, SparqlQueryCache &sparql_query_cache, std::chrono::seconds timeoutDuration); + + restinio::request_handling_status_t operator()( + restinio::request_handle_t req, + restinio::router::route_params_t params); + }; + +}// namespace dice::endpoint +#endif//TENTRIS_COUNTENDPOINT_HPP diff --git a/libs/endpoint/src/dice/endpoint/HTTPServer.cpp b/libs/endpoint/src/dice/endpoint/HTTPServer.cpp new file mode 100644 index 00000000..596a7ab8 --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/HTTPServer.cpp @@ -0,0 +1,53 @@ +#include "HTTPServer.hpp" + +#include "dice/endpoint/CountEndpoint.hpp" +#include "dice/endpoint/SparqlEndpoint.hpp" +#include "dice/endpoint/SparqlStreamingEndpoint.hpp" + +#include + +namespace dice::endpoint { + + struct tentris_restinio_traits : public restinio::traits_t< + restinio::null_timer_manager_t, + restinio::null_logger_t, + restinio::router::express_router_t<>> { + static constexpr bool use_connection_count_limiter = true; + }; + + HTTPServer::HTTPServer(tf::Executor &executor, triple_store::TripleStore &triplestore, EndpointCfg const &cfg) + : executor_(executor), + triplestore_(triplestore), + sparql_query_cache_(),// TODO: override default parameter + router_(std::make_unique>()), + cfg_(cfg) {} + + void HTTPServer::operator()() { + spdlog::info("Available endpoints:"); + router_->http_get(R"(/sparql)", + SPARQLEndpoint{executor_, triplestore_, sparql_query_cache_, cfg_.timeout_duration}); + spdlog::info(" GET /sparql?query= for normal queries"); + + router_->http_get(R"(/stream)", + SPARQLStreamingEndpoint{executor_, triplestore_, sparql_query_cache_, cfg_.timeout_duration}); + spdlog::info(" GET /stream?query= for queries with huge results"); + + router_->http_get(R"(/count)", + CountEndpoint{executor_, triplestore_, sparql_query_cache_, cfg_.timeout_duration}); + spdlog::info(" GET /count?query= as a workaround for count"); + + + router_->non_matched_request_handler( + [](auto req) -> restinio::request_handling_status_t { + return req->create_response(restinio::status_not_found()).connection_close().done(); + }); + + spdlog::info("Use Ctrl+C on the terminal or SIGINT to shut down tentris gracefully. 
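Editor's note: HTTPServer wires an express-style router into `restinio::run` on a thread pool. A stripped-down sketch of that setup under assumed restinio 0.6-era APIs; the route, port, and body are placeholders, and the real server additionally uses null timer/logger traits and a connection-count limiter:

```cpp
#include <restinio/all.hpp>

#include <memory>

int main() {
    using router_t = restinio::router::express_router_t<>;
    auto router = std::make_unique<router_t>();

    router->http_get("/sparql", [](auto req, auto) {
        return req->create_response().set_body("results would go here").done();
    });
    router->non_matched_request_handler([](auto req) {
        return req->create_response(restinio::status_not_found()).connection_close().done();
    });

    // Traits select the express router as the request handler type.
    struct traits_t : restinio::default_traits_t {
        using request_handler_t = router_t;
    };

    restinio::run(restinio::on_thread_pool<traits_t>(2)
                          .address("0.0.0.0")
                          .port(9080)
                          .request_handler(std::move(router)));
}
```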
If tentris is killed or crashes, the index files will be corrupted."); + restinio::run( + restinio::on_thread_pool(cfg_.threads) + .max_parallel_connections(cfg_.threads) + .address("0.0.0.0") + .port(cfg_.port) + .request_handler(std::move(router_))); + } +}// namespace dice::endpoint \ No newline at end of file diff --git a/libs/endpoint/src/dice/endpoint/HTTPServer.hpp b/libs/endpoint/src/dice/endpoint/HTTPServer.hpp new file mode 100644 index 00000000..5b5c756e --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/HTTPServer.hpp @@ -0,0 +1,39 @@ +#ifndef TENTRIS_HTTPSERVER_HPP +#define TENTRIS_HTTPSERVER_HPP + +#include +#include + +#include +#include + +#include + + +namespace dice::endpoint { + + struct EndpointCfg { + uint16_t port; + uint16_t threads; + std::chrono::seconds timeout_duration; + }; + + class HTTPServer { + tf::Executor &executor_; + triple_store::TripleStore &triplestore_; + SparqlQueryCache sparql_query_cache_; + std::unique_ptr> router_; + EndpointCfg cfg_; + + public: + HTTPServer(tf::Executor &executor, triple_store::TripleStore &triplestore, EndpointCfg const &cfg); + + restinio::router::express_router_t<> &router(){ + return *router_; + } + + void operator()(); + }; +}// namespace dice::endpoint + +#endif//TENTRIS_HTTPSERVER_HPP diff --git a/libs/endpoint/src/dice/endpoint/SparqlEndpoint.cpp b/libs/endpoint/src/dice/endpoint/SparqlEndpoint.cpp new file mode 100644 index 00000000..ee49a731 --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/SparqlEndpoint.cpp @@ -0,0 +1,72 @@ +#include "SparqlEndpoint.hpp" + +#include + +#include "dice/endpoint/ParseSPARQLQueryParam.hpp" +#include "dice/endpoint/SparqlJsonResultSAXWriter.hpp" + +namespace dice::endpoint { + + SPARQLEndpoint::SPARQLEndpoint(tf::Executor &executor, + triple_store::TripleStore &triplestore, + SparqlQueryCache &sparql_query_cache, + std::chrono::seconds timeoutDuration) + : executor_(executor), + triplestore_(triplestore), + sparql_query_cache_(sparql_query_cache), + timeout_duration_(timeoutDuration) {} + + restinio::request_handling_status_t SPARQLEndpoint::operator()( + restinio::request_handle_t req, + [[maybe_unused]] restinio::router::route_params_t params) { + auto timeout = (timeout_duration_.count()) ? std::chrono::steady_clock::now() + this->timeout_duration_ : std::chrono::steady_clock::time_point::max(); + if (executor_.num_topologies() < executor_.num_workers()) { + executor_.silent_async([this, timeout](restinio::request_handle_t req) { + using namespace dice::sparql2tensor; + using namespace restinio; + + std::shared_ptr sparql_query = parse_sparql_query_param(req, this->sparql_query_cache_); + if (not sparql_query) + return; + + try { + if (sparql_query->ask_) { + bool ask_res = this->triplestore_.eval_ask(*sparql_query, timeout); + std::string res = ask_res ? 
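Editor's note: each endpoint turns the configured duration into an absolute deadline up front, with `--timeout 0` meaning "never". The idiom, isolated:

```cpp
#include <chrono>
#include <iostream>

int main() {
    using clock = std::chrono::steady_clock;

    // 0 seconds disables the timeout: the deadline collapses to time_point::max(),
    // so every later `now < deadline` check trivially passes.
    auto deadline_for = [](std::chrono::seconds timeout_duration) {
        return timeout_duration.count() ? clock::now() + timeout_duration
                                        : clock::time_point::max();
    };

    auto deadline = deadline_for(std::chrono::seconds{180});
    std::cout << "timed out: " << (clock::now() >= deadline) << '\n';
}
```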
"true" : "false"; + req->create_response(status_ok()) + .append_header(http_field::content_type, "application/sparql-results+json") + .set_body(R"({ "head" : {}, "boolean" : )" + res + " }") + .done(); + } else { + endpoint::SparqlJsonResultSAXWriter json_writer{sparql_query->projected_variables_, 100'000}; + + for (auto const &entry : this->triplestore_.eval_select(*sparql_query, timeout)) { + json_writer.add(entry); + } + json_writer.close(); + + req->create_response(status_ok()) + .append_header(http_field::content_type, "application/sparql-results+json") + .set_body(std::string{json_writer.string_view()}) + .done(); + spdlog::info("HTTP response {}: {} variables, {} solutions, {} bindings", + status_ok(), + sparql_query->projected_variables_.size(), + json_writer.number_of_written_solutions(), + json_writer.number_of_written_bindings()); + } + } catch (std::runtime_error const &timeout_exception) { + const auto timeout_message = fmt::format("Request processing timed out after {}.", this->timeout_duration_); + spdlog::warn("HTTP response {}: {}", status_gateway_time_out(), timeout_message); + req->create_response(status_gateway_time_out()).set_body(timeout_message).done(); + } + }, + std::move(req)); + return restinio::request_accepted(); + } else { + spdlog::warn("Handling request was rejected. All workers are busy."); + return restinio::request_rejected(); + } + } + +}// namespace dice::endpoint \ No newline at end of file diff --git a/libs/endpoint/src/dice/endpoint/SparqlEndpoint.hpp b/libs/endpoint/src/dice/endpoint/SparqlEndpoint.hpp new file mode 100644 index 00000000..37a180fa --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/SparqlEndpoint.hpp @@ -0,0 +1,33 @@ +#ifndef TENTRIS_SPARQLENDPOINT_HPP +#define TENTRIS_SPARQLENDPOINT_HPP + +#include +#include + +#include +#include + +#include + +namespace dice::endpoint { + + class SPARQLEndpoint { + + tf::Executor &executor_; + + triple_store::TripleStore &triplestore_; + + SparqlQueryCache &sparql_query_cache_; + + std::chrono::seconds timeout_duration_; + + public: + SPARQLEndpoint(tf::Executor &executor, triple_store::TripleStore &triplestore, SparqlQueryCache &sparql_query_cache, std::chrono::seconds timeoutDuration); + + restinio::request_handling_status_t operator()( + restinio::request_handle_t req, + restinio::router::route_params_t params); + }; + +}// namespace dice::endpoint +#endif//TENTRIS_SPARQLENDPOINT_HPP diff --git a/libs/endpoint/src/dice/endpoint/SparqlQueryCache.cpp b/libs/endpoint/src/dice/endpoint/SparqlQueryCache.cpp new file mode 100644 index 00000000..fe33a6cf --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/SparqlQueryCache.cpp @@ -0,0 +1,7 @@ +#include "SparqlQueryCache.hpp" + +#include + +namespace dice::endpoint { + template class SyncedLRUCache; +}// namespace dice::endpoint \ No newline at end of file diff --git a/libs/endpoint/src/dice/endpoint/SparqlQueryCache.hpp b/libs/endpoint/src/dice/endpoint/SparqlQueryCache.hpp new file mode 100644 index 00000000..1a732444 --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/SparqlQueryCache.hpp @@ -0,0 +1,13 @@ +#ifndef TENTRIS_SPARQLQUERYCACHE_HPP +#define TENTRIS_SPARQLQUERYCACHE_HPP + +#include + +#include + +namespace dice::endpoint { + + using SparqlQueryCache = SyncedLRUCache; + +} +#endif//TENTRIS_SPARQLQUERYCACHE_HPP diff --git a/libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.cpp b/libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.cpp new file mode 100644 index 00000000..8d121160 --- /dev/null +++ 
b/libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.cpp @@ -0,0 +1,74 @@ +#include "SparqlStreamingEndpoint.hpp" + +#include + +#include "dice/endpoint/ParseSPARQLQueryParam.hpp" +#include "dice/endpoint/SparqlJsonResultSAXWriter.hpp" + +namespace dice::endpoint { + + + SPARQLStreamingEndpoint::SPARQLStreamingEndpoint(tf::Executor &executor, + triple_store::TripleStore &triplestore, + SparqlQueryCache &sparql_query_cache, + std::chrono::seconds timeoutDuration) + : executor_(executor), + triplestore_(triplestore), + sparql_query_cache_(sparql_query_cache), + timeout_duration_(timeoutDuration) { + } + restinio::request_handling_status_t SPARQLStreamingEndpoint::operator()( + restinio::request_handle_t req, + [[maybe_unused]] restinio::router::route_params_t params) { + auto timeout = (timeout_duration_.count()) ? std::chrono::steady_clock::now() + this->timeout_duration_ : std::chrono::steady_clock::time_point::max(); + if (executor_.num_topologies() < executor_.num_workers()) { + executor_.silent_async([this, timeout](restinio::request_handle_t req) { + using namespace dice::sparql2tensor; + using namespace restinio; + + std::shared_ptr sparql_query = parse_sparql_query_param(req, this->sparql_query_cache_); + if (not sparql_query) + return; + + bool asio_write_failed = false; + + endpoint::SparqlJsonResultSAXWriter json_writer{sparql_query->projected_variables_, 100'000}; + + response_builder_t resp = req->template create_response(); + resp.append_header(http_field::content_type, "application/sparql-results+json"); + + try { + for (auto const &entry : this->triplestore_.eval_select(*sparql_query, timeout)) { + json_writer.add(entry); + if (json_writer.full()) { + resp.append_chunk(std::string{json_writer.string_view()}); + resp.flush([&](auto const &status) { asio_write_failed = status.failed(); }); + if (asio_write_failed) { + spdlog::warn("Writing chunked HTTP response failed."); + return; + } + json_writer.clear(); + } + } + json_writer.close(); + resp.append_chunk(std::string{json_writer.string_view()}); + resp.done(); + spdlog::info("HTTP response {}: {} variables, {} solutions, {} bindings", + status_ok(), + sparql_query->projected_variables_.size(), + json_writer.number_of_written_solutions(), + json_writer.number_of_written_bindings()); + } catch (std::runtime_error const &timeout_exception) { + const auto timeout_message = fmt::format("Request processing timed out after {}.", this->timeout_duration_); + spdlog::warn("HTTP response {}: {}", status_gateway_time_out(), timeout_message); + req->create_response(status_gateway_time_out()).set_body(timeout_message).done(); + } + }, + std::move(req)); + return restinio::request_accepted(); + } else { + spdlog::warn("Handling request was rejected. 
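Editor's note: the streaming endpoint below never materializes the full result set; it serializes into a bounded buffer and ships a chunk whenever the buffer fills. The shape of that loop, with strings standing in for the JSON writer and the restinio chunked response:

```cpp
#include <iostream>
#include <string>
#include <vector>

int main() {
    size_t const buffer_limit = 16;// the endpoint uses 100'000
    std::string buffer;
    auto flush_chunk = [&] {
        std::cout << "chunk(" << buffer.size() << "): " << buffer << '\n';
        buffer.clear();// json_writer.clear() in the endpoint
    };

    std::vector<std::string> solutions{"alpha", "beta", "gamma", "delta"};
    for (auto const &solution : solutions) {
        buffer += solution + ';';
        if (buffer.size() > buffer_limit)// json_writer.full() in the endpoint
            flush_chunk();
    }
    flush_chunk();// final partial chunk, like the trailing append_chunk()
}
```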
All workers are busy."); + return restinio::request_rejected(); + } + } +}// namespace dice::endpoint \ No newline at end of file diff --git a/libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.hpp b/libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.hpp new file mode 100644 index 00000000..0fc11bf4 --- /dev/null +++ b/libs/endpoint/src/dice/endpoint/SparqlStreamingEndpoint.hpp @@ -0,0 +1,32 @@ +#ifndef TENTRIS_SPARQLSTREAMINGENDPOINT_HPP +#define TENTRIS_SPARQLSTREAMINGENDPOINT_HPP + +#include +#include + +#include +#include + +#include + +namespace dice::endpoint { + + class SPARQLStreamingEndpoint { + tf::Executor &executor_; + + triple_store::TripleStore &triplestore_; + + SparqlQueryCache &sparql_query_cache_; + + std::chrono::seconds timeout_duration_; + + public: + SPARQLStreamingEndpoint(tf::Executor &executor, triple_store::TripleStore &triplestore, SparqlQueryCache &sparql_query_cache, std::chrono::seconds timeoutDuration); + + restinio::request_handling_status_t operator()( + restinio::request_handle_t req, + restinio::router::route_params_t params); + }; +}// namespace dice::endpoint + +#endif//TENTRIS_SPARQLSTREAMINGENDPOINT_HPP diff --git a/src/lib/tentris/util/SyncedLRUCache.hpp b/libs/endpoint/src/dice/endpoint/SyncedLRUCache.hpp similarity index 54% rename from src/lib/tentris/util/SyncedLRUCache.hpp rename to libs/endpoint/src/dice/endpoint/SyncedLRUCache.hpp index 9daf5c20..5b9d720c 100644 --- a/src/lib/tentris/util/SyncedLRUCache.hpp +++ b/libs/endpoint/src/dice/endpoint/SyncedLRUCache.hpp @@ -31,117 +31,115 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ + #include #include #include #include #include #include +#include -#include - -namespace tentris::util::sync { - - template - struct KeyValuePair { - K key; - std::shared_ptr value; +#include +#include - KeyValuePair(K key) : key(std::move(key)), value(std::make_shared(this->key)) {} - }; +#include +namespace dice::endpoint { template class SyncedLRUCache { - public: - using map_type = tsl::hopscotch_map>::iterator, Dice::hash::DiceHash>; - using node_type = KeyValuePair; - using list_type = std::list>; - using Lock = std::mutex; - using Guard = std::lock_guard; - using value_ptr = std::shared_ptr; - // Disallow copying. - SyncedLRUCache(const SyncedLRUCache &) = delete; + struct node_type { + Key key; + std::shared_ptr value; - SyncedLRUCache &operator=(const SyncedLRUCache &) = delete; + node_type(Key key) : key(std::move(key)), value(std::make_shared(this->key)) {} + }; + + using list_type = std::list; + using map_type = robin_hood::unordered_map>; + using value_ptr = std::shared_ptr; private: - mutable Lock lock_; + mutable std::mutex lock_; map_type cache_; - list_type keys_; - size_t maxSize_; - size_t elasticity_; - public: + list_type lru_list_; + size_t const max_size_; + size_t const elasticity_; + public: /** * the maxSize is the soft limit of keys and (maxSize + elasticity) is the * hard limit - * the cache is allowed to grow till (maxSize + elasticity) and is pruned back + * the cache is allowed to grow till (max_size + elasticity) and is pruned back * to maxSize keys * set maxSize = 0 for an unbounded cache (but in that case, you're better off * using a std::unordered_map * directly anyway! 
:) */ - explicit SyncedLRUCache(size_t maxSize = 1000, size_t elasticity = 100) - : maxSize_(maxSize), elasticity_(elasticity) {} + explicit SyncedLRUCache(size_t max_size = 1000, size_t elasticity = 100) noexcept + : max_size_(max_size), elasticity_(elasticity) {} + + // Disallow copying. + SyncedLRUCache(const SyncedLRUCache &) = delete; - [[nodiscard]] size_t size() const { - Guard g(lock_); + SyncedLRUCache &operator=(const SyncedLRUCache &) = delete; + + [[nodiscard]] size_t size() const noexcept { + std::lock_guard g(lock_); return cache_.size(); } - [[nodiscard]] bool empty() const { - Guard g(lock_); + [[nodiscard]] bool empty() const noexcept { + std::lock_guard g(lock_); return cache_.empty(); } - void clear() { - Guard g(lock_); + void clear() noexcept { + std::lock_guard g(lock_); cache_.clear(); - keys_.clear(); + lru_list_.clear(); } - [[nodiscard]] value_ptr &operator[](const Key &key) { - Guard g(lock_); + [[nodiscard]] std::shared_ptr operator[](Key const &key) noexcept(std::is_nothrow_constructible_v) { + std::lock_guard g(lock_); + spdlog::trace("Query cache entries: {}/{} (elastic: {})", cache_.size(), max_size(), max_allowed_size()); const auto iter = cache_.find(key); - logging::logTrace(fmt::format("cache size: {} lru size: {}", cache_.size(), keys_.size())); if (iter == cache_.end()) { - auto &key_value = keys_.emplace_front(key); - cache_[key] = keys_.begin(); + auto &key_value = lru_list_.emplace_front(key); + cache_[key] = lru_list_.begin(); prune(); return key_value.value; } else { - keys_.splice(keys_.begin(), keys_, iter->second); + lru_list_.splice(lru_list_.begin(), lru_list_, iter->second); return iter->second->value; } } - size_t getMaxSize() const { return maxSize_; } + [[nodiscard]] size_t max_size() const noexcept { return max_size_; } - size_t getElasticity() const { return elasticity_; } + [[nodiscard]] size_t elasticity() const noexcept { return elasticity_; } - size_t getMaxAllowedSize() const { return maxSize_ + elasticity_; } + [[nodiscard]] size_t max_allowed_size() const noexcept { return max_size_ + elasticity_; } - private: + bool is_compiled(); - size_t prune() { - size_t maxAllowed = maxSize_ + elasticity_; - if (maxSize_ == 0 || cache_.size() < maxAllowed) { + private: + size_t prune() noexcept { + if (max_size_ == 0 || cache_.size() < max_allowed_size()) { return 0; } size_t count = 0; - while (cache_.size() > maxSize_) { - cache_.erase(keys_.back().key); - keys_.pop_back(); + while (cache_.size() > max_size_) { + cache_.erase(lru_list_.back().key); + lru_list_.pop_back(); ++count; } return count; } - - }; -} +}// namespace dice::endpoint -#endif //TENTRIS_SYNCEDLRUCACHE_HPP +#endif//TENTRIS_SYNCEDLRUCACHE_HPP diff --git a/libs/node-store/CMakeLists.txt b/libs/node-store/CMakeLists.txt new file mode 100644 index 00000000..5a09b3ba --- /dev/null +++ b/libs/node-store/CMakeLists.txt @@ -0,0 +1,36 @@ + +# Find cmake packages +find_package(hypertrie REQUIRED) +find_package(robin_hood REQUIRED) +find_package(dice-hash REQUIRED) +find_package(Boost REQUIRED) + +# Define the library +add_library(node-store + src/dice/node-store/PersistentNodeStorageBackendImpl.cpp + src/dice/node-store/PersistentNodeStorageBackend.cpp + src/dice/node-store/MetallBNodeBackend.cpp + src/dice/node-store/MetallIRIBackend.cpp + src/dice/node-store/MetallLiteralBackend.cpp + src/dice/node-store/MetallVariableBackend.cpp + ) +add_library(tentris::node-store ALIAS node-store) + +target_link_libraries(node-store PUBLIC + tentris::rdf-tensor + ) + 
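Editor's note: the cache's only lookup is `operator[]`, which constructs the Value from the key on a miss, bumps the entry to the front on a hit, and prunes past the elastic limit. A minimal construct-on-miss LRU condensing that contract (no mutex, no elasticity), purely to illustrate the splice/prune mechanics:

```cpp
#include <cassert>
#include <list>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

template<typename Key, typename Value>
class TinyLRU {
    std::list<std::pair<Key, std::shared_ptr<Value>>> lru_;
    std::unordered_map<Key, typename decltype(lru_)::iterator> index_;
    size_t max_size_;

public:
    explicit TinyLRU(size_t max_size) : max_size_(max_size) {}

    std::shared_ptr<Value> operator[](Key const &key) {
        if (auto it = index_.find(key); it != index_.end()) {
            lru_.splice(lru_.begin(), lru_, it->second);// bump on hit
            return it->second->second;
        }
        lru_.emplace_front(key, std::make_shared<Value>(key));// construct on miss
        index_[key] = lru_.begin();
        if (lru_.size() > max_size_) {// prune the least recently used entry
            index_.erase(lru_.back().first);
            lru_.pop_back();
        }
        return lru_.front().second;
    }
};

struct ParsedQuery {
    std::string text;
    explicit ParsedQuery(std::string t) : text(std::move(t)) {}
};

int main() {
    TinyLRU<std::string, ParsedQuery> cache{2};
    auto a = cache["SELECT * WHERE { ?s ?p ?o }"];
    auto b = cache["SELECT * WHERE { ?s ?p ?o }"];
    assert(a == b);// a hit returns the same shared_ptr
}
```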
+target_include_directories(node-store PUBLIC + $ + ) + +set_target_properties(node-store PROPERTIES + VERSION ${PROJECT_VERSION} + SOVERSION ${PROJECT_VERSION_MAJOR} + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO + ) + +include(${PROJECT_SOURCE_DIR}/cmake/install_library.cmake) +install_component(node-store src) diff --git a/libs/node-store/src/dice/node-store/MetallBNodeBackend.cpp b/libs/node-store/src/dice/node-store/MetallBNodeBackend.cpp new file mode 100644 index 00000000..17d31f93 --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallBNodeBackend.cpp @@ -0,0 +1,17 @@ +#include "MetallBNodeBackend.hpp" +#include +namespace dice::node_store { + + MetallBNodeBackend::MetallBNodeBackend(std::string_view identifier, metall_manager::allocator_type const &allocator) noexcept + : identifier_(identifier, allocator), + hash_(View(*this).hash()) {} + MetallBNodeBackend::MetallBNodeBackend(rdf4cpp::rdf::storage::node::view::BNodeBackendView view, metall_manager::allocator_type const &allocator) noexcept + : identifier_(view.identifier, allocator), + hash_(View(*this).hash()) {} + std::string_view MetallBNodeBackend::identifier() const noexcept { + return identifier_; + } + MetallBNodeBackend::operator View() const noexcept { + return {.identifier = identifier()}; + } +}// namespace dice::node_store \ No newline at end of file diff --git a/libs/node-store/src/dice/node-store/MetallBNodeBackend.hpp b/libs/node-store/src/dice/node-store/MetallBNodeBackend.hpp new file mode 100644 index 00000000..32562d34 --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallBNodeBackend.hpp @@ -0,0 +1,28 @@ +#ifndef RDF4CPP_METALLBNODEBACKEND_HPP +#define RDF4CPP_METALLBNODEBACKEND_HPP + +#include + +#include + + +namespace dice::node_store { + + class MetallBNodeBackend { + metall_string identifier_; + size_t hash_; + + public: + using View = rdf4cpp::rdf::storage::node::view::BNodeBackendView; + + explicit MetallBNodeBackend(std::string_view identifier, metall_manager::allocator_type const &allocator) noexcept; + MetallBNodeBackend(rdf4cpp::rdf::storage::node::view::BNodeBackendView view, metall_manager::allocator_type const &allocator) noexcept; + [[nodiscard]] std::string_view identifier() const noexcept; + + [[nodiscard]] size_t hash() const noexcept { return hash_; } + + explicit operator View() const noexcept; + }; +}// namespace dice::node_store + +#endif//RDF4CPP_METALLBNODEBACKEND_HPP diff --git a/libs/node-store/src/dice/node-store/MetallIRIBackend.cpp b/libs/node-store/src/dice/node-store/MetallIRIBackend.cpp new file mode 100644 index 00000000..d7a66dd3 --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallIRIBackend.cpp @@ -0,0 +1,18 @@ +#include "MetallIRIBackend.hpp" +#include + +namespace dice::node_store { + + MetallIRIBackend::MetallIRIBackend(std::string_view iri, metall_manager::allocator_type const &allocator) noexcept + : iri(iri, allocator), + hash_(View(*this).hash()) {} + MetallIRIBackend::MetallIRIBackend(rdf4cpp::rdf::storage::node::view::IRIBackendView iri, metall_manager::allocator_type const &allocator) noexcept + : iri(iri.identifier, allocator), + hash_(View(*this).hash()) {} + std::string_view MetallIRIBackend::identifier() const noexcept { + return iri; + } + MetallIRIBackend::operator rdf4cpp::rdf::storage::node::view::IRIBackendView() const noexcept { + return {.identifier = identifier()}; + } +}// namespace dice::node_store diff --git a/libs/node-store/src/dice/node-store/MetallIRIBackend.hpp 
b/libs/node-store/src/dice/node-store/MetallIRIBackend.hpp new file mode 100644 index 00000000..67c213fc --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallIRIBackend.hpp @@ -0,0 +1,28 @@ +#ifndef RDF4CPP_METALLIRIBACKEND_HPP +#define RDF4CPP_METALLIRIBACKEND_HPP + +#include + +#include + +namespace dice::node_store { + class MetallIRIBackend { + metall_string iri; + size_t hash_; + + public: + using View = rdf4cpp::rdf::storage::node::view::IRIBackendView; + + explicit MetallIRIBackend(std::string_view iri, metall_manager::allocator_type const &allocator) noexcept; + explicit MetallIRIBackend(rdf4cpp::rdf::storage::node::view::IRIBackendView, metall_manager::allocator_type const &allocator) noexcept; + [[nodiscard]] std::string_view identifier() const noexcept; + + [[nodiscard]] size_t hash() const noexcept { return hash_; } + + explicit operator rdf4cpp::rdf::storage::node::view::IRIBackendView() const noexcept; + }; + +}// namespace dice::node_store + + +#endif//RDF4CPP_METALLIRIBACKEND_HPP diff --git a/libs/node-store/src/dice/node-store/MetallLiteralBackend.cpp b/libs/node-store/src/dice/node-store/MetallLiteralBackend.cpp new file mode 100644 index 00000000..67d9b65e --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallLiteralBackend.cpp @@ -0,0 +1,31 @@ +#include "MetallLiteralBackend.hpp" +#include +#include +namespace dice::node_store { + + MetallLiteralBackend::MetallLiteralBackend(std::string_view lexical, const rdf4cpp::rdf::storage::node::identifier::NodeID &datatype_id, std::string_view lang_tag, metall_manager::allocator_type const &allocator) noexcept + : datatype_id_(datatype_id), + lexical(lexical, allocator), + lang_tag(lang_tag, allocator), + hash_(View(*this).hash()) {} + MetallLiteralBackend::MetallLiteralBackend(rdf4cpp::rdf::storage::node::view::LiteralBackendView view, metall_manager::allocator_type const &allocator) noexcept + : datatype_id_(view.datatype_id), + lexical(view.lexical_form, allocator), + lang_tag(view.language_tag, allocator), + hash_(View(*this).hash()) {} + std::string_view MetallLiteralBackend::language_tag() const noexcept { + return lang_tag; + } + const rdf4cpp::rdf::storage::node::identifier::NodeID &MetallLiteralBackend::datatype_id() const noexcept { + return datatype_id_; + } + std::string_view MetallLiteralBackend::lexical_form() const noexcept { + return lexical; + } + MetallLiteralBackend::operator rdf4cpp::rdf::storage::node::view::LiteralBackendView() const noexcept { + return {.datatype_id = datatype_id(), + .lexical_form = lexical_form(), + .language_tag = language_tag()}; + } + +}// namespace dice::node_store diff --git a/libs/node-store/src/dice/node-store/MetallLiteralBackend.hpp b/libs/node-store/src/dice/node-store/MetallLiteralBackend.hpp new file mode 100644 index 00000000..e991198e --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallLiteralBackend.hpp @@ -0,0 +1,36 @@ +#ifndef RDF4CPP_METALLLITERALBACKEND_HPP +#define RDF4CPP_METALLLITERALBACKEND_HPP + +#include + +#include + +namespace dice::node_store { + + class MetallLiteralBackend { + rdf4cpp::rdf::storage::node::identifier::NodeID datatype_id_; + metall_string lexical; + metall_string lang_tag; + size_t hash_; + + public: + using View = rdf4cpp::rdf::storage::node::view::LiteralBackendView; + + MetallLiteralBackend(std::string_view lexical, const rdf4cpp::rdf::storage::node::identifier::NodeID &datatype_id, std::string_view lang_tag, metall_manager::allocator_type const &allocator) noexcept; + 
MetallLiteralBackend(rdf4cpp::rdf::storage::node::view::LiteralBackendView view, metall_manager::allocator_type const &allocator) noexcept; + + [[nodiscard]] std::string_view lexical_form() const noexcept; + + [[nodiscard]] const rdf4cpp::rdf::storage::node::identifier::NodeID &datatype_id() const noexcept; + + [[nodiscard]] std::string_view language_tag() const noexcept; + + [[nodiscard]] size_t hash() const noexcept { return hash_; } + + explicit operator rdf4cpp::rdf::storage::node::view::LiteralBackendView() const noexcept; + }; + +}// namespace dice::node_store + + +#endif//RDF4CPP_METALLLITERALBACKEND_HPP diff --git a/libs/node-store/src/dice/node-store/MetallNodeTypeStorage.hpp b/libs/node-store/src/dice/node-store/MetallNodeTypeStorage.hpp new file mode 100644 index 00000000..e08bbe50 --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallNodeTypeStorage.hpp @@ -0,0 +1,70 @@ +#ifndef RDF4CPP_METALLNODETYPESTORAGE_HPP +#define RDF4CPP_METALLNODETYPESTORAGE_HPP + +#include + + +#include + +#include + + +namespace dice::node_store { + /** + * Storage for one of the Node Backend types. Includes a shared mutex to synchronize access and bidirectional mappings between the Backend type and identifier::NodeID. + * @tparam BackendType_t one of BNodeBackend, IRIBackend, LiteralBackend and VariableBackend. + */ + template + struct MetallNodeTypeStorage { + using allocator_type = rdf_tensor::allocator_type; + using Backend = BackendType_t; + using Backend_allocator_type = metall_manager::allocator_type; + using Backend_ptr = typename Backend_allocator_type::pointer; + using BackendView = typename Backend::View; + struct BackendTypeHash { + [[nodiscard]] size_t operator()(Backend_ptr const &x) const noexcept { + return x->hash(); + } + [[nodiscard]] size_t operator()(BackendView const &x) const noexcept { + return x.hash(); + } + }; + + struct BackendTypeEqual { + using is_transparent = void; + + bool operator()(Backend_ptr const &lhs, Backend_ptr const &rhs) const noexcept { + if (bool(lhs) and bool(rhs)) + return lhs.get() == rhs.get(); + else + return bool(lhs) == bool(rhs); + } + bool operator()(BackendView const &lhs, Backend_ptr const &rhs) const noexcept { + if (rhs) + return lhs == BackendView(*rhs); + else + return false; + } + }; + + struct NodeIDHash { + [[nodiscard]] size_t operator()(rdf4cpp::rdf::storage::node::identifier::NodeID const &x) const noexcept { + return x.value(); + } + }; + + mutable std::shared_mutex mutex; + dice::sparse_map::sparse_map, + metall_manager::allocator_type>> + id2data; + dice::sparse_map::sparse_map>> + data2id; + + Backend_allocator_type backend_allocator; + + explicit MetallNodeTypeStorage(rdf_tensor::allocator_type const &alloc) : mutex(), id2data(alloc), data2id(alloc), backend_allocator(alloc) {} + }; +}// namespace dice::node_store + +#endif//RDF4CPP_METALLNODETYPESTORAGE_HPP diff --git a/libs/node-store/src/dice/node-store/MetallVariableBackend.cpp b/libs/node-store/src/dice/node-store/MetallVariableBackend.cpp new file mode 100644 index 00000000..56667f25 --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallVariableBackend.cpp @@ -0,0 +1,24 @@ +#include "MetallVariableBackend.hpp" +#include + +namespace dice::node_store { + + MetallVariableBackend::MetallVariableBackend(std::string_view name, bool anonymous, metall_manager::allocator_type const &allocator) noexcept + : name_(name, allocator), + anonymous_(anonymous), + hash_(View(*this).hash()) {} + 
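Editor's note: MetallNodeTypeStorage's `BackendTypeHash`/`BackendTypeEqual` carry an `is_transparent` tag so the map can be probed with a lightweight View without first allocating a Backend in persistent memory. The same heterogeneous-lookup idiom demonstrated with C++20 standard containers:

```cpp
#include <cassert>
#include <cstddef>
#include <functional>
#include <string>
#include <string_view>
#include <unordered_set>

// Both the hasher and the equality functor must declare is_transparent for
// unordered containers to accept lookups with a key type other than the stored one.
struct TransparentHash {
    using is_transparent = void;
    size_t operator()(std::string_view sv) const noexcept {
        return std::hash<std::string_view>{}(sv);
    }
};

struct TransparentEqual {
    using is_transparent = void;
    bool operator()(std::string_view lhs, std::string_view rhs) const noexcept {
        return lhs == rhs;
    }
};

int main() {
    std::unordered_set<std::string, TransparentHash, TransparentEqual> iris;
    iris.emplace("http://www.w3.org/2001/XMLSchema#string");

    // Probe with a non-owning view; no std::string is constructed for the lookup.
    std::string_view probe = "http://www.w3.org/2001/XMLSchema#string";
    assert(iris.find(probe) != iris.end());
}
```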
MetallVariableBackend::MetallVariableBackend(rdf4cpp::rdf::storage::node::view::VariableBackendView view, metall_manager::allocator_type const &allocator) noexcept + : name_(view.name, allocator), + anonymous_(view.is_anonymous), + hash_(View(*this).hash()) {} + bool MetallVariableBackend::is_anonymous() const noexcept { + return anonymous_; + } + std::string_view MetallVariableBackend::name() const noexcept { + return name_; + } + MetallVariableBackend::operator rdf4cpp::rdf::storage::node::view::VariableBackendView() const noexcept { + return {.name = name(), + .is_anonymous = is_anonymous()}; + } +}// namespace dice::node_store diff --git a/libs/node-store/src/dice/node-store/MetallVariableBackend.hpp b/libs/node-store/src/dice/node-store/MetallVariableBackend.hpp new file mode 100644 index 00000000..033b5c7c --- /dev/null +++ b/libs/node-store/src/dice/node-store/MetallVariableBackend.hpp @@ -0,0 +1,33 @@ +#ifndef RDF4CPP_METALLVARIABLEBACKEND_HPP +#define RDF4CPP_METALLVARIABLEBACKEND_HPP + +#include + +#include + +namespace dice::node_store { + + class MetallVariableBackend { + metall_string name_; + bool anonymous_; + size_t hash_; + + public: + using View = rdf4cpp::rdf::storage::node::view::VariableBackendView; + + explicit MetallVariableBackend(std::string_view name, bool anonymous, metall_manager::allocator_type const &allocator) noexcept; + explicit MetallVariableBackend(rdf4cpp::rdf::storage::node::view::VariableBackendView, metall_manager::allocator_type const &allocator) noexcept; + + [[nodiscard]] bool is_anonymous() const noexcept; + + [[nodiscard]] std::string_view name() const noexcept; + + [[nodiscard]] size_t hash() const noexcept { return hash_; } + + explicit operator rdf4cpp::rdf::storage::node::view::VariableBackendView() const noexcept; + }; + +}// namespace dice::node_store + + +#endif//RDF4CPP_METALLVARIABLEBACKEND_HPP diff --git a/libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.cpp b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.cpp new file mode 100644 index 00000000..502b106e --- /dev/null +++ b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.cpp @@ -0,0 +1,54 @@ +#include "PersistentNodeStorageBackend.hpp" +namespace dice::node_store { + + PersistentNodeStorageBackend::PersistentNodeStorageBackend(PersistentNodeStorageBackendImpl *impl) + : INodeStorageBackend(), impl_(impl) {} + rdf4cpp::rdf::storage::node::identifier::NodeID PersistentNodeStorageBackend::find_or_make_id(const rdf4cpp::rdf::storage::node::view::BNodeBackendView &view) noexcept { + return impl_->find_or_make_id(view); + } + rdf4cpp::rdf::storage::node::identifier::NodeID PersistentNodeStorageBackend::find_or_make_id(const rdf4cpp::rdf::storage::node::view::IRIBackendView &view) noexcept { + return impl_->find_or_make_id(view); + } + rdf4cpp::rdf::storage::node::identifier::NodeID PersistentNodeStorageBackend::find_or_make_id(const rdf4cpp::rdf::storage::node::view::LiteralBackendView &view) noexcept { + return impl_->find_or_make_id(view); + } + rdf4cpp::rdf::storage::node::identifier::NodeID PersistentNodeStorageBackend::find_or_make_id(const rdf4cpp::rdf::storage::node::view::VariableBackendView &view) noexcept { + return impl_->find_or_make_id(view); + } + rdf4cpp::rdf::storage::node::identifier::NodeID PersistentNodeStorageBackend::find_id(const rdf4cpp::rdf::storage::node::view::BNodeBackendView &view) const noexcept { + return impl_->find_id(view); + } + rdf4cpp::rdf::storage::node::identifier::NodeID 
PersistentNodeStorageBackend::find_id(const rdf4cpp::rdf::storage::node::view::IRIBackendView &view) const noexcept { + return impl_->find_id(view); + } + rdf4cpp::rdf::storage::node::identifier::NodeID PersistentNodeStorageBackend::find_id(const rdf4cpp::rdf::storage::node::view::LiteralBackendView &view) const noexcept { + return impl_->find_id(view); + } + rdf4cpp::rdf::storage::node::identifier::NodeID PersistentNodeStorageBackend::find_id(const rdf4cpp::rdf::storage::node::view::VariableBackendView &view) const noexcept { + return impl_->find_id(view); + } + rdf4cpp::rdf::storage::node::view::IRIBackendView PersistentNodeStorageBackend::find_iri_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + return impl_->find_iri_backend_view(id); + } + rdf4cpp::rdf::storage::node::view::LiteralBackendView PersistentNodeStorageBackend::find_literal_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + return impl_->find_literal_backend_view(id); + } + rdf4cpp::rdf::storage::node::view::BNodeBackendView PersistentNodeStorageBackend::find_bnode_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + return impl_->find_bnode_backend_view(id); + } + rdf4cpp::rdf::storage::node::view::VariableBackendView PersistentNodeStorageBackend::find_variable_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + return impl_->find_variable_backend_view(id); + } + bool PersistentNodeStorageBackend::erase_iri([[maybe_unused]] rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + throw std::runtime_error{"Not implemented."}; + } + bool PersistentNodeStorageBackend::erase_literal([[maybe_unused]] rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + throw std::runtime_error{"Not implemented."}; + } + bool PersistentNodeStorageBackend::erase_bnode([[maybe_unused]] rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + throw std::runtime_error{"Not implemented."}; + } + bool PersistentNodeStorageBackend::erase_variable([[maybe_unused]] rdf4cpp::rdf::storage::node::identifier::NodeID id) const { + throw std::runtime_error{"Not implemented."}; + } +}// namespace dice::node_store \ No newline at end of file diff --git a/libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.hpp b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.hpp new file mode 100644 index 00000000..cde59549 --- /dev/null +++ b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackend.hpp @@ -0,0 +1,35 @@ +#ifndef TENTRIS_PERSISTENTNODESTORAGEBACKEND_HPP +#define TENTRIS_PERSISTENTNODESTORAGEBACKEND_HPP + +#include "dice/node-store/PersistentNodeStorageBackendImpl.hpp" + +namespace dice::node_store { + + class PersistentNodeStorageBackend : public rdf4cpp::rdf::storage::node::INodeStorageBackend { + PersistentNodeStorageBackendImpl *impl_; + + public: + explicit PersistentNodeStorageBackend(PersistentNodeStorageBackendImpl *impl); + + ~PersistentNodeStorageBackend() override = default; + + rdf4cpp::rdf::storage::node::identifier::NodeID find_or_make_id(const rdf4cpp::rdf::storage::node::view::BNodeBackendView &view) noexcept override; + rdf4cpp::rdf::storage::node::identifier::NodeID find_or_make_id(const rdf4cpp::rdf::storage::node::view::IRIBackendView &view) noexcept override; + rdf4cpp::rdf::storage::node::identifier::NodeID find_or_make_id(const rdf4cpp::rdf::storage::node::view::LiteralBackendView &view) noexcept override; + rdf4cpp::rdf::storage::node::identifier::NodeID find_or_make_id(const 
rdf4cpp::rdf::storage::node::view::VariableBackendView &view) noexcept override;
+        rdf4cpp::rdf::storage::node::identifier::NodeID find_id(const rdf4cpp::rdf::storage::node::view::BNodeBackendView &view) const noexcept override;
+        rdf4cpp::rdf::storage::node::identifier::NodeID find_id(const rdf4cpp::rdf::storage::node::view::IRIBackendView &view) const noexcept override;
+        rdf4cpp::rdf::storage::node::identifier::NodeID find_id(const rdf4cpp::rdf::storage::node::view::LiteralBackendView &view) const noexcept override;
+        rdf4cpp::rdf::storage::node::identifier::NodeID find_id(const rdf4cpp::rdf::storage::node::view::VariableBackendView &view) const noexcept override;
+        rdf4cpp::rdf::storage::node::view::IRIBackendView find_iri_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+        rdf4cpp::rdf::storage::node::view::LiteralBackendView find_literal_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+        rdf4cpp::rdf::storage::node::view::BNodeBackendView find_bnode_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+        rdf4cpp::rdf::storage::node::view::VariableBackendView find_variable_backend_view(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+        bool erase_iri(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+        bool erase_literal(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+        bool erase_bnode(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+        bool erase_variable(rdf4cpp::rdf::storage::node::identifier::NodeID id) const override;
+    };
+}// namespace dice::node_store
+
+#endif//TENTRIS_PERSISTENTNODESTORAGEBACKEND_HPP
diff --git a/libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.cpp b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.cpp
new file mode 100644
index 00000000..d0f9ec20
--- /dev/null
+++ b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.cpp
@@ -0,0 +1,137 @@
+#include "PersistentNodeStorageBackendImpl.hpp"
+
+#include <cassert>
+#include <mutex>
+#include <shared_mutex>
+
+namespace dice::node_store {
+    using namespace rdf4cpp::rdf::storage::node;
+    PersistentNodeStorageBackendImpl::PersistentNodeStorageBackendImpl(metall_manager::allocator_type const &allocator)
+        : allocator(allocator),
+          bnode_storage_(allocator),
+          iri_storage_(allocator),
+          literal_storage_(allocator),
+          variable_storage_(allocator) {
+        // some IRIs like xsd:string are present by default
+        using namespace rdf4cpp::rdf;
+        for (const auto &[iri, id] : rdf4cpp::rdf::datatypes::registry::reserved_datatype_ids) {
+            auto mem = iri_storage_.backend_allocator.allocate(1);
+            iri_storage_.backend_allocator.construct(mem, iri, allocator);
+            auto [iter, inserted_successfully] = iri_storage_.data2id.emplace(mem, id.to_underlying());
+            assert(inserted_successfully);
+            iri_storage_.id2data.emplace(id.to_underlying(), iter->first);
+        }
+    }
+
+    /**
+     * Synchronized lookup (and creation) of IDs by a provided view of a Node Backend.
+     * @tparam Backend_t the Backend type. One of BNodeBackend, IRIBackend, LiteralBackend or VariableBackend
+     * @tparam create_if_not_present enables code for creating non-existing Node Backends
+     * @tparam NextIDFromView_func type of a function to generate the next ID which is assigned in case a new Node Backend is created
+     * @param view contains the data of the requested Node Backend
+     * @param storage the storage where the Node Backend is looked up
+     * @param next_id_func function to generate the next ID which is assigned in case a new Node Backend is created
+     * @return the NodeID for the looked up Node Backend. Result is null() if there was no matching Node Backend.
+     */
+    template<typename Backend_t, bool create_if_not_present, typename NextIDFromView_func = std::nullptr_t>
+    inline identifier::NodeID lookup_or_insert_impl(typename Backend_t::View const &view,
+                                                    auto &storage,
+                                                    NextIDFromView_func next_id_func = nullptr) noexcept {
+        std::shared_lock shared_lock{storage.mutex};
+        auto found = storage.data2id.find(view);
+        if (found == storage.data2id.end()) {
+            if constexpr (create_if_not_present) {
+                shared_lock.unlock();
+                std::unique_lock unique_lock{storage.mutex};
+                // update found (might have changed in the meantime)
+                found = storage.data2id.find(view);
+                if (found == storage.data2id.end()) {
+                    identifier::NodeID id = next_id_func(view);
+                    auto mem = storage.backend_allocator.allocate(1);
+                    storage.backend_allocator.construct(mem, view, storage.backend_allocator);
+                    auto [found2, inserted_successfully] = storage.data2id.emplace(mem, id);
+                    assert(inserted_successfully);
+                    storage.id2data.emplace(id, found2->first);
+                    return id;
+                } else {
+                    unique_lock.unlock();
+                    return found->second;
+                }
+            } else {
+                return {};
+            }
+        } else {
+            shared_lock.unlock();
+            return found->second;
+        }
+    }
+
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_or_make_id(view::LiteralBackendView const &view) noexcept {
+        return lookup_or_insert_impl<MetallLiteralBackend, true>(
+                view, literal_storage_,
+                [this]([[maybe_unused]] view::LiteralBackendView const &literal_view) {
+                    return identifier::NodeID{next_literal_id++,
+                                              identifier::iri_node_id_to_literal_type(literal_view.datatype_id)};
+                });
+    }
+
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_or_make_id(view::IRIBackendView const &view) noexcept {
+        return lookup_or_insert_impl<MetallIRIBackend, true>(
+                view, iri_storage_,
+                [this]([[maybe_unused]] view::IRIBackendView const &view) {
+                    return next_iri_id++;
+                });
+    }
+
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_or_make_id(view::BNodeBackendView const &view) noexcept {
+        return lookup_or_insert_impl<MetallBNodeBackend, true>(
+                view, bnode_storage_,
+                [this]([[maybe_unused]] view::BNodeBackendView const &view) {
+                    return next_bnode_id++;
+                });
+    }
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_or_make_id(view::VariableBackendView const &view) noexcept {
+        return lookup_or_insert_impl<MetallVariableBackend, true>(
+                view, variable_storage_,
+                [this]([[maybe_unused]] view::VariableBackendView const &view) {
+                    return next_variable_id++;
+                });
+    }
+
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_id(const view::BNodeBackendView &view) const noexcept {
+        return lookup_or_insert_impl<MetallBNodeBackend, false>(
+                view, bnode_storage_);
+    }
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_id(const view::IRIBackendView &view) const noexcept {
+        return lookup_or_insert_impl<MetallIRIBackend, false>(
+                view, iri_storage_);
+    }
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_id(const view::LiteralBackendView &view) const noexcept {
+        return lookup_or_insert_impl<MetallLiteralBackend, false>(
+                view, literal_storage_);
+    }
+    identifier::NodeID PersistentNodeStorageBackendImpl::find_id(const view::VariableBackendView &view) const noexcept {
+        return lookup_or_insert_impl<MetallVariableBackend, false>(
+                view, variable_storage_);
+    }
+
+    template<typename NodeTypeStorage>
+    typename NodeTypeStorage::BackendView find_backend_view(NodeTypeStorage &storage, identifier::NodeID id) {
+        std::shared_lock shared_lock{storage.mutex};
+        return typename NodeTypeStorage::BackendView(*storage.id2data.at(id));
+    }
+
+    view::IRIBackendView PersistentNodeStorageBackendImpl::find_iri_backend_view(identifier::NodeID id) const {
+        return find_backend_view(iri_storage_, id);
+    }
+    view::LiteralBackendView PersistentNodeStorageBackendImpl::find_literal_backend_view(identifier::NodeID id) const {
+        return find_backend_view(literal_storage_, id);
+    }
+    view::BNodeBackendView PersistentNodeStorageBackendImpl::find_bnode_backend_view(identifier::NodeID id) const {
+        return find_backend_view(bnode_storage_, id);
+    }
+    view::VariableBackendView PersistentNodeStorageBackendImpl::find_variable_backend_view(identifier::NodeID id) const {
+        return find_backend_view(variable_storage_, id);
+    }
+
+}// namespace dice::node_store
\ No newline at end of file
diff --git a/libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.hpp b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.hpp
new file mode 100644
index 00000000..777fac64
--- /dev/null
+++ b/libs/node-store/src/dice/node-store/PersistentNodeStorageBackendImpl.hpp
@@ -0,0 +1,70 @@
+#ifndef TENTRIS_PERSISTENTNODESTORAGEBACKENDIMPL_HPP
+#define TENTRIS_PERSISTENTNODESTORAGEBACKENDIMPL_HPP
+
+#include
+#include
+
+#include
+#include
+
+#include "dice/node-store/MetallBNodeBackend.hpp"
+#include "dice/node-store/MetallIRIBackend.hpp"
+#include "dice/node-store/MetallLiteralBackend.hpp"
+#include "dice/node-store/MetallNodeTypeStorage.hpp"
+#include "dice/node-store/MetallVariableBackend.hpp"
+
+
+namespace dice::node_store {
+
+    class PersistentNodeStorageBackendImpl {
+        using RDFNodeType = rdf4cpp::rdf::storage::node::identifier::RDFNodeType;
+        using NodeID = rdf4cpp::rdf::storage::node::identifier::NodeID;
+        using LiteralType = rdf4cpp::rdf::storage::node::identifier::LiteralType;
+        using LiteralID = rdf4cpp::rdf::storage::node::identifier::LiteralID;
+        using LiteralBackendView = rdf4cpp::rdf::storage::node::view::LiteralBackendView;
+        using BNodeBackendView = rdf4cpp::rdf::storage::node::view::BNodeBackendView;
+        using IRIBackendView = rdf4cpp::rdf::storage::node::view::IRIBackendView;
+        using VariableBackendView = rdf4cpp::rdf::storage::node::view::VariableBackendView;
+
+    public:
+        template<typename T>
+        using pointer = typename metall_manager::allocator_type<T>::pointer;
+
+    private:
+        metall_manager::allocator_type allocator;
+        MetallNodeTypeStorage<MetallBNodeBackend> bnode_storage_;
+        MetallNodeTypeStorage<MetallIRIBackend> iri_storage_;
+        MetallNodeTypeStorage<MetallLiteralBackend> literal_storage_;
+        MetallNodeTypeStorage<MetallVariableBackend> variable_storage_;
+
+        constexpr static rdf4cpp::rdf::storage::node::identifier::NodeStorageID manager_id = rdf4cpp::rdf::storage::node::identifier::NodeStorageID{0};
+
+        LiteralID next_literal_id = NodeID::min_literal_id;
+        NodeID next_bnode_id = NodeID::min_bnode_id;
+        NodeID next_iri_id = NodeID::min_iri_id;
+        NodeID next_variable_id = NodeID::min_variable_id;
+
+
+    public:
+        explicit PersistentNodeStorageBackendImpl(metall_manager::allocator_type const &allocator);
+
+
+        [[nodiscard]] NodeID find_or_make_id(BNodeBackendView const &) noexcept;
+        [[nodiscard]] NodeID find_or_make_id(IRIBackendView const &) noexcept;
+        [[nodiscard]] NodeID find_or_make_id(LiteralBackendView const &) noexcept;
+        [[nodiscard]] NodeID find_or_make_id(VariableBackendView const &) noexcept;
+
+
[[nodiscard]] NodeID find_id(BNodeBackendView const &) const noexcept; + [[nodiscard]] NodeID find_id(IRIBackendView const &) const noexcept; + [[nodiscard]] NodeID find_id(LiteralBackendView const &) const noexcept; + [[nodiscard]] NodeID find_id(VariableBackendView const &) const noexcept; + + [[nodiscard]] IRIBackendView find_iri_backend_view(NodeID id) const; + [[nodiscard]] LiteralBackendView find_literal_backend_view(NodeID id) const; + [[nodiscard]] BNodeBackendView find_bnode_backend_view(NodeID id) const; + [[nodiscard]] VariableBackendView find_variable_backend_view(NodeID id) const; + }; + +}// namespace dice::node_store + +#endif//TENTRIS_PERSISTENTNODESTORAGEBACKENDIMPL_HPP diff --git a/libs/node-store/src/dice/node-store/metall_manager.hpp b/libs/node-store/src/dice/node-store/metall_manager.hpp new file mode 100644 index 00000000..b0ee449e --- /dev/null +++ b/libs/node-store/src/dice/node-store/metall_manager.hpp @@ -0,0 +1,19 @@ +#ifndef TENTRIS_METALL_ALLOCATOR_HPP +#define TENTRIS_METALL_ALLOCATOR_HPP + + +#ifndef BOOST_BIND_GLOBAL_PLACEHOLDERS +#define BOOST_BIND_GLOBAL_PLACEHOLDERS +#endif + +#include + +#include + + +namespace dice::node_store { + + using metall_manager = rdf_tensor::metall_manager; + using metall_string = metall::container::basic_string, metall_manager::allocator_type>; +} +#endif//TENTRIS_METALL_ALLOCATOR_HPP diff --git a/libs/rdf-tensor/CMakeLists.txt b/libs/rdf-tensor/CMakeLists.txt new file mode 100644 index 00000000..b8fa2fce --- /dev/null +++ b/libs/rdf-tensor/CMakeLists.txt @@ -0,0 +1,25 @@ +# Find cmake packages +find_package(Boost REQUIRED) +find_package(hypertrie REQUIRED) +find_package(Metall REQUIRED) +find_package(rdf4cpp REQUIRED) +find_package(dice-hash REQUIRED) + +# Define the library +add_library(rdf-tensor INTERFACE) +add_library(tentris::rdf-tensor ALIAS rdf-tensor) +target_link_libraries(rdf-tensor INTERFACE + rdf4cpp::rdf4cpp + hypertrie::hypertrie + Metall::Metall + Boost::headers + dice-hash::dice-hash + ) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/version.hpp.in ${CMAKE_CURRENT_SOURCE_DIR}/src/dice/tentris/tentris_version.hpp) +target_include_directories(rdf-tensor INTERFACE + $ + ) + +include(${PROJECT_SOURCE_DIR}/cmake/install_library.cmake) +install_interface_component(rdf-tensor src) \ No newline at end of file diff --git a/libs/rdf-tensor/cmake/version.hpp.in b/libs/rdf-tensor/cmake/version.hpp.in new file mode 100644 index 00000000..84f14add --- /dev/null +++ b/libs/rdf-tensor/cmake/version.hpp.in @@ -0,0 +1,13 @@ +#ifndef TENTRIS_VERSION_HPP +#define TENTRIS_VERSION_HPP + +#include + +namespace dice::tentris { + inline constexpr const char name[] = "tentris"; + inline constexpr const char version[] = "@PROJECT_VERSION@"; + inline constexpr std::array version_tuple = {@PROJECT_VERSION_MAJOR@, @PROJECT_VERSION_MINOR@, @PROJECT_VERSION_PATCH@}; + inline constexpr const char rdf4cpp_version[] = "@rdf4cpp_VERSION@"; +}// namespace dice::tentris + +#endif//TENTRIS_VERSION_HPP diff --git a/libs/rdf-tensor/src/dice/rdf-tensor/HypertrieTrait.hpp b/libs/rdf-tensor/src/dice/rdf-tensor/HypertrieTrait.hpp new file mode 100644 index 00000000..296f46bd --- /dev/null +++ b/libs/rdf-tensor/src/dice/rdf-tensor/HypertrieTrait.hpp @@ -0,0 +1,42 @@ +#ifndef TENTRIS_HYPERTRIETRAIT_HPP +#define TENTRIS_HYPERTRIETRAIT_HPP + +#include "dice/rdf-tensor/NodeWrapper.hpp" + +#include +#include +#include + +namespace dice::rdf_tensor { + using key_part_type = NodeWrapper; + template + using map_type = dice::sparse_map::sparse_map, + 
std::equal_to, + typename std::allocator_traits::template rebind_alloc>, + dice::sparse_map::sh::power_of_two_growth_policy<2>, + dice::sparse_map::sh::exception_safety::basic, + dice::sparse_map::sh::sparsity::high>; + + template + using set_type = dice::sparse_map::sparse_set< + Key, + dice::hash::DiceHashMartinus, + std::equal_to, + typename std::allocator_traits::template rebind_alloc, + dice::sparse_map::sh::power_of_two_growth_policy<2>, + dice::sparse_map::sh::exception_safety::basic, + dice::sparse_map::sh::sparsity::high>; + + using htt_t = dice::hypertrie::Hypertrie_trait; + + using SliceKey = dice::hypertrie::SliceKey; + using Key = dice::hypertrie::Key; + using NonZeroEntry = dice::hypertrie::NonZeroEntry; +}// namespace dice::rdf-tensor +#endif//TENTRIS_HYPERTRIETRAIT_HPP diff --git a/libs/rdf-tensor/src/dice/rdf-tensor/NodeWrapper.hpp b/libs/rdf-tensor/src/dice/rdf-tensor/NodeWrapper.hpp new file mode 100644 index 00000000..50d33dc8 --- /dev/null +++ b/libs/rdf-tensor/src/dice/rdf-tensor/NodeWrapper.hpp @@ -0,0 +1,52 @@ +#ifndef TENTRIS_NODEWRAPPER_HPP +#define TENTRIS_NODEWRAPPER_HPP + +#include +#include +//#include + +namespace dice::rdf_tensor { + using namespace rdf4cpp::rdf; + + class NodeWrapper : public Node { + protected: + explicit NodeWrapper(NodeBackendHandle id) noexcept : Node(id) {} + + public: + NodeWrapper() noexcept = default; + + NodeWrapper(Node node) noexcept : Node(node) {} + + bool operator==(const NodeWrapper &other) const noexcept { + return this->backend_handle().raw() == other.backend_handle().raw(); + } + + bool operator!=(const NodeWrapper &other) const noexcept { + return this->backend_handle().raw() != other.backend_handle().raw(); + } + + auto operator<=>(const NodeWrapper &other) const noexcept { + return this->backend_handle().raw() <=> other.backend_handle().raw(); + }; + + operator std::optional() const noexcept { + return (Node) * this; + }; + }; +};// namespace dice::rdf-tensor + +template +struct dice::hash::dice_hash_overload { + inline static std::size_t dice_hash(dice::rdf_tensor::NodeWrapper const &x) noexcept { + return Policy::hash_fundamental(x.backend_handle().raw()); + } +}; + +template<> +struct std::hash { + size_t operator()(dice::rdf_tensor::NodeWrapper const &x) const noexcept { + return x.backend_handle().raw(); + } +}; + +#endif//TENTRIS_NODEWRAPPER_HPP diff --git a/libs/rdf-tensor/src/dice/rdf-tensor/Query.hpp b/libs/rdf-tensor/src/dice/rdf-tensor/Query.hpp new file mode 100644 index 00000000..4cc500d5 --- /dev/null +++ b/libs/rdf-tensor/src/dice/rdf-tensor/Query.hpp @@ -0,0 +1,16 @@ +#ifndef TENTRIS_QUERY_HPP +#define TENTRIS_QUERY_HPP + +#include "dice/rdf-tensor/HypertrieTrait.hpp" +#include "dice/rdf-tensor/metall_manager.hpp" +#include + +namespace dice::rdf_tensor { + using COUNTED_t = std::size_t; + using Entry = dice::query::Entry; + using DISTINCT_t = bool; + using UncountedEntry = dice::query::Entry; + using Query = dice::query::Query; +}// namespace dice::rdf-tensor + +#endif//TENTRIS_QUERY_HPP diff --git a/libs/rdf-tensor/src/dice/rdf-tensor/RDFNodeHashes.hpp b/libs/rdf-tensor/src/dice/rdf-tensor/RDFNodeHashes.hpp new file mode 100644 index 00000000..d7c346bd --- /dev/null +++ b/libs/rdf-tensor/src/dice/rdf-tensor/RDFNodeHashes.hpp @@ -0,0 +1,43 @@ +#ifndef TENTRIS_NODEHASHES_HPP +#define TENTRIS_NODEHASHES_HPP + +#include +#include + +namespace dice::hash { + template + struct dice_hash_overload { + inline static std::size_t dice_hash(rdf4cpp::rdf::Node const &x) noexcept { + return 
Policy::hash_fundamental(x.backend_handle().raw());
+        }
+    };
+
+    template<typename Policy>
+    struct dice_hash_overload<Policy, rdf4cpp::rdf::query::Variable> {
+        inline static std::size_t dice_hash(rdf4cpp::rdf::query::Variable const &x) noexcept {
+            return Policy::hash_fundamental(x.backend_handle().raw());
+        }
+    };
+
+    template<typename Policy>
+    struct dice_hash_overload<Policy, rdf4cpp::rdf::Literal> {
+        inline static std::size_t dice_hash(rdf4cpp::rdf::Literal const &x) noexcept {
+            return Policy::hash_fundamental(x.backend_handle().raw());
+        }
+    };
+
+    template<typename Policy>
+    struct dice_hash_overload<Policy, rdf4cpp::rdf::IRI> {
+        inline static std::size_t dice_hash(rdf4cpp::rdf::IRI const &x) noexcept {
+            return Policy::hash_fundamental(x.backend_handle().raw());
+        }
+    };
+
+    template<typename Policy>
+    struct dice_hash_overload<Policy, rdf4cpp::rdf::BlankNode> {
+        inline static std::size_t dice_hash(rdf4cpp::rdf::BlankNode const &x) noexcept {
+            return Policy::hash_fundamental(x.backend_handle().raw());
+        }
+    };
+}// namespace dice::hash
+#endif//TENTRIS_NODEHASHES_HPP
diff --git a/libs/rdf-tensor/src/dice/rdf-tensor/RDFTensor.hpp b/libs/rdf-tensor/src/dice/rdf-tensor/RDFTensor.hpp
new file mode 100644
index 00000000..040468e9
--- /dev/null
+++ b/libs/rdf-tensor/src/dice/rdf-tensor/RDFTensor.hpp
@@ -0,0 +1,14 @@
+#ifndef TENTRIS_RDFTENSOR_HPP
+#define TENTRIS_RDFTENSOR_HPP
+
+#include "dice/rdf-tensor/HypertrieTrait.hpp"
+#include "dice/rdf-tensor/metall_manager.hpp"
+
+namespace dice::rdf_tensor {
+    using HypertrieContext = dice::hypertrie::HypertrieContext;
+    using BoolHypertrie = dice::hypertrie::Hypertrie;
+    using const_BoolHypertrie = dice::hypertrie::const_Hypertrie;
+    using HypertrieBulkInserter = dice::hypertrie::BulkInserter;
+    using HypertrieContext_ptr = dice::hypertrie::HypertrieContext_ptr;
+}// namespace dice::rdf_tensor
+#endif//TENTRIS_RDFTENSOR_HPP
diff --git a/libs/rdf-tensor/src/dice/rdf-tensor/metall_manager.hpp b/libs/rdf-tensor/src/dice/rdf-tensor/metall_manager.hpp
new file mode 100644
index 00000000..99d032f9
--- /dev/null
+++ b/libs/rdf-tensor/src/dice/rdf-tensor/metall_manager.hpp
@@ -0,0 +1,14 @@
+#ifndef TENTRIS_METALL_MANAGER_HPP
+#define TENTRIS_METALL_MANAGER_HPP
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wnull-pointer-arithmetic"
+#include <metall/metall.hpp>
+#pragma GCC diagnostic pop
+
+namespace dice::rdf_tensor {
+    using metall_manager = metall::basic_manager;
+    using allocator_type = metall_manager::allocator_type;
+}// namespace dice::rdf_tensor
+
+#endif//TENTRIS_METALL_MANAGER_HPP
diff --git a/libs/sparql2tensor/CMakeLists.txt b/libs/sparql2tensor/CMakeLists.txt
new file mode 100644
index 00000000..6ade03a2
--- /dev/null
+++ b/libs/sparql2tensor/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Find cmake packages
+find_package(sparql-parser-base REQUIRED)
+find_package(robin_hood REQUIRED)
+find_package(dice-hash REQUIRED)
+
+# Define the library
+add_library(sparql2tensor
+        src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.cpp
+        src/dice/sparql2tensor/parser/visitors/PrologueVisitor.cpp
+        src/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.cpp
+        src/dice/sparql2tensor/SPARQLQuery.cpp
+        src/dice/sparql2tensor/UPDATEQuery.cpp
+        )
+
+add_library(tentris::sparql2tensor ALIAS sparql2tensor)
+
+target_include_directories(sparql2tensor PUBLIC
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
+        PRIVATE
+        private-include
+        )
+
+target_link_libraries(sparql2tensor PUBLIC
+        tentris::rdf-tensor
+        robin_hood::robin_hood
+        PRIVATE
+        sparql-parser-base::sparql-parser-base
+        )
+
+
+set_target_properties(sparql2tensor PROPERTIES
+        VERSION ${PROJECT_VERSION}
+        SOVERSION ${PROJECT_VERSION_MAJOR}
+        CXX_STANDARD 20
+        CXX_STANDARD_REQUIRED YES
+        CXX_EXTENSIONS NO
+        )
+
+include(${PROJECT_SOURCE_DIR}/cmake/install_library.cmake) +install_component(sparql2tensor src) diff --git a/libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/PrologueVisitor.hpp b/libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/PrologueVisitor.hpp new file mode 100644 index 00000000..ac85657d --- /dev/null +++ b/libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/PrologueVisitor.hpp @@ -0,0 +1,28 @@ +#ifndef DICE_SPARQL_PROLOGUEVISITOR_HPP +#define DICE_SPARQL_PROLOGUEVISITOR_HPP + +#include + +#include + +#include + + +namespace dice::sparql2tensor::parser::visitors { + + using namespace dice::sparql_parser::base; + + class PrologueVisitor : public SparqlParserBaseVisitor { + rdf4cpp::rdf::parser::IStreamQuadIterator::prefix_storage_type prefixes_; + + public: + std::any visitPrologue(SparqlParser::PrologueContext *) override; + + std::any visitBaseDecl(SparqlParser::BaseDeclContext *) override; + + std::any visitPrefixDecl(SparqlParser::PrefixDeclContext *) override; + }; + +}// namespace dice::sparql2tensor::parser::visitors + +#endif//DICE_SPARQL_PROLOGUEVISITOR_HPP diff --git a/libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.hpp b/libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.hpp new file mode 100644 index 00000000..f4f5c016 --- /dev/null +++ b/libs/sparql2tensor/private-include/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.hpp @@ -0,0 +1,130 @@ +#ifndef DICE_SPARQL_SELECTASKQUERYVISITOR_HPP +#define DICE_SPARQL_SELECTASKQUERYVISITOR_HPP + +#include + +#include + +#include "dice/sparql2tensor/SPARQLQuery.hpp" + +#include + +namespace dice::sparql2tensor::parser::visitors { + + using namespace dice::sparql_parser::base; + + class SelectAskQueryVisitor : public SparqlParserBaseVisitor { + + private: + SPARQLQuery *const query; + rdf4cpp::rdf::Node active_subject; + rdf4cpp::rdf::Node active_predicate; + char var_id = 'a'; + /* for the construction of the operand dependency graph */ + // stack of group graph patterns + std::vector> group_patterns; + // stack of operands appearing in optional patterns; one vector per graph pattern + std::vector> opt_operands; + // stack of operands appearing in union patterns found in optional patterns; one vector per graph pattern + // it is used to avoid creating cartesian connections between optional operands of the same union pattern + // once an optional sub graph pattern is visited, this vector needs to be cleared + std::vector> union_operands; + /* for the "query rewriting" */ + std::vector> triples_blocks; + std::vector> optional_blocks; + + public: + SelectAskQueryVisitor() = delete; + + explicit SelectAskQueryVisitor(SPARQLQuery *q) : query{q} {} + + std::any visitAskQuery(SparqlParser::AskQueryContext *ctx) override; + + std::any visitSelectQuery(SparqlParser::SelectQueryContext *) override; + + std::any visitSelectClause(SparqlParser::SelectClauseContext *) override; + + std::any visitWhereClause(SparqlParser::WhereClauseContext *) override; + + std::any visitGroupGraphPattern(SparqlParser::GroupGraphPatternContext *) override; + + std::any visitGroupGraphPatternSub(SparqlParser::GroupGraphPatternSubContext *) override; + + std::any visitTriplesBlock(SparqlParser::TriplesBlockContext *) override; + + std::any visitTriplesSameSubjectPath(SparqlParser::TriplesSameSubjectPathContext *) override; + + std::any visitPropertyListPathNotEmpty(SparqlParser::PropertyListPathNotEmptyContext *) 
override;
+
+        std::any visitVarOrTerm(SparqlParser::VarOrTermContext *) override;
+
+        std::any visitIri(SparqlParser::IriContext *) override;
+
+        std::any visitBlankNode(SparqlParser::BlankNodeContext *) override;
+
+        std::any visitVar(SparqlParser::VarContext *) override;
+
+        std::any visitObjectListPath(SparqlParser::ObjectListPathContext *) override;
+
+        std::any visitObjectList(SparqlParser::ObjectListContext *) override;
+
+        std::any visitObjectPath(SparqlParser::ObjectPathContext *) override;
+
+        std::any visitObject(SparqlParser::ObjectContext *) override;
+
+        std::any visitPath(SparqlParser::PathContext *) override;
+
+        std::any visitPathAlternative(SparqlParser::PathAlternativeContext *) override;
+
+        std::any visitPathSequence(SparqlParser::PathSequenceContext *) override;
+
+        std::any visitPathEltOrInverse(SparqlParser::PathEltOrInverseContext *) override;
+
+        std::any visitPathElt(SparqlParser::PathEltContext *) override;
+
+        std::any visitRdfLiteral(SparqlParser::RdfLiteralContext *) override;
+
+        std::any visitNumericLiteral(SparqlParser::NumericLiteralContext *) override;
+
+        std::any visitBooleanLiteral(SparqlParser::BooleanLiteralContext *) override;
+
+        std::any visitString(SparqlParser::StringContext *) override;
+
+    private:
+        void register_var(rdf4cpp::rdf::query::Variable const &var);
+
+        /**
+         * @brief: Creates a new node in the operand dependency graph and the dependencies between
+         * the new node and the nodes corresponding to triple patterns of the same group graph pattern.
+         * @param tp A triple pattern
+         */
+        void add_tp(rdf4cpp::rdf::query::TriplePattern const &tp);
+
+        /**
+         * @brief: Creates dependencies between the nodes (i.e., triple patterns) of different group graph patterns.
+         * @param prev_group The previous group graph pattern.
+         * @param cur_group The current group graph pattern.
+         * @param bidirectional Whether the edges should be bidirectional (e.g., in OPTIONAL they are unidirectional).
+         */
+        void group_dependencies(std::vector const &prev_group, std::vector const &cur_group, bool bidirectional = false);
+
+        /**
+         * @brief: Creates connections between group graph patterns.
+         * Used to capture cartesian products between different optional group graph patterns.
+         * @param prev_group The previous group graph pattern.
+         * @param cur_group The current group graph pattern.
+         */
+        void group_connections(std::vector const &prev_group, std::vector const &cur_group);
+
+        /**
+         * @brief: A visitor for well-designed SPARQL patterns only.
+         * @param ctx A GroupGraphPatternSub context.
+         * @param gou_ctxs A vector of GroupOrUnionGraphPattern contexts.
+ */ + void visitWellDesignedPattern(SparqlParser::GroupGraphPatternSubContext *ctx, + std::vector gou_ctxs); + }; + +}// namespace dice::sparql2tensor::parser::visitors + +#endif//DICE_SPARQL_SELECTASKQUERYVISITOR_HPP \ No newline at end of file diff --git a/libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.cpp b/libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.cpp new file mode 100644 index 00000000..476da61e --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.cpp @@ -0,0 +1,59 @@ +#include "SPARQLQuery.hpp" + +#include +#include + +#include "dice/sparql2tensor/parser/visitors/PrologueVisitor.hpp" + +#include "dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.hpp" + + +namespace dice::sparql2tensor { + + SPARQLQuery dice::sparql2tensor::SPARQLQuery::parse(std::string const &sparql_query_str) { + antlr4::ANTLRInputStream input(sparql_query_str); + dice::sparql_parser::base::SparqlLexer lexer(&input); + antlr4::CommonTokenStream tokens(&lexer); + dice::sparql_parser::base::SparqlParser parser(&tokens); + + auto q_ctx = parser.query(); + + if (not q_ctx->selectQuery() and not q_ctx->askQuery()) + throw std::runtime_error("Only SELECT & ASK queries are supported currently."); + + SPARQLQuery p_sparql{}; + if (q_ctx->prologue()) { + parser::visitors::PrologueVisitor p_visitor{}; + p_sparql.prefixes_ = std::any_cast>(p_visitor.visitPrologue(q_ctx->prologue())); + } + + parser::visitors::SelectAskQueryVisitor visitor{&p_sparql}; + if (q_ctx->selectQuery()) + visitor.visitSelectQuery(q_ctx->selectQuery()); + else if(q_ctx->askQuery()) + visitor.visitAskQuery(q_ctx->askQuery()); + + return p_sparql; + } + + bool SPARQLQuery::is_distinct() const noexcept { + return distinct_; + } + + std::vector SPARQLQuery::get_slice_keys() const { + std::vector slice_keys; + slice_keys.reserve(triple_patterns_.size()); + for (auto const &tp : triple_patterns_) { + rdf_tensor::SliceKey slice_key; + slice_key.reserve(3); + for (auto const &node : tp) { + if (node.is_variable()) + slice_key.push_back(std::nullopt); + else + slice_key.push_back(node); + } + slice_keys.push_back(std::move(slice_key)); + } + return slice_keys; + } +}// namespace dice::sparql2tensor \ No newline at end of file diff --git a/libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.hpp b/libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.hpp new file mode 100644 index 00000000..ea4f946c --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/SPARQLQuery.hpp @@ -0,0 +1,45 @@ +#ifndef DICE_SPARQL_PARSEDSPARQL_HPP +#define DICE_SPARQL_PARSEDSPARQL_HPP + +#include + +#include +#include +#include +#include + +#include + +namespace dice::sparql2tensor { + + struct SPARQLQuery { + dice::query::OperandDependencyGraph odg_; + + std::vector projected_variables_; + + robin_hood::unordered_map> var_to_id_; + + std::vector triple_patterns_; + + robin_hood::unordered_map prefixes_; + + bool distinct_ = false; + + bool ask_ = false; + + bool project_all_variables_ = false; + + SPARQLQuery() = default; + + static SPARQLQuery parse(std::string const &sparql_query_str); + + SPARQLQuery(std::string const &sparql_query_str) : SPARQLQuery(SPARQLQuery::parse(sparql_query_str)) {} + + [[nodiscard]] bool is_distinct() const noexcept; + + std::vector get_slice_keys() const; + }; + +}// namespace dice::sparql2tensor + +#endif//DICE_SPARQL_PARSEDSPARQL_HPP \ No newline at end of file diff --git a/libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.cpp b/libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.cpp new file 
mode 100644 index 00000000..c7e16a20 --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.cpp @@ -0,0 +1,191 @@ +#include "UPDATEQuery.hpp" + +#include +#include + +#include +#include + +#include "dice/sparql2tensor/parser/visitors/PrologueVisitor.hpp" +#include "dice/sparql2tensor/parser/exception/SPARQLErrorListener.hpp" + +namespace dice::sparql2tensor { + + static bool is_alpha(char const ch) noexcept { + return std::isalpha(ch); + } + + static bool is_ws(char const ch) noexcept { + return std::isspace(ch); + } + + /** + * @brief reads a single word (determined by matcher) from the start of s + * @param s input string; will be modified to not include the word after extraction + * @param matcher determines the charset the word is made of + * @return the extracted word + */ + template requires std::is_nothrow_invocable_r_v + static std::string_view read_word(std::string_view &s, CharMatcher &&matcher) noexcept { + auto const first_word_begin = std::find_if_not(s.begin(), s.end(), is_ws); + auto const first_word_end = std::find_if_not(first_word_begin, s.end(), std::forward(matcher)); + + auto word = s.substr(std::distance(s.begin(), first_word_begin), std::distance(first_word_begin, first_word_end)); + s.remove_prefix(std::distance(s.begin(), first_word_end)); + + return word; + } + + /** + * @brief reads a single word (determined by matcher) from the end of s + * @param s input string; will be modified to not include the word after extraction + * @param matcher determines the charset the word is made of + * @return the extracted word + */ + template requires std::is_nothrow_invocable_r_v + static std::string_view read_word_rev(std::string_view &s, CharMatcher &&matcher) noexcept { + auto const first_word_rbegin = std::find_if_not(s.rbegin(), s.rend(), is_ws); + auto const first_word_rend = std::find_if_not(first_word_rbegin, s.rend(), std::forward(matcher)); + + auto word = s.substr(std::distance(first_word_rend, s.rend()), std::distance(first_word_rbegin, first_word_rend)); + s.remove_suffix(std::distance(s.rbegin(), first_word_rend)); + + return word; + } + + /** + * @brief extracts the prologue from an update query + * @param s the whole query, will be modified to not include the extracted prologue afterwards + * @return the extracted prologue + */ + static std::string_view read_prologue(std::string_view &s) noexcept { + auto const query_body_begin = s.find_first_of('{'); + if (query_body_begin == std::string_view::npos) { + // body begin not found, error will be handled by calling function + return ""; + } + + auto const prologue_last_char = s.substr(0, query_body_begin).find_last_of('>'); + if (prologue_last_char == std::string_view::npos) { + // no prologue found + return ""; + } + + auto const prologue = s.substr(0, prologue_last_char + 1); + s.remove_prefix(prologue_last_char + 1); + + return prologue; + } + + enum struct QueryType { + INSERT_DATA, + DELETE_DATA, + UNKNOWN, + }; + + /** + * @brief reads the beginning of the actual query (after prologue) and tries to recognize the query type + * @param s the whole query without the prologue, will be modified to not include the query type + * @return the extracted query type + * + * @example + * @code + * std::string_view s = "DELETE DATA { ... }"; + * QueryType const query_type = read_query_type(s); + * + * assert(query_type == QueryType::DELETE_DATA); + * assert(s == " { ... 
}"); + * @endcode + */ + static QueryType read_query_type(std::string_view &s) noexcept { + auto const first_word = read_word(s, is_alpha); + auto const second_word = read_word(s, is_alpha); + + if (second_word != "DATA") { + return QueryType::UNKNOWN; + } + + if (first_word == "DELETE") { + return QueryType::DELETE_DATA; + } else if (first_word == "INSERT") { + return QueryType::INSERT_DATA; + } + + return QueryType::UNKNOWN; + } + + UPDATEDATAQueryData UPDATEDATAQueryData::parse(std::string_view const sparql_update_str) { + std::string_view rest_mut = sparql_update_str; + auto const prologue = read_prologue(rest_mut); + + UPDATEDATAQueryData update_query; + + // expected structure for fast path: 'prologue... (DELETE|INSERT) DATA { triples... }' + auto const query_type = read_query_type(rest_mut); + auto const third_word = read_word(rest_mut, [](char const ch) noexcept { return ch == '{'; }); + + if (query_type != QueryType::UNKNOWN) { + // fast path for DELETE DATA / INSERT DATA + + if (third_word != "{") { + // missing (or too many) '{' after '(DELETE|INSERT) DATA' + std::ostringstream err; + err << "syntax error: expected '{' after " << (query_type == QueryType::DELETE_DATA ? "DELETE DATA" : "INSERT DATA"); + throw std::runtime_error{err.str()}; + } + + auto const last_word = read_word_rev(rest_mut, [](char const ch) noexcept { return ch == '}'; }); + + if (last_word != "}") { + // closing brace is missing from query + throw std::runtime_error{"syntax error: expected '}' at end of query"}; + } + + using namespace rdf_tensor::parser; + + { // parse only prologue using antlr + parser::exception::SPARQLErrorListener error_listener{}; + antlr4::ANTLRInputStream input{prologue}; + dice::sparql_parser::base::SparqlLexer lexer{&input}; + antlr4::CommonTokenStream tokens{&lexer}; + dice::sparql_parser::base::SparqlParser parser{&tokens}; + parser.removeErrorListeners(); + parser.addErrorListener(&error_listener); + + auto update_ctx = parser.updateCommand(); + + { // visit prologue and store prefixes + parser::visitors::PrologueVisitor p_visitor{}; + for (auto prefix_ctx : update_ctx->prologue()) { + auto cur_prefixes = std::any_cast(p_visitor.visitPrologue(prefix_ctx)); + update_query.prefixes.insert(cur_prefixes.begin(), cur_prefixes.end()); + } + } + } + + std::vector entries; + + { // try to parse all triples between '{' and '}' with rdf4cpp and then store them in 'entries' + std::istringstream iss{std::string{rest_mut}}; + for (IStreamQuadIterator qit{iss, ParsingFlag::NoParsePrefix, update_query.prefixes}; qit != IStreamQuadIterator{}; ++qit) { + if (qit->has_value()) { + auto const &quad = **qit; + entries.push_back(rdf_tensor::NonZeroEntry{{quad.subject(), quad.predicate(), quad.object()}}); + } else { + std::ostringstream oss; + oss << qit->error(); + throw std::runtime_error{oss.str()}; + } + } + } + + update_query.is_delete = query_type == QueryType::DELETE_DATA; + update_query.entries = std::move(entries); + } else { + throw std::runtime_error{"Currently only DELETE DATA and INSERT DATA updates are supported"}; + } + + return update_query; + } + +}// namespace dice::sparql2tensor \ No newline at end of file diff --git a/libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.hpp b/libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.hpp new file mode 100644 index 00000000..16b36d5f --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/UPDATEQuery.hpp @@ -0,0 +1,18 @@ +#ifndef DICE_SPARQL_UPDATEQUERY_HPP +#define DICE_SPARQL_UPDATEQUERY_HPP + +#include + + +namespace 
dice::sparql2tensor { + + struct UPDATEDATAQueryData { + rdf_tensor::parser::IStreamQuadIterator::prefix_storage_type prefixes; + bool is_delete; // is this query DELETE DATA? (otherwise is INSERT DATA) + std::vector entries; + + static UPDATEDATAQueryData parse(std::string_view sparql_update_str); + }; +} + +#endif//DICE_SPARQL_UPDATEQUERY_HPP diff --git a/libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.cpp b/libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.cpp new file mode 100644 index 00000000..aa67ceac --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.cpp @@ -0,0 +1,16 @@ +#include "dice/sparql2tensor/parser/exception/SPARQLErrorListener.hpp" + +namespace dice::sparql2tensor::parser::exception { + + void SPARQLErrorListener::syntaxError([[maybe_unused]] antlr4::Recognizer *recognizer, + [[maybe_unused]] antlr4::Token *offendingSymbol, + size_t line, + size_t charPositionInLine, + const std::string &msg, + [[maybe_unused]] std::exception_ptr e) { + std::ostringstream error_msg; + error_msg << "Syntax error: " << msg << ". At line: " << line << " and position: " << charPositionInLine; + throw std::runtime_error(error_msg.str()); + } + +} // namespace dice::sparql2tensor::parser::exception diff --git a/libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.hpp b/libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.hpp new file mode 100644 index 00000000..7955718f --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/parser/exception/SPARQLErrorListener.hpp @@ -0,0 +1,26 @@ +#ifndef DICE_SPARQL_SPARQLERRORLISTENER_HPP +#define DICE_SPARQL_SPARQLERRORLISTENER_HPP + +#include + +#include + +#include + + +namespace dice::sparql2tensor::parser::exception { + + class SPARQLErrorListener : public antlr4::BaseErrorListener { + + void syntaxError(antlr4::Recognizer *recognizer, + antlr4::Token *offendingSymbol, + size_t line, + size_t charPositionInLine, + const std::string &msg, + std::exception_ptr e) override; + + }; + +}// namespace dice::sparql2tensor::parser::exception + +#endif//TENTRIS_BINARIES_SPARQLERRORLISTENER_HPP diff --git a/libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/PrologueVisitor.cpp b/libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/PrologueVisitor.cpp new file mode 100644 index 00000000..e47ad8a6 --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/PrologueVisitor.cpp @@ -0,0 +1,28 @@ +#include "dice/sparql2tensor/parser/visitors/PrologueVisitor.hpp" + +namespace dice::sparql2tensor::parser::visitors { + + std::any PrologueVisitor::visitPrologue(SparqlParser::PrologueContext *ctx) { + prefixes_.clear(); + for (auto pref_ctx : ctx->prefixDecl()) + visitPrefixDecl(pref_ctx); + for ([[maybe_unused]] auto base_ctx : ctx->baseDecl()) + throw std::runtime_error("Base Declarations not supported yet."); + return prefixes_; + } + + std::any PrologueVisitor::visitBaseDecl([[maybe_unused]] SparqlParser::BaseDeclContext *ctx) { + return nullptr; + } + + std::any PrologueVisitor::visitPrefixDecl(SparqlParser::PrefixDeclContext *ctx) { + std::string prefix{}; + if (ctx->PNAME_NS()) + prefix = ctx->PNAME_NS()->getText(); + auto ns = ctx->IRIREF()->getText(); + prefixes_[prefix.substr(0, prefix.size() - 1)] = ns.substr(1, ns.size() - 2); + return nullptr; + } + + +}// namespace dice::sparql2tensor::parser::visitors \ No newline at end of file diff --git 
a/libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.cpp b/libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.cpp new file mode 100644 index 00000000..e407ec9d --- /dev/null +++ b/libs/sparql2tensor/src/dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.cpp @@ -0,0 +1,480 @@ +#include "dice/sparql2tensor/parser/visitors/SelectAskQueryVisitor.hpp" + +#include + +#include +#include + +namespace dice::sparql2tensor::parser::visitors { + + std::any SelectAskQueryVisitor::visitAskQuery(SparqlParser::AskQueryContext *ctx) { + if (auto where_clause_ctx = ctx->whereClause(); where_clause_ctx) + visitWhereClause(where_clause_ctx); + else + throw std::runtime_error("Query does not contain a WHERE clause"); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitSelectQuery(SparqlParser::SelectQueryContext *ctx) { + if (auto where_clause_ctx = ctx->whereClause(); where_clause_ctx) + visitWhereClause(where_clause_ctx); + else + throw std::runtime_error("Query does not contain a WHERE clause"); + visitSelectClause(ctx->selectClause()); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitSelectClause(SparqlParser::SelectClauseContext *ctx) { + if (auto select_modifier_ctx = ctx->selectModifier(); select_modifier_ctx) { + if (select_modifier_ctx->DISTINCT()) + query->distinct_ = true; + } + if (ctx->ASTERISK()) { + query->project_all_variables_ = true; + std::unordered_set seen_vars; + // set all non-anonymous variables from the triple patterns + for (auto const &tp : query->triple_patterns_) { + for (auto const &node : tp) { + if (node.is_variable()) { + auto var = (rdf4cpp::rdf::query::Variable) node; + if (not var.is_anonymous()) { + auto [_, was_new] = seen_vars.insert(var); + if (was_new) + query->projected_variables_.push_back(var); + } + } + } + } + } else { + for (auto sel_ctx : ctx->selectVariables()) { + if (auto var_ctx = sel_ctx->var(); var_ctx) { + auto var = std::any_cast(visitVar(var_ctx)); + register_var(var); + query->projected_variables_.push_back(var); + } else { + throw std::runtime_error("Expressions in SELECT clause are not supported yet."); + } + } + } + if (query->projected_variables_.empty()) + throw std::runtime_error("At least one variable should be projected."); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitWhereClause(SparqlParser::WhereClauseContext *ctx) { + // push a new entry into the stacks, as we are about to visit a graph pattern + group_patterns.emplace_back(); + triples_blocks.emplace_back(); + optional_blocks.emplace_back(); + visitGroupGraphPattern(ctx->groupGraphPattern()); + // pop the top entry of the stacks, as we have finished visiting the graph pattern + optional_blocks.pop_back(); + triples_blocks.pop_back(); + group_patterns.pop_back(); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitGroupGraphPattern(SparqlParser::GroupGraphPatternContext *ctx) { + if (ctx->subSelect()) + throw std::runtime_error("Subqueries are not supported yet"); + else if (auto group_graph_pattern_sub_ctx = ctx->groupGraphPatternSub(); group_graph_pattern_sub_ctx) + visitGroupGraphPatternSub(group_graph_pattern_sub_ctx); + else + throw std::runtime_error("Malformed query"); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitGroupGraphPatternSub(SparqlParser::GroupGraphPatternSubContext *ctx) { + visitWellDesignedPattern(ctx, {}); + return nullptr; + } + + void SelectAskQueryVisitor::visitWellDesignedPattern(SparqlParser::GroupGraphPatternSubContext *ctx, + 
std::vector gou_ctxs) { + // store the context of the first triples block, if it is provided + if (auto triples_block = ctx->triplesBlock(); triples_block) + triples_blocks.back().push_back(triples_block); + // iterate over all GroupGraphPatternSubs + for (auto sub_ctx : ctx->groupGraphPatternSubList()) { + if (auto graph_pattern_not_triples_ctx = sub_ctx->graphPatternNotTriples(); graph_pattern_not_triples_ctx) { + // store all GroupOrUnionGraphPatterns that appear in the pattern + if (auto group_or_union_graph_pattern_ctx = graph_pattern_not_triples_ctx->groupOrUnionGraphPattern(); group_or_union_graph_pattern_ctx) + gou_ctxs.push_back(group_or_union_graph_pattern_ctx); + // store all OptionalGraphPatterns that appear in the pattern + else if (auto optional_graph_pattern_ctx = sub_ctx->graphPatternNotTriples()->optionalGraphPattern(); optional_graph_pattern_ctx) + optional_blocks.back().push_back(optional_graph_pattern_ctx); + } + // store all triples blocks that appear in the pattern + if (auto triples_block_ctx = sub_ctx->triplesBlock(); triples_block_ctx) + triples_blocks.back().push_back(triples_block_ctx); + } + // the current pattern does not contain any GroupOrUnionGraphPatterns + if (gou_ctxs.empty()) { + // visit all triples blocks first + for (auto tb_ctx : triples_blocks.back()) { + visitTriplesBlock(tb_ctx); + } + // if we are in an optional pattern we need to capture dependencies + if (not opt_operands.empty()) { + // dependencies with parent group + group_dependencies(group_patterns[group_patterns.size() - 2], group_patterns.back()); + // cartesian connections between optional patterns + for (auto cur_op : group_patterns.back()) { + for (auto opt_op : opt_operands.back()) { + // do not connect groups of the same union pattern + if (std::ranges::find(union_operands.back(), opt_op) == union_operands.back().end()) { + query->odg_.add_connection(cur_op, opt_op); + query->odg_.add_connection(opt_op, cur_op); + } + } + } + for (auto cur_op : group_patterns.back()) { + union_operands.back().push_back(cur_op); + opt_operands.back().push_back(cur_op); + } + } + opt_operands.emplace_back(); + union_operands.emplace_back(); + // visit all optional patterns + for (auto opt_ctx : optional_blocks.back()) { + // push a new vector into the stacks, as we are going to visit a new graph pattern + group_patterns.emplace_back(); + triples_blocks.emplace_back(); + optional_blocks.emplace_back(); + visitWellDesignedPattern(opt_ctx->groupGraphPattern()->groupGraphPatternSub(), {}); + // clear the vector from the operands of the visited graph pattern + // the top vector of the stack is shared across all optional subgraph pattern of the current graph pattern + union_operands.back().clear(); + // pop the top vector from the stack, as we have finished processing the graph pattern + optional_blocks.pop_back(); + triples_blocks.pop_back(); + group_patterns.pop_back(); + } + union_operands.pop_back(); + opt_operands.pop_back(); + // prepare for the next union + group_patterns.back().clear(); + } + // the pattern contains at least one GroupOrUnionGraphPattern + // in case of multiple GroupOrUnionGraphPatterns, join operations are distributed over unions + else { + SparqlParser::GroupOrUnionGraphPatternContext *cur_gou_ctx = gou_ctxs.back(); + gou_ctxs.pop_back(); + size_t current_tbs = triples_blocks.back().size(); + size_t current_opts = optional_blocks.back().size(); + // visit each group graph pattern of the GroupOrUnionGraphPattern + // while visiting each group graph pattern, the triples and optional 
blocks stored until this point will also be visited + for (auto grp_ctx : cur_gou_ctx->groupGraphPattern()) { + visitWellDesignedPattern(grp_ctx->groupGraphPatternSub(), gou_ctxs); + // we resize the vectors in order to keep only the blocks that were present before visiting grp_ctx + triples_blocks.back().resize(current_tbs); + optional_blocks.back().resize(current_opts); + } + } + } + + std::any SelectAskQueryVisitor::visitTriplesBlock(SparqlParser::TriplesBlockContext *ctx) { + for (auto sub_ctx : ctx->triplesSameSubjectPath()) + visitTriplesSameSubjectPath(sub_ctx); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitTriplesSameSubjectPath(SparqlParser::TriplesSameSubjectPathContext *ctx) { + if (ctx->varOrTerm() and ctx->propertyListPathNotEmpty()) { + active_subject = std::any_cast(visitVarOrTerm(ctx->varOrTerm())); + if (active_subject.is_variable()) + register_var(rdf4cpp::rdf::query::Variable(active_subject)); + visitPropertyListPathNotEmpty(ctx->propertyListPathNotEmpty()); + } else if (ctx->triplesNodePath() and ctx->propertyListPath()) { + return nullptr; + } + return nullptr; + } + + std::any SelectAskQueryVisitor::visitPropertyListPathNotEmpty(SparqlParser::PropertyListPathNotEmptyContext *ctx) { + if (ctx->verbPath()) { + active_predicate = std::any_cast(visitPath(ctx->verbPath()->path())); + } else { + auto var = std::any_cast(visitVar(ctx->verbSimple()->var())); + register_var(var); + active_predicate = rdf4cpp::rdf::Node(var); + } + auto object_list_path_ctx = ctx->objectListPath(); + if (not object_list_path_ctx) + throw std::runtime_error("Triple requires at least one object"); + visitObjectListPath(object_list_path_ctx); + for (auto prop_ctx : ctx->propertyListPathNotEmptyList()) { + if (auto verb_path_ctx = prop_ctx->verbPath(); verb_path_ctx) { + active_predicate = std::any_cast(visitPath(verb_path_ctx->path())); + } else { + auto var = std::any_cast(visitVar(prop_ctx->verbSimple()->var())); + register_var(var); + active_predicate = rdf4cpp::rdf::Node(var); + } + auto object_list_ctx = prop_ctx->objectList(); + if (not object_list_ctx) + throw std::runtime_error("Triple requires at least one object"); + visitObjectList(object_list_ctx); + } + return nullptr; + } + + std::any SelectAskQueryVisitor::visitVarOrTerm(SparqlParser::VarOrTermContext *ctx) { + return rdf4cpp::rdf::Node([&]() -> rdf4cpp::rdf::Node { + if (ctx->var()) { + return std::any_cast(visitVar(ctx->var())); + } else { + if (auto iri_ctx = ctx->graphTerm()->iri()) + return std::any_cast(visitIri(iri_ctx)); + else if (auto blank_node_ctx = ctx->graphTerm()->blankNode(); blank_node_ctx) + return std::any_cast(visitBlankNode(blank_node_ctx)); + else if (auto rdf_literal_ctx = ctx->graphTerm()->rdfLiteral(); rdf_literal_ctx) + return std::any_cast(visitRdfLiteral(rdf_literal_ctx)); + else if (auto boolean_literal_ctx = ctx->graphTerm()->booleanLiteral(); boolean_literal_ctx) + return std::any_cast(visitBooleanLiteral(boolean_literal_ctx)); + else if (auto numberic_literal_ctx = ctx->graphTerm()->numericLiteral(); numberic_literal_ctx) + return std::any_cast(visitNumericLiteral(numberic_literal_ctx)); + else + throw std::runtime_error("RDF collections are not supported yet."); + } + }()); + } + + std::any SelectAskQueryVisitor::visitIri(SparqlParser::IriContext *ctx) { + if (ctx->IRIREF()) { + auto iri = ctx->IRIREF()->getText(); + return rdf4cpp::rdf::IRI(iri.substr(1, iri.size() - 2)); + } + std::string predicate = ctx->prefixedName()->PNAME_LN()->getText(); + std::size_t split = 
predicate.find(':'); + try { + return rdf4cpp::rdf::IRI(query->prefixes_.at(predicate.substr(0, split)) + predicate.substr(split + 1)); + } catch (...) { + throw std::out_of_range("Prefix " + predicate.substr(0, split) + " not declared."); + } + } + + std::any SelectAskQueryVisitor::visitBlankNode(SparqlParser::BlankNodeContext *ctx) { + if (auto blank_node_label_ctx = ctx->BLANK_NODE_LABEL(); blank_node_label_ctx) + return rdf4cpp::rdf::query::Variable(blank_node_label_ctx->getText().substr(2), true); + else + throw std::runtime_error("BlankNode ANON not supported."); + } + + std::any SelectAskQueryVisitor::visitVar(SparqlParser::VarContext *ctx) { + return rdf4cpp::rdf::query::Variable(ctx->getText().substr(1)); + } + + std::any SelectAskQueryVisitor::visitObjectListPath(SparqlParser::ObjectListPathContext *ctx) { + for (auto objp_ctx : ctx->objectPath()) + visitObjectPath(objp_ctx); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitObjectList(SparqlParser::ObjectListContext *ctx) { + for (auto obj_ctx : ctx->object()) + visitObject(obj_ctx); + return nullptr; + } + + std::any SelectAskQueryVisitor::visitObjectPath(SparqlParser::ObjectPathContext *ctx) { + if (auto var_or_term_ctx = ctx->graphNodePath()->varOrTerm(); var_or_term_ctx) { + auto obj = std::any_cast(visitVarOrTerm(var_or_term_ctx)); + if (obj.is_variable()) + register_var(rdf4cpp::rdf::query::Variable(obj)); + query->triple_patterns_.emplace_back(active_subject, active_predicate, obj); + add_tp(query->triple_patterns_.back()); + } else { + throw std::runtime_error("not supported"); + } + return nullptr; + } + + std::any SelectAskQueryVisitor::visitObject(SparqlParser::ObjectContext *ctx) { + if (auto var_or_term_ctx = ctx->graphNode()->varOrTerm(); var_or_term_ctx) { + auto obj = std::any_cast(visitVarOrTerm(var_or_term_ctx)); + if (obj.is_variable()) + register_var(rdf4cpp::rdf::query::Variable(obj)); + query->triple_patterns_.emplace_back(active_subject, active_predicate, obj); + add_tp(query->triple_patterns_.back()); + } else { + throw std::runtime_error("not supported"); + } + return nullptr; + } + + std::any SelectAskQueryVisitor::visitPath(SparqlParser::PathContext *ctx) { + if (auto path_alternative_ctx = ctx->pathAlternative(); path_alternative_ctx) + return visitPathAlternative(path_alternative_ctx); + else + throw std::runtime_error("Malformed query."); + } + + std::any SelectAskQueryVisitor::visitPathAlternative(SparqlParser::PathAlternativeContext *ctx) { + if (ctx->pathSequence().size() > 1) + throw std::runtime_error("Property paths are not supported yet"); + return visitPathSequence(ctx->pathSequence(0)); + } + + std::any SelectAskQueryVisitor::visitPathSequence(SparqlParser::PathSequenceContext *ctx) { + if (ctx->pathEltOrInverse().size() > 1) + throw std::runtime_error("Property paths are not supported yet"); + return visitPathEltOrInverse(ctx->pathEltOrInverse(0)); + } + + std::any SelectAskQueryVisitor::visitPathEltOrInverse(SparqlParser::PathEltOrInverseContext *ctx) { + if (ctx->INVERSE()) + throw std::runtime_error("Property paths are not supported yet"); + return visitPathElt(ctx->pathElt()); + } + + std::any SelectAskQueryVisitor::visitPathElt(SparqlParser::PathEltContext *ctx) { + auto path_primary_ctx = ctx->pathPrimary(); + if (auto iri_ctx = path_primary_ctx->iri(); iri_ctx) + return rdf4cpp::rdf::Node(std::any_cast(visitIri(iri_ctx))); + else if (path_primary_ctx->A()) + return rdf4cpp::rdf::Node(rdf4cpp::rdf::IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")); + else if 
+
+	std::any SelectAskQueryVisitor::visitRdfLiteral(SparqlParser::RdfLiteralContext *ctx) {
+		auto value = std::any_cast<std::string>(visitString(ctx->string()));
+		if (auto iri_ctx = ctx->iri(); iri_ctx)
+			return rdf4cpp::rdf::Literal(value, std::any_cast<rdf4cpp::rdf::IRI>(visitIri(iri_ctx)));
+		else if (auto langtag_ctx = ctx->LANGTAG(); langtag_ctx)
+			return rdf4cpp::rdf::Literal(value, langtag_ctx->getText().substr(1));// drop the leading '@'
+		else
+			return rdf4cpp::rdf::Literal(value);
+	}
+
+	std::any SelectAskQueryVisitor::visitNumericLiteral(SparqlParser::NumericLiteralContext *ctx) {
+		auto number = ctx->getText();
+		if (auto pos_literal_ctx = ctx->numericLiteralPositive(); pos_literal_ctx) {
+			if (pos_literal_ctx->DECIMAL_POSITIVE())
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#decimal"));
+			else if (pos_literal_ctx->DOUBLE_POSITIVE())
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#double"));
+			else
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#integer"));
+		} else if (auto neg_literal_ctx = ctx->numericLiteralNegative(); neg_literal_ctx) {
+			if (neg_literal_ctx->DECIMAL_NEGATIVE())
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#decimal"));
+			else if (neg_literal_ctx->DOUBLE_NEGATIVE())
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#double"));
+			else
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#integer"));
+		} else {
+			auto unsigned_literal_ctx = ctx->numericLiteralUnsigned();
+			if (unsigned_literal_ctx->DECIMAL())
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#decimal"));
+			else if (unsigned_literal_ctx->DOUBLE())
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#double"));
+			else
+				return rdf4cpp::rdf::Literal(number, rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#integer"));
+		}
+	}
+
+	std::any SelectAskQueryVisitor::visitBooleanLiteral(SparqlParser::BooleanLiteralContext *ctx) {
+		if (ctx->TRUE())
+			return rdf4cpp::rdf::Literal("true", rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#boolean"));
+		else
+			return rdf4cpp::rdf::Literal("false", rdf4cpp::rdf::IRI("http://www.w3.org/2001/XMLSchema#boolean"));
+	}
+
+	std::any SelectAskQueryVisitor::visitString(SparqlParser::StringContext *ctx) {
+		std::string value = ctx->getText();
+		if (ctx->STRING_LITERAL1() or ctx->STRING_LITERAL2())
+			return value.substr(1, value.size() - 2);// strip the single quote pair
+		else
+			return value.substr(3, value.size() - 6);// strip the triple-quote delimiters
+	}
+
+	void SelectAskQueryVisitor::register_var(rdf4cpp::rdf::query::Variable const &var) {
+		if (query->var_to_id_.contains(var))
+			return;
+		query->var_to_id_[var] = var_id;
+		var_id++;
+	}
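The token-type branches in `visitNumericLiteral` implement the SPARQL grammar's lexical typing rule: an exponent makes `xsd:double`, a decimal point makes `xsd:decimal`, everything else is `xsd:integer`, and the sign never changes the type. An equivalent stand-alone classification by lexical form (the real code branches on token types rather than scanning the text):

```cpp
// Equivalent lexical classification of SPARQL numeric literals; this mirrors
// the INTEGER/DECIMAL/DOUBLE token distinction used by visitNumericLiteral.
#include <iostream>
#include <string>
#include <string_view>

std::string numeric_datatype(std::string_view lexical) {
    if (lexical.find_first_of("eE") != std::string_view::npos)
        return "http://www.w3.org/2001/XMLSchema#double";// exponent present
    if (lexical.find('.') != std::string_view::npos)
        return "http://www.w3.org/2001/XMLSchema#decimal";// decimal point, no exponent
    return "http://www.w3.org/2001/XMLSchema#integer";
}

int main() {
    for (std::string_view lit : {"42", "-4.2", "+1.0e6"})
        std::cout << lit << " -> " << numeric_datatype(lit) << '\n';
}
```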
+
+	void SelectAskQueryVisitor::add_tp(rdf4cpp::rdf::query::TriplePattern const &tp) {
+		std::vector var_ids{};
+		for (auto const &node : tp) {
+			if (not node.is_variable())
+				continue;
+			var_ids.push_back(query->var_to_id_[rdf4cpp::rdf::query::Variable(node)]);
+		}
+		// create new node in the operand dependency graph
+		auto v_id = query->odg_.add_operand(var_ids);
+		auto &gp = group_patterns.back();
+		// iterate over the tps of the group and capture dependencies
+		for (auto iter = gp.rbegin(); iter != gp.rend(); iter++) {
+			boost::container::flat_set done{};// only one edge per label between two nodes
+			auto const &tp_vars = query->odg_.operand_var_ids(*iter);
+			bool cart = true;
+			for (auto const &var : var_ids) {
+				for (auto const &tp_var : tp_vars) {
+					if (var == tp_var) {
+						cart = false;
+						if (done.contains(var))
+							continue;
+						done.insert(var);
+						query->odg_.add_dependency(*iter, v_id, var);
+						query->odg_.add_dependency(v_id, *iter, var);
+					}
+				}
+			}
+			// the triple patterns do not share a variable --> cartesian join
+			if (cart) {
+				query->odg_.add_dependency(*iter, v_id);
+				query->odg_.add_dependency(v_id, *iter);
+			}
+		}
+		// add current tp/node to the active group pattern
+		gp.push_back(v_id);
+	}
+
+	void SelectAskQueryVisitor::group_dependencies(std::vector const &prev_group,
+												   std::vector const &cur_group,
+												   bool bidirectional) {
+		// iterate over the triple patterns (nodes) of the previous group
+		for (const auto &prev_tp : prev_group) {
+			// get the variable ids of the node
+			auto const &prev_labels = query->odg_.operand_var_ids(prev_tp);
+			// iterate over the triple patterns (nodes) of the current group
+			for (const auto &cur_tp : cur_group) {
+				// get the variable ids of the node
+				auto const &cur_labels = query->odg_.operand_var_ids(cur_tp);
+				bool done = false;
+				// create labelled dependencies if the nodes share variable ids
+				for (auto const &prev_label : prev_labels) {
+					if (std::find(cur_labels.begin(), cur_labels.end(), prev_label) != cur_labels.end()) {
+						query->odg_.add_dependency(prev_tp, cur_tp, prev_label);
+						if (bidirectional)
+							query->odg_.add_dependency(cur_tp, prev_tp, prev_label);
+						done = true;
+					}
+				}
+				// if the nodes do not share a label, create an unlabelled dependency
+				if (not done) {
+					query->odg_.add_dependency(prev_tp, cur_tp);
+					if (bidirectional)
+						query->odg_.add_dependency(cur_tp, prev_tp);
+				}
+			}
+		}
+	}
+
+	void SelectAskQueryVisitor::group_connections(std::vector const &prev_group,
+												  std::vector const &cur_group) {
+		for (const auto &prev_tp : prev_group) {
+			for (const auto &cur_tp : cur_group) {
+				query->odg_.add_connection(prev_tp, cur_tp);
+				query->odg_.add_connection(cur_tp, prev_tp);
+			}
+		}
+	}
+
+}// namespace dice::sparql2tensor::parser::visitors
\ No newline at end of file
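`add_tp` and `group_dependencies` apply the same rule at different granularities: two operands that share a variable id are linked by a dependency labelled with that id, and operands with no variable in common get an unlabelled dependency marking a cartesian join. A toy illustration of the shared-variable test, with plain vectors standing in for the operand dependency graph:

```cpp
// Toy illustration of the dependency rule in add_tp/group_dependencies; the
// vectors of variable ids are hypothetical stand-ins for the operand graph.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    std::vector<std::size_t> tp0{0, 1};// e.g. pattern over ?s (id 0) and ?p (id 1)
    std::vector<std::size_t> tp1{1, 2};// shares id 1 -> labelled dependency
    std::vector<std::size_t> tp2{3};   // shares nothing -> cartesian join

    auto shares_var = [](auto const &a, auto const &b) {
        return std::any_of(a.begin(), a.end(), [&](std::size_t v) {
            return std::find(b.begin(), b.end(), v) != b.end();
        });
    };
    std::cout << "tp0-tp1 labelled:  " << shares_var(tp0, tp1) << '\n';// 1
    std::cout << "tp0-tp2 cartesian: " << !shares_var(tp0, tp2) << '\n';// 1
}
```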
diff --git a/libs/tentris/CMakeLists.txt b/libs/tentris/CMakeLists.txt
new file mode 100644
index 00000000..281d13e3
--- /dev/null
+++ b/libs/tentris/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Find cmake packages
+
+# Define the library
+add_library(tentris INTERFACE)
+add_library(tentris::tentris ALIAS tentris)
+target_link_libraries(tentris INTERFACE
+		tentris::endpoint
+		)
+
+target_include_directories(tentris INTERFACE
+		$
+		)
+
+include(${PROJECT_SOURCE_DIR}/cmake/install_library.cmake)
+install_interface_component(tentris src)
\ No newline at end of file
diff --git a/libs/tentris/src/dice/tentris.hpp b/libs/tentris/src/dice/tentris.hpp
new file mode 100644
index 00000000..d6b2fb89
--- /dev/null
+++ b/libs/tentris/src/dice/tentris.hpp
@@ -0,0 +1,6 @@
+#ifndef TENTRIS_BINARIES_TENTRIS_HPP
+#define TENTRIS_BINARIES_TENTRIS_HPP
+
+#include
+
+#endif//TENTRIS_BINARIES_TENTRIS_HPP
diff --git a/libs/triple-store/CMakeLists.txt b/libs/triple-store/CMakeLists.txt
new file mode 100644
index 00000000..0201fc3b
--- /dev/null
+++ b/libs/triple-store/CMakeLists.txt
@@ -0,0 +1,31 @@
+# Find cmake packages
+find_package(dice-hash REQUIRED)
+
+# Define the library
+add_library(triple-store
+		src/dice/triple-store/TripleStore.cpp
+		)
+
+add_library(tentris::triple-store ALIAS triple-store)
+
+target_include_directories(triple-store PUBLIC
+		$
+		)
+
+target_link_libraries(triple-store PUBLIC
+		tentris::sparql2tensor
+		tentris::rdf-tensor
+		)
+
+## Packaging and exporting the target
+
+set_target_properties(triple-store PROPERTIES
+		VERSION ${PROJECT_VERSION}
+		SOVERSION ${PROJECT_VERSION_MAJOR}
+		CXX_STANDARD 20
+		CXX_STANDARD_REQUIRED YES
+		CXX_EXTENSIONS NO
+		)
+
+include(${PROJECT_SOURCE_DIR}/cmake/install_library.cmake)
+install_component(triple-store src)
diff --git a/libs/triple-store/src/dice/triple-store/TripleStore.cpp b/libs/triple-store/src/dice/triple-store/TripleStore.cpp
new file mode 100644
index 00000000..3e041248
--- /dev/null
+++ b/libs/triple-store/src/dice/triple-store/TripleStore.cpp
@@ -0,0 +1,157 @@
+#include "TripleStore.hpp"
+
+#include
+
+#include
+
+namespace dice::triple_store {
+	TripleStore::TripleStore(TripleStore::BoolHypertrie &hypertrie) : hypertrie_(hypertrie) {}
+
+	void TripleStore::load_ttl(std::string const &file_path, uint32_t bulk_size,
+							   rdf_tensor::HypertrieBulkInserter::BulkInserted_callback const &call_back,
+							   std::function<void(rdf_tensor::parser::ParsingError const &)> const &error_callback) {
+		std::ifstream ifs{file_path};
+
+		if (!ifs.is_open()) {
+			throw std::runtime_error{"unable to open provided file " + file_path};
+		}
+
+		// stream the file triple by triple into the bulk inserter; parse errors are reported via error_callback
+		HypertrieBulkInserter bulk_inserter{hypertrie_, bulk_size, call_back};
+		for (rdf4cpp::rdf::parser::IStreamQuadIterator qit{ifs}; qit != rdf4cpp::rdf::parser::IStreamQuadIterator{}; ++qit) {
+			if (qit->has_value()) {
+				auto const &quad = qit->value();
+				bulk_inserter.add(
+						hypertrie::internal::raw::SingleEntry<3, htt_t>{{quad.subject(), quad.predicate(), quad.object()}});
+			} else {
+				error_callback(qit->error());
+			}
+		}
+	}
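`load_ttl` above is an instance of the usual buffered bulk-insert pattern: buffer parsed entries, flush them in batches of `bulk_size`, and report progress through a callback. A minimal self-contained sketch of that pattern with simplified stand-in types (not the rdf-tensor API):

```cpp
// Minimal stand-alone sketch of the buffered bulk-insert pattern load_ttl
// relies on; Triple and the callback are simplified stand-ins.
#include <array>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

using Triple = std::array<int, 3>;

void bulk_load(std::vector<Triple> const &input, std::size_t bulk_size,
               std::function<void(std::size_t)> const &on_bulk_inserted) {
    std::vector<Triple> bulk;
    bulk.reserve(bulk_size);
    for (Triple const &t : input) {
        bulk.push_back(t);
        if (bulk.size() == bulk_size) {// flush a full bulk into the index
            on_bulk_inserted(bulk.size());
            bulk.clear();
        }
    }
    if (!bulk.empty()) on_bulk_inserted(bulk.size());// flush the final partial bulk
}

int main() {
    std::vector<Triple> triples{{1, 2, 3}, {1, 2, 4}, {2, 2, 3}};
    bulk_load(triples, 2, [](std::size_t n) { std::cout << "inserted " << n << " triples\n"; });
}
```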
+
+	bool TripleStore::is_rdf_list(rdf4cpp::rdf::Node list) const noexcept {
+		using IRI = rdf4cpp::rdf::IRI;
+		IRI rdf_nil("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil");
+
+		if (list == rdf_nil) return true;// empty collection
+
+		auto prop_obj = std::get<0>(hypertrie_[rdf_tensor::SliceKey{list, std::nullopt, std::nullopt}]);
+		if (prop_obj.empty()) return false;
+
+		{
+			IRI rdf_first("http://www.w3.org/1999/02/22-rdf-syntax-ns#first");
+			auto has_first = std::get<0>(prop_obj[rdf_tensor::SliceKey{rdf_first, std::nullopt}]);
+			if (has_first.size() != 1) return false;
+		}
+
+		{
+			IRI rdf_rest("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest");
+			auto has_rest = std::get<0>(prop_obj[rdf_tensor::SliceKey{rdf_rest, std::nullopt}]);
+			if (has_rest.size() != 1) return false;
+		}
+
+		return true;
+	}
+	std::vector<rdf4cpp::rdf::Node> TripleStore::get_rdf_list(rdf4cpp::rdf::Node list) const {
+		using IRI = rdf4cpp::rdf::IRI;
+		using Node = rdf4cpp::rdf::Node;
+
+		IRI rdf_first("http://www.w3.org/1999/02/22-rdf-syntax-ns#first");
+		IRI rdf_rest("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest");
+		IRI rdf_nil("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil");
+
+		std::vector<Node> node_vector;
+		auto head = list;
+		while (head != rdf_nil) {
+			auto element = std::get<0>(hypertrie_[rdf_tensor::SliceKey{head, rdf_first, std::nullopt}]);
+			if (element.size() > 1)
+				throw std::runtime_error("Invalid RDF list. Multiple rdf:first elements for list node " + std::string(head));
+			if (element.empty())
+				throw std::runtime_error("Invalid RDF list. No rdf:first element for list node " + std::string(head));
+
+			node_vector.push_back((*element.begin())[0]);
+			auto rest = std::get<0>(hypertrie_[rdf_tensor::SliceKey{head, rdf_rest, std::nullopt}]);
+			if (rest.size() > 1) {
+				throw std::runtime_error("Invalid RDF list. Multiple rdf:rest elements for list node " + std::string(head));
+			} else if (rest.size() == 1) {
+				head = (*rest.begin())[0];// advance to the next list node
+			} else /* rest.size() == 0 */ {
+				head = rdf_nil;// this is not canonical but seems better than throwing an error
+			}
+		}
+		return node_vector;
+	}
+
+	/**
+	 * @brief Generates the tensor operands of a query
+	 * @param rdf_tensor The RDF tensor to slice
+	 * @param slice_keys The slice keys corresponding to the query being evaluated
+	 * @return A vector of tensor operands (const_BoolHypertries).
+	 */
+	std::vector<rdf_tensor::const_BoolHypertrie> generate_operands(rdf_tensor::BoolHypertrie rdf_tensor, std::vector<rdf_tensor::SliceKey> const &slice_keys) {
+		using const_BoolHypertrie = rdf_tensor::const_BoolHypertrie;
+		using BoolHypertrie = rdf_tensor::BoolHypertrie;
+
+		std::vector<const_BoolHypertrie> operands;
+		for (auto const &slice_key : slice_keys) {
+			auto slice_result = rdf_tensor[slice_key];
+			if (slice_key.get_fixed_depth() == 3) {
+				// all three positions fixed: the slice is a scalar that states whether the triple exists
+				auto entry_exists = std::get<bool>(slice_result);
+				BoolHypertrie ht_0{0, rdf_tensor.context()};
+				if (entry_exists)
+					ht_0.set({}, true);
+				operands.push_back(ht_0);
+			} else {
+				auto operand = std::get<const_BoolHypertrie>(slice_result);
+				operands.push_back(std::move(operand));
+			}
+		}
+		return operands;
+	}
+
+	std::generator<rdf_tensor::Entry> TripleStore::eval_select(const sparql2tensor::SPARQLQuery &query, std::chrono::steady_clock::time_point endtime) const {
+		auto operands = generate_operands(hypertrie_, query.get_slice_keys());
+		std::vector proj_vars_id{};
+		for (auto const &proj_var : query.projected_variables_) {
+			proj_vars_id.push_back(query.var_to_id_.at(proj_var));
+		}
+		rdf_tensor::Query q{query.odg_, operands, proj_vars_id, endtime};
+		if (query.distinct_) {
+			rdf_tensor::Entry entry;
+			entry.key().resize(query.projected_variables_.size());
+			for (auto const &distinct_entry : dice::query::Evaluation::evaluate(q)) {
+				std::copy(distinct_entry.key().begin(), distinct_entry.key().end(), entry.key().begin());
+				co_yield entry;
+			}
+		} else {
+			for (auto const &entry : dice::query::Evaluation::evaluate(q)) {
+				co_yield entry;
+			}
+		}
+	}
+	bool TripleStore::eval_ask(const sparql2tensor::SPARQLQuery &query, std::chrono::steady_clock::time_point endtime) const {
+		auto operands = generate_operands(hypertrie_, query.get_slice_keys());
+		rdf_tensor::Query q{query.odg_, operands, {}, endtime};
+		return dice::query::Evaluation::evaluate_ask(q);
+	}
+	size_t TripleStore::count(const sparql2tensor::SPARQLQuery &query, std::chrono::steady_clock::time_point endtime) const {
+		using namespace sparql2tensor;
+		if (query.triple_patterns_.size() == 1) {// O(1)
+			auto slice_key = query.get_slice_keys()[0];
+			if (slice_key.get_fixed_depth() == 3)
+				return (size_t) std::get<bool>(get_hypertrie()[slice_key]);
+			else
+				return std::get<rdf_tensor::const_BoolHypertrie>(get_hypertrie()[slice_key]).size();
+		} else {
+			size_t count = 0;
+			for (auto const &entry : this->eval_select(query, endtime))
+				count += entry.value();
+			return count;
+		}
+	}
+	bool TripleStore::contains(const rdf4cpp::rdf::Statement &statement) const {
+		return hypertrie_[Key{statement.subject(), statement.predicate(), statement.object()}];
+	}
+	size_t TripleStore::size() const {
+		return hypertrie_.size();
+	}
+}// namespace dice::triple_store
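`count` above has two branches: a single triple pattern is answered directly from the index slice (the `// O(1)` path), while multi-pattern queries fall back to `eval_select` and sum the per-solution multiplicities. A toy illustration of the two branches with stand-in containers:

```cpp
// Toy illustration of the two counting strategies in TripleStore::count();
// the containers are hypothetical stand-ins for hypertrie slices and entries.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // one triple pattern: the count is simply the size of the matching slice
    std::vector<int> slice{7, 8, 9};// matches of (?s, p, o) in the index
    std::size_t fast = slice.size();// answered without any join evaluation

    // several patterns: evaluate the join and sum each solution's multiplicity
    std::vector<std::size_t> multiplicities{1, 2, 1};// from eval_select entries
    std::size_t slow = 0;
    for (std::size_t m : multiplicities) slow += m;

    std::cout << fast << ' ' << slow << '\n';// 3 4
}
```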
diff --git a/libs/triple-store/src/dice/triple-store/TripleStore.hpp b/libs/triple-store/src/dice/triple-store/TripleStore.hpp
new file mode 100644
index 00000000..bce3869b
--- /dev/null
+++ b/libs/triple-store/src/dice/triple-store/TripleStore.hpp
@@ -0,0 +1,96 @@
+#ifndef TENTRIS_STORE_TRIPLESTORE
+#define TENTRIS_STORE_TRIPLESTORE
+
+#include
+#include
+
+#include
+
+#ifndef BOOST_BIND_GLOBAL_PLACEHOLDERS
+#define BOOST_BIND_GLOBAL_PLACEHOLDERS
+#endif
+#include
+
+namespace dice::triple_store {
+	class TripleStore {
+		using HypertrieContext = rdf_tensor::HypertrieContext;
+		using HypertrieContext_ptr = rdf_tensor::HypertrieContext_ptr;
+		using BoolHypertrie = rdf_tensor::BoolHypertrie;
+		using const_BoolHypertrie = rdf_tensor::const_BoolHypertrie;
+		using Key = rdf_tensor::Key;
+		using htt_t = rdf_tensor::htt_t;
+
+	public:
+		using HypertrieBulkInserter = rdf_tensor::HypertrieBulkInserter;
+		using allocator_type = rdf_tensor::allocator_type;
+
+	private:
+		BoolHypertrie &hypertrie_;
+
+	public:
+		explicit TripleStore(BoolHypertrie &hypertrie);
+
+		[[nodiscard]] BoolHypertrie const &get_hypertrie() const {
+			return hypertrie_;
+		}
+
+		/**
+		 * This function enforces stricter requirements upon rdf:Lists than described in D.3 RDF collections.
+		 * An rdf:List must either be the IRI rdf:nil or must have the properties rdf:first and rdf:rest, both with cardinality 1.
+		 * @param list the node to be checked if it is a list
+		 * @return if list is an rdf:List
+		 */
+		[[nodiscard]] bool is_rdf_list(rdf4cpp::rdf::Node list) const noexcept;
+
+		/**
+		 * Returns the items of an rdf:List as vector.
+		 *
+		 * Restrictions from is_rdf_list(rdf4cpp::rdf::Node) const noexcept apply.
+		 *
+		 * @param list the start node of the list
+		 * @return the elements of the list as vector
+		 * @throws std::runtime_error If the list is malformed.
+		 */
+		std::vector<rdf4cpp::rdf::Node> get_rdf_list(rdf4cpp::rdf::Node list) const;
+
+		/**
+		 * @brief Loads a turtle file into this triplestore
+		 * @param file_path path to the file
+		 * @param bulk_size number of entries to insert at once
+		 * @param call_back function to call when a bulk is inserted
+		 * @param error_callback function to call when an error is encountered in the file
+		 */
+		void load_ttl(
+				std::string const &file_path,
+				uint32_t bulk_size = 1'000'000,
+				HypertrieBulkInserter::BulkInserted_callback const &call_back = [](size_t, size_t, size_t) -> void {},
+				std::function<void(rdf_tensor::parser::ParsingError const &)> const &error_callback = [](rdf_tensor::parser::ParsingError const &) -> void {});
+
+		/**
+		 * @brief Evaluation of SPARQL SELECT queries.
+		 * @param query The parsed SPARQL query.
+		 * @param endtime The timeout value
+		 * @return A generator yielding the solutions of the query
+		 */
+		std::generator<rdf_tensor::Entry>
+		eval_select(const sparql2tensor::SPARQLQuery &query,
+					std::chrono::steady_clock::time_point endtime = std::chrono::steady_clock::time_point::max()) const;
+
+		/**
+		 * @brief Evaluation of SPARQL ASK queries.
+		 * @param query The parsed SPARQL query.
+		 * @param endtime The timeout value
+		 * @return The result of the ask query (true or false).
+ */ + bool eval_ask(const sparql2tensor::SPARQLQuery &query, + std::chrono::steady_clock::time_point endtime = std::chrono::steady_clock::time_point::max()) const; + + size_t count(const sparql2tensor::SPARQLQuery &query, + std::chrono::steady_clock::time_point endtime = std::chrono::steady_clock::time_point::max()) const; + + bool contains(const rdf4cpp::rdf::Statement &statement) const; + + [[nodiscard]] size_t size() const; + }; +}; // namespace dice::triple_store +#endif//TENTRIS_STORE_TRIPLESTORE diff --git a/src/exec/TentrisServer.cpp b/src/exec/TentrisServer.cpp deleted file mode 100644 index 2950a8ea..00000000 --- a/src/exec/TentrisServer.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include "config/ServerConfig.hpp" -#include "VersionStrings.hpp" - - -void bulkload(const std::string &triple_file, size_t bulksize) { - namespace fs = std::filesystem; - using namespace fmt::literals; - using namespace tentris::logging; - - // log the starting time and print resource usage information - auto loading_start_time = log_health_data(); - - if (fs::is_regular_file(triple_file)) { - log("nt-file: {} loading ..."_format(triple_file)); - ::tentris::store::AtomicTripleStore::getInstance().bulkloadRDF(triple_file, bulksize); - } else { - log("nt-file {} was not found."_format(triple_file)); - log("Exiting ..."); - std::exit(EXIT_FAILURE); - } - // log the end time and print resource usage information - auto loading_end_time = log_health_data(); - // log the time it tool to load the file - log_duration(loading_start_time, loading_end_time); -} - -struct tentris_restinio_traits : public restinio::traits_t< - restinio::null_timer_manager_t, -#ifdef DEBUG - restinio::shared_ostream_logger_t, -#else - restinio::null_logger_t, -#endif - restinio::router::express_router_t<> ->{ - static constexpr bool use_connection_count_limiter = true; -}; - - -int main(int argc, char *argv[]) { - using namespace tentris::http; - using namespace tentris::store::config; - using namespace fmt::literals; - using namespace tentris::logging; - - ServerConfig cfg{argc, argv}; - - init_logging(cfg.logstdout, cfg.logfile, cfg.logfiledir, cfg.loglevel); - - log("Running {} with {}"_format(tentris_version_string, hypertrie_version_string)); - - auto &store_cfg = AtomicTripleStoreConfig::getInstance(); - store_cfg.rdf_file = cfg.rdf_file; - store_cfg.timeout = cfg.timeout; - store_cfg.cache_size = cfg.cache_size; - store_cfg.threads = cfg.threads; - - // bulkload file - if (not cfg.rdf_file.empty()) { - bulkload(cfg.rdf_file, cfg.bulksize); - } else { - log("No file loaded."); - } - - // create endpoint - using namespace restinio; - auto router = std::make_unique>(); - router->http_get( - R"(/sparql)", - tentris::http::sparql_endpoint::SparqlEndpoint{}); - router->http_get( - R"(/stream)", - tentris::http::sparql_endpoint::SparqlEndpoint{}); - - router->non_matched_request_handler( - [](auto req) -> restinio::request_handling_status_t { - return req->create_response(restinio::status_not_found()).connection_close().done(); - }); - - // Launching a server with custom traits. 
- - log("SPARQL endpoint serving sparkling linked data treasures on {} threads at http://0.0.0.0:{}/sparql?query="_format(cfg.threads, cfg.port)); - - restinio::run( - restinio::on_thread_pool(cfg.threads) - .max_parallel_connections(cfg.threads) - .address("0.0.0.0") - .port(cfg.port) - .request_handler(std::move(router)) - .handle_request_timeout(cfg.timeout) - .write_http_response_timelimit(cfg.timeout)); - log("Shutdown successful."); - return EXIT_SUCCESS; -} diff --git a/src/exec/TentrisTerminal.cpp b/src/exec/TentrisTerminal.cpp deleted file mode 100644 index 1c17c61a..00000000 --- a/src/exec/TentrisTerminal.cpp +++ /dev/null @@ -1,285 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -#include "config/TerminalConfig.hpp" -#include "VersionStrings.hpp" - -using namespace tentris::store; -using namespace tentris::logging; -using namespace tentris::store::cache; -using namespace tentris::store::sparql; -using namespace std::filesystem; -using namespace iter; -using namespace tentris::tensor; -using namespace std::chrono; - -using Variable = Dice::sparql::Variable; - -TerminalConfig cfg; - -bool onlystdout = false; - -using Errors = tentris::http::ResultState; - -std::ostream &logsink() { - if (onlystdout) - return std::cout; - else - return std::cerr; -} - -inline std::string tp2s(time_point_t timepoint) { - auto in_time_t = system_clock::to_time_t( - system_clock::now() + duration_cast(timepoint - steady_clock::now())); - - std::stringstream ss; - ss << std::put_time(std::localtime(&in_time_t), "%Y-%m-%d %X"); - return ss.str(); -}; - -time_point_t query_start; -time_point_t query_end; -time_point_t parse_start; -time_point_t parse_end; -time_point_t execute_start; -time_point_t execute_end; - -Errors error; -size_t number_of_bindings; - -time_point_t timeout; -time_point_t actual_timeout; - -template -void -writeNTriple(std::ostream &stream, const std::shared_ptr &query_package) { - const std::vector &vars = query_package->getQueryVariables(); - stream << fmt::format("{}\n", fmt::join(vars, ",")); - - uint timeout_check = 0; - size_t result_count = 0; - - bool first = true; - - if (not query_package->is_trivial_empty) { - std::shared_ptr raw_results = query_package->getEinsum(timeout); - auto &results = *static_cast *>(raw_results.get()); - for (const auto &result : results) { - if (first) { - first = false; - execute_end = steady_clock::now(); - } - - std::stringstream ss; - bool inner_first = true; - for (auto binding : result.key) { - if (inner_first) - inner_first = false; - else - ss << ","; - if (binding != nullptr) - ss << binding->getIdentifier(); - } - ss << "\n"; - - std::string binding_string = ss.str(); - - for ([[maybe_unused]] const auto c : iter::range(result.value)) { - stream << binding_string; - ++result_count; - if (++timeout_check == 500) { - timeout_check = 0; - stream.flush(); - if (auto current_time = steady_clock::now(); current_time > timeout) { - ::error = Errors::SERIALIZATION_TIMEOUT; - actual_timeout = current_time; - number_of_bindings = result_count; - return; - } - } - } - } - } - if (first) { // if no bindings are returned - execute_end = steady_clock::now(); - } - number_of_bindings = result_count; -} - -template -inline void runCMDQuery(const std::shared_ptr &query_package, - const time_point_t timeout) { - // calculate the result - // check if it timed out - if (steady_clock::now() < timeout) { - writeNTriple(std::cout, 
query_package); - } else { - ::error = Errors::PROCESSING_TIMEOUT; - actual_timeout = steady_clock::now(); - } -} - -void commandlineInterface(QueryExecutionPackage_cache &querypackage_cache) { - std::string sparql_str; - while (std::getline(std::cin, sparql_str)) { - - query_start = steady_clock::now(); - - - number_of_bindings = 0; - ::error = Errors::OK; - - - try { - parse_start = steady_clock::now(); - std::shared_ptr query_package = querypackage_cache[sparql_str]; - - timeout = steady_clock::now() + cfg.timeout; - - parse_end = steady_clock::now(); - execute_start = steady_clock::now(); - - switch (query_package->getSelectModifier()) { - case SelectModifier::NONE: { - runCMDQuery(query_package, timeout); - break; - } - case SelectModifier::REDUCE: - [[fallthrough]]; - case SelectModifier::DISTINCT: { - runCMDQuery(query_package, timeout); - break; - } - default: - break; - } - } catch (const std::invalid_argument &e) { - ::error = Errors::UNPARSABLE; - logDebug(fmt::format("UNPARSABLE reason: {}", e.what())); - } catch (const std::exception &e) { - ::error = Errors::UNEXPECTED; - logDebug(fmt::format("UNEXPECTED reason: {}", e.what())); - } catch (...) { - ::error = Errors::SEVERE_UNEXPECTED; - } - query_end = steady_clock::now(); - - - auto parsing_time = duration_cast(parse_end - parse_start); - auto execution_time = duration_cast(execute_end - execute_start); - auto total_time = duration_cast(query_end - query_start); - auto serialization_time = total_time - execution_time - parsing_time; - switch (::error) { - case Errors::OK: - logsink() << "SUCCESSFUL\n"; - break; - case Errors::UNPARSABLE: - logsink() << "ERROR: UNPARSABLE QUERY\n"; - break; - case Errors::PROCESSING_TIMEOUT: - logsink() << "ERROR: TIMEOUT DURING PROCESSING\n"; - break; - case Errors::SERIALIZATION_TIMEOUT: - logsink() << "ERROR: TIMEOUT DURING SERIALIZATION\n"; - break; - case Errors::UNEXPECTED: - logsink() << "ERROR: UNEXPECTED\n"; - break; - case Errors::SEVERE_UNEXPECTED: - logsink() << "ERROR: SEVERE UNEXPECTED\n"; - break; - default: - break; - } - - - logsink() << fmt::format("start: {}\n", tp2s(query_start)); - logsink() << fmt::format("planned timeout: {}\n", tp2s(timeout)); - if (::error == Errors::PROCESSING_TIMEOUT or ::error == Errors::SERIALIZATION_TIMEOUT) - logsink() << fmt::format("actual timeout: {}\n", tp2s(actual_timeout)); - logsink() << fmt::format("end: {}\n", tp2s(query_end)); - - if (::error == Errors::OK or ::error == Errors::PROCESSING_TIMEOUT or - ::error == Errors::SERIALIZATION_TIMEOUT) { - logsink() << "number of bindings: " << fmt::format("{:18}", number_of_bindings) << "\n"; - - logsink() << "parsing time: " << fmt::format("{:18}", parsing_time.count()) << " ns\n"; - - - logsink() << "execution time: " << fmt::format("{:18}", execution_time.count()) << " ns\n"; - if (::error != Errors::PROCESSING_TIMEOUT) - logsink() << "serialization time: " << fmt::format("{:18}", serialization_time.count()) << " ns\n"; - } - - logsink() << "total time: " << fmt::format("{:18}", total_time.count()) << " ns\n"; - logsink() << "total time: " - << fmt::format("{:12}", duration_cast(total_time).count()) - << " ms\n"; - - logsink().flush(); - } - std::raise(SIGINT); -} - - -int main(int argc, char *argv[]) { - cfg = TerminalConfig{argc, argv}; - tentris::logging::init_logging(cfg.logstdout, cfg.logfile, cfg.logfiledir, cfg.loglevel); - - logsink() << fmt::format("Running {} with {}", tentris_version_string, hypertrie_version_string) << std::endl; - - TripleStore triplestore{}; - - 
QueryExecutionPackage_cache executionpackage_cache{cfg.cache_size}; - - - onlystdout = cfg.onlystdout; - - if (not cfg.rdf_file.empty()) { - logsink() << "Loading file " << cfg.rdf_file << " ..." << std::endl; - auto start_time = steady_clock::now(); - AtomicTripleStore::getInstance().bulkloadRDF(cfg.rdf_file, cfg.bulksize); - auto duration = steady_clock::now() - start_time; - logsink() << fmt::format("... loading finished. {} triples loaded.", AtomicTripleStore::getInstance().size()) - << std::endl; - logsink() << "duration: {} h {} min {} s"_format( - (duration_cast(duration) % 24).count(), - (duration_cast(duration) % 60).count(), - (duration_cast(duration) % 60).count()) << std::endl; - } - - - std::thread commandline_client{commandlineInterface, std::ref(executionpackage_cache)}; - // wait for keyboard interrupt - while (true) { - sigset_t wset; - sigemptyset(&wset); - sigaddset(&wset, SIGINT); - int number; - - if (int status = sigwait(&wset, &number); status != 0) { - log("Set contains an invalid signal number."); - break; - } - if (number == SIGINT) { - logDebug("Exiting by Signal {}."_format(strsignal(number))); - break; - } - } -} diff --git a/src/exec/VersionStrings.hpp b/src/exec/VersionStrings.hpp deleted file mode 100644 index 9b8fcd23..00000000 --- a/src/exec/VersionStrings.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef TENTRIS_VERSIONSTRINGS_HPP -#define TENTRIS_VERSIONSTRINGS_HPP - -#include -#include - -inline const std::string tentris_version_string = std::string{} + Dice::tentris::name + " " + Dice::tentris::version; - -inline const std::string hypertrie_version_string = std::string{} + Dice::hypertrie::name + " " + Dice::hypertrie::version; - -#endif //TENTRIS_VERSIONSTRINGS_HPP diff --git a/src/exec/config/ExecutableConfig.hpp b/src/exec/config/ExecutableConfig.hpp deleted file mode 100644 index 0168ce52..00000000 --- a/src/exec/config/ExecutableConfig.hpp +++ /dev/null @@ -1,181 +0,0 @@ -#ifndef TENTRIS_EXECUTABLECONFIG_HPP -#define TENTRIS_EXECUTABLECONFIG_HPP - -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include -#include -#include "../VersionStrings.hpp" - -namespace { - using namespace fmt::literals; - namespace fs = std::filesystem; - namespace logging = boost::log; -} - -struct ExecutableConfig { -protected: - - static std::map log_severity_mapping; - - cxxopts::Options options{"tentris", "to be overwritten"}; -public: - /** - * The relative or absolute path to the RDF file that TENTRIS loads on startup. - */ - mutable std::string rdf_file{}; - /** - * The timeout for query processing of single queries. - */ - mutable std::chrono::steady_clock::duration timeout; - /** - * Max number queries that may be cached. - */ - mutable size_t cache_size; - - mutable logging::trivial::severity_level loglevel; - - mutable bool logfile; - - mutable size_t bulksize; - - mutable bool logstdout; - - mutable std::string logfiledir; - - /** - * Initialization of command argument parser. - */ - ExecutableConfig() { - addOptions(); - } - -protected: - void addOptions() { - options.add_options() - ("f,file", "ntriple file to load at startup", cxxopts::value()) - ("t,timeout", "time in seconds until processing a request is canceled by the server", - cxxopts::value()->default_value("180")) - ("l,cache_size", "Max number queries that may be cached.", - cxxopts::value()->default_value("500")) - ("loglevel", "Sets the logging level. 
Valid values are: [trace, debug, info, warning, error, fatal]", - cxxopts::value()->default_value("info")) - ("logfile", - "If log is written to file.", - cxxopts::value()->default_value("true")) - ("b,bulksize", - "Number of triples that are inserted at once. A larger value results in a higher memory consumption during loading RDF data but may result in shorter loading times.", - cxxopts::value()->default_value("1000000")) - ("logstdout", - "If log is written to stdout.", - cxxopts::value()->default_value("false")) - ("logfiledir", - "A folder path where to write the logfiles. Default is the current working directory.", - cxxopts::value()->default_value(fs::current_path().string())); - } - -public: - - ExecutableConfig(int argc, char ** &argv) : ExecutableConfig{} { - initConfig(argc, argv); - } - - /** - * Parses the command line arguments. - * @param argc number of arguments - * @param argv array of char arrays with arguments - */ - - void initConfig(int argc, char **&argv) { - try { - cxxopts::ParseResult arguments = options.parse(argc, argv); - parseArguments(arguments); - - } catch (cxxopts::option_not_exists_exception &ex) { - if (std::string{"Option ‘help’ does not exist"} == ex.what()) { - std::cout << options.help() << std::endl; - exit(EXIT_SUCCESS); - } else { - std::cout << ex.what() << std::endl; - exit(EXIT_FAILURE); - } - } catch (cxxopts::argument_incorrect_type &ex) { - std::cout << ex.what() << std::endl; - exit(EXIT_FAILURE); - } - } - -protected: - virtual void parseArguments(const cxxopts::ParseResult &arguments) { - if (arguments.count("file")) - rdf_file = arguments["file"].as(); - - - auto timeout_ = arguments["timeout"].as(); - if (timeout_ == 0) - timeout = std::chrono::hours(24 * 7); // one week is default processing time. - else - timeout = std::chrono::seconds(timeout_); - - - auto cache_size_ = arguments["cache_size"].as(); - - if (cache_size_ == 0) - cache_size = 1; - else - cache_size = cache_size_; - - - auto loglevel_str = arguments["loglevel"].as(); - auto found = log_severity_mapping.find(loglevel_str); - if (found != log_severity_mapping.end()) { - loglevel = found->second; - } else { - throw cxxopts::argument_incorrect_type( - "{} is not a valid log level. Valid values are: [trace, debug, info, warning, error, fatal]"_format( - loglevel_str)); - } - - - logfile = arguments["logfile"].as(); - - - logstdout = arguments["logstdout"].as(); - - bulksize = arguments["bulksize"].as(); - - - logfiledir = arguments["logfiledir"].as(); - if (not fs::exists(logfiledir)) { - try { - fs::create_directories(logfiledir); - } catch (const fs::filesystem_error &exc) { - throw cxxopts::argument_incorrect_type( - "Could not creat directory {}. 
Error: {}"_format(logfiledir, exc.what())); - } - } - if (not fs::is_directory(logfiledir)) - throw cxxopts::argument_incorrect_type("{} is not a valid directory path"_format(logfiledir)); - } -}; - -std::map ExecutableConfig::log_severity_mapping{ - {"trace", logging::trivial::trace}, - {"debug", logging::trivial::debug}, - {"info", logging::trivial::info}, - {"warning", logging::trivial::warning}, - {"error", logging::trivial::error}, - {"fatal", logging::trivial::fatal} -}; - - -#endif //TENTRIS_EXECUTABLECONFIG_HPP diff --git a/src/exec/config/ServerConfig.hpp b/src/exec/config/ServerConfig.hpp deleted file mode 100644 index e4caeddd..00000000 --- a/src/exec/config/ServerConfig.hpp +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef TENTRIS_SERVERCONFIG_HPP -#define TENTRIS_SERVERCONFIG_HPP - -#include "ExecutableConfig.hpp" - -#include - - -struct ServerConfig : public ExecutableConfig { - - /** - * The network port where Tentris is available. - */ - mutable uint16_t port; - - /** - * Number of threads used to serve http results. Each thread may use multiple others while calculating the result. - */ - mutable uint threads; - - ServerConfig() { - options = cxxopts::Options{ - "tentris_server", - "Tentris HTTP SPARQL endpoint" - "\n {}\n {}\n"_format(tentris_version_string, hypertrie_version_string)}; - addOptions(); - - options.add_options() - ("p,port", "port to run server", cxxopts::value()->default_value("9080")) - ("c,threads", "How many threads are used for handling http requests", - cxxopts::value()->default_value("{}"_format(std::thread::hardware_concurrency()))); - } - - ServerConfig(int argc, char **&argv) : ServerConfig{} { - initConfig(argc, argv); - } - -protected: - void parseArguments(const cxxopts::ParseResult &arguments) override { - ExecutableConfig::parseArguments(arguments); - - - port = arguments["port"].as(); - - - auto threads_ = arguments["threads"].as(); - - if (threads_ != 0) - threads = threads_; - - } - -}; - -#endif //TENTRIS_SERVERCONFIG_HPP diff --git a/src/exec/config/TerminalConfig.hpp b/src/exec/config/TerminalConfig.hpp deleted file mode 100644 index c8ab115d..00000000 --- a/src/exec/config/TerminalConfig.hpp +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef TENTRIS_TERMINALCONFIG_HPP -#define TENTRIS_TERMINALCONFIG_HPP - -#include "ExecutableConfig.hpp" - - -struct TerminalConfig : public ExecutableConfig { - - mutable bool onlystdout; - - TerminalConfig() { - options = cxxopts::Options{ - "tentris_terminal", - "Tentris SPARQL endpoint with terminal interface. 
" - "\n {}\n {}\n\n"_format( - tentris_version_string, hypertrie_version_string)}; - addOptions(); - - options.add_options() - ("s,onlystdout", "Print non-payload info messages to stdout instead of stderr.", - cxxopts::value()->default_value("false")); - } - - - TerminalConfig(int argc, char **&argv) : TerminalConfig{} { - initConfig(argc, argv); - } - -protected: - - void parseArguments(const cxxopts::ParseResult &arguments) override { - ExecutableConfig::parseArguments(arguments); - - onlystdout = arguments["onlystdout"].as(); - - } -}; - -#endif //TENTRIS_TERMINALCONFIG_HPP diff --git a/src/exec/tools/IDs2Hypertrie.cpp b/src/exec/tools/IDs2Hypertrie.cpp deleted file mode 100644 index 4152003e..00000000 --- a/src/exec/tools/IDs2Hypertrie.cpp +++ /dev/null @@ -1,336 +0,0 @@ -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -namespace tentris::IDs2Hypertrie { - void writeNodeStatsTSVs(const auto &storage_3_uncompressed, const auto &storage_2_uncompressed, - const auto &storage_2_compressed, const auto &storage_1_uncompressed, - const auto &storage_1_compressed); - - void writeNodeCountComparisonTSVs(const auto &storage_2_uncompressed, const auto &storage_2_compressed, - const auto &storage_1_uncompressed, const auto &storage_1_compressed); - - void loadIDsAndWriteOutStats(const std::string &csv_file_path); -} -int main(int argc, char *argv[]) { - using namespace fmt::literals; - if (argc != 2) { - std::cerr << "Please provide exactly one CSV file with triple IDS only and no headings." << std::endl; - exit(EXIT_FAILURE); - } - - std::string csv_file_path{argv[1]}; - if (not std::filesystem::is_regular_file(csv_file_path)) { - std::cerr << "{} is not a file."_format(csv_file_path) << std::endl; - exit(EXIT_FAILURE); - } - - tentris::IDs2Hypertrie::loadIDsAndWriteOutStats(csv_file_path); -} - -namespace tentris::IDs2Hypertrie { - using namespace tentris::store; - using namespace fmt::literals; - using namespace std::chrono; - - using key_part_type = size_t; - using tr = hypertrie::Hypertrie_t; - - - void loadIDsAndWriteOutStats(const std::string &csv_file_path) { - hypertrie::Hypertrie trie(3); - - csv::CSVFormat format; - format.delimiter('\t').quote(false); - - - csv::CSVReader tsv_reader(csv_file_path, format); - - // Iterate through each line and split the content using delimiter - unsigned long count = 0; - auto start = steady_clock::now(); - - try { - hypertrie::BulkInserter bulk_inserter{trie, 0}; - - for (csv::CSVRow &row: tsv_reader) { // Input iterator - row[0].get(); - bulk_inserter.add({row[0].get(), - row[1].get(), - row[2].get()}); - ++count; - - if (bulk_inserter.size() == 1'000'000) { - bulk_inserter.flush(); - std::cerr << "{:>10.3} mio triples processed.\n"_format(double(count) / 1'000'000); - std::cerr << "{:>10.3} mio triples loaded.\n"_format(double(trie.size()) / 1'000'000); - } - } - - bulk_inserter.flush(true); - - } catch (...) 
{ - throw std::invalid_argument{"A parsing error occurred while parsing {}"_format(csv_file_path)}; - } - auto end = steady_clock::now(); - auto duration = end - start; - - constexpr static auto uncompressed = hypertrie::internal::raw::NodeCompression::uncompressed; - constexpr static auto compressed = hypertrie::internal::raw::NodeCompression::compressed; - - auto &storage = trie.context()->rawContext().storage; - - const auto &storage_3_uncompressed = storage.getNodeStorage<3UL, uncompressed>(); - const auto &storage_2_uncompressed = storage.getNodeStorage<2UL, uncompressed>(); - const auto &storage_2_compressed = storage.getNodeStorage<2UL, compressed>(); - const auto &storage_1_uncompressed = storage.getNodeStorage<1UL, uncompressed>(); - const auto &storage_1_compressed = storage.getNodeStorage<1UL, compressed>(); - - writeNodeStatsTSVs(storage_3_uncompressed, storage_2_uncompressed, storage_2_compressed, storage_1_uncompressed, - storage_1_compressed); - - writeNodeCountComparisonTSVs(storage_2_uncompressed, storage_2_compressed, storage_1_uncompressed, - storage_1_compressed); - - - std::cerr << "## total ## \n" - << "triples processed: {}\n"_format(count) - << "triples loaded: {}\n"_format(trie.size()) - << "hypertrie size estimation: {:d} kB\n"_format(tentris::logging::get_memory_usage()) - << "duration: {} h {} min {}.{:03d} s = {} ms\n"_format( - (std::chrono::duration_cast(duration)).count(), - (std::chrono::duration_cast(duration) % 60).count(), - (std::chrono::duration_cast(duration) % 60).count(), - (std::chrono::duration_cast(duration) % 1000).count(), - std::chrono::duration_cast(duration).count()) - << "# hypertrie stats #\n" - << "depth 3 uncompressed nodes: {}\n"_format(storage_3_uncompressed.size()) - << "depth 2 uncompressed nodes: {}\n"_format(storage.getNodeStorage<2UL, uncompressed>().size()) - << "depth 2 compressed nodes: {}\n"_format(storage.getNodeStorage<2UL, compressed>().size()) - << "depth 1 uncompressed nodes: {}\n"_format(storage.getNodeStorage<1UL, uncompressed>().size()) - << "depth 1 compressed nodes: {}\n"_format(storage.getNodeStorage<1UL, compressed>().size()); - } - - void writeNodeCountComparisonTSVs(const auto &storage_2_uncompressed, const auto &storage_2_compressed, - const auto &storage_1_uncompressed, const auto &storage_1_compressed) { - { - std::ofstream tsv_depth_2_comp("depth_2_node_count_comparision.tsv"); - auto csv_writer = csv::make_tsv_writer(tsv_depth_2_comp); - - csv_writer << std::make_tuple("hypertrie_type", "uncompressed_nodes", "compressed_nodes"); - - { // baseline - size_t uc_nodes = [&]() { - size_t old_uc = 0; - for (auto[hash, node] : storage_2_uncompressed) - old_uc += node->ref_count(); - for (auto[hash, node] : storage_2_compressed) - old_uc += node->ref_count(); - return old_uc; - }(); - - size_t c_nodes = 0; - - csv_writer << std::make_tuple("baseline", uc_nodes, c_nodes); - } - - { // compression - size_t uc_nodes = [&]() { - size_t old_uc = 0; - for (auto[hash, node] : storage_2_uncompressed) - old_uc += node->ref_count(); - return old_uc; - }(); - - size_t c_nodes = [&]() { - size_t c_nodes = 0; - for (auto[hash, node] : storage_2_compressed) - c_nodes += node->ref_count(); - return c_nodes; - }(); - - csv_writer << std::make_tuple("compression", uc_nodes, c_nodes); - } - - { // hash - size_t uc_nodes = storage_2_uncompressed.size() + storage_2_compressed.size(); - - size_t c_nodes = 0; - - csv_writer << std::make_tuple("hash", uc_nodes, c_nodes); - } - - { // hash+compression and hash+compression+inline - size_t 
uc_nodes = storage_2_uncompressed.size(); - - size_t c_nodes = storage_2_compressed.size(); - - csv_writer << std::make_tuple("hash+compression", uc_nodes, c_nodes); - csv_writer << std::make_tuple("hash+compression+inline", uc_nodes, c_nodes); - } - } - - { - std::ofstream tsv_depth_1_comp("depth_1_node_count_comparision.tsv"); - auto csv_writer = csv::make_tsv_writer(tsv_depth_1_comp); - - csv_writer << std::make_tuple("hypertrie_type", "uncompressed_nodes", "compressed_nodes"); - - { // baseline - size_t c_depth2_nodes = [&]() { - size_t depth2nodes = 0; - for (auto[hash, node] : storage_2_uncompressed) - depth2nodes += node->ref_count(); - return depth2nodes; - }(); - - size_t depth1_nodes = [&]() { - size_t uc_nodes = 0; - for (auto[hash, node] : storage_1_uncompressed) - uc_nodes += node->ref_count(); - for (auto[hash, node] : storage_1_compressed) - uc_nodes += node->ref_count(); - return uc_nodes; - }(); - - size_t uc_nodes = c_depth2_nodes + (depth1_nodes / 2); - - size_t c_nodes = 0; - - csv_writer << std::make_tuple("baseline", uc_nodes, c_nodes); - } - - { // compression - size_t uc_nodes = [&]() { - size_t uc_nodes = 0; - for (auto[hash, node] : storage_1_uncompressed) - uc_nodes += node->ref_count(); - return uc_nodes; - }() / 2; - - size_t c_nodes = [&]() { - size_t x = 0; - for (auto[hash, node] : storage_1_compressed) - x += node->ref_count(); - return x; - }(); - c_nodes = c_nodes / 2; - - csv_writer << std::make_tuple("compression", uc_nodes, c_nodes); - } - - { // hash - using TensorHash = hypertrie::internal::raw::TensorHash; - - size_t compressed_nodes_count = [&]() { - robin_hood::unordered_set c_d1_hashes; - - // add the hashes from depth 1 compressed nodes. - for (auto[hash, node] : storage_1_compressed) - c_d1_hashes.insert(hash); - - // break apart the depth 2 compressed nodes and a Hash for each of both key parts - for (auto[hash, node] : storage_2_compressed) { - c_d1_hashes.insert( - TensorHash::getCompressedNodeHash<1, key_part_type>( - {node->key()[0]}, true)); - c_d1_hashes.insert( - TensorHash::getCompressedNodeHash<1, key_part_type>( - {node->key()[1]}, true)); - } - - return c_d1_hashes.size(); - }(); - - size_t uc_nodes = compressed_nodes_count + storage_1_uncompressed.size(); - - size_t c_nodes = 0; - - csv_writer << std::make_tuple("hash", uc_nodes, c_nodes); - } - - { // hash+compression and hash+compression+inline - size_t uc_nodes = storage_1_uncompressed.size(); - - size_t c_nodes = storage_1_compressed.size(); - - csv_writer << std::make_tuple("hash+compression", uc_nodes, c_nodes); - csv_writer << std::make_tuple("hash+compression+inline", uc_nodes, 0); - } - - } - } - - void writeNodeStatsTSVs(const auto &storage_3_uncompressed, const auto &storage_2_uncompressed, - const auto &storage_2_compressed, const auto &storage_1_uncompressed, - const auto &storage_1_compressed) { - auto extactCompressionTag = [](const auto &hash) { return (hash.isCompressed()) ? "c" : "u"; }; - - { - std::ofstream tsv_depth_3("depth_3_nodes_stats.tsv"); // Can also use ofstream, etc. 
- auto csv_writer = csv::make_tsv_writer(tsv_depth_3); - - - csv_writer - << std::make_tuple("node_type", "node_size", "dimension_1_size", "dimension_2_size", - "dimension_3_size", - "reference_count"); - for (auto[hash, node] : storage_3_uncompressed) { - csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), node->edges(0).size(), - node->edges(1).size(), - node->edges(2).size(), - node->ref_count()); - } - } - - { - std::ofstream tsv_depth_2("depth_2_nodes_stats.tsv"); // Can also use ofstream, etc. - auto csv_writer = csv::make_tsv_writer(tsv_depth_2); - - csv_writer - << std::make_tuple("node_type", "node_size", "dimension_1_size", "dimension_2_size", - "reference_count"); - - for (auto[hash, node] : storage_2_compressed) { - csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), 1, 1, node->ref_count()); - } - - for (auto[hash, node] : storage_2_uncompressed) { - csv_writer - << std::make_tuple(extactCompressionTag(hash), node->size(), node->edges(0).size(), - node->edges(1).size(), node->ref_count()); - } - } - - { - std::ofstream tsv_depth_1("depth_1_nodes_stats.tsv"); // Can also use ofstream, etc. - auto csv_writer = csv::make_tsv_writer(tsv_depth_1); - - csv_writer << std::make_tuple("node_type", "node_size", "dimension_1_size", "reference_count"); - - for (auto[hash, node] : storage_1_compressed) { - csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), 1, node->ref_count()); - } - - for (auto[hash, node] : storage_1_uncompressed) { - csv_writer << std::make_tuple(extactCompressionTag(hash), node->size(), node->edges(0).size(), - node->ref_count()); - } - } - } - - -} \ No newline at end of file diff --git a/src/exec/tools/RDF2IDs.cpp b/src/exec/tools/RDF2IDs.cpp deleted file mode 100644 index 3a8867c2..00000000 --- a/src/exec/tools/RDF2IDs.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include - -#include -#include - -int main(int argc, char *argv[]) { - using namespace tentris::store; - using namespace fmt::literals; - using namespace std::chrono; - using Triple = Dice::rdf::Triple; - - if (argc != 2) { - std::cerr << "Please provide exactly one triple file as commandline argument." << std::endl; - exit(EXIT_FAILURE); - } - - std::string rdf_file{argv[1]}; - if (not std::filesystem::is_regular_file(rdf_file)) { - std::cerr << "{} is not a file."_format(rdf_file) << std::endl; - exit(EXIT_FAILURE); - } - - fmt::print(stderr, "To store the result pipe stdout (not stderr!) to a file. 
Output format is TSV (tab-separated file, extension: .tsv).\n"); - - rdf::TermStore ts{}; - unsigned long count = 0; - - try { - auto start = steady_clock::now(); - fmt::print("S\tP\tO\n"); - for (const Triple &triple : rdf::SerdParser{rdf_file}) { - std::array id_triple{ - (uintptr_t) ts[triple.subject()], - (uintptr_t) ts[triple.predicate()], - (uintptr_t) ts[triple.object()]}; - fmt::print("{}\t{}\t{}\n", id_triple[0], id_triple[1], id_triple[2]); - ++count; - if (count % 1'000'000 == 0) { - fmt::print(stderr, "triples processed: {:>10.3}\n", double(count) / 1'000'000); - } - } - auto end = steady_clock::now(); - auto duration = end - start; - - fmt::print(stderr, "total triples processed: {}\n", count); - fmt::print(stderr, "duration: {} h {} min {}.{:03d} s = {} ms\n", - (std::chrono::duration_cast(duration)).count(), - (std::chrono::duration_cast(duration) % 60).count(), - (std::chrono::duration_cast(duration) % 60).count(), - (std::chrono::duration_cast(duration) % 1000).count(), - std::chrono::duration_cast(duration).count()); - } catch (...) { - throw std::invalid_argument{ - "A parsing error occurred while parsing {}. Error occurred at {}th triple."_format(rdf_file, count)}; - } -} diff --git a/src/lib/tentris/http/QueryResultState.hpp b/src/lib/tentris/http/QueryResultState.hpp deleted file mode 100644 index 92e30cc6..00000000 --- a/src/lib/tentris/http/QueryResultState.hpp +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef TENTRIS_QUERYRESULTSTATE_HPP -#define TENTRIS_QUERYRESULTSTATE_HPP - -namespace tentris::http { - enum ResultState { - OK, UNPARSABLE, UNKNOWN_REQUEST, PROCESSING_TIMEOUT, SERIALIZATION_TIMEOUT, UNEXPECTED, SEVERE_UNEXPECTED, - }; - - std::ostream &operator<<(std::ostream &os, ResultState state) { - switch (state) { - case OK : - return os << "OK"; - case UNPARSABLE : - return os << "UNPARSABLE"; - case UNKNOWN_REQUEST : - return os << "UNKNOWN_REQUEST"; - case PROCESSING_TIMEOUT : - return os << "PROCESSING_TIMEOUT"; - case SERIALIZATION_TIMEOUT : - return os << "SERIALIZATION_TIMEOUT"; - case UNEXPECTED : - return os << "UNEXPECTED"; - case SEVERE_UNEXPECTED : - return os << "SEVERE_UNEXPECTED"; - default: - return os << static_cast(state); - } - } -} - -#endif //TENTRIS_QUERYRESULTSTATE_HPP diff --git a/src/lib/tentris/http/SparqlEndpoint.hpp b/src/lib/tentris/http/SparqlEndpoint.hpp deleted file mode 100644 index f16d8b7c..00000000 --- a/src/lib/tentris/http/SparqlEndpoint.hpp +++ /dev/null @@ -1,252 +0,0 @@ -#ifndef TENTRIS_SPARQLENDPOINT_HPP -#define TENTRIS_SPARQLENDPOINT_HPP - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "tentris/http/QueryResultState.hpp" -#include "tentris/store/SPARQL/ParsedSPARQL.hpp" -#include "tentris/store/AtomicQueryExecutionPackageCache.hpp" -#include "tentris/store/SparqlJsonResultSAXWriter.hpp" -#include "tentris/store/AtomicTripleStore.hpp" -#include "tentris/util/LogHelper.hpp" - -namespace tentris::http { - - namespace sparql_endpoint { - - using AtomicTripleStoreConfig = ::tentris::store::config::AtomicTripleStoreConfig; - using AtomicQueryExecutionCache = ::tentris::store::AtomicQueryExecutionCache; - using QueryExecutionPackage = ::tentris::store::cache::QueryExecutionPackage; - using Status = ResultState; - - using namespace ::tentris::logging; - using namespace ::tentris::store; - using namespace ::tentris::store::sparql; - using namespace ::tentris::tensor; - - using namespace std::string_literals; - using namespace ::std::chrono; - - /** - * Main SPARQL endpoint. 
Parses HTTP queries and returns SPARQL JSON Results. - */ - - template requires std::is_same_v or - std::is_same_v - struct SparqlEndpoint { - private: - using Term = Dice::rdf::Term; - using BNode = Dice::rdf::BNode; - using Literal = Dice::rdf::Literal; - using URIRef = Dice::rdf::URIRef; - using Triple = Dice::rdf::Triple; - using TriplePattern = Dice::sparql::TriplePattern; - using Variable = Dice::sparql::Variable; - public: - - constexpr static bool chunked_output = std::is_same_v; - constexpr static size_t chunk_size = 100'000'000UL; - - auto operator()(restinio::request_handle_t req, - [[maybe_unused]] auto params) -> restinio::request_handling_status_t { - using namespace std::string_literals; - auto start_time = steady_clock::now(); - log("request started."); - auto start_memory = get_memory_usage(); - logDebug("ram: {:d} kB"_format(start_memory)); - auto timeout = start_time + AtomicTripleStoreConfig::getInstance().timeout; - restinio::request_handling_status_t handled = restinio::request_rejected(); - Status status = Status::OK; - std::string error_message{}; - std::shared_ptr query_package; - std::string query_string{}; - try { - const auto query_params = restinio::parse_query( - req->header().query()); - if (query_params.has("query")) { - query_string = std::string(query_params["query"]); - log("query: {}"_format(query_string)); - // check if there is actually an query - try { - query_package = AtomicQueryExecutionCache::getInstance()[query_string]; - } catch (const std::invalid_argument &exc) { - status = Status::UNPARSABLE; - error_message = exc.what(); - } - if (status == Status::OK) { - status = runQuery(req, query_package, timeout); - } - } else { - status = Status::UNPARSABLE; - } - } catch (const std::exception &exc) { - // if the execution of the query should fail return an internal server error - status = Status::UNEXPECTED; - error_message = exc.what(); - } - catch (...) { - // if the execution of the query should fail return an internal server error - status = Status::SEVERE_UNEXPECTED; - } - - switch (status) { - case OK: - handled = restinio::request_accepted(); - break; - case UNPARSABLE: - logError(" ## unparsable query\n" - " query_string: {}"_format(query_string) - ); - handled = req->create_response(restinio::http_status_line_t{restinio::status_code::bad_request, - "Could not parse the requested query."s}).done(); - break; - case UNKNOWN_REQUEST: - logError("unknown HTTP command. Only HTTP GET and POST are supported."); - handled = req->create_response(restinio::status_not_implemented()).done(); - break; - case PROCESSING_TIMEOUT: - logError("timeout during request processing"); - handled = req->create_response(restinio::status_request_time_out()).done(); - break; - case SERIALIZATION_TIMEOUT: - // no REQUEST TIMEOUT response can be sent here because we stream results directly to the client. - // Thus, the code was already written to the header. 
- logError("timeout during writing the result"); - handled = restinio::request_accepted(); - break; - case UNEXPECTED: - logError(" ## unexpected internal error, exception_message: {}"_format(error_message) - ); - handled = req->create_response( - restinio::status_internal_server_error()).connection_close().done(); - break; - case SEVERE_UNEXPECTED: - logError(" ## severe unexpected internal error, exception_message: {}"_format(error_message) - ); - handled = req->create_response( - restinio::status_internal_server_error()).connection_close().done(); - break; - } - if (handled == restinio::request_rejected()) - logError(fmt::format("Handling the request was rejected.")); - auto end_memory = get_memory_usage(); - logDebug("ram: {:d} kB"_format(end_memory)); - logDebug("ram diff: {:+3d} kB"_format(long(end_memory) - long(start_memory))); - logDebug("request duration: {}"_format(toDurationStr(start_time, steady_clock::now()))); - log("request ended."); - return handled; - }; - - Status - static runQuery(restinio::request_handle_t &req, std::shared_ptr &query_package, - const time_point_t timeout) { - - switch (query_package->getSelectModifier()) { - case SelectModifier::NONE: { - return runQuery < COUNTED_t > (req, query_package, timeout); - } - case SelectModifier::DISTINCT: { - return runQuery < DISTINCT_t > (req, query_package, timeout); - } - default: - break; - } - logTrace("Query type is not yet supported."); - return Status::UNPARSABLE; - }; - - template - static void async_cleanup(std::shared_ptr raw_results) { - std::thread([raw_results{move(raw_results)}]() { - auto &results = *static_cast *>(raw_results.get()); - results.clear(); - }).detach(); - } - - template - static Status - runQuery(restinio::request_handle_t &req, std::shared_ptr &query_package, - const time_point_t timeout) { - if (steady_clock::now() >= timeout) { - return Status::PROCESSING_TIMEOUT; - } - - - // check if it timed out - const std::vector &vars = query_package->getQueryVariables(); - - - - if (query_package->is_trivial_empty) { - // create HTTP response object - auto resp = req->create_response(); - resp.append_header(restinio::http_field::content_type, "application/sparql-results+json"); - - SparqlJsonResultSAXWriter json_result(vars, 1'000UL); - json_result.close(); - resp.set_body(std::string{json_result.string_view()}); - resp.done(); - return Status::OK; - } else { - // create HTTP response object - restinio::response_builder_t resp = req->create_response(); - resp.append_header(restinio::http_field::content_type, "application/sparql-results+json"); - - std::shared_ptr raw_results = query_package->getEinsum(timeout); - auto &results = *static_cast *>(raw_results.get()); - - SparqlJsonResultSAXWriter json_result(vars, chunk_size); - - auto timout_check = 0; - for (const EinsumEntry &result : results) { - json_result.add(result); - if (++timout_check == 100) { - if (steady_clock::now() >= timeout) { - async_cleanup(std::move(raw_results)); - return Status::PROCESSING_TIMEOUT; - } - timout_check = 0; - } - if constexpr(chunked_output) { - if (json_result.full()) { - resp.append_chunk(std::string{json_result.string_view()}); - resp.flush(); - json_result.clear(); - } - } - } - - if (steady_clock::now() >= timeout) { - async_cleanup(std::move(raw_results)); - return Status::PROCESSING_TIMEOUT; - } - - json_result.close(); - - if constexpr(chunked_output) { - resp.append_chunk(std::string{json_result.string_view()}); - } else { - resp.set_body(std::string{json_result.string_view()}); - } - resp.done(); - - 
diff --git a/src/lib/tentris/store/AtomicQueryExecutionPackageCache.hpp b/src/lib/tentris/store/AtomicQueryExecutionPackageCache.hpp
deleted file mode 100644
index 2d103118..00000000
--- a/src/lib/tentris/store/AtomicQueryExecutionPackageCache.hpp
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef TENTRIS_ATOMIC_QUERY_EXECUTION_CACHE_STORE
-#define TENTRIS_ATOMIC_QUERY_EXECUTION_CACHE_STORE
-
-
-#include "tentris/util/SingletonFactory.hpp"
-#include "tentris/store/QueryExecutionPackageCache.hpp"
-#include "tentris/store/config/AtomicTripleStoreConfig.cpp"
-
-namespace tentris::util::sync {
-    template<>
-    inline ::tentris::store::cache::QueryExecutionPackage_cache *
-    SingletonFactory<::tentris::store::cache::QueryExecutionPackage_cache>::make_instance() {
-        const auto &config = ::tentris::store::config::AtomicTripleStoreConfig::getInstance();
-        return new ::tentris::store::cache::QueryExecutionPackage_cache{config.cache_size};
-    }
-};
-
-namespace tentris::store {
-
-    /**
-     * A SingletonFactory that allows to share a single TripleStore instance between multiple threads.
-     */
-    using AtomicQueryExecutionCache = ::tentris::util::sync::SingletonFactory<::tentris::store::cache::QueryExecutionPackage_cache>;
-};
-#endif //TENTRIS_ATOMIC_QUERY_EXECUTION_CACHE_STORE
diff --git a/src/lib/tentris/store/AtomicTripleStore.hpp b/src/lib/tentris/store/AtomicTripleStore.hpp
deleted file mode 100644
index 2d4b69ca..00000000
--- a/src/lib/tentris/store/AtomicTripleStore.hpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef TENTRIS_ATOMIC_TRIPLE_STORE
-#define TENTRIS_ATOMIC_TRIPLE_STORE
-
-
-#include "tentris/util/SingletonFactory.hpp"
-#include "tentris/store/TripleStore.hpp"
-#include "tentris/store/config/AtomicTripleStoreConfig.cpp"
-
-
-namespace tentris::util::sync {
-    template<>
-    inline ::tentris::store::TripleStore *SingletonFactory<::tentris::store::TripleStore>::make_instance() {
-        return new ::tentris::store::TripleStore{};
-    }
-};
-
-namespace tentris::store {
-
-    /**
-     * A SingletonFactory that allows to share a single TripleStore instance between multiple threads.
-     */
-    using AtomicTripleStore = ::tentris::util::sync::SingletonFactory<::tentris::store::TripleStore>;
-};
-#endif //TENTRIS_ATOMIC_TRIPLE_STORE
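Both headers removed above follow the same pattern: a generic SingletonFactory whose construction step is customized per type by explicitly specializing make_instance(). A minimal sketch of that pattern under assumed shape (the original in tentris/util/SingletonFactory.hpp is not reproduced here and additionally synchronizes construction; TripleStoreStub and its cache_size field are illustrative only):

#include <iostream>

template <typename T>
struct SingletonFactory {
    static T &getInstance() {
        // function-local static: initialized exactly once, thread-safe since C++11
        static T *instance = make_instance();
        return *instance;
    }
    static T *make_instance(); // no generic definition: each T specializes it
};

struct TripleStoreStub {
    int cache_size;
};

// Explicit specialization plays the role of the make_instance() definitions
// for TripleStore and QueryExecutionPackage_cache in the headers above.
template <>
TripleStoreStub *SingletonFactory<TripleStoreStub>::make_instance() {
    return new TripleStoreStub{500}; // the real code reads this from the config singleton
}

int main() {
    auto &store = SingletonFactory<TripleStoreStub>::getInstance();
    std::cout << store.cache_size << "\n"; // 500
}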
diff --git a/src/lib/tentris/store/QueryExecutionPackage.hpp b/src/lib/tentris/store/QueryExecutionPackage.hpp
deleted file mode 100644
index cfbc2e93..00000000
--- a/src/lib/tentris/store/QueryExecutionPackage.hpp
+++ /dev/null
@@ -1,163 +0,0 @@
-#ifndef TENTRIS_QUERYEXECUTIONPACKAGE_HPP
-#define TENTRIS_QUERYEXECUTIONPACKAGE_HPP
-
-#include
-#include
-#include
-
-#include "tentris/store/RDF/TermStore.hpp"
-#include "tentris/store/AtomicTripleStore.hpp"
-#include "tentris/store/SPARQL/ParsedSPARQL.hpp"
-#include "tentris/tensor/BoolHypertrie.hpp"
-
-namespace tentris::store {
-    class TripleStore;
-};
-
-namespace tentris::store::cache {
-
-    /**
-     * A QueryExecutionPackage contains everything that is necessary to execute a given sparql query for a state of the
-     * RDF graph.
-     */
-    struct QueryExecutionPackage {
-        using const_BoolHypertrie = ::tentris::tensor::const_BoolHypertrie;
-        using time_point_t = logging::time_point_t;
-        using SelectModifier = sparql::SelectModifier;
-        using Variable = Dice::sparql::Variable;
-        using ParsedSPARQL = sparql::ParsedSPARQL;
-        using Subscript = ::tentris::tensor::Subscript;
-
-    private:
-        std::string sparql_string;
-        std::shared_ptr subscript;
-        SelectModifier select_modifier;
-        std::vector query_variables;
-
-    public:
-        /**
-         * Indicates if the QueryExecutionPackage represents an distinct query or not. If it is distinct use only
-         * the methods with distinct in their names. Otherwise use only the methods with regular in their names
-         */
-
-        bool is_trivial_empty = false;
-
-    private:
-
-        std::vector operands{};
-
-    public:
-        QueryExecutionPackage() = delete;
-
-        /**
-         *
-         * @param sparql_string sparql query to be parsed
-         * @param trie current try holding the data
-         * @param termIndex term store attached to the trie
-         * @throw std::invalid_argument the sparql query was not parsable
-         */
-        explicit QueryExecutionPackage(const std::string &sparql_string) : sparql_string{sparql_string} {
-            using namespace logging;
-            logDebug(fmt::format("Parsing query: {}", sparql_string));
-            ParsedSPARQL parsed_sparql{sparql_string};
-            subscript = parsed_sparql.getSubscript();
-            select_modifier = parsed_sparql.getSelectModifier();
-            logDebug(fmt::format("Parsed subscript: {} [distinct = {}]",
-                                 subscript,
-                                 select_modifier == SelectModifier::DISTINCT));
-            query_variables = parsed_sparql.getQueryVariables();
-
-            auto &triple_store = AtomicTripleStore::getInstance();
-
-            logDebug(fmt::format("Slicing TPs"));
-            for ([[maybe_unused]] const auto &[op_pos, tp]: iter::enumerate(parsed_sparql.getBgps())) {
-                logDebug(fmt::format("Slice key {}: ⟨{}⟩", op_pos, fmt::join(tp, ", ")));
-                std::variant op = triple_store.resolveTriplePattern(tp);
-                if (std::holds_alternative(op)) {
-                    is_trivial_empty = not std::get(op);
-                    logTrace(fmt::format("Operand {} is {}", op_pos, is_trivial_empty));
-                } else {
-                    auto bht = std::get(op);
-                    if (not bht.empty()) {
-                        logTrace(fmt::format("Operand {} size {}", op_pos, bht.size()));
-                        operands.emplace_back(bht);
-                    } else {
-                        is_trivial_empty = true;
-                        operands.clear();
-                    }
-                }
-                if (is_trivial_empty) {
-                    logDebug(fmt::format("Query is trivially empty, i.e. the lastly sliced operand {} is emtpy.", op_pos));
-                    break;
-                }
-            }
-        }
-
-    private:
-        /**
-         * Builds the operator tree for this query.
-         * @tparam RESULT_TYPE the type returned by the operand tree
-         * @param slice_keys slice keys to extract the operands from the hypertries. slice_keys and hypertries must be
-         * of equal length.
-         * @param subscript the subscript that spans the operator tree.
-         * @param hypertries a list of hypertries. typically this is a list containing the data base hypertrie multiple
-         * times.
-         * @return
-         */
-        template
-        static std::shared_ptr generateEinsum(const std::shared_ptr &subscript,
-                                              const std::vector &hypertries,
-                                              const time_point_t &timeout) {
-            using namespace tensor;
-            return std::make_shared>(subscript, hypertries, timeout);
-        }
-
-    public:
-        std::shared_ptr getEinsum(const time_point_t &timeout = time_point_t::max()) const {
-            using namespace tensor;
-            if (select_modifier == SelectModifier::NONE)
-                return generateEinsum(subscript, operands, timeout);
-            else
-                return generateEinsum(subscript, operands, timeout);
-        }
-
-        const std::string &getSparqlStr() const {
-            return sparql_string;
-        }
-
-        const std::shared_ptr &getSubscript() const {
-            return subscript;
-        }
-
-        SelectModifier getSelectModifier() const {
-            return select_modifier;
-        }
-
-        const std::vector &getQueryVariables() const {
-            return query_variables;
-        }
-
-        friend struct ::fmt::formatter;
-    };
-} // namespace tentris::store::cache
-
-template<>
-struct fmt::formatter {
-    template
-    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
-
-    template
-    auto format(const tentris::store::cache::QueryExecutionPackage &p, FormatContext &ctx) {
-        using SelectModifier = tentris::store::sparql::SelectModifier;
-        return format_to(ctx.begin(),
-                         " SPARQL:           {}\n"
-                         " subscript:        {}\n"
-                         " is_distinct:      {}\n"
-                         " is_trivial_empty: {}\n",
-                         p.sparql_string, p.subscript, p.select_modifier == SelectModifier::DISTINCT,
-                         p.is_trivial_empty);
-    }
-};
-
-#endif // TENTRIS_QUERYEXECUTIONPACKAGE_HPP
-
diff --git a/src/lib/tentris/store/QueryExecutionPackageCache.hpp b/src/lib/tentris/store/QueryExecutionPackageCache.hpp
deleted file mode 100644
index 8a1eca65..00000000
--- a/src/lib/tentris/store/QueryExecutionPackageCache.hpp
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef TENTRIS_PARSEDSPARQLCACHES_HPP
-
-#define TENTRIS_PARSEDSPARQLCACHES_HPP
-
-#include "tentris/store/QueryExecutionPackage.hpp"
-#include "tentris/util/SyncedLRUCache.hpp"
-#include
-
-namespace tentris::store::cache {
-    using QueryExecutionPackage_cache = util::sync::SyncedLRUCache;
-
-} // namespace tentris::store::cache
-
-#endif // TENTRIS_PARSEDSPARQLCACHES_HPP
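QueryExecutionPackage_cache maps a raw SPARQL string to a shared, already-parsed QueryExecutionPackage, so a repeated query skips parsing entirely (this is what AtomicQueryExecutionCache::getInstance()[query_string] in the endpoint relies on). A rough sketch of that keyed LRU idea, with illustrative names; the real SyncedLRUCache additionally guards all access with a mutex:

#include <list>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

template <typename V>
class LruCache {
    std::size_t capacity_;
    std::list<std::string> order_; // most recently used at the front
    std::unordered_map<std::string,
                       std::pair<std::shared_ptr<V>, std::list<std::string>::iterator>> map_;

public:
    explicit LruCache(std::size_t capacity) : capacity_{capacity} {}

    std::shared_ptr<V> operator[](std::string const &key) {
        if (auto it = map_.find(key); it != map_.end()) {
            // hit: move the key to the front and share the parsed package
            order_.splice(order_.begin(), order_, it->second.second);
            return it->second.first;
        }
        auto value = std::make_shared<V>(key); // miss: parse once, then cache
        order_.push_front(key);
        map_[key] = {value, order_.begin()};
        if (map_.size() > capacity_) { // evict the least recently used entry
            map_.erase(order_.back());
            order_.pop_back();
        }
        return value;
    }
};

struct ParsedQueryStub { explicit ParsedQueryStub(std::string const &) {} };

int main() {
    LruCache<ParsedQueryStub> cache{2};
    auto a = cache["SELECT * WHERE { ?s ?p ?o }"]; // miss: parsed and cached
    auto b = cache["SELECT * WHERE { ?s ?p ?o }"]; // hit: same shared package
    (void) a; (void) b;
}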
diff --git a/src/lib/tentris/store/RDF/SerdParser.hpp b/src/lib/tentris/store/RDF/SerdParser.hpp
deleted file mode 100644
index 2aa49e6d..00000000
--- a/src/lib/tentris/store/RDF/SerdParser.hpp
+++ /dev/null
@@ -1,221 +0,0 @@
-#ifndef TENTRIS_SERDPARSER_H
-#define TENTRIS_SERDPARSER_H
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace tentris::store::rdf {
-
-    class BulkLoad {
-        using Triple = Dice::rdf::Triple;
-        using Term = Dice::rdf::Term;
-        using BNode = Dice::rdf::BNode;
-        using Literal = Dice::rdf::Literal;
-        using URIRef = Dice::rdf::URIRef;
-        using prefixes_map_type = tsl::hopscotch_map>;
-        prefixes_map_type prefixes{};
-
-    public:
-        boost::lockfree::spsc_queue result_queue{100000};
-        bool parsing_done = false;
-
-    public:
-        static std::shared_ptr parse(const std::string &file_path) {
-
-            auto bulk_load = std::make_shared();
-
-            SerdReader *sr = serd_reader_new(SERD_TURTLE, (void *) bulk_load.get(), nullptr, &serd_base_callback,
-                                             &serd_prefix_callback,
-                                             &serd_callback,
-                                             nullptr);
-
-            std::thread t([=]() {
-                [[maybe_unused]] SerdStatus status = serd_reader_read_file(sr, (uint8_t *) (file_path.data()));
-                bulk_load->parsing_done = true;
-                serd_reader_free(sr);
-            });
-            t.detach();
-
-            return bulk_load;
-
-        }
-
-
-    private:
-
-        static auto getBNode(const SerdNode *node) -> Term {
-            return BNode(std::string(std::string_view{(char *) (node->buf), size_t(node->n_bytes)}));
-        }
-
-        static auto getURI(const SerdNode *node) -> Term {
-            return URIRef(std::string(std::string_view{(char *) (node->buf), size_t(node->n_bytes)}));
-        }
-
-        auto getPrefixedUri(const SerdNode *node) const -> Term {
-            std::string_view uri_node_view{(char *) (node->buf), size_t(node->n_bytes)};
-
-            std::vector prefix_and_suffix{};
-            boost::split(prefix_and_suffix, uri_node_view, [](char c) { return c == ':'; });
-
-            assert(prefix_and_suffix.size() == 2);
-            assert(prefixes.count(std::string{prefix_and_suffix[0]}));
-            return URIRef(fmt::format("{}{}", prefixes.find(prefix_and_suffix[0])->second, prefix_and_suffix[1]));
-        }
-
-        static auto getLiteral(const SerdNode *literal, const SerdNode *type_node, const SerdNode *lang_node) -> Term {
-            std::string literal_value = std::string{(char *) (literal->buf), size_t(literal->n_bytes)};
-            if (type_node != nullptr)
-                return Literal(literal_value, std::nullopt,
-                               std::string{(char *) (type_node->buf), size_t(type_node->n_bytes)});
-            else if (lang_node != nullptr)
-                return Literal(literal_value, std::string{(char *) (lang_node->buf), size_t(lang_node->n_bytes)},
-                               std::nullopt);
-            else
-                return Literal(literal_value, std::nullopt, std::nullopt);
-        };
-
-        static auto serd_base_callback(void *handle, const SerdNode *uri) -> SerdStatus {
-            auto &load = *((BulkLoad *) handle);
-            load.prefixes[""] = std::string((char *) (uri->buf), uri->n_bytes);
-            return SERD_SUCCESS;
-        }
-
-        static auto serd_prefix_callback(void *handle, const SerdNode *name, const SerdNode *uri) -> SerdStatus {
-            auto &load = *((BulkLoad *) handle);
-            load.prefixes[std::string((char *) (name->buf), name->n_bytes)]
-                    = std::string((char *) (uri->buf), uri->n_bytes);
-            return SERD_SUCCESS;
-        }
-
-        static auto
-        serd_callback(void *handle, [[maybe_unused]] SerdStatementFlags flags, [[maybe_unused]] const SerdNode *graph,
-                      const SerdNode *subject,
-                      const SerdNode *predicate, const SerdNode *object, const SerdNode *object_datatype,
-                      const SerdNode *object_lang) -> SerdStatus {
-            auto &bulk_load = *((BulkLoad *) handle);
-            Term subject_term;
-            Term predicate_term;
-            Term object_term;
-
-            switch (subject->type) {
-                case SERD_CURIE:
-                    subject_term = bulk_load.getPrefixedUri(subject);
-                    break;
-                case SERD_URI:
-                    subject_term = getURI(subject);
-                    break;
-                case SERD_BLANK: {
-                    subject_term = getBNode(subject);
-                }
-                    break;
-                default:
-                    return SERD_ERR_BAD_SYNTAX;
-            }
-
-            switch (predicate->type) {
-                case SERD_CURIE:
-                    predicate_term = bulk_load.getPrefixedUri(predicate);
-                    break;
-                case SERD_URI:
-                    predicate_term = getURI(predicate);
-                    break;
-                default:
-                    return SERD_ERR_BAD_SYNTAX;
-            }
-
-            switch (object->type) {
-                case SERD_CURIE:
-                    object_term = bulk_load.getPrefixedUri(object);
-                    break;
-                case SERD_LITERAL:
-                    object_term = getLiteral(object, object_datatype, object_lang);
-                    break;
-                case SERD_BLANK:
-                    object_term = getBNode(object);
-                    break;
-                case SERD_URI:
-                    object_term = getURI(object);
-                    break;
-                default:
-                    return SERD_ERR_BAD_SYNTAX;
-            }
-            while (bulk_load.result_queue.write_available() == 0) {
-                using namespace std::this_thread; // sleep_for, sleep_until
-                using namespace std::chrono; // nanoseconds, steady_clock, seconds
-                sleep_for(milliseconds(5));
-            }
-            bulk_load.result_queue.push({std::move(subject_term), std::move(predicate_term), std::move(object_term)});
-            return SERD_SUCCESS;
-        }
-    };
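BulkLoad hands triples from the detached serd reader thread to the consumer through a bounded spsc_queue: the producer back-pressures by sleeping while write_available() is 0, and parsing_done marks the end of input. The same hand-off, stripped down to ints as a sketch (the original uses a plain bool flag where this sketch uses std::atomic):

#include <atomic>
#include <boost/lockfree/spsc_queue.hpp>
#include <chrono>
#include <iostream>
#include <thread>

int main() {
    boost::lockfree::spsc_queue<int> queue{1024};
    std::atomic<bool> parsing_done{false};

    std::thread producer([&] {
        for (int i = 0; i < 10'000; ++i) {
            // back-pressure, as in serd_callback above
            while (queue.write_available() == 0)
                std::this_thread::sleep_for(std::chrono::milliseconds(5));
            queue.push(i);
        }
        parsing_done = true;
    });

    long count = 0;
    int value;
    // Same two-phase check as BulkLoad's consumer: only give up on an empty
    // queue after the producer has finished and one more pop has failed.
    while (true) {
        if (queue.pop(value)) { ++count; continue; }
        if (parsing_done.load()) {
            if (queue.pop(value)) { ++count; continue; } // drain the race window
            break;
        }
    }
    producer.join();
    std::cout << count << " items consumed\n"; // 10000
}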
-
-    class SerdParser {
-        using Triple = Dice::rdf::Triple;
-        using Term = Dice::rdf::Term;
-        using BNode = Dice::rdf::BNode;
-        using Literal = Dice::rdf::Literal;
-        using URIRef = Dice::rdf::URIRef;
-
-        std::string file_name_;
-
-    public:
-        explicit SerdParser(std::string file_name) : file_name_(std::move(file_name)) {}
-
-
-    public:
-        class Iterator {
-
-            bool done_;
-            std::shared_ptr bulk_load;
-            Triple result;
-
-        public:
-            explicit Iterator(const std::string &file_name) : done_(false), bulk_load(BulkLoad::parse(file_name)) {
-                while (not bulk_load->result_queue.pop(result)) {
-                    if (bulk_load->parsing_done) {
-                        if (not bulk_load->result_queue.pop(result)) {
-                            done_ = true;
-                            return;
-                        }
-                    }
-                }
-            };
-
-            void operator++() {
-                while (not bulk_load->result_queue.pop(result)) {
-                    if (bulk_load->parsing_done) {
-                        if (not bulk_load->result_queue.pop(result)) {
-                            done_ = true;
-                        }
-                        return;
-                    }
-                }
-            }
-
-            void operator++(int) { operator++(); }
-
-            operator bool() const { return not done_; }
-
-            const Triple &operator*() { return result; }
-        };
-
-
-        Iterator begin() { return Iterator(file_name_); }
-
-        bool end() { return false; }
-
-    };
-}
-
-#endif //TENTRIS_SERDPARSER_H
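SerdParser::Iterator is deliberately non-standard: end() is a dummy and termination is signalled through operator bool, so it cannot be driven by a range-based for loop or != comparison. A sketch of the intended consumption pattern, with a stub standing in for the real parser:

#include <cstddef>
#include <iostream>

template <typename Parser>
std::size_t count_items(Parser &parser) {
    std::size_t n = 0;
    // operator bool terminates the loop, not a comparison against end()
    for (auto it = parser.begin(); it; ++it) {
        ++n; // *it would yield the current Triple in the real parser
    }
    return n;
}

// Stubs mimicking the Iterator protocol above, so this sketch is runnable.
struct StubIterator {
    int remaining;
    explicit operator bool() const { return remaining > 0; }
    void operator++() { --remaining; }
    int operator*() const { return remaining; }
};
struct StubParser {
    StubIterator begin() { return StubIterator{3}; }
    bool end() { return false; } // unused, mirrors SerdParser::end()
};

int main() {
    StubParser parser;
    std::cout << count_items(parser) << "\n"; // 3
}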
diff --git a/src/lib/tentris/store/RDF/TermStore.hpp b/src/lib/tentris/store/RDF/TermStore.hpp
deleted file mode 100644
index 5d6f6519..00000000
--- a/src/lib/tentris/store/RDF/TermStore.hpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#ifndef TENTRIS_STORE_RDFTERMINDEX
-#define TENTRIS_STORE_RDFTERMINDEX
-
-#include
-#include
-#include
-
-#include
-
-#include
-
-namespace tentris::store::rdf {
-
-
-    /**
-     * A hash for Terms that returns for Term* the hash hash(Term) instead of hash(Term*).
-     */
-    struct TermHash {
-    private:
-        using Term = Dice::rdf::Term;
-    public:
-        size_t operator()(Term const &term) const {
-            return ::Dice::hash::dice_hash(term);
-        }
-
-        size_t operator()(std::unique_ptr const &term_ptr) const {
-            return ::Dice::hash::dice_hash(*term_ptr);
-        }
-
-        size_t operator()(Term const *const term_ptr) const {
-            return ::Dice::hash::dice_hash(*term_ptr);
-        }
-    };
-
-
-    class TermStore {
-        using Term = Dice::rdf::Term;
-        using BNode = Dice::rdf::BNode;
-        using Literal = Dice::rdf::Literal;
-        using URIRef = Dice::rdf::URIRef;
-    public:
-        using set_type = tsl::sparse_set,
-                TermHash,
-                std::equal_to<>,
-                std::allocator>,
-                tsl::sh::power_of_two_growth_policy<2>,
-                tsl::sh::exception_safety::basic>;
-        using const_iterator = set_type::const_iterator;
-
-    private:
-
-        set_type terms{};
-    public:
-        using ptr_type = Term const *;
-
-        [[nodiscard]] bool contains(const Term &term) const {
-            auto term_hash = TermHash()(term);
-            return contains(term, term_hash);
-        }
-
-        [[nodiscard]] bool contains(const Term &term, const std::size_t &term_hash) const {
-            auto found = terms.find(term, term_hash);
-            return found != terms.end();
-        }
-
-        [[nodiscard]] bool valid(ptr_type term_ptr) const {
-            auto term_hash = TermHash()(term_ptr);
-            auto found = terms.find(*term_ptr, term_hash);
-            return found != terms.end();
-        }
-
-        [[nodiscard]] ptr_type get(const Term &term) const {
-            auto term_hash = TermHash()(term);
-            return get(term, term_hash);
-        }
-
-        [[nodiscard]] ptr_type get(const Term &term, const std::size_t &term_hash) const {
-            auto found = terms.find(term, term_hash);
-            if (found != terms.end())
-                return (*found).get();
-            else {
-                throw std::out_of_range{"Term {} not in TermStore."};
-            }
-        }
-
-        [[nodiscard]] ptr_type find(const Term &term, const std::size_t &term_hash) const {
-            if (auto found = terms.find(term, term_hash); found != terms.end()) {
-                return (*found).get();
-            } else {
-                return nullptr;
-            }
-        }
-
-        [[nodiscard]] ptr_type find(const Term &term) const {
-            auto term_hash = TermHash()(term);
-            return find(term, term_hash);
-        }
-
-        ptr_type operator[](const Term &term) {
-            auto term_hash = TermHash()(term);
-            auto found = terms.find(term, term_hash);
-            if (found != terms.end())
-                return (*found).get();
-            else {
-                const auto &[iter, success] = terms.emplace(std::make_unique(term));
-                assert(success);
-                return (*iter).get();
-            }
-        }
-
-        friend class fmt::formatter;
-
-        [[nodiscard]] std::size_t size() const {
-            return terms.size();
-        }
-
-    };
-};
-
-template<>
-struct fmt::formatter {
-    template
-    constexpr auto parse(ParseContext &ctx) { return ctx.begin(); }
-
-    template
-    auto format(const tentris::store::rdf::TermStore &p, FormatContext &ctx) {
-        return format_to(ctx.begin(),
-                         " Entries:\n"
-                         " {}\n",
-                         join(p.terms.begin(), p.terms.end(), "\n "));
-    }
-};
-
-
-#endif //TENTRIS_STORE_RDFTERMINDEX
-
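TermStore::operator[] interns terms: each distinct Term is stored once behind a stable unique_ptr, and callers receive a raw pointer whose identity can stand in for term equality afterwards. The same idea in a minimal, illustrative form using std::unordered_set, whose node-based storage already gives stable element addresses across rehashes (which is what the unique_ptr indirection guarantees in the original):

#include <cassert>
#include <iostream>
#include <string>
#include <unordered_set>

class TermInterner {
    std::unordered_set<std::string> terms_;
public:
    std::string const *operator[](std::string const &term) {
        // insert is a no-op on duplicates; either way the element's address is stable
        return &*terms_.insert(term).first;
    }
    std::size_t size() const { return terms_.size(); }
};

int main() {
    TermInterner store;
    auto *a = store["<http://example.org/s>"];
    auto *b = store["<http://example.org/s>"];
    assert(a == b);                    // same term, same pointer
    std::cout << store.size() << "\n"; // 1
}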
diff --git a/src/lib/tentris/store/SPARQL/ParsedSPARQL.hpp b/src/lib/tentris/store/SPARQL/ParsedSPARQL.hpp
deleted file mode 100644
index 01be32d7..00000000
--- a/src/lib/tentris/store/SPARQL/ParsedSPARQL.hpp
+++ /dev/null
@@ -1,434 +0,0 @@
-#ifndef TENTRIS_SPARQLPARSER_HPP
-#define TENTRIS_SPARQLPARSER_HPP
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#include
-
-#include
-
-#include
-#include
-
-#include
-#include
-#include
-#include
-
-
-namespace tentris::store::sparql {
-
-    namespace {
-        using Subscript = einsum::internal::Subscript;
-        namespace parser = Dice::sparql_parser::base;
-        using SparqlParser = parser::SparqlParser;
-        using namespace fmt::literals;
-    }
-
-
-    enum SelectModifier {
-        NONE,
-        DISTINCT,
-        REDUCE
-    };
-
-    class LexerErrorListener : public antlr4::BaseErrorListener {
-        using Variable = Dice::sparql::Variable;
-    public:
-        LexerErrorListener() = default;
-
-        void
-        syntaxError([[maybe_unused]]antlr4::Recognizer *recognizer, [[maybe_unused]]antlr4::Token *offendingSymbol,
-                    [[maybe_unused]]size_t line, [[maybe_unused]]size_t charPositionInLine, const std::string &msg,
-                    [[maybe_unused]]std::exception_ptr e) override {
-            throw std::invalid_argument{msg};
-        }
-    };
-
-    class ParserErrorListener : public antlr4::BaseErrorListener {
-    public:
-        ParserErrorListener() = default;
-
-        void
-        syntaxError([[maybe_unused]]antlr4::Recognizer *recognizer, [[maybe_unused]]antlr4::Token *offendingSymbol,
-                    [[maybe_unused]]size_t line, [[maybe_unused]]size_t charPositionInLine, const std::string &msg,
-                    [[maybe_unused]]std::exception_ptr e) override {
-            throw std::invalid_argument{msg};
-        }
-    };
-
-
-    class ParsedSPARQL {
-        using Term = Dice::rdf::Term;
-        using BNode = Dice::rdf::BNode;
-        using Literal = Dice::rdf::Literal;
-        using URIRef = Dice::rdf::URIRef;
-        using TriplePattern = Dice::sparql::TriplePattern;
-        using VarOrTerm = Dice::sparql::VarOrTerm;
-        using Variable = Dice::sparql::Variable;
-
-        using SparqlLexer = parser::SparqlLexer;
-        using ANTLRInputStream = antlr4::ANTLRInputStream;
-        using CommonTokenStream = antlr4::CommonTokenStream;
-        using QueryContext = SparqlParser::QueryContext;
-        std::string sparql_str;
-
-        SelectModifier select_modifier = NONE;
-
-        robin_hood::unordered_map prefixes{};
-        std::vector query_variables{};
-        robin_hood::unordered_set variables{};
-        robin_hood::unordered_set anonym_variables{};
-        std::vector bgps;
-        uint next_anon_var_id = 0;
-        std::shared_ptr subscript;
-
-    public:
-
-        ParsedSPARQL() = default;
-
-
-        explicit ParsedSPARQL(std::string sparqlstr) :
-                sparql_str{std::move(sparqlstr)} {
-            namespace ranges = std::ranges;
-
-            std::istringstream str_stream{sparql_str};
-            ANTLRInputStream input{str_stream};
-            SparqlLexer lexer{&input};
-            CommonTokenStream tokens{&lexer};
-            SparqlParser parser{&tokens};
-            // replace the error handler
-            auto lexerErrorListener = LexerErrorListener{};
-            lexer.removeErrorListeners();
-            lexer.addErrorListener(&lexerErrorListener);
-
-            auto parserErrorListener = ParserErrorListener{};
-            parser.removeParseListeners();
-            parser.removeErrorListeners();
-            parser.addErrorListener(&parserErrorListener);
-            // check that _query is present
-            QueryContext *_query = parser.query();
-            if (_query == nullptr)
-                throw std::invalid_argument("The query was not parsable");
-            else {
-                const std::vector &prefixDecl = _query->prologue()->prefixDecl();
-                for (auto &prefix : prefixDecl)
-                    // remove < and > from <...>
-                    prefixes[prefix->PNAME_NS()->getText()] = std::string(prefix->IRI_REF()->getText(), 1,
-                                                                          prefix->IRI_REF()->getText().size() - 2);
-
-
-                SparqlParser::SelectQueryContext *select = _query->selectQuery();
-                select_modifier = getSelectModifier(select);
-                bool all_vars = false;
-                if (std::vector vars = select->var(); not vars.empty())
-                    for (auto &var : vars)
-                        query_variables.push_back(extractVariable(var));
-                else
-                    all_vars = true;
-
-                std::queue tripleBlocks;
-                for (auto &block : select->whereClause()->groupGraphPattern()->triplesBlock())
-                    tripleBlocks.push(block);
-                while (not tripleBlocks.empty()) {
-                    auto block = tripleBlocks.front();
-                    tripleBlocks.pop();
-                    SparqlParser::TriplesSameSubjectContext *triplesSameSubject = block->triplesSameSubject();
-
-                    VarOrTerm subj = parseVarOrTerm(triplesSameSubject->varOrTerm());
-                    registerVariable(subj);
-                    SparqlParser::PropertyListNotEmptyContext *propertyListNotEmpty = triplesSameSubject->propertyListNotEmpty();
-                    for (auto[pred_node, obj_nodes] : iter::zip(propertyListNotEmpty->verb(),
-                                                                propertyListNotEmpty->objectList())) {
-                        VarOrTerm pred = parseVerb(pred_node);
-                        registerVariable(pred);
-
-                        for (auto &obj_node : obj_nodes->object()) {
-                            VarOrTerm obj = parseObject(obj_node);
-                            registerVariable(obj);
-                            if(ranges::find(bgps, TriplePattern{subj, pred, obj}) == bgps.end())
-                                bgps.push_back(TriplePattern{subj, pred, obj});
-                        }
-                    }
-                    if (auto *next_block = block->triplesBlock(); next_block)
-                        tripleBlocks.push(next_block);
-                }
-                for (const auto &variable : query_variables)
-                    variables.insert(variable);
-                if (all_vars)
-                    for (const auto &variable : variables)
-                        if (not anonym_variables.contains(variable))
-                            query_variables.push_back(variable);
-
-
-
-                using Label = Subscript::Label;
-                // generate subscript
-                robin_hood::unordered_map var_to_label{};
-                Label next_label = 'a';
-                for (const auto &var : variables) {
-                    var_to_label[var] = next_label++;
-                }
-                std::vector> ops_labels{};
-                for (const auto &bgp : bgps) {
-                    std::vector