Skip to content

Commit b1b8234

Browse files
committed
add distrdf2ml paper
1 parent 2f031d4 commit b1b8234

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

aksw.bib

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12395,4 +12395,22 @@ @Article{stadler2022-lsq20
1239512395
url = {https://www.semantic-web-journal.net/system/files/swj3015.pdf},
1239612396
}
1239712397

@InProceedings{Draschner2021,
  author    = {Draschner, Carsten Felix and Stadler, Claus and Bakhshandegan Moghaddam, Farshad and Lehmann, Jens and Jabeen, Hajira},
  booktitle = {Proceedings of the 30th ACM International Conference on Information \& Knowledge Management},
  title     = {{DistRDF2ML} - Scalable Distributed In-Memory Machine Learning Pipelines for {RDF} Knowledge Graphs},
  year      = {2021},
  address   = {New York, NY, USA},
  pages     = {4465--4474},
  publisher = {Association for Computing Machinery},
  series    = {CIKM '21},
  abstract  = {This paper presents DistRDF2ML, the generic, scalable, and distributed framework for creating in-memory data preprocessing pipelines for Spark-based machine learning on RDF knowledge graphs. This framework introduces software modules that transform large-scale RDF data into ML-ready fixed-length numeric feature vectors. The developed modules are optimized to the multi-modal nature of knowledge graphs. DistRDF2ML provides aligned software design and usage principles as common data science stacks that offer an easy-to-use package for creating machine learning pipelines. The modules used in the pipeline, the hyper-parameters and the results are exported as a semantic structure that can be used to enrich the original knowledge graph. The semantic representation of metadata and machine learning results offers the advantage of increasing the machine learning pipelines' reusability, explainability, and reproducibility. The entire framework of DistRDF2ML is open source, integrated into the holistic SANSA stack, documented in scala-docs, and covered by unit tests. DistRDF2ML demonstrates its scalable design across different processing power configurations and (hyper-)parameter setups within various experiments. The framework brings the three worlds of knowledge graph engineers, distributed computation developers, and data scientists closer together and offers all of them the creation of explainable ML pipelines using a few lines of code.},
  doi       = {10.1145/3459637.3481999},
  isbn      = {9781450384469},
  keywords  = {group_aksw sys:relevantFor:infai stadler lehmann jabeen},
  location  = {Virtual Event, Queensland, Australia},
  numpages  = {10},
  url       = {https://svn.aksw.org/papers/2021/cikm-distrdf2ml/public.pdf},
}
1239812416
@Comment{jabref-meta: databaseType:bibtex;}

0 commit comments

Comments
 (0)