Skip to content

Commit

Permalink
Update documentation and sparql queries.
Browse files Browse the repository at this point in the history
  • Loading branch information
Sheeba-Samuel committed Apr 23, 2024
1 parent 1903839 commit b2728d4
Show file tree
Hide file tree
Showing 23 changed files with 326 additions and 4 deletions.
19 changes: 15 additions & 4 deletions docs/content/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,30 @@ analysis through queries that can be tailored to specific use cases.
## Resources

### SPARQL Endpoint
The SPARQL endpoint can be queried [here](https://reproduceme.uni-jena.de/#/dataset/fairjupyter/query).

### SPARQL Queries
Example SPARQL queries that can be run over the FAIR Jupyter KG are available [here](https://github.com/fusion-jena/fairjupyter/tree/main/sparql_query).

These include SPARQL queries to the knowledge graph that reproduce materials from the
original manuscript describing the dataset, other queries over the FAIR Jupyter graph, and federated queries between the FAIR Jupyter KG and [Wikidata](https://query.wikidata.org/).

### Original Dataset
Data: Sheeba Samuel, & Daniel Mietchen. (2023). Dataset of a Study of Computational reproducibility of Jupyter notebooks from biomedical publications [Data set]. Zenodo. [https://doi.org/10.5281/zenodo.8226725](https://doi.org/10.5281/zenodo.8226725)

Code: [https://github.com/fusion-jena/computational-reproducibility-pmc](https://github.com/fusion-jena/computational-reproducibility-pmc)

### Documentation

### FAIR Jupyter KG N-Triples Dump

### FAIR Jupyter KG Schema
The ontologies used for constructing the FAIR Jupyter Knowledge Graph are:
* [The REPRODUCE-ME ontology](https://w3id.org/reproduceme/)
* [The PROV-O ontology](https://www.w3.org/TR/prov-o/)
* [The P-Plan ontology](http://purl.org/net/p-plan#)
* [The PAV ontology](http://purl.org/pav/)
* [The FaBiO ontology](http://purl.org/spar/fabio)
* [The DOAP ontology](http://usefulinc.com/ns/doap#)

### FAIR Jupyter KG Data Used (CSV)
The data used for constructing the FAIR Jupyter Knowledge Graph is available [here](https://github.com/fusion-jena/fairjupyter/tree/main/data).


## Publication
Expand Down
11 changes: 11 additions & 0 deletions sparql_query/q1.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Top 10 research-field labels, ranked by the number of distinct articles
# that at least one repository was retrieved from.
# NOTE: the prov-o# namespace below is the IRI used by these queries as-is.
PREFIX pav:   <http://purl.org/pav/>
PREFIX provo: <http://www.w3.org/ns/prov-o#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

SELECT
  ?research_field
  (COUNT(DISTINCT ?article) AS ?number_of_articles)
WHERE {
  # Article -> ?mesh -> ?top_mesh, whose label is reported as the field.
  ?article  provo:specializationOf ?mesh .
  ?mesh     provo:generalizationOf ?top_mesh .
  ?top_mesh rdfs:label             ?research_field .

  # Keep only articles that some repository points back to.
  ?repository pav:retrievedFrom ?article .
}
GROUP BY ?research_field
ORDER BY DESC(?number_of_articles)
LIMIT 10
8 changes: 8 additions & 0 deletions sparql_query/q10.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ten most frequent exception values attached to cell executions.
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT
  ?exception
  (COUNT(?exception) AS ?count)
WHERE {
  ?execution a repr:CellExecution .
  ?execution repr:exception ?exception .
}
GROUP BY ?exception
ORDER BY DESC(?count)
LIMIT 10
18 changes: 18 additions & 0 deletions sparql_query/q11.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Top 10 research-field labels by number of cell-execution exceptions,
# restricted to repositories whose notebooks_count is greater than zero.
PREFIX doap:  <http://usefulinc.com/ns/doap#>
PREFIX fabio: <http://purl.org/spar/fabio/>
PREFIX pav:   <http://purl.org/pav/>
PREFIX provo: <http://www.w3.org/ns/prov-o#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr:  <https://w3id.org/reproduceme/>
PREFIX xsd:   <http://www.w3.org/2001/XMLSchema#>

# GROUP BY already yields one row per ?research_field, so no DISTINCT is needed.
SELECT
  ?research_field
  (COUNT(?exception) AS ?exception_count)
WHERE {
  # Execution -> repository -> article chain.
  ?execution  a                    repr:CellExecution ;
              repr:exception       ?exception ;
              pav:retrievedFrom    ?repository .
  ?repository a                    doap:GitRepository ;
              pav:retrievedFrom    ?article ;
              repr:notebooks_count ?notebooks_count .
  FILTER (xsd:integer(?notebooks_count) > 0)

  # Article -> ?mesh -> ?top_mesh, whose label is reported as the field.
  ?article    a                      fabio:Article ;
              provo:specializationOf ?mesh .
  ?mesh       provo:generalizationOf ?top_mesh .
  ?top_mesh   rdfs:label             ?research_field .
}
GROUP BY ?research_field
ORDER BY DESC(?exception_count)
LIMIT 10
16 changes: 16 additions & 0 deletions sparql_query/q12.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Counts cell executions without an exception, split by the value of
# reproduceme:processed (35 vs. 51), plus the sum of both counts.
# NOTE(review): the variable names suggest 35 = "finished with a different
# result" and 51 = "finished with the same result" -- confirm these codes
# against the dataset documentation.
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
# The third expression reuses the two aliases defined earlier in this SELECT.
SELECT (COUNT(?processed_different_result) AS ?count_different_result) (COUNT(?processed_same_result) AS ?count_same_result) (?count_same_result + ?count_different_result AS ?count_successful_executions)
WHERE {
?execution a <https://w3id.org/reproduceme/CellExecution> .
# ?exception is bound only for executions that recorded an exception; the
# !bound(?exception) tests below rely on this OPTIONAL coming first.
OPTIONAL { ?execution <https://w3id.org/reproduceme/exception> ?exception . }
# Binds ?processed_different_result only when processed = 35 and no exception was found.
OPTIONAL {
?execution <https://w3id.org/reproduceme/processed> ?processed_different_result .
FILTER ((xsd:integer(?processed_different_result) = 35) && !bound(?exception))
}
# Binds ?processed_same_result only when processed = 51 and no exception was found.
OPTIONAL {
?execution <https://w3id.org/reproduceme/processed> ?processed_same_result .
FILTER ((xsd:integer(?processed_same_result) = 51) && !bound(?exception))
}


}
7 changes: 7 additions & 0 deletions sparql_query/q13.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Sample (10 rows) of notebook code-style errors with their descriptions
# and the notebook each error was retrieved from.
PREFIX pav:  <http://purl.org/pav/>
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT ?notebook ?error ?description
WHERE {
  ?error a repr:NotebookCodeStyleError .
  ?error pav:retrievedFrom ?notebook .
  ?error repr:description  ?description .
}
LIMIT 10
15 changes: 15 additions & 0 deletions sparql_query/q14.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Notebooks from articles whose keywords, title, or journal label mention
# "immun" together with a word starting with "stem" or "differentiation".
# Returns a constructed link to each notebook plus the article label and
# keywords.
PREFIX pav:  <http://purl.org/pav/>
PREFIX rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT DISTINCT ?notebook_url ?article_label ?keywords
WHERE {
  # Article metadata searched by both text filters.
  ?article repr:keywords    ?keywords ;
           rdfs:label       ?article_label ;
           repr:publishedIn ?journal .
  ?journal rdfs:label ?journal_label .

  # Both patterns must match the lower-cased, concatenated text.
  FILTER (REGEX(LCASE(CONCAT(?keywords, " ", ?article_label, " ", ?journal_label)), "immun"))
  FILTER (REGEX(LCASE(CONCAT(?keywords, " ", ?article_label, " ", ?journal_label)), "\\b(stem|differentiation)"))

  # Repository of the article, and the notebooks retrieved from it.
  ?repository pav:retrievedFrom ?article ;
              repr:url          ?repo_url_base .   # repository base URL
  ?notebook   pav:retrievedFrom ?repository ;
              rdf:type          repr:Notebook ;
              rdfs:label        ?notebook_label .  # used as the file name

  # Build a link of the form <repo>/blob/master/<notebook label>.
  BIND (URI(CONCAT(?repo_url_base, "/blob/master/", ?notebook_label)) AS ?notebook_url)
  FILTER (?notebook_url != "")
}
4 changes: 4 additions & 0 deletions sparql_query/q15.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Articles whose keywords contain "open" and "source" separated by exactly
# one character (e.g. "open source", "open-source"), case-insensitively.
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT DISTINCT ?article ?keywords
WHERE {
  ?article repr:keywords ?keywords .
  FILTER (REGEX(LCASE(?keywords), "open(.)source"))
}
11 changes: 11 additions & 0 deletions sparql_query/q16.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Exception frequencies for cell executions whose article keywords contain
# "immun" (case-insensitive), most frequent first.
PREFIX doap: <http://usefulinc.com/ns/doap#>
PREFIX pav:  <http://purl.org/pav/>
PREFIX repr: <https://w3id.org/reproduceme/>

# GROUP BY already yields one row per ?exception, so no DISTINCT is needed.
SELECT
  ?exception
  (COUNT(?exception) AS ?count)
WHERE {
  ?article    repr:keywords     ?keywords .
  FILTER (REGEX(LCASE(?keywords), "immun"))

  ?repository a                 doap:GitRepository ;
              pav:retrievedFrom ?article .
  ?execution  a                 repr:CellExecution ;
              pav:retrievedFrom ?repository ;
              repr:exception    ?exception .
}
GROUP BY ?exception
ORDER BY DESC(?count)
13 changes: 13 additions & 0 deletions sparql_query/q17.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Exception frequencies for cell executions belonging to articles published
# in the journal labelled exactly "Nature", most frequent first.
PREFIX doap: <http://usefulinc.com/ns/doap#>
PREFIX pav:  <http://purl.org/pav/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT
  ?exception
  (COUNT(?exception) AS ?count)
WHERE {
  # Journal selection.
  ?journal    rdfs:label        ?journal_name .
  FILTER (?journal_name = "Nature")
  ?article    repr:publishedIn  ?journal .

  # Article -> repository -> execution chain.
  ?repository a                 doap:GitRepository ;
              pav:retrievedFrom ?article .
  ?execution  a                 repr:CellExecution ;
              pav:retrievedFrom ?repository ;
              repr:exception    ?exception .
}
GROUP BY ?exception
ORDER BY DESC(?count)
17 changes: 17 additions & 0 deletions sparql_query/q18.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Number of 'ModuleNotFoundError' exceptions per research-field label,
# highest count first.
PREFIX doap:  <http://usefulinc.com/ns/doap#>
PREFIX fabio: <http://purl.org/spar/fabio/>
PREFIX pav:   <http://purl.org/pav/>
PREFIX provo: <http://www.w3.org/ns/prov-o#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr:  <https://w3id.org/reproduceme/>

# GROUP BY already yields one row per ?research_field, so no DISTINCT is needed.
SELECT
  ?research_field
  (COUNT(?exception) AS ?exception_count)
WHERE {
  ?execution  a                    repr:CellExecution ;
              repr:exception       ?exception ;
              pav:retrievedFrom    ?repository .
  FILTER (?exception = 'ModuleNotFoundError')

  # ?notebooks_count is not used further, but the pattern still requires the
  # repository to carry a notebooks_count value.
  ?repository a                    doap:GitRepository ;
              pav:retrievedFrom    ?article ;
              repr:notebooks_count ?notebooks_count .

  # Article -> ?mesh -> ?top_mesh, whose label is reported as the field.
  ?article    a                      fabio:Article ;
              provo:specializationOf ?mesh .
  ?mesh       provo:generalizationOf ?top_mesh .
  ?top_mesh   rdfs:label             ?research_field .
}
GROUP BY ?research_field
ORDER BY DESC(?exception_count)
7 changes: 7 additions & 0 deletions sparql_query/q19.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Repositories with a positive stargazers count, most starred first.
PREFIX repr: <https://w3id.org/reproduceme/>
PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT ?repo ?stargazers_count
WHERE {
  ?repo repr:stargazers_count ?count .
  # Cast the stored value to a number so that filtering and ordering are numeric.
  BIND (xsd:float(?count) AS ?stargazers_count)
  FILTER (?stargazers_count > 0)
}
ORDER BY DESC(?stargazers_count)
22 changes: 22 additions & 0 deletions sparql_query/q2.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Per research-field label: number of distinct repositories linked to its
# articles, and number of those whose notebooks_count is greater than zero.
# Top 10 by repository count.
PREFIX pav:   <http://purl.org/pav/>
PREFIX provo: <http://www.w3.org/ns/prov-o#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr:  <https://w3id.org/reproduceme/>
PREFIX xsd:   <http://www.w3.org/2001/XMLSchema#>

SELECT
  ?research_field
  (COUNT(DISTINCT ?repository)    AS ?repository_count)
  (COUNT(DISTINCT ?repository_nb) AS ?repositories_with_notebooks_count)
WHERE {
  # Each UNION branch binds a different variable, so the two counts are
  # computed independently over the same set of articles.
  { ?repository pav:retrievedFrom ?article . }
  UNION
  {
    ?repository_nb pav:retrievedFrom    ?article ;
                   repr:notebooks_count ?notebooks_count .
    FILTER (xsd:integer(?notebooks_count) > 0)
  }

  # Article -> ?mesh -> ?top_mesh, whose label is reported as the field.
  ?article  provo:specializationOf ?mesh .
  ?mesh     provo:generalizationOf ?top_mesh .
  ?top_mesh rdfs:label             ?research_field .
}
GROUP BY ?research_field
ORDER BY DESC(?repository_count)
LIMIT 10
22 changes: 22 additions & 0 deletions sparql_query/q20.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Federated query: match articles to Wikidata items by DOI and return the
# English label of each matching item (first 100 rows).
PREFIX rdfs:         <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr:         <https://w3id.org/reproduceme/>
PREFIX wikidata_wdt: <http://www.wikidata.org/prop/direct/>

SELECT DISTINCT ?fj_article ?wikidata ?wikidata_label ?DOI
WHERE {
  ?fj_article repr:doi ?doi .
  # The DOI is upper-cased before it is matched against Wikidata.
  BIND (UCASE(?doi) AS ?DOI)

  SERVICE <https://query.wikidata.org/sparql> {
    ?wikidata wikidata_wdt:P356 ?DOI ;             # P356: DOI
              rdfs:label        ?wikidata_label .
    FILTER (LANG(?wikidata_label) = "en")
  }
}
LIMIT 100
21 changes: 21 additions & 0 deletions sparql_query/q21.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Federated query: match articles to Wikidata items by PMC identifier and
# return the English label of each matching item (first 100 rows).
# NOTE(review): the match is an exact literal comparison, so the stored pmc
# values must use the same form as Wikidata's P932 values -- confirm.
PREFIX rdfs:         <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr:         <https://w3id.org/reproduceme/>
PREFIX wikidata_wdt: <http://www.wikidata.org/prop/direct/>

SELECT DISTINCT ?fj_article ?wikidata ?wikidata_label ?pmc
WHERE {
  ?fj_article repr:pmc ?pmc .

  SERVICE <https://query.wikidata.org/sparql> {
    ?wikidata wikidata_wdt:P932 ?pmc ;             # P932: PMC publication ID
              rdfs:label        ?wikidata_label .
    FILTER (LANG(?wikidata_label) = "en")
  }
}
LIMIT 100
22 changes: 22 additions & 0 deletions sparql_query/q22.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Federated query: match the MeSH descriptors of articles to Wikidata items
# and return the Malayalam ("ml") label of each matching item (first 100 rows).
#
# The original projected ?DOI, which is never bound in this query, so that
# column was always empty. ?MESH, the identifier actually computed and matched,
# is projected instead.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

PREFIX wikidata_wd: <http://www.wikidata.org/entity/>
PREFIX wikidata_wdt: <http://www.wikidata.org/prop/direct/>

SELECT DISTINCT

?fj_article
?wikidata
?wikidata_label
?MESH

WHERE {
?fj_article <http://www.w3.org/ns/prov-o#specializationOf> ?mesh_url .
# Strip everything up to "MESH/" so that only the descriptor ID ("D...") remains.
BIND(REPLACE(STR(?mesh_url), ".*MESH/D", "D") AS ?MESH)
service <https://query.wikidata.org/sparql> {
# P486: MeSH descriptor ID
?wikidata wikidata_wdt:P486 ?MESH .
?wikidata rdfs:label ?wikidata_label .
# "ml" is the language tag for Malayalam.
FILTER (LANG(?wikidata_label) = "ml")
}
}
LIMIT 100
8 changes: 8 additions & 0 deletions sparql_query/q3.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ten journal labels with the most articles.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT
  ?journal_name
  (COUNT(?article) AS ?article_count)
WHERE {
  ?journal rdfs:label       ?journal_name .
  ?article repr:publishedIn ?journal .
}
GROUP BY ?journal_name
ORDER BY DESC(?article_count)
LIMIT 10
18 changes: 18 additions & 0 deletions sparql_query/q4.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Per journal label: number of repository links to its articles, and number
# of those links whose repository has notebooks_count greater than zero.
# Top 10 by repository count.
# Note: the counts are not DISTINCT, so each (article, repository) match is
# counted once.
PREFIX pav:  <http://purl.org/pav/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr: <https://w3id.org/reproduceme/>
PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>

SELECT
  ?journal_name
  (COUNT(?repository)    AS ?repository_count)
  (COUNT(?repository_nb) AS ?repositories_with_notebooks_count)
WHERE {
  ?article repr:publishedIn ?journal .
  ?journal rdfs:label       ?journal_name .

  # Each UNION branch binds a different variable, so the two counts are
  # computed independently.
  { ?repository pav:retrievedFrom ?article . }
  UNION
  {
    ?repository_nb pav:retrievedFrom    ?article ;
                   repr:notebooks_count ?notebooks_count .
    FILTER (xsd:integer(?notebooks_count) > 0)
  }
}
GROUP BY ?journal_name
ORDER BY DESC(?repository_count)
LIMIT 10
23 changes: 23 additions & 0 deletions sparql_query/q5.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Per journal label: number of repository links whose repository has
# notebooks_count > 0, together with the largest notebooks_count found for
# that journal. Top 10 by number of such repositories.
#
# Fix: the maximum is now taken over xsd:integer(?notebooks_count). The
# original aggregated the raw literal, although every other use of this value
# casts it first; on string literals MAX compares lexically ("9" > "10").
PREFIX pav:  <http://purl.org/pav/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX repr: <https://w3id.org/reproduceme/>
PREFIX xsd:  <http://www.w3.org/2001/XMLSchema#>

SELECT
  ?journal_name
  (COUNT(?repository_nb) AS ?repositories_with_notebooks_count)
  ?max_notebooks_count
WHERE {
  # Sub-select: numeric maximum of notebooks_count per journal.
  {
    SELECT ?journal (MAX(xsd:integer(?notebooks_count)) AS ?max_notebooks_count)
    WHERE {
      ?article       repr:publishedIn     ?journal .
      ?journal       rdfs:label           ?journal_name .
      ?repository_nb pav:retrievedFrom    ?article ;
                     repr:notebooks_count ?notebooks_count .
      FILTER (xsd:integer(?notebooks_count) > 0)
    }
    GROUP BY ?journal
  }

  # Outer pattern: the same repositories again, joined on ?journal, so they
  # can be counted next to the per-journal maximum.
  ?article       repr:publishedIn     ?journal .
  ?journal       rdfs:label           ?journal_name .
  ?repository_nb pav:retrievedFrom    ?article ;
                 repr:notebooks_count ?notebooks_count .
  FILTER (xsd:integer(?notebooks_count) > 0)
}
GROUP BY ?journal_name ?max_notebooks_count
ORDER BY DESC(?repositories_with_notebooks_count)
LIMIT 10
8 changes: 8 additions & 0 deletions sparql_query/q6.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ten most common notebook languages.
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT
  ?language
  (COUNT(?notebook) AS ?notebook_count)
WHERE {
  ?notebook a repr:Notebook .
  ?notebook repr:language ?language .
}
GROUP BY ?language
ORDER BY DESC(?notebook_count)
LIMIT 10
12 changes: 12 additions & 0 deletions sparql_query/q7.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Number of notebooks per repository creation year and notebook language.
PREFIX pav:  <http://purl.org/pav/>
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT
  ?created_year
  ?language
  (COUNT(?notebook) AS ?notebook_count)
WHERE {
  # ?version is not used further, but the pattern still requires the
  # notebook to carry a language_version value.
  ?notebook   a                     repr:Notebook ;
              repr:language         ?language ;
              repr:language_version ?version ;
              pav:retrievedFrom     ?repository .
  ?repository repr:created_at       ?created_at .

  # Keep only the leading digits before the first "-" of the timestamp.
  BIND (REPLACE(STR(?created_at), "(\\d*)-.*", "$1") AS ?created_year)
}
GROUP BY ?created_year ?language
ORDER BY ?created_year ?language

14 changes: 14 additions & 0 deletions sparql_query/q8.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Number of Python notebooks per repository creation year and Python
# "major.minor" version.
#
# Fix: the minor version is extracted with a regular expression instead of
# SUBSTR(?version, 1, 3), which truncated two-digit minor versions
# (e.g. "3.10" and "3.11" both became "3.1").
SELECT ?created_year ?minor_version (COUNT(?notebook) AS ?count_minor_version)
WHERE {
  ?notebook a <https://w3id.org/reproduceme/Notebook> ;
            <http://purl.org/pav/retrievedFrom> ?repository ;
            <https://w3id.org/reproduceme/language> "python" ;
            <https://w3id.org/reproduceme/language_version> ?version .
  ?repository <https://w3id.org/reproduceme/created_at> ?created_at .

  # Keep only the leading digits before the first "-" of the timestamp.
  BIND(REPLACE(str(?created_at), "(\\d*)-.*", "$1") AS ?created_year)

  # "3.10.4" -> "3.10", "2.7" -> "2.7"; a value that does not start with
  # "<digits>.<digits>" is returned unchanged.
  BIND(REPLACE(?version, "^(\\d+\\.\\d+).*$", "$1") AS ?minor_version)

  # Drop version values that carry no minor component.
  FILTER(?version != "3" && ?version != "1" && ?version != "ES2015")
}
GROUP BY ?created_year ?minor_version
ORDER BY ?created_year ?minor_version

14 changes: 14 additions & 0 deletions sparql_query/q9.rq
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Number of Python notebooks per repository creation year and the first
# character of the language version (reported as the major version).
PREFIX pav:  <http://purl.org/pav/>
PREFIX repr: <https://w3id.org/reproduceme/>

SELECT
  ?created_year
  ?major_version
  (COUNT(?notebook) AS ?count_major_version)
WHERE {
  ?notebook   a                     repr:Notebook ;
              repr:language         "python" ;
              repr:language_version ?version ;
              pav:retrievedFrom     ?repository .
  ?repository repr:created_at       ?created_at .

  # Keep only the leading digits before the first "-" of the timestamp.
  BIND (REPLACE(STR(?created_at), "(\\d*)-.*", "$1") AS ?created_year)
  BIND (SUBSTR(?version, 1, 1) AS ?major_version)

  # Exclude these three specific version values.
  FILTER (?version != "3" && ?version != "1" && ?version != "ES2015")
}
GROUP BY ?created_year ?major_version
ORDER BY ?created_year ?major_version

0 comments on commit b2728d4

Please sign in to comment.