From 67c49edd997219a4cda0ccf622b593451a947207 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABlle=20Salmon?= Date: Thu, 21 Sep 2023 13:48:07 +0200 Subject: [PATCH] feat: use correct property for labelling HAL, let users enter property --- NAMESPACE | 1 + R/build_sparql.R | 2 +- R/data.R | 2 +- R/spq_endpoint_info.R | 23 +++++++++++++++++++++++ R/spq_init.R | 22 +++++++++++++++++++--- R/spq_label.R | 8 ++++++-- R/spq_perform.R | 2 +- data-raw/create_usual_endpoints.R | 12 ------------ data-raw/usual_endpoints.csv | 12 ++++++------ data/usual_endpoints.rda | Bin 411 -> 470 bytes man/spq_endpoint_info.Rd | 20 ++++++++++++++++++++ man/spq_init.Rd | 7 ++++++- man/usual_endpoints.Rd | 2 +- tests/testthat/_snaps/spq_label.md | 24 ++++++++++++++++++++++++ tests/testthat/test-spq_label.R | 9 +++++++++ vignettes/articles/glitter_for_hal.Rmd | 2 +- 16 files changed, 119 insertions(+), 29 deletions(-) create mode 100644 R/spq_endpoint_info.R create mode 100644 man/spq_endpoint_info.Rd diff --git a/NAMESPACE b/NAMESPACE index 7a065558..6301bc75 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ export(spq_arrange) export(spq_assemble) export(spq_control_request) export(spq_count) +export(spq_endpoint_info) export(spq_filter) export(spq_group_by) export(spq_head) diff --git a/R/build_sparql.R b/R/build_sparql.R index 957b3eb8..28c6a3af 100644 --- a/R/build_sparql.R +++ b/R/build_sparql.R @@ -13,7 +13,7 @@ #' cat() spq_assemble = function(.query, strict = TRUE) { - endpoint = .query[["endpoint"]] + endpoint = .query[["endpoint_info"]][["endpoint_url"]] .query = spq_prefix(.query, auto = TRUE, prefixes = NULL) diff --git a/R/data.R b/R/data.R index 18bb5185..cd22d635 100644 --- a/R/data.R +++ b/R/data.R @@ -20,7 +20,7 @@ wikidata_url <- function() { #' \describe{ #' \item{name}{the abbreviated name of the SPARQL endpoint} #' \item{url}{the full address of the SPARQL endpoint} -#' ... 
+#' \item{label_property}{the property used for labelling} #' } "usual_endpoints" diff --git a/R/spq_endpoint_info.R b/R/spq_endpoint_info.R new file mode 100644 index 00000000..6475c214 --- /dev/null +++ b/R/spq_endpoint_info.R @@ -0,0 +1,23 @@ +#' Create the endpoint info object for `spq_init()` +#' +#' @param label_property A single string: the prefixed property used by the endpoint for labelling, e.g. `"rdfs:label"` or `"skos:prefLabel"` (prefixed names are case-sensitive). +#' +#' @return A list of class `"glitter_endpoint_info"` holding `label_property`, to be used in `spq_init()`'s `endpoint_info` argument. +#' @export +#' +#' @examples +#' spq_endpoint_info(label_property = "skos:prefLabel") +spq_endpoint_info <- function(label_property = "rdfs:prefLabel") { + + # TODO check property more. NOTE(review): the default "rdfs:prefLabel" differs from the "rdfs:label" used by spq_init() and spq_label(); prefLabel is a SKOS term, confirm the intended default. + if (!is.character(label_property) || length(label_property) != 1L || is.na(label_property)) { + cli::cli_abort("Must provide a single character string as {.arg label_property}.") + } + + structure( + list( + label_property = label_property + ), + class = "glitter_endpoint_info" + ) +} diff --git a/R/spq_init.R b/R/spq_init.R index 187aea6f..7603c175 100644 --- a/R/spq_init.R +++ b/R/spq_init.R @@ -2,6 +2,9 @@ #' #' @param endpoint Endpoint, either name if it is in `usual_endpoints`, #' or an URL +#' @param endpoint_info Do not use for an usual endpoint in `usual_endpoints`! 
+#' Information about +#' the endpoint #' @param request_control An object as returned by [`spq_control_request()`] #' #' @return A query object @@ -28,22 +31,35 @@ spq_init = function( max_seconds = getOption("glitter.max_seconds", 120L), timeout = getOption("glitter.timeout", 1000L), request_type = c("url", "body-form") + ), + endpoint_info = spq_endpoint_info( + label_property = "rdfs:label" ) ) { if (!inherits(request_control, "glitter_request_control")) { cli::cli_abort("{.arg request_control} must be created by {.fun spq_control_request}.") + } + if (!inherits(endpoint_info, "glitter_endpoint_info")) { + cli::cli_abort("{.arg endpoint_info} must be created by {.fun spq_endpoint_info}.") } # if endpoint passed as name, get url endpoint = tolower(endpoint) usual_endpoint_info = usual_endpoints %>% dplyr::filter(.data$name == endpoint) - endpoint = if (nrow(usual_endpoint_info) > 0) { - dplyr::pull(usual_endpoint_info, .data$url) + if (nrow(usual_endpoint_info) > 0) { + endpoint = dplyr::pull(usual_endpoint_info, .data$url) + label_property = dplyr::pull(usual_endpoint_info, .data$label_property) } else { endpoint + label_property = NULL } + endpoint_info = list( + endpoint_url = endpoint, + label_property = label_property %||% endpoint_info[["label_property"]] + ) + query = list( prefixes_provided = tibble::tibble(name = NULL, url = NULL), prefixes_used = NULL, @@ -56,7 +72,7 @@ spq_init = function( group_by = NULL, order_by = NULL, offset = NULL, - endpoint = endpoint, + endpoint_info = endpoint_info, request_control = request_control ) diff --git a/R/spq_label.R b/R/spq_label.R index 11429833..88146d7f 100644 --- a/R/spq_label.R +++ b/R/spq_label.R @@ -35,6 +35,10 @@ spq_label <- function(.query, .required = FALSE, .languages = getOption("glitter.lang", "en$"), .overwrite = FALSE) { + + label_property <- .query[["endpoint_info"]][["label_property"]] %||% + "rdfs:label" + vars = purrr::map_chr(rlang::enquos(...), spq_treat_argument) if (!is.null(.languages)) 
.languages = tolower(.languages) @@ -56,14 +60,14 @@ spq_label <- function(.query, if (.required) { q = spq_add( query, - sprintf("%s rdfs:label %s_labell", x, x), + sprintf("%s %s %s_labell", x, label_property, x), .required = .required ) q = spq_filter(q, spq(filter)) } else { q = spq_add( query, - sprintf("%s rdfs:label %s_labell", x, x), + sprintf("%s %s %s_labell", x,label_property, x), .required = .required, .filter = filter ) diff --git a/R/spq_perform.R b/R/spq_perform.R index d1b4d8a6..7fb2297c 100644 --- a/R/spq_perform.R +++ b/R/spq_perform.R @@ -34,7 +34,7 @@ spq_perform = function(.query, "spq_init(endpoint)" ) } else { - endpoint = .query[["endpoint"]] + endpoint = .query[["endpoint_info"]][["endpoint_url"]] } diff --git a/data-raw/create_usual_endpoints.R b/data-raw/create_usual_endpoints.R index 6018b750..4580acf5 100644 --- a/data-raw/create_usual_endpoints.R +++ b/data-raw/create_usual_endpoints.R @@ -1,14 +1,2 @@ -endpoints=tibble::tibble(name=c("wikidata", - "dbpedia", - "databnf", - "isidore", - "hal", - "symogih"), - url=c("https://query.wikidata.org/", - "https://dbpedia.org/sparql", - "https://data.bnf.fr/sparql", - "https://isidore.science/sparql", - "http://sparql.archives-ouvertes.fr/sparql", - "http://bhp-publi.ish-lyon.cnrs.fr:8888/sparql")) usual_endpoints=readr::read_csv("data-raw/usual_endpoints.csv") usethis::use_data(usual_endpoints,overwrite=TRUE) diff --git a/data-raw/usual_endpoints.csv b/data-raw/usual_endpoints.csv index 806b844b..1c85b3b7 100644 --- a/data-raw/usual_endpoints.csv +++ b/data-raw/usual_endpoints.csv @@ -1,6 +1,6 @@ -name,url -wikidata,https://query.wikidata.org/ -dbpedia,https://dbpedia.org/sparql -databnf,https://data.bnf.fr/sparql -isidore,https://isidore.science/sparql -hal,http://sparql.archives-ouvertes.fr/sparql +name,url,label_property +wikidata,https://query.wikidata.org/,rdfs:label +dbpedia,https://dbpedia.org/sparql,rdfs:label +databnf,https://data.bnf.fr/sparql,rdfs:label 
+isidore,https://isidore.science/sparql,rdfs:label +hal,http://sparql.archives-ouvertes.fr/sparql,skos:prefLabel diff --git a/data/usual_endpoints.rda b/data/usual_endpoints.rda index 6481142e46877bb5f2b74d42482f3b2d67b15de9..fa33c89d3c34a1d7a594202495688ce619b659dd 100644 GIT binary patch literal 470 zcmV;{0V)1MT4*^jL0KkKS=DekrvL$9f589y$r%7AkPrj}5J120|DZqs013bWyGtvO zNlB!7Q`%G7h<=i7Pf*iCMw%Xk$+XBMl*t~cK% zjR&hsufqu7cuyR~a7J42gYweUdto(-rUBZGS`jH77QN=Wp&2aANk{iS0zJZkjT_Ki M$rRy2Lsh`$oNFE182|tP literal 411 zcmV;M0c8F{T4*^jL0KkKS?7F!4*&ru|G@wH$r%7AkPrj_5J120|DZqs012=GlQ57< zBAQ{Tg*{J0)YSC{q#mK8(Ke@}Ae2UuXwc9A000JnWD+P%CZpPs>Nb-JkRG4_WYZ+6 zDD?%91f9CI%KdB6HoEXFUQS zRFYX5Kp(1_UzzKsw zunjJXR!Yp2qdr^^?h0D4Hi-kJ`#0YiLMqvXV5^|RCUgdz4ilB&SWmt|QZihqU_o3#z&VagB#DJP zc<+pg!tQd2>&wF2Q#q(xoKGrIC_~o7DYNm;E`0Sey_5fS=7DRJRcPMA?ntK!5*+W4 F0pOXPy50Z) diff --git a/man/spq_endpoint_info.Rd b/man/spq_endpoint_info.Rd new file mode 100644 index 00000000..262f8f2c --- /dev/null +++ b/man/spq_endpoint_info.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/spq_endpoint_info.R +\name{spq_endpoint_info} +\alias{spq_endpoint_info} +\title{Create the endpoint info object for \code{spq_init()}} +\usage{ +spq_endpoint_info(label_property = "rdfs:prefLabel") +} +\arguments{ +\item{label_property}{Property used by the endpoint for labelling.} +} +\value{ +A list to be used in \code{spq_init()}'s \code{endpoint_info} argument. 
+} +\description{ +Create the endpoint info object for \code{spq_init()} +} +\examples{ +spq_endpoint_info(label_property = "skos:prefLabel") +} diff --git a/man/spq_init.Rd b/man/spq_init.Rd index f0870847..5c5becc5 100644 --- a/man/spq_init.Rd +++ b/man/spq_init.Rd @@ -10,7 +10,8 @@ spq_init( "glitter R package (https://github.com/lvaudor/glitter)"), max_tries = getOption("glitter.max_tries", 3L), max_seconds = getOption("glitter.max_seconds", 120L), timeout = getOption("glitter.timeout", 1000L), request_type = c("url", - "body-form")) + "body-form")), + endpoint_info = spq_endpoint_info(label_property = "rdfs:label") ) } \arguments{ @@ -18,6 +19,10 @@ spq_init( or an URL} \item{request_control}{An object as returned by \code{\link[=spq_control_request]{spq_control_request()}}} + +\item{endpoint_info}{Do not use for an usual endpoint in \code{usual_endpoints}! +Information about +the endpoint} } \value{ A query object diff --git a/man/usual_endpoints.Rd b/man/usual_endpoints.Rd index 5d21f24c..c05f41d6 100644 --- a/man/usual_endpoints.Rd +++ b/man/usual_endpoints.Rd @@ -9,7 +9,7 @@ A data frame with usual SPARQL endpoints and abbreviated names \describe{ \item{name}{the abbreviated name of the SPARQL endpoint} \item{url}{the full address of the SPARQL endpoint} -... +\item{label_property}{the property used for labelling} } } \usage{ diff --git a/tests/testthat/_snaps/spq_label.md b/tests/testthat/_snaps/spq_label.md index a893c015..10b32c82 100644 --- a/tests/testthat/_snaps/spq_label.md +++ b/tests/testthat/_snaps/spq_label.md @@ -60,6 +60,30 @@ } +# spq_label() for not rdfs:label + + Code + spq_init(endpoint = "hal") %>% spq_add( + "haldoc:inria-00362381 dcterms:hasVersion ?version") %>% spq_add( + "?version dcterms:type ?type") %>% spq_label(type) + Output + PREFIX dcterms: + PREFIX skos: + PREFIX haldoc: + SELECT ?type (COALESCE(?type_labell,'') AS ?type_label) ?version + WHERE { + + haldoc:inria-00362381 dcterms:hasVersion ?version. 
+ ?version dcterms:type ?type. + OPTIONAL { + ?type skos:prefLabel ?type_labell. + FILTER(lang(?type_labell) IN ('en')) + } + + + } + + # spq_label() .overwrite Code diff --git a/tests/testthat/test-spq_label.R b/tests/testthat/test-spq_label.R index 8a2758a7..0bca79c2 100644 --- a/tests/testthat/test-spq_label.R +++ b/tests/testthat/test-spq_label.R @@ -35,6 +35,15 @@ test_that("spq_label() works", { ) }) +test_that("spq_label() for not rdfs:label", { + expect_snapshot( + spq_init(endpoint = "hal") %>% + spq_add("haldoc:inria-00362381 dcterms:hasVersion ?version") %>% + spq_add("?version dcterms:type ?type") %>% + spq_label(type) + ) +}) + test_that("spq_label() .overwrite", { expect_snapshot( diff --git a/vignettes/articles/glitter_for_hal.Rmd b/vignettes/articles/glitter_for_hal.Rmd index 2aa62037..3c1d2124 100644 --- a/vignettes/articles/glitter_for_hal.Rmd +++ b/vignettes/articles/glitter_for_hal.Rmd @@ -85,7 +85,7 @@ On cherche les types associés aux versions de documents. Ces types sont associ query_docType=spq_init(endpoint = "hal") %>% spq_add("haldoc:inria-00362381 dcterms:hasVersion ?version") %>% # Ce doc a des versions ?version spq_add("?version dcterms:type ?type") %>% # ?version est un document de type ?type - spq_add("?type skos:prefLabel ?label") # ?type a pour étiquette ?label + spq_label(type) # ?type a pour étiquette ?label tib_docType=spq_perform(query_docType)