Skip to content

Commit

Permalink
feat: spq_label() aware of OPTIONAL (#205)
Browse files Browse the repository at this point in the history
  • Loading branch information
maelle authored Oct 20, 2023
1 parent 2bbac18 commit cdce3e0
Show file tree
Hide file tree
Showing 10 changed files with 215 additions and 56 deletions.
41 changes: 33 additions & 8 deletions R/build_parts.R → R/build_part_body.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#' Builds the "body" part of a query.
#' @param query a list with elements of the query
#' @param subject an anonymous variable (for instance,
#' and by default, "?subject") or an item (for instance, "wd:Q456")
#' @param verb the property (for instance "wdt:P190")
Expand All @@ -14,17 +13,15 @@
#' @param within_distance if provided, north-west and south-east coordinates of
#' bounding box for the triple query.
#' @noRd
build_part_body = function(query = NA,
triple = NULL,
build_part_body = function(triple = NULL,
subject = NULL,
verb = NULL,
object = NULL,
required = TRUE,
within_box = c(NA, NA),
within_distance = c(NA, NA),
filter = NA) {

part_body = query[["body"]]
filter = NA,
other_triples) {

if (!is.null(triple)) {
elts = decompose_triple_pattern(triple)
Expand All @@ -42,11 +39,39 @@ build_part_body = function(query = NA,
)
} else {
new_triple = glue::glue("{subject} {verb} {object}.")
if (sub(".$", "", new_triple) %in% other_triples[["sibling_triple"]]) {
little_siblings = other_triples[other_triples[["sibling_triple"]] == sub(".$", "", new_triple),]
little_siblings = split(little_siblings, seq_len(nrow(little_siblings)))

sibling_triples = purrr::map_chr(
little_siblings,
~build_part_body(
triple = .x[["triple"]],
required = .x[["required"]],
within_box = .x[["within_box"]],
within_distance = .x[["within_distance"]],
filter = .x[["filter"]],
other_triples = other_triples
)
) %>%
paste(collapse = "")
} else {
sibling_triples = NA
}
}

if (!is.na(sibling_triples)) {
new_triple = sprintf("\n\t%s\n\t%s\n", new_triple, sibling_triples)
}


if (!is.na(filter)) {
new_triple = sprintf("\n\t%s\n\tFILTER(%s)\n", new_triple, filter)
}

if (!required) {
new_triple = if (!is.na(filter)) {
sprintf("OPTIONAL {\n\t%s\n\tFILTER(%s)\n}\n", new_triple, filter)
sprintf("OPTIONAL {%s}\n", new_triple)
} else {
sprintf("OPTIONAL {%s}", new_triple)
}
Expand Down Expand Up @@ -86,5 +111,5 @@ build_part_body = function(query = NA,
)
}

glue::glue("{part_body}\n{new_triple}")
sprintf("\n%s", new_triple)
}
8 changes: 6 additions & 2 deletions R/spq_add.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#' for the center coordinates to be retrieved directly from the query.
#' @param .prefixes Custom prefixes
#' @param .filter Filter for the triple. Only use this with `.required=FALSE`
#' @param .sibling_triple_pattern Triple pattern, already present in the query,
#' that this triple is to be grouped with. Mainly useful if the sibling triple
#' is optional.
#' @export
#' @section Examples:
#' ```r
Expand Down Expand Up @@ -60,7 +62,8 @@ spq_add = function(.query = NULL,
.label = NA,
.within_box = c(NA, NA),
.within_distance = c(NA, NA),
.filter = NULL) {
.filter = NULL,
.sibling_triple_pattern = NA) {
.query = .query %||% spq_init()

elts = decompose_triple_pattern(
Expand All @@ -84,7 +87,8 @@ spq_add = function(.query = NULL,
required = .required,
within_box = list(.within_box),
within_distance = list(.within_distance),
filter = .filter
filter = .filter,
sibling_triple = .sibling_triple_pattern
)

# variable tracking ----
Expand Down
16 changes: 11 additions & 5 deletions R/spq_assemble.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,23 @@ spq_assemble = function(.query, strict = TRUE) {
# body ----
triples_present = !is.null(.query[["triples"]])
body = if (triples_present) {

firstborn_triples = .query[["triples"]][is.na(.query[["triples"]][["sibling_triple"]]),]
firstborn_triples = split(firstborn_triples, seq_len(nrow(firstborn_triples)))

# triples that have a sibling are not built here: each one is built together
# with the sibling triple it is attached to
other_triples = .query[["triples"]][!is.na(.query[["triples"]][["sibling_triple"]]),]

purrr::map_chr(
split(.query[["triples"]], seq_len(nrow(.query[["triples"]]))),
firstborn_triples,
~build_part_body(
query = .query,
triple = .x[["triple"]],
required = .x[["required"]],
within_box = .x[["within_box"]],
within_distance = .x[["within_distance"]],
filter = .x[["filter"]]
),
.query = .query
filter = .x[["filter"]],
other_triples = other_triples
)
) |>
paste0(collapse = "")
} else {
Expand Down
39 changes: 16 additions & 23 deletions R/spq_label.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ spq_label <- function(.query,
.languages = getOption("glitter.lang", "en$"),
.overwrite = FALSE) {

label_property <- .query[["endpoint_info"]][["label_property"]] %||%
"rdfs:label"
label_property <- .query[["endpoint_info"]][["label_property"]] %||%"rdfs:label"

vars = purrr::map_chr(rlang::enquos(...), spq_treat_argument)

Expand All @@ -55,33 +54,28 @@ spq_label <- function(.query,
if (is.null(.languages)) {
filter = NULL
} else {

languages_filter <- purrr::map_chr(.languages, create_lang_filter, x = x)

filter = paste(
languages_filter,
collapse = " || "
)
languages_filter = purrr::map_chr(.languages, create_lang_filter, x = x)
filter = paste(languages_filter, collapse = " || ")
}
if (.required) {
q = spq_add(
query,
sprintf("%s %s %s_labell", x, label_property, x),
.required = .required
)
if (!is.null(filter)) {
q = spq_filter(q, spq(filter))
}

triples_for_var = .query[["triples"]][
.query[["triples"]][["triple"]] %in%
.query[["vars"]][["triple"]][.query[["vars"]][["name"]] == x],
]
triple_for_var_optional <- all(!triples_for_var[["required"]])
sibling_triple_pattern = if (triple_for_var_optional) {
utils::tail(triples_for_var[["triple"]], n = 1)
} else {
NA
}

q = spq_add(
query,
sprintf("%s %s %s_labell", x, label_property, x),
.required = .required,
.filter = filter
.filter = filter,
.sibling_triple_pattern = sibling_triple_pattern
)
}


mutate_left <- sprintf("%s_label", sub("\\?", "", x))
mutate_right <- sprintf("coalesce(%s_labell, '')", un_question_mark(x))
Expand All @@ -90,8 +84,7 @@ spq_label <- function(.query,
q = do.call(spq_mutate, args_list)
q = spq_select(q, sprintf("-%s_labell", un_question_mark(x)))

# we add the language of the label
# because of regional variants
# we add the language of the label because of regional variants
if (!is.null(.languages)) {
if (length(.languages) > 1 || !endsWith(.languages, "$")) {
mutate_left <- sprintf("%s_label_lang", un_question_mark(x))
Expand Down
33 changes: 28 additions & 5 deletions R/tracking.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,18 @@ track_triples <- function(.query,
required,
within_box,
within_distance,
filter = NULL) {
filter = NULL,
sibling_triple = NA) {
if (triple %in% .query[["triples"]][["triple"]]) {
cli::cli_abort("Duplicate triple {.val triple}")
cli::cli_abort("Duplicate triple {.val triple}.")
}

if (!is.na(sibling_triple)) {
sibling_absent <- !(sibling_triple %in% .query[["triples"]][["triple"]])

if (sibling_absent) {
cli::cli_abort("Can't find sibling triple {.val sibling_triple}.")
}
}

no_within_box = (sum(is.na(within_box[[1]])) == 2)
Expand All @@ -84,18 +93,32 @@ track_triples <- function(.query,
required = required,
within_box = within_box,
within_distance = within_distance,
filter = filter
filter = filter,
sibling_triple = sibling_triple
)

.query[["triples"]] <- rbind(.query[["triples"]], new_triple)

.query
}

track_filters <- function(.query, filter) {
track_filters <- function(.query, filter, sibling_triple = NA) {

if (!is.na(sibling_triple)) {
sibling_absent <- !(sibling_triple %in% .query[["triples"]][["triple"]])

if (sibling_absent) {
cli::cli_abort("Can't find sibling triple {.val sibling_triple}.")
}
}

var <- str_extract(filter, "\\(\\?(.*?)\\)")
var <- sub("\\,.*", "", sub("\\(", "", sub("\\)", "", var)))
new_filter <- tibble::tibble(filter = filter, var = var)
new_filter <- tibble::tibble(
filter = filter,
var = var,
sibling_triple = sibling_triple
)

.query[["filters"]] <- rbind(.query[["filters"]], new_filter)

Expand Down
6 changes: 5 additions & 1 deletion man/spq_add.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/spq_assemble.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

90 changes: 87 additions & 3 deletions tests/testthat/_snaps/spq_label.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,19 +118,103 @@
# spq_label() .languages = NULL

Code
spq_init(endpoint = "hal") %>% spq_label(labo, .languages = NULL, .required = TRUE) %>%
spq_add("?labo dcterms:identifier ?labo_id", .required = FALSE) %>%
spq_init(endpoint = "hal") %>% spq_add("?labo dcterms:identifier ?labo_id",
.required = FALSE) %>% spq_label(labo, .languages = NULL, .required = TRUE) %>%
spq_filter(str_detect(labo_label, "EVS|(UMR 5600)|(Environnement Ville Soc)"))
Output
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?labo (COALESCE(?labo_labell,'') AS ?labo_label) ?labo_id
WHERE {
OPTIONAL {
?labo dcterms:identifier ?labo_id.
?labo skos:prefLabel ?labo_labell.
OPTIONAL {?labo dcterms:identifier ?labo_id.}
}
BIND(COALESCE(?labo_labell,'') AS ?labo_label)
FILTER(REGEX(?labo_label,"EVS|(UMR 5600)|(Environnement Ville Soc)"))
}

# spq_label() for optional thing

Code
spq_init() %>% spq_add("?film wdt:P31 wd:Q11424") %>% spq_add(
"?film wdt:P840 ?loc") %>% spq_add("?loc wdt:P625 ?coords") %>% spq_add(
"?film wdt:P3383 ?image") %>% spq_add("?film wdt:P921 ?subject", .required = FALSE) %>%
spq_add("?film wdt:P577 ?date") %>% spq_label(film, loc, subject) %>%
spq_head(10)
Output
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?coords ?date ?film (COALESCE(?film_labell,'') AS ?film_label) ?image ?loc (COALESCE(?loc_labell,'') AS ?loc_label) ?subject (COALESCE(?subject_labell,'') AS ?subject_label)
WHERE {
?film wdt:P31 wd:Q11424.
?film wdt:P840 ?loc.
?loc wdt:P625 ?coords.
?film wdt:P3383 ?image.
OPTIONAL {
?film wdt:P921 ?subject.
OPTIONAL {
?subject rdfs:label ?subject_labell.
FILTER(lang(?subject_labell) IN ('en'))
}
}
?film wdt:P577 ?date.
OPTIONAL {
?film rdfs:label ?film_labell.
FILTER(lang(?film_labell) IN ('en'))
}
OPTIONAL {
?loc rdfs:label ?loc_labell.
FILTER(lang(?loc_labell) IN ('en'))
}
}
LIMIT 10

---

Code
spq_init() %>% spq_add("?film wdt:P31 wd:Q11424") %>% spq_add(
"?film wdt:P840 ?loc") %>% spq_add("?loc wdt:P625 ?coords") %>% spq_add(
"?film wdt:P3383 ?image") %>% spq_add("?film wdt:P921 ?subject", .required = FALSE) %>%
spq_add("?film wdt:P577 ?date") %>% spq_label(film, loc, subject, .required = TRUE) %>%
spq_head(10)
Output
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?coords ?date ?film (COALESCE(?film_labell,'') AS ?film_label) ?image ?loc (COALESCE(?loc_labell,'') AS ?loc_label) ?subject (COALESCE(?subject_labell,'') AS ?subject_label)
WHERE {
?film wdt:P31 wd:Q11424.
?film wdt:P840 ?loc.
?loc wdt:P625 ?coords.
?film wdt:P3383 ?image.
OPTIONAL {
?film wdt:P921 ?subject.
?subject rdfs:label ?subject_labell.
FILTER(lang(?subject_labell) IN ('en'))
}
?film wdt:P577 ?date.
?film rdfs:label ?film_labell.
FILTER(lang(?film_labell) IN ('en'))
?loc rdfs:label ?loc_labell.
FILTER(lang(?loc_labell) IN ('en'))
}
LIMIT 10

Loading

0 comments on commit cdce3e0

Please sign in to comment.