From 5443fd7364724073d492fcc667dbc594ac6e6f06 Mon Sep 17 00:00:00 2001 From: antaldaniel Date: Wed, 25 Dec 2024 00:57:54 +0100 Subject: [PATCH] provenance, RDF --- DESCRIPTION | 6 +- NAMESPACE | 2 + R/agent.R | 121 +++++++ R/as_dublincore.R | 164 ++++++++++ R/dataset_df.R | 48 ++- R/describe.R | 18 ++ R/dublincore.R | 391 ++++++++++++++--------- R/get_bibentry.R | 15 +- R/id_to_column.R | 41 ++- R/n_triple.R | 63 +++- R/provenance.R | 29 +- R/publication_year.R | 10 +- R/subject.R | 3 +- R/var_namespace.R | 4 +- R/xsd_convert.R | 7 +- README.Rmd | 2 +- _pkgdown.yml | 8 +- data-raw/prepare_iris_dataset.R | 42 +-- data/iris_dataset.rda | Bin 2202 -> 2486 bytes man/datacite.Rd | 2 +- man/dataset_df.Rd | 16 +- man/describe.Rd | 25 ++ man/dublincore.Rd | 89 +++--- man/get_bibentry.Rd | 4 +- man/var_namespace.Rd | 4 +- tests/testthat/test-agent.R | 22 ++ tests/testthat/test-creator.R | 5 +- tests/testthat/test-dataset_df.R | 21 +- tests/testthat/test-dataset_to_triples.R | 1 + tests/testthat/test-defined.R | 3 - tests/testthat/test-describe.R | 8 + tests/testthat/test-dublincore.R | 86 ++++- tests/testthat/test-get_bibentry.R | 4 +- tests/testthat/test-id_to_column.R | 1 + tests/testthat/test-n_triple.R | 73 +++-- tests/testthat/test-publication_year.R | 7 +- tests/testthat/test-subject.R | 1 - tests/testthat/test-toBiblatex.R | 17 + tests/testthat/test-var_label.R | 6 +- tests/testthat/test-var_namespace.R | 6 +- vignettes/bibentry.Rmd | 14 +- vignettes/rdf.Rmd | 47 +++ 42 files changed, 1076 insertions(+), 360 deletions(-) create mode 100644 R/agent.R create mode 100644 R/as_dublincore.R create mode 100644 R/describe.R create mode 100644 man/describe.Rd create mode 100644 tests/testthat/test-agent.R create mode 100644 tests/testthat/test-describe.R create mode 100644 tests/testthat/test-toBiblatex.R create mode 100644 vignettes/rdf.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index ae97ac6..8fff0f1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: 
dataset Title: Create Data Frames that are Easier to Exchange and Reuse -Version: 0.3.4 -Date: 2024-12-23 +Version: 0.3.4001 +Date: 2024-12-24 DOI: 10.32614/CRAN.package.dataset Language: en-US Authors@R: @@ -33,6 +33,7 @@ Imports: labelled, methods, pillar, + RefManageR, rlang, tibble, utils, @@ -40,6 +41,7 @@ Imports: RoxygenNote: 7.3.2 Suggests: knitr, + rdflib, rmarkdown, spelling, testthat (>= 3.0.0) diff --git a/NAMESPACE b/NAMESPACE index fbd9433..e7fc165 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,6 +60,7 @@ export(dataset_title) export(dataset_to_triples) export(defined) export(definition_attribute) +export(describe) export(description) export(dublincore) export(geolocation) @@ -97,6 +98,7 @@ export(xsd_convert) import(methods) import(pillar) import(vctrs) +importFrom(RefManageR,BibEntry) importFrom(assertthat,assert_that) importFrom(cli,cat_line) importFrom(haven,as_factor) diff --git a/R/agent.R b/R/agent.R new file mode 100644 index 0000000..d728275 --- /dev/null +++ b/R/agent.R @@ -0,0 +1,121 @@ + + +#' @keywords internal +`agent<-` <- function(x, value) { + + return_type <- NULL + + if ("dataset_bibentry" %in% names(attributes(x))) { + dataset_bibentry <- get_bibentry(x) + return_type <- "dataset" + } else if (inherits(x, "dublincore")) { + dataset_bibentry <- x + return_type <- "dublincore" + } else if (inherits(x, "datacite")) { + dataset_bibentry <- x + return_type <- "datacite" + } else { + stop("Error: agent(x)<- x must be a dataset_df, a dublincore or a datacite object.") + } + + assertthat::assert_that(all(inherits(value, "person")), + msg = "Error: agent(x) <- value: value must be a vector of utils::persons.") + + creators <- ifelse(is.null(dataset_bibentry$author), ":tba", dataset_bibentry$author) + publishers <- ifelse(is.null(dataset_bibentry$publisher), ":unas", dataset_bibentry$publisher) + contributors <- ifelse(is.null(dataset_bibentry$contributor), ":unas", dataset_bibentry$contributor) + + get_creator <- function(x) { + if 
(!is.null(x$role)) ifelse("cre" %in% x$role, TRUE, FALSE) else FALSE + } + + get_author <- function(x) { + if (!is.null(x$role)) ifelse("aut" %in% x$role, TRUE, FALSE) else FALSE + } + + get_publisher <- function(x) { + if (!is.null(x$role)) ifelse("pbl" %in% x$role, TRUE, FALSE) else FALSE + } + + is_creator <- vapply(value, get_creator, logical(1)) + is_author <- vapply(value, get_creator, logical(1)) + is_publisher <- vapply(value, get_publisher, logical(1)) + + + new_creators <- c(value[is_creator], value[is_author[!is_creator]]) + new_publishers <- c(value[is_publisher]) + new_contributors <- c(value[!value %in% c(creators, publishers)]) + + creators <- ifelse(length(new_creators)>0, new_creators, creators) + publishers <- ifelse(length(new_publishers)>0, new_publishers, publishers) + contributors <- ifelse(length(new_creators)>0, new_contributors, contributors) + + dataset_bibentry$author <- ifelse(length(new_creators)>0, new_creators, dataset_bibentry[[1]]$author) + dataset_bibentry$contributor <- contributors + dataset_bibentry$publisher <- publishers + + if ( return_type %in% c("datacite", "dublincore")) { + dataset_bibentry + } else if ( return_type == "dataset") { + attr(x, "dataset_bibentry") <- dataset_bibentry + invisible(x) + } +} + +#' @keywords internal +agent <- function(x) { + + if(inherits(x, "dataset_df")) { + dataset_bibentry <- get_bibentry(x) + } else if(inherits(x, "datacite")) { + dataset_bibentry <- x + creators <- ifelse(is.null(dataset_bibentry$author), ":tba", dataset_bibentry$author) + publishers <- ifelse(is.null(dataset_bibentry$publisher), ":unas", dataset_bibentry$publisher) + contributors <- ifelse(is.null(dataset_bibentry$contributor), ":unas", dataset_bibentry$contributor) + + } else if (inherits(x, "dublincore")) { + dataset_bibentry <- x + creators <- ifelse(is.null(dataset_bibentry$author), ":tba", dataset_bibentry$author) + publishers <- ifelse(is.null(dataset_bibentry$publisher), ":unas", dataset_bibentry$publisher) + 
contributors <- ifelse(is.null(dataset_bibentry$contributor), ":unas", dataset_bibentry$contributor) + } else if ( all(inherits(x, "person"))) { + contributors <- x + publishers <- x + creators <- x + return_type <- "persons_vector" + } else { + stop("Error: agent(x)<- x must be a dataset_df, a vector of persons, a dublincore or datacite object.") + } + + get_creator <- function(x) { + if (!is.null(x$role)) ifelse("cre" %in% x$role, TRUE, FALSE) else FALSE + } + + get_author <- function(x) { + if (!is.null(x$role)) ifelse("aut" %in% x$role, TRUE, FALSE) else FALSE + } + + get_publisher <- function(x) { + if (!is.null(x$role)) ifelse("pbl" %in% x$role, TRUE, FALSE) else FALSE + } + + is_creator <- vapply(creators, get_creator, logical(1)) + is_author <- vapply(creators, get_creator, logical(1)) + is_publisher <- vapply(creators, get_publisher, logical(1)) + + new_creators <- c(creators[is_creator], creators[is_author[!is_creator]]) + new_contributors <- c(contributors[!contributors %in% c(creators, publishers)]) + new_publishers <- publishers[is_publisher] + + creators <- if(length(new_creators)>0) creators <- new_creators + contributors <- if(length(new_contributors)>0) contributors <- new_contributors + publishers <- if(length(new_publishers)>0) publishers <- new_publishers + + + + list(creators = creators, + contributors = contributors, + publishers = publishers) + +} + diff --git a/R/as_dublincore.R b/R/as_dublincore.R new file mode 100644 index 0000000..0efcefb --- /dev/null +++ b/R/as_dublincore.R @@ -0,0 +1,164 @@ +#' @rdname dublincore +#' @param type For \code{as_dublincore}, any of \code{"bibentry", "dataset_df", "list", "ntriples"}. +#' @param ... Optional parameters to add to a \code{dublincore} object. +#' \code{author=person("Jane", "Doe")} adds an author to the citation +#' object if \code{type="dataset"}. +#' @export +as_dublincore <- function(x, type = "bibentry", ...) 
{ + + citation_author <- person(NULL, NULL) + + is_person <- function(p) ifelse (inherits(p, "person"), TRUE, FALSE) + + arguments <- list(...) + + if (!is.null(arguments$author)) { + if ( is_person(arguments$author)) { + citation_author <- arguments$author + } else { + stop("as_dublincore(x, ..., author = ): author must be created with utils::person().") + } + } + + if (! type %in% c("bibentry", "list", "dataset_df", "ntriples")) { + warning_message <- "as_dublincore(ds, type=...) type cannot be " + warning(warning_message, type, ". Reverting to 'bibentry'.") + type <- 'bibentry' + } + + dataset_bibentry <- get_bibentry(x) + dataset_title <- dataset_bibentry$title + dataset_creator <- dataset_bibentry$author + + if (! is_person(dataset_creator)) { + stop('attr(x, "dataset_bibentry")$author is not a person object.') + } + + if (!is.null(dataset_bibentry$year)) { + if(is.null(dataset_bibentry$dataset_date)) { + dataset_date <- as.character(dataset_bibentry$year) + } else { + dataset_date <- as.character(dataset_bibentry$date) + } + } else if (!is.null(dataset_bibentry$date)) { + dataset_date <- dataset_bibentry$date + } else { + dataset_date <- ":tba" + } + + dataset_relation <- ifelse (is.null(dataset_bibentry$relation), ":unas", as.character(dataset_bibentry$relation)) + dataset_identifier <- ifelse (is.null(dataset_bibentry$identifier), ":tba", as.character(dataset_bibentry$identifier)) + dataset_version <- ifelse (is.null(dataset_bibentry$version), ":unas", as.character(dataset_bibentry$version)) + dataset_description <- ifelse (is.null(dataset_bibentry$description), ":unas", as.character(dataset_bibentry$description)) + dataset_language <- ifelse (is.null(dataset_bibentry$language), ":unas", as.character(dataset_bibentry$language)) + dataset_format <- ifelse (is.null(dataset_bibentry$format), ":tba", as.character(dataset_bibentry$format)) + dataset_rights <- ifelse (is.null(dataset_bibentry$rights), ":tba", as.character(dataset_bibentry$rights)) + 
dataset_coverage <- ifelse (is.null(dataset_bibentry$coverage), ":unas", as.character(dataset_bibentry$coverage)) + datasource <- ifelse (is.null(dataset_bibentry$datasource), ":unas", as.character(dataset_bibentry$datasource)) + dataset_contributor <- ifelse (is.null(dataset_bibentry$contributor), "", as.character(dataset_bibentry$contributor)) + dataset_subject <- ifelse (is.null(dataset_bibentry$subject), "", as.character(dataset_bibentry$subject)) + dataset_publisher <- ifelse (is.null(dataset_bibentry$publisher), "", as.character(dataset_bibentry$publisher)) + + properties <- c(length(dataset_title), + length(as.character(dataset_creator)), + length(dataset_identifier), + length(dataset_publisher), + length(dataset_subject), + length("DCMITYPE:Dataset"), + length(dataset_contributor), + length(dataset_date), + length(dataset_language), + length(dataset_relation), + length(dataset_format), + length(dataset_rights), + length(datasource), + length(dataset_description), + length(dataset_coverage) + ) + + if (type == "bibentry") { + new_dublincore(title = dataset_title, + creator = dataset_creator, + identifier = dataset_identifier, + publisher = dataset_publisher, + subject = dataset_subject, + type = "DCMITYPE:Dataset", + contributor = dataset_contributor, + publication_date = dataset_date, + language = dataset_language, + relation = dataset_relation, + format = dataset_format, + rights = dataset_rights, + datasource = datasource, + description = dataset_description, + coverage = dataset_coverage) + } else if (type== "list") { + if (dataset_contributor == "") dataset_contributor <- NULL + if (dataset_subject == "") dataset_subject <- NULL + + list(title=dataset_title, + creator=dataset_creator, + identifier = dataset_identifier, + publisher = dataset_publisher, + subject = dataset_subject, + type = "DCMITYPE:Dataset", + contributor = dataset_contributor, + date = dataset_date, + language = dataset_language, + relation = dataset_relation, + format = 
dataset_format, + rights = dataset_rights, + datasource = datasource, + description = dataset_description, + coverage = dataset_coverage) + } else if ( type == "dataset_df") { + assertthat::assert_that( + all(properties)==1, msg= "In as_dublincore() not all properties have a length 1 to export into datataset (data.frame)." + ) + dataset_df( + data.frame(title = dataset_title, + creator = as.character(dataset_creator), + identifier = dataset_identifier, + publisher = dataset_publisher, + subject = dataset_subject, + type = "DCMITYPE:Dataset", + contributor = dataset_contributor, + date = dataset_date, + language = dataset_language, + relation = dataset_relation, + format = dataset_format, + rights = dataset_rights, + datasource = datasource, + description = dataset_description, + coverage = dataset_coverage), + reference = list( + title = paste0("The Dublin Core Metadata of `", dataset_bibentry$title, "'"), + author = citation_author, + year = substr(as.character(Sys.Date()),1,4) + + )) + } else if (type=="ntriples") { + dclist <- list(title=dataset_title, + creator=dataset_creator, + identifier = dataset_identifier, + publisher = dataset_publisher, + subject = dataset_subject, + type = "DCMITYPE:Dataset", + contributor = dataset_contributor, + date = dataset_date, + language = dataset_language, + relation = dataset_relation, + format = dataset_format, + rights = dataset_rights, + datasource = datasource, + description = dataset_description, + coverage = dataset_coverage) + + if ( dataset_identifier == ":tba") { + dataset_id <- "http:/example.com/dataset_tba/" + } else { + dataset_id <- dataset_identifier + } + dublincore_to_triples(dclist=dclist, dataset_id=dataset_id) + } +} diff --git a/R/dataset_df.R b/R/dataset_df.R index 945d87c..9c1ca88 100644 --- a/R/dataset_df.R +++ b/R/dataset_df.R @@ -13,6 +13,11 @@ #' \cr #' For more details, please check the \code{vignette("dataset_df", package = "dataset")} #' vignette. 
+#' @param identifier Defaults to \code{c(eg="http://example.com/dataset#")}, which should be +#' changed to the permanent identifier of the dataset. For example, if your dataset will be +#' released with the Digital Object Identifier (DOI) `https;//doi.org/1234`, you should use +#' a short prefixed identifier like \code{c(obs="https://doi.org/1234#")}, which will resolve +#' to the rows being identified as https://doi.org/1234#1...https://doi.org/1234#n. #' @param dataset_bibentry A list of bibliographic references and descriptive metadata #' about the dataset as a whole created with \code{\link{datacite}} or #' \code{\link{dublincore}}. @@ -47,14 +52,21 @@ # User constructor dataset_df <- function(..., - dataset_bibentry=NULL, + identifier = c(eg="http://example.com/dataset#"), var_labels=NULL, units=NULL, definitions=NULL, + dataset_bibentry=NULL, dataset_subject=NULL ) { dots <- list(...) + if ( ! "rowid" %in% names(dots)) { + add_rowid <- TRUE + } else { + add_row_id <- FALSE + } + sys_time <- Sys.time() year <- substr(as.character(sys_time),1,4) @@ -78,6 +90,7 @@ dataset_df <- function(..., tmp <- new_my_tibble( x = tibble::tibble(...), + identifier = identifier, dataset_bibentry=dataset_bibentry, var_labels = var_labels, units = units, @@ -88,9 +101,11 @@ dataset_df <- function(..., } + #' @rdname dataset_df #' @export as_dataset_df <- function(df, + identifier = c(eg ="http://example.com/dataset#"), var_labels=NULL, units=NULL, definitions =NULL, @@ -99,19 +114,12 @@ as_dataset_df <- function(df, dots <- list(...) 
- sys_time <- Sys.time() - year <- substr(as.character(sys_time),1,4) - if (is.null(dots$dataset_bibentry)) { - Title <- "Untitled Dataset" - Creator <- person("Author", "Unknown") - - if(is.null(dataset_bibentry$year)) dataset_bibentry$year <- year + dataset_bibentry <- set_default_bibentry() } - dataset_bibentry <- datacite(Title=Title, Creator=Creator) - new_my_tibble(df, + identifier=identifier, dataset_bibentry=dataset_bibentry, var_labels = var_labels, units = units, @@ -122,25 +130,39 @@ as_dataset_df <- function(df, #' @importFrom tibble new_tibble #' @keywords internal new_my_tibble <- function(x, + add_rowid = TRUE, + identifier, dataset_bibentry = NULL, var_labels = NULL, units = NULL, definitions = NULL) { - started_at_time <- Sys.time() assertthat::assert_that(is.data.frame(x), msg="Error: new_my_tibble(x): x is not a data frame") + generated_at_time <- Sys.time() + tmp <- tibble::new_tibble( x, class = "dataset_df", nrow = nrow(x) ) - ended_at_time <- Sys.time() + add_rowid <- ifelse("rowid" %in% names(tmp), FALSE, TRUE) + + if (add_rowid) { + tmp <- tibble::rowid_to_column(tmp) + prefix <- paste0(names(identifier)[1], ":") + tmp$rowid <- defined(paste0(prefix, tmp$rowid), namespace = identifier) + } set_var_labels(tmp, var_labels = var_labels) + if (is.null(dataset_bibentry)) { + dataset_bibentry <- set_default_bibentry() + } + - prov <- default_provenance(started_at_time = started_at_time, ended_at_time = ended_at_time) + prov <- default_provenance(generated_at_time = generated_at_time, + author=dataset_bibentry$author) attr(tmp, "dataset_bibentry") <- dataset_bibentry attr(tmp, "prov") <- prov diff --git a/R/describe.R b/R/describe.R new file mode 100644 index 0000000..7061b13 --- /dev/null +++ b/R/describe.R @@ -0,0 +1,18 @@ +#' @title Describe a dataset +#' @param x A dataset_df object. +#' @param con A connection, for example, \code{con=tempfile()}. 
+#' @return The description of the dataset_df object is written to the connection +#' in the n-triples form, nothing is returned. +#' @examples +#' temp_prov <- tempfile() +#' describe(iris_dataset, con=temp_prov) +#' readLines(temp_prov) +#' @export + +describe <- function(x,con) { + assertthat::assert_that(is.dataset_df(x), + msg="Error: describe(x, con) - x most be a a dataset_df object.") + ntriples_text <- provenance(x) + ntriples_text <- c(ntriples_text, as_dublincore(x, "ntriples")) + writeLines(ntriples_text, con=con) +} diff --git a/R/dublincore.R b/R/dublincore.R index e6611e1..b6453f0 100644 --- a/R/dublincore.R +++ b/R/dublincore.R @@ -39,10 +39,13 @@ #' \code{\link{datacite}} it is a recommended property for discovery. In DataCite, a more complex #' referencing is used. See \code{\link{subject}} and create structured Subject objects with #' \code{\link{subject_create}}. -#' @param date Corresponds to a point or period of time associated with an event in the +#' @param dataset_date Corresponds to a point or period of time associated with an event in the #' lifecycle of the resource. \href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/date/}{dct:date}. #' \code{Date} is also recommended for #' discovery in \code{\link{datacite}}, but it requires a different formatting. +#' To aviod confusion with date-related functions, instead of the DCMITERMS +#' date or the DataCite Date term, the parameter name is +#' \code{dataset_date}. #' @param language The primary language of the resource. Allowed values are taken from #' IETF BCP 47, ISO 639-1 language code. See \code{\link{language}}. Corresponds to Language in Datacite. #' @param format The file format, physical medium, or dimensions of the resource. 
@@ -89,20 +92,22 @@ #' @return \code{dublincore()} creates a \code{utils::\link[utils]{bibentry}} object #' extended with standard Dublin Core bibliographical metadata, \code{as_dublincore()} #' retrieves the contents of this bibentry object of a dataset_df from its -#' attributes, and returns the contents as list, dataset_df, or bibentry object. +#' attributes, and returns the contents as list, dataset_df, or bibentry object, or an +#' ntriples string. #' @examples -#' my_bibentry <- dublincore( +#' my_bibentry <- dct_iris <- dublincore( #' title = "Iris Dataset", #' creator = person("Edgar", "Anderson", role = "aut"), -#' publisher = "American Iris Society", +#' publisher = person("American Iris Society", role="pbl"), +#' contributor = person("Daniel", "Antal", role="dtm"), #' datasource = "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x", -#' date = 1935, +#' dataset_date = 1935, #' language = "en", -#' description = "This famous (Fisher's or Anderson's) iris data set gives the -#' measurements in centimeters of the variables sepal length and width and petal length -#' and width, respectively, for 50 flowers from each of 3 species of iris. -#' The species are Iris setosa, versicolor, and virginica." -#' ) +#' description = "The famous (Fisher's or Anderson's) iris data set gives the +#' measurements in centimeters of the variables sepal length and width and +#' petal length and width, respectively, for 50 flowers from each of 3 +#' species of iris. The species are Iris setosa, versicolor, and virginica." 
+#' ) #' #' as_dublincore(iris_dataset, type="list") #' @export @@ -110,12 +115,12 @@ dublincore <- function( title, creator, - identifier = NULL, + contributor = NULL, publisher = NULL, + identifier = NULL, subject = NULL, type = "DCMITYPE:Dataset", - contributor = NULL, - date = NULL, + dataset_date = NULL, language = NULL, relation = NULL, format = "application/r-rds", @@ -124,7 +129,7 @@ dublincore <- function( description = NULL, coverage = NULL) { - date <- ifelse (is.null(date), ":tba", as.character(date)) + publication_date <- ifelse (is.null(dataset_date), ":tba", as.character(dataset_date)) identifier <- ifelse (is.null(identifier), ":tba", as.character(identifier)) format <- ifelse (is.null(format), ":tba", as.character(format)) relation <- ifelse (is.null(relation), ":unas", relation) @@ -132,15 +137,26 @@ dublincore <- function( rights <- ifelse (is.null(rights), ":tba", as.character(rights)) coverage <- ifelse (is.null(coverage), ":unas", as.character(coverage)) datasource <- ifelse (is.null(datasource), ":unas", as.character(datasource)) + publishers <- ifelse(is.null(publisher), ":unas", publisher) + contributors <- ifelse(is.null(contributor), ":unas", contributor) + creators <- if(is.null(creator)) creators <- ":tba" else creators <- creator + + ## Fix publishers + ## Due to bug in RefManager + publisher <- fix_publisher(publishers=publishers) + + ## Fix contributors + ## Due to bug in RefManager + contributor <- fix_contributor(contributors=contributors) new_dublincore(title = title, - creator = creator, + creator = creators, identifier = identifier, publisher = publisher, subject = subject, type = type, contributor = contributor, - date = date, + publication_date = publication_date, language = language, relation = relation, format = format, @@ -150,147 +166,92 @@ dublincore <- function( coverage = coverage) } -#' @rdname dublincore -#' @param ... Optional parameters to add to a \code{dublincore} object. 
-#' \code{author=person("Jane", "Doe")} adds an author to the citation -#' object if \code{type="dataset"}. -#' @export -as_dublincore <- function(x, type = "bibentry", ...) { +#' @keywords internal +dublincore_to_triples <- function(dclist, dataset_id) { - citation_author <- person(NULL, NULL) + if (is.null(dclist) | is.null(dclist$title) | nchar(dclist$title)==0) { + stop("Error: dublincore_to_triples(dclist, dataset_id): no title found in dclist") + } - is_person <- function(p) ifelse (inherits(p, "person"), TRUE, FALSE) + dctriples <- n_triple(dataset_id, + "http://purl.org/dc/terms/title", + dclist$title) - arguments <- list(...) + if ( !is.null(dclist$description) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/description", + dclist$description)) + } - if (!is.null(arguments$author)) { - if ( is_person(arguments$author)) { - citation_author <- arguments$author - } else { - stop("as_dublincore(x, ..., author = ): author must be created with utils::person().") - } + if ( !is.null(dclist$creator)) { + tcreator <- n_triple(dataset_id, + "http://purl.org/dc/terms/creator", + dclist$creator) + dctriples <- c(dctriples, tcreator) } - if (! type %in% c("bibentry", "list", "dataset")) { - warning_message <- "as_dublincore(ds, type=...) type cannot be " - warning(warning_message, type, ". Reverting to 'bibentry'.") - type <- 'bibentry' + if ( !is.null(dclist$publisher) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/publisher", + dclist$publisher)) } - ds_bibentry <- get_bibentry(x) - dataset_title <- ds_bibentry$title - dataset_creator <- ds_bibentry$author + if ( !is.null(dclist$identifier) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/identifier", + dclist$identifier)) + } - if (! 
is_person(dataset_creator)) { - stop('attr(x, "dataset_bibentry")$author is not a person object.') + if ( !is.null(dclist$subject) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/subject", + dclist$subject)) } - if (!is.null(ds_bibentry$year)) { - if(is.null(ds_bibentry$dataset_date)) { - dataset_date <- as.character(ds_bibentry$year) - } else { - dataset_date <- as.character(ds_bibentry$date) - } - } else { - dataset_date <- ":tba" + if ( !is.null(dclist$type) ) { + + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/type", + gsub("DCMITYPE:", "http://purl.org/dc/terms/DCMIType", dclist$type))) } - dataset_relation <- ifelse (is.null(ds_bibentry$relation), ":unas", as.character(ds_bibentry$relation)) - dataset_identifier <- ifelse (is.null(ds_bibentry$identifier), ":tba", as.character(ds_bibentry$identifier)) - dataset_version <- ifelse (is.null(ds_bibentry$version), ":unas", as.character(ds_bibentry$version)) - dataset_description <- ifelse (is.null(ds_bibentry$description), ":unas", as.character(ds_bibentry$description)) - dataset_language <- ifelse (is.null(ds_bibentry$language), ":unas", as.character(ds_bibentry$language)) - dataset_format <- ifelse (is.null(ds_bibentry$format), ":tba", as.character(ds_bibentry$format)) - dataset_rights <- ifelse (is.null(ds_bibentry$rights), ":tba", as.character(ds_bibentry$rights)) - dataset_coverage <- ifelse (is.null(ds_bibentry$coverage), ":unas", as.character(ds_bibentry$coverage)) - datasource <- ifelse (is.null(ds_bibentry$datasource), ":unas", as.character(ds_bibentry$datasource)) - dataset_contributor <- ifelse (is.null(ds_bibentry$contributor), "", as.character(ds_bibentry$contributor)) - dataset_subject <- ifelse (is.null(ds_bibentry$subject), "", as.character(ds_bibentry$subject)) - dataset_publisher <- ifelse (is.null(ds_bibentry$publisher), "", as.character(ds_bibentry$publisher)) - - if (type == "bibentry") { - new_dublincore(title = dataset_title, - 
creator = dataset_creator, - identifier = dataset_identifier, - publisher = dataset_publisher, - subject = dataset_subject, - type = "DCMITYPE:Dataset", - contributor = dataset_contributor, - date = dataset_date, - language = dataset_language, - relation = dataset_relation, - format = dataset_format, - rights = dataset_rights, - datasource = datasource, - description = dataset_description, - coverage = dataset_coverage) - } else if (type== "list") { - if (dataset_contributor == "") dataset_contributor <- NULL - if (dataset_subject == "") dataset_subject <- NULL - - list(title=dataset_title, - creator=dataset_creator, - identifier = dataset_identifier, - publisher = dataset_publisher, - subject = dataset_subject, - type = "DCMITYPE:Dataset", - contributor = dataset_contributor, - date = date, - language = dataset_language, - relation = dataset_relation, - format = dataset_format, - rights = dataset_rights, - datasource = datasource, - description = dataset_description, - coverage = dataset_coverage) - } else if ( type == "dataset") { - - properties <- c(length(dataset_title), - length(as.character(dataset_creator)), - length(dataset_identifier), - length(dataset_publisher), - length(dataset_subject), - length("DCMITYPE:Dataset"), - length(dataset_contributor), - length(dataset_date), - length(dataset_language), - length(dataset_relation), - length(dataset_format), - length(dataset_rights), - length(datasource), - length(dataset_description), - length(dataset_coverage) - ) - assertthat::assert_that( - all(properties)==1, msg= "In as_dublincore() not all properties have a length 1 to export into datataset (data.frame)." 
- ) - - dataset_df( - data.frame(title = dataset_title, - creator = as.character(dataset_creator), - identifier = dataset_identifier, - publisher = dataset_publisher, - subject = dataset_subject, - type = "DCMITYPE:Dataset", - contributor = dataset_contributor, - date = dataset_date, - language = dataset_language, - relation = dataset_relation, - format = dataset_format, - rights = dataset_rights, - datasource = datasource, - description = dataset_description, - coverage = dataset_coverage), - reference = list( - title = paste0("The Dublin Core Metadata of `", ds_bibentry$title, "'"), - author = citation_author, - year = substr(as.character(Sys.Date()),1,4) - - )) + if ( !is.null(dclist$contributor) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/contributor", + dclist$contributor)) + } + + #if ( !is.null(dclist$date) ) { + # dctriples <- c(dctriples, n_triple(dataset_id, + # "http://purl.org/dc/terms/date", + # dclist$date)) + #} + + if ( !is.null(dclist$language) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/language", + dclist$language)) + } + + if ( !is.null(dclist$datasource) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/source", + dclist$datasource)) + } + + + + if ( !is.null(dclist$coverage) ) { + dctriples <- c(dctriples, n_triple(dataset_id, + "http://purl.org/dc/terms/coverage", + dclist$coverage)) } + n_triples(dctriples) } + #' @keywords internal +#' @importFrom RefManageR BibEntry new_dublincore <- function (title, creator, identifier = NULL, @@ -298,7 +259,7 @@ new_dublincore <- function (title, subject = NULL, type = "DCMITYPE:Dataset", contributor = NULL, - date = NULL, + publication_date = NULL, language = NULL, relation = NULL, format = NULL, @@ -307,21 +268,55 @@ new_dublincore <- function (title, description = NULL, coverage = NULL) { - dublincore_object <- bibentry(bibtype = "Misc", - title = title, - author = creator, - identifier = 
identifier, - publisher = publisher, - contributor = contributor, - year = as.character(substr(date, 1,4)), - language = language, - relation = relation, - format = format, - rights = rights, - description = description, - type = type, - datasource = datasource, - coverage = coverage) + ## Fix publishers + ## Due to bug in RefManager + publisher <- fix_publisher(publisher) + + ## Fix contributors + ## Due to bug in RefManager + contributor <- fix_contributor(contributors=contributor) + + if ( inherits(creator, "list")) { + warning("list", creator) + for (i in 1:length(creator)) { + if ( i ==1 ) { + message(i) + creator <- person(given = creator[[i]]$given, middle = creator[[i]]$middle, family=creator[[i]]$family, + email=creator[[i]]$email, role=creator[[i]]$role, comment=creator[[i]]$comment, + first=creator[[i]]$first, last=creator[[i]]$last) + } else { + mesage(i) + tmp <- person(given = creator[[i]]$given, middle = creator[[i]]$middle, family=creator[[i]]$family, + email=creator[[i]]$email, role=creator[[i]]$role, comment=creator[[i]]$comment, + first=creator[[i]]$first, last=creator[[i]]$last) + creator <- c(creator, tmp) + } + } + + warning("\n", class(creator)) + } + + assertthat::assert_that(all(inherits(creator, "person"))) + + dublincore_object <- RefManageR::BibEntry( + bibtype = "Misc", + title = title, + author = creator, + identifier = identifier, + publisher = publisher, + contributor = contributor, + date = publication_date, + language = language, + relation = relation, + format = format, + rights = rights, + description = description, + type = type, + datasource = datasource, + coverage = coverage) + + assertthat::assert_that(!is.null(dublincore_object$author)) + assertthat::assert_that(inherits(dublincore_object$author, "person")) class(dublincore_object) <- c("dublincore", class(dublincore_object)) dublincore_object @@ -338,3 +333,87 @@ is.dublincore <- function(x) { #' Dublin Core specification. 
#' @exportS3Method is.dublincore.dublincore <- function(x) inherits(x, "dublincore") + + +#' @keywords internal +fix_publisher <- function(publishers) { + + if (is.null(publishers)) return(":unas") + + if ( all(inherits(publishers, "person")) ) { + if( length(publishers)>1 ) { + return_value <- paste0("{", + paste( vapply(publishers, function(x) x$given, character(1)), + collapse="} and {"), + "}" ) + } else { + return_value <- publishers$given + } + } else if ( all(inherits(publishers, "list")) ) { + + if( length(publishers)>1 ) { + return_value <- paste0("{", + paste( lapply(publishers, function(x) x$given), + collapse="} and {"), + "}" ) + } else { + return_value <- publishers[[1]]$given + } + } else if (length(publishers)>1) { + # several character strings + return_value <- paste0("{", + paste( vapply(publishers, function(x) x$given, character(1)), + collapse="} and {"), + "}" ) + } else { + return_value <- publishers + } + + assertthat::assert_that(is.character(return_value), + msg="Error: fix_publishers(publishers): not character but") + assertthat::assert_that(length(return_value)==1, msg="Error: fix_publishers(publishers): not 1" ) + + return_value +} + + +#' @keywords internal +fix_contributor <- function(contributors=NULL) { + + if (is.null(contributors)) return(":unas") + + if ( all(inherits(contributors, "person")) ) { + if( length(contributors)>1 ) { + return_value <- paste0("{", + paste( vapply(contributors, function(x) {as.character(x)}, character(1)), + collapse="} and {"), + "}" ) + } else { + return_value <- as.character(contributors) + } + } else if ( all(inherits(contributors, "list")) ) { + if( length(contributors)>1 ) { + return_value <- paste0("{", + paste( lapply(contributors, function(x) x$given), + collapse="} and {"), + "}" ) + } else { + return_value <- paste(unlist(contributors[[1]]), collapse=" ") + } + } else if (length(contributors)>1) { + # several character strings + return_value <- paste0("{", + paste( vapply(contributors, 
function(x) x$given, character(1)), + collapse="} and {"), + "}" ) + } else { + return_value <- contributors + } + + assertthat::assert_that(is.character(return_value), + msg="Error: fix_contributor(contributors): not character but") + assertthat::assert_that(length(return_value)==1, msg="Error: fix_contributor(contributors): not 1" ) + + return_value <- gsub("* dtm", " [dtm]", return_value ) + return_value +} diff --git a/R/get_bibentry.R b/R/get_bibentry.R index ef67d54..c118958 100644 --- a/R/get_bibentry.R +++ b/R/get_bibentry.R @@ -45,7 +45,6 @@ #' # Print the bibentry object according to the Dublin Core notation: #' as_dublincore(iris_dataset, "list") #' @export - get_bibentry <- function(dataset) { assertthat::assert_that("dataset_bibentry" %in% names(attributes(dataset)), msg="Error: get_bibentry(dataset): dataset has no dataset_bibentry attribute") @@ -62,8 +61,8 @@ get_bibentry <- function(dataset) { if(is.null(value)) { value <- dublincore(title = "Untitled Dataset", - creator = "Unknown Author", - date=year) + creator = person("Unknown Author"), + dataset_date=year) } if(is.null(value$year)) value$year <- year @@ -71,3 +70,13 @@ get_bibentry <- function(dataset) { attr(dataset, "dataset_bibentry") <- value invisible(dataset) } + +#' @keywords internal +set_default_bibentry <- function() { + sys_time <- Sys.time() + year <- substr(as.character(sys_time),1,4) + Title <- "Untitled Dataset" + Creator <- person("Unknown", "Author") + dataset_bibentry <- datacite(Title=Title, Creator=Creator, PublicationYear = year) + dataset_bibentry +} diff --git a/R/id_to_column.R b/R/id_to_column.R index 2e64b96..aba87ae 100644 --- a/R/id_to_column.R +++ b/R/id_to_column.R @@ -27,21 +27,36 @@ id_to_column <- function(x, prefix = "eg:", ids = NULL) { if (is.null(prefix)) { prefix <- "" } - rhs <- x - x$rowid <- paste0(prefix, ids) - lhs <- x[, "rowid", drop=FALSE] - - if (is_dataset) { - - DataBibentry <- get_bibentry(rhs) - tmp <- as_dataset_df(cbind(lhs, rhs), - 
reference = list(author=DataBibentry$author, - title = DataBibentry$title) - ) - attr(tmp, "dataset_bibentry") <- DataBibentry + if ( "rowid" %in% names(x)) { + x$rowid <- paste0(prefix, ids) + return(x) } else { - tmp <- cbind(lhs, rhs) + rhs <- x + x$rowid <- paste0(prefix, ids) + lhs <- x[, "rowid", drop=FALSE] + + if (is_dataset) { + + DataBibentry <- get_bibentry(rhs) + dataset_subject <- subject(rhs) + dataset_prov <- provenance(x) + tmp <- as_dataset_df(cbind(lhs, rhs), + reference = list(author=DataBibentry$author, + title = DataBibentry$title) + ) + attr(tmp, "dataset_bibentry") <- DataBibentry + attr(tmp, "prov") <- dataset_prov + subject(tmp) <- dataset_subject + } else { + tmp <- cbind(lhs, rhs) + } } + + + + + + tmp } diff --git a/R/n_triple.R b/R/n_triple.R index 70e6f9d..92be343 100644 --- a/R/n_triple.R +++ b/R/n_triple.R @@ -50,16 +50,39 @@ n_triple <- function(s,p,o) { create_iri <- function(x) { if ( any(c("list", "data.frame", "tbl", "data.table") %in% class(x)) ) { - stop ("Error: create_iri(x) must be any of an URI, string, integer, double, Date, or dateTime.") + stop ("Error: create_iri(x) must be any of an URI, string, integer, double, Date, dateTime, or person.") } - double_string <- '^^' - integer_string <- '^^' + double_string <- '^^' + integer_string <- '^^' character_string <- '^^' - date_string <- '^^' + date_string <- '^^' + datetime_string <- '^^' + + if(inherits(x, "person")) { + + if ( "isni" %in% tolower(names(x$comment)) ) { + x <- paste0("https://isni.org/isni/", x$comment[which(tolower(names(x$comment))=="isni")]) + } else if ( "orcid" %in% tolower(names(x$comment)) ) { + x <- paste0("https://orcid.org/", x$comment[which(tolower(names(x$comment))=="orcid")]) + } else if ( "viaf" %in% tolower(names(x$comment))) { + x <- paste0("https://viaf.org/viaf/", x$comment[which(tolower(names(x$comment))=="viaf")]) + } else if ( "wikidata" %in% tolower(names(x$comment)) ) { + qid <- x$comment[which(tolower(names(x$comment))=="wikidata")] + 
qid <- gsub("https://www.wikidata.org/wiki/", "", qid) + x <- paste0("https://www.wikidata.org/wiki/", qid) + } else { + tmp <- x + tmp$comment <- "" + tmp$email <- "" + x <- as.character(tmp) + } + } if(is.integer(x)) { sprintf('"%s"%s', as.character(x), integer_string) + } else if (inherits(x, "POSIXct")) { + xsd_convert(x) } else if( is.character(x) & substr(x, 1, 5) %in% c("http:", "https")) { sprintf('<%s>', as.character(x)) } else if ( grepl("^_\\:", x)) { @@ -73,11 +96,11 @@ create_iri <- function(x) { } else if (x=="a") { '' } else if (is.character(x)) { + x <- gsub("DCMITYPE\\:", "http://purl.org/dc/dcmitype/", x) sprintf('"%s"%s', as.character(x), character_string) } } - #' @keywords internal prov_author <- function(author_person) { @@ -85,26 +108,42 @@ prov_author <- function(author_person) { print_name <- "_:" if (!is.null(author_person$family)) print_name <- paste0(print_name, tolower(author_person$family)) if (!is.null(author_person$given)) print_name <- paste0(print_name, tolower(author_person$given)) - orcid <- get_orcid(author_person) + person_iri <- get_person_iri(author_person) } else if (is.character(attr(author_person, "person"))) { print_name <- paste0(attr(author_person, "person"), ": ") } else { print_name <- ""} - if(!is.null(orcid)) { - triple_1 <- n_triple(orcid, "a", "http://www.w3.org/ns/prov#Agent") + if(!is.null(person_iri)) { + n_triple(person_iri, "a", "http://www.w3.org/ns/prov#Agent") + } else { + n_triple(print_name, "a", "http://www.w3.org/ns/prov#Agent") } - - n_triple(print_name, "a", "http://www.w3.org/ns/prov#Agent") } #' @keywords internal -get_orcid <- function(p) { +get_person_iri <- function(p) { + + assertthat::assert_that(inherits(p, "person"), + msg="Error: get_person_iri(p): p is not a utils::person object.") + if (!is.null(p$comment)) { if ( any(c("ORCID", "ORCiD", "orcid") %in% names(p$comment))) { comment_n <- which(names(p$comment) %in% c("ORCID", "ORCiD", "orcid"))[1] - p$comment[comment_n] + orcid <- 
p$comment[comment_n] + if(!grepl("https://orcid.org/", orcid)) orcid <- paste0("https://orcid.org/", orcid) + orcid + } else if ( any("isni" %in% tolower(names(p$comment)) )) { + comment_n <- which(tolower(names(p$comment)) == "isni")[1] + isni <- p$comment[comment_n] + if(!grepl("https://isni.org/isni/", isni)) isni <- paste0("https://isni.org/isni/", isni) + isni + } else if ( any("viaf" %in% tolower(names(p$comment))) ) { + comment_n <- which(tolower(names(p$comment)) == "viaf")[1] + viaf <- p$comment[comment_n] + if(!grepl("http://viaf.org/viaf/", viaf)) viaf <- paste0("http://viaf.org/viaf/", viaf) + viaf } } else NULL } diff --git a/R/provenance.R b/R/provenance.R index fc3f42b..e76bc44 100644 --- a/R/provenance.R +++ b/R/provenance.R @@ -40,23 +40,28 @@ provenance <- function(x) { #' @keywords internal default_provenance <- function(dataset_id = "http://example.com/dataset#", - creator_id =NULL, - started_at_time = started_at_time, - ended_at_time = ended_at_time) { + author = NULL, + dtm = NULL, + generated_at_time = NULL) { cite_dataset <- utils::citation("dataset") - if(is.null(creator_id)) creator_statement <- NULL else { - creator_statement <- n_triple(creator_id, "a", "http://www.w3.org/ns/prov#Agent") - } + + agent_triples <- prov_author(author) + if((!is.null(dtm))) c(agent_tripels, prov_author(dtm)) + + if (is.null(generated_at_time)) generated_at_time <- Sys.time() + bundle_id <- gsub("#", "_prov.nt", dataset_id) prov <- n_triples( - c(n_triple(dataset_id, "a", "http://purl.org/linked-data/cube#DataSet"), - creator_statement, + c(n_triple(bundle_id, "a", "http://www.w3.org/ns/prov#Bundle"), + n_triple(dataset_id, "a", "http://www.w3.org/ns/prov#Entity"), + n_triple(dataset_id, "a", "http://purl.org/linked-data/cube#DataSet"), + n_triple(dataset_id, "a", "http://purl.org/linked-data/cube#DataSet"), + agent_triples, + n_triple("https://doi.org/10.32614/CRAN.package.dataset", "a", "http://www.w3.org/ns/prov#SoftwareAgent"), 
n_triple("http://example.com/creation", "a", "http://www.w3.org/ns/prov#Activity"), - n_triple("http://example.com/creation", "http://www.w3.org/ns/prov#startedAtTime", xsd_convert(started_at_time) ), - n_triple("http://example.com/creation", "http://www.w3.org/ns/prov#endedAtTime", xsd_convert(ended_at_time) ), + n_triple("http://example.com/creation", "http://www.w3.org/ns/prov#generatedAtTime", generated_at_time ), n_triple(paste0("https://doi.org/", cite_dataset[[2]]$doi), "a", "http://www.w3.org/ns/prov#SoftwareAgent") ) ) + prov } - - diff --git a/R/publication_year.R b/R/publication_year.R index b3965c2..8307e86 100644 --- a/R/publication_year.R +++ b/R/publication_year.R @@ -25,27 +25,27 @@ publication_year <- function(x) { msg = "publication_year(x): x must be a dataset object created with dataset() or as_dataset().") ds_bibentry <- get_bibentry(x) - as.character(ds_bibentry$year) + as.character(ds_bibentry$date) } #' @rdname publication_year #' @export `publication_year<-` <- function(x, overwrite = TRUE, value) { assert_that(is.dataset_df(x), - msg = "publication_year(x) <- value: x must be a dataset object created with dataset() or as_dataset().") + msg = "publication_year(x) <- value: x must be a dataset object created with dataset_df() or as_dataset_df().") ds_bibentry <- get_bibentry(x) - publication_year <- ds_bibentry$year + publication_year <- ds_bibentry$date if (is.null(value)) { value <- ":unas" } if ( overwrite ) { - ds_bibentry$year <- as.character(value) + ds_bibentry$date <- as.character(value) attr(x, "dataset_bibentry") <- ds_bibentry } else { - warning ("The dataset has already an publication_year: ", ds_bibentry$year, "." ) + warning ("The dataset has already an publication_year: ", ds_bibentry$date, "." 
) } invisible(x) } diff --git a/R/subject.R b/R/subject.R index cc7e6ae..f2a04ab 100644 --- a/R/subject.R +++ b/R/subject.R @@ -146,8 +146,7 @@ new_Subject <- function(term, stop("subject(x, value)<- : value must be a created with 'subject_create()` or it must be a character string.") } - ds_bibentry$subject <- value - ds_bibentry$subject + ds_bibentry$subject <- ifelse(is.character(value), value, value$term) attr(x, "dataset_bibentry") <- ds_bibentry attr(x, "subject") <- value invisible(x) diff --git a/R/var_namespace.R b/R/var_namespace.R index 4b19873..85b3b4d 100644 --- a/R/var_namespace.R +++ b/R/var_namespace.R @@ -18,7 +18,9 @@ #' with variable labels, namespaces, units of measures, and machine-independent #' permanent variable identifiers. #' @examples -#' qid = defined(c("Q275912", "Q116196078"), namespace = "https://www.wikidata.org/wiki/") +#' qid = defined(c("Q275912", "Q116196078"), +#' namespace = c(wd="https://www.wikidata.org/wiki/") +#' ) #' var_namespace(qid) #' #' # To remove a namespace diff --git a/R/xsd_convert.R b/R/xsd_convert.R index 4e94072..2d78f77 100644 --- a/R/xsd_convert.R +++ b/R/xsd_convert.R @@ -193,9 +193,10 @@ xsd_convert.factor<- function(x, idcol=NULL, ... ) { #' @export #' @exportS3Method xsd_convert.POSIXct <- function(x, idcol=NULL, ...) 
{ - time_gmt <- as.POSIXct(x, tz = "GMT") - time_string <- paste0(as.character(as.Date(time_gmt)), "T", - strftime(time_gmt, format="%H:%M:%S"), "Z") + + time_utc <- as.POSIXct(x, tz = "UTC") + time_string <- paste0(as.character(as.Date(time_utc)), "T", + strftime(time_utc, format="%H:%M:%S"), "Z") paste0('\"', time_string, '\"', "^^") } diff --git a/README.Rmd b/README.Rmd index e51738d..3a86b67 100644 --- a/README.Rmd +++ b/README.Rmd @@ -25,7 +25,7 @@ rlang::check_installed("here") [![CRAN_time_from_release](https://www.r-pkg.org/badges/ago/dataset)](https://cran.r-project.org/package=dataset) [![Status at rOpenSci Software Peer Review](https://badges.ropensci.org/553_status.svg)](https://github.com/ropensci/software-review/issues/553) [![DOI](https://zenodo.org/badge/DOI/10.32614/CRAN.package.dataset.svg)](https://zenodo.org/record/6950435#.YukDAXZBzIU) -[![devel-version](https://img.shields.io/badge/devel%20version-0.3.4-blue.svg)](https://github.com/dataobservatory-eu/dataset) +[![devel-version](https://img.shields.io/badge/devel%20version-0.3.4001-blue.svg)](https://github.com/dataobservatory-eu/dataset) [![dataobservatory](https://img.shields.io/badge/ecosystem-dataobservatory.eu-3EA135.svg)](https://dataobservatory.eu/) diff --git a/_pkgdown.yml b/_pkgdown.yml index a47c517..caf3fb2 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -34,6 +34,7 @@ articles: contents: - dataset_df - bibentry + - rdf - title: "Semantic Enrichment of Variables" desc: > Enriching metadata about individual variables. 
@@ -59,6 +60,12 @@ reference: - var_namespace - as_numeric - as_character + - title: "Describe" + desc: > + Describe the metadata in the Resource Description Framework + contents: + - describe + - xsd_convert - title: "Bibliography functions" desc: > Functions to add and retain bibliographical data about the whole dataset, not its @@ -87,6 +94,5 @@ reference: contents: - n_triples - n_triple - - xsd_convert - dataset_to_triples - id_to_column diff --git a/data-raw/prepare_iris_dataset.R b/data-raw/prepare_iris_dataset.R index a5f9633..4d9395b 100644 --- a/data-raw/prepare_iris_dataset.R +++ b/data-raw/prepare_iris_dataset.R @@ -5,7 +5,7 @@ iris_doi <- "10.5281/zenodo.10396807" iris_dataset <- dataset_df( rowid = defined(paste0("#", row.names(iris)), label = "ID in the iris dataset", - namespace="10.5281/zenodo.10396807"), + namespace=c("#"= "10.5281/zenodo.10396807")), Sepal.Length = defined(eg_iris$Sepal.Length, label = "Length of the sepal in cm", unit = "centimeter", @@ -28,27 +28,28 @@ iris_dataset <- dataset_df( namespace = "Iris" ), dataset_bibentry = dublincore( title = "Iris Dataset", - creator = person(given="Edgar", family="Anderson", role = "aut"), + creator = person(given="Edgar", family="Anderson", role = "cre", comment=c(VIAF="http://viaf.org/viaf/6440526")), + contributor = person(given="Antal", family="Daniel", role = "dtm"), identifier = "https://doi.org/10.5281/zenodo.10396807", publisher = "American Iris Society", datasource = "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x", - date = 1935, + dataset_date = 1935, language = "en", description = "The famous (Fisher's or Anderson's) iris data set." 
), - subject = subject_create(term = "Irises (plants)", + dataset_subject = subject_create(term = "Irises (plants)", schemeURI = "http://id.loc.gov/authorities/subjects", valueURI = "https://id.loc.gov/authorities/subjects/sh85068079", subjectScheme = "LCCH", prefix = "lcch:") ) -subject(iris_dataset) -print(get_bibentry(iris_dataset), "Bibtex") -attributes(iris_dataset$Species) -get_bibentry(iris_dataset) -var_definition(iris_dataset$Sepal.Length) +dataset_bibentry <- get_bibentry(iris_dataset) +toBiblatex(dataset_bibentry) +dataset_bibentry$date + + #DSD <- DataStructure(iris_dataset) #DSD$Sepal.Length$label <- "The sepal length of iris specimen in centimeters." #DSD$Petal.Length$label <- "The petal length of iris specimen in centimeters." @@ -59,14 +60,9 @@ var_definition(iris_dataset$Sepal.Length) #describe(iris_dataset) #attr(iris_dataset, "DataStructure") <- DSD -prov <- n_triples( - c(n_triple("https://doi.org/10.5281/zenodo.10396807", "a", "http://purl.org/linked-data/cube#DataSet"), - n_triple("https://orcid.org/0000-0001-7513-6760", "a", "http://www.w3.org/ns/prov#Agent"), - n_triple("https://doi.org/10.5281/zenodo.6703764.", "a", "http://www.w3.org/ns/prov#SoftwareAgent") - ) -) -attr(iris_dataset, "prov") <- prov +provenance(iris_dataset) + usethis::use_data(iris_dataset, overwrite = TRUE) #snakecase::to_title_case("Edgar Anderson's Iris Data (For Testing the dataset R package)") @@ -102,17 +98,3 @@ readLines(tempcon) -attr(iris_dataset, "prov") <- prov -provenance(iris_dataset) -get_bibentry(iris_dataset) - - -iris_dataset - -var_label(iris_dataset) -var_unit(iris_dataset$Sepal.Length) - - -#readRDS(file.path("not_included", "iris_dataset.rds")) - -#readRDS(file.path("not_included", "iris_dataset.csv")) diff --git a/data/iris_dataset.rda b/data/iris_dataset.rda index 90d6ab04395eadd0a368cbdf5e20c88a0e6d9469..ef0388640f84038f2e6942e38518f420d65928a6 100644 GIT binary patch delta 2481 zcmV;i2~PH!5w;V5LRx4!F+o`-Q(3|1mwf;ant$*A|NsBl=l}QrXW#q3|Ns0z0s%1q 
zARs14AV>jiZQu)kdZwoqI1mhXOW1e3^anZvkZ2TL+JZ>}8jPlx)X>CfQ~eR>srZc) z^FVMJY$913&-(003k(0001K>INV(02@#M00L9Wl$6i_8U}y>2dDr50000P z000004FL5BG{nFr0GK09FaRb1O$`i800002FpL5;#K>R>l0-%%%@7)g=`{w882}9o zgH1Hh0iZO04FC{rTP0EJ&*@zAs9kNL4+BIhJ{;7$!Q>ph8b2b8s?LC zY}BTuwj74UWp|vM+7~xQN+DUTYgMnC`&}V@#~4M^F+L8atC-M3cZee5f`}@r<_8_T zH{1R8=KyeK>j424VzI4Z6wkaxq8BU*01S4Zv_i#yidI2W<;l*qUwwebempg|-CtI_ z?AOD8KkBDPuLW@hAU+(pad~CJ$?ZjKy{nLBdz=#7 zgXbS8xH!vZ!E5WSp`^zQLjX7d$cDP1k|F_rc&Y?dU1)*e3;{?zB|s~viZb692kQgyZ6(JQN0SOfLwFn8h-dk2x1smH3H@~*{FQC=Kls!&<`%5mE zk_jM^2_TX{s3d|&B!WmJf=DESW;l+2IOHA4r=gI60MWO*^tPqb6a|1*O)ojY(zxY} zJlF-T)-YZ-eq3I4oL3lE$%nnvqywqe5?96CL8>Z@m4$~)t(Uk1*x9MfErB_e`(8jy z0m60K%WQ@m9ef!os;Zup(ii0vMMPSWTt;?eU2{yXy6v*tKqEAxTMkT7G^3e+utu;i zTFfL43Co7FHjvUPyH%~yLNT2wc)e}wz!8ca7{yhQ zn;8pIF}8J1>#n-c1%g5lO+G~u7R^;x5Y4n=m8p5sWibxge6-|tEt`S?f|sytxF7|DkX_yS16=xjgq$8ZKVJf zh>}<$u1#k-P2Jtx0)!;eMMT)DB_@q2S5w(ryMS3Kq$U(HkjdhBu5T}YK$v8c1doHN z5B2;jpNrG#_5ECqV0L21?C;v7xe8zrI)s zU{NP(H@CH`ph_oepLU5Nk&=;?1C{Eu_QLe0VE+gsWkE`(#yl&I)` zi*}?&Bcdv3hKHNc_fbHr7Y$Dp768|8xyW0(M}ut(Pre^1f#u90G4@(;4n~z_+xH{I z3%0WaN@m0!^rpcRlz={25kXQyODH&BL^d>UNJvVUU&>Q>Qkj8?Iwq4IfV1NqbdjPu zM#&yCU!`0G%DzIa8XtGaRCrQqJ?Lq z5Yu(bfM?>@A{;=|;evUv~ zKVNp?RH#O4(nm`ODBf=MK(O!=ee3oG_@c=^PLSC;P;Lo_K8BnTlCDaSEDhOtUMqt* ztTr4lft==phmgI~6A9{c@d@bd(Bac{rLJS@@8mOsN?We6_~W zA?A5li?z%Qil-7nBUSL!0{+&2Akr}TEJSv3i#>=nc47DOUPY7%&Ka5QiL+)FDM`jG z`*9L1xjj91wOq=T zv9y14YBCb2Dl15+E`OVUh?08fX*t~~$j(qfGV7B;gzd0hwueVwiBT72P!n%$WCwWDB#GLcnvS)s5yxurveQ7~Q88LAUPM}K0!E2k%Ujr& vOR8FuR+1>~ghZQu%jdVR+6w|EVyI`Hpc*a6|`W*JhJb#RhF2BSk!=n&IQPg6}#Ca0rG zw1${WO%F&kJwcJ7qf8SpIB%;6w|CnE(I)000000000000000000000G+#9 z*0rsFYghmP0000000000wHDLwH@oWMWMbNKj}{R`#7EK1v%MX*n;VBx-G@Dy?r=w5 z4~KMka63z>YIZal2E;HJuWD@Xs1hixg+L-A;bBrck%%D&=%@$X6mmj2_!-S%pOjM^ z0~b(L3B;vYnUP7Vq6Vd)m4#(gQI&;~SsI#un=%C?S&B&;Gca6gP*y6zs3f99X+;%S ziD+DuQAZe|1&CHC85KlvVw#F*TB<3rNg;?5$YE@mQfydGKx2Z87$^}-Q3V1J2?SD! zFpAO;SS(Tr1`322qya#P{aC66K!8AF0D%$^G*aZ1STP|63N;EvkXkJXK&_yJfWeA? 
zNT~>^2&!jZ${Gs5kRgAf4(=VW0_p~wdqJ0HXh{T+Nd%BdAW{h+k_jM^2_TXQAe{Vi z_c^nNsM)e`ZS~!(lWr}gTFq&AmanT*f%|%4;K$1>ym;Hh7u5_-9Ep%~yRiIJ%Y$<4tut}i@S|~erCOp3$9dZeyFQ;rp!xAB%033GldDq&0F)& z95TyHz$8eMi7=4#cZtw0e= zsWiJ8AmEu_c; zk<$jO>vSudx-rK5ZI!KS07#X8@p?@zGfHq#efZsMv9+uLmH1Lp#X4a}O478qgqKm# zsx-eoN-+6uAl|)u6MW*|=d^Y+T@*Nx=t81Mk}@%a5Uc|waP3pjMIEJ7Tof5%c!<%$ zKxP!C6jE{=gcPD45_VHn!~+v|cXfwTcDGF2N+BH0a1?5BMa&d6lSCmd*8t5;igQwR z6^dkBXe_0i3E|Pf(QF2V#-zv*5RpPjXe4NrB*~S+NtUH^Yc-RY1tNdsbzPS)r1^ed zGrsWr{!cR3PF(Dtw}z?@zsJPxOV}Tf-YI>+^3l!dhiAxfqaFL0Iy2+uW-;iU!?&%e zduME2XYm0*TmKWF1Vp#E4j-}cW*M4}ywY3+sV|bwcV7Jpq^B-cmWpIP*E8+TTLnrg zV}kx+E(4e6;zU!7DCvLDIbEj~K(DvvrZ;7q!LSux(58U7loN~WIO4#AetL%%<-7Zg z)*52nId(YjnT$BWsV@%C7CmMbT<~Z|D7D-VWvs8O|5V`^A z3iJtRgooxe&4gRR*5wCE#$bUp0}y>u_*{6Ug1E69#TC{Rb#Q-JQD49OUivPHL@Xei zuzA1i+h*)LX4AW-#P9F1^_kLF z`6r;`(;ha>D|d5R2+jswgHT!`_2P&T9JV~XBIfzrJE-mu#vGvJE7&$}a_^P;DGi_C zD2;`mco9yJn|lKufrt_IUSuq4jobHQ=?I|edS zPK{dh0bPdt;Iwv%A9lN8TqUS!=2gTK9EH)6l}bod8}oTSGkJ}11@yEk7v|=$tK2bh zlbjoGhs)F)psm5T$S&-&NKe9&AufEb;7L5qsZuRtlG1-yTuhX70>o%#!WWA^G8r^X zifGqfPEoljDUp?&jaJm%y}xC0Uzb6n&Te6d$vdxfLej4_%EdqoGn1W)-mBql!~=H< zi0wCziEScz<`vgfHZkQu<}Aemi>#wN5bCcq<{WA>({i`sVi_q}D?*?^i&q1c#pma~ z@YBF%WSf6!mCR*E!F1Amu?Q`KoNVqK;RaaZ4U69bu8bnoMeSTFh(WGs1BIJpTcWZc zqWx!bP-I3Mu+<0zP&oxK!7=U|R1Ew%8m0m3d2kMrA`jEmI@7Hg2|+oHBA~hdzSK!S z;u@WXsnpI;K{EE_FU@l$J&0_%nrd#|zVtjbal~5x`F2aR*T*iX2OvPlR0PP#Spm?r z$s&FuvNq$6H6Ps{%AQm$p`@Irlj-R4X;*DTm5pm-OOoYFQcAu>8{|bZXc2QbrVE@p U0Q9R4u}A(ck diff --git a/man/datacite.Rd b/man/datacite.Rd index db79e2a..5d6008c 100644 --- a/man/datacite.Rd +++ b/man/datacite.Rd @@ -174,7 +174,7 @@ as_datacite(iris_dataset) } \seealso{ Other bibentry functions: -\code{\link{dublincore}()}, +\code{\link{as_dublincore}()}, \code{\link{get_bibentry}()} } \concept{bibentry functions} diff --git a/man/dataset_df.Rd b/man/dataset_df.Rd index d60017d..3d8784e 100644 --- a/man/dataset_df.Rd +++ b/man/dataset_df.Rd @@ -10,15 +10,17 @@ \usage{ dataset_df( ..., - 
dataset_bibentry = NULL, + identifier = c(eg = "http://example.com/dataset#"), var_labels = NULL, units = NULL, definitions = NULL, + dataset_bibentry = NULL, dataset_subject = NULL ) as_dataset_df( df, + identifier = c(eg = "http://example.com/dataset#"), var_labels = NULL, units = NULL, definitions = NULL, @@ -36,9 +38,11 @@ is_dataset_df(x) \arguments{ \item{...}{The vectors (variables) that should be included in the dataset.} -\item{dataset_bibentry}{A list of bibliographic references and descriptive metadata -about the dataset as a whole created with \code{\link{datacite}} or -\code{\link{dublincore}}.} +\item{identifier}{Defaults to \code{c(eg="http://example.com/dataset#")}, which should be +changed to the permanent identifier of the dataset. For example, if your dataset will be +released with the Digital Object Identifier (DOI) \verb{https://doi.org/1234}, you should use +a short prefixed identifier like \code{c(obs="https://doi.org/1234#")}, which will resolve +to the rows being identified as https://doi.org/1234#1...https://doi.org/1234#n.} \item{var_labels}{The long, human readable labels of each variable.} @@ -46,6 +50,10 @@ about the dataset as a whole created with \code{\link{datacite}} or \item{definitions}{The linked definitions of the variables, attributes, or constants.} +\item{dataset_bibentry}{A list of bibliographic references and descriptive metadata +about the dataset as a whole created with \code{\link{datacite}} or +\code{\link{dublincore}}.} + \item{dataset_subject}{The subject of the dataset, see \code{\link{subject}}.} \item{df}{A \code{data.frame} to be converted to \code{dataset_df}.} diff --git a/man/describe.Rd b/man/describe.Rd new file mode 100644 index 0000000..08d5e5a --- /dev/null +++ b/man/describe.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/describe.R +\name{describe} +\alias{describe} +\title{Describe a dataset} +\usage{ +describe(x, con) +} +\arguments{ +\item{x}{A
dataset_df object.} + +\item{con}{A connection, for example, \code{con=tempfile()}.} +} +\value{ +The description of the dataset_df object is written to the connection +in the n-triples form, nothing is returned. +} +\description{ +Describe a dataset +} +\examples{ +temp_prov <- tempfile() +describe(iris_dataset, con=temp_prov) +readLines(temp_prov) +} diff --git a/man/dublincore.Rd b/man/dublincore.Rd index fd77632..4148ba0 100644 --- a/man/dublincore.Rd +++ b/man/dublincore.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dublincore.R -\name{dublincore} -\alias{dublincore} +% Please edit documentation in R/as_dublincore.R, R/dublincore.R +\name{as_dublincore} \alias{as_dublincore} +\alias{dublincore} \alias{is.dublincore} \alias{is.dublincore.dublincore} \title{Add or get Dublin Core metadata} @@ -10,15 +10,17 @@ \href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/terms/format/}{ DCMI Metadata Terms}. } \usage{ +as_dublincore(x, type = "bibentry", ...) + dublincore( title, creator, - identifier = NULL, + contributor = NULL, publisher = NULL, + identifier = NULL, subject = NULL, type = "DCMITYPE:Dataset", - contributor = NULL, - date = NULL, + dataset_date = NULL, language = NULL, relation = NULL, format = "application/r-rds", @@ -28,13 +30,23 @@ dublincore( coverage = NULL ) -as_dublincore(x, type = "bibentry", ...) - is.dublincore(x) \method{is.dublincore}{dublincore}(x) } \arguments{ +\item{x}{An object that is tested if it has a class "dublincore".} + +\item{type}{The nature or genre of the resource. Recommended best practice is to use a controlled vocabulary such as the DCMI Type Vocabulary +\href{https://www.dublincore.org/specifications/dublin-core/dcmi-type-vocabulary/}{DCMITYPE}. +For a dataset, the correct term is \code{Dataset}. 
+To describe the file format, physical medium, or dimensions of the resource, use the +Format element.} + +\item{...}{Optional parameters to add to a \code{dublincore} object. +\code{author=person("Jane", "Doe")} adds an author to the citation +object if \code{type="dataset"}.} + \item{title}{\href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/title/}{dct:title}, a name given to the resource. \code{\link{datacite}} allows the use of alternate titles, too. See \code{\link{dataset_title}}.} @@ -42,14 +54,8 @@ is.dublincore(x) \href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/creator/}{dct:creator} Corresponds to \code{Creator} in \code{\link{datacite}}. See \code{\link{creator}}.} -\item{identifier}{An unambiguous reference to the resource within a given context. -Recommended practice is to identify the resource by means of a string conforming to an -identification system. Examples include International Standard Book Number (ISBN), -Digital Object Identifier (DOI), and Uniform Resource Name (URN). -Select and identifier scheme from -\href{https://www.ukoln.ac.uk/metadata/dcmi-ieee/identifiers/index.html}{registered URI schemes maintained by IANA}. -More details: \href{https://www.ukoln.ac.uk/metadata/dcmi-ieee/identifiers/}{Guidelines for using resource identifiers in Dublin Core metadata and IEEE LOM}. -Similar to \code{Identifier} in \code{\link{datacite}}. See \code{\link{identifier}}.} +\item{contributor}{An entity responsible for making contributions to the dataset. See +\href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/contributor/}{DCMI: Contributor}.} \item{publisher}{Corresponds to \href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#publisher}{dct:publisher} and Publisher in DataCite. @@ -61,22 +67,25 @@ code repository. 
If there is an entity other than a code repository, that code, use the property Contributor/contributorType/hostingInstitution for the code repository. See \code{\link{publisher}}.} +\item{identifier}{An unambiguous reference to the resource within a given context. +Recommended practice is to identify the resource by means of a string conforming to an +identification system. Examples include International Standard Book Number (ISBN), +Digital Object Identifier (DOI), and Uniform Resource Name (URN). +Select an identifier scheme from +\href{https://www.ukoln.ac.uk/metadata/dcmi-ieee/identifiers/index.html}{registered URI schemes maintained by IANA}. +More details: \href{https://www.ukoln.ac.uk/metadata/dcmi-ieee/identifiers/}{Guidelines for using resource identifiers in Dublin Core metadata and IEEE LOM}. +Similar to \code{Identifier} in \code{\link{datacite}}. See \code{\link{identifier}}.} + \item{subject}{Defaults to \code{NULL}. See \code{\link{subject}} to add subject descriptions to your dataset.} -\item{type}{The nature or genre of the resource. Recommended best practice is to use a controlled vocabulary such as the DCMI Type Vocabulary -\href{https://www.dublincore.org/specifications/dublin-core/dcmi-type-vocabulary/}{DCMITYPE}. -For a dataset, the correct term is \code{Dataset}. -To describe the file format, physical medium, or dimensions of the resource, use the -Format element.} - -\item{contributor}{An entity responsible for making contributions to the dataset. See -\href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/contributor/}{DCMI: Contributor}.} - -\item{date}{Corresponds to a point or period of time associated with an event in the +\item{dataset_date}{Corresponds to a point or period of time associated with an event in the lifecycle of the resource. \href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/date/}{dct:date}.
\code{Date} is also recommended for -discovery in \code{\link{datacite}}, but it requires a different formatting.} +discovery in \code{\link{datacite}}, but it requires a different formatting. +To avoid confusion with date-related functions, instead of the DCMITERMS +date or the DataCite Date term, the parameter name is +\code{dataset_date}.} \item{language}{A language of the dataset. See \href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/language/}{DCMI: Language}.} @@ -108,18 +117,13 @@ an abstract, a table of contents, a graphical representation, or a free-text acc applicability of the dataset, or jurisdiction under which the dataset is relevant. See \href{https://www.dublincore.org/specifications/dublin-core/dcmi-terms/elements11/coverage/}{DCMI: Coverage}.} - -\item{x}{An object that is tested if it has a class "dublincore".} - -\item{...}{Optional parameters to add to a \code{dublincore} object. -\code{author=person("Jane", "Doe")} adds an author to the citation -object if \code{type="dataset"}.} } \value{ \code{dublincore()} creates a \code{utils::\link[utils]{bibentry}} object extended with standard Dublin Core bibliographical metadata, \code{as_dublincore()} retrieves the contents of this bibentry object of a dataset_df from its -attributes, and returns the contents as list, dataset_df, or bibentry object. +attributes, and returns the contents as list, dataset_df, or bibentry object, or an +ntriples string. A logical value, if the bibliographic entries are listed according to the Dublin Core specification. @@ -142,18 +146,19 @@ The \code{ResourceType} property will be by definition "Dataset". The \code{Size} attribute (e.g. bytes, pages, inches, etc.) will automatically added to the dataset.
} \examples{ -my_bibentry <- dublincore( +my_bibentry <- dct_iris <- dublincore( title = "Iris Dataset", creator = person("Edgar", "Anderson", role = "aut"), - publisher = "American Iris Society", + publisher = person("American Iris Society", role="pbl"), + contributor = person("Daniel", "Antal", role="dtm"), datasource = "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x", - date = 1935, + dataset_date = 1935, language = "en", - description = "This famous (Fisher's or Anderson's) iris data set gives the - measurements in centimeters of the variables sepal length and width and petal length - and width, respectively, for 50 flowers from each of 3 species of iris. - The species are Iris setosa, versicolor, and virginica." - ) + description = "The famous (Fisher's or Anderson's) iris data set gives the + measurements in centimeters of the variables sepal length and width and + petal length and width, respectively, for 50 flowers from each of 3 + species of iris. The species are Iris setosa, versicolor, and virginica." + ) as_dublincore(iris_dataset, type="list") } diff --git a/man/get_bibentry.Rd b/man/get_bibentry.Rd index 5dda142..a424e44 100644 --- a/man/get_bibentry.Rd +++ b/man/get_bibentry.Rd @@ -60,7 +60,7 @@ as_dublincore(iris_dataset, "list") } \seealso{ Other bibentry functions: -\code{\link{datacite}()}, -\code{\link{dublincore}()} +\code{\link{as_dublincore}()}, +\code{\link{datacite}()} } \concept{bibentry functions} diff --git a/man/var_namespace.Rd b/man/var_namespace.Rd index 9b2b878..6ea2fd1 100644 --- a/man/var_namespace.Rd +++ b/man/var_namespace.Rd @@ -52,7 +52,9 @@ with variable labels, namespaces, units of measures, and machine-independent permanent variable identifiers. 
} \examples{ -qid = defined(c("Q275912", "Q116196078"), namespace = "https://www.wikidata.org/wiki/") +qid = defined(c("Q275912", "Q116196078"), + namespace = c(wd="https://www.wikidata.org/wiki/") + ) var_namespace(qid) # To remove a namespace diff --git a/tests/testthat/test-agent.R b/tests/testthat/test-agent.R new file mode 100644 index 0000000..cb1da73 --- /dev/null +++ b/tests/testthat/test-agent.R @@ -0,0 +1,22 @@ + +test_that("agent()<- assignment works", { + p1 <- person(given="Jane", family="Doe", role=c("cre", "aut")) + p2 <- person(given="Joe", family= "Doe", role=c("aut", "dtm")) + p3 <- person(given="Publisher Inc", role=c("pbl")) + test_df <- dataset_df(data.frame(a=1, b=2)) + expect_equal(get_bibentry(test_df)$author, person("Author", "Unknown")) + expect_error(agent(a=1)<-person("Jane Doe")) + agent(x=test_df) <- c(p1,p2,p3) +}) + +test_that("agent() works", { + p1 <- person("Jane", "Doe", role=c("cre", "aut")) + p2 <- person("Joe", "Doe", role=c("aut", "dtm")) + p3 <- person("Publisher Inc", role=c("pbl")) + expect_equal(length(agent(x=c(p1, p2, p3))), 3) + expect_equal(agent(x=c(p1, p2, p3))$creators, person("Jane", "Doe", role=c("cre", "aut"))) + expect_null(agent(x=c(p1, p2, p3))$contributors) + expect_equal(agent(x=c(p1, p2, p3))$publisher, person("Publisher Inc", role=c("pbl"))) +}) + + diff --git a/tests/testthat/test-creator.R b/tests/testthat/test-creator.R index 1ea22b2..8be1dbf 100644 --- a/tests/testthat/test-creator.R +++ b/tests/testthat/test-creator.R @@ -1,5 +1,6 @@ test_that("creator() works", { - expect_equal(creator(iris_dataset), person(given="Edgar", family="Anderson", role = "aut")) + expect_equal(creator(iris_dataset), person(given="Edgar", family="Anderson", role = "cre", + comment = c(VIAF="http://viaf.org/viaf/6440526"))) }) @@ -15,7 +16,7 @@ test_that("creator() <- value works without overwrite", { iris_dataset_3 <- iris_dataset creator(x=iris_dataset_3, overwrite=FALSE) <- person("Jane", "Doe") 
expect_equal(creator(iris_dataset_3), - c(person(given="Edgar", family="Anderson", role = "aut"), person("Jane", "Doe"))) + c(person(given="Edgar", family="Anderson", role = "cre", comment = c(VIAF="http://viaf.org/viaf/6440526")), person("Jane", "Doe"))) }) diff --git a/tests/testthat/test-dataset_df.R b/tests/testthat/test-dataset_df.R index 0d287c4..97d113d 100644 --- a/tests/testthat/test-dataset_df.R +++ b/tests/testthat/test-dataset_df.R @@ -2,8 +2,23 @@ test_that("dataset_df() works", { expect_equal(is.dataset_df(dataset_df(mtcars)), TRUE) expect_false(is.dataset_df(mtcars)) expect_equal(get_bibentry(dataset_df(mtcars))$author, person("Author", "Unknown")) + expect_equal(dataset_df(mtcars, identifier = c(mt="http:/mtcars.com/dataset#"))$rowid, + defined(paste0("mt:", 1:nrow(mtcars)), namespace=c(mt="http:/mtcars.com/dataset#"))) + my_dataset <- dataset_df( + country_name = defined( + c("AD", "LI"), + definition = "http://data.europa.eu/bna/c_6c2bb82d", + namespace = "https://www.geonames.org/countries/$1/"), + gdp = defined( + c(3897, 7365), + label = "Gross Domestic Product", + unit = "million dollars", + definition = "http://data.europa.eu/83i/aa/GDP") + ) + expect_equal(var_label(my_dataset$gdp), "Gross Domestic Product") }) + test_that("dataset_df() works", { test_dataset <- dataset_df(a=3, dataset_bibentry = datacite(Title="Hello", Creator = "Jane Doe")) expect_equal(get_bibentry(test_dataset)$author, person("Jane", "Doe")) @@ -17,9 +32,9 @@ test_that("subsetting works", { }) test_that("new_my_tibble() works", { - myiris <- new_my_tibble(x=iris) + myiris <- new_my_tibble(x=iris, identifier="example") expect_error(new_my_tibble(2)) - expect_equal(class(new_my_tibble(iris)), c("dataset_df", "tbl_df", "tbl", "data.frame")) + expect_equal(class(new_my_tibble(iris, identifier = "example")), c("dataset_df", "tbl_df", "tbl", "data.frame")) expect_output(print(provenance(myiris)), "") }) @@ -35,7 +50,7 @@ test_that("rbind works", { }) 
test_that("print.dataset_df() works", { - expect_output(print(iris_dataset), "Anderson E", ignore.case = FALSE) + expect_output(print(iris_dataset), "E. Anderson.", ignore.case = FALSE) }) test_that("as_dataset_df() works", { diff --git a/tests/testthat/test-dataset_to_triples.R b/tests/testthat/test-dataset_to_triples.R index 412b219..9a26412 100644 --- a/tests/testthat/test-dataset_to_triples.R +++ b/tests/testthat/test-dataset_to_triples.R @@ -11,3 +11,4 @@ test_that("dataset_to_triples works()", { + diff --git a/tests/testthat/test-defined.R b/tests/testthat/test-defined.R index cafc108..00bea69 100644 --- a/tests/testthat/test-defined.R +++ b/tests/testthat/test-defined.R @@ -35,9 +35,6 @@ test_that("labelled_defined() throws error", { namespace = 1)) }) -a <- defined(x = Sys.Date(), - label = "Today's date") - test_that("new_datetime_defined() throws errors", { expect_error(defined(x = Sys.Date(), label = c("Today's date", "Extra label"), diff --git a/tests/testthat/test-describe.R b/tests/testthat/test-describe.R new file mode 100644 index 0000000..d90a4ea --- /dev/null +++ b/tests/testthat/test-describe.R @@ -0,0 +1,8 @@ + +temp_prov <- tempfile() +describe(iris_dataset, temp_prov) + +test_that("describe() works", { + expect_equal(readLines(temp_prov)[1], " ." 
+) +}) diff --git a/tests/testthat/test-dublincore.R b/tests/testthat/test-dublincore.R index 4c88282..dd8d972 100644 --- a/tests/testthat/test-dublincore.R +++ b/tests/testthat/test-dublincore.R @@ -1,30 +1,96 @@ +test_that("fix_publisher() works", { + expect_equal(fix_publisher(publishers = "publisher"), "publisher") + expect_equal(fix_publisher(publishers = NULL), ":unas") + expect_equal(fix_publisher(publishers = person("Jane", "Doe", role = "pbl")), "Jane") + expect_equal(fix_publisher(publishers = person("American Iris Society", role = "pbl")), "American Iris Society") + expect_equal(fix_publisher(publishers = c(person("Jane", "Doe", role = "pbl"), + person("American Iris Society"))), "{Jane} and {American Iris Society}") +}) + +test_that("fix_contributors() works", { + expect_equal(fix_contributor(contributors = "contributor"), "contributor") + expect_equal(fix_contributor(contributors = NULL), ":unas") + expect_equal(fix_contributor(contributors = person("Jane", "Doe", role = "ctb")), "Jane Doe [ctb]") + expect_equal(fix_contributor(contributors = person("American Iris Society", role = "pbl")), "American Iris Society [pbl]") + expect_equal(fix_contributor(contributors = c(person("Jane", "Doe", role = "ctb"), + person("Joe", "Doe", comment=c(ORCID="1234")))), + "{Jane Doe [ctb]} and {Joe Doe (1234)}") +}) + +test_that("new_dublincore() works", { + expect_equal(new_dublincore(title="Test", creator=person("Jane", "Doe", role = "cre"))$author, + person("Jane", "Doe", role = "cre")) + expect_equal(new_dublincore(title="Test", creator=person("Jane", "Doe", role = "cre"))$title, + "Test") + expect_equal(new_dublincore(title="Test", + creator=person("Jane", "Doe", role = "cre"), + contributor=c(person("Joe", "Doe", role = "dtm"), + person("Daniel", "Antal", role = "ctb")))$contributor, + "{Joe Doe [dtm]} and {Daniel Antal [ctb]}") + expect_equal(new_dublincore(title="Test", + creator=person("Jane", "Doe", role = "cre"), + publisher=person("My Publisher Inc.", role 
= "pbl"))$publisher, + "My Publisher Inc.") + expect_equal(new_dublincore(title="Test", + creator=person("Jane", "Doe", role = "cre"), + datasource = "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x")$datasource, + "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x") + expect_equal(new_dublincore(title="Test", + creator=person("Jane", "Doe", role = "cre"), + publication_date= 1935)$date, + "1935") + expect_equal(new_dublincore(title="Test", + creator=person("Jane", "Doe", role = "cre"), + language="en")$language, + "en") +}) + test_that("dublincore works", { + dct_iris1 <- dublincore( + title = "Iris Dataset", + creator = c(person(given = "Edgar", family="Anderson", role = "aut"), + person(given = "Jane D", family="Anderson", role = "cre")), + publisher = person("American Iris Society", role="pbl"), + datasource = "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x", + dataset_date = 1935, + language = "en", + description = "The famous (Fisher's or Anderson's) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica." 
+ ) + expect_equal(dct_iris1$author, c(person(given = "Edgar", family="Anderson", role = "aut"), + person(given = "Jane D", family="Anderson", role = "cre"))) + expect_equal(dct_iris1$contributor, ":unas") + expect_equal(dct_iris1$publisher, "American Iris Society") dct_iris <- dublincore( title = "Iris Dataset", creator = person("Edgar", "Anderson", role = "aut"), - publisher = "American Iris Society", + publisher = person("American Iris Society", role="pbl"), + contributor = person("Daniel", "Antal", role="dtm"), datasource = "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x", - date = 1935, + dataset_date = 1935, language = "en", description = "The famous (Fisher's or Anderson's) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica." - ) + ) + expect_equal(dct_iris$publisher, "American Iris Society" ) + expect_equal(dct_iris$contributor, "Daniel Antal [dtm]" ) + expect_equal(dct_iris$date, "1935" ) expect_true(is.dublincore(dct_iris)) }) + test_that("dublincore() works", { dct_iris <- dublincore( title = "Iris Dataset", creator = person("Edgar", "Anderson", role = "aut"), publisher = "American Iris Society", datasource = "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x", - date = 1935, + dataset_date = 1935, language = "en", description = "The famous (Fisher's or Anderson's) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica." 
) expect_equal(dct_iris$language, 'en') expect_equal(dct_iris$publisher, "American Iris Society") - expect_equal(dct_iris$year, "1935") + expect_equal(dct_iris$date, "1935") expect_equal(dct_iris$datasource, "https://doi.org/10.1111/j.1469-1809.1936.tb02137.x") expect_equal(dct_iris$identifier, ":tba") expect_equal(dct_iris$rights, ":tba") @@ -34,13 +100,15 @@ test_that("dublincore() works", { }) test_that("as_dublincore() works", { - expect_true(is.dublincore(as_dublincore(iris_dataset))) + expect_true(is.dublincore(as_dublincore(x=iris_dataset))) expect_true(is.list(as_dublincore(x=iris_dataset, type="list"))) - expect_true(is.null(as_dublincore(iris_dataset, type="list")$contributor)) - expect_equal(as_dublincore(iris_dataset)$year, "1935") + expect_equal(as_dublincore(iris_dataset, type="list")$contributor, "Antal Daniel [dtm]") + expect_equal(as_dublincore(iris_dataset)$date, "1935") expect_equal(as_dublincore(iris_dataset)$description, "The famous (Fisher's or Anderson's) iris data set.") expect_equal(as_dublincore(iris_dataset)$rights, ':tba') expect_equal(as_dublincore(iris_dataset)$coverage, ':unas') + iris_dc_triples <- as_dublincore(iris_dataset, "ntriples") + expect_equal(iris_dc_triples[1], ' \"Iris Dataset\"^^ .') }) @@ -49,3 +117,5 @@ test_that("as_dublincore() gives warning", { }) + + diff --git a/tests/testthat/test-get_bibentry.R b/tests/testthat/test-get_bibentry.R index fb033df..1341882 100644 --- a/tests/testthat/test-get_bibentry.R +++ b/tests/testthat/test-get_bibentry.R @@ -7,12 +7,12 @@ test_that("get_bibentry() works", { expect_error(get_bibentry(iris)) iris_bibentry <- get_bibentry(iris_dataset) expect_equal(iris_bibentry$title, "Iris Dataset") - expect_equal(iris_bibentry$year, "1935") + expect_equal(iris_bibentry$date, "1935") }) test_that("set_bibentry() works", { iris_dataset_2 <- iris_dataset - new_bibentry <- dublincore(title="Test", creator=person("Jane", "Doe"), date=2013) + new_bibentry <- dublincore(title="Test", 
creator=person("Jane", "Doe"), dataset_date=2013) set_bibentry(dataset=iris_dataset_2) <- new_bibentry expect_equal(get_bibentry(iris_dataset_2)$title, "Test") }) diff --git a/tests/testthat/test-id_to_column.R b/tests/testthat/test-id_to_column.R index 2138e6f..c453382 100644 --- a/tests/testthat/test-id_to_column.R +++ b/tests/testthat/test-id_to_column.R @@ -13,3 +13,4 @@ test_that("id_to_column works ()", { + diff --git a/tests/testthat/test-n_triple.R b/tests/testthat/test-n_triple.R index 0a0de2a..6192293 100644 --- a/tests/testthat/test-n_triple.R +++ b/tests/testthat/test-n_triple.R @@ -1,11 +1,13 @@ -triple_1 <- n_triple("http://example.org/show/218", "http://www.w3.org/2000/01/rdf-schema#label", "That Seventies Show") -triple_2 <- n_triple("http://example.org/show/218", "http://example.org/show/localName", '"Cette Série des Années Septante"@fr-be') - - -author_person <- person(given="Daniel", family="Antal", comment = c(ORCID = "https://orcid.org/0000-0001-7513-6760")) test_that("n_triples()", { + triple_1 <- n_triple("http://example.org/show/218", "http://www.w3.org/2000/01/rdf-schema#label", "That Seventies Show") + triple_2 <- n_triple("http://example.org/show/218", "http://example.org/show/localName", '"Cette Série des Années Septante"@fr-be') + author_person <- person(given="Daniel", family="Antal", comment = c(ORCID = "https://orcid.org/0000-0001-7513-6760")) expect_equal(triple_1, " \"That Seventies Show\"^^ .") + expect_equal(n_triple(s= "http://example.com/creation", + p= "http://www.w3.org/ns/prov#generatedAtTime", + o= as.POSIXct(10000, origin = "2024-01-01", tz="UTC")), + " \"2024-01-01T03:46:40Z\"^^ .") expect_equal(length(n_triples(c(triple_1, triple_2, triple_1))), 2) expect_equal(length(n_triples(c(triple_1, triple_2))), 2) expect_equal(n_triple("https://orcid.org/0000-0001-7513-6760", "a", 'http://www.w3.org/ns/prov#Agent'), @@ -14,26 +16,61 @@ test_that("n_triples()", { expect_equal(create_iri("23"), '\"23\"^^') 
expect_equal(create_iri(as.integer(23)), '\"23\"^^') expect_equal(create_iri(as.Date("2024-10-30")), '\"2024-10-30\"^^') - expect_equal(prov_author(author_person), "\"_:antaldaniel\" .") - expect_equal(get_orcid(author_person), c(ORCID = "https://orcid.org/0000-0001-7513-6760")) + expect_equal(prov_author(author_person), " .") }) +test_that("prov_author()", { + expect_equal( + prov_author(person(given = "Daniel", family = "Antal", + email = "daniel.antal@dataobservatory.eu", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0001-7513-6760"))), + " .") +}) test_that("create_iri()", { + author_person <- person(given = "Daniel", family = "Antal", + email = "daniel.antal@dataobservatory.eu", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0001-7513-6760")) expect_error(create_iri(list(a=1:2))) - expect_equal(create_iri(2), "\"2\"^^") + expect_equal(create_iri(as.POSIXct(10000, origin = "2024-01-01", tz="UTC")), "\"2024-01-01T03:46:40Z\"^^") + expect_equal(create_iri(author_person), "") + jane_doe <- person(given="Jane", family="Doe", role = "aut", email = "example@example.com") + expect_equal(create_iri(x=jane_doe), "\"Jane Doe [aut]\"^^") + joe_doe <- person(given="Joe", family="Doe", role = "aut", email = "example@example.com", + comment = c(Wikidata="https://www.wikidata.org/wiki/Q000")) + expect_equal(create_iri(x=joe_doe), "") + joe_doe <- person(given="Joe", family="Doe", role = "aut", email = "example@example.com", + comment = c(ISNI="1234")) + expect_equal(create_iri(x=joe_doe), "") + viaf_doe <- person(given="Joe", family="Doe", role = "aut", email = "example@example.com", + comment = c(VIAF="1234")) + expect_equal(create_iri(x=viaf_doe), "") + expect_equal(create_iri(x=2), "\"2\"^^") expect_true(grepl('http://www.w3.org/2001/XMLSchema#date>', create_iri(Sys.Date()) )) }) -author_person <- person(given = "Daniel", family = "Antal", - email = "daniel.antal@dataobservatory.eu", - role = c("aut", "cre"), - comment = c(ORCID = "0000-0001-7513-6760") -) - 
-test_that("get_orcid()", { - expect_equal(get_orcid(author_person), c(ORCID = "0000-0001-7513-6760")) - expect_equal(get_orcid(person("Jane Doe")), NULL) +test_that("get_person_iri() works", { + author_person <- person(given = "Daniel", family = "Antal", + email = "daniel.antal@dataobservatory.eu", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0001-7513-6760") + ) + expect_equal(get_person_iri(author_person), "https://orcid.org/0000-0001-7513-6760") + expect_equal(get_person_iri(person("Jane Doe")), NULL) + expect_equal(get_person_iri(p=person(given="Daniel", family="Antal", + role = "cre", comment=c(ORCID = "0000-0001-7513-6760"))), + "https://orcid.org/0000-0001-7513-6760" + ) + expect_equal(get_person_iri(p= + person(given="Edgar", family="Anderson", + role = "cre", comment=c(VIAF="http://viaf.org/viaf/6440526")) + ), + c(VIAF = "http://viaf.org/viaf/6440526")) + expect_equal(get_person_iri(p= + person(given="Taylor", family="Swift", role = "cre", comment=c(ISNI="https://isni.org/isni/0000000078519858")) + ), + c(ISNI = "https://isni.org/isni/0000000078519858")) }) -#prov_author("Jane Doe") diff --git a/tests/testthat/test-publication_year.R b/tests/testthat/test-publication_year.R index 3fed132..27b8d89 100644 --- a/tests/testthat/test-publication_year.R +++ b/tests/testthat/test-publication_year.R @@ -4,16 +4,17 @@ test_that("publication_year() works", { expect_warning(publication_year(iris_dataset, overwrite=F) <- 1934) }) +value <- 1936 test_that("publication_year() <- assignment works", { iris_dataset_2 <- iris_dataset - publication_year(iris_dataset_2, overwrite=T) <- 1936 + publication_year(x=iris_dataset_2, overwrite=T) <- 1936 expect_equal(publication_year(iris_dataset_2), as.character(1936)) }) test_that("publication_year()<- NULL results in :unas", { iris_dataset_2 <- iris_dataset - publication_year(iris_dataset_2, overwrite=T) <- NULL - expect_equal(publication_year(x=iris_dataset_2), ":unas") + publication_year(x=iris_dataset_2, 
overwrite=T) <- 1999 + expect_equal(publication_year(x=iris_dataset_2), as.character(1999)) }) diff --git a/tests/testthat/test-subject.R b/tests/testthat/test-subject.R index ac0156a..d5df465 100644 --- a/tests/testthat/test-subject.R +++ b/tests/testthat/test-subject.R @@ -40,7 +40,6 @@ test_that("subject() <- works", { expect_equal(subject(iris_dataset_2)$valueURI, "") }) -subject(iris_dataset) diff --git a/tests/testthat/test-toBiblatex.R b/tests/testthat/test-toBiblatex.R new file mode 100644 index 0000000..814c6ae --- /dev/null +++ b/tests/testthat/test-toBiblatex.R @@ -0,0 +1,17 @@ + +test_that("toBibtex() imported method", { + expect_true(inherits(toBibtex( + object = as_dublincore(iris_dataset), + note.replace.field = c("urldate", "pubsate", "addendum"), + extra.fields = NULL + ), "Bibtex")) +}) + +test_that("toBiblatex() imported function", { + tested <- toBiblatex( + object = as_dublincore(iris_dataset), + note.replace.field = c("urldate", "pubsate", "addendum"), + extra.fields = NULL + ) + expect_true(inherits(tested, "Bibtex")) +}) diff --git a/tests/testthat/test-var_label.R b/tests/testthat/test-var_label.R index c3310d3..d50a22d 100644 --- a/tests/testthat/test-var_label.R +++ b/tests/testthat/test-var_label.R @@ -3,7 +3,7 @@ test_that("var_label() works", { expect_equal(var_label(iris_dataset$Sepal.Length), "Length of the sepal in cm") expect_equal(class(var_label(iris_dataset, unlist=TRUE)), "character") test_df <- dataset_df(a = 1:2, b=3:4) - expect_equal(var_label(test_df, unlist=TRUE, null_action = "fill" ), c(a = "a", b="b")) + expect_equal(var_label(test_df, unlist=TRUE, null_action = "fill" ), c(rowid="rowid", a = "a", b="b")) #expect_equal(label_attribute(iris_dataset$Species), "Taxon name within the Iris genus") }) @@ -17,7 +17,7 @@ test_that("var_label()<- works", { test_that("var_label() throws error", { test_df <- dataset_df(a = 1:2, b=3:4) - expect_error(var_label(test_df) <- c("A", "B", "C")) + expect_error(var_label(test_df) <- 
c("A", "B", "C", "E")) }) @@ -31,3 +31,5 @@ test_that("var_label.dataset_df() works", { var_label(d) <- "test" expect_equal(var_label(d), "test") }) + +dataset_df(mtcars, identifier = c(mt="http://mtcars.com/")) diff --git a/tests/testthat/test-var_namespace.R b/tests/testthat/test-var_namespace.R index 342cbaf..4a1227a 100644 --- a/tests/testthat/test-var_namespace.R +++ b/tests/testthat/test-var_namespace.R @@ -1,11 +1,9 @@ - - test_that("var_namespace() works", { - qid <- defined(c("Q275912", "Q116196078"), namespace = "https://www.wikidata.org/wiki/") + qid <- defined(c("Q275912", "Q116196078"), namespace = c(wd="https://www.wikidata.org/wiki/")) expect_true(is.defined(qid)) - expect_equal(var_namespace(qid),"https://www.wikidata.org/wiki/") + expect_equal(var_namespace(qid), c(wd="https://www.wikidata.org/wiki/")) }) diff --git a/vignettes/bibentry.Rmd b/vignettes/bibentry.Rmd index d958af4..a5daa96 100644 --- a/vignettes/bibentry.Rmd +++ b/vignettes/bibentry.Rmd @@ -66,15 +66,7 @@ The publication year is usually one of the most important descriptive metadata i ```{r irispublicationyear} publication_year(iris_dataset_2) ``` -The default value is `:unas` for unassigned values: -```{r publicationyeardefault} -# Revert to default (unassigned): -publication_year(iris_dataset_2) <- NULL - -# Get the default value: -publication_year(iris_dataset_2) -``` ### Language ```{r, language} @@ -134,7 +126,7 @@ description(iris_dataset) ### Subject -```{r} +```{r subject} subject(iris_dataset) ``` @@ -146,7 +138,7 @@ subject(iris_dataset) ``` -```{r} +```{r subjectcreate} subject_create( term = "data sets", subjectScheme = "Library of Congress Subject Headings (LCSH)", @@ -174,6 +166,8 @@ And according to DCTERMS (Dublin Core): ```{r dc} +library(RefManageR) print(as_dublincore(iris_dataset), "Bibtex") +RefManageR::toBiblatex(as_dublincore(iris_dataset)) ``` diff --git a/vignettes/rdf.Rmd b/vignettes/rdf.Rmd new file mode 100644 index 0000000..d3bcffc --- /dev/null +++ 
b/vignettes/rdf.Rmd @@ -0,0 +1,47 @@ +--- +title: "From R to RDF" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{From R to RDF} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(dataset) +library(rdflib) +``` + + +```{r prov} +provenance(iris_dataset) +``` + +```{r bibliography} +as_dublincore(iris_dataset, type="ntriples") +``` + +```{r rdf} +# initialise an RDF triplestore: +dataset_describe <- rdf() + +# open a temporary file: +temp_prov <- tempfile() + +# describe the dataset in a temporary file: +describe(iris_dataset, temp_prov) + +# parse the temporary file into the RDF triplestore: +rdf_parse(rdf = dataset_describe, doc=temp_prov, format="ntriples") + +# show RDF triples: +dataset_describe +``` +