diff --git a/.gitignore b/.gitignore
index a48bf17..6c8d8e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,5 @@ inst/doc
 ..Rcheck/
 .vscode/
 *.dll
-*.rds
\ No newline at end of file
+*.rds
+CRAN-SUBMISSION
\ No newline at end of file
diff --git a/DESCRIPTION b/DESCRIPTION
index ac6acd3..8768b8a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,11 +1,15 @@
 Package: sacRebleu
 Type: Package
 Title: Metrics for Assessing the Quality of Generated Text
-Version: 0.1.0
+Version: 0.1.2
 Date: 2024-04-05
-Author: Philipp Koch
-Maintainer: Philipp Koch <PhillKoch@protonmail.com>
-Description: Package to provide metrics to assess the quality of generated text.
+Authors@R: c(person("Philipp", "Koch", role = c("aut", "cre"), email = "PhillKoch@protonmail.com"))
+Description: Implementation of the BLEU-Score in 'C++' to evaluate the
+    quality of generated text. The BLEU-Score, introduced by Papineni et al. (2002)
+    <doi:10.3115/1073083.1073135>, is a metric for evaluating the quality of
+    generated text. It is based on the n-gram overlap between the generated
+    text and reference texts. Additionally, the package provides some smoothing
+    methods as described in Chen and Cherry (2014) <doi:10.3115/v1/W14-3346>.
 License: GPL (>= 2)
 SystemRequirements: Rust tool chain w/ cargo, libclang/llvm-config
 Depends:
@@ -21,7 +25,8 @@ Suggests:
     knitr,
     rmarkdown,
     hfhub,
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0),
+    withr
 Config/testthat/edition: 3
 RoxygenNote: 7.2.3
 VignetteBuilder: knitr
diff --git a/NEWS.md b/NEWS.md
index dfb3d1f..7d34cff 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,2 +1,9 @@
+# sacRebleu 0.1.2
+ - Update DESCRIPTION, documentation and examples to comply with CRAN standards.
+ - Avoid creating 'tok'-artifacts during the build process.
+
+# sacRebleu 0.1.1
+Update tests to avoid crashing on certain Windows setups.
+
 # sacRebleu 0.1.0
 Initial CRAN submission.
\ No newline at end of file
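For orientation before the code changes: the interface the new Description text refers to is the one patched below in R/bleu.R. A minimal sketch of the n-gram/smoothing API, using only calls that appear verbatim in the test file further down (the token IDs are arbitrary illustrative integers):

```r
library(sacRebleu)

# References are a list of tokenized sentences (integer vectors);
# the candidate is a single integer vector.
ref  <- list(c(1, 2, 3, 4))
cand <- c(1, 2, 3, 5)

# Plain BLEU over up to 4-grams ...
bleu_sentence_ids(ref, cand, n = 4)

# ... and the add-k smoothing variant described in Chen and Cherry (2014).
bleu_sentence_ids(ref, cand, n = 4, smoothing = "add-k", k = 1)
```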
diff --git a/R/bleu.R b/R/bleu.R
index cabc16d..29a0587 100644
--- a/R/bleu.R
+++ b/R/bleu.R
@@ -4,9 +4,9 @@
 library(checkmate)
 #' Validate Arguments
 #'
-#' @param weights Weight vector for `bleu_corpus_ids` and `bleu_sentence_ids` functions
-#' @param smoothing Smoothing method for `bleu_corpus_ids` and `bleu_sentence_ids` functions
-#' @param n N-gram for `bleu_corpus_ids` and `bleu_sentence_ids` functions
+#' @param weights Weight vector for 'bleu_corpus_ids' and 'bleu_sentence_ids' functions
+#' @param smoothing Smoothing method for 'bleu_corpus_ids' and 'bleu_sentence_ids' functions
+#' @param n N-gram for 'bleu_corpus_ids' and 'bleu_sentence_ids' functions
 #' @returns A list with the validated arguments (weights and smoothing)
 validate_arguments <- function(weights, smoothing, n) {
   if (!is.null(weights)) {
@@ -38,9 +38,9 @@ validate_references <- function(references, target) {
 #' Computes BLEU-Score (Papineni et al., 2002).
 #'
-#' `bleu_sentence_ids` computes the BLEU score for a single candidate sentence and a list of reference sentences.
+#' 'bleu_sentence_ids' computes the BLEU score for a single candidate sentence and a list of reference sentences.
 #' The sentences must be tokenized before so they are represented as integer vectors.
-#' Akin to sacrebleu (Python), the function allows the application of different smoothing methods.
+#' Akin to 'sacrebleu' ('Python'), the function allows the application of different smoothing methods.
 #' Epsilon- and add-k-smoothing are available. Epsilon-smoothing is equivalent to 'floor'
 #' smoothing in the sacrebleu implementation.
 #' The different smoothing techniques are described in Chen et al., 2014
@@ -80,9 +80,9 @@ bleu_sentence_ids <- function(references, candidate, n = 4, weights = NULL, smoo
 #' Computes BLEU score (Papineni et al., 2002).
 #'
-#' `bleu_sentence_ids` computes the BLEU score for a corpus and its respective reference sentences.
+#' 'bleu_corpus_ids' computes the BLEU score for a corpus and its respective reference sentences.
 #' The sentences must be tokenized before so they are represented as integer vectors.
-#' Akin to sacreBLEU, the function allows the application of different smoothing methods.
+#' Akin to 'sacrebleu' ('Python'), the function allows the application of different smoothing methods.
 #' Epsilon- and add-k-smoothing are available. Epsilon-smoothing is equivalent to 'floor'
 #' smoothing in the sacreBLEU implementation.
 #' The different smoothing techniques are described in Chen et al., 2014
@@ -128,7 +128,7 @@ bleu_corpus_ids <- function(references, candidates, n = 4, weights = NULL, smoot
 # Compute BLEU for a Corpus with Tokenization
 #
 #' This function applies tokenization based on the 'tok' library and computes the BLEU score.
-#' An already initialized tokenizer can be provided using the `tokenizer`argument or
+#' An already initialized tokenizer can be provided using the `tokenizer` argument or
 #' a valid huggingface identifier (string) can be passed. If the identifier is used only,
 #' the tokenizer is newly initialized on every call.
 #' @param references A list of a list of reference sentences (`list(list(c(1,2,...)), list(c(3,5,...)))`).
@@ -144,9 +144,11 @@ bleu_corpus_ids <- function(references, candidates, n = 4, weights = NULL, smoot
 #' @returns The BLEU score for the candidate sentence.
 #' @export
 #' @examples
+#' \dontrun{
 #' cand_corpus <- list("This is good", "This is not good")
 #' ref_corpus <- list(list("Perfect outcome!", "Excellent!"), list("Not sufficient.", "Horrible."))
-#' # Call: bleu_corpus <- bleu_corpus(ref_corpus, cand_corpus)
+#' tok <- tok::tokenizer$from_pretrained("bert-base-uncased")
+#' bleu_corpus <- bleu_corpus(ref_corpus, cand_corpus, tok)}
 bleu_corpus <- function(
     references,
     candidates,
@@ -186,7 +188,7 @@ bleu_corpus <- function(
 #' Compute BLEU for a Sentence with Tokenization
 #'
 #' This function applies tokenization based on the 'tok' library and computes the BLEU score.
-#' An already initializied tokenizer can be provided using the `tokenizer` argument or
+#' An already initialized tokenizer can be provided using the 'tokenizer' argument or
 #' a valid huggingface identifier (string) can be passed. If the identifier is used only,
 #' the tokenizer is newly initialized on every call.
 #' @param references A list of reference sentences.
@@ -202,9 +204,11 @@ bleu_corpus <- function(
 #' @returns The BLEU score for the candidate sentence.
 #' @export
 #' @examples
+#' \dontrun{
 #' cand <- "Hello World!"
 #' ref <- list("Hello everyone.", "Hello Planet", "Hello World")
-#' # Call: bleu_standard <- bleu_sentence(ref, cand)
+#' tok <- tok::tokenizer$from_pretrained("bert-base-uncased")
+#' bleu_standard <- bleu_sentence(ref, cand, tok)}
 bleu_sentence <- function(
     references,
     candidate,
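As the roxygen text above notes, the `tokenizer` argument of bleu_sentence()/bleu_corpus() accepts either an already-initialized 'tok' tokenizer or a Hugging Face identifier string, and a bare identifier re-initializes the tokenizer on every call. A sketch of the two call styles (assumes network access to download "bert-base-uncased"; the identifier-string variant is inferred from the docs above rather than shown in the examples):

```r
library(sacRebleu)

ref  <- list("Hello everyone.", "Hello Planet", "Hello World")
cand <- "Hello World!"

# Preferred in a loop: initialize the tokenizer once and reuse it.
tok <- tok::tokenizer$from_pretrained("bert-base-uncased")
bleu_sentence(ref, cand, tok)

# Also possible per the docs, but re-initializes the tokenizer on each call.
bleu_sentence(ref, cand, "bert-base-uncased")
```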
diff --git a/man/bleu_corpus.Rd b/man/bleu_corpus.Rd
index 8bb007f..00ce0e5 100644
--- a/man/bleu_corpus.Rd
+++ b/man/bleu_corpus.Rd
@@ -3,7 +3,7 @@
 \name{bleu_corpus}
 \alias{bleu_corpus}
 \title{This function applies tokenization based on the 'tok' library and computes the BLEU score.
-An already initialized tokenizer can be provided using the `tokenizer`argument or
+An already initialized tokenizer can be provided using the `tokenizer` argument or
 a valid huggingface identifier (string) can be passed. If the identifier is used only,
 the tokenizer is newly initialized on every call.}
 \usage{
@@ -41,12 +41,14 @@ The BLEU score for the candidate sentence.
 }
 \description{
 This function applies tokenization based on the 'tok' library and computes the BLEU score.
-An already initialized tokenizer can be provided using the `tokenizer`argument or
+An already initialized tokenizer can be provided using the `tokenizer` argument or
 a valid huggingface identifier (string) can be passed. If the identifier is used only,
 the tokenizer is newly initialized on every call.
 }
 \examples{
+\dontrun{
 cand_corpus <- list("This is good", "This is not good")
 ref_corpus <- list(list("Perfect outcome!", "Excellent!"), list("Not sufficient.", "Horrible."))
-# Call: bleu_corpus <- bleu_corpus(ref_corpus, cand_corpus)
+tok <- tok::tokenizer$from_pretrained("bert-base-uncased")
+bleu_corpus <- bleu_corpus(ref_corpus, cand_corpus, tok)}
 }
diff --git a/man/bleu_corpus_ids.Rd b/man/bleu_corpus_ids.Rd
index b471f5c..e7fdaa1 100644
--- a/man/bleu_corpus_ids.Rd
+++ b/man/bleu_corpus_ids.Rd
@@ -33,9 +33,9 @@ bleu_corpus_ids(
 The BLEU score for the candidate sentence.
 }
 \description{
-`bleu_sentence_ids` computes the BLEU score for a corpus and its respective reference sentences.
+'bleu_corpus_ids' computes the BLEU score for a corpus and its respective reference sentences.
 The sentences must be tokenized before so they are represented as integer vectors.
-Akin to sacreBLEU, the function allows the application of different smoothing methods.
+Akin to 'sacrebleu' ('Python'), the function allows the application of different smoothing methods.
 Epsilon- and add-k-smoothing are available. Epsilon-smoothing is equivalent to 'floor'
 smoothing in the sacreBLEU implementation.
 The different smoothing techniques are described in Chen et al., 2014
diff --git a/man/bleu_sentence.Rd b/man/bleu_sentence.Rd
index d113e58..6372c41 100644
--- a/man/bleu_sentence.Rd
+++ b/man/bleu_sentence.Rd
@@ -38,12 +38,14 @@ The BLEU score for the candidate sentence.
 }
 \description{
 This function applies tokenization based on the 'tok' library and computes the BLEU score.
-An already initializied tokenizer can be provided using the `tokenizer` argument or
+An already initialized tokenizer can be provided using the 'tokenizer' argument or
 a valid huggingface identifier (string) can be passed. If the identifier is used only,
 the tokenizer is newly initialized on every call.
 }
 \examples{
+\dontrun{
 cand <- "Hello World!"
 ref <- list("Hello everyone.", "Hello Planet", "Hello World")
-# Call: bleu_standard <- bleu_sentence(ref, cand)
+tok <- tok::tokenizer$from_pretrained("bert-base-uncased")
+bleu_standard <- bleu_sentence(ref, cand, tok)}
 }
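One thing the paired man pages do not spell out is the extra nesting level that separates the corpus functions from the sentence functions: bleu_corpus_ids() takes a list of candidates and, per candidate, an inner list of references. The calls below are copied from test_bleu.R further down:

```r
library(sacRebleu)

# Sentence level: references are a flat list of integer vectors.
bleu_sentence_ids(list(c(1, 2, 3), c(2, 3, 4)), c(1, 2, 3), n = 2)

# Corpus level: one extra list() layer on both sides -- a list of
# candidates, and one inner reference list per candidate.
bleu_corpus_ids(list(list(c(1, 2, 3), c(2, 3, 4))), list(c(1, 2, 3)), n = 2)
```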
diff --git a/man/bleu_sentence_ids.Rd b/man/bleu_sentence_ids.Rd
index 248aa40..8250103 100644
--- a/man/bleu_sentence_ids.Rd
+++ b/man/bleu_sentence_ids.Rd
@@ -33,9 +33,9 @@ bleu_sentence_ids(
 The BLEU score for the candidate sentence.
 }
 \description{
-`bleu_sentence_ids` computes the BLEU score for a single candidate sentence and a list of reference sentences.
+'bleu_sentence_ids' computes the BLEU score for a single candidate sentence and a list of reference sentences.
 The sentences must be tokenized before so they are represented as integer vectors.
-Akin to sacrebleu (Python), the function allows the application of different smoothing methods.
+Akin to 'sacrebleu' ('Python'), the function allows the application of different smoothing methods.
 Epsilon- and add-k-smoothing are available. Epsilon-smoothing is equivalent to 'floor'
 smoothing in the sacrebleu implementation.
 The different smoothing techniques are described in Chen et al., 2014
diff --git a/man/sacRebleu.Rd b/man/sacRebleu.Rd
index edc3a76..491e4bb 100644
--- a/man/sacRebleu.Rd
+++ b/man/sacRebleu.Rd
@@ -16,5 +16,9 @@
 Useful links:
 \item Report bugs at \url{https://github.com/LazerLambda/sacRebleu/issues}
 }
+}
+\author{
+\strong{Maintainer}: Philipp Koch \email{PhillKoch@protonmail.com}
+
 }
 \keyword{internal}
diff --git a/man/validate_arguments.Rd b/man/validate_arguments.Rd
index e312fe5..0967c4f 100644
--- a/man/validate_arguments.Rd
+++ b/man/validate_arguments.Rd
@@ -7,11 +7,11 @@
 validate_arguments(weights, smoothing, n)
 }
 \arguments{
-\item{weights}{Weight vector for `bleu_corpus_ids` and `bleu_sentence_ids` functions}
+\item{weights}{Weight vector for 'bleu_corpus_ids' and 'bleu_sentence_ids' functions}
 
-\item{smoothing}{Smoothing method for `bleu_corpus_ids` and `bleu_sentence_ids` functions}
+\item{smoothing}{Smoothing method for 'bleu_corpus_ids' and 'bleu_sentence_ids' functions}
 
-\item{n}{N-gram for `bleu_corpus_ids` and `bleu_sentence_ids` functions}
+\item{n}{N-gram for 'bleu_corpus_ids' and 'bleu_sentence_ids' functions}
 }
 \value{
 A list with the validated arguments (weights and smoothing)
diff --git a/src/bleu.cpp b/src/bleu.cpp
index be27e40..689a9e3 100644
--- a/src/bleu.cpp
+++ b/src/bleu.cpp
@@ -347,7 +347,7 @@ static long double bleu_corpus_ids(vector<vector<vector<int>>> references, vecto
 /**
  * @brief Calculates the BLEU score for a list of a corpus.
  *
- * Wrapper for the `bleu_corpus_ids` function to cast R datatypes to C++ datatypes.
+ * Wrapper for the 'bleu_corpus_ids' function to cast R datatypes to C++ datatypes.
  *
  * @param references The list of reference sentences to compare against.
  * @param candidate The candidate sentence to calculate BLEU score for.
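For readers unfamiliar with the smoothing the docs keep referencing: BLEU is a geometric mean of modified n-gram precisions, so a single zero match count zeroes the whole score. Epsilon ('floor') smoothing substitutes a small constant for zero numerators, while add-k adds k to numerator and denominator. The sketch below is illustrative only — it is not the package's C++ kernel, and the eps/k defaults are made up for the demo:

```r
# Illustrative smoothing of n-gram precisions (not sacRebleu's internals).
# m: n-gram match counts, l: total candidate n-gram counts, per order 1..n.
smooth_precisions <- function(m, l, method = c("epsilon", "add-k"),
                              eps = 0.1, k = 1) {
  method <- match.arg(method)
  if (method == "epsilon") {
    ifelse(m == 0, eps / l, m / l)  # floor zero match counts at eps
  } else {
    (m + k) / (l + k)               # add-k shifts numerator and denominator
  }
}

m <- c(4, 2, 0, 0)  # matches for 1- to 4-grams
l <- c(5, 4, 3, 2)  # candidate n-gram totals
# Geometric mean of the smoothed precisions (brevity penalty omitted):
exp(mean(log(smooth_precisions(m, l, "epsilon"))))
```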
diff --git a/tests/testthat/test_bleu.R b/tests/testthat/test_bleu.R
index 100055b..e1f7934 100644
--- a/tests/testthat/test_bleu.R
+++ b/tests/testthat/test_bleu.R
@@ -26,7 +26,7 @@ test_that("Expect errors for wrong arguments", {
   testthat::expect_error(bleu_corpus_ids(list(list(c(1,2,3), c(2,3,4))), list(c(1,2,3)), n=0.5))
 })
 
-test_that("Expect number for `bleu_sentence_ids`", {
+test_that("Expect number for 'bleu_sentence_ids'", {
   testthat::expect_vector(bleu_sentence_ids(list(c(1,2,3), c(2,3,4)), c(1,2,3), n=2))
   ref_corpus <- list(c(1,2,3,4))
   cand_corpus <- c(1,2,3,5)
@@ -36,7 +36,7 @@ test_that("Expect number for `bleu_sentence_ids`", {
   testthat::expect_vector(bleu_sentence_ids(ref_corpus, cand_corpus, n=4, smoothing="add-k", k=1))
 })
 
-test_that("Expect number for `bleu_corpus_ids`", {
+test_that("Expect number for 'bleu_corpus_ids'", {
   testthat::expect_vector(bleu_corpus_ids(list(list(c(1,2,3), c(2,3,4))), list(c(1,2,3)), n=2))
   ref_corpus <- list(list(c(1,2,3,4)))
   cand_corpus <- list(c(1,2,3,5))
@@ -108,3 +108,5 @@ test_that("Expect Errors with Tokenizer Functions", {
   testthat::expect_error(bleu_corpus(0, list(cand), tokenizer=tok))
   testthat::expect_error(bleu_corpus(list(ref), 0, tokenizer=tok))
 })
+
+withr::defer(unlink(".cache/huggingface", recursive = TRUE), teardown_env())
diff --git a/vignettes/sacReBLEU.Rmd b/vignettes/sacReBLEU.Rmd
index 87c8226..e6b966e 100644
--- a/vignettes/sacReBLEU.Rmd
+++ b/vignettes/sacReBLEU.Rmd
@@ -15,8 +15,8 @@ knitr::opts_chunk$set(
 ```
 This package aims to provide metrics to evaluate generated text. To this point, only the BLEU
 (bilingual evaluation understudy) score, introduced by [Papineni et al., 2002](https://aclanthology.org/P02-1040/),
-is available. The library is implemented in R and C++. The metrics are implemented on the base of previous tokenization, so that lists with tokenized sequences are evaluated.
-This package is inspired by the [NLTK](https://www.nltk.org/) and [sacrebleu](https://github.com/mjpost/sacrebleu) implementation for Python.
+is available. The library is implemented in 'R' and 'C++'. The metrics operate on pre-tokenized input, so that lists of tokenized sequences are evaluated.
+This package is inspired by the ['NLTK'](https://www.nltk.org/) and ['sacrebleu'](https://github.com/mjpost/sacrebleu) implementations for 'Python'.
 
 # BLEU Score
 The BLEU-score is a metric used to evaluate the quality of machine-generated texts by comparing them to
@@ -69,5 +69,5 @@ ref_corpus <- list(list(c(1,2,3), c(2,3,4)), list(c(1,2,6), c(781, 21, 9), c(7,
 bleu_corpus_ids_standard <- bleu_corpus_ids(ref_corpus, cand_corpus)
 ```
 
-Here, the text is already tokenized and represented through integers in the `cand_corpus` and `ref_corpus` lists. For tokenization, the [`tok`](https://cran.r-project.org/package=tok) package is recommended.
- It is also possible to feed the function with text using the `bleu_corpus` or `bleu_sentence` functions.
\ No newline at end of file
+Here, the text is already tokenized and represented through integers in the 'cand_corpus' and 'ref_corpus' lists. For tokenization, the ['tok'](https://cran.r-project.org/package=tok) package is recommended.
+It is also possible to feed the function with text using the 'bleu_corpus' or 'bleu_sentence' functions.
\ No newline at end of file
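A closing note on the new teardown line: withr::defer() registers an expression to run when the given environment — here testthat's teardown environment — is torn down, so the Hugging Face cache created by the tokenizer tests is removed after the whole test file has run (the `recursive = TRUE` is needed because unlink() does not remove directories otherwise). The mechanism in isolation, as a minimal self-contained sketch where message() stands in for the unlink() call:

```r
library(withr)

local({
  defer(message("cleanup runs last"))  # runs when this environment exits
  message("test body runs first")
})
#> test body runs first
#> cleanup runs last
```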