Try putting R code in tests and referencing that in yaml

kuriwaki · kuriwaki · commit 1bb3326416ce · 2024-05-09T19:40:14.000-04:00
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -6,6 +6,7 @@ man-roxygen/*
 ^README\.Rmd$
 ^README\.html$
 ^CONTRIBUTING\.md$
+^tests/.*_ghaction\.R$
 ^vignettes/figure$
 ^vignettes/figure/.+$
 \.Rmd2$
diff --git a/.github/workflows/R-CMD-check-dev.yaml b/.github/workflows/R-CMD-check-dev.yaml
@@ -47,6 +47,11 @@ jobs:
           rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
         shell: Rscript {0}
 
+      - name: Test live dataverse (vignette)  # executes the two tests/*_ghaction.R scripts
+        run: |
+          Rscript tests/B-search_ghaction.R
+          Rscript tests/C-download_ghaction.R
+
       - name: Test coverage
         run: covr::codecov()
         shell: Rscript {0}
diff --git a/tests/B-search_ghaction.R b/tests/B-search_ghaction.R
@@ -0,0 +1,22 @@
+## ----knitr_options, echo=FALSE, results="hide"----------------------------------------------
+options(width = 120)  # print width of 120 columns
+knitr::opts_chunk$set(results = "hold")  # knitr chunk option carried over from the vignette
+## Script form of the code chunks in vignettes/B-search.Rmd. The workflow step
+## "Test live dataverse (vignette)" refers to this file; results are only printed, not asserted.
+## ---- Plain query string, no paging arguments ----------------------------------------------
+library("dataverse")
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+dataverse_search("Gary King")[c("name")]  # keep only the "name" column
+
+
+## ---- Same query, paged with start = 6 and per_page = 20 -----------------------------------
+dataverse_search("Gary King", start = 6, per_page = 20)[c("name")]
+
+
+## ---- Fielded search (author + title) restricted to type = "dataset" -----------------------
+ei <- dataverse_search(author = "Gary King", title = "Ecological Inference", type = "dataset", per_page = 20)
+# fields returned
+names(ei)
+# names of datasets
+ei$name
+
diff --git a/tests/C-download_ghaction.R b/tests/C-download_ghaction.R
@@ -0,0 +1,76 @@
+## ----knitr_options, echo=FALSE, results="hide"----------------------------------------------
+options(width = 120)  # print width of 120 columns
+knitr::opts_chunk$set(results = "hold")  # knitr chunk option carried over from the vignette
+## Script form of code chunks in vignettes/C-download.Rmd. Every call below names
+## server = "dataverse.harvard.edu" explicitly; results are only printed, not asserted.
+## ---- Set the DATAVERSE_SERVER environment variable ----------------------------------------
+Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
+
+
+## ---- Attach packages ----------------------------------------------------------------------
+library("dataverse")
+library("tibble") # to see dataframes in tidyverse-form
+
+## Download comprehensiveJapanEnergy.tab without supplying .f (the package's default reader).
+## ----echo=FALSE, message=FALSE,include=FALSE------------------------------------------------
+energy <- get_dataframe_by_name(
+  filename = "comprehensiveJapanEnergy.tab",
+  dataset = "10.7910/DVN/ARKOTI",
+  server = "dataverse.harvard.edu")
+
+## Commented-out copy of the call above (an eval=FALSE chunk, so it is not executed).
+## ----eval=FALSE-----------------------------------------------------------------------------
+## energy <- get_dataframe_by_name(
+##   filename = "comprehensiveJapanEnergy.tab",
+##   dataset = "10.7910/DVN/ARKOTI",
+##   server = "dataverse.harvard.edu")
+
+
+## ---- Print the first rows of the downloaded data ------------------------------------------
+head(energy)
+
+
+## ---- Same file again, this time read through a read.delim() wrapper passed as .f ----------
+library(readr)
+energy <- get_dataframe_by_name(
+  filename = "comprehensiveJapanEnergy.tab",
+  dataset = "10.7910/DVN/ARKOTI",
+  server = "dataverse.harvard.edu",
+  .f = function(x) read.delim(x, sep = "\t"))  # overwrites the earlier `energy`
+
+head(energy)
+
+## Download alpl2013.tab without original = TRUE or a custom .f.
+## ----message=FALSE--------------------------------------------------------------------------
+argentina_tab <- get_dataframe_by_name(
+  filename = "alpl2013.tab",
+  dataset = "10.7910/DVN/ARKOTI",
+  server = "dataverse.harvard.edu")
+
+
+## ---- Inspect polling_place as read from that download -------------------------------------
+str(argentina_tab$polling_place)
+
+
+## ---- Same file with original = TRUE, read with haven::read_dta ----------------------------
+argentina_dta <- get_dataframe_by_name(
+  filename = "alpl2013.tab",
+  dataset = "10.7910/DVN/ARKOTI",
+  server = "dataverse.harvard.edu",
+  original = TRUE,
+  .f = haven::read_dta)
+
+
+## ---- Inspect polling_place as read from the original file ---------------------------------
+str(argentina_dta$polling_place)
+
+
+## ---- Dataset metadata, structure printed two levels deep ----------------------------------
+str(dataset_metadata("10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu"),
+    max.level = 2)
+
+## Not run: fetch chapter03.R with get_file() and write the result to disk with writeBin().
+## ----eval = FALSE---------------------------------------------------------------------------
+## code3 <- get_file("chapter03.R", "doi:10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu")
+## writeBin(code3, "chapter03.R")
+
diff --git a/vignettes/B-search.Rmd b/vignettes/B-search.Rmd
@@ -17,26 +17,92 @@ knitr::opts_chunk$set(results = "hold")
 
 Searching for data within Dataverse is quite easy using the `dataverse_search()` function. The simplest searches simply consist of a query string:
 
-```{r}
+```{r, eval=FALSE}
 library("dataverse")
 Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
 dataverse_search("Gary King")[c("name")]
 ```
 
+```{r}
+##                                                                name
+## 1                         004_informal_food_retail_Nigeria_2018.tab
+## 2              00592Belle-Stress-PaperData-Subject_King_ChildIs.PDF
+## 3               00592Belle-Stress-PaperData-Subject_King_ChildO.PDF
+## 4               00592Belle-Stress-PaperData-Subject_King_Coping.PDF
+## 5       00592Belle-Stress-PaperData-Subject_King_Discrimination.PDF
+## 6               00592Belle-Stress-PaperData-Subject_King_LifeCs.PDF
+## 7                00592Belle-Stress-PaperData-Subject_King_LifeE.PDF
+## 8  00592Belle-Stress-PaperData-Subject_KingAndMeunier_Parenting.PDF
+## 9                             00698McArthur-King-BoxCoverSheets.pdf
+## 10                           00698McArthur-King-MemoOfAgreement.pdf
+```
+
+
 The results are paginated, so users can rely upon the `per_page` and `start` arguments to request subsequent pages of results. We'll start at 6 to show that we retrieve the last five results from the previous query plus 15 more (due to `per_page = 20`):
 
-```{r}
+```{r, eval=FALSE}
 dataverse_search("Gary King", start = 6, per_page = 20)[c("name")]
 ```
 
+```{r}
+# 10 of 3676 results retrieved
+##                                                                                                                   name
+## 1                                                                            004_informal_food_retail_Nigeria_2018.tab
+## 2                                                                                00698McArthur-King-BoxCoverSheets.pdf
+## 3                                                                               00698McArthur-King-MemoOfAgreement.pdf
+## 4                                                                              00698McArthur-King-StudyDescription.pdf
+## 5  01 ReadMe Unlocking history through automated virtual unfolding of sealed documents imaged by X-ray microtomography
+## 6                                           01_ReadMe_The_Spiral_Locked_Letters_of_Elizabeth_I_and_Mary_Queen_of_Scots
+## 7                                     03 Brienne Collection letterlocking data: Images folder 02/16, DB-0874_2–DB-0903
+## 8                                    03 Brienne Collection letterlocking data: Images folder 04/16, DB-0988–DB-1109_03
+## 9                                 03 Brienne Collection letterlocking data: Images folder 06/16, DB-1241_02–DB-1339_06
+## 10                                03 Brienne Collection letterlocking data: Images folder 08/16, DB-1455_02–DB-1564_01
+```
+
+
 More complicated searches can specify metadata fields like `title` and restrict results to a specific `type` of Dataverse object (a "dataverse", "dataset", or "file"):
 
-```{r}
+```{r, eval=FALSE}
 ei <- dataverse_search(author = "Gary King", title = "Ecological Inference", type = "dataset", per_page = 20)
 # fields returned
 names(ei)
 # names of datasets
 ei$name
 ```
 
+```{r}
+##  [1] "name"                    "type"                    "url"                     "global_id"              
+##  [5] "description"             "published_at"            "publisher"               "citationHtml"           
+##  [9] "identifier_of_dataverse" "name_of_dataverse"       "citation"                "storageIdentifier"      
+## [13] "keywords"                "subjects"                "fileCount"               "versionId"              
+## [17] "versionState"            "majorVersion"            "minorVersion"            "createdAt"              
+## [21] "updatedAt"               "contacts"                "authors"                 "publications"           
+##  [1] "01 ReadMe Unlocking history through automated virtual unfolding of sealed documents imaged by X-ray microtomography"        
+##  [2] "01_ReadMe_The_Spiral_Locked_Letters_of_Elizabeth_I_and_Mary_Queen_of_Scots"                                                 
+##  [3] "03 Brienne Collection letterlocking data: Images folder 02/16, DB-0874_2–DB-0903"                                           
+##  [4] "03 Brienne Collection letterlocking data: Images folder 04/16, DB-0988–DB-1109_03"                                          
+##  [5] "03 Brienne Collection letterlocking data: Images folder 06/16, DB-1241_02–DB-1339_06"                                       
+##  [6] "03 Brienne Collection letterlocking data: Images folder 08/16, DB-1455_02–DB-1564_01"                                       
+##  [7] "03 Brienne Collection letterlocking data: Images folder 12/16, DB-1868–DB-1963_03"                                          
+##  [8] "03 Brienne Collection letterlocking data: Images folder 14/16, DB-2064_01–2155_03"                                          
+##  [9] "03 Spiral-lock figures"                                                                                                     
+## [10] "07 Letterlocking Categories and Formats Chart"                                                                              
+## [11] "10 Foldable: Launch Little Book of Locks (UH6089), with Categories and Formats Chart. Letterlocking Instructional Resources"
+## [12] "10 Million International Dyadic Events"                                                                                     
+## [13] "1479 data points of covid19 policy response times"                                                                          
+## [14] "2016 Census of Population: ADA and DA Maps for Kings County Nova Scotia"                                                    
+## [15] "3D Dust map from Green et al. (2015)"                                                                                       
+## [16] "3D dust map from Green et al. (2017)"                                                                                       
+## [17] "3D dust map from Green et al. (2019)"                                                                                       
+## [18] "A 1D Lyman-alpha Profile Camera for Plasma Edge Neutral Studies  on the DIII-D Tokamak"                                     
+## [19] "A Comparative Analysis of Brazil's Foreign Policy Drivers Towards the USA: Comment on Amorim Neto (2011)"                   
+## [20] "A Critique of Dyadic Design"
+## 16                                                             1998 Jewish Community Study of the Coachella Valley, California
+## 17                                                                                               2002 State Legislative Survey
+## 18                                                                          2007 White Sands Dune Field lidar topographic data
+## 19                                                                          2008 White Sands Dune Field lidar topographic data
+## 20                                                                                                         2012 STATA Data.tab               
+
+```
+
 Once datasets and files are identified, it is easy to download and use them directly in R. See the ["Data Download" vignette](C-download.html) for details.
diff --git a/vignettes/C-download.Rmd b/vignettes/C-download.Rmd
@@ -42,28 +42,35 @@ library("tibble") # to see dataframes in tidyverse-form
 
 First, we retrieve a plain-text file like this dataset on electricity consumption by [Wakiyama et al. (2014)](https://doi.org/10.7910/DVN/ARKOTI/GN1MRT). Taking the file name and dataset DOI from this entry,
 
-```{r, echo=FALSE, message=FALSE,include=FALSE}
+
+```{r, eval=FALSE}
 energy <- get_dataframe_by_name(
   filename = "comprehensiveJapanEnergy.tab",
   dataset = "10.7910/DVN/ARKOTI", 
   server = "dataverse.harvard.edu")
 ```
 
 ```{r, eval=FALSE}
-energy <- get_dataframe_by_name(
-  filename = "comprehensiveJapanEnergy.tab",
-  dataset = "10.7910/DVN/ARKOTI", 
-  server = "dataverse.harvard.edu")
+head(energy)
 ```
 
 ```{r}
-head(energy)
+## # A tibble: 6 × 10
+##    time date  dummy  temp temp2      all    large    house    kepco    tepco
+##   <dbl> <chr> <dbl> <dbl> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
+## 1     1 8-Jan     0   5.9  34.8 95792389 35194957 26190714 13357735 26960899
+## 2     2 8-Feb     0   5.5  30.3 95156901 35322031 24224097 13315027 27189705
+## 3     3 8-Mar     0  10.7 114.  91034047 36474192 21391965 12805831 24495519
+## 4     4 8-Apr     0  14.7 216.  84087552 34949622 18494473 11494328 23540356
+## 5     5 8-May     0  18.5 342.  82742929 35417089 17923760 11589061 22848737
+## 6     6 8-Jun     0  21.3 454.  82180013 36692291 15205229 11360771 22487441
 ```
 
+
 These `get_dataframe_*` functions, introduced in v0.3.0, read the data directly into an R environment through whatever R function is supplied by `.f`. The default of the `get_dataframe_*` functions is to read in such data by `readr::read_tsv()`. The `.f` function can be changed to modify the read-in settings. For example, the following modification is a base-R equivalent to read in the ingested data.
 
 
-```{r}
+```{r, eval=FALSE}
 library(readr)
 energy <- get_dataframe_by_name(
   filename = "comprehensiveJapanEnergy.tab",
@@ -74,6 +81,16 @@ energy <- get_dataframe_by_name(
 head(energy)
 ```
 
+```{r}
+##   time  date dummy temp temp2      all    large    house    kepco    tepco
+## 1    1 8-Jan     0  5.9  34.8 95792389 35194957 26190714 13357735 26960899
+## 2    2 8-Feb     0  5.5  30.3 95156901 35322031 24224097 13315027 27189705
+## 3    3 8-Mar     0 10.7 114.5 91034047 36474192 21391965 12805831 24495519
+## 4    4 8-Apr     0 14.7 216.1 84087552 34949622 18494473 11494328 23540356
+## 5    5 8-May     0 18.5 342.3 82742929 35417089 17923760 11589061 22848737
+## 6    6 8-Jun     0 21.3 453.7 82180013 36692291 15205229 11360771 22487441
+```
+
 
 The dataverse package can also download datasets that are _drafts_ (i.e. versions not released publicly), as long as the user of the dataset provides their appropriate DATAVERSE_KEY. Users may need to modify the metadata of a datafile, such as adding a descriptive label, for the data downloading to work properly in this case. This is because the file identifier UNF, which the read function relies on, may only appear after metadata has been added.
 
@@ -83,7 +100,7 @@ The dataverse package can also download datasets that are _drafts_ (i.e. version
 
 If a file is displayed on dataverse as a `.tab` file like the survey data by [Alvarez et al. (2013)](https://doi.org/10.7910/DVN/ARKOTI/A8YRMP), it is likely that Dataverse [ingested](https://guides.dataverse.org/en/latest/user/tabulardataingest/index.html) the file to a plain-text, tab-delimited format.
 
-```{r,  message=FALSE}
+```{r,  message=FALSE,eval=FALSE}
 argentina_tab <- get_dataframe_by_name(
   filename = "alpl2013.tab",
   dataset = "10.7910/DVN/ARKOTI",
@@ -93,13 +110,17 @@ argentina_tab <- get_dataframe_by_name(
 
 However, ingested files may not retain important dataset attributes. For example, Stata and SPSS datasets encode value labels on to numeric values. Factor variables in R dataframes encode levels, not only labels. A plain-text ingested file will discard such information. For example,  the `polling_place` variable in this data is only given by numbers, although the original data labelled these numbers with informative values.
 
-```{r}
+```{r,eval=FALSE}
 str(argentina_tab$polling_place)
 ```
 
+```{r}
+## num [1:1475] 31 31 31 31 31 31 31 31 31 31 ...
+```
+
 When ingesting, Dataverse retains an `original` version that preserves these attributes but may not be readable in some platforms. The `get_dataframe_*` functions have an argument that can be set to `original = TRUE`. In this case we know that `alpl2013.tab` was originally a Stata dta file, so we can run:
 
-```{r}
+```{r, eval=FALSE}
 argentina_dta <- get_dataframe_by_name(
   filename = "alpl2013.tab",
   dataset = "10.7910/DVN/ARKOTI",
@@ -110,10 +131,17 @@ argentina_dta <- get_dataframe_by_name(
 
 Now we see that labels are read in through `haven`'s labelled variables class:
 
-```{r}
+```{r, eval=FALSE}
 str(argentina_dta$polling_place)
 ```
 
+```{r}
+##  dbl+lbl [1:1475] 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 3...
+##  @ label       : chr "polling_place"
+##  @ format.stata: chr "%9.0g"
+##  @ labels      : Named num [1:37] 1 2 3 4 5 6 7 8 9 10 ...
+##   ..- attr(*, "names")= chr [1:37] "E.E.T." "Escuela Juan Bautista Alberdi" "Escuela Juan Carlos DÃ¡valos" "Escuela Bernardino de Rivadavia" ...
+```
 
 
 Users should pick `.f` and `original` based on their existing knowledge of the file. If the original file is a `.sav` SPSS file, `.f` can be `haven::read_sav`. If it is a `.Rds` file, use `readRDS` or `readr::read_rds`. In fact, because the raw data is read in as a binary, there is no limitation to the file types `get_dataframe_*` can read in, as far as the dataverse package is concerned.
@@ -138,11 +166,23 @@ This shows that there are indeed 32 files, a mix of .R code files and tab- and c
 
 You can also retrieve more extensive metadata using `dataset_metadata()`:
 
-```{r}
+```{r, eval=FALSE}
 str(dataset_metadata("10.7910/DVN/ARKOTI", server = "dataverse.harvard.edu"), 
     max.level = 2)
 ```
 
+```{r}
+## List of 3
+##  $ displayName: chr "Citation Metadata"
+##  $ name       : chr "citation"
+##  $ fields     :'data.frame': 7 obs. of  4 variables:
+##   ..$ typeName : chr [1:7] "title" "author" "datasetContact" "dsDescription" ...
+##   ..$ multiple : logi [1:7] FALSE TRUE TRUE TRUE TRUE FALSE ...
+##   ..$ typeClass: chr [1:7] "primitive" "compound" "compound" "compound" ...
+##   ..$ value    :List of 7
+```
+
+
 ## Retrieving Scripts and Other Files
 
 If the file you want to retrieve is not data, you may want to use the  more primitive function, `get_file`, which gets the file data as a raw binary file. See the help page examples of `get_file()` that use the `base::writeBin()` function for details on how to write and read these binary files instead.