diff --git a/.binder/environment.yml b/.binder/environment.yml
new file mode 100644
index 0000000..4327043
--- /dev/null
+++ b/.binder/environment.yml
@@ -0,0 +1,28 @@
+name: metadatainrcr
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+  - r
+dependencies:
+  - r-base
+  - cwltool
+  - r-readr
+  - r-readxl
+  - r-stringr
+  - r-lubridate
+  - r-rvest
+  - r-ggplot2
+  - r-wordcloud
+  - r-tidytext
+  - r-rmarkdown
+  - r-ggpubr
+  - r-ggthemes
+  - r-here
+  - r-bibtex
+  - conda-build
+  - autopep8
+  - entrez-direct
+  - jupyter
+  - pandas
+  - scikit-learn
diff --git a/.binder/start b/.binder/start
new file mode 100755
index 0000000..b6acd2a
--- /dev/null
+++ b/.binder/start
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# source: https://discourse.jupyter.org/t/glibcxx-3-4-26-not-found-from-rstudio/7778/8
+set -e
+export LD_LIBRARY_PATH=${NB_PYTHON_PREFIX}/lib:${LD_LIBRARY_PATH}
+exec "$@"
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 0911757..a001607 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,25 @@ __pycache__
 data/lens/citespace/
 .ipynb_checkpoints
 src/condabuilds/
-src/timeline.html
\ No newline at end of file
+src/timeline.html
+.ipython/
+
+.local/
+
+.cache/
+
+.conda/
+
+.jupyter/
+
+.rstudio/
+
+.subversion/
+
+.bash_history
+
+.bashrc
+
+.jupyter-server-log.txt
+
+src/timeline/timeline.html
diff --git a/.here b/.here
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
index de173e5..21c1b20 100644
--- a/README.md
+++ b/README.md
@@ -1,25 +1,31 @@
 # The Role of Metadata in Reproducible Computational Research
+
 This is a supplemental resource to Leipzig et al. "The Role of Metadata in Reproducible Computational Research" https://arxiv.org/pdf/2006.08589.pdf
 
 ## Organization
+
 ```
 ├───data/
 │   ├───examples/                  Examples of metadata standards
 │   ├───lens/                      Search exports for scimetric journal analysis
 │   └───standards.tsv              Raw standards table
 ├───src/
+│   ├───cwl/tools/                 CWL configuration to produce the timeline plot
+│   ├───manuscript/                Manuscript revision document
 │   ├───secrets/
 │   │   └───api.template.py        Replace this with api.py using your NCBI/NCBO keys
-│   ├───ncbo_ontologies.py         Scimetric ontology popularity analysis
-│   ├───scimetric.ipynb            Scimetric journal meta/rcr frequency analysis
-│   ├───timeline.R                 Produces the RCR case study timeline in the paper
+│   ├───ontologies/                Scimetric ontology popularity analysis
+│   ├───repotutils/                Scripts for automating management of this repository
+│   ├───scimetric/                 Scimetric journal meta/rcr frequency analysis in a Jupyter Notebook
+│   ├───timeline/                  R Markdown document to produce the RCR case study timeline in the paper, incl. helper files for execution with CWL (wrapper script, Dockerfile)
 │   ├───wget2jsonld.py             Helper script to convert wget output to jsonld
-│   └───wordcloud.R                Produces word cloud from cited abstracts
+│   └───wordcloud/                 R script to produce word cloud from cited abstracts
 ├───LICENSE                        The LICENSE file
 ├───README.md                      What you are looking at
 ├───environment.osx.yaml           OSX pinned Conda depenencies
 ├───environment.unpinned.yaml      Unpinned Conda depenencies
 └───ro-crate-metadata.jsonld       RO Crate config
+└───.binder                        Environment configuration files for usage with Binder (mybinder.org)
 ```
 <!--tree_placeholder-->
 
@@ -97,23 +103,34 @@ https://stackoverflow.com/questions/1740341/what-is-the-difference-between-rdf-a
 ## How to generate the timeline for this article
 
 Install [cwltool](https://github.com/common-workflow-language/cwltool)
+
 ```
 pip install cwltool
 cwltool src/cwl/tools/timeline.cwl --reportfile timeline.html
 ```
 
-## Contribute
-
-Contributions welcome!
-
-## License
+Note that the tool requires Docker for running the computing environment; see the file `src/timeline/Dockerfile` for the definition of the image used in the `.cwl` file.
 
-[![CC0](http://mirrors.creativecommons.org/presskit/buttons/88x31/svg/cc-zero.svg)](https://creativecommons.org/publicdomain/zero/1.0/)
+## Run on Binder
 
+[MyBinder](https://mybinder.org/) is a tool for creating executable computing environments based on standard and widely used dependency management files.
+You can easily run important parts of the analysis for the manuscript by clicking on the badges below.
+Binder will create a container using the environment configuration from the directory `.binder/` and provide you with an interactive environment to execute notebooks or scripts.
 
+- Scimetric journal frequency analysis of RCR and metadata terms (opens a Jupyter Notebook) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/leipzig/metadata-in-rcr/HEAD?filepath=src%2Fscimetric%2Fscimetric.ipynb)
+- Create Figure 2 from the paper (R Markdown notebook, open the file `src/timeline/timeline.Rmd` manually in RStudio) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/leipzig/metadata-in-rcr/HEAD?urlpath=rstudio)
+- Create word cloud from cited abstracts (run R script `src/wordcloud/wordcloud.R`) [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/leipzig/metadata-in-rcr/HEAD?urlpath=rstudio)
 
+For development purposes, you can also run `repo2docker` locally in the directory of the repository.
 
+```bash
+repo2docker --editable .
+```
 
+## Contribute
 
+Contributions welcome!
 
+## License
 
+[![CC0](http://mirrors.creativecommons.org/presskit/buttons/88x31/svg/cc-zero.svg)](https://creativecommons.org/publicdomain/zero/1.0/)
diff --git a/data/examples/rmarkdown.rmd b/data/examples/rmarkdown.rmd
index ead41e5..00ff369 100644
--- a/data/examples/rmarkdown.rmd
+++ b/data/examples/rmarkdown.rmd
@@ -1,5 +1,25 @@
 ---
 title: "A title for the analysis"
+# author metadata, esp. used for scientific articles
+author:
+  - name: Jeremy Leipzig
+    footnote: Corresponding author
+    affiliation: "Metadata Research Center, Drexel University, College of Computing and Informatics, Philadelphia PA, USA"
+    orcid: "0000-0001-7224-9620"
+  - name: Daniel Nüst
+    affiliation: "Institute for Geoinformatics, University of Münster, Germany"
+    orcid: "0000-0002-0024-5046"
+    email: daniel.nuest@uni-muenster.de
+
+# parameters to manipulate workflow; defaults can be changed when compiling the document
+params:
+  year: 2020
+  region: "Europe"
+  printcode: TRUE
+  data: file.csv
+  max_n: 42
+
+# configuration and styling of different output document formats
 output: 
   html_document:
     theme: lumen
@@ -7,8 +27,19 @@ output:
     toc_float:
       collapsed: false
     code_folding: show
+    self_contained: true
+  pdf_document:
+    toc: yes
+    fig_caption: yes
+    df_print: kable
+linkcolor: blue
+
+# field values can be generated from code
+date: "`r format(Sys.time(), '%d %B, %Y')`"
 ---
 
+<!-- inspiration/sources for fake front matter: https://bookdown.org/yihui/rmarkdown/params-declare.html, https://github.com/nuest/reproducible-research-at-giscience/blob/master/paper/reproducible-research-at-giscience.Rmd -->
+
 ```{r include=FALSE}
 knitr::opts_chunk$set(echo=TRUE, message=FALSE, warning=FALSE, fig.width=8, tidy=TRUE)
 ```
diff --git a/src/cwl/tools/timeline.cwl b/src/cwl/tools/timeline.cwl
index 4d2ba30..4faaf4b 100644
--- a/src/cwl/tools/timeline.cwl
+++ b/src/cwl/tools/timeline.cwl
@@ -61,7 +61,7 @@ doc:
 
 
 $schemas: 
-  - https://schema.org/version/3.9/schema.rdf
+  - https://schema.org/version/latest/schemaorg-current-https.rdf
 
 $namespaces: 
   iana: https://www.iana.org/assignments/media-types/
diff --git a/src/scimetric/scimetric.ipynb b/src/scimetric/scimetric.ipynb
index 776c0bf..27f9307 100644
--- a/src/scimetric/scimetric.ipynb
+++ b/src/scimetric/scimetric.ipynb
@@ -1844,7 +1844,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1855,7 +1855,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1864,7 +1864,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -3326,7 +3326,7 @@
        "Procedia - Social and Behavioral Sciences           0.995346  "
       ]
      },
-     "execution_count": 9,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3344,7 +3344,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -3471,7 +3471,7 @@
        " Erik Mannens                94516.0          0.999760"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3491,7 +3491,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -3531,12 +3531,12 @@
        "      <td>0.999607</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Mihai Pop</th>\n",
+       "      <th>Ludmila Prokunina-Olsson</th>\n",
        "      <td>4425.5</td>\n",
        "      <td>0.999017</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Ludmila Prokunina-Olsson</th>\n",
+       "      <th>Mihai Pop</th>\n",
        "      <td>4425.5</td>\n",
        "      <td>0.999017</td>\n",
        "    </tr>\n",
@@ -3556,12 +3556,12 @@
        "      <td>0.997837</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Patricia Porter-Gill</th>\n",
+       "      <th>Iain Hrynaszkiewicz</th>\n",
        "      <td>4420.0</td>\n",
        "      <td>0.996854</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Iain Hrynaszkiewicz</th>\n",
+       "      <th>Patricia Porter-Gill</th>\n",
        "      <td>4420.0</td>\n",
        "      <td>0.996854</td>\n",
        "    </tr>\n",
@@ -3571,7 +3571,7 @@
        "      <td>0.996854</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Philippe Rocca-Serra</th>\n",
+       "      <th>Scott C. Edmunds</th>\n",
        "      <td>4416.0</td>\n",
        "      <td>0.995281</td>\n",
        "    </tr>\n",
@@ -3581,17 +3581,17 @@
        "      <td>0.995281</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Steffen Neumann</th>\n",
+       "      <th>Philippe Rocca-Serra</th>\n",
        "      <td>4416.0</td>\n",
        "      <td>0.995281</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Scott C. Edmunds</th>\n",
+       "      <th>Markus Rupp</th>\n",
        "      <td>4416.0</td>\n",
        "      <td>0.995281</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>Markus Rupp</th>\n",
+       "      <th>Steffen Neumann</th>\n",
        "      <td>4416.0</td>\n",
        "      <td>0.995281</td>\n",
        "    </tr>\n",
@@ -3603,22 +3603,22 @@
        "                          rcr_auth_cnt  rcr_auth_scaled\n",
        "Victoria Stodden                4428.0         1.000000\n",
        "Jean-Luc Starck                 4427.0         0.999607\n",
-       "Mihai Pop                       4425.5         0.999017\n",
        "Ludmila Prokunina-Olsson        4425.5         0.999017\n",
+       "Mihai Pop                       4425.5         0.999017\n",
        "Roger D. Peng                   4424.0         0.998427\n",
        "Wei Tang                        4422.5         0.997837\n",
        "Susanna-Assunta Sansone         4422.5         0.997837\n",
-       "Patricia Porter-Gill            4420.0         0.996854\n",
        "Iain Hrynaszkiewicz             4420.0         0.996854\n",
+       "Patricia Porter-Gill            4420.0         0.996854\n",
        "Yi-Ping Fu                      4420.0         0.996854\n",
-       "Philippe Rocca-Serra            4416.0         0.995281\n",
-       "David L. Donoho                 4416.0         0.995281\n",
-       "Steffen Neumann                 4416.0         0.995281\n",
        "Scott C. Edmunds                4416.0         0.995281\n",
-       "Markus Rupp                     4416.0         0.995281"
+       "David L. Donoho                 4416.0         0.995281\n",
+       "Philippe Rocca-Serra            4416.0         0.995281\n",
+       "Markus Rupp                     4416.0         0.995281\n",
+       "Steffen Neumann                 4416.0         0.995281"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -3974,7 +3974,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.2"
+   "version": "3.7.10"
   }
  },
  "nbformat": 4,
diff --git a/src/timeline/timeline.Rmd b/src/timeline/timeline.Rmd
index cf46b88..172defa 100644
--- a/src/timeline/timeline.Rmd
+++ b/src/timeline/timeline.Rmd
@@ -7,9 +7,10 @@ output:
     theme: paper
     toc: false
     code_folding: hide
+params:
+  useLive: true
 ---
 
-
 ```{r setup, include=FALSE}
 knitr::opts_chunk$set(echo = TRUE)
 library(dplyr)
@@ -24,11 +25,12 @@ library(ggpubr)
 ```
 
 ## Fetching and processing data from Awesome Reproducible Research
-There is some munging that needs to be done to access the publication dates, clean up the sizes, simplify approaches into their main goals, and classify by macrofield
+
+There is some munging that needs to be done to access the publication dates, clean up the sizes, simplify approaches into their main goals, and classify by macrofield.
+
 ```{r fetchandprocess}
 #try to parse the github repo as it is now vs a commit we know will work
-useLive<-TRUE
-if(useLive){
+if(params$useLive){
   url<-"https://raw.githubusercontent.com/leipzig/awesome-reproducible-research/master/readme.md"
 }else{
   url<-"https://raw.githubusercontent.com/leipzig/awesome-reproducible-research/98a6fbaafd70eb80b2bc9c7987bb4618b5d0bd2b/readme.md"
@@ -79,11 +81,14 @@ ggplot(timeline) +
 ```
 
 ## Session
+
 ```{r session}
 sessionInfo()
 ```
 
 ## Git commit
-```{bash git, engine.opts='-l'}
-git -C / show --summary
+
+```{r git}
+# Call git from R via system2() instead of a bash chunk (bash cells don't play nice with conda in Binder)
+cat(system2("git", c("show", "--summary"), stdout = TRUE), sep = "\n")  # stdout = TRUE captures git's output lines; cat() prints them one per line
 ```
diff --git a/src/wordcloud/wordcloud.R b/src/wordcloud/wordcloud.R
index 2db0f24..ade274b 100644
--- a/src/wordcloud/wordcloud.R
+++ b/src/wordcloud/wordcloud.R
@@ -1,31 +1,45 @@
-library(bib2df)
-library(dplyr)
-library(tidytext)
-library(stringr)
-library(wordcloud)
-library(knitr)
-library(readr)
+library("bibtex")
+library("dplyr")
+library("tidytext")
+library("stringr")
+library("wordcloud")
+library("knitr")
+library("readr")
+library("here")
 
-pal <- brewer.pal(8,"Dark2")
+pal <- brewer.pal(8, "Dark2")
 
-useFullText<-TRUE
+useFullText <- FALSE  # TRUE: build the cloud from the full-text token file; FALSE: from the abstracts in the BibTeX file
 
-if(useFullText==TRUE){
-  #full text from pdfs
-  readr::read_file("../data/citations/tokens.txt.gz") %>% 
-    stringr::str_replace_all("'","") %>% 
-    stringr::str_replace_all("\\[","") %>% 
-    stringr::str_replace_all("\\]","") %>% 
-    stringr::str_replace_all(" ","") %>% 
-    stringr::str_split(pattern=',',simplify = TRUE) %>%
+if (useFullText == TRUE) {
+  # tokens from the full-text PDFs (file cannot be shared publicly); NOTE(review): this path is working-directory-relative, unlike the here::here() path in the else branch - confirm
+  readr::read_file("../data/citations/tokens.txt.gz") %>%  # the replacements below strip quotes, square brackets and spaces before splitting on commas
+    stringr::str_replace_all("'", "") %>%
+    stringr::str_replace_all("\\[", "") %>%
+    stringr::str_replace_all("\\]", "") %>%
+    stringr::str_replace_all(" ", "") %>%
+    stringr::str_split(pattern = ",", simplify = TRUE) %>%
     stringr::str_to_lower() -> tokenvec
-    data.frame(word=tokenvec) %>% anti_join(stop_words) %>% count(word, sort = TRUE) %>% ungroup() -> tokens_clean
-}else{
+    data.frame(word = tokenvec) %>%  # drop stop words, then count how often each remaining token occurs
+      anti_join(stop_words) %>%
+      count(word, sort = TRUE) %>%
+      ungroup() -> tokens_clean
+} else {
   #just the abstracts
-  path<-"../data/citations/metadata-in-rcr-refs.bib"
-  df <- bib2df(path)
-  df %>% dplyr::filter(!is.na(ABSTRACT)) %>% unnest_tokens(word,ABSTRACT) %>% anti_join(stop_words) %>% count(word, sort = TRUE) %>% ungroup() -> tokens_clean
+  path <- here::here("data/citations/metadata-in-rcr-refs.bib")  # path is built with here::here() rather than relative to the working directory
+  bib <- bibtex::read.bib(path)
+  df <- data.frame(`ABSTRACT` = unlist(  # gather the abstract field of the bibliography entries into a single ABSTRACT column
+    sapply(bib, function(b) { b$abstract })))
+  df %>% dplyr::filter(!is.na(ABSTRACT)) %>%  # same pipeline as the full-text branch: tokenize, drop stop words, count
+    unnest_tokens(word, ABSTRACT) %>%
+    anti_join(stop_words) %>%
+    count(word, sort = TRUE) %>%
+    ungroup() -> tokens_clean
 }
 
 tokens_clean %>%
-with(wordcloud(word, n, random.order = FALSE, max.words = 100, colors=pal)) -> word_cloud
+  with(wordcloud(word,  # with() exposes the word and n columns of tokens_clean to wordcloud()
+    n,
+    random.order = FALSE,
+    max.words = 100,  # cap on the number of words passed to the plot
+    colors = pal)) -> word_cloud  # pal is the 8-colour "Dark2" palette defined near the top of the script