diff --git a/paper.bib b/paper.bib index 085cc7e..808c327 100644 --- a/paper.bib +++ b/paper.bib @@ -33,4 +33,103 @@ @misc{cia2021internetusers author = {{United States Central Intelligence Agency}}, year = {2021}, url = {https://www.cia.gov/the-world-factbook/field/internet-users/} -} \ No newline at end of file +} + +@manual{vries2022minicran, + title = {miniCRAN: Create a Mini Version of CRAN Containing Only Selected Packages}, + author = {Vries, Andrie de and Chubaty, Alex and Microsoft}, + year = {2022}, + version = {0.2.16}, + url = {https://cran.r-project.org/web/packages/miniCRAN/index.html} +} + +@manual{montag2023pypimirror, + title = {python-pypi-mirror}, + author = {montag451}, + year = {2023}, + version = {5.2.1}, + url = {https://pypi.org/project/python-pypi-mirror/} +} + +@manual{warsaw2024implib, + title = {importlib-resources}, + author = {Barry Warsaw}, + year = {2024}, + version = {6.4.0}, + url = {https://pypi.org/project/importlib-resources/} +} + +@manual{airium2023airum, + title = {airium}, + author = {Michał Kaczmarczyk}, + year = {2023}, + version = {0.2.6}, + url = {https://pypi.org/project/airium/} +} + +@manual{richardson2024bs4, + title = {beautifulsoup4}, + author = {Leonard Richardson}, + year = {2024}, + version = {4.12.3}, + url = {https://pypi.org/project/beautifulsoup4/} +} + + +@manual{pitrou2014pl, + title = {pathlib}, + author = {Antoine Pitrou}, + year = {2014}, + version = {1.0.1}, + url = {https://pypi.org/project/pathlib/} +} + +@manual{reitz2023requests, + title = {requests}, + author = {Kenneth Reitz}, + year = {2023}, + version = {2.31.0}, + url = {https://pypi.org/project/requests/} +} + +@book{rossum2009py, + title = {Python 3 Reference Manual}, + author = {Van Rossum, Guido and Drake, Fred L.}, + year = {2009}, + isbn = {1441412697}, + publisher = {CreateSpace}, + address = {Scotts Valley, CA} +} + +@misc{fsf2010wget, + title = {GNU Wget}, + author = {{Free Software Foundation}}, + note = {Original author: Hrvoje 
Nikšić}, + url = {http://www.gnu.org/software/wget/}, + version = {1.24.5}, + year = {2024}, +} + +@inproceedings{soton403913, + booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, + editor = {Fernando Loizides and Birgit Schmidt}, + title = {Jupyter Notebooks -- a publishing format for reproducible computational workflows}, + author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando P{\'e}rez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Dami{\'a}n Avila and Safia Abdalla and Carol Willing and {Jupyter development team}}, + publisher = {IOS Press}, + year = {2016}, + pages = {87--90}, + url = {https://eprints.soton.ac.uk/403913/}, + abstract = {It is increasingly necessary for researchers in all fields to write computer code, and in order to reproduce research results, it is important that this code is published. We present Jupyter notebooks, a document format for publishing code, results and explanations in a form that is both readable and executable. 
We discuss various tools and use cases for notebook documents.} +} + +@Manual{rstudio2024, + title = {RStudio: Integrated Development Environment for R}, + author = {{Posit team}}, + organization = {Posit Software, PBC}, + address = {Boston, MA}, + year = {2024}, + url = {http://www.posit.co/}, + } + + + diff --git a/paper.md b/paper.md index 8f7ab84..e854da6 100644 --- a/paper.md +++ b/paper.md @@ -51,7 +51,7 @@ Offlinedatasci automates the downloading and updating of the most recent materials for running workshops, and conducting offline data science work more broadly, including open source statistical and graphing software (R and Python), the associated integrated development -environments (IDEs; RStudio and Jupyter), data science focused +environments (IDEs; RStudio [@rstudio2024] and Jupyter [@soton403913]), data science focused partial mirrors of the associated package repositories (CRAN, PyPI), and lesson materials structured for local use via the browser. This package includes Python and command-line interfaces and is designed for both @@ -187,7 +187,7 @@ and 2) package repositories must follow specific file structures with appropriate metadata. To address this issue, we leverage software packages designed to create partial mirrors of the CRAN and PyPI package repositories. We use miniCRAN [@vries2022minicran] for mirroring CRAN and -pypi-mirror for mirroring PyPI. These packages automate the download of +pypi-mirror [@montag2023pypimirror] for mirroring PyPI. These packages automate the download of packages including their full dependency trees and set up the local repository file structures. These local mirrors can then be used by pointing to a local teaching server with the repository mirror or by @@ -220,7 +220,7 @@ external dependencies for rendering the lesson material into websites. 
Therefore offlinedatasci downloads rendered content directly from lesson websites to avoid the complexity and fragility associated with upstream changes when building lessons from multiple sources. Our approach uses -Wget, a software package that enables retrieving files using common +Wget [@fsf2010wget], a software package that enables retrieving files using common Internet protocols. We use Wget to manage this process, leveraging it\'s capabilities to: 1) recursively mirror directories; automating the process of finding all of the web pages associated with multiple page