From 1679ebfdd1c4fbda024e1f90c5912a5feb71ff42 Mon Sep 17 00:00:00 2001 From: Raoul Wadhwa Date: Wed, 4 Nov 2020 16:11:04 -0500 Subject: [PATCH 1/2] updated preprint --- NAMESPACE | 1 + R/s3-ICON.R | 1 + preprint/ICON-preprint.Rmd | 11 ++++++++--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 1dc59b8..baf9c76 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,4 +4,5 @@ S3method(print,ICON) export(as_igraph) export(as_network) export(get_data) +export(print.ICON) importFrom(utils,download.file) diff --git a/R/s3-ICON.R b/R/s3-ICON.R index 770fb60..c982e94 100644 --- a/R/s3-ICON.R +++ b/R/s3-ICON.R @@ -46,6 +46,7 @@ ICON <- function(x) { #' @param x object of class `ICON` (and, as a consequence, `data.frame`) #' @param ... other relevant parameters #' @return does not return anything useful; prints `x`'s contents to console +#' @export print.ICON #' @export #' @examples #' \dontrun{ diff --git a/preprint/ICON-preprint.Rmd b/preprint/ICON-preprint.Rmd index 807d15a..86a5d7b 100644 --- a/preprint/ICON-preprint.Rmd +++ b/preprint/ICON-preprint.Rmd @@ -138,8 +138,7 @@ Looking at the structure of the complex networks with `utils::str` shows that `I The latter provides a suitable container for edge list objects with potential edge attributes in rectangular format. The former, an S3 class, benefits users by providing certain guarantees about object format, i.e., an unmodified complex network object acquired via the `ICON` package will have the `ICON` S3 class and is guaranteed to be a data frame containing an edge list in which each row represents a single edge, the first two columns specify nodes that define the corresponding edge, and additional columns define edge attributes. This standard format guarantee allows users, among other things, to generate code for one `ICON` dataset with assurances that it will function effectively for other `ICON` datasets. -The S3 class will also allow users to take advantage of relevant S3 generic methods. -In future `ICON` versions, we aim to implement methods for common generics, e.g. `base::plot`. +The S3 class will also allow users to take advantage of relevant S3 generic methods, such as `base::print`. # Use cases @@ -158,6 +157,12 @@ set.seed(42) # download sample dataset get_data("seed_disperse_beehler") + +# peek inside dataset using generic +print(seed_disperse_beehler) + +# can also peek with print.ICON +print.ICON(seed_disperse_beehler) ``` A quick exploration of `seed_disperse_beehler` will grant a deeper understanding of the use cases. @@ -165,7 +170,7 @@ Primarily, we would like to explore the third column - named `Frequency`. Due to the heavy skew, we will use two consecutive logarithmic transformations to more easily see the effects of coloring edges by the `Frequency` edge attribute. The following code chunk produces histograms of `seed_disperse_beehler$Frequency` before and after this transformation for comparison. -```{r seed_explore,warning=FALSE,fig.show='hold', fig.width=3.4, fig.height=2} +```{r seed_explore,warning=FALSE,fig.show='hold', fig.width=6, fig.height=3} # plot a histogram w/o transformation (skewed, tough to see differences) ggplot(seed_disperse_beehler, aes(x = Frequency)) + geom_histogram(bins = 10, fill = "white", color = "black") + From 11fc5879ee5b6242d1ab73db50313fe25ff92df2 Mon Sep 17 00:00:00 2001 From: Raoul Wadhwa Date: Wed, 4 Nov 2020 18:00:23 -0500 Subject: [PATCH 2/2] minor proofreading fixes --- preprint/ICON-preprint.Rmd | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/preprint/ICON-preprint.Rmd b/preprint/ICON-preprint.Rmd index 86a5d7b..1493714 100644 --- a/preprint/ICON-preprint.Rmd +++ b/preprint/ICON-preprint.Rmd @@ -12,14 +12,15 @@ authors: location: Cleveland, OH 44195, United States email: ScottJ10@ccf.org abstract: | - We introduce `ICON`, an R package that contains `r nrow(ICON::ICON_data)` complex network datasets in a standard edgelist format. - All provided datasets have associated citations and have been indexed by the Colorado Index of Complex Networks - also referred to as ICON. + We introduce `ICON`, an R package that contains `r nrow(ICON::ICON_data)` complex network datasets in a standard edge list format. + All provided datasets have associated citations and have been indexed by the Colorado Index of Complex Networks, or ICON for short. In addition to supplying a large and diverse corpus of useful real-world networks, `ICON` also implements conversion functions to work with the `network`, `ggnetwork`, and `igraph` R packages for network analysis and visualization. Currently, the Comprehensive R Archive Network hosts `ICON` v$0.4.0$. We hope that `ICON` will serve as a standard corpus for complex network research and prevent redundant work that would be otherwise necessary by individual research groups. The open source code for `ICON` and for this reproducible report can be found at \url{https://github.com/rrrlw/ICON}. keywords: - complex networks + - R data package - R programming language bibliography: ICONrefs.bib nocite: | @@ -34,7 +35,7 @@ Empirical study of complex networks requires real-world data to validate theoret A large, diverse corpus of networks often proves useful given the many shapes and sizes that complex networks assume.[@corpus1;@corpus2] To our knowledge, the [Colorado Index of Complex Networks (ICON)](https://icon.colorado.edu) hosts the largest curated index of real-world complex networks, with metadata and links to over 5,000 networks as of this writing.[@icon] However, heterogeneity in data format, access, and availability limit how easily users can take advantage of this incredible resource. -A central repository containing a large corpus of ICON-indexed networks in standard format would thus provide a useful service for network science researchers, who would avoid the tedious task of data format conversion prior to analysis. +A central repository containing a large corpus of ICON-indexed networks in standard format would thus provide a useful service for network science researchers, who could avoid the tedious task of data format conversion prior to analysis. Here, we introduce the `ICON` R package as such a solution, providing a large and diverse corpus of real-world networks and tools to work with existing network analysis and visualization R packages. # Implementation details @@ -150,6 +151,7 @@ library("ICON") library("network") library("ggnetwork") library("ggplot2") +library("gridExtra") library("igraph") # for reproducibility @@ -170,16 +172,18 @@ Primarily, we would like to explore the third column - named `Frequency`. Due to the heavy skew, we will use two consecutive logarithmic transformations to more easily see the effects of coloring edges by the `Frequency` edge attribute. The following code chunk produces histograms of `seed_disperse_beehler$Frequency` before and after this transformation for comparison. -```{r seed_explore,warning=FALSE,fig.show='hold', fig.width=6, fig.height=3} +```{r seed_explore,warning=FALSE,fig.show='hold', fig.width=4.5, fig.height=2.5} # plot a histogram w/o transformation (skewed, tough to see differences) -ggplot(seed_disperse_beehler, aes(x = Frequency)) + +g1 <- ggplot(seed_disperse_beehler, aes(x = Frequency)) + geom_histogram(bins = 10, fill = "white", color = "black") + theme_bw() # plot a histogram w/ transformation (more spread out, differences easily seen) -ggplot(seed_disperse_beehler, aes(x = log(log(Frequency)))) + +g2 <- ggplot(seed_disperse_beehler, aes(x = log(log(Frequency)))) + geom_histogram(bins = 10, fill = "white", color = "black") + theme_bw() + +grid.arrange(g1, g2, nrow = 2) ``` ## With the `network` R package {#networkusecase} @@ -188,7 +192,7 @@ Using the `seed_disperse_beehler` sample dataset, we first convert it to a `netw This allows us to take advantage of the large set of tools already built in the [Statnet suite](http://www.statnet.org) of R packages, specifically the [network](https://CRAN.R-project.org/package=network) package. Although we first use [ggnetwork](https://CRAN.R-project.org/package=ggnetwork) to rapidly visualize the nodes and edges, we also show how to visualize edge attributes toward the end of the code chunk. -```{r network-case,message=FALSE,fig.width=6.75,fig.height=4} +```{r network-case,message=FALSE,fig.width=4.5,fig.height=2.5} # convert using ICON function converted <- as_network(seed_disperse_beehler)