diff --git a/NAMESPACE b/NAMESPACE index 92ced32..11d66d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(num_tweets_by_timeperiod) export(num_tweets_by_username) export(num_users_by_timeperiod) export(top_n_hashtags) +export(top_n_mentions) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,mutate) diff --git a/R/top_n_mentions.R b/R/top_n_mentions.R new file mode 100644 index 0000000..d8d0d0c --- /dev/null +++ b/R/top_n_mentions.R @@ -0,0 +1,55 @@ +#' Plot top n account mentions +#' +#' @description Create a ggplot2 bar chart of the number of times the top `n` +#' accounts (ties for `n`th position will be included) were mentioned in +#' tweets. +#' +#' @param sqlite_con [Class SQLiteConnection](https://rsqlite.r-dbi.org/reference/sqliteconnection-class) +#' object that is a connection to an SQLite .db file created by the +#' [tidy-tweet package](https://github.com/QUT-Digital-Observatory/tidy_tweet). +#' The database contains a collection of tweets in relational tables. This can +#' be created with [tweetexploR::connect_to_sqlite_db()]. +#' +#' @param n Number of accounts to be plotted. Note, ties will be included. +#' +#' @return ggplot2 plot. 
+#'
+#' @importFrom dplyr mutate rename group_by summarise n slice_max
+#'
+#' @importFrom ggplot2 ggplot aes geom_col labs theme
+#'
+#' @importFrom stringr str_to_lower
+#'
+#' @importFrom rlang .data
+#'
+#' @importFrom stats reorder
+#'
+#' @examples
+#' \dontrun{
+#'
+#' top_n_mentions(sqlite_con, n = 10)
+#' my_plot <- top_n_mentions(sqlite_con, 20)
+#'
+#' }
+#'
+#' @export
+
+top_n_mentions <- function(sqlite_con, n) {
+  DBI::dbGetQuery(sqlite_con,
+    "SELECT username, source_id
+    FROM mention
+    WHERE source_type = 'tweet';") %>%
+    # Rename by string; `.data` is deprecated in tidyselect verbs.
+    rename(account = "username") %>%
+    group_by(.data$account) %>%
+    summarise(mentions = n()) %>% # dplyr::n(), not the `n` argument
+    slice_max(n = n, order_by = .data$mentions, with_ties = TRUE) %>%
+    ggplot(aes(x = reorder(.data$account, .data$mentions), .data$mentions)) +
+    geom_col() +
+    labs(title = paste0("Top ", n, " accounts mentioned in tweets"),
+         y = "Number of tweets") +
+    configure_y_axis() +
+    ggplot2::coord_flip() + # accounts on the vertical axis
+    configure_ggplot_theme() +
+    theme(axis.title.y = ggplot2::element_blank())
+}
diff --git a/man/top_n_mentions.Rd b/man/top_n_mentions.Rd
new file mode 100644
index 0000000..cb2fd21
--- /dev/null
+++ b/man/top_n_mentions.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/top_n_mentions.R
+\name{top_n_mentions}
+\alias{top_n_mentions}
+\title{Plot top n account mentions}
+\usage{
+top_n_mentions(sqlite_con, n)
+}
+\arguments{
+\item{sqlite_con}{\href{https://rsqlite.r-dbi.org/reference/sqliteconnection-class}{Class SQLiteConnection}
+object that is a connection to an SQLite .db file created by the
+\href{https://github.com/QUT-Digital-Observatory/tidy_tweet}{tidy-tweet package}.
+The database contains a collection of tweets in relational tables. This can
+be created with \code{\link[=connect_to_sqlite_db]{connect_to_sqlite_db()}}.}
+
+\item{n}{Number of accounts to be plotted. Note, ties will be included.}
+}
+\value{
+ggplot2 plot.
+}
+\description{
+Create a ggplot2 bar chart of the number of times the top \code{n}
+accounts (ties for \code{n}th position will be included) were mentioned in
+tweets.
+}
+\examples{
+\dontrun{
+
+top_n_mentions(sqlite_con, n = 10)
+my_plot <- top_n_mentions(sqlite_con, 20)
+
+}
+
+}
diff --git a/tests/testthat/_snaps/top_n_mentions/top-n-mentions-10.svg b/tests/testthat/_snaps/top_n_mentions/top-n-mentions-10.svg
new file mode 100644
index 0000000..2bd9445
--- /dev/null
+++ b/tests/testthat/_snaps/top_n_mentions/top-n-mentions-10.svg
@@ -0,0 +1,77 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+AnnastaciaMP
+TonyKoc69285191
+Qldaah
+TonyHWindsor
+andrewjgiles
+PeterDutton_MP
+ljayes
+PaulFletcherMP
+BelindaJones68
+SkyNewsAust
+AlboMP
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+0
+30
+60
+90
+Number of tweets
+Top 10 accounts mentioned in tweets
+
+
diff --git a/tests/testthat/test-top_n_mentions.R b/tests/testthat/test-top_n_mentions.R
new file mode 100644
index 0000000..83ce640
--- /dev/null
+++ b/tests/testthat/test-top_n_mentions.R
@@ -0,0 +1,17 @@
+# Open the fixture database that the tests below read from
+sqlite_con <- connect_to_sqlite_db(test_path("fixtures", "auspol-test.db"))
+
+
+test_that("result is a ggplot2 object", {
+  mentions_plot <- top_n_mentions(sqlite_con, 10)
+  expect_true(ggplot2::is.ggplot(mentions_plot))
+})
+
+test_that("ggplot2 plot has expected output", {
+  mentions_plot <- top_n_mentions(sqlite_con, 10)
+  vdiffr::expect_doppelganger("top_n_mentions_10", mentions_plot)
+})
+
+
+# Close the connection once the tests above have run
+DBI::dbDisconnect(sqlite_con)