Deployed e57806a with MkDocs version: 1.6.1

richelbilderbeek · Jan 1, 2025 · 47b03b8 · 47b03b8
commit 47b03b8
Show file tree

Hide file tree

Showing 66 changed files with 15,712 additions and 0 deletions.
diff --git a/.nojekyll b/.nojekyll
diff --git a/404.html b/404.html
diff --git a/CODE_OF_CONDUCT/index.html b/CODE_OF_CONDUCT/index.html
diff --git a/CONTRIBUTING/index.html b/CONTRIBUTING/index.html
diff --git a/K3_ahoy.jpg b/K3_ahoy.jpg
diff --git a/LICENSE/index.html b/LICENSE/index.html
diff --git a/analysis/analysis.Rmd b/analysis/analysis.Rmd
@@ -0,0 +1,235 @@
+---
+title: "Analysis"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Analysis}
+  %\VignetteEncoding{UTF-8}
+  %\VignetteEngine{knitr::rmarkdown}
+editor_options: 
+  chunk_output_type: console
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+In this document, we do the analysis presented in the paper.
+
+Currently, the analysis uses fake data.
+
+## Setup
+
+```{r}
+library(testthat)
+library(ggsignif)
+```
+
+## Reading the data
+
+```{r}
+ratings <- readr::read_csv("ratings.csv", show_col_types = FALSE)
+n_ratings <- nrow(ratings)
+```
+
+There are `r n_ratings` ratings.
+
+## Analysis
+
+Connecting the ratings to the formations:
+
+```{r}
+songs <- dplyr::select(heyahmama::get_songs(), cd_title, song_title)
+n_songs <- nrow(songs)
+```
+
+There are `r n_songs` songs.
+
+```{r}
+cds <- dplyr::select(heyahmama::get_cds(), cd_title, formation)
+n_cds <- nrow(cds)
+```
+
+There are `r n_cds` CDs.
+
+```{r}
+songs_per_formation <- dplyr::select(merge(songs, cds), song_title, formation)
+testthat::expect_equal(n_songs, nrow(songs_per_formation))
+knitr::kable(head(songs_per_formation))
+```
+
+Add the formations to the ratings:
+
+```{r}
+ratings_per_formation <- dplyr::select(merge(ratings, songs_per_formation), formation, rating)
+testthat::expect_equal(n_ratings, nrow(ratings_per_formation))
+ratings_per_formation$formation <- as.factor(ratings_per_formation$formation)
+knitr::kable(head(ratings_per_formation))
+```
+
+## Formations
+
+There are two datasets:
+
+- Dataset A: all 4 formations
+- Dataset B: the first 3 formations
+
+## 4 formations
+
+### Plot distribution of ratings
+
+General plotting function:
+
+```{r}
+plot_ratings <- function(ratings_per_formation) {
+  ggplot2::ggplot(
+    ratings_per_formation,
+    ggplot2::aes(x = formation, y = rating)
+  ) + ggplot2::geom_violin()  
+}
+```
+
+Apply this to all ratings:
+
+```{r}
+p <- plot_ratings(ratings_per_formation)
+p
+```
+
+### Order formations based on rating
+
+Order formations by ratings:
+
+```{r}
+get_ordered_average_rating_per_formation <- function(ratings_per_formation) {
+  n_formations <- length(unique(ratings_per_formation$formation))
+  
+  average_rating_per_formation <-
+    ratings_per_formation |> 
+    dplyr::group_by(formation) |> 
+    dplyr::summarise(average_rating = mean(rating))
+  testthat::expect_equal(n_formations, nrow(average_rating_per_formation))
+
+  ordered_average_rating_per_formation <- 
+    average_rating_per_formation |> 
+    dplyr::arrange(dplyr::desc(average_rating))
+  testthat::expect_equal(n_formations, nrow(ordered_average_rating_per_formation))
+
+  ordered_average_rating_per_formation
+}
+```
+
+```{r}
+
+knitr::kable(
+  get_ordered_average_rating_per_formation(
+    ratings_per_formation
+  )
+)
+```
+
+## Statistics
+
+Do the formations have different ratings?
+
+General function:
+
+```{r}
+get_stats_table <- function(ratings_per_formation) {
+  n_formations <- length(unique(ratings_per_formation$formation))
+  n_combinations <- (n_formations * (n_formations - 1)) / 2
+  alpha <- 0.05 / n_combinations
+
+  p_values_table <- tibble::tibble(
+    a = rep(NA, n_combinations), 
+    b = NA, 
+    p = NA,
+    alpha = alpha
+  )
+
+  i <- 1
+  for (lhs in seq(1, n_formations - 1)) {
+    ratings_lhs <- ratings_per_formation[ratings_per_formation$formation == lhs, ]$rating
+    for (rhs in seq(lhs + 1, n_formations)) {
+      ratings_rhs <- ratings_per_formation[ratings_per_formation$formation == rhs, ]$rating
+      p_value <- wilcox.test(ratings_lhs, ratings_rhs, alternative = "two.sided")$p.value
+      testthat::expect_true(i >= 1)
+      testthat::expect_true(i <= nrow(p_values_table))
+      p_values_table$a[i] <- lhs
+      p_values_table$b[i] <- rhs
+      p_values_table$p[i] <- p_value
+      i <- i + 1
+    }
+  }
+  p_values_table$is_the_same <- p_values_table$p > alpha
+  p_values_table
+}
+```
+
+Applying it here:
+
+```{r}
+knitr::kable(get_stats_table(ratings_per_formation))
+```
+
+## Plot with significance indicators
+
+General function:
+
+```{r}
+plot_ratings_with_indicators <- function(ratings_per_formation) {
+  p <- plot_ratings(ratings_per_formation)
+  t_all <- get_stats_table(ratings_per_formation)
+  
+  t <- t_all[t_all$is_the_same == FALSE, ]
+  
+  t$annotation <- scales::scientific(t$p, digits = 1)
+  t$y_position <- seq(
+    from = 11.0, 
+    to = 11.0 + ((nrow(t) - 1) * 2.0),
+    by = 2.0
+  )
+  p + ggsignif::geom_signif(
+    data = t,
+    ggplot2::aes(
+      xmin = a,
+      xmax = b,
+      annotations = annotation,
+      y_position = y_position
+    ),
+    manual = TRUE
+  )
+  
+}
+```
+
+To these ratings
+
+```{r plot_ratings_with_indicators_4}
+plot_ratings_with_indicators(ratings_per_formation)
+```
+
+## 3 formations
+
+```{r}
+t <- ratings_per_formation[ratings_per_formation$formation != 4, ]
+p <- plot_ratings(t)
+p
+```
+
+```{r}
+knitr::kable(
+  get_ordered_average_rating_per_formation(t)
+)
+```
+
+```{r}
+t <- ratings_per_formation[ratings_per_formation$formation != 4, ]
+knitr::kable(get_stats_table(ratings_per_formation = t))
+```
+
+```{r plot_ratings_with_indicators_3}
+plot_ratings_with_indicators(t)
+```
diff --git a/analysis/analysis.pdf b/analysis/analysis.pdf