20241225 - convert to calculate_stats() from nflfastR v5.0

isaactpetersen · Dec 25, 2024 · 76a244d · 76a244d
1 parent 0facb85
commit 76a244d
Show file tree

Hide file tree

Showing 25 changed files with 787 additions and 591 deletions.
diff --git a/_quarto.yml b/_quarto.yml
@@ -65,7 +65,7 @@ format:
     theme:
       light: [cosmo, fonts/css/lora.css]
       dark: [cosmo, fonts/css/lora.css, theme-dark.scss]
-    mainfont: Lora
+    mainfont: Lora #gfonts::setup_font("lora", output_dir = "./fonts/"); gfonts::get_all_fonts()
     code-link: true
     number-sections: true
     code-fold: show

diff --git a/basic-statistics.qmd b/basic-statistics.qmd
@@ -14,6 +14,21 @@ library("grid")
 library("tidyverse")
 ```
 
+### Load Data {#sec-basicStatsLoadData}
+
+```{r}
+#| eval: false
+#| include: false
+
+load(file = file.path(path, "/OneDrive - University of Iowa/Teaching/Courses/Fantasy Football/Data/player_stats_seasonal.RData", fsep = ""))
+```
+
+```{r}
+load(file = "./data/player_stats_seasonal.RData")
+```
+
+We created the `player_stats_seasonal.RData` file in @sec-calculatePlayerAge.
+
 ## Descriptive Statistics {#sec-descriptiveStatistics}
 
 Descriptive statistics are used to describe data.
@@ -84,12 +99,26 @@ Because some projections are outliers, we use a trimmed version of the weighted
 Below is R code to estimate each:
 
 ```{r}
-#mean(data, na.rm = TRUE)
-#median(data, na.rm = TRUE)
-#DescTools::HodgesLehmann(exampleValues, na.rm = TRUE)
-#petersenlab::Mode(exampleValues)
-#weighted.mean(data, weights, na.rm = TRUE)
-#petersenlab::wthdquantile(data, weights, probs = 0.5)
+mean(player_stats_seasonal$fantasyPoints, na.rm = TRUE)
+median(player_stats_seasonal$fantasyPoints, na.rm = TRUE)
+DescTools::HodgesLehmann(player_stats_seasonal$fantasyPoints, na.rm = TRUE)
+petersenlab::Mode(player_stats_seasonal$fantasyPoints)
+
+weighted.mean(
+  player_stats_seasonal$fantasyPoints,
+  w = sample( # weights must be passed as `w`; random here (could specify them manually)
+    x = 1:3,
+    size = length(player_stats_seasonal$fantasyPoints),
+    replace = TRUE),
+  na.rm = TRUE)
+
+petersenlab::wthdquantile(
+  player_stats_seasonal$fantasyPoints,
+  w = sample( # randomly generate weights (could specify them manually)
+    x = 1:3,
+    size = length(player_stats_seasonal$fantasyPoints),
+    replace = TRUE),
+  probs = 0.5)
 ```
 
 ### Spread {#sec-descriptiveStatisticsSpread}
@@ -173,34 +202,35 @@ Below is R code to estimate each:
 To estimate multiple indices of center, spread, and shape of the data, you can use the following code:
 
 ```{r}
-#psych::describe(mydata)
-
-#mydata %>% 
-#  summarise(across(
-#      everything(),
-#      .fns = list(
-#        n = ~ length(na.omit(.)),
-#        missingness = ~ mean(is.na(.)) * 100,
-#        M = ~ mean(., na.rm = TRUE),
-#        SD = ~ sd(., na.rm = TRUE),
-#        min = ~ min(., na.rm = TRUE),
-#        max = ~ max(., na.rm = TRUE),
-#        range = ~ max(., na.rm = TRUE) - min(., na.rm = TRUE),
-#        IQR = ~ IQR(., na.rm = TRUE),
-#        MAD = ~ mad(., na.rm = TRUE),
-#        median = ~ median(., na.rm = TRUE),
-#        pseudomedian = ~ DescTools::HodgesLehmann(., na.rm = TRUE),
-#        mode = ~ petersenlab::Mode(., multipleModes = "mean"),
-#        skewness = ~ psych::skew(., na.rm = TRUE),
-#        kurtosis = ~ psych::kurtosi(., na.rm = TRUE)),
-#      .names = "{.col}.{.fn}")) %>%
-#    pivot_longer(
-#      cols = everything(),
-#      names_to = c("variable","index"),
-#      names_sep = "\\.") %>% 
-#    pivot_wider(
-#      names_from = index,
-#      values_from = value)
+psych::describe(player_stats_seasonal["fantasyPoints"])
+
+player_stats_seasonal %>% 
+  select(age, years_of_experience, fantasyPoints) %>% 
+  summarise(across(
+      everything(),
+      .fns = list(
+        n = ~ length(na.omit(.)),
+        missingness = ~ mean(is.na(.)) * 100,
+        M = ~ mean(., na.rm = TRUE),
+        SD = ~ sd(., na.rm = TRUE),
+        min = ~ min(., na.rm = TRUE),
+        max = ~ max(., na.rm = TRUE),
+        range = ~ max(., na.rm = TRUE) - min(., na.rm = TRUE),
+        IQR = ~ IQR(., na.rm = TRUE),
+        MAD = ~ mad(., na.rm = TRUE),
+        median = ~ median(., na.rm = TRUE),
+        pseudomedian = ~ DescTools::HodgesLehmann(., na.rm = TRUE),
+        mode = ~ petersenlab::Mode(., multipleModes = "mean"),
+        skewness = ~ psych::skew(., na.rm = TRUE),
+        kurtosis = ~ psych::kurtosi(., na.rm = TRUE)),
+      .names = "{.col}.{.fn}")) %>%
+    pivot_longer(
+      cols = everything(),
+      names_to = c("variable","index"),
+      names_sep = "\\.") %>% 
+    pivot_wider(
+      names_from = index,
+      values_from = value)
 ```
 
 ## Scores and Scales {#sec-scoresAndScales}

diff --git a/bugs.qmd b/bugs.qmd
@@ -6,5 +6,5 @@ Bugs to report (`nflreadr` or `nflfastR`):
 - `player_stats` database of weekly stats does not have a `game_id` column (<https://github.com/nflverse/nflreadr/issues/238>)
 - common naming convention for key variables, for purposes of merging (<https://github.com/nflverse/nflreadr/issues/237>)
 - use of deprecated `summarise()` for returning more than 1 row per group; use `reframe()` instead (<https://github.com/nflverse/nflfastR/issues/480>)
-- calculate career stats using `calculate_stats()` function (as was formerly able to do with the `calculate_player_stats()` function)
+- calculate career stats using `calculate_stats()` function (as was formerly possible with the `calculate_player_stats()` function) (<https://github.com/nflverse/nflfastR/issues/501>)
 - will `nflreadr` switch to using the objects from the `calculate_stats()` function in `nflfastR`? (<https://github.com/nflverse/nflreadr/issues/261>)