Add unit tests

mayer79 · mayer79 · commit f50a25a6a5e1 · 2024-08-05T19:43:56.000+02:00
diff --git a/README.md b/README.md
@@ -30,9 +30,9 @@ A situation where the two approaches give different results: The model has inter
 ### Typical workflow to explain any model
 
 1. **Sample rows to explain:** Sample 500 to 2000 rows `X` to be explained. If the training dataset is small, simply use the full training data for this purpose. `X` should only contain feature columns.
-2. **Select background data:** Both algorithms require a representative background dataset `bg_X` to calculate marginal means. For this purpose, set aside 50 to 500 rows from the training data.
+2. **Select background data (optional):** Both algorithms require a representative background dataset `bg_X` to calculate marginal means. For this purpose, set aside 50 to 500 rows from the training data. If `bg_X` is not specified, up to `bg_n = 200` rows are randomly sampled from `X`.
 If the training data is small, use the full training data. In cases with a natural "off" value (like MNIST digits), this can also be a single row with all values set to the off value.
-3. **Crunch:** Use `kernelshap(object, X, bg_X, ...)` or `permshap(object, X, bg_X, ...)` to calculate SHAP values. Runtime is proportional to `nrow(X)`, while memory consumption scales linearly in `nrow(bg_X)`.
+3. **Crunch:** Use `kernelshap(object, X, bg_X = NULL, ...)` or `permshap(object, X, bg_X = NULL, ...)` to calculate SHAP values. Runtime is proportional to `nrow(X)`, while memory consumption scales linearly in `nrow(bg_X)`.
 4. **Analyze:** Use {shapviz} to visualize the results.
 
 **Remarks**
diff --git a/tests/testthat/test-kernelshap.R b/tests/testthat/test-kernelshap.R
@@ -14,6 +14,25 @@ test_that("SHAP + baseline = prediction for exact mode", {
   expect_equal(rowSums(s$S) + s$baseline, preds[c(1L, 51L, 101L)])
 })
 
+test_that("background data is automatically selected", {
+  # bg_X is omitted; for this X, the background data ends up being all of X
+  auto <- kernelshap(fit, X = iris[, x], verbose = FALSE)
+  expect_equal(s$S, auto$S[c(1L, 51L, 101L), ])
+})
+
+test_that("missing bg_X gives error if X is very small", {
+  expect_error(kernelshap(fit, X = iris[seq_len(10L), x], verbose = FALSE))
+})
+
+test_that("missing bg_X gives warning if X is quite small", {
+  expect_warning(kernelshap(fit, iris[1:30, x], verbose = FALSE))
+})
+
+test_that("selection of bg_X can be controlled via bg_n", {
+  res <- kernelshap(fit, iris[seq_len(30L), x], bg_n = 20L, verbose = FALSE)
+  expect_equal(nrow(res$bg_X), 20L)
+})
+
 test_that("Exact hybrid calculation is similar to exact (non-hybrid)", {
   s1 <- kernelshap(
     fit, 
@@ -178,6 +197,14 @@ test_that("SHAP + baseline = prediction works with case weights", {
   expect_equal(rowSums(s$S) + s$baseline, preds[1:5])
 })
 
+test_that("selection of bg_X and bg_w can be controlled via bg_n", {
+  rows <- seq_len(30L)
+  w <- iris$Petal.Length[rows]
+  res <- kernelshap(fit, iris[rows, x], bg_w = w, bg_n = 20L, verbose = FALSE)
+  expect_equal(nrow(res$bg_X), 20L)
+  expect_length(res$bg_w, 20L)
+})
+
 test_that("Decomposing a single row works with case weights", {
   s <- kernelshap(
     fit, iris[1L, x], bg_X = iris, bg_w = iris$Petal.Length, verbose = FALSE
diff --git a/tests/testthat/test-permshap.R b/tests/testthat/test-permshap.R
@@ -15,6 +15,25 @@ test_that("SHAP + baseline = prediction", {
   expect_equal(rowSums(s$S) + s$baseline, preds[c(1L, 51L, 101L)])
 })
 
+test_that("background data is automatically selected", {
+  # bg_X is omitted; for this X, the background data ends up being all of X
+  auto <- permshap(fit, X = iris[, x], verbose = FALSE)
+  expect_equal(s$S, auto$S[c(1L, 51L, 101L), ])
+})
+
+test_that("missing bg_X gives error if X is very small", {
+  expect_error(permshap(fit, X = iris[seq_len(10L), x], verbose = FALSE))
+})
+
+test_that("missing bg_X gives warning if X is quite small", {
+  expect_warning(permshap(fit, iris[1:30, x], verbose = FALSE))
+})
+
+test_that("selection of bg_X can be controlled via bg_n", {
+  res <- permshap(fit, iris[seq_len(30L), x], bg_n = 20L, verbose = FALSE)
+  expect_equal(nrow(res$bg_X), 20L)
+})
+
 test_that("verbose is chatty", {
   capture_output(
     expect_message(
@@ -130,6 +149,14 @@ test_that("SHAP + baseline = prediction works with case weights", {
   expect_equal(rowSums(s$S) + s$baseline, preds[1:5])
 })
 
+test_that("selection of bg_X and bg_w can be controlled via bg_n", {
+  rows <- seq_len(30L)
+  w <- iris$Petal.Length[rows]
+  res <- permshap(fit, iris[rows, x], bg_w = w, bg_n = 20L, verbose = FALSE)
+  expect_equal(nrow(res$bg_X), 20L)
+  expect_length(res$bg_w, 20L)
+})
+
 test_that("Decomposing a single row works with case weights", {
   s <- permshap(
     fit, iris[1L, x], bg_X = iris, bg_w = iris$Petal.Length, verbose = FALSE