include_reference = TRUE erroneously works with datawizard::contr.deviation() (#966)

strengejacke · web-flow · commit 8e362ee3272c · 2024-10-15T08:37:05.000+02:00
* `include_reference = TRUE` erroneously works with `datawizard::contr.deviation()` Fixes #962 * add tests * update news
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: parameters
 Title: Processing of Model Parameters
-Version: 0.22.2.19
+Version: 0.22.2.20
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",
diff --git a/NEWS.md b/NEWS.md
@@ -37,6 +37,10 @@
 * `print()` for `compare_parameters()` now also puts factor levels into square
   brackets, like the `print()` method for `model_parameters()`.
 
+* `include_reference` now only adds the reference category of factors to the
+  parameters table when those factors have appropriate contrasts (treatment or
+  SAS contrasts).
+
 ## Bug fixes
 
 * Arguments like `digits` etc. were ignored in `model_parameters() for objects
diff --git a/R/utils.R b/R/utils.R
@@ -177,6 +177,29 @@
 }
 
 
+# This functions finds contrasts for those factors in a model, where including
+# a reference level makes sense. This is the case when there are contrasts
+# that are all zeros, which means that the reference level is not included in
+# the model matrix.
+.remove_reference_contrasts <- function(model) {
+  cons <- .safe(model$contrasts)
+  if (is.null(cons)) {
+    return(NULL)
+  }
+  out <- vapply(cons, function(mat) {
+    if (is.matrix(mat) && nrow(mat) > 0) {
+      any(rowSums(mat) == 0)
+    } else if (is.character(mat)) {
+      mat %in% c("contr.treatment", "contr.SAS")
+    } else {
+      FALSE
+    }
+  }, logical(1))
+  # only return those factors that need to be removed
+  names(out)[!out]
+}
+
+
 # Almost identical to dynGet(). The difference is that we deparse the expression
 # because get0() allows symbol only since R 4.1.0
 .dynGet <- function(x,
diff --git a/R/utils_format.R b/R/utils_format.R
@@ -389,6 +389,14 @@
       return(params)
     }
   }
+  # next, check contrasts of factors. including the reference level makes
+  # only sense if there are contrasts that are all zeros, which means that
+  # the reference level is not included in the model matrix
+  remove_contrasts <- .remove_reference_contrasts(model)
+  # keep only factors with valid contrasts
+  if (!is.null(remove_contrasts) && length(remove_contrasts)) {
+    factors <- factors[setdiff(names(factors), remove_contrasts)]
+  }
 
   # we need some more information about prettified labels etc.
   pretty_names <- attributes(params)$pretty_names
diff --git a/tests/testthat/_snaps/include_reference.md b/tests/testthat/_snaps/include_reference.md
@@ -64,3 +64,94 @@
       | Observations | 32                   | 32                   |
       +--------------+----------------------+----------------------+ 
 
+# include_reference, different contrasts
+
+    Code
+      print(out)
+    Output
+      Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
+      -------------------------------------------------------------------
+      (Intercept) |       19.70 | 1.18 | [ 17.28, 22.11] | 16.71 | < .001
+      cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
+      cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
+      gear [3]    |        0.00 |      |                 |       |       
+      gear [4]    |        1.32 | 1.93 | [ -2.63,  5.28] |  0.69 | 0.498 
+      gear [5]    |        1.50 | 1.85 | [ -2.31,  5.31] |  0.81 | 0.426 
+    Message
+      
+      Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+        using a Wald t-distribution approximation.
+
+---
+
+    Code
+      print(out)
+    Output
+      Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
+      -------------------------------------------------------------------
+      (Intercept) |       25.43 | 1.88 | [ 21.57, 29.29] | 13.52 | < .001
+      cyl [4]     |        0.00 |      |                 |       |       
+      cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
+      cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
+      gear [3]    |        0.00 |      |                 |       |       
+      gear [4]    |        1.32 | 1.93 | [ -2.63,  5.28] |  0.69 | 0.498 
+      gear [5]    |        1.50 | 1.85 | [ -2.31,  5.31] |  0.81 | 0.426 
+    Message
+      
+      Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+        using a Wald t-distribution approximation.
+
+---
+
+    Code
+      print(out)
+    Output
+      Parameter   | Coefficient |   SE |          95% CI | t(27) |      p
+      -------------------------------------------------------------------
+      (Intercept) |       20.64 | 0.67 | [ 19.26, 22.01] | 30.76 | < .001
+      cyl [6]     |       -6.66 | 1.63 | [-10.00, -3.31] | -4.09 | < .001
+      cyl [8]     |      -10.54 | 1.96 | [-14.56, -6.52] | -5.38 | < .001
+      gear [1]    |       -0.94 | 1.09 | [ -3.18,  1.30] | -0.86 | 0.396 
+      gear [2]    |        0.38 | 1.11 | [ -1.90,  2.67] |  0.34 | 0.734 
+    Message
+      
+      Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+        using a Wald t-distribution approximation.
+
+---
+
+    Code
+      print(out)
+    Output
+      Parameter   | Coefficient |   SE |         95% CI | t(27) |      p
+      ------------------------------------------------------------------
+      (Intercept) |       15.83 | 1.24 | [13.28, 18.37] | 12.75 | < .001
+      cyl [8]     |        0.00 |      |                |       |       
+      cyl [4]     |       10.54 | 1.96 | [ 6.52, 14.56] |  5.38 | < .001
+      cyl [6]     |        3.89 | 1.88 | [ 0.03,  7.75] |  2.07 | 0.049 
+      gear [1]    |       -0.94 | 1.09 | [-3.18,  1.30] | -0.86 | 0.396 
+      gear [2]    |        0.38 | 1.11 | [-1.90,  2.67] |  0.34 | 0.734 
+    Message
+      
+      Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+        using a Wald t-distribution approximation.
+
+---
+
+    Code
+      print(out)
+    Output
+      Parameter   | Coefficient |   SE |         95% CI | t(27) |      p
+      ------------------------------------------------------------------
+      (Intercept) |       14.89 | 0.92 | [13.00, 16.77] | 16.19 | < .001
+      cyl [8]     |        0.00 |      |                |       |       
+      cyl [4]     |       10.54 | 1.96 | [ 6.52, 14.56] |  5.38 | < .001
+      cyl [6]     |        3.89 | 1.88 | [ 0.03,  7.75] |  2.07 | 0.049 
+      gear [3]    |        0.00 |      |                |       |       
+      gear [4]    |        1.32 | 1.93 | [-2.63,  5.28] |  0.69 | 0.498 
+      gear [5]    |        1.50 | 1.85 | [-2.31,  5.31] |  0.81 | 0.426 
+    Message
+      
+      Uncertainty intervals (equal-tailed) and p-values (two-tailed) computed
+        using a Wald t-distribution approximation.
+
diff --git a/tests/testthat/test-include_reference.R b/tests/testthat/test-include_reference.R
@@ -56,3 +56,50 @@ test_that("include_reference, with pretty formatted cut", {
     )
   )
 })
+
+test_that("include_reference, different contrasts", {
+  data("mtcars")
+  mtcars$cyl <- factor(mtcars$cyl)
+  mtcars$gear <- factor(mtcars$gear)
+
+  m <- lm(mpg ~ cyl + gear, data = mtcars, contrasts = list(cyl = datawizard::contr.deviation))
+  out <- model_parameters(m, include_reference = TRUE)
+  expect_snapshot(print(out))
+
+  m <- lm(mpg ~ cyl + gear, data = mtcars)
+  out <- model_parameters(m, include_reference = TRUE)
+  expect_snapshot(print(out))
+
+  m <- lm(
+    mpg ~ cyl + gear,
+    data = mtcars,
+    contrasts = list(
+      cyl = datawizard::contr.deviation,
+      gear = contr.sum
+    )
+  )
+  out <- model_parameters(m, include_reference = TRUE)
+  expect_snapshot(print(out))
+
+  m <- lm(
+    mpg ~ cyl + gear,
+    data = mtcars,
+    contrasts = list(
+      cyl = contr.SAS,
+      gear = contr.sum
+    )
+  )
+  out <- model_parameters(m, include_reference = TRUE)
+  expect_snapshot(print(out))
+
+  m <- lm(
+    mpg ~ cyl + gear,
+    data = mtcars,
+    contrasts = list(
+      cyl = contr.SAS,
+      gear = contr.treatment
+    )
+  )
+  out <- model_parameters(m, include_reference = TRUE)
+  expect_snapshot(print(out))
+})

Original file line number	Diff line number	Diff line change
`@@ -389,6 +389,14 @@`
`389`	`389`	`return(params)`
`390`	`390`	`}`
`391`	`391`	`}`
	`392`	`+ # next, check contrasts of factors. including the reference level makes`
	`393`	`+ # only sense if there are contrasts that are all zeros, which means that`
	`394`	`+ # the reference level is not included in the model matrix`
	`395`	`+ remove_contrasts <- .remove_reference_contrasts(model)`
	`396`	`+ # keep only factors with valid contrasts`
	`397`	`+ if (!is.null(remove_contrasts) && length(remove_contrasts)) {`
	`398`	`+ factors <- factors[setdiff(names(factors), remove_contrasts)]`
	`399`	`+ }`
`392`	`400`
`393`	`401`	`# we need some more information about prettified labels etc.`
`394`	`402`	`pretty_names <- attributes(params)$pretty_names`