prepare new cran version

pfistfl · pfistfl · commit 447946091cfa · 2021-07-21T12:21:47.000+02:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -39,7 +39,6 @@ Depends:
 Imports:
     backports,
     checkmate (>= 2.0.0),
-    lifecycle,
     data.table (>= 1.13.6),
     mlr3 (>= 0.10),
     mlr3misc (>= 0.8.0),
diff --git a/NEWS.md b/NEWS.md
@@ -4,6 +4,8 @@
 
 * Fixed a bug for additive weight updates, were updates went
   in the wrong direction.
+* Added new parameter `eval_fulldata` that allows computing the
+  auditor effect across the full sample (as opposed to the bucket).
 
 # mcboost 0.3.0
 
diff --git a/R/MCBoost.R b/R/MCBoost.R
@@ -57,9 +57,15 @@ MCBoost = R6::R6Class("MCBoost",
     #'   Currently only supports "simple", even split along probabilities.
     #'   Only relevant for `num_buckets` > 1.
     bucket_strategy = NULL,
+
     #' @field rebucket [`logical`] \cr
     #'   Should buckets be re-calculated at each iteration?
     rebucket = NULL,
+
+    #' @field eval_fulldata [`logical`] \cr
+    #'   Should the auditor be evaluated on the full data (instead of only the respective bucket)?
+    eval_fulldata = NULL,
+
     #' @field partition [`logical`] \cr
     #'   True/False flag for whether to split up predictions by their "partition"
     #'   (e.g., predictions less than 0.5 and predictions greater than 0.5).
@@ -118,6 +124,12 @@ MCBoost = R6::R6Class("MCBoost",
     #'   Only taken into account for `num_buckets` > 1.
     #' @param rebucket [`logical`] \cr
     #'   Should buckets be re-done at each iteration? Default `FALSE`.
+    #' @param eval_fulldata [`logical`] \cr
+    #'   Should the auditor be evaluated on the full data or on the respective bucket for determining
+    #'   the stopping criterion? Default `FALSE`, auditor is only evaluated on the bucket.
+    #'   Setting `eval_fulldata = TRUE` keeps the implementation closer to the algorithm proposed in
+    #'   the corresponding multi-accuracy paper (Kim et al., 2019), where auditor effects are computed
+    #'   across the full sample.
     #' @param multiplicative [`logical`] \cr
     #'   Specifies the strategy for updating the weights (multiplicative weight vs additive).
     #'   Defaults to `TRUE` (multi-accuracy boosting). Set to `FALSE` for multi-calibration.
@@ -141,6 +153,7 @@ MCBoost = R6::R6Class("MCBoost",
     #'   "split" splits the data into `max_iter` parts and validates on each sample in each iteration.\cr
     #'   "bootstrap" uses a new bootstrap sample in each iteration.\cr
     #'   "none" uses the same dataset in each iteration.
+
     initialize = function(
                  max_iter=5,
                  alpha=1e-4,
@@ -149,6 +162,7 @@ MCBoost = R6::R6Class("MCBoost",
                  num_buckets=2,
                  bucket_strategy="simple",
                  rebucket=FALSE,
+                 eval_fulldata=FALSE,
                  multiplicative=TRUE,
                  auditor_fitter=NULL,
                  subpops=NULL,
@@ -162,6 +176,7 @@ MCBoost = R6::R6Class("MCBoost",
       self$num_buckets = assert_int(num_buckets)
       self$bucket_strategy = assert_choice(bucket_strategy, choices = c("simple"))
       self$rebucket = assert_flag(rebucket)
+      self$eval_fulldata = assert_flag(eval_fulldata)
       self$partition = assert_flag(partition)
       self$multiplicative = assert_flag(multiplicative)
       self$iter_sampling = assert_choice(iter_sampling, choices = c("none", "bootstrap", "split"))
@@ -283,6 +298,13 @@ MCBoost = R6::R6Class("MCBoost",
           models[[j]] = out[[2]]
         }
 
+        if (self$eval_fulldata) {
+          corrs = map_dbl(models, function(m) {
+            if (is.null(m)) return(0)
+            mean(m$predict(data[idx,]) * resid[idx])
+          })
+        }
+
         self$iter_corr = c(self$iter_corr, list(corrs))
         if (abs(max(corrs)) < self$alpha) {
           break
diff --git a/man/MCBoost.Rd b/man/MCBoost.Rd
diff --git a/tests/testthat/test_mcboost.R b/tests/testthat/test_mcboost.R
@@ -165,7 +165,9 @@ test_that("MCBoost various settings", {
   # Check a list of settings
   mcs = list(
      MCBoost$new(auditor_fitter = NULL),
-     MCBoost$new(alpha = 0.05)
+     MCBoost$new(alpha = 0.05),
+     MCBoost$new(eval_fulldata = TRUE),
+     MCBoost$new(eval_fulldata = TRUE, multiplicative = FALSE)
   )
   for (mc in mcs) {
     mc$multicalibrate(data, labels)
@@ -335,5 +337,3 @@ test_that("mcboost on training data sanity checks", {
   df = do.call("rbind", mc$iter_corr)
   expect_true(all(diff(df) <= 0))
 })
-
-