From eec4e779c2de5e6f9e1824c2379cac1e4d68e99d Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 30 May 2025 23:43:01 -0700
Subject: [PATCH 1/3] Migrate plyr->dplyr

---
 DESCRIPTION                   |  2 +-
 NAMESPACE                     |  3 ++-
 R/Custom.user.queries.R       | 16 ++++++++--------
 R/Element.queries.R           | 24 ++++++++++++------------
 R/MainBar.R                   | 28 +++++++++++++---------------
 R/Specific.intersections.R    | 22 +++++++++++-----------
 R/upset.R                     |  2 +-
 R/util.R                      | 20 ++++++++++++++++++++
 man/upset.Rd                  |  2 +-
 vignettes/attribute.plots.Rmd |  4 ++--
 10 files changed, 71 insertions(+), 52 deletions(-)
 create mode 100644 R/util.R

diff --git a/DESCRIPTION b/DESCRIPTION
index b58a53a..7eef9b9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -15,7 +15,7 @@ Depends:
 Imports:
     ggplot2,
     gridExtra,
-    plyr,
+    dplyr (>= 1.1.0),
     utils,
     stats,
     methods,
diff --git a/NAMESPACE b/NAMESPACE
index 0a3bcf2..d86e379 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,6 +16,8 @@ import(methods)
 import(scales)
 import(stats)
 import(utils)
+importFrom(dplyr,n)
+importFrom(dplyr,summarize)
 importFrom(grid,gpar)
 importFrom(grid,grid.draw)
 importFrom(grid,grid.layout)
@@ -24,4 +26,3 @@ importFrom(grid,legendGrob)
 importFrom(grid,popViewport)
 importFrom(grid,pushViewport)
 importFrom(grid,viewport)
-importFrom(plyr,count)
diff --git a/R/Custom.user.queries.R b/R/Custom.user.queries.R
index 3cff55b..e14f8da 100644
--- a/R/Custom.user.queries.R
+++ b/R/Custom.user.queries.R
@@ -22,25 +22,25 @@ customQueries <- function(data, custom, names){
 customQueriesBar <- function(cust_data, sets,bar_data,custom){
   setup <- list()
   final_data <- list()
-  num <- (length(sets) + 1)
   if(length(cust_data) == 0){
     return(NULL)
   }
-  for(i in 1:length(cust_data)){
-    cust_data[[i]] <- count(cust_data[[i]][sets])
+  num <- (length(sets) + 1)
+  for(i in seq_along(cust_data)){
+    cust_data[[i]] <- plyr_count(cust_data[[i]][sets])
     colnames(cust_data[[i]])[num] <- "freq2"
-    cust_data[[i]] <- cust_data[[i]][!(rowSums(cust_data[[i]][ ,1:length(sets)]) == 0), ]
+    cust_data[[i]] <- cust_data[[i]][!(rowSums(cust_data[[i]][ ,seq_along(sets)]) == 0), ]
     setup[[i]] <- merge(cust_data[[i]], bar_data, by = sets)
     color2 <- rep(custom[[i]]$color, times = nrow(setup[[i]]))
-    if(isTRUE(custom[[i]]$active) == T){
-      act <- rep(T, nrow(setup[[i]]))
+    if(isTRUE(custom[[i]]$active)){
+      act <- rep(TRUE, nrow(setup[[i]]))
     }
     else{
-      act <- rep(F, nrow(setup[[i]]))
+      act <- rep(FALSE, nrow(setup[[i]]))
     }
     setup[[i]] <- cbind(setup[[i]], color2, act)
   }
-  for(i in 1:length(setup)){
+  for(i in seq_along(setup)){
     final_data <- rbind(final_data, setup[[i]])
   }
   return(final_data)
diff --git a/R/Element.queries.R b/R/Element.queries.R
index 46222ac..c61acf7 100644
--- a/R/Element.queries.R
+++ b/R/Element.queries.R
@@ -65,20 +65,20 @@ QuerieElemAtt <- function(q, data, start_col, exp, names, att_x, att_y, palette)
 
 
 ElemBarDat <- function(q, data1, first_col, exp, names, palette, mbdata){
-  data1 <- data.frame(data1, check.names = F)
-  bar <- count(data1)
-  bar$x <- 1:nrow(bar)
-  rows <- data.frame()
-  act <- c()
   if(length(q) == 0){
     return(NULL)
   }
-  for(i in 1:length(q)){
+  data1 <- data.frame(data1, check.names = FALSE)
+  bar <- plyr_count(data1)
+  bar$x <- seq_len(nrow(bar))
+  rows <- data.frame()
+  act <- NULL
+  for(i in seq_along(q)){
     index_q <- unlist(q[[i]]$params)
     test <- as.character(index_q[1])
     check <- match(test, names)
     elem_color <- q[[i]]$color
-    if(is.na(check) != T){
+    if(is.na(check) != TRUE){
       elem_data <- NULL
     }
     else{
@@ -86,18 +86,18 @@ ElemBarDat <- function(q, data1, first_col, exp, names, palette, mbdata){
       if(!is.null(exp)){
       elem_data <- Subset_att(elem_data, exp)
       }
-      elem_data <- as.data.frame(count(elem_data[names]))
+      elem_data <- plyr_count(elem_data[names])
       names(elem_data) <- c(names, "freq")
       elem_data <- elem_data[which(rowSums(elem_data[names]) != 0), ]
       x <- merge(mbdata, elem_data[names], by = names)
       elem_data <- merge(x[names], elem_data, by = names)
       x <- x$x
       elem_data$x <- x
-      if((isTRUE(q[[i]]$active) == T) && (is.null(elem_data) == F)){
-        act <- T
+      if((isTRUE(q[[i]]$active) == TRUE) && (is.null(elem_data) == FALSE)){
+        act <- TRUE
       }
-      else if((isTRUE(q[[i]]$active) == F || is.null(q[[i]]$active) == T) && (is.null(elem_data) == F)){
-        act <- F
+      else if((isTRUE(q[[i]]$active) == FALSE || is.null(q[[i]]$active) == TRUE) && (is.null(elem_data) == FALSE)){
+        act <- FALSE
       }
       elem_data$color <- elem_color
       elem_data$act <- act
diff --git a/R/MainBar.R b/R/MainBar.R
index e9cf791..6c0c97e 100644
--- a/R/MainBar.R
+++ b/R/MainBar.R
@@ -1,5 +1,3 @@
-#' @importFrom plyr count
-
 ## Counts the frequency of each intersection being looked at and sets up data for main bar plot.
 ## Also orders the data for the bar plot and matrix plot
 Counter <- function(data, num_sets, start_col, name_of_sets, nintersections, mbar_color, order_mat,
@@ -8,33 +6,33 @@ Counter <- function(data, num_sets, start_col, name_of_sets, nintersections, mba
   Freqs <- data.frame()
   end_col <- as.numeric(((start_col + num_sets) -1))
   #gets indices of columns containing sets used
-  for( i in 1:num_sets){
+  for( i in seq_len(num_sets)){
     temp_data[i] <- match(name_of_sets[i], colnames(data))
   }
-  Freqs <- data.frame(count(data[ ,as.integer(temp_data)]))
-  colnames(Freqs)[1:num_sets] <- name_of_sets
+  Freqs <- plyr_count(data[ ,as.integer(temp_data)])
+  colnames(Freqs)[seq_len(num_sets)] <- name_of_sets
   #Adds on empty intersections if option is selected
-  if(is.null(empty_intersects) == F){
+  if(is.null(empty_intersects) == FALSE){
     empty <- rep(list(c(0,1)), times = num_sets)
     empty <- data.frame(expand.grid(empty))
     colnames(empty) <- name_of_sets
     empty$freq <- 0
     all <- rbind(Freqs, empty)
-    Freqs <- data.frame(all[!duplicated(all[1:num_sets]), ], check.names = F)
+    Freqs <- data.frame(all[!duplicated(all[seq_len(num_sets)]), ], check.names = FALSE)
   }
   #Remove universal empty set
-  Freqs <- Freqs[!(rowSums(Freqs[ ,1:num_sets]) == 0), ]
+  Freqs <- Freqs[!(rowSums(Freqs[ ,seq_len(num_sets)]) == 0), ]
   #Aggregation by degree
   if(tolower(aggregate) == "degree"){
-    for(i in 1:nrow(Freqs)){
-      Freqs$degree[i] <- rowSums(Freqs[ i ,1:num_sets])
+    for(i in seq_len(nrow(Freqs))){
+      Freqs$degree[i] <- rowSums(Freqs[ i ,seq_len(num_sets)])
     }
-    order_cols <- c()
-    for(i in 1:length(order_mat)){
+    order_cols <- integer(length(order_mat))
+    for(i in seq_along(order_mat)){
       order_cols[i] <- match(order_mat[i], colnames(Freqs))
     }
     # if(length(order_cols)==2 && order_cols[1]>order_cols[2]){decrease <- rev(decrease)}
-    for(i in 1:length(order_cols)){
+    for(i in seq_along(order_cols)){
       logic <- decrease[i]
       Freqs <- Freqs[order(Freqs[ , order_cols[i]], decreasing = logic), ]
     }
@@ -47,14 +45,14 @@ Counter <- function(data, num_sets, start_col, name_of_sets, nintersections, mba
   #delete rows used to order data correctly. Not needed to set up bars.
   delete_row <- (num_sets + 2)
   Freqs <- Freqs[ , -delete_row]
-  for( i in 1:nrow(Freqs)){
+  for( i in seq_len(nrow(Freqs))){
     Freqs$x[i] <- i
     Freqs$color <- mbar_color
   }
   if(is.na(nintersections)){
     nintersections = nrow(Freqs)
   }
-  Freqs <- Freqs[1:nintersections, ]
+  Freqs <- Freqs[seq_len(nintersections), ]
   Freqs <- na.omit(Freqs)
   return(Freqs)
 }
diff --git a/R/Specific.intersections.R b/R/Specific.intersections.R
index f1acb15..a64b40e 100644
--- a/R/Specific.intersections.R
+++ b/R/Specific.intersections.R
@@ -7,14 +7,14 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord
   if(length(remove) != 0){
     data <- data[-remove]
   }
-  data <- count(data[keep])
-  sets <- names(data[1:length(keep)])
+  data <- plyr_count(data[keep])
+  sets <- names(data[seq_along(keep)])
   data <- lapply(intersections, function(x){
     temp_sets <- unlist(x)
-    x <- data[which(rowSums(data[1:length(keep)]) == length(temp_sets)), ]
+    x <- data[which(rowSums(data[seq_along(keep)]) == length(temp_sets)), ]
     x <- x[which(rowSums(x[temp_sets]) == length(temp_sets)), ]
     if(nrow(x) == 0){
-      names <- names(x[1:length(keep)])
+      names <- names(x[seq_along(keep)])
       x <- rbind(x, rep(0, ncol(x)))
       colnames(x) <- c(names, "freq")
       x[ ,which(names %in% temp_sets)] <- 1
@@ -24,7 +24,7 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord
   
   Freqs <- data.frame()
   
-  for(i in seq(length(data))){
+  for(i in seq_along(data)){
     Freqs <- rbind(Freqs, data[[i]])
   }
   
@@ -33,15 +33,15 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord
   num_sets <- length(keep)
   
   if(tolower(aggregate) == "degree"){
-    for(i in 1:nrow(Freqs)){
-      Freqs$degree[i] <- rowSums(Freqs[ i ,1:num_sets])
+    for(i in seq_len(nrow(Freqs))){
+      Freqs$degree[i] <- rowSums(Freqs[ i ,seq_len(num_sets)])
     }
-    order_cols <- c()
-    for(i in 1:length(order_mat)){
+    order_cols <- integer(length(order_mat))
+    for(i in seq_along(order_mat)){
       order_cols[i] <- match(order_mat[i], colnames(Freqs))
     }
     
-    for(i in 1:length(order_cols)){
+    for(i in seq_along(order_cols)){
       logic <- decrease[i]
       Freqs <- Freqs[order(Freqs[ , order_cols[i]], decreasing = logic), ]
     }
@@ -54,7 +54,7 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord
   #delete rows used to order data correctly. Not needed to set up bars.
   delete_row <- (num_sets + 2)
   Freqs <- Freqs[ , -delete_row]
-  for( i in 1:nrow(Freqs)){
+  for( i in seq_len(nrow(Freqs))){
     Freqs$x[i] <- i
     Freqs$color <- mbar_color
   }
diff --git a/R/upset.R b/R/upset.R
index 20c09c8..96785f1 100644
--- a/R/upset.R
+++ b/R/upset.R
@@ -71,7 +71,7 @@
 #' @seealso UpSetR github for additional examples: \url{http://github.com/hms-dbmi/UpSetR}
 #' @examples movies <- read.csv( system.file("extdata", "movies.csv", package = "UpSetR"), header=TRUE, sep=";" )
 #'
-#'require(ggplot2); require(plyr); require(gridExtra); require(grid);
+#'require(ggplot2); require(gridExtra); require(grid);
 #'
 #' between <- function(row, min, max){
 #'   newData <- (row["ReleaseDate"] < max) & (row["ReleaseDate"] > min)
diff --git a/R/util.R b/R/util.R
new file mode 100644
index 0000000..a31eeec
--- /dev/null
+++ b/R/util.R
@@ -0,0 +1,20 @@
+# matching plyr::count behavior. this is non-trivial to do with
+#   pure ; see PR description for full details. for three main reasons:
+#   1. table(<data.frame>) may fail if there are very few
+#      duplicates and each column is of high cardinality, meaning
+#      table(x) would have a very large number of 0 entries that
+#      need to be computed and dropped (plyr::count skips them).
+#   2. We can use something like interaction(..., drop=TRUE) + 
+#      tapply() to imitate this, but it's hard to generically
+#      reconstruct the un-interacted levels needed to build an
+#      equivalent data.frame -- basically, we'd need to, for full
+#      generality, use a sep=<str> where <str> is not present in
+#      any of the unique values of any of the columns of x in order
+#      for strsplit(<level>, <sep>) to uniquely map back.
+#   3. Something like vapply(split(x, x), nrow, integer(1L)) is also
+#      appealingly simple, _but_ split() always drops missing levels
+#      (https://bugs.r-project.org/show_bug.cgi?id=18899) --> we'd
+#      need an onerous/ugly loop over the columns to replace missing
+#      observations with a unique NA-equivalent, end-sorting sentinel.
+#' @importFrom dplyr n summarize
+plyr_count <- function(x) summarize(x, .by = names(x), freq = n())
diff --git a/man/upset.Rd b/man/upset.Rd
index 7947d64..3a3a2af 100644
--- a/man/upset.Rd
+++ b/man/upset.Rd
@@ -167,7 +167,7 @@ Data set must be formatted as described on the original UpSet github page: \url{
 \examples{
 movies <- read.csv( system.file("extdata", "movies.csv", package = "UpSetR"), header=TRUE, sep=";" )
 
-require(ggplot2); require(plyr); require(gridExtra); require(grid);
+require(ggplot2); require(gridExtra); require(grid);
 
 between <- function(row, min, max){
   newData <- (row["ReleaseDate"] < max) & (row["ReleaseDate"] > min)
diff --git a/vignettes/attribute.plots.Rmd b/vignettes/attribute.plots.Rmd
index c23f82e..2c84a41 100644
--- a/vignettes/attribute.plots.Rmd
+++ b/vignettes/attribute.plots.Rmd
@@ -11,7 +11,7 @@ vignette: >
 
 For all examples the movies data set contained in the package will be used.
 ```{r, tidy=TRUE}
-library(UpSetR); library(ggplot2); library(grid); library(plyr)
+library(UpSetR); library(ggplot2); library(grid)
 movies <- read.csv( system.file("extdata", "movies.csv", package = "UpSetR"), header=T, sep=";" )
 ```
 
@@ -68,7 +68,7 @@ myplot <- function(mydata,x,y){
 }
 
 another.plot <- function(data, x, y){
-  data$decades <- round_any(as.integer(unlist(data[y])), 10, ceiling)
+  data$decades <- 10 * ceiling(as.integer(unlist(data[y])) / 10)
   data <- data[which(data$decades >= 1970), ]
   myplot <- (ggplot(data, aes_string(x=x)) + 
                geom_density(aes(fill=factor(decades)), alpha = 0.4)

From fa80b8e80d4f43732516ad0cfd2faa329a954fc9 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Fri, 30 May 2025 23:43:56 -0700
Subject: [PATCH 2/3] move very long comment to GitHub

---
 R/util.R | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/R/util.R b/R/util.R
index a31eeec..f47f75a 100644
--- a/R/util.R
+++ b/R/util.R
@@ -1,20 +1,4 @@
 # matching plyr::count behavior. this is non-trivial to do with
-#   pure ; see PR description for full details. for three main reasons:
-#   1. table(<data.frame>) may fail if there are very few
-#      duplicates and each column is of high cardinality, meaning
-#      table(x) would have a very large number of 0 entries that
-#      need to be computed and dropped (plyr::count skips them).
-#   2. We can use something like interaction(..., drop=TRUE) + 
-#      tapply() to imitate this, but it's hard to generically
-#      reconstruct the un-interacted levels needed to build an
-#      equivalent data.frame -- basically, we'd need to, for full
-#      generality, use a sep=<str> where <str> is not present in
-#      any of the unique values of any of the columns of x in order
-#      for strsplit(<level>, <sep>) to uniquely map back.
-#   3. Something like vapply(split(x, x), nrow, integer(1L)) is also
-#      appealingly simple, _but_ split() always drops missing levels
-#      (https://bugs.r-project.org/show_bug.cgi?id=18899) --> we'd
-#      need an onerous/ugly loop over the columns to replace missing
-#      observations with a unique NA-equivalent, end-sorting sentinel.
+#   pure ; see PR description for full details.
 #' @importFrom dplyr n summarize
 plyr_count <- function(x) summarize(x, .by = names(x), freq = n())

From 51ce1b52722c131f50c74eaaef86c237acf1fdd5 Mon Sep 17 00:00:00 2001
From: Michael Chirico <michaelchirico4@gmail.com>
Date: Sat, 31 May 2025 11:36:18 -0700
Subject: [PATCH 3/3] also respect row ordering

---
 NAMESPACE | 3 +++
 R/util.R  | 9 ++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index d86e379..214f7ed 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,7 +16,10 @@ import(methods)
 import(scales)
 import(stats)
 import(utils)
+importFrom(dplyr,all_of)
+importFrom(dplyr,arrange)
 importFrom(dplyr,n)
+importFrom(dplyr,pick)
 importFrom(dplyr,summarize)
 importFrom(grid,gpar)
 importFrom(grid,grid.draw)
diff --git a/R/util.R b/R/util.R
index f47f75a..98ee7bd 100644
--- a/R/util.R
+++ b/R/util.R
@@ -1,4 +1,7 @@
 # matching plyr::count behavior. this is non-trivial to do with
-#   pure ; see PR description for full details.
-#' @importFrom dplyr n summarize
-plyr_count <- function(x) summarize(x, .by = names(x), freq = n())
+#   pure base R; see PR description for full details.
+#' @importFrom dplyr all_of arrange n pick summarize
+plyr_count <- function(x) {
+  cols <- names(x)
+  arrange(summarize(x, .by = all_of(cols), freq = n()), pick(cols))
+}