From eec4e779c2de5e6f9e1824c2379cac1e4d68e99d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 30 May 2025 23:43:01 -0700 Subject: [PATCH 1/3] Migrate plyr->dplyr --- DESCRIPTION | 2 +- NAMESPACE | 3 ++- R/Custom.user.queries.R | 16 ++++++++-------- R/Element.queries.R | 24 ++++++++++++------------ R/MainBar.R | 28 +++++++++++++--------------- R/Specific.intersections.R | 22 +++++++++++----------- R/upset.R | 2 +- R/util.R | 20 ++++++++++++++++++++ man/upset.Rd | 2 +- vignettes/attribute.plots.Rmd | 4 ++-- 10 files changed, 71 insertions(+), 52 deletions(-) create mode 100644 R/util.R diff --git a/DESCRIPTION b/DESCRIPTION index b58a53a..7eef9b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,7 @@ Depends: Imports: ggplot2, gridExtra, - plyr, + dplyr (>= 1.1.0), utils, stats, methods, diff --git a/NAMESPACE b/NAMESPACE index 0a3bcf2..d86e379 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,8 @@ import(methods) import(scales) import(stats) import(utils) +importFrom(dplyr,n) +importFrom(dplyr,summarize) importFrom(grid,gpar) importFrom(grid,grid.draw) importFrom(grid,grid.layout) @@ -24,4 +26,3 @@ importFrom(grid,legendGrob) importFrom(grid,popViewport) importFrom(grid,pushViewport) importFrom(grid,viewport) -importFrom(plyr,count) diff --git a/R/Custom.user.queries.R b/R/Custom.user.queries.R index 3cff55b..e14f8da 100644 --- a/R/Custom.user.queries.R +++ b/R/Custom.user.queries.R @@ -22,25 +22,25 @@ customQueries <- function(data, custom, names){ customQueriesBar <- function(cust_data, sets,bar_data,custom){ setup <- list() final_data <- list() - num <- (length(sets) + 1) if(length(cust_data) == 0){ return(NULL) } - for(i in 1:length(cust_data)){ - cust_data[[i]] <- count(cust_data[[i]][sets]) + num <- (length(sets) + 1) + for(i in seq_along(cust_data)){ + cust_data[[i]] <- plyr_count(cust_data[[i]][sets]) colnames(cust_data[[i]])[num] <- "freq2" - cust_data[[i]] <- cust_data[[i]][!(rowSums(cust_data[[i]][ ,1:length(sets)]) == 0), ] + cust_data[[i]] <- cust_data[[i]][!(rowSums(cust_data[[i]][ ,seq_along(sets)]) == 0), ] setup[[i]] <- merge(cust_data[[i]], bar_data, by = sets) color2 <- rep(custom[[i]]$color, times = nrow(setup[[i]])) - if(isTRUE(custom[[i]]$active) == T){ - act <- rep(T, nrow(setup[[i]])) + if(isTRUE(custom[[i]]$active)){ + act <- rep(TRUE, nrow(setup[[i]])) } else{ - act <- rep(F, nrow(setup[[i]])) + act <- rep(FALSE, nrow(setup[[i]])) } setup[[i]] <- cbind(setup[[i]], color2, act) } - for(i in 1:length(setup)){ + for(i in seq_along(setup)){ final_data <- rbind(final_data, setup[[i]]) } return(final_data) diff --git a/R/Element.queries.R b/R/Element.queries.R index 46222ac..c61acf7 100644 --- a/R/Element.queries.R +++ b/R/Element.queries.R @@ -65,20 +65,20 @@ QuerieElemAtt <- function(q, data, start_col, exp, names, att_x, att_y, palette) ElemBarDat <- function(q, data1, first_col, exp, names, palette, mbdata){ - data1 <- data.frame(data1, check.names = F) - bar <- count(data1) - bar$x <- 1:nrow(bar) - rows <- data.frame() - act <- c() if(length(q) == 0){ return(NULL) } - for(i in 1:length(q)){ + data1 <- data.frame(data1, check.names = FALSE) + bar <- plyr_count(data1) + bar$x <- seq_len(nrow(bar)) + rows <- data.frame() + act <- NULL + for(i in seq_along(q)){ index_q <- unlist(q[[i]]$params) test <- as.character(index_q[1]) check <- match(test, names) elem_color <- q[[i]]$color - if(is.na(check) != T){ + if(is.na(check) != TRUE){ elem_data <- NULL } else{ @@ -86,18 +86,18 @@ ElemBarDat <- function(q, data1, first_col, exp, names, palette, mbdata){ if(!is.null(exp)){ elem_data <- Subset_att(elem_data, exp) } - elem_data <- as.data.frame(count(elem_data[names])) + elem_data <- plyr_count(elem_data[names]) names(elem_data) <- c(names, "freq") elem_data <- elem_data[which(rowSums(elem_data[names]) != 0), ] x <- merge(mbdata, elem_data[names], by = names) elem_data <- merge(x[names], elem_data, by = names) x <- x$x elem_data$x <- x - if((isTRUE(q[[i]]$active) == T) && (is.null(elem_data) == F)){ - act <- T + if((isTRUE(q[[i]]$active) == TRUE) && (is.null(elem_data) == FALSE)){ + act <- TRUE } - else if((isTRUE(q[[i]]$active) == F || is.null(q[[i]]$active) == T) && (is.null(elem_data) == F)){ - act <- F + else if((isTRUE(q[[i]]$active) == FALSE || is.null(q[[i]]$active) == TRUE) && (is.null(elem_data) == FALSE)){ + act <- FALSE } elem_data$color <- elem_color elem_data$act <- act diff --git a/R/MainBar.R b/R/MainBar.R index e9cf791..6c0c97e 100644 --- a/R/MainBar.R +++ b/R/MainBar.R @@ -1,5 +1,3 @@ -#' @importFrom plyr count - ## Counts the frequency of each intersection being looked at and sets up data for main bar plot. ## Also orders the data for the bar plot and matrix plot Counter <- function(data, num_sets, start_col, name_of_sets, nintersections, mbar_color, order_mat, @@ -8,33 +6,33 @@ Counter <- function(data, num_sets, start_col, name_of_sets, nintersections, mba Freqs <- data.frame() end_col <- as.numeric(((start_col + num_sets) -1)) #gets indices of columns containing sets used - for( i in 1:num_sets){ + for( i in seq_len(num_sets)){ temp_data[i] <- match(name_of_sets[i], colnames(data)) } - Freqs <- data.frame(count(data[ ,as.integer(temp_data)])) - colnames(Freqs)[1:num_sets] <- name_of_sets + Freqs <- plyr_count(data[ ,as.integer(temp_data)]) + colnames(Freqs)[seq_len(num_sets)] <- name_of_sets #Adds on empty intersections if option is selected - if(is.null(empty_intersects) == F){ + if(is.null(empty_intersects) == FALSE){ empty <- rep(list(c(0,1)), times = num_sets) empty <- data.frame(expand.grid(empty)) colnames(empty) <- name_of_sets empty$freq <- 0 all <- rbind(Freqs, empty) - Freqs <- data.frame(all[!duplicated(all[1:num_sets]), ], check.names = F) + Freqs <- data.frame(all[!duplicated(all[seq_len(num_sets)]), ], check.names = FALSE) } #Remove universal empty set - Freqs <- Freqs[!(rowSums(Freqs[ ,1:num_sets]) == 0), ] + Freqs <- Freqs[!(rowSums(Freqs[ ,seq_len(num_sets)]) == 0), ] #Aggregation by degree if(tolower(aggregate) == "degree"){ - for(i in 1:nrow(Freqs)){ - Freqs$degree[i] <- rowSums(Freqs[ i ,1:num_sets]) + for(i in seq_len(nrow(Freqs))){ + Freqs$degree[i] <- rowSums(Freqs[ i ,seq_len(num_sets)]) } - order_cols <- c() - for(i in 1:length(order_mat)){ + order_cols <- integer(length(order_mat)) + for(i in seq_along(order_mat)){ order_cols[i] <- match(order_mat[i], colnames(Freqs)) } # if(length(order_cols)==2 && order_cols[1]>order_cols[2]){decrease <- rev(decrease)} - for(i in 1:length(order_cols)){ + for(i in seq_along(order_cols)){ logic <- decrease[i] Freqs <- Freqs[order(Freqs[ , order_cols[i]], decreasing = logic), ] } @@ -47,14 +45,14 @@ Counter <- function(data, num_sets, start_col, name_of_sets, nintersections, mba #delete rows used to order data correctly. Not needed to set up bars. delete_row <- (num_sets + 2) Freqs <- Freqs[ , -delete_row] - for( i in 1:nrow(Freqs)){ + for( i in seq_len(nrow(Freqs))){ Freqs$x[i] <- i Freqs$color <- mbar_color } if(is.na(nintersections)){ nintersections = nrow(Freqs) } - Freqs <- Freqs[1:nintersections, ] + Freqs <- Freqs[seq_len(nintersections), ] Freqs <- na.omit(Freqs) return(Freqs) } diff --git a/R/Specific.intersections.R b/R/Specific.intersections.R index f1acb15..a64b40e 100644 --- a/R/Specific.intersections.R +++ b/R/Specific.intersections.R @@ -7,14 +7,14 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord if(length(remove) != 0){ data <- data[-remove] } - data <- count(data[keep]) - sets <- names(data[1:length(keep)]) + data <- plyr_count(data[keep]) + sets <- names(data[seq_along(keep)]) data <- lapply(intersections, function(x){ temp_sets <- unlist(x) - x <- data[which(rowSums(data[1:length(keep)]) == length(temp_sets)), ] + x <- data[which(rowSums(data[seq_along(keep)]) == length(temp_sets)), ] x <- x[which(rowSums(x[temp_sets]) == length(temp_sets)), ] if(nrow(x) == 0){ - names <- names(x[1:length(keep)]) + names <- names(x[seq_along(keep)]) x <- rbind(x, rep(0, ncol(x))) colnames(x) <- c(names, "freq") x[ ,which(names %in% temp_sets)] <- 1 @@ -24,7 +24,7 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord Freqs <- data.frame() - for(i in seq(length(data))){ + for(i in seq_along(data)){ Freqs <- rbind(Freqs, data[[i]]) } @@ -33,15 +33,15 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord num_sets <- length(keep) if(tolower(aggregate) == "degree"){ - for(i in 1:nrow(Freqs)){ - Freqs$degree[i] <- rowSums(Freqs[ i ,1:num_sets]) + for(i in seq_len(nrow(Freqs))){ + Freqs$degree[i] <- rowSums(Freqs[ i ,seq_len(num_sets)]) } - order_cols <- c() - for(i in 1:length(order_mat)){ + order_cols <- integer(length(order_mat)) + for(i in seq_along(order_mat)){ order_cols[i] <- match(order_mat[i], colnames(Freqs)) } - for(i in 1:length(order_cols)){ + for(i in seq_along(order_cols)){ logic <- decrease[i] Freqs <- Freqs[order(Freqs[ , order_cols[i]], decreasing = logic), ] } @@ -54,7 +54,7 @@ specific_intersections <- function(data, first.col, last.col, intersections, ord #delete rows used to order data correctly. Not needed to set up bars. delete_row <- (num_sets + 2) Freqs <- Freqs[ , -delete_row] - for( i in 1:nrow(Freqs)){ + for( i in seq_len(nrow(Freqs))){ Freqs$x[i] <- i Freqs$color <- mbar_color } diff --git a/R/upset.R b/R/upset.R index 20c09c8..96785f1 100644 --- a/R/upset.R +++ b/R/upset.R @@ -71,7 +71,7 @@ #' @seealso UpSetR github for additional examples: \url{http://github.com/hms-dbmi/UpSetR} #' @examples movies <- read.csv( system.file("extdata", "movies.csv", package = "UpSetR"), header=TRUE, sep=";" ) #' -#'require(ggplot2); require(plyr); require(gridExtra); require(grid); +#'require(ggplot2); require(gridExtra); require(grid); #' #' between <- function(row, min, max){ #' newData <- (row["ReleaseDate"] < max) & (row["ReleaseDate"] > min) diff --git a/R/util.R b/R/util.R new file mode 100644 index 0000000..a31eeec --- /dev/null +++ b/R/util.R @@ -0,0 +1,20 @@ +# matching plyr::count behavior. this is non-trivial to do with +# pure ; see PR description for full details. for three main reasons: +# 1. table() may fail if there are very few +# duplicates and each column is of high cardinality, meaning +# table(x) would have a very large number of 0 entries that +# need to be computed and dropped (plyr::count skips them). +# 2. We can use something like interaction(..., drop=TRUE) + +# tapply() to imitate this, but it's hard to generically +# reconstruct the un-interacted levels needed to build an +# equivalent data.frame -- basically, we'd need to, for full +# generality, use a sep= where is not present in +# any of the unique values of any of the columns of x in order +# for strsplit(, ) to uniquely map back. +# 3. Something like vapply(split(x, x), nrow, integer(1L)) is also +# appealingly simple, _but_ split() always drops missing levels +# (https://bugs.r-project.org/show_bug.cgi?id=18899) --> we'd +# need an onerous/ugly loop over the columns to replace missing +# observations with a unique NA-equivalent, end-sorting sentinel. +#' @importFrom dplyr n summarize +plyr_count <- function(x) summarize(x, .by = names(x), freq = n()) diff --git a/man/upset.Rd b/man/upset.Rd index 7947d64..3a3a2af 100644 --- a/man/upset.Rd +++ b/man/upset.Rd @@ -167,7 +167,7 @@ Data set must be formatted as described on the original UpSet github page: \url{ \examples{ movies <- read.csv( system.file("extdata", "movies.csv", package = "UpSetR"), header=TRUE, sep=";" ) -require(ggplot2); require(plyr); require(gridExtra); require(grid); +require(ggplot2); require(gridExtra); require(grid); between <- function(row, min, max){ newData <- (row["ReleaseDate"] < max) & (row["ReleaseDate"] > min) diff --git a/vignettes/attribute.plots.Rmd b/vignettes/attribute.plots.Rmd index c23f82e..2c84a41 100644 --- a/vignettes/attribute.plots.Rmd +++ b/vignettes/attribute.plots.Rmd @@ -11,7 +11,7 @@ vignette: > For all examples the movies data set contained in the package will be used. ```{r, tidy=TRUE} -library(UpSetR); library(ggplot2); library(grid); library(plyr) +library(UpSetR); library(ggplot2); library(grid) movies <- read.csv( system.file("extdata", "movies.csv", package = "UpSetR"), header=T, sep=";" ) ``` @@ -68,7 +68,7 @@ myplot <- function(mydata,x,y){ } another.plot <- function(data, x, y){ - data$decades <- round_any(as.integer(unlist(data[y])), 10, ceiling) + data$decades <- 10 * ceiling(as.integer(unlist(data[y])) / 10) data <- data[which(data$decades >= 1970), ] myplot <- (ggplot(data, aes_string(x=x)) + geom_density(aes(fill=factor(decades)), alpha = 0.4) From fa80b8e80d4f43732516ad0cfd2faa329a954fc9 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 30 May 2025 23:43:56 -0700 Subject: [PATCH 2/3] move very long comment to GitHub --- R/util.R | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/R/util.R b/R/util.R index a31eeec..f47f75a 100644 --- a/R/util.R +++ b/R/util.R @@ -1,20 +1,4 @@ # matching plyr::count behavior. this is non-trivial to do with -# pure ; see PR description for full details. for three main reasons: -# 1. table() may fail if there are very few -# duplicates and each column is of high cardinality, meaning -# table(x) would have a very large number of 0 entries that -# need to be computed and dropped (plyr::count skips them). -# 2. We can use something like interaction(..., drop=TRUE) + -# tapply() to imitate this, but it's hard to generically -# reconstruct the un-interacted levels needed to build an -# equivalent data.frame -- basically, we'd need to, for full -# generality, use a sep= where is not present in -# any of the unique values of any of the columns of x in order -# for strsplit(, ) to uniquely map back. -# 3. Something like vapply(split(x, x), nrow, integer(1L)) is also -# appealingly simple, _but_ split() always drops missing levels -# (https://bugs.r-project.org/show_bug.cgi?id=18899) --> we'd -# need an onerous/ugly loop over the columns to replace missing -# observations with a unique NA-equivalent, end-sorting sentinel. +# pure ; see PR description for full details. #' @importFrom dplyr n summarize plyr_count <- function(x) summarize(x, .by = names(x), freq = n()) From 51ce1b52722c131f50c74eaaef86c237acf1fdd5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 31 May 2025 11:36:18 -0700 Subject: [PATCH 3/3] also respect row ordering --- NAMESPACE | 3 +++ R/util.R | 9 ++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d86e379..214f7ed 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,7 +16,10 @@ import(methods) import(scales) import(stats) import(utils) +importFrom(dplyr,all_of) +importFrom(dplyr,arrange) importFrom(dplyr,n) +importFrom(dplyr,pick) importFrom(dplyr,summarize) importFrom(grid,gpar) importFrom(grid,grid.draw) diff --git a/R/util.R b/R/util.R index f47f75a..98ee7bd 100644 --- a/R/util.R +++ b/R/util.R @@ -1,4 +1,7 @@ # matching plyr::count behavior. this is non-trivial to do with -# pure ; see PR description for full details. -#' @importFrom dplyr n summarize -plyr_count <- function(x) summarize(x, .by = names(x), freq = n()) +# pure base R; see PR description for full details. +#' @importFrom dplyr all_of arrange n pick summarize +plyr_count <- function(x) { + cols <- names(x) + arrange(summarize(x, .by = all_of(cols), freq = n()), pick(cols)) +}