From 64e5cb533826294c0bcf1e9f7b6ba8f8aaa74907 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Fri, 9 Sep 2016 06:41:50 -0400 Subject: [PATCH] handle irregularly named columns, improve no-dupes-found msg --- R/get_dupes.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/get_dupes.R b/R/get_dupes.R index d82e9b90..9f082d3c 100644 --- a/R/get_dupes.R +++ b/R/get_dupes.R @@ -18,22 +18,23 @@ get_dupes <- function(dat, ...) { names <- as.list(substitute(list(...)))[-1L] df_name <- deparse(substitute(dat)) - if(length(names)==0){ - names <- names(dat) - message("No variable names specified - using all columns.\n") - } - - # check that each variable name provided is present in names(dat); if not, throw error + # check that each variable name provided is present in names(dat); if not, throw error var_names <- names if(is.list(var_names)){ var_names <- lapply(names, deparse) } # 'names' is not a list if defaulting to whole df, need this for consistency check_vars_in_df(dat, df_name, unlist(var_names)) - dupe_count <- NULL # to appease NOTE for CRAN; does nothing. + if(length(names)==0){ # if called on an entire data.frame with no specified variable names + var_names <- names(dat) + names <- paste0("`", as.list(names(dat)), "`") # to handle illegal variable names + message("No variable names specified - using all columns.\n") + } + # calculate counts to join back to main df counts <- dat %>% dplyr::count_(vars = names) - + + # join new count vector to main data.frame dupes <- suppressMessages(dplyr::inner_join(counts, dat)) @@ -43,6 +44,8 @@ get_dupes <- function(dat, ...) { dplyr::arrange_(.dots = names) %>% dplyr::rename(dupe_count = n) + # shorten error message for large data.frames + if(length(var_names) > 10){ var_names <- c(var_names[1:9], paste("... and", length(var_names) - 9, "other variables")) } if(nrow(dupes) == 0){message(paste0("No duplicate combinations found of: ", paste(var_names, collapse = ", ")))} dupes }