From 3fbd673ce3a635b3cdb0227622747c671a19110c Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Thu, 2 Feb 2017 09:56:42 -0500 Subject: [PATCH 01/28] add_totals functions now work w/ non-numeric cols per #57 --- R/add_totals.R | 42 +++++++++++++++++++++++--------- tests/testthat/test-add-totals.R | 36 +++++++++++++++++++++------ 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/R/add_totals.R b/R/add_totals.R index 52628b56..12eebd84 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -14,22 +14,36 @@ #' add_totals_row -add_totals_row <- function(dat, na.rm = TRUE){ - check_all_cols_after_first_are_numeric(dat) - dat[[1]] <- as.character(dat[[1]]) # for binding to the "Total" character value of add-on row - col_totals <- data.frame(x1 = "Total", t(colSums(dat[-1], na.rm = na.rm)), stringsAsFactors = FALSE) %>% +add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ + clean_dat <- clean_names(dat) # bad names will make select_if choke + + if(dim(select_if(clean_dat, is.numeric))[2] == 0){stop("data.frame must contain at least one column of class numeric")} # chokes on illegal names + + # creates the totals row to be appended + col_vec <- function(a_col, na_rm = na.rm){ + if(is.numeric(a_col)){ # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached + sum(a_col, na.rm = na_rm) + } else {fill} + } + + col_totals <- lapply(dat, col_vec) %>% + as.data.frame(stringsAsFactors = FALSE) %>% stats::setNames(names(dat)) - dplyr::bind_rows(dat, col_totals) + + col_totals[nrow(col_totals), min(which(!unlist(lapply(col_totals, is.numeric))))] <- "Total" # replace final row, first non-numeric column with "Total" + dplyr::bind_rows(clean_dat %>% + stats::setNames(names(dat)), col_totals) + } #' @title Append a totals column to a data.frame. #' #' @description -#' This function excludes the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. 
+#' This function excludes non-numeric columns of the input data.frame, e.g., a first column with a descriptive variable not to be summed. #' -#' @param dat an input data.frame with numeric values in all columns beyond the first. +#' @param dat an input data.frame with at least one numeric column. #' @param na.rm should missing values (including NaN) be omitted from the calculations? -#' @return Returns a data.frame with a totals column, consisting of "Total" in the first row and row sums in the others. +#' @return Returns a data.frame with a totals column containing row-wise sums. #' @export #' @examples #' library(dplyr) # for the %>% pipe @@ -38,8 +52,14 @@ add_totals_row <- function(dat, na.rm = TRUE){ #' add_totals_col add_totals_col <- function(dat, na.rm = TRUE){ - check_all_cols_after_first_are_numeric(dat) - row_totals <- data.frame(Total = rowSums(dat[-1], na.rm = na.rm)) - dplyr::bind_cols(dat, row_totals) + + clean_dat <- clean_names(dat) # bad names will make select_if choke + if(dim(dplyr::select_if(clean_dat, is.numeric))[2] == 0){stop("data.frame must contain at least one column of class numeric")} + row_totals <- clean_dat %>% + dplyr::select_if(is.numeric) %>% + dplyr::transmute(Total = rowSums(., na.rm = na.rm)) + + dplyr::bind_cols(dat, row_totals) %>% + stats::setNames(c(names(dat), "Total")) # put back original names } diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index 5e9e8bd8..1306baf4 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -10,13 +10,6 @@ dat <- data.frame(a = c(rep(c("big", "small", "big"), 3)), ct <- dat %>% crosstab(a, b) - -test_that("error thrown if column beyond the first is not numeric", { - expect_error(add_totals_row(dat %>% select(b, a)), - "all columns after the first one must be numeric") - expect_error(add_totals_col(dat %>% select(b, a)), - "all columns after the first one must be numeric") -}) test_that("totals row is correct", { @@ -74,3 
+67,32 @@ test_that("both functions work with a single column", { expect_error(single_col %>% add_totals_row(), NA) # from http://stackoverflow.com/a/30068233 expect_error(single_col %>% add_totals_row(), NA) }) + + + + +dat <- data.frame( + a = c("hi", "lo"), + b = c(1, 2), + c = c(5, 10), + d = c("big", "small"), + e = c(20, NA), + stringsAsFactors = FALSE +) + +test_that("na.rm value gets passed through", { + +}) + + +test_that("error thrown if no columns are numeric", { + +}) + +test_that("works with non-numeric columns mixed in", { + +}) + +test_that("column names are passed through", { + +}) \ No newline at end of file From 5e901898d8350c4518acfa19a11ce23c89227909 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Thu, 2 Feb 2017 21:23:29 -0500 Subject: [PATCH 02/28] helpers nearly done taking non-numeric cols --- R/add_totals.R | 5 +++-- R/ns_to_percents.R | 22 ++++++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/R/add_totals.R b/R/add_totals.R index 12eebd84..e7e1a628 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -4,6 +4,7 @@ #' This function excludes the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. #' #' @param dat an input data.frame with numeric values in all columns beyond the first. +#' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns? #' @param na.rm should missing values (including NaN) be omitted from the calculations? #' @return Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. 
#' @export @@ -17,7 +18,7 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ clean_dat <- clean_names(dat) # bad names will make select_if choke - if(dim(select_if(clean_dat, is.numeric))[2] == 0){stop("data.frame must contain at least one column of class numeric")} # chokes on illegal names + if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} # creates the totals row to be appended col_vec <- function(a_col, na_rm = na.rm){ @@ -54,7 +55,7 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ add_totals_col <- function(dat, na.rm = TRUE){ clean_dat <- clean_names(dat) # bad names will make select_if choke - if(dim(dplyr::select_if(clean_dat, is.numeric))[2] == 0){stop("data.frame must contain at least one column of class numeric")} + if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} row_totals <- clean_dat %>% dplyr::select_if(is.numeric) %>% dplyr::transmute(Total = rowSums(., na.rm = na.rm)) diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index cadf61c9..83b35160 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -7,6 +7,8 @@ #' @param denom the denominator to use for calculating percentages. One of "row", "col", or "all". #' @param na.rm should missing values (including NaN) be omitted from the calculations? #' @param total_n an optional number to use as the denominator when calculating table-level percentages (when denom = "all"). Supply this if your input data.frame \code{dat} has values that would throw off the denominator if they were included, e.g., if there's a totals row appended to the bottom of the table. +#' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns?\ +#' #' @return Returns a data.frame of percentages, expressed as numeric values between 0 and 1. 
#' @export #' @examples @@ -21,29 +23,29 @@ #' add_totals_row() %>% # add a totals row that should not be included in the denominator #' ns_to_percents(denom = "all", total_n = nrow(mtcars)) # specify correct denominator -ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL){ +ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL, fill = "-"){ # catch bad inputs if(! denom %in% c("row", "col", "all")){stop("'denom' must be one of 'row', 'col', or 'all'")} - check_all_cols_after_first_are_numeric(dat) + if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} + numeric_cols <- which(unlist(lapply(dat, is.numeric))) + if(!is.null(total_n)){ if(!is.numeric(total_n)){stop("override_n must be numeric")} complete_n <- total_n } else{ - complete_n <- sum(dat[, -1], na.rm = TRUE) + complete_n <- sum(dat[, numeric_cols], na.rm = TRUE) } - n_col <- ncol(dat) - if(denom == "row"){ - row_sum <- rowSums(dat[, 2:n_col], na.rm = na.rm) - dat[, 2:n_col] <- dat[, 2:n_col] / row_sum + row_sum <- rowSums(dat[, numeric_cols], na.rm = na.rm) + dat[, numeric_cols] <- dat[, numeric_cols] / row_sum } else if(denom == "col"){ - col_sum <- colSums(dat[, 2:n_col], na.rm = na.rm) - dat[, 2:n_col] <- sweep(dat[, 2:n_col], 2, col_sum,`/`) # from http://stackoverflow.com/questions/9447801/dividing-columns-by-colsums-in-r + col_sum <- colSums(dat[, numeric_cols], na.rm = na.rm) + dat[, numeric_cols] <- sweep(dat[, numeric_cols], 2, col_sum,`/`) # from http://stackoverflow.com/questions/9447801/dividing-columns-by-colsums-in-r } else if(denom == "all"){ - dat[, 2:n_col] <- dat[, 2:n_col] / complete_n + dat[, numeric_cols] <- dat[, numeric_cols] / complete_n } dat From a50f93814353692cabd2313239cdc0aad8e16c0b Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Thu, 2 Feb 2017 21:41:02 -0500 Subject: [PATCH 03/28] resolved first column should be made into character for add_totals_row --- 
R/add_totals.R | 11 ++++++----- R/ns_to_percents.R | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/add_totals.R b/R/add_totals.R index e7e1a628..e45a2bdc 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -1,9 +1,9 @@ #' @title Append a totals row to a data.frame. #' #' @description -#' This function excludes the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. +#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. #' -#' @param dat an input data.frame with numeric values in all columns beyond the first. +#' @param dat an input data.frame with at least one numeric column. #' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns? #' @param na.rm should missing values (including NaN) be omitted from the calculations? #' @return Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. @@ -31,16 +31,17 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ as.data.frame(stringsAsFactors = FALSE) %>% stats::setNames(names(dat)) - col_totals[nrow(col_totals), min(which(!unlist(lapply(col_totals, is.numeric))))] <- "Total" # replace final row, first non-numeric column with "Total" + col_totals[nrow(col_totals), 1] <- "Total" # replace final row, first column with "Total" dplyr::bind_rows(clean_dat %>% - stats::setNames(names(dat)), col_totals) + stats::setNames(names(dat)) %>% + dplyr::mutate_at(1, as.character), col_totals) } #' @title Append a totals column to a data.frame. #' #' @description -#' This function excludes non-numeric columns of the input data.frame, e.g., a first column with a descriptive variable not to be summed. +#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. 
It also excludes other non-numeric columns. #' #' @param dat an input data.frame with at least one numeric column. #' @param na.rm should missing values (including NaN) be omitted from the calculations? diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index 83b35160..a1d0daab 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -29,6 +29,7 @@ ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL, fil if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} numeric_cols <- which(unlist(lapply(dat, is.numeric))) + numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols if(!is.null(total_n)){ if(!is.numeric(total_n)){stop("override_n must be numeric")} @@ -37,7 +38,7 @@ ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL, fil complete_n <- sum(dat[, numeric_cols], na.rm = TRUE) } - + if(denom == "row"){ row_sum <- rowSums(dat[, numeric_cols], na.rm = na.rm) dat[, numeric_cols] <- dat[, numeric_cols] / row_sum From e9f24a302963d3709c708a6fd7eda64f02478522 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Thu, 2 Feb 2017 21:48:35 -0500 Subject: [PATCH 04/28] whoops ns_to_percent does not need a fill argument --- R/ns_to_percents.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index a1d0daab..d3b45379 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -7,7 +7,6 @@ #' @param denom the denominator to use for calculating percentages. One of "row", "col", or "all". #' @param na.rm should missing values (including NaN) be omitted from the calculations? #' @param total_n an optional number to use as the denominator when calculating table-level percentages (when denom = "all"). 
Supply this if your input data.frame \code{dat} has values that would throw off the denominator if they were included, e.g., if there's a totals row appended to the bottom of the table. -#' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns?\ #' #' @return Returns a data.frame of percentages, expressed as numeric values between 0 and 1. #' @export @@ -23,7 +22,7 @@ #' add_totals_row() %>% # add a totals row that should not be included in the denominator #' ns_to_percents(denom = "all", total_n = nrow(mtcars)) # specify correct denominator -ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL, fill = "-"){ +ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL){ # catch bad inputs if(! denom %in% c("row", "col", "all")){stop("'denom' must be one of 'row', 'col', or 'all'")} if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} From 69a6a7ec56f869cae4bb8b84face18d9c767b2cf Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sun, 5 Feb 2017 11:09:21 -0500 Subject: [PATCH 05/28] note changes to adorn helpers in news --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 6858d303..48c0a6a3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ NEWS ### Minor +* `add_totals_row()`, `add_totals_col()`, and `ns_to_percents()` can now be called on data.frames that have non-numeric columns beyond the first one (they will be ignored) [(#57)](https://github.com/sfirke/janitor/issues/57) ## Bug fixes * Long variable names with spaces no longer break `tabyl()` and `crosstab()` [(#87)](https://github.com/sfirke/janitor/issues/87) From 4e8950eef988da5ed4ee9557d88ac9b491faf464 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sat, 18 Mar 2017 22:08:44 -0400 Subject: [PATCH 06/28] eliminate check of non-numeric 1st col for add_totals functions See #57 --- R/add_totals.R | 6 +++--- 
R/adorn_crosstab.R | 20 ++++++++++---------- R/adorn_helpers.R | 14 +------------- R/ns_to_percents.R | 5 ++--- man/add_totals_col.Rd | 6 +++--- man/add_totals_row.Rd | 8 +++++--- man/adorn_crosstab.Rd | 4 ++-- tests/testthat/test-adorn-helpers.R | 16 ---------------- tests/testthat/test-ns-to-percents.R | 4 ++-- 9 files changed, 28 insertions(+), 55 deletions(-) delete mode 100644 tests/testthat/test-adorn-helpers.R diff --git a/R/add_totals.R b/R/add_totals.R index e45a2bdc..de952814 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -56,12 +56,12 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ add_totals_col <- function(dat, na.rm = TRUE){ clean_dat <- clean_names(dat) # bad names will make select_if choke - if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} + if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("input data.frame must contain at least one column of class numeric")} row_totals <- clean_dat %>% dplyr::select_if(is.numeric) %>% dplyr::transmute(Total = rowSums(., na.rm = na.rm)) - dplyr::bind_cols(dat, row_totals) %>% - stats::setNames(c(names(dat), "Total")) # put back original names + dat$Total <- row_totals$Total + dat } diff --git a/R/adorn_crosstab.R b/R/adorn_crosstab.R index 4d69229a..88016c96 100644 --- a/R/adorn_crosstab.R +++ b/R/adorn_crosstab.R @@ -3,7 +3,7 @@ #' @description #' Designed to run on the output of a call to \code{crosstab}, this adds formatting, percentage sign, Ns, totals row/column, and custom rounding to a table of numeric values. The result is no longer clean data, but it saves time in reporting table results. #' -#' @param crosstab a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}. +#' @param dat a data.frame with row names in the first column and numeric values in all other columns. 
Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}. #' @param denom the denominator to use for calculating percentages. One of "row", "col", or "all". #' @param show_n should counts be displayed alongside the percentages? #' @param digits how many digits should be displayed after the decimal point? @@ -28,23 +28,23 @@ # take result of a crosstab() call and print a nice result #' @export -adorn_crosstab <- function(crosstab, denom = "row", show_n = TRUE, digits = 1, show_totals = FALSE, rounding = "half to even"){ +adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_totals = FALSE, rounding = "half to even"){ # some input checks if(! rounding %in% c("half to even", "half up")){stop("'rounding' must be one of 'half to even' or 'half up'")} - check_all_cols_after_first_are_numeric(crosstab) + dat[[1]] <- as.character(dat[[1]]) # for type matching when binding the word "Total" on a factor. Moved up to this line so that if only 1st col is numeric, the function errors + if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("input data.frame must contain at least one column of class numeric")} # changed from select_if as it can't handle numbers as col names - crosstab[[1]] <- as.character(crosstab[[1]]) # for type matching when binding the word "Total" on a factor showing_col_totals <- (show_totals & denom %in% c("col", "all")) showing_row_totals <- (show_totals & denom %in% c("row", "all")) - complete_n <- complete_n <- sum(crosstab[, -1], na.rm = TRUE) # capture for percent calcs before any totals col/row is added + complete_n <- complete_n <- sum(dat[, -1], na.rm = TRUE) # capture for percent calcs before any totals col/row is added - if(showing_col_totals){ crosstab <- add_totals_col(crosstab) } - if(showing_row_totals){ crosstab <- add_totals_row(crosstab) } - n_col <- ncol(crosstab) + if(showing_col_totals){ dat <- add_totals_col(dat) } + if(showing_row_totals){ dat <- 
add_totals_row(dat) } + n_col <- ncol(dat) - percs <- ns_to_percents(crosstab, denom, total_n = complete_n) # last argument only gets used in the "all" case = no harm in passing otherwise + percs <- ns_to_percents(dat, denom, total_n = complete_n) # last argument only gets used in the "all" case = no harm in passing otherwise # round %s using specified method, add % sign percs <- dplyr::mutate_at(percs, dplyr::vars(2:n_col), dplyr::funs(. * 100)) # since we'll be adding % sign - do this before rounding @@ -56,7 +56,7 @@ adorn_crosstab <- function(crosstab, denom = "row", show_n = TRUE, digits = 1, s # paste Ns if needed if(show_n){ - result <- paste_ns(percs, crosstab) + result <- paste_ns(percs, dat) } else{ result <- percs} as.data.frame(result) # drop back to data.frame from tibble diff --git a/R/adorn_helpers.R b/R/adorn_helpers.R index 31cbcdc7..1a90eaba 100644 --- a/R/adorn_helpers.R +++ b/R/adorn_helpers.R @@ -44,16 +44,4 @@ fix_parens_whitespace <- function(x){ fixed = TRUE) } -} - -# check that all columns in a data.frame beyond the first one are numeric -check_all_cols_after_first_are_numeric <- function(x){ - non_numeric_count <- x %>% - dplyr::select(-1) %>% - lapply(function(x) !is.numeric(x)) %>% - unlist %>% - sum - if(non_numeric_count > 0){ - stop("all columns after the first one must be numeric") - } -} +} \ No newline at end of file diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index d3b45379..0198fce7 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -25,10 +25,9 @@ ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL){ # catch bad inputs if(! 
denom %in% c("row", "col", "all")){stop("'denom' must be one of 'row', 'col', or 'all'")} - if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} - numeric_cols <- which(unlist(lapply(dat, is.numeric))) - numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols + numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors + if(length(numeric_cols) == 0){stop("input data.frame must contain at least one column of class numeric")} if(!is.null(total_n)){ if(!is.numeric(total_n)){stop("override_n must be numeric")} diff --git a/man/add_totals_col.Rd b/man/add_totals_col.Rd index 60f962bd..027fced3 100644 --- a/man/add_totals_col.Rd +++ b/man/add_totals_col.Rd @@ -7,15 +7,15 @@ add_totals_col(dat, na.rm = TRUE) } \arguments{ -\item{dat}{an input data.frame with numeric values in all columns beyond the first.} +\item{dat}{an input data.frame with at least one numeric column.} \item{na.rm}{should missing values (including NaN) be omitted from the calculations?} } \value{ -Returns a data.frame with a totals column, consisting of "Total" in the first row and row sums in the others. +Returns a data.frame with a totals column containing row-wise sums. } \description{ -This function excludes the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. +This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. 
} \examples{ library(dplyr) # for the \%>\% pipe diff --git a/man/add_totals_row.Rd b/man/add_totals_row.Rd index f1eea959..c900e47e 100644 --- a/man/add_totals_row.Rd +++ b/man/add_totals_row.Rd @@ -4,10 +4,12 @@ \alias{add_totals_row} \title{Append a totals row to a data.frame.} \usage{ -add_totals_row(dat, na.rm = TRUE) +add_totals_row(dat, fill = "-", na.rm = TRUE) } \arguments{ -\item{dat}{an input data.frame with numeric values in all columns beyond the first.} +\item{dat}{an input data.frame with at least one numeric column.} + +\item{fill}{if there are more than one non-numeric columns, what string should fill the bottom row of those columns?} \item{na.rm}{should missing values (including NaN) be omitted from the calculations?} } @@ -15,7 +17,7 @@ add_totals_row(dat, na.rm = TRUE) Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. } \description{ -This function excludes the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. +This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. } \examples{ library(dplyr) # for the \%>\% pipe diff --git a/man/adorn_crosstab.Rd b/man/adorn_crosstab.Rd index d280f8d3..a659c83a 100644 --- a/man/adorn_crosstab.Rd +++ b/man/adorn_crosstab.Rd @@ -4,11 +4,11 @@ \alias{adorn_crosstab} \title{Add formatting to a crosstabulation table.} \usage{ -adorn_crosstab(crosstab, denom = "row", show_n = TRUE, digits = 1, +adorn_crosstab(dat, denom = "row", show_n = TRUE, digits = 1, show_totals = FALSE, rounding = "half to even") } \arguments{ -\item{crosstab}{a data.frame with row names in the first column and numeric values in all other columns. 
Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}.} +\item{dat}{a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}.} \item{denom}{the denominator to use for calculating percentages. One of "row", "col", or "all".} diff --git a/tests/testthat/test-adorn-helpers.R b/tests/testthat/test-adorn-helpers.R deleted file mode 100644 index b686a23f..00000000 --- a/tests/testthat/test-adorn-helpers.R +++ /dev/null @@ -1,16 +0,0 @@ -# Tests functions called by adorn_crosstab() - -library(janitor) -context("adorn helper functions") - -library(dplyr) - -test_that("check_all_numeric function is accurate", { - expect_silent(check_all_cols_after_first_are_numeric(data.frame(a = 1:2, b = 1:2, c = 1:2))) - expect_silent(check_all_cols_after_first_are_numeric(data.frame(a = c("a", "b"), b = 1:2, c = 1:2))) - expect_error(check_all_cols_after_first_are_numeric(data.frame(a = 1:2, b = c("a", "b"), c = 1:2)), - "all columns after the first one must be numeric") - expect_error(check_all_cols_after_first_are_numeric(data.frame(a = 1:2, b = 1:2, c = c("a", "b"), - stringsAsFactors = FALSE)), - "all columns after the first one must be numeric") -}) diff --git a/tests/testthat/test-ns-to-percents.R b/tests/testthat/test-ns-to-percents.R index 71e8c323..b9b37b11 100644 --- a/tests/testthat/test-ns-to-percents.R +++ b/tests/testthat/test-ns-to-percents.R @@ -69,9 +69,9 @@ test_that("NAs handled correctly with na.rm = FALSE", { ) }) -test_that("data.frames with non-numeric columns cause failure", { +test_that("data.frames with no numeric columns beyond the first cause failure", { expect_error(ns_to_percents(data.frame(a = 1:2, b = c("hi", "lo"))), - "all columns after the first one must be numeric") + "input data.frame must contain at least one column of class numeric") }) test_that("non-numeric 
argument to total_n fails", { From 81f3e6a8ef752cc61885bf48ac0d6ee65e3e2ceb Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sat, 18 Mar 2017 22:14:14 -0400 Subject: [PATCH 07/28] note the change to adorn_crosstab arguments in NEWS --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 48c0a6a3..18002768 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,9 @@ NEWS # janitor 0.2.1.9000 (in progress) +## Breaking changes +* The first argument of `adorn_crosstab()` is now "dat" instead of "crosstab" (since the function can be called on any data.frame, not just a result of `crosstab()`) + ## Features ### Major From c8893e48ce5239264b324f8c647ebfc9773620bd Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sat, 18 Mar 2017 23:05:29 -0400 Subject: [PATCH 08/28] merged add_totals_row and add_totals_col into a single add_totals() still need to deprecate the old ones --- NAMESPACE | 3 +- R/add_totals.R | 81 ++++++++++++++------------------ R/adorn_crosstab.R | 4 +- R/ns_to_percents.R | 2 +- man/add_totals.Rd | 29 ++++++++++++ man/add_totals_col.Rd | 25 ---------- man/add_totals_row.Rd | 27 ----------- man/ns_to_percents.Rd | 2 +- tests/testthat/test-add-totals.R | 40 ++++++++++------ vignettes/introduction.Rmd | 7 ++- 10 files changed, 97 insertions(+), 123 deletions(-) create mode 100644 man/add_totals.Rd delete mode 100644 man/add_totals_col.Rd delete mode 100644 man/add_totals_row.Rd diff --git a/NAMESPACE b/NAMESPACE index 0ad9ecf3..05fdc2fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,8 +4,7 @@ S3method(crosstab,data.frame) S3method(crosstab,default) S3method(tabyl,data.frame) S3method(tabyl,default) -export(add_totals_col) -export(add_totals_row) +export(add_totals) export(adorn_crosstab) export(clean_names) export(convert_to_NA) diff --git a/R/add_totals.R b/R/add_totals.R index de952814..01788f1c 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -1,67 +1,54 @@ -#' @title Append a totals row to a data.frame. 
+#' @title Append a totals row and/or column to a data.frame. #' #' @description -#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. +#' This function excludes other non-numeric columns. #' #' @param dat an input data.frame with at least one numeric column. -#' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns? +#' @param which one of "row", "col", or \code{c("row", "col")} +#' @param fill if there are multiple non-numeric columns, what string should fill the bottom row of those columns? #' @param na.rm should missing values (including NaN) be omitted from the calculations? -#' @return Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. +#' @return Returns a data.frame augmented with a totals row, column, or both. #' @export #' @examples #' library(dplyr) # for the %>% pipe #' mtcars %>% #' crosstab(am, cyl) %>% -#' add_totals_row +#' add_totals() -add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ - clean_dat <- clean_names(dat) # bad names will make select_if choke +add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ + # clean_dat <- clean_names(dat) # bad names will make select_if choke + dat[[1]] <- as.character(dat[[1]]) # for type matching when binding the word "Total" on a factor. 
Moved up to this line so that if only 1st col is numeric, the function errors + if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("data.frame must contain at least one column of class numeric")} - if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} - - # creates the totals row to be appended - col_vec <- function(a_col, na_rm = na.rm){ - if(is.numeric(a_col)){ # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached - sum(a_col, na.rm = na_rm) - } else {fill} + if("row" %in% which){ + # creates the totals row to be appended + col_vec <- function(a_col, na_rm = na.rm){ + if(is.numeric(a_col)){ # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached + sum(a_col, na.rm = na_rm) + } else {fill} + } + + col_totals <- lapply(dat, col_vec) %>% + as.data.frame(stringsAsFactors = FALSE) %>% + stats::setNames(names(dat)) + + col_totals[nrow(col_totals), 1] <- "Total" # replace final row, first column with "Total" + dat <- dplyr::bind_rows(dat %>% + stats::setNames(names(dat)) %>% + dplyr::mutate_at(1, as.character), col_totals) } - col_totals <- lapply(dat, col_vec) %>% - as.data.frame(stringsAsFactors = FALSE) %>% - stats::setNames(names(dat)) + if("col" %in% which){ + # Add totals col + clean_dat <- clean_names(dat) # bad names will make select_if choke + row_totals <- clean_dat %>% + dplyr::select_if(is.numeric) %>% + dplyr::transmute(Total = rowSums(., na.rm = na.rm)) - col_totals[nrow(col_totals), 1] <- "Total" # replace final row, first column with "Total" - dplyr::bind_rows(clean_dat %>% - stats::setNames(names(dat)) %>% - dplyr::mutate_at(1, as.character), col_totals) - -} - -#' @title Append a totals column to a data.frame. -#' -#' @description -#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. 
It also excludes other non-numeric columns. -#' -#' @param dat an input data.frame with at least one numeric column. -#' @param na.rm should missing values (including NaN) be omitted from the calculations? -#' @return Returns a data.frame with a totals column containing row-wise sums. -#' @export -#' @examples -#' library(dplyr) # for the %>% pipe -#' mtcars %>% -#' crosstab(am, cyl) %>% -#' add_totals_col - -add_totals_col <- function(dat, na.rm = TRUE){ - - clean_dat <- clean_names(dat) # bad names will make select_if choke - if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("input data.frame must contain at least one column of class numeric")} - row_totals <- clean_dat %>% - dplyr::select_if(is.numeric) %>% - dplyr::transmute(Total = rowSums(., na.rm = na.rm)) + dat$Total <- row_totals$Total + } - dat$Total <- row_totals$Total dat } diff --git a/R/adorn_crosstab.R b/R/adorn_crosstab.R index 88016c96..0d19412a 100644 --- a/R/adorn_crosstab.R +++ b/R/adorn_crosstab.R @@ -40,8 +40,8 @@ adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_t complete_n <- complete_n <- sum(dat[, -1], na.rm = TRUE) # capture for percent calcs before any totals col/row is added - if(showing_col_totals){ dat <- add_totals_col(dat) } - if(showing_row_totals){ dat <- add_totals_row(dat) } + if(showing_col_totals){ dat <- add_totals(dat, "col") } + if(showing_row_totals){ dat <- add_totals(dat, "row") } n_col <- ncol(dat) percs <- ns_to_percents(dat, denom, total_n = complete_n) # last argument only gets used in the "all" case = no harm in passing otherwise diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index 0198fce7..87212a39 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -19,7 +19,7 @@ #' # when total_n is needed #' mtcars %>% #' crosstab(am, cyl) %>% -#' add_totals_row() %>% # add a totals row that should not be included in the denominator +#' add_totals("row") %>% # add a totals row that should not be included in the denominator #' 
ns_to_percents(denom = "all", total_n = nrow(mtcars)) # specify correct denominator ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL){ diff --git a/man/add_totals.Rd b/man/add_totals.Rd new file mode 100644 index 00000000..99616717 --- /dev/null +++ b/man/add_totals.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_totals.R +\name{add_totals} +\alias{add_totals} +\title{Append a totals row and/or column to a data.frame.} +\usage{ +add_totals(dat, which = c("row", "col"), fill = "-", na.rm = TRUE) +} +\arguments{ +\item{dat}{an input data.frame with at least one numeric column.} + +\item{which}{one of "row", "col", or \code{c("row", "col")}} + +\item{fill}{if there are multiple non-numeric columns, what string should fill the bottom row of those columns?} + +\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} +} +\value{ +Returns a data.frame augmented with a totals row, column, or both. +} +\description{ +This function excludes other non-numeric columns. +} +\examples{ +library(dplyr) # for the \%>\% pipe +mtcars \%>\% + crosstab(am, cyl) \%>\% + add_totals_row +} diff --git a/man/add_totals_col.Rd b/man/add_totals_col.Rd deleted file mode 100644 index 027fced3..00000000 --- a/man/add_totals_col.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_totals.R -\name{add_totals_col} -\alias{add_totals_col} -\title{Append a totals column to a data.frame.} -\usage{ -add_totals_col(dat, na.rm = TRUE) -} -\arguments{ -\item{dat}{an input data.frame with at least one numeric column.} - -\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} -} -\value{ -Returns a data.frame with a totals column containing row-wise sums. -} -\description{ -This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. 
It also excludes other non-numeric columns. -} -\examples{ -library(dplyr) # for the \%>\% pipe -mtcars \%>\% - crosstab(am, cyl) \%>\% - add_totals_col -} diff --git a/man/add_totals_row.Rd b/man/add_totals_row.Rd deleted file mode 100644 index c900e47e..00000000 --- a/man/add_totals_row.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_totals.R -\name{add_totals_row} -\alias{add_totals_row} -\title{Append a totals row to a data.frame.} -\usage{ -add_totals_row(dat, fill = "-", na.rm = TRUE) -} -\arguments{ -\item{dat}{an input data.frame with at least one numeric column.} - -\item{fill}{if there are more than one non-numeric columns, what string should fill the bottom row of those columns?} - -\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} -} -\value{ -Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. -} -\description{ -This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. 
-} -\examples{ -library(dplyr) # for the \%>\% pipe -mtcars \%>\% - crosstab(am, cyl) \%>\% - add_totals_row -} diff --git a/man/ns_to_percents.Rd b/man/ns_to_percents.Rd index ead7bb09..d7794dd6 100644 --- a/man/ns_to_percents.Rd +++ b/man/ns_to_percents.Rd @@ -30,6 +30,6 @@ mtcars \%>\% # when total_n is needed mtcars \%>\% crosstab(am, cyl) \%>\% - add_totals_row() \%>\% # add a totals row that should not be included in the denominator + add_totals("row") \%>\% # add a totals row that should not be included in the denominator ns_to_percents(denom = "all", total_n = nrow(mtcars)) # specify correct denominator } diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index 1306baf4..fecc69a7 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -1,7 +1,7 @@ # Tests add_totals_row and add_totals_col library(janitor) -context("add_totals functions") +context("add_totals function") library(dplyr) dat <- data.frame(a = c(rep(c("big", "small", "big"), 3)), @@ -13,7 +13,7 @@ ct <- dat %>% test_that("totals row is correct", { - expect_equal(add_totals_row(ct), + expect_equal(add_totals(ct, "row"), data.frame(a = c("big", "small", "Total"), `1` = c(4, 1, 5), `2` = c(0, 2, 2), @@ -25,22 +25,21 @@ test_that("totals row is correct", { test_that("totals col is correct", { - expect_equal(add_totals_col(ct), + expect_equal(add_totals(ct, "col"), data.frame(a = c("big", "small"), `1` = c(4, 1), `2` = c(0, 2), `3` = c(2, 0), Total = c(6, 3), check.names = FALSE, - stringsAsFactors = TRUE) + stringsAsFactors = FALSE) ) }) test_that("totals row and col produce correct results when called together", { expect_equal(ct %>% - add_totals_col %>% - add_totals_row(), + add_totals(c("row", "col")), data.frame(a = c("big", "small", "Total"), `1` = c(4, 1, 5), `2` = c(0, 2, 2), @@ -51,21 +50,20 @@ test_that("totals row and col produce correct results when called together", { ) }) -test_that("order doesn't matter when totals row and 
col are called together", { +test_that("order doesn't matter when row and col are called together", { expect_equal(ct %>% - add_totals_col %>% - add_totals_row, + add_totals(c("row", "col")), ct %>% - add_totals_row %>% - add_totals_col + add_totals(c("col", "row")) ) }) test_that("both functions work with a single column", { single_col <- data_frame(a = c(as.Date("2016-01-01"), as.Date("2016-02-03")), b = c(1, 2)) - expect_error(single_col %>% add_totals_row(), NA) # from http://stackoverflow.com/a/30068233 - expect_error(single_col %>% add_totals_row(), NA) + expect_error(single_col %>% add_totals("row"), NA) # from http://stackoverflow.com/a/30068233 + expect_error(single_col %>% add_totals("col"), NA) + expect_error(single_col %>% add_totals(c("col", "row")), NA) }) @@ -80,6 +78,20 @@ dat <- data.frame( stringsAsFactors = FALSE ) +test_that("numeric first column is ignored", { + expect_equal(mtcars %>% + crosstab(cyl, gear) %>% + add_totals("col"), + data.frame( + cyl = c("4", "6", "8"), + `3` = c(1, 2, 12), + `4` = c(8, 4, 0), + `5` = c(2, 1, 2), + Total = c(11, 7, 14), + check.names = FALSE, + stringsAsFactors = FALSE)) +}) + test_that("na.rm value gets passed through", { }) @@ -95,4 +107,4 @@ test_that("works with non-numeric columns mixed in", { test_that("column names are passed through", { -}) \ No newline at end of file +}) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 8d796e9e..ec0255e7 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -179,13 +179,12 @@ q %>% ## Exploring -### `add_totals_col()` and `add_totals_row()` -These functions add a totals row or column to a data.frame. These functions exclude the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. +### `add_totals()` +This adds a totals row and/or column to a data.frame. 
These functions exclude the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. ```{r} mtcars %>% crosstab(am, cyl) %>% - add_totals_row %>% - add_totals_col + add_totals() ``` ### Convert a data.frame of numbers to percentages with `ns_to_percents()` From 55f93c9815b5711ffc2d4a5333ece99059eb3d40 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sat, 18 Mar 2017 23:06:45 -0400 Subject: [PATCH 09/28] minor tweaks for add_totals function --- NEWS.md | 1 + man/add_totals.Rd | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 18002768..38dbae95 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ NEWS ## Breaking changes * The first argument of `adorn_crosstab()` is now "dat" instead of "crosstab" (since the function can be called on any data.frame, not just a result of `crosstab()`) +* The functions `add_totals_row` and `add_totals_col` were combined into a single function, `add_totals()`. [(#57)](https://github.com/sfirke/janitor/issues/57) ## Features diff --git a/man/add_totals.Rd b/man/add_totals.Rd index 99616717..5185107d 100644 --- a/man/add_totals.Rd +++ b/man/add_totals.Rd @@ -25,5 +25,5 @@ This function excludes other non-numeric columns. 
library(dplyr) # for the \%>\% pipe mtcars \%>\% crosstab(am, cyl) \%>\% - add_totals_row + add_totals() } From c326e44bd52e4ec614d6236ed3f3ec56a1b926c3 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sun, 19 Mar 2017 10:54:18 -0400 Subject: [PATCH 10/28] add_totals works on grouped_df closes #97 --- R/add_totals.R | 3 ++- tests/testthat/test-add-totals.R | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/R/add_totals.R b/R/add_totals.R index 01788f1c..6f8a4c51 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -17,7 +17,8 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ - # clean_dat <- clean_names(dat) # bad names will make select_if choke + if("grouped_df" %in% class(dat)){ dat <- dplyr::ungroup(dat) } # grouped_df causes problems, #97 + dat[[1]] <- as.character(dat[[1]]) # for type matching when binding the word "Total" on a factor. Moved up to this line so that if only 1st col is numeric, the function errors if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("data.frame must contain at least one column of class numeric")} diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index fecc69a7..24c52161 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -92,6 +92,13 @@ test_that("numeric first column is ignored", { stringsAsFactors = FALSE)) }) +test_that("grouped_df gets ungrouped and succeeds", { + ct <- mtcars %>% group_by(cyl, gear) %>% tally() %>% tidyr::spread(gear, n) + expect_equal(ct %>% add_totals(), + ct %>% ungroup() %>% add_totals + ) +}) + test_that("na.rm value gets passed through", { }) From 70595359bed0fe677b676a7e257e38879a7c7835 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sun, 19 Mar 2017 18:41:43 -0400 Subject: [PATCH 11/28] complete test coverage, improve error message for add_totals --- R/add_totals.R | 4 +-- R/ns_to_percents.R | 2 +- man/add_totals.Rd | 2 +- tests/testthat/test-add-totals.R | 51 
++++++++++++++++++++++++---- tests/testthat/test-ns-to-percents.R | 2 +- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/R/add_totals.R b/R/add_totals.R index 6f8a4c51..126683f1 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -1,7 +1,7 @@ #' @title Append a totals row and/or column to a data.frame. #' #' @description -#' This function excludes other non-numeric columns. +#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. #' #' @param dat an input data.frame with at least one numeric column. #' @param which one of "row", "col", or \code{c("row", "col")} @@ -20,7 +20,7 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ if("grouped_df" %in% class(dat)){ dat <- dplyr::ungroup(dat) } # grouped_df causes problems, #97 dat[[1]] <- as.character(dat[[1]]) # for type matching when binding the word "Total" on a factor. Moved up to this line so that if only 1st col is numeric, the function errors - if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("data.frame must contain at least one column of class numeric")} + if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("at least one one of columns 2:n must be of class numeric")} if("row" %in% which){ # creates the totals row to be appended diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index 87212a39..59f66217 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -27,7 +27,7 @@ ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL){ if(! denom %in% c("row", "col", "all")){stop("'denom' must be one of 'row', 'col', or 'all'")} numeric_cols <- which(unlist(lapply(dat, is.numeric))) numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. 
Moved up to this line so that if only 1st col is numeric, the function errors - if(length(numeric_cols) == 0){stop("input data.frame must contain at least one column of class numeric")} + if(length(numeric_cols) == 0){stop("at least one one of columns 2:n must be of class numeric")} if(!is.null(total_n)){ if(!is.numeric(total_n)){stop("override_n must be numeric")} diff --git a/man/add_totals.Rd b/man/add_totals.Rd index 5185107d..bfc50005 100644 --- a/man/add_totals.Rd +++ b/man/add_totals.Rd @@ -19,7 +19,7 @@ add_totals(dat, which = c("row", "col"), fill = "-", na.rm = TRUE) Returns a data.frame augmented with a totals row, column, or both. } \description{ -This function excludes other non-numeric columns. +This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. } \examples{ library(dplyr) # for the \%>\% pipe diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index 24c52161..7d00aab6 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -92,6 +92,10 @@ test_that("numeric first column is ignored", { stringsAsFactors = FALSE)) }) +# create input tables for subsequent testing +ct <- mtcars %>% group_by(cyl, gear) %>% tally() %>% tidyr::spread(gear, n) +df1 <- data.frame(x = c(1, 2), y = c(NA, 4)) + test_that("grouped_df gets ungrouped and succeeds", { ct <- mtcars %>% group_by(cyl, gear) %>% tally() %>% tidyr::spread(gear, n) expect_equal(ct %>% add_totals(), @@ -99,19 +103,54 @@ test_that("grouped_df gets ungrouped and succeeds", { ) }) -test_that("na.rm value gets passed through", { - +test_that("na.rm value works correctly", { + expect_equal(df1 %>% add_totals(na.rm = FALSE), + data.frame( + x = c("1", "2", "Total"), + y = c(NA, 4, NA), + Total = c(NA, 4, NA), + stringsAsFactors = FALSE + ) + ) }) - -test_that("error thrown if no columns are numeric", { +test_that("add_totals respects whether 
input was data.frame or data_frame", { + expect_equal(class(df1), + class(df1 %>% add_totals())) + expect_equal(class(df1 %>% as_tibble()), + class(df1 %>% as_tibble() %>% add_totals())) }) -test_that("works with non-numeric columns mixed in", { +test_that("error thrown if no columns past first are numeric", { + df2 <- data.frame(x = c("big", "small"), + y = c("hi", "lo")) + expect_error(add_totals(df2, "col"), + "at least one one of columns 2:n must be of class numeric") + + # Add a test where only the first column is numeric + df3 <- data.frame(x = 1:2, + y = c("hi", "lo")) + expect_error(add_totals(df3), + "at least one one of columns 2:n must be of class numeric") }) -test_that("column names are passed through", { +test_that("works with non-numeric columns mixed in; fill character specification", { + mixed <- data.frame( + a = 1:3, + b = c("x", "y", "z"), + c = 5:7, + d = c("big", "med", "small"), + stringsAsFactors = FALSE + ) + expect_equal(mixed %>% add_totals(fill = "*"), + data.frame(a = c("1", "2", "3", "Total"), + b = c("x", "y", "z", "*"), + c = c(5, 6, 7, 18), + d = c("big", "med", "small", "*"), + Total = c(5, 6, 7, 18), + stringsAsFactors = FALSE) + ) }) diff --git a/tests/testthat/test-ns-to-percents.R b/tests/testthat/test-ns-to-percents.R index b9b37b11..c3decc5b 100644 --- a/tests/testthat/test-ns-to-percents.R +++ b/tests/testthat/test-ns-to-percents.R @@ -71,7 +71,7 @@ test_that("NAs handled correctly with na.rm = FALSE", { test_that("data.frames with no numeric columns beyond the first cause failure", { expect_error(ns_to_percents(data.frame(a = 1:2, b = c("hi", "lo"))), - "input data.frame must contain at least one column of class numeric") + "at least one one of columns 2:n must be of class numeric") }) test_that("non-numeric argument to total_n fails", { From 2677369e603b6f0b17a91fd8ec25bffef5f5f804 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sun, 19 Mar 2017 19:05:57 -0400 Subject: [PATCH 12/28] putting add_totals_col, add_totals_row 
back as deprecated functions --- NAMESPACE | 2 ++ R/add_totals.R | 69 +++++++++++++++++++++++++++++++++++++++++++ man/add_totals_col.Rd | 25 ++++++++++++++++ man/add_totals_row.Rd | 27 +++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 man/add_totals_col.Rd create mode 100644 man/add_totals_row.Rd diff --git a/NAMESPACE b/NAMESPACE index 05fdc2fd..aaefc9a6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,8 @@ S3method(crosstab,default) S3method(tabyl,data.frame) S3method(tabyl,default) export(add_totals) +export(add_totals_col) +export(add_totals_row) export(adorn_crosstab) export(clean_names) export(convert_to_NA) diff --git a/R/add_totals.R b/R/add_totals.R index 126683f1..dabd5241 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -53,3 +53,72 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ dat } +### Deprecated functions ----------------------------- +#' @title Append a totals row to a data.frame. +#' +#' @description +#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. +#' +#' @param dat an input data.frame with at least one numeric column. +#' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns? +#' @param na.rm should missing values (including NaN) be omitted from the calculations? +#' @return Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. 
+#' @export +#' @examples +#' library(dplyr) # for the %>% pipe +#' mtcars %>% +#' crosstab(am, cyl) %>% +#' add_totals_row + + +add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ + .Deprecated("add_totals(\"row\")") + clean_dat <- clean_names(dat) # bad names will make select_if choke + + if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} + + # creates the totals row to be appended + col_vec <- function(a_col, na_rm = na.rm){ + if(is.numeric(a_col)){ # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached + sum(a_col, na.rm = na_rm) + } else {fill} + } + + col_totals <- lapply(dat, col_vec) %>% + as.data.frame(stringsAsFactors = FALSE) %>% + stats::setNames(names(dat)) + + col_totals[nrow(col_totals), 1] <- "Total" # replace final row, first column with "Total" + dplyr::bind_rows(clean_dat %>% + stats::setNames(names(dat)) %>% + dplyr::mutate_at(1, as.character), col_totals) + +} + +#' @title Append a totals column to a data.frame. +#' +#' @description +#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. +#' +#' @param dat an input data.frame with at least one numeric column. +#' @param na.rm should missing values (including NaN) be omitted from the calculations? +#' @return Returns a data.frame with a totals column containing row-wise sums. 
+#' @export +#' @examples +#' library(dplyr) # for the %>% pipe +#' mtcars %>% +#' crosstab(am, cyl) %>% +#' add_totals_col + +add_totals_col <- function(dat, na.rm = TRUE){ + .Deprecated("add_totals(\"col\")") + clean_dat <- clean_names(dat) # bad names will make select_if choke + if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("input data.frame must contain at least one column of class numeric")} + row_totals <- clean_dat %>% + dplyr::select_if(is.numeric) %>% + dplyr::transmute(Total = rowSums(., na.rm = na.rm)) + + dat$Total <- row_totals$Total + dat +} + diff --git a/man/add_totals_col.Rd b/man/add_totals_col.Rd new file mode 100644 index 00000000..027fced3 --- /dev/null +++ b/man/add_totals_col.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_totals.R +\name{add_totals_col} +\alias{add_totals_col} +\title{Append a totals column to a data.frame.} +\usage{ +add_totals_col(dat, na.rm = TRUE) +} +\arguments{ +\item{dat}{an input data.frame with at least one numeric column.} + +\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} +} +\value{ +Returns a data.frame with a totals column containing row-wise sums. +} +\description{ +This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. 
+} +\examples{ +library(dplyr) # for the \%>\% pipe +mtcars \%>\% + crosstab(am, cyl) \%>\% + add_totals_col +} diff --git a/man/add_totals_row.Rd b/man/add_totals_row.Rd new file mode 100644 index 00000000..c900e47e --- /dev/null +++ b/man/add_totals_row.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_totals.R +\name{add_totals_row} +\alias{add_totals_row} +\title{Append a totals row to a data.frame.} +\usage{ +add_totals_row(dat, fill = "-", na.rm = TRUE) +} +\arguments{ +\item{dat}{an input data.frame with at least one numeric column.} + +\item{fill}{if there are more than one non-numeric columns, what string should fill the bottom row of those columns?} + +\item{na.rm}{should missing values (including NaN) be omitted from the calculations?} +} +\value{ +Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. +} +\description{ +This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. 
+} +\examples{ +library(dplyr) # for the \%>\% pipe +mtcars \%>\% + crosstab(am, cyl) \%>\% + add_totals_row +} From 7b1792f7bbb7c1f7633435aef6e8cd72a9c94824 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sun, 19 Mar 2017 19:07:49 -0400 Subject: [PATCH 13/28] add tests for deprecated add_totals_* functions --- tests/testthat/tests-add-totals-deprecated.R | 69 ++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/testthat/tests-add-totals-deprecated.R diff --git a/tests/testthat/tests-add-totals-deprecated.R b/tests/testthat/tests-add-totals-deprecated.R new file mode 100644 index 00000000..082251c3 --- /dev/null +++ b/tests/testthat/tests-add-totals-deprecated.R @@ -0,0 +1,69 @@ +# Tests add_totals_row and add_totals_col + +library(janitor) +context("deprecated add_totals_* functions") + +library(dplyr) +dat <- data.frame(a = c(rep(c("big", "small", "big"), 3)), + b = c(1:3, 1:3, 1, 1, 1) +) +ct <- dat %>% + crosstab(a, b) + + + +test_that("totals row is correct", { + expect_equal(add_totals_row(ct), + data.frame(a = c("big", "small", "Total"), + `1` = c(4, 1, 5), + `2` = c(0, 2, 2), + `3` = c(2, 0, 2), + check.names = FALSE, + stringsAsFactors = FALSE) + ) +}) + + +test_that("totals col is correct", { + expect_equal(add_totals_col(ct), + data.frame(a = c("big", "small"), + `1` = c(4, 1), + `2` = c(0, 2), + `3` = c(2, 0), + Total = c(6, 3), + check.names = FALSE, + stringsAsFactors = TRUE) + ) +}) + + +test_that("totals row and col produce correct results when called together", { + expect_equal(ct %>% + add_totals_col %>% + add_totals_row(), + data.frame(a = c("big", "small", "Total"), + `1` = c(4, 1, 5), + `2` = c(0, 2, 2), + `3` = c(2, 0, 2), + Total = c(6, 3, 9), + check.names = FALSE, + stringsAsFactors = FALSE) + ) +}) + +test_that("order doesn't matter when totals row and col are called together", { + expect_equal(ct %>% + add_totals_col %>% + add_totals_row, + ct %>% + add_totals_row %>% + add_totals_col + ) +}) + +test_that("both 
functions work with a single column", { + single_col <- data_frame(a = c(as.Date("2016-01-01"), as.Date("2016-02-03")), + b = c(1, 2)) + expect_error(single_col %>% add_totals_row(), NA) # from http://stackoverflow.com/a/30068233 + expect_error(single_col %>% add_totals_row(), NA) +}) From 82b0aac3bb68c92285141c1a1bdaa83c23164221 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Mon, 20 Mar 2017 08:18:24 -0400 Subject: [PATCH 14/28] added test for adorn_crosstab on factor input --- tests/testthat/test-adorn-crosstab.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-adorn-crosstab.R b/tests/testthat/test-adorn-crosstab.R index e7323ab8..dcf0d12f 100644 --- a/tests/testthat/test-adorn-crosstab.R +++ b/tests/testthat/test-adorn-crosstab.R @@ -126,7 +126,16 @@ test_that("totals row and columns are correct", { }) test_that("Totals works with factor column", { - + df1 <- data.frame(x = c("big", "small"), + y = 1:2, + z = 10:11) + expect_equal( + adorn_crosstab(df1, denom = "row", show_totals = TRUE), + data.frame(x = c("big", "small", "Total"), + y = c("9.1% (1)", "15.4% (2)", "12.5% (3)"), + z = c("90.9% (10)", "84.6% (11)", "87.5% (21)"), + stringsAsFactors = FALSE + )) }) From f3b72de37fae76d21e845b0318da36698ad27617 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Mon, 20 Mar 2017 08:25:42 -0400 Subject: [PATCH 15/28] this test passes on my PC, breaking apart to see why failing on Travis --- tests/testthat/test-add-totals.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index 7d00aab6..0b6aacb1 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -114,10 +114,12 @@ test_that("na.rm value works correctly", { ) }) -test_that("add_totals respects whether input was data.frame or data_frame", { +test_that("add_totals respects if input was data.frame", { expect_equal(class(df1), class(df1 %>% 
add_totals())) - +}) + +test_that("add_totals respects if input was data_frame", { expect_equal(class(df1 %>% as_tibble()), class(df1 %>% as_tibble() %>% add_totals())) }) From b9d0cfab152944d901ea0990ba505cad4c9bb64b Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Tue, 21 Mar 2017 22:20:59 -0400 Subject: [PATCH 16/28] switching from as_tibble to as_data_frame in test --- tests/testthat/test-add-totals.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index 0b6aacb1..dec0fdb0 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -4,6 +4,7 @@ library(janitor) context("add_totals function") library(dplyr) + dat <- data.frame(a = c(rep(c("big", "small", "big"), 3)), b = c(1:3, 1:3, 1, 1, 1) ) @@ -120,8 +121,8 @@ test_that("add_totals respects if input was data.frame", { }) test_that("add_totals respects if input was data_frame", { - expect_equal(class(df1 %>% as_tibble()), - class(df1 %>% as_tibble() %>% add_totals())) + expect_equal(class(df1 %>% as_data_frame()), + class(df1 %>% as_data_frame() %>% add_totals())) }) test_that("error thrown if no columns past first are numeric", { From e6852b0672d5738e62a9c296404e6c5131879341 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Tue, 21 Mar 2017 22:26:07 -0400 Subject: [PATCH 17/28] tell Travis not to treat warnings as errors deprecated functions cause warnings in tests --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d92c50a0..b24929e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,10 +6,13 @@ r: - release - devel +warnings_are_errors: false + + os: - linux - osx - + cache: packages r_github_packages: From a1325bfe1ac2c8c3dff54ad94464511eab56e4ce Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Tue, 21 Mar 2017 22:42:18 -0400 Subject: [PATCH 18/28] removing Travis tests on OSX Failing with R-devel because some packages aren't 
available --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b24929e1..57c27d57 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ warnings_are_errors: false os: - linux - - osx +# - osx cache: packages From af6830404ddb16c54575547feb7aad4b01d72fd5 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Tue, 21 Mar 2017 22:49:04 -0400 Subject: [PATCH 19/28] fix agreement error typo --- vignettes/introduction.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index ec0255e7..6dedcca5 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -180,7 +180,7 @@ q %>% ## Exploring ### `add_totals()` -This adds a totals row and/or column to a data.frame. These functions exclude the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. +This adds a totals row and/or column to a data.frame. This function excludes the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. 
```{r} mtcars %>% crosstab(am, cyl) %>% From 0bd3f505995f55d625420fec2989705ee2beb39b Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Tue, 21 Mar 2017 22:53:38 -0400 Subject: [PATCH 20/28] adding test coverage comments to PRs --- codecov.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/codecov.yml b/codecov.yml index 69cb7601..765e05d2 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1 +1,7 @@ -comment: false +comment: + layout: "reach, diff, flags, files" + behavior: default + require_changes: false # if true: only post the comment if coverage changes + require_base: no # [yes :: must have a base report to post] + require_head: yes # [yes :: must have a head report to post] + branches: null From 18053da387febee559f8300119ef0483b5b8bcc0 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Wed, 22 Mar 2017 09:08:36 -0400 Subject: [PATCH 21/28] add_totals("col") retains input factor class in 1st col also redirect add_totals_row and _col calls through add_totals() --- R/add_totals.R | 34 +++----------------- tests/testthat/test-add-totals.R | 4 +-- tests/testthat/tests-add-totals-deprecated.R | 11 +++++++ 3 files changed, 18 insertions(+), 31 deletions(-) diff --git a/R/add_totals.R b/R/add_totals.R index dabd5241..cc176741 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -19,10 +19,10 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ if("grouped_df" %in% class(dat)){ dat <- dplyr::ungroup(dat) } # grouped_df causes problems, #97 - dat[[1]] <- as.character(dat[[1]]) # for type matching when binding the word "Total" on a factor. 
Moved up to this line so that if only 1st col is numeric, the function errors - if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("at least one one of columns 2:n must be of class numeric")} + if(sum(unlist(lapply(dat, is.numeric))[-1]) == 0){stop("at least one one of columns 2:n must be of class numeric")} if("row" %in% which){ + dat[[1]] <- as.character(dat[[1]]) # for type matching when binding the word "Total" on a factor when adding Totals row # creates the totals row to be appended col_vec <- function(a_col, na_rm = na.rm){ if(is.numeric(a_col)){ # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached @@ -44,6 +44,7 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ # Add totals col clean_dat <- clean_names(dat) # bad names will make select_if choke row_totals <- clean_dat %>% + dplyr::select(-1) %>% # don't include the first column, even if numeric dplyr::select_if(is.numeric) %>% dplyr::transmute(Total = rowSums(., na.rm = na.rm)) @@ -73,25 +74,7 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ .Deprecated("add_totals(\"row\")") - clean_dat <- clean_names(dat) # bad names will make select_if choke - - if(ncol(dplyr::select_if(clean_dat, is.numeric)) == 0){stop("data.frame must contain at least one column of class numeric")} - - # creates the totals row to be appended - col_vec <- function(a_col, na_rm = na.rm){ - if(is.numeric(a_col)){ # can't do this with if_else because it doesn't like the sum() of a character vector, even if that clause is not reached - sum(a_col, na.rm = na_rm) - } else {fill} - } - - col_totals <- lapply(dat, col_vec) %>% - as.data.frame(stringsAsFactors = FALSE) %>% - stats::setNames(names(dat)) - - col_totals[nrow(col_totals), 1] <- "Total" # replace final row, first column with "Total" - dplyr::bind_rows(clean_dat %>% - 
stats::setNames(names(dat)) %>% - dplyr::mutate_at(1, as.character), col_totals) + add_totals(dat, which = "row", fill = fill, na.rm = na.rm) } @@ -112,13 +95,6 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ add_totals_col <- function(dat, na.rm = TRUE){ .Deprecated("add_totals(\"col\")") - clean_dat <- clean_names(dat) # bad names will make select_if choke - if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("input data.frame must contain at least one column of class numeric")} - row_totals <- clean_dat %>% - dplyr::select_if(is.numeric) %>% - dplyr::transmute(Total = rowSums(., na.rm = na.rm)) - - dat$Total <- row_totals$Total - dat + add_totals(dat, which = "col", fill = "-", na.rm = na.rm) } diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index dec0fdb0..c17411c7 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -33,7 +33,7 @@ test_that("totals col is correct", { `3` = c(2, 0), Total = c(6, 3), check.names = FALSE, - stringsAsFactors = FALSE) + stringsAsFactors = TRUE) ) }) @@ -84,7 +84,7 @@ test_that("numeric first column is ignored", { crosstab(cyl, gear) %>% add_totals("col"), data.frame( - cyl = c("4", "6", "8"), + cyl = c(4, 6, 8), `3` = c(1, 2, 12), `4` = c(8, 4, 0), `5` = c(2, 1, 2), diff --git a/tests/testthat/tests-add-totals-deprecated.R b/tests/testthat/tests-add-totals-deprecated.R index 082251c3..0d3636c4 100644 --- a/tests/testthat/tests-add-totals-deprecated.R +++ b/tests/testthat/tests-add-totals-deprecated.R @@ -67,3 +67,14 @@ test_that("both functions work with a single column", { expect_error(single_col %>% add_totals_row(), NA) # from http://stackoverflow.com/a/30068233 expect_error(single_col %>% add_totals_row(), NA) }) + +test_that("error thrown for no non-numeric cols after 1st position", { + df2 <- data.frame(x = c("big", "small"), + y = c("hi", "lo")) + expect_error(add_totals_row(df2), + "at least one one of columns 2:n must be of class numeric") + 
expect_error(add_totals_col(df2), + "at least one one of columns 2:n must be of class numeric") + + +}) \ No newline at end of file From 3db268c3ec7cb7a0925506b85bc7fdb67df690bc Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Wed, 22 Mar 2017 09:12:14 -0400 Subject: [PATCH 22/28] adjust adorn_crosstab description, check all columns 2:n are numeric --- R/adorn_crosstab.R | 9 +++++---- man/adorn_crosstab.Rd | 6 ++++-- tests/testthat/test-adorn-crosstab.R | 9 +++++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/R/adorn_crosstab.R b/R/adorn_crosstab.R index 0d19412a..31b9750e 100644 --- a/R/adorn_crosstab.R +++ b/R/adorn_crosstab.R @@ -1,7 +1,9 @@ -#' @title Add formatting to a crosstabulation table. +#' @title Add presentation formatting to a crosstabulation table. #' #' @description -#' Designed to run on the output of a call to \code{crosstab}, this adds formatting, percentage sign, Ns, totals row/column, and custom rounding to a table of numeric values. The result is no longer clean data, but it saves time in reporting table results. +#' Formats a data.frame containing counts of co-occurrences of two variables (i.e., a contingency table or crosstab). Adds a mix of percentages, Ns, totals row/column, and custom rounding to a table of integer counts, in the style of a Microsoft Excel PivotTable. The result is no longer clean data, but is an audience-friendly way to report results. +#' +#' Designed to run on the output of a call to \code{janitor::crosstab}, but can be called on any data.frame containing a contingency table, e.g., the result of \code{dplyr::count()} followed by \code{tidyr::spread()}. #' #' @param dat a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}. #' @param denom the denominator to use for calculating percentages. One of "row", "col", or "all". 
@@ -32,8 +34,7 @@ adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_t # some input checks if(! rounding %in% c("half to even", "half up")){stop("'rounding' must be one of 'half to even' or 'half up'")} dat[[1]] <- as.character(dat[[1]]) # for type matching when binding the word "Total" on a factor. Moved up to this line so that if only 1st col is numeric, the function errors - if(sum(unlist(lapply(dat, is.numeric))) == 0){stop("input data.frame must contain at least one column of class numeric")} # changed from select_if as it can't handle numbers as col names - + if(sum(!unlist(lapply(dat, is.numeric))[-1]) > 0){stop("all columns 2:n in input data.frame must be of class numeric")} showing_col_totals <- (show_totals & denom %in% c("col", "all")) showing_row_totals <- (show_totals & denom %in% c("row", "all")) diff --git a/man/adorn_crosstab.Rd b/man/adorn_crosstab.Rd index a659c83a..e0548449 100644 --- a/man/adorn_crosstab.Rd +++ b/man/adorn_crosstab.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/adorn_crosstab.R \name{adorn_crosstab} \alias{adorn_crosstab} -\title{Add formatting to a crosstabulation table.} +\title{Add presentation formatting to a crosstabulation table.} \usage{ adorn_crosstab(dat, denom = "row", show_n = TRUE, digits = 1, show_totals = FALSE, rounding = "half to even") @@ -24,7 +24,9 @@ adorn_crosstab(dat, denom = "row", show_n = TRUE, digits = 1, Returns a data.frame. } \description{ -Designed to run on the output of a call to \code{crosstab}, this adds formatting, percentage sign, Ns, totals row/column, and custom rounding to a table of numeric values. The result is no longer clean data, but it saves time in reporting table results. +Formats a data.frame containing counts of co-occurrences of two variables (i.e., a contingency table or crosstab). Adds a mix of percentages, Ns, totals row/column, and custom rounding to a table of integer counts, in the style of a Microsoft Excel PivotTable. 
The result is no longer clean data, but is an audience-friendly way to report results. + +Designed to run on the output of a call to \code{janitor::crosstab}, but can be called on any data.frame containing a contingency table, e.g., the result of \code{dplyr::count()} followed by \code{tidyr::spread()}. } \examples{ library(dplyr) # for the \%>\% pipe diff --git a/tests/testthat/test-adorn-crosstab.R b/tests/testthat/test-adorn-crosstab.R index dcf0d12f..70d5edd3 100644 --- a/tests/testthat/test-adorn-crosstab.R +++ b/tests/testthat/test-adorn-crosstab.R @@ -142,4 +142,13 @@ test_that("Totals works with factor column", { test_that("bad inputs are caught", { expect_error(adorn_crosstab(source1, rounding = "up"), "'rounding' must be one of 'half to even' or 'half up'") expect_error(adorn_crosstab(source1, denom = "roww"), "'denom' must be one of 'row', 'col', or 'all'") +}) + +test_that("error thrown if any of columns 2:n in input are not numeric", { + df2 <- data.frame(x = c("big", "small"), + y = c(1:2), + z = c("hi", "lo"), + stringsAsFactors = FALSE) + expect_error(adorn_crosstab(df2), + "all columns 2:n in input data.frame must be of class numeric") }) \ No newline at end of file From c79e6cb318f6f4c6aa686085e5b000a83a23d052 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Wed, 22 Mar 2017 09:54:11 -0400 Subject: [PATCH 23/28] fix small typo in adorn_crosstab --- R/adorn_crosstab.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/adorn_crosstab.R b/R/adorn_crosstab.R index 31b9750e..0cd2e682 100644 --- a/R/adorn_crosstab.R +++ b/R/adorn_crosstab.R @@ -39,7 +39,7 @@ adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_t showing_col_totals <- (show_totals & denom %in% c("col", "all")) showing_row_totals <- (show_totals & denom %in% c("row", "all")) - complete_n <- complete_n <- sum(dat[, -1], na.rm = TRUE) # capture for percent calcs before any totals col/row is added + complete_n <- sum(dat[, -1], na.rm = TRUE) # capture 
for percent calcs before any totals col/row is added if(showing_col_totals){ dat <- add_totals(dat, "col") } if(showing_row_totals){ dat <- add_totals(dat, "row") } From bf9b39f13dfd0c813318d53c30e4a461b6680aca Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Wed, 22 Mar 2017 09:55:32 -0400 Subject: [PATCH 24/28] ns_to_percents works on a data.frame with just one numeric col closes #89 --- R/ns_to_percents.R | 10 +++++----- tests/testthat/test-ns-to-percents.R | 9 +++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index 59f66217..235c25e5 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -38,13 +38,13 @@ ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL){ if(denom == "row"){ - row_sum <- rowSums(dat[, numeric_cols], na.rm = na.rm) - dat[, numeric_cols] <- dat[, numeric_cols] / row_sum + row_sum <- rowSums(dat[numeric_cols], na.rm = na.rm) + dat[, numeric_cols] <- dat[numeric_cols] / row_sum } else if(denom == "col"){ - col_sum <- colSums(dat[, numeric_cols], na.rm = na.rm) - dat[, numeric_cols] <- sweep(dat[, numeric_cols], 2, col_sum,`/`) # from http://stackoverflow.com/questions/9447801/dividing-columns-by-colsums-in-r + col_sum <- colSums(dat[numeric_cols], na.rm = na.rm) + dat[, numeric_cols] <- sweep(dat[numeric_cols], 2, col_sum,`/`) # from http://stackoverflow.com/questions/9447801/dividing-columns-by-colsums-in-r } else if(denom == "all"){ - dat[, numeric_cols] <- dat[, numeric_cols] / complete_n + dat[numeric_cols] <- dat[numeric_cols] / complete_n } dat diff --git a/tests/testthat/test-ns-to-percents.R b/tests/testthat/test-ns-to-percents.R index c3decc5b..28634433 100644 --- a/tests/testthat/test-ns-to-percents.R +++ b/tests/testthat/test-ns-to-percents.R @@ -87,4 +87,13 @@ test_that("override value total_n functions correctly", { cbind(data.frame(cyl = c(4, 6, 8)), ns_to_percents(source1, denom = "all")[, -1] / 10) # divide by 10 because the mtcars n = 32 ) 
+}) + +test_that("works with a single numeric column per #89", { + dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE) + expect_equal(dat %>% ns_to_percents("col"), + data.frame(Operation = c("Login", "Posted", "Deleted"), + `Total Count` = c(5/70, 25/70, 40/70), + check.names = FALSE) + ) }) \ No newline at end of file From 83b4df5ce17035c763107a591f56c9888de6e8f1 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Wed, 29 Mar 2017 21:41:53 -0400 Subject: [PATCH 25/28] rename add_totals() to adorn_totals() --- NAMESPACE | 2 +- R/add_totals.R | 16 +++++------ R/adorn_crosstab.R | 4 +-- R/ns_to_percents.R | 2 +- man/add_totals_col.Rd | 2 +- man/add_totals_row.Rd | 2 +- man/{add_totals.Rd => adorn_totals.Rd} | 8 +++--- man/ns_to_percents.Rd | 2 +- tests/testthat/test-add-totals.R | 38 +++++++++++++------------- vignettes/introduction.Rmd | 4 +-- 10 files changed, 40 insertions(+), 40 deletions(-) rename man/{add_totals.Rd => adorn_totals.Rd} (86%) diff --git a/NAMESPACE b/NAMESPACE index aaefc9a6..1f6622b7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,10 +4,10 @@ S3method(crosstab,data.frame) S3method(crosstab,default) S3method(tabyl,data.frame) S3method(tabyl,default) -export(add_totals) export(add_totals_col) export(add_totals_row) export(adorn_crosstab) +export(adorn_totals) export(clean_names) export(convert_to_NA) export(crosstab) diff --git a/R/add_totals.R b/R/add_totals.R index cc176741..ab8eb8c2 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -13,10 +13,10 @@ #' library(dplyr) # for the %>% pipe #' mtcars %>% #' crosstab(am, cyl) %>% -#' add_totals() +#' adorn_totals() -add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ +adorn_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ if("grouped_df" %in% class(dat)){ dat <- dplyr::ungroup(dat) } # grouped_df causes problems, #97 if(sum(unlist(lapply(dat, is.numeric))[-1]) == 0){stop("at least one one of 
columns 2:n must be of class numeric")} @@ -58,7 +58,7 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ #' @title Append a totals row to a data.frame. #' #' @description -#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. +#' This function is deprecated, use \code{adorn_totals} instead. #' #' @param dat an input data.frame with at least one numeric column. #' @param fill if there are more than one non-numeric columns, what string should fill the bottom row of those columns? @@ -73,15 +73,15 @@ add_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE){ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ - .Deprecated("add_totals(\"row\")") - add_totals(dat, which = "row", fill = fill, na.rm = na.rm) + .Deprecated("adorn_totals(\"row\")") + adorn_totals(dat, which = "row", fill = fill, na.rm = na.rm) } #' @title Append a totals column to a data.frame. #' #' @description -#' This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. +#' This function is deprecated, use \code{adorn_totals} instead. #' #' @param dat an input data.frame with at least one numeric column. #' @param na.rm should missing values (including NaN) be omitted from the calculations? 
@@ -94,7 +94,7 @@ add_totals_row <- function(dat, fill = "-", na.rm = TRUE){ #' add_totals_col add_totals_col <- function(dat, na.rm = TRUE){ - .Deprecated("add_totals(\"col\")") - add_totals(dat, which = "col", fill = "-", na.rm = na.rm) + .Deprecated("adorn_totals(\"col\")") + adorn_totals(dat, which = "col", fill = "-", na.rm = na.rm) } diff --git a/R/adorn_crosstab.R b/R/adorn_crosstab.R index 0cd2e682..fc37b496 100644 --- a/R/adorn_crosstab.R +++ b/R/adorn_crosstab.R @@ -41,8 +41,8 @@ adorn_crosstab <- function(dat, denom = "row", show_n = TRUE, digits = 1, show_t complete_n <- sum(dat[, -1], na.rm = TRUE) # capture for percent calcs before any totals col/row is added - if(showing_col_totals){ dat <- add_totals(dat, "col") } - if(showing_row_totals){ dat <- add_totals(dat, "row") } + if(showing_col_totals){ dat <- adorn_totals(dat, "col") } + if(showing_row_totals){ dat <- adorn_totals(dat, "row") } n_col <- ncol(dat) percs <- ns_to_percents(dat, denom, total_n = complete_n) # last argument only gets used in the "all" case = no harm in passing otherwise diff --git a/R/ns_to_percents.R b/R/ns_to_percents.R index 235c25e5..c5b8abd8 100644 --- a/R/ns_to_percents.R +++ b/R/ns_to_percents.R @@ -19,7 +19,7 @@ #' # when total_n is needed #' mtcars %>% #' crosstab(am, cyl) %>% -#' add_totals("row") %>% # add a totals row that should not be included in the denominator +#' adorn_totals("row") %>% # add a totals row that should not be included in the denominator #' ns_to_percents(denom = "all", total_n = nrow(mtcars)) # specify correct denominator ns_to_percents <- function(dat, denom = "row", na.rm = TRUE, total_n = NULL){ diff --git a/man/add_totals_col.Rd b/man/add_totals_col.Rd index 027fced3..d05b955a 100644 --- a/man/add_totals_col.Rd +++ b/man/add_totals_col.Rd @@ -15,7 +15,7 @@ add_totals_col(dat, na.rm = TRUE) Returns a data.frame with a totals column containing row-wise sums. 
} \description{ -This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. +This function is deprecated, use \code{adorn_totals} instead. } \examples{ library(dplyr) # for the \%>\% pipe diff --git a/man/add_totals_row.Rd b/man/add_totals_row.Rd index c900e47e..2b19f83c 100644 --- a/man/add_totals_row.Rd +++ b/man/add_totals_row.Rd @@ -17,7 +17,7 @@ add_totals_row(dat, fill = "-", na.rm = TRUE) Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. } \description{ -This function excludes the first column of the input data.frame, assuming it's a descriptive variable not to be summed. It also excludes other non-numeric columns. +This function is deprecated, use \code{adorn_totals} instead. } \examples{ library(dplyr) # for the \%>\% pipe diff --git a/man/add_totals.Rd b/man/adorn_totals.Rd similarity index 86% rename from man/add_totals.Rd rename to man/adorn_totals.Rd index bfc50005..e968af05 100644 --- a/man/add_totals.Rd +++ b/man/adorn_totals.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/add_totals.R -\name{add_totals} -\alias{add_totals} +\name{adorn_totals} +\alias{adorn_totals} \title{Append a totals row and/or column to a data.frame.} \usage{ -add_totals(dat, which = c("row", "col"), fill = "-", na.rm = TRUE) +adorn_totals(dat, which = c("row", "col"), fill = "-", na.rm = TRUE) } \arguments{ \item{dat}{an input data.frame with at least one numeric column.} @@ -25,5 +25,5 @@ This function excludes the first column of the input data.frame, assuming it's a library(dplyr) # for the \%>\% pipe mtcars \%>\% crosstab(am, cyl) \%>\% - add_totals() + adorn_totals() } diff --git a/man/ns_to_percents.Rd b/man/ns_to_percents.Rd index d7794dd6..22ea9f60 100644 --- a/man/ns_to_percents.Rd +++ b/man/ns_to_percents.Rd @@ -30,6 +30,6 @@ mtcars \%>\% # when 
total_n is needed mtcars \%>\% crosstab(am, cyl) \%>\% - add_totals("row") \%>\% # add a totals row that should not be included in the denominator + adorn_totals("row") \%>\% # add a totals row that should not be included in the denominator ns_to_percents(denom = "all", total_n = nrow(mtcars)) # specify correct denominator } diff --git a/tests/testthat/test-add-totals.R b/tests/testthat/test-add-totals.R index c17411c7..30294521 100644 --- a/tests/testthat/test-add-totals.R +++ b/tests/testthat/test-add-totals.R @@ -1,7 +1,7 @@ -# Tests add_totals_row and add_totals_col +# Tests adorn_totals and deprecated add_totals_row, add_totals_col library(janitor) -context("add_totals function") +context("adorn_totals & deprecated add_totals functions") library(dplyr) @@ -14,7 +14,7 @@ ct <- dat %>% test_that("totals row is correct", { - expect_equal(add_totals(ct, "row"), + expect_equal(adorn_totals(ct, "row"), data.frame(a = c("big", "small", "Total"), `1` = c(4, 1, 5), `2` = c(0, 2, 2), @@ -26,7 +26,7 @@ test_that("totals row is correct", { test_that("totals col is correct", { - expect_equal(add_totals(ct, "col"), + expect_equal(adorn_totals(ct, "col"), data.frame(a = c("big", "small"), `1` = c(4, 1), `2` = c(0, 2), @@ -40,7 +40,7 @@ test_that("totals col is correct", { test_that("totals row and col produce correct results when called together", { expect_equal(ct %>% - add_totals(c("row", "col")), + adorn_totals(c("row", "col")), data.frame(a = c("big", "small", "Total"), `1` = c(4, 1, 5), `2` = c(0, 2, 2), @@ -53,18 +53,18 @@ test_that("totals row and col produce correct results when called together", { test_that("order doesn't matter when row and col are called together", { expect_equal(ct %>% - add_totals(c("row", "col")), + adorn_totals(c("row", "col")), ct %>% - add_totals(c("col", "row")) + adorn_totals(c("col", "row")) ) }) test_that("both functions work with a single column", { single_col <- data_frame(a = c(as.Date("2016-01-01"), as.Date("2016-02-03")), b = c(1, 
2)) - expect_error(single_col %>% add_totals("row"), NA) # from http://stackoverflow.com/a/30068233 - expect_error(single_col %>% add_totals("col"), NA) - expect_error(single_col %>% add_totals(c("col", "row")), NA) + expect_error(single_col %>% adorn_totals("row"), NA) # from http://stackoverflow.com/a/30068233 + expect_error(single_col %>% adorn_totals("col"), NA) + expect_error(single_col %>% adorn_totals(c("col", "row")), NA) }) @@ -82,7 +82,7 @@ dat <- data.frame( test_that("numeric first column is ignored", { expect_equal(mtcars %>% crosstab(cyl, gear) %>% - add_totals("col"), + adorn_totals("col"), data.frame( cyl = c(4, 6, 8), `3` = c(1, 2, 12), @@ -99,13 +99,13 @@ df1 <- data.frame(x = c(1, 2), y = c(NA, 4)) test_that("grouped_df gets ungrouped and succeeds", { ct <- mtcars %>% group_by(cyl, gear) %>% tally() %>% tidyr::spread(gear, n) - expect_equal(ct %>% add_totals(), - ct %>% ungroup() %>% add_totals + expect_equal(ct %>% adorn_totals(), + ct %>% ungroup() %>% adorn_totals() ) }) test_that("na.rm value works correctly", { - expect_equal(df1 %>% add_totals(na.rm = FALSE), + expect_equal(df1 %>% adorn_totals(na.rm = FALSE), data.frame( x = c("1", "2", "Total"), y = c(NA, 4, NA), @@ -117,24 +117,24 @@ test_that("na.rm value works correctly", { test_that("add_totals respects if input was data.frame", { expect_equal(class(df1), - class(df1 %>% add_totals())) + class(df1 %>% adorn_totals())) }) test_that("add_totals respects if input was data_frame", { expect_equal(class(df1 %>% as_data_frame()), - class(df1 %>% as_data_frame() %>% add_totals())) + class(df1 %>% as_data_frame() %>% adorn_totals())) }) test_that("error thrown if no columns past first are numeric", { df2 <- data.frame(x = c("big", "small"), y = c("hi", "lo")) - expect_error(add_totals(df2, "col"), + expect_error(adorn_totals(df2, "col"), "at least one one of columns 2:n must be of class numeric") # Add a test where only the first column is numeric df3 <- data.frame(x = 1:2, y = c("hi", "lo")) 
- expect_error(add_totals(df3), + expect_error(adorn_totals(df3), "at least one one of columns 2:n must be of class numeric") }) @@ -148,7 +148,7 @@ test_that("works with non-numeric columns mixed in; fill character specification stringsAsFactors = FALSE ) - expect_equal(mixed %>% add_totals(fill = "*"), + expect_equal(mixed %>% adorn_totals(fill = "*"), data.frame(a = c("1", "2", "3", "Total"), b = c("x", "y", "z", "*"), c = c(5, 6, 7, 18), diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 6dedcca5..c9255b22 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -179,12 +179,12 @@ q %>% ## Exploring -### `add_totals()` +### `adorn_totals()` This adds a totals row and/or column to a data.frame. This function excludes the first column of the input data.frame, assuming that it contains a descriptive variable not to be summed. ```{r} mtcars %>% crosstab(am, cyl) %>% - add_totals() + adorn_totals() ``` ### Convert a data.frame of numbers to percentages with `ns_to_percents()` From c23f66911ebf809b9b3aec20b3f529ffe905ac3d Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Wed, 29 Mar 2017 21:49:06 -0400 Subject: [PATCH 26/28] add link to dplyr select_if on bad names issue --- R/add_totals.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_totals.R b/R/add_totals.R index ab8eb8c2..39dc204f 100644 --- a/R/add_totals.R +++ b/R/add_totals.R @@ -42,7 +42,7 @@ adorn_totals <- function(dat, which = c("row", "col"), fill = "-", na.rm = TRUE) if("col" %in% which){ # Add totals col - clean_dat <- clean_names(dat) # bad names will make select_if choke + clean_dat <- clean_names(dat) # bad names will make select_if choke; this may get fixed, see https://github.com/hadley/dplyr/issues/2243 but work around it for now w/ this line row_totals <- clean_dat %>% dplyr::select(-1) %>% # don't include the first column, even if numeric dplyr::select_if(is.numeric) %>% From d57477da1adca58b912acc3e4768bccaf939786b Mon Sep 17 
00:00:00 2001 From: Sam Firke Date: Wed, 29 Mar 2017 22:06:34 -0400 Subject: [PATCH 27/28] update NEWS with adorn_totals() features --- NEWS.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 38dbae95..040aedb7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,19 +5,22 @@ NEWS ## Breaking changes * The first argument of `adorn_crosstab()` is now "dat" instead of "crosstab" (since the function can be called on any data.frame, not just a result of `crosstab()`) -* The functions `add_totals_row` and `add_totals_col` were combined into a single function, `add_totals()`. [(#57)](https://github.com/sfirke/janitor/issues/57) +* The functions `add_totals_row` and `add_totals_col` were combined into a single function, `adorn_totals()` [(#57)](https://github.com/sfirke/janitor/issues/57). The `add_totals_` functions are now deprecated and should not be used. ## Features + ### Major ### Minor -* `add_totals_row()`, `add_totals_col()`, and `ns_to_percents()` can now be called on data.frames that have non-numeric columns beyond the first one (they will be ignored) [(#57)](https://github.com/sfirke/janitor/issues/57) +* `adorn_totals()` and `ns_to_percents()` can now be called on data.frames that have non-numeric columns beyond the first one (they will be ignored) [(#57)](https://github.com/sfirke/janitor/issues/57) +* `adorn_totals("col")` retains factor class in 1st column if that was the input ## Bug fixes * Long variable names with spaces no longer break `tabyl()` and `crosstab()` [(#87)](https://github.com/sfirke/janitor/issues/87) * `clean_names()` now handles leading spaces [(#85)](https://github.com/sfirke/janitor/issues/85) +* `adorn_totals()` now works on a grouped tibble [(#97)](https://github.com/sfirke/janitor/issues/97) # janitor 0.2.1 (Release date: 2016-10-30) From 6fd5cf741b6013f6fa29ef2dacbde12ee4cc7cd7 Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Wed, 29 Mar 2017 22:11:35 -0400 Subject: [PATCH 28/28] note in NEWS that 
adorn_crosstab works on a 2-column df describes #89 --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 040aedb7..af8a5c8b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,7 @@ NEWS ## Bug fixes * Long variable names with spaces no longer break `tabyl()` and `crosstab()` [(#87)](https://github.com/sfirke/janitor/issues/87) * `clean_names()` now handles leading spaces [(#85)](https://github.com/sfirke/janitor/issues/85) +* `adorn_crosstab()` and `ns_to_percents()` work on a 2-column data.frame [(#89)](https://github.com/sfirke/janitor/issues/89) * `adorn_totals()` now works on a grouped tibble [(#97)](https://github.com/sfirke/janitor/issues/97) # janitor 0.2.1 (Release date: 2016-10-30)