From 1796d8ac925c48d14d5201745abab2b7af8b817d Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sun, 23 Oct 2016 18:48:03 -0400 Subject: [PATCH] changed to data.frame instead of as.data.frame which allows check.names = FALSE, which - I hope - closes #76 --- R/crosstab.R | 30 +++++++++++++++--------------- R/tabyl.R | 7 ++++--- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/R/crosstab.R b/R/crosstab.R index 1839c4f5..829804f0 100644 --- a/R/crosstab.R +++ b/R/crosstab.R @@ -44,7 +44,7 @@ crosstab <- function(...) UseMethod("crosstab") #' @rdname crosstab #' @export crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE, ...){ - + if(!mode(vec1) %in% c("logical", "numeric", "character", "list") | is.matrix(vec1)){ stop("vec1 must be a vector of type logical, numeric, character, list, or factor")} if(!mode(vec2) %in% c("logical", "numeric", "character","list") | is.matrix(vec2)){ @@ -53,20 +53,20 @@ crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE, ...){ if(! percent %in% c("none", "row", "col", "all")){stop("'percent' must be one of 'none', 'row', 'col', or 'all'")} if(length(vec1) != length(vec2)){ stop("the two vectors are not the same length")} - + dat <- data.frame(vec1 = vec1, vec2 = vec2, stringsAsFactors = FALSE) dat_col_names <- names(dat) - + if(is.null(names(vec1))) { var_name <- deparse(substitute(vec1)) } else { var_name <- names(vec1) } - - + + if(!show_na){ dat <- dat[!is.na(dat[[1]]) & !is.na(dat[[2]]), ] } @@ -75,15 +75,15 @@ crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE, ...){ tabl <- dat %>% dplyr::count_(dat_col_names) %>% dplyr::ungroup() - - + + # replace NA with string NA_ in vec2 to avoid invalid col name after spreading # if this col is a factor, need to add that level to the factor if(is.factor(tabl[[2]])){ levels(tabl[[2]]) <- c(levels(tabl[[2]]), "NA_") } tabl[2][is.na(tabl[2])] <- "NA_" - + # spread to wide, ungroup() for cleanliness of result, and rename 1st col result <- tabl %>% tidyr::spread_(dat_col_names[[2]], "n", fill = 0) %>% @@ -106,7 +106,7 @@ crosstab.data.frame <- function(.data, ...){ # collect dots dots <- as.list(substitute(list(...)))[-1L] # n <- length(dots) - + # select columns from .data columns <- dots[1:2] if(dots[[1]] == dots[[2]]){stop("the same column name is specified for both input variables. Use tabyl() for tabulating a single variable")} @@ -114,19 +114,19 @@ crosstab.data.frame <- function(.data, ...){ x <- list() x[[deparse(columns[[1]])]] <- .data[,deparse(columns[[1]])] x[[deparse(columns[[2]])]] <- .data[,deparse(columns[[2]])] - x <- as.data.frame(x, - stringsAsFactors = FALSE, - check.names = FALSE) # preserve bad input names + x <- data.frame(x, + stringsAsFactors = FALSE, + check.names = FALSE) # preserve bad input names # create args list to use with do.call arguments <- list() - + if(n > 2) arguments <- dots[3:n] - + arguments$vec1 <- x[1] arguments$vec2 <- x[2] do.call(crosstab.default, args = arguments) - + } diff --git a/R/tabyl.R b/R/tabyl.R index 89f47510..47e28879 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -108,9 +108,10 @@ tabyl.data.frame <- function(.data, ...){ columns <- dots[1] x <- list() x[[deparse(columns[[1]])]] <- .data[, deparse(columns[[1]])] - x <- as.data.frame(x, - stringsAsFactors = is.factor(x[[1]]), - check.names = FALSE) # preserve bad input names + + x <- data.frame(x, + stringsAsFactors = is.factor(x[[1]]), + check.names = FALSE) # preserve bad input names # create args list to use with do.call arguments <- list()