sfirke · sfirke · Jul 8, 2016 · Jul 7, 2016 · Jul 7, 2016 · Jul 7, 2016
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,4 @@
 .Rhistory
 .RData
 inst/doc
+janitor.Rproj
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(crosstab,data.frame)
+S3method(crosstab,default)
 export(clean_names)
 export(convert_to_NA)
 export(crosstab)

diff --git a/R/crosstab.R b/R/crosstab.R
@@ -8,58 +8,110 @@
 #' @param percent which grouping to use for percentages, if desired (defaults to counts).  Must be one of "none", "row", "col", or "all".
 #' @param show_na should cases where either variable is NA be included?
 #' @return Returns a data.frame (actually a \code{tbl_df}) with the frequencies of the crosstabulated variables.
-#' @export
+
 #' @examples
 #' crosstab(mtcars$cyl, mtcars$gear)
 #' crosstab(mtcars$cyl, mtcars$gear, "row")
 #'
 
 # Crosstab table of two variables
-# Take two vectors and one of "none", "row", "col", and "full" to calculate %s
+# Take two vectors and one of "none", "row", "col", and "all" to calculate %s
 # Could also take a data.frame and two vector names, for pipeline, but this seems simpler
-crosstab <- function(vec1, vec2, percent = "none", show_na = TRUE){
-  if(!mode(vec1) %in% c("logical", "numeric", "character")){
-    stop("vec1 must be a vector of type logical, numeric, character, or factor")}
-  if(!mode(vec2) %in% c("logical", "numeric", "character")){
-    stop("vec2 must be a vector of type logical, numeric, character, or factor")}
+#' @export
+crosstab <- function(...) UseMethod("crosstab")
+
+#' @inheritParams crosstab
+#' @describeIn crosstab Create a crosstab from two vectors,
+#' displaying either frequencies or percentages calculated by row, column, or overall.
+#' Vectors don't have to be from the same data.frame, but typically are.
+#' @export
+crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE, ...){
+
+
+  if(!mode(vec1) %in% c("logical", "numeric", "character", "list")){
+    stop("vec1 must be a vector of type logical, numeric, character, list, or factor")}
+  if(!mode(vec2) %in% c("logical", "numeric", "character","list")){
+    stop("vec2 must be a vector of type logical, numeric, character, list, or factor")}
   if(length(vec1) != length(vec2)){ stop("the two vectors are not the same length")}
 
-  dat <- data.frame(vec1, vec2, stringsAsFactors = FALSE)
-  var_name <- deparse(substitute(vec1))
+  dat <- data.frame(vec1=vec1, vec2=vec2, stringsAsFactors = FALSE)
+
+  dat_col_names <- names(dat)
+
+  if (is.null(names(vec1))) {
+    var_name <- deparse(substitute(vec1))
+  } else {
+    var_name <- dat_col_names[[1]]
+  }
+
+
 
   if(!show_na){
-    dat <- dat[!is.na(dat$vec1) & !is.na(dat$vec2), ]
+    dat <- dat[!is.na(dat[[1]]) & !is.na(dat[[2]]), ]
   }
-  
+
   # create long data.frame with initial counts
   tabl <- dat %>%
-    dplyr::count(vec1, vec2) %>%
+    dplyr::count_(dat_col_names) %>%
     dplyr::ungroup()
-  
+
   # calculate percentages, if specified
   if(percent == "row"){
     tabl <- tabl %>%
-      dplyr::group_by(vec1) %>%
+      dplyr::group_by_(dat_col_names[[1]]) %>%
       dplyr::mutate(n = n / sum(n, na.rm = TRUE))
   } else if (percent == "col"){
     tabl <- tabl %>%
-      dplyr::group_by(vec2) %>%
+      dplyr::group_by_(dat_col_names[[2]]) %>%
       dplyr::mutate(n = n / sum(n, na.rm = TRUE))
   } else if (percent == "all"){
     tabl <- tabl %>%
       dplyr::mutate(n = n / sum(n, na.rm = TRUE))
   }
-  
+
   # replace NA with string NA in vec2 to avoid invalid col name after spreading
-    # if this col is a factor, need to add that level to the factor
-  if(is.factor(tabl$vec2)){
-    levels(tabl$vec2) <- c(levels(tabl$vec2), "NA")
+  # if this col is a factor, need to add that level to the factor
+  if(is.factor(tabl[[2]])){
+    levels(tabl[[2]]) <- c(levels(tabl[[2]]), "NA")
   }
-  tabl$vec2[is.na(tabl$vec2)] <- "NA"
+  tabl[2][is.na(tabl[2])] <- "NA"
 
   # spread to wide, ungroup() for cleanliness of result, and rename 1st col
   tabl %>%
-    tidyr::spread(vec2, n) %>%
+    tidyr::spread_(dat_col_names[[2]], "n") %>%
     dplyr::ungroup() %>%
     stats::setNames(., c(var_name, names(.)[-1]))
 }
+
+#' @inheritParams crosstab.default
+#' @param .data a data.frame.
+#' @param ... arguments passed to crosstab.default.
+#' @describeIn crosstab Create a crosstab from a data.frame,
+#' displaying either frequencies or percentages calculated by row, column, or overall.
+#' Vectors don't have to be from the same data.frame, but typically are.
+#' @export
+crosstab.data.frame <- function(.data, ...){
+  # collect dots
+  dots <- as.list(substitute(list(...)))[-1L] #
+  n <- length(dots)
+
+  # select columns from .data
+  columns <- dots[1:2]
+
+  x <- list()
+  x[[deparse(columns[[1]])]] <- .data[,deparse(columns[[1]])]
+  x[[deparse(columns[[2]])]] <- .data[,deparse(columns[[2]])]
+  x <- as.data.frame(x)
+
+  # create args list to use with do.call
+  arguments <- list()
+
+  if(n > 2) arguments <- dots[3:n]
+
+  arguments$vec1 <- x[1]
+  arguments$vec2 <- x[2]
+
+  do.call(crosstab.default,
+          args = arguments)
+
+}
diff --git a/man/crosstab.Rd b/man/crosstab.Rd
diff --git a/tests/testthat/test-crosstab.R b/tests/testthat/test-crosstab.R
@@ -13,10 +13,10 @@ dat <- data.frame(
 )
 
 test_that("bad inputs are handled properly", {
-  expect_error(crosstab(list(1, 2), dat$v1), "vec1 must be a vector of type logical, numeric, character, or factor")
-  expect_error(crosstab(dat$v1, list(1, 2)), "vec2 must be a vector of type logical, numeric, character, or factor")
+  #expect_error(crosstab(list(1, 2), dat$v1), "vec1 must be a vector of type logical, numeric, character, or factor")
+  #expect_error(crosstab(dat$v1, list(1, 2)), "vec2 must be a vector of type logical, numeric, character, or factor")
   expect_error(crosstab(c(1, 1), c(1)), "the two vectors are not the same length")
-  })
+})
 
 # simple crosstab w/o NAs
 res <- crosstab(dat$v2, dat$v4)
@@ -38,14 +38,14 @@ test_that("percentages are correct", {
   expect_equal(res_row[[2]], c(0.5, 1/3, NA))
   expect_equal(res_row[[3]], c(NA, 0.5, NA))
   expect_equal(res_row[[4]], c(0.5, 1/6, 1))
-  
+
   res_col <- crosstab(dat$v2, dat$v4, "col")
   expect_equal(res_col[[2]], c(1/3, 2/3, NA))
   expect_equal(res_col[[3]], c(NA, 1, NA))
   expect_equal(res_col[[4]], c(1/3, 1/3, 1/3))
-  
+
   res_all <- crosstab(dat$v2, dat$v4, "all")
-  expect_equal(res_all[, 2:4],tbl_df(res[, 2:4]/9))
+  expect_equal(as.data.frame(res_all[, 2:4]),as.data.frame(res[, 2:4]/9))
 })
 
 z <- crosstab(dat$v3, dat$v1)
@@ -69,3 +69,42 @@ test_that("factor levels order correctly", {
   expect_equal(as.character(vv[[1]]), c("hi", "med", "lo", NA))
   expect_true(is.factor(vv[[1]]))
 })
+
+test_that("crosstab.data.frame dispatches", {
+
+  z <- crosstab(dat, v3, v1)
+
+  expect_equal(z[[1]], as.factor(c("a", "b", NA)))
+  expect_equal(z[[2]], c(1, 1, NA))
+  expect_equal(z[[3]], c(1, 2, 1))
+  expect_equal(z[[4]], c(2, NA, NA))
+  expect_equal(z[[5]], c(NA, NA, 1))
+  expect_equal(names(z), c("v3", "hi", "med", "lo", "NA"))
+})
+
+test_that("crosstab.data.frame is pipeable", {
+  z <- dat %>%
+    crosstab(v3, v1)
+
+    expect_equal(z[[1]], as.factor(c("a", "b", NA)))
+    expect_equal(z[[2]], c(1, 1, NA))
+    expect_equal(z[[3]], c(1, 2, 1))
+    expect_equal(z[[4]], c(2, NA, NA))
+    expect_equal(z[[5]], c(NA, NA, 1))
+    expect_equal(names(z), c("v3", "hi", "med", "lo", "NA"))
+})
+
+test_that("crosstab.data.frame renders percentages are correct", {
+  res_row <- crosstab(dat, v2, v4, "row")
+  expect_equal(res_row[[2]], c(0.5, 1/3, NA))
+  expect_equal(res_row[[3]], c(NA, 0.5, NA))
+  expect_equal(res_row[[4]], c(0.5, 1/6, 1))
+
+  res_col <- crosstab(dat, v2, v4, "col")
+  expect_equal(res_col[[2]], c(1/3, 2/3, NA))
+  expect_equal(res_col[[3]], c(NA, 1, NA))
+  expect_equal(res_col[[4]], c(1/3, 1/3, 1/3))
+
+  res_all <- crosstab(dat, v2, v4, "all")
+  expect_equal(as.data.frame(res_all[, 2:4]),as.data.frame(res[, 2:4]/9))
+})