From d68f64d8472f96476566fa011ceb8b0930a97ee2 Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 14:21:16 -0500
Subject: [PATCH 1/8] Added janitor.Rproj to .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 09a72cbe..1411f6ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .Rhistory
 .RData
 inst/doc
+janitor.Rproj

From 2e08f44a055c09a8616a9dd9080a48e4f14789fb Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 14:21:58 -0500
Subject: [PATCH 2/8] Created S3 dispatch for crosstab.  It can be pipelined
 now.

---
 R/crosstab.R | 85 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 64 insertions(+), 21 deletions(-)

diff --git a/R/crosstab.R b/R/crosstab.R
index f9a8f3d7..ee08d8a7 100644
--- a/R/crosstab.R
+++ b/R/crosstab.R
@@ -8,58 +8,101 @@
 #' @param percent which grouping to use for percentages, if desired (defaults to counts).  Must be one of "none", "row", "col", or "all".
 #' @param show_na should cases where either variable is NA be included?
 #' @return Returns a data.frame (actually a \code{tbl_df}) with the frequencies of the crosstabulated variables.
-#' @export
+
 #' @examples
 #' crosstab(mtcars$cyl, mtcars$gear)
 #' crosstab(mtcars$cyl, mtcars$gear, "row")
 #'
 
 # Crosstab table of two variables
-# Take two vectors and one of "none", "row", "col", and "full" to calculate %s
+# Take two vectors and one of "none", "row", "col", and "all" to calculate %s
 # Could also take a data.frame and two vector names, for pipeline, but this seems simpler
-crosstab <- function(vec1, vec2, percent = "none", show_na = TRUE){
-  if(!mode(vec1) %in% c("logical", "numeric", "character")){
-    stop("vec1 must be a vector of type logical, numeric, character, or factor")}
-  if(!mode(vec2) %in% c("logical", "numeric", "character")){
-    stop("vec2 must be a vector of type logical, numeric, character, or factor")}
+crosstab <- function(x, ...
+) UseMethod("crosstab")
+
+#' @export
+crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE){
+
+
+  if(!mode(vec1) %in% c("logical", "numeric", "character", "list")){
+    stop("vec1 must be a vector of type logical, numeric, character, list, or factor")}
+  if(!mode(vec2) %in% c("logical", "numeric", "character","list")){
+    stop("vec2 must be a vector of type logical, numeric, character, list, or factor")}
   if(length(vec1) != length(vec2)){ stop("the two vectors are not the same length")}
 
-  dat <- data.frame(vec1, vec2, stringsAsFactors = FALSE)
-  var_name <- deparse(substitute(vec1))
+  dat <- data.frame(vec1=vec1, vec2=vec2, stringsAsFactors = FALSE)
+
+  dat_col_names <- names(dat)
+
+  if (is.null(names(vec1))) {
+    var_name <- deparse(substitute(vec1))
+  } else {
+    var_name <- dat_col_names[[1]]
+  }
+
+
 
   if(!show_na){
-    dat <- dat[!is.na(dat$vec1) & !is.na(dat$vec2), ]
+    dat <- dat[!is.na(dat[[1]]) & !is.na(dat[[2]]), ]
   }
-  
+
   # create long data.frame with initial counts
   tabl <- dat %>%
-    dplyr::count(vec1, vec2) %>%
+    dplyr::count_(dat_col_names) %>%
     dplyr::ungroup()
-  
+
   # calculate percentages, if specified
   if(percent == "row"){
     tabl <- tabl %>%
-      dplyr::group_by(vec1) %>%
+      dplyr::group_by_(dat_col_names[[1]]) %>%
       dplyr::mutate(n = n / sum(n, na.rm = TRUE))
   } else if (percent == "col"){
     tabl <- tabl %>%
-      dplyr::group_by(vec2) %>%
+      dplyr::group_by_(dat_col_names[[2]]) %>%
       dplyr::mutate(n = n / sum(n, na.rm = TRUE))
   } else if (percent == "all"){
     tabl <- tabl %>%
       dplyr::mutate(n = n / sum(n, na.rm = TRUE))
   }
-  
+
   # replace NA with string NA in vec2 to avoid invalid col name after spreading
-    # if this col is a factor, need to add that level to the factor
-  if(is.factor(tabl$vec2)){
-    levels(tabl$vec2) <- c(levels(tabl$vec2), "NA")
+  # if this col is a factor, need to add that level to the factor
+  if(is.factor(tabl[[2]])){
+    levels(tabl[[2]]) <- c(levels(tabl$vec2), "NA")
   }
-  tabl$vec2[is.na(tabl$vec2)] <- "NA"
+  tabl[2][is.na(tabl[2])] <- "NA"
 
   # spread to wide, ungroup() for cleanliness of result, and rename 1st col
   tabl %>%
-    tidyr::spread(vec2, n) %>%
+    tidyr::spread_(dat_col_names[[2]], "n") %>%
     dplyr::ungroup() %>%
     stats::setNames(., c(var_name, names(.)[-1]))
 }
+
+#' @export
+crosstab.data.frame <- function(.data, ...){
+  # collect dots
+  dots <- as.list(substitute(list(...)))[-1L] #
+  n <- length(dots)
+
+  # select columns from .data
+  columns <- dots[1:2]
+
+  arguments <- list()
+
+  if(n > 2) arguments <- dots[3:n]
+
+  x <- list()
+  x[[deparse(columns[[1]])]] <- as.numeric(.data[,deparse(columns[[1]])])
+  x[[deparse(columns[[2]])]] <- .data[,deparse(columns[[2]])]
+  x <- as.data.frame(x)
+
+  arguments$vec1 <- x[1]
+  arguments$vec2 <- x[2]
+
+
+  # create parameters vector to sue with do.call
+  do.call(crosstab.default,
+          args = arguments)
+
+}

From f67bfc85f4dc833518d9fa7ee2959870bb152db3 Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 14:26:53 -0500
Subject: [PATCH 3/8] Moved some code around to clear up when args are being
 constructed

---
 R/crosstab.R | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/R/crosstab.R b/R/crosstab.R
index ee08d8a7..ae863060 100644
--- a/R/crosstab.R
+++ b/R/crosstab.R
@@ -88,20 +88,19 @@ crosstab.data.frame <- function(.data, ...){
   # select columns from .data
   columns <- dots[1:2]
 
-  arguments <- list()
-
-  if(n > 2) arguments <- dots[3:n]
-
   x <- list()
-  x[[deparse(columns[[1]])]] <- as.numeric(.data[,deparse(columns[[1]])])
+  x[[deparse(columns[[1]])]] <- .data[,deparse(columns[[1]])]
   x[[deparse(columns[[2]])]] <- .data[,deparse(columns[[2]])]
   x <- as.data.frame(x)
 
+  # create args list to use with do.call
+  arguments <- list()
+
+  if(n > 2) arguments <- dots[3:n]
+
   arguments$vec1 <- x[1]
   arguments$vec2 <- x[2]
 
-
-  # create parameters vector to sue with do.call
   do.call(crosstab.default,
           args = arguments)
 

From fc738d49245501cacbc0b5438d7882f52aed0cbe Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 14:30:10 -0500
Subject: [PATCH 4/8] Updated tests to accept crosstab.data.frame

Needed crosstab.default to accept lists, expect_equal was also failing on the percent='all' test but shouldnt' have been.
---
 tests/testthat/test-crosstab.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/test-crosstab.R b/tests/testthat/test-crosstab.R
index 47317de0..62babfc4 100644
--- a/tests/testthat/test-crosstab.R
+++ b/tests/testthat/test-crosstab.R
@@ -13,10 +13,10 @@ dat <- data.frame(
 )
 
 test_that("bad inputs are handled properly", {
-  expect_error(crosstab(list(1, 2), dat$v1), "vec1 must be a vector of type logical, numeric, character, or factor")
-  expect_error(crosstab(dat$v1, list(1, 2)), "vec2 must be a vector of type logical, numeric, character, or factor")
+  #expect_error(crosstab(list(1, 2), dat$v1), "vec1 must be a vector of type logical, numeric, character, or factor")
+  #expect_error(crosstab(dat$v1, list(1, 2)), "vec2 must be a vector of type logical, numeric, character, or factor")
   expect_error(crosstab(c(1, 1), c(1)), "the two vectors are not the same length")
-  })
+})
 
 # simple crosstab w/o NAs
 res <- crosstab(dat$v2, dat$v4)
@@ -38,14 +38,14 @@ test_that("percentages are correct", {
   expect_equal(res_row[[2]], c(0.5, 1/3, NA))
   expect_equal(res_row[[3]], c(NA, 0.5, NA))
   expect_equal(res_row[[4]], c(0.5, 1/6, 1))
-  
+
   res_col <- crosstab(dat$v2, dat$v4, "col")
   expect_equal(res_col[[2]], c(1/3, 2/3, NA))
   expect_equal(res_col[[3]], c(NA, 1, NA))
   expect_equal(res_col[[4]], c(1/3, 1/3, 1/3))
-  
+
   res_all <- crosstab(dat$v2, dat$v4, "all")
-  expect_equal(res_all[, 2:4],tbl_df(res[, 2:4]/9))
+  expect_equal(as.data.frame(res_all[, 2:4]),as.data.frame(res[, 2:4]/9))
 })
 
 z <- crosstab(dat$v3, dat$v1)

From 8facd7073a13adccc1aba95d9836020783dc45e3 Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 14:47:26 -0500
Subject: [PATCH 5/8] Added tests for crosstab.data.frame. Updated
 documentation.

---
 NAMESPACE                      |  3 ++-
 R/crosstab.R                   |  2 +-
 man/crosstab.Rd                |  2 +-
 tests/testthat/test-crosstab.R | 39 ++++++++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 71799de9..43932a6d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,9 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(crosstab,data.frame)
+S3method(crosstab,default)
 export(clean_names)
 export(convert_to_NA)
-export(crosstab)
 export(excel_numeric_to_date)
 export(get_dupes)
 export(remove_empty_cols)
diff --git a/R/crosstab.R b/R/crosstab.R
index ae863060..f961d931 100644
--- a/R/crosstab.R
+++ b/R/crosstab.R
@@ -68,7 +68,7 @@ crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE){
   # replace NA with string NA in vec2 to avoid invalid col name after spreading
   # if this col is a factor, need to add that level to the factor
   if(is.factor(tabl[[2]])){
-    levels(tabl[[2]]) <- c(levels(tabl$vec2), "NA")
+    levels(tabl[[2]]) <- c(levels(tabl[[2]]), "NA")
   }
   tabl[2][is.na(tabl[2])] <- "NA"
 
diff --git a/man/crosstab.Rd b/man/crosstab.Rd
index 39d20816..1ae835e9 100644
--- a/man/crosstab.Rd
+++ b/man/crosstab.Rd
@@ -4,7 +4,7 @@
 \alias{crosstab}
 \title{Generate a crosstabulation of two vectors.}
 \usage{
-crosstab(vec1, vec2, percent = "none", show_na = TRUE)
+crosstab(x, ...)
 }
 \arguments{
 \item{vec1}{the vector to place on the crosstab column.}
diff --git a/tests/testthat/test-crosstab.R b/tests/testthat/test-crosstab.R
index 62babfc4..951784c2 100644
--- a/tests/testthat/test-crosstab.R
+++ b/tests/testthat/test-crosstab.R
@@ -69,3 +69,42 @@ test_that("factor levels order correctly", {
   expect_equal(as.character(vv[[1]]), c("hi", "med", "lo", NA))
   expect_true(is.factor(vv[[1]]))
 })
+
+test_that("crosstab.data.frame dispatches", {
+
+  z <- crosstab(dat, v3, v1)
+
+  expect_equal(z[[1]], as.factor(c("a", "b", NA)))
+  expect_equal(z[[2]], c(1, 1, NA))
+  expect_equal(z[[3]], c(1, 2, 1))
+  expect_equal(z[[4]], c(2, NA, NA))
+  expect_equal(z[[5]], c(NA, NA, 1))
+  expect_equal(names(z), c("v3", "hi", "med", "lo", "NA"))
+})
+
+test_that("crosstab.data.frame is pipeable", {
+  z <- dat %>%
+    crosstab(v3, v1)
+
+    expect_equal(z[[1]], as.factor(c("a", "b", NA)))
+    expect_equal(z[[2]], c(1, 1, NA))
+    expect_equal(z[[3]], c(1, 2, 1))
+    expect_equal(z[[4]], c(2, NA, NA))
+    expect_equal(z[[5]], c(NA, NA, 1))
+    expect_equal(names(z), c("v3", "hi", "med", "lo", "NA"))
+})
+
+test_that("crosstab.data.frame renders percentages are correct", {
+  res_row <- crosstab(dat, v2, v4, "row")
+  expect_equal(res_row[[2]], c(0.5, 1/3, NA))
+  expect_equal(res_row[[3]], c(NA, 0.5, NA))
+  expect_equal(res_row[[4]], c(0.5, 1/6, 1))
+
+  res_col <- crosstab(dat, v2, v4, "col")
+  expect_equal(res_col[[2]], c(1/3, 2/3, NA))
+  expect_equal(res_col[[3]], c(NA, 1, NA))
+  expect_equal(res_col[[4]], c(1/3, 1/3, 1/3))
+
+  res_all <- crosstab(dat, v2, v4, "all")
+  expect_equal(as.data.frame(res_all[, 2:4]),as.data.frame(res[, 2:4]/9))
+})

From b8ff8c68a4ac85fe9dac24f92a217c2e1fa7ef72 Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 14:52:28 -0500
Subject: [PATCH 6/8] Exported crosstab method.  Updated NAMESPACE.  R CMD
 Check passes.

---
 NAMESPACE    | 1 +
 R/crosstab.R | 1 +
 2 files changed, 2 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index 43932a6d..51f50580 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ S3method(crosstab,data.frame)
 S3method(crosstab,default)
 export(clean_names)
 export(convert_to_NA)
+export(crosstab)
 export(excel_numeric_to_date)
 export(get_dupes)
 export(remove_empty_cols)
diff --git a/R/crosstab.R b/R/crosstab.R
index f961d931..592460e9 100644
--- a/R/crosstab.R
+++ b/R/crosstab.R
@@ -17,6 +17,7 @@
 # Crosstab table of two variables
 # Take two vectors and one of "none", "row", "col", and "all" to calculate %s
 # Could also take a data.frame and two vector names, for pipeline, but this seems simpler
+#' @export
 crosstab <- function(x, ...
 ) UseMethod("crosstab")
 

From 8980f3ffcf09a7ca6bde343b4f79bd9468583d64 Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 15:11:33 -0500
Subject: [PATCH 7/8] Updated crosstab method docs

---
 R/crosstab.R    | 12 +++++++++++-
 man/crosstab.Rd | 22 +++++++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/R/crosstab.R b/R/crosstab.R
index 592460e9..2af5a586 100644
--- a/R/crosstab.R
+++ b/R/crosstab.R
@@ -18,9 +18,13 @@
 # Take two vectors and one of "none", "row", "col", and "all" to calculate %s
 # Could also take a data.frame and two vector names, for pipeline, but this seems simpler
 #' @export
-crosstab <- function(x, ...
+crosstab <- function(vec1, ...
 ) UseMethod("crosstab")
 
+#' @inheritParams crosstab
+#' @describeIn crosstab Create a crosstab from two vectors,
+#' displaying either frequencies or percentages calculated by row, column, or overall.
+#' Vectors don't have to be from the same data.frame, but typically are.
 #' @export
 crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE){
 
@@ -80,6 +84,12 @@ crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE){
     stats::setNames(., c(var_name, names(.)[-1]))
 }
 
+#' @inheritParams crosstab.default
+#' @param .data a data.frame.
+#' @param ... arguments passed to crosstab.default.
+#' @describeIn crosstab Create a crosstab from a data.frame,
+#' displaying either frequencies or percentages calculated by row, column, or overall.
+#' Vectors don't have to be from the same data.frame, but typically are.
 #' @export
 crosstab.data.frame <- function(.data, ...){
   # collect dots
diff --git a/man/crosstab.Rd b/man/crosstab.Rd
index 1ae835e9..0efa0de6 100644
--- a/man/crosstab.Rd
+++ b/man/crosstab.Rd
@@ -2,18 +2,28 @@
 % Please edit documentation in R/crosstab.R
 \name{crosstab}
 \alias{crosstab}
+\alias{crosstab.data.frame}
+\alias{crosstab.default}
 \title{Generate a crosstabulation of two vectors.}
 \usage{
-crosstab(x, ...)
+crosstab(vec1, ...)
+
+\method{crosstab}{default}(vec1, vec2, percent = "none", show_na = TRUE)
+
+\method{crosstab}{data.frame}(.data, ...)
 }
 \arguments{
 \item{vec1}{the vector to place on the crosstab column.}
 
+\item{...}{arguments passed to crosstab.default.}
+
 \item{vec2}{the vector to place on the crosstab row.}
 
 \item{percent}{which grouping to use for percentages, if desired (defaults to counts).  Must be one of "none", "row", "col", or "all".}
 
 \item{show_na}{should cases where either variable is NA be included?}
+
+\item{.data}{a data.frame.}
 }
 \value{
 Returns a data.frame (actually a \code{tbl_df}) with the frequencies of the crosstabulated variables.
@@ -21,6 +31,16 @@ Returns a data.frame (actually a \code{tbl_df}) with the frequencies of the cros
 \description{
 Create a crosstab, displaying either frequencies or percentages calculated by row, column, or overall.  Vectors don't have to be from the same data.frame, but typically are.
 }
+\section{Methods (by class)}{
+\itemize{
+\item \code{default}: Create a crosstab from two vectors,
+displaying either frequencies or percentages calculated by row, column, or overall.
+Vectors don't have to be from the same data.frame, but typically are.
+
+\item \code{data.frame}: Create a crosstab from a data.frame,
+displaying either frequencies or percentages calculated by row, column, or overall.
+Vectors don't have to be from the same data.frame, but typically are.
+}}
 \examples{
 crosstab(mtcars$cyl, mtcars$gear)
 crosstab(mtcars$cyl, mtcars$gear, "row")

From 3844188eeb292bac9fb93aa5e7e5915aadc6e690 Mon Sep 17 00:00:00 2001
From: Chris Haid <chaid@kippchicago.org>
Date: Thu, 7 Jul 2016 15:16:31 -0500
Subject: [PATCH 8/8] Final fix for crosstab method consistancy

---
 R/crosstab.R    | 5 ++---
 man/crosstab.Rd | 9 +++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/R/crosstab.R b/R/crosstab.R
index 2af5a586..99cd5e34 100644
--- a/R/crosstab.R
+++ b/R/crosstab.R
@@ -18,15 +18,14 @@
 # Take two vectors and one of "none", "row", "col", and "all" to calculate %s
 # Could also take a data.frame and two vector names, for pipeline, but this seems simpler
 #' @export
-crosstab <- function(vec1, ...
-) UseMethod("crosstab")
+crosstab <- function(...) UseMethod("crosstab")
 
 #' @inheritParams crosstab
 #' @describeIn crosstab Create a crosstab from two vectors,
 #' displaying either frequencies or percentages calculated by row, column, or overall.
 #' Vectors don't have to be from the same data.frame, but typically are.
 #' @export
-crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE){
+crosstab.default <- function(vec1, vec2, percent = "none", show_na = TRUE, ...){
 
 
   if(!mode(vec1) %in% c("logical", "numeric", "character", "list")){
diff --git a/man/crosstab.Rd b/man/crosstab.Rd
index 0efa0de6..b7f5c156 100644
--- a/man/crosstab.Rd
+++ b/man/crosstab.Rd
@@ -6,17 +6,18 @@
 \alias{crosstab.default}
 \title{Generate a crosstabulation of two vectors.}
 \usage{
-crosstab(vec1, ...)
+crosstab(...)
 
-\method{crosstab}{default}(vec1, vec2, percent = "none", show_na = TRUE)
+\method{crosstab}{default}(vec1, vec2, percent = "none", show_na = TRUE,
+  ...)
 
 \method{crosstab}{data.frame}(.data, ...)
 }
 \arguments{
-\item{vec1}{the vector to place on the crosstab column.}
-
 \item{...}{arguments passed to crosstab.default.}
 
+\item{vec1}{the vector to place on the crosstab column.}
+
 \item{vec2}{the vector to place on the crosstab row.}
 
 \item{percent}{which grouping to use for percentages, if desired (defaults to counts).  Must be one of "none", "row", "col", or "all".}