From 20fb66b5a2d8a3b0d985fc162dc9490696bcf07e Mon Sep 17 00:00:00 2001
From: Sam Firke <samuel.firke@gmail.com>
Date: Sun, 31 Jul 2016 12:30:05 -0400
Subject: [PATCH] tabyl() is pipable

closes #35
---
 NAMESPACE                   |  2 ++
 NEWS.md                     |  2 ++
 R/tabyl.R                   | 16 +++++++---------
 man/tabyl.Rd                | 24 ++++++++++++++++++++----
 tests/testthat/test-tabyl.R | 34 ++++++++++++++++++++--------------
 vignettes/introduction.Rmd  | 10 +++++++++-
 vignettes/introduction.md   | 16 ++++++++++++++--
 7 files changed, 74 insertions(+), 30 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index f2a568f6..4998e650 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,8 @@
 
 S3method(crosstab,data.frame)
 S3method(crosstab,default)
+S3method(tabyl,data.frame)
+S3method(tabyl,default)
 export(clean_names)
 export(convert_to_NA)
 export(crosstab)
diff --git a/NEWS.md b/NEWS.md
index 30595220..30a24f77 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,11 +8,13 @@ NEWS
 
 ### Major
 * `crosstab()` can be called in a `%>%` pipeline, e.g., `mtcars %>% crosstab(cyl, gear)`.  Thanks to [@chrishaid](https://github.com/chrishaid) [(#34)](https://github.com/sfirke/janitor/pull/34)
+* `tabyl()` can also be called in a `%>%` pipeline, e.g., `mtcars %>% tabyl(cyl)` [(#35)](https://github.com/sfirke/janitor/issues/35)
 * added `use_first_valid_of()` function [(#32)](https://github.com/sfirke/janitor/issues/32)
 
 ### Minor
 
 * `crosstab()` returns 0 instead of NA when there are no instances of a variable combination.
+* A call like `tabyl(df$vecname)` now retains the more descriptive `$` symbol in the first column name of the result (e.g., `df$vecname`). If you want a syntactically valid R name in the result, call it as `df %>% tabyl(vecname)`
 * Single and double quotation marks are handled by `clean_names()`
 
 ## Bug fixes
diff --git a/R/tabyl.R b/R/tabyl.R
index dca4bfe0..c9bd0a7e 100644
--- a/R/tabyl.R
+++ b/R/tabyl.R
@@ -12,7 +12,7 @@
 #' tabyl(mtcars$cyl, sort = TRUE)
 #' # called with magrittr pipe:
 #' library(dplyr)
-#' mtcars %>% .$cyl %>% tabyl()
+#' mtcars %>% tabyl(cyl)
 #' # illustrating show_na functionality:
 #' my_cars <- rbind(mtcars, rep(NA, 11))
 #' tabyl(my_cars$cyl)
@@ -22,9 +22,9 @@
 tabyl <- function(...) UseMethod("tabyl")
 
 #' @inheritParams tabyl
-#' @describeIn Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
+#' @describeIn tabyl Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
 #' @export
-tabyl.default <- function(vec, sort = FALSE, show_na = TRUE){
+tabyl.default <- function(vec, sort = FALSE, show_na = TRUE, ...) {
   
   # catch and adjust input variable name.
   if(is.null(names(vec))) {
@@ -54,12 +54,10 @@ tabyl.default <- function(vec, sort = FALSE, show_na = TRUE){
   result <- result %>%
     dplyr::mutate(percent = n / sum(n, na.rm = TRUE))
   
-  # these 4 lines sort the NA row to the bottom, necessary to retain factor sorting  
-  result$is_na <- is.na(result$vec)
-  result <- result %>%
-    dplyr::arrange(is_na) %>%
-    dplyr::select(-is_na)
-  
+  # move the NA row to the bottom; order() is stable, so the other rows keep their existing order (necessary to retain factor sorting)
+  result <- result[order(is.na(result$vec)), ]
+  result$is_na <- NULL
+
   # reassign correct variable name
   names(result)[1] <- var_name
   
diff --git a/man/tabyl.Rd b/man/tabyl.Rd
index 45e9f7f2..69959b4c 100644
--- a/man/tabyl.Rd
+++ b/man/tabyl.Rd
@@ -2,29 +2,45 @@
 % Please edit documentation in R/tabyl.R
 \name{tabyl}
 \alias{tabyl}
-\title{Generate a table of a vector.}
+\alias{tabyl.data.frame}
+\alias{tabyl.default}
+\title{Generate a frequency table from a vector.}
 \usage{
-tabyl(vec, sort = FALSE, show_na = TRUE)
+tabyl(...)
+
+\method{tabyl}{default}(vec, sort = FALSE, show_na = TRUE, ...)
+
+\method{tabyl}{data.frame}(.data, ...)
 }
 \arguments{
+\item{...}{arguments passed to tabyl.default.}
+
 \item{vec}{the vector to tabulate.}
 
 \item{sort}{should the resulting table be sorted in descending order?}
 
 \item{show_na}{should cases where the variable is NA be shown?}
+
+\item{.data}{a data.frame.}
 }
 \value{
 Returns a data.frame (actually a \code{tbl_df}) with the frequencies of the tabulated variable.  Includes counts, percentages, and valid percentages (calculated omitting \code{NA} values, if present in the vector and \code{show_na = TRUE}.)
 }
 \description{
-Get a frequency table of a variable as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
+Create a frequency table of a variable, returned as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
 }
+\section{Methods (by class)}{
+\itemize{
+\item \code{default}: Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
+
+\item \code{data.frame}: Create a frequency table from a variable in a data.frame, returned as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
+}}
 \examples{
 tabyl(mtcars$cyl)
 tabyl(mtcars$cyl, sort = TRUE)
 # called with magrittr pipe:
 library(dplyr)
-mtcars \%>\% .$cyl \%>\% tabyl()
+mtcars \%>\% tabyl(cyl)
 # illustrating show_na functionality:
 my_cars <- rbind(mtcars, rep(NA, 11))
 tabyl(my_cars$cyl)
diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R
index fc89a4f3..a669fb11 100644
--- a/tests/testthat/test-tabyl.R
+++ b/tests/testthat/test-tabyl.R
@@ -7,7 +7,7 @@ context("tabyl")
 cyl_tbl <- tabyl(mtcars$cyl)
 
 test_that("counts are accurate", {
-  expect_equal(cyl_tbl$mtcars_cyl, c(4, 6, 8))
+  expect_equal(cyl_tbl$`mtcars$cyl`, c(4, 6, 8))
   expect_equal(cyl_tbl$n, c(11, 7, 14))
 })
 
@@ -22,8 +22,8 @@ test_res <- tabyl(test_df$grp)
 test_res_na <- tabyl(test_df_na$grp)
 
 test_that("names are right", {
-  expect_equal(names(cyl_tbl), c("mtcars_cyl", "n", "percent"))
-  expect_equal(names(test_res_na), c("test_df_na_grp", "n", "percent", "valid_percent"))
+  expect_equal(names(cyl_tbl), c("mtcars$cyl", "n", "percent"))
+  expect_equal(names(test_res_na), c("test_df_na$grp", "n", "percent", "valid_percent"))
 })
 
 test_that("NAs handled correctly", {
@@ -33,7 +33,7 @@ test_that("NAs handled correctly", {
 
 test_that("show_NA = FALSE parameter works", {
   expect_equal(test_res %>%
-                 stats::setNames(c("test_df_na_grp", names(test_res)[-1])),
+                 stats::setNames(c("test_df_na$grp", names(test_res)[-1])),
                tabyl(test_df_na$grp, show_na = FALSE))
 })
 
@@ -65,23 +65,29 @@ sorted_with_fac <- data.frame(grp = factor(c("a", "c", "c"), levels = letters[1:
 sorted_with_fac <- tabyl(sorted_with_fac$grp, sort = TRUE)
 
 sorted_with_na_and_fac <- data.frame(grp = factor(c("a", "c", "c", NA), levels = letters[1:3]))
-sorted_with_na_and_fac <- tabyl(sorted_with_na_and_fac$grp, sort = TRUE)
+sorted_with_na_and_fac_res <- tabyl(sorted_with_na_and_fac$grp, sort = TRUE)
 
 test_that("sort parameter works", {
   expect_equal(sorted_test_df_na[[1]], c("b", "a", "c", NA))
   expect_equal(sorted_test_df_na[[4]], c(0.5, 0.25, 0.25, NA))
   expect_equal(sorted_with_fac[[1]], factor(c("c", "a", "b"), levels = letters[1:3]))
   expect_equal(sorted_with_fac[[2]], c(2, 1, NA))
-  expect_equal(sorted_with_na_and_fac[[1]], factor(c("c", "a", "b", NA), levels = letters[1:3]))
-  expect_equal(sorted_with_na_and_fac[[2]], c(2, 1, NA, 1))
+  expect_equal(sorted_with_na_and_fac_res[[1]], factor(c("c", "a", "b", NA), levels = letters[1:3]))
+  expect_equal(sorted_with_na_and_fac_res[[2]], c(2, 1, NA, 1))
 })
 
-# bad inputs
-
-test_that("failure occurs when passed a list", {
-  expect_error(tabyl(list(1, 2)), "input must be a logical, numeric, or character vector")
+# piping
+test_that("piping in a data.frame works", {
+  expect_equal(tabyl(mtcars$cyl) %>%
+                 setNames(., c("cyl", names(.)[2:3])),
+               mtcars %>% tabyl(cyl))
+  expect_equal(tabyl(sorted_with_na_and_fac$grp, sort = TRUE) %>% # complete levels + correct sorting work for factors with empty categories
+                 setNames(., c("grp", names(.)[-1])), sorted_with_na_and_fac %>% tabyl(grp, sort = TRUE))
 })
 
-test_that("a piped name of dot turns into x", {
-  expect_equal(mtcars %>% .$gear %>% tabyl %>% names(.) %>% .[1], "x")
-})
+# bad inputs
+
+test_that("failure occurs when passed unsupported types", {
+  expect_error(tabyl(matrix(1:10, nrow = 5)), "input must be a vector of type logical, numeric, character, list, or factor")
+  expect_error(tabyl(complex(10)), "input must be a vector of type logical, numeric, character, list, or factor")
+})
\ No newline at end of file
diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd
index 59d0c0df..1bdada82 100644
--- a/vignettes/introduction.Rmd
+++ b/vignettes/introduction.Rmd
@@ -51,6 +51,8 @@ names(clean_df) # they are clean
 + It can (optionally) display `NA` values
     + When `NA` values are present, it will calculate an additional column `valid_percent` in the style of SPSS
 + It can (optionally) sort on counts
++ It can be called with `%>%` in a pipeline
++ When called on a factor, it will include missing levels in the result (levels not present in the vector)
 
 ```{r}
 x <- c("a", "b", "c", "c", NA)
@@ -61,6 +63,12 @@ Compare to:
 table(x)
 ```
 
+Called with a pipe:
+```{r}
+mtcars %>% tabyl(cyl)
+```
+
+
 ## Crosstabulate two variables with `crosstab()`
 `crosstab()` generates a crosstab table.  There many R crosstab functions already; this one is distinguished by:
 
@@ -68,7 +76,7 @@ table(x)
 + It is simple.
     + It calculates frequencies by default but can calculate row, column, and table-wise percentages.
     + It can (optionally) display `NA` values
-+ It can be called with `%>%` in a pipeline.
++ It can be called with `%>%` in a pipeline
 
 Usage:
 ```{r}
diff --git a/vignettes/introduction.md b/vignettes/introduction.md
index 4ecbb4de..6ec2131d 100644
--- a/vignettes/introduction.md
+++ b/vignettes/introduction.md
@@ -1,6 +1,6 @@
 Intro to janitor functions
 ================
-2016-07-28
+2016-07-31
 
 -   [Major functions](#major-functions)
     -   [Clean data.frame names with `clean_names()`](#clean-data.frame-names-with-clean_names)
@@ -59,6 +59,8 @@ names(clean_df) # they are clean
 -   It can (optionally) display `NA` values
     -   When `NA` values are present, it will calculate an additional column `valid_percent` in the style of SPSS
 -   It can (optionally) sort on counts
+-   It can be called with `%>%` in a pipeline
+-   When called on a factor, it will include missing levels in the result (levels not present in the vector)
 
 ``` r
 x <- c("a", "b", "c", "c", NA)
@@ -79,6 +81,16 @@ table(x)
 #> 1 1 2
 ```
 
+Called with a pipe:
+
+``` r
+mtcars %>% tabyl(cyl)
+#>   cyl  n percent
+#> 1   4 11 0.34375
+#> 2   6  7 0.21875
+#> 3   8 14 0.43750
+```
+
 Crosstabulate two variables with `crosstab()`
 ---------------------------------------------
 
@@ -88,7 +100,7 @@ Crosstabulate two variables with `crosstab()`
 -   It is simple.
     -   It calculates frequencies by default but can calculate row, column, and table-wise percentages.
     -   It can (optionally) display `NA` values
--   It can be called with `%>%` in a pipeline.
+-   It can be called with `%>%` in a pipeline
 
 Usage: