tabyl() is pipable

closes #35
sfirke · Jul 31, 2016 · 20fb66b · 20fb66b
1 parent 9d9a7fe
commit 20fb66b
Show file tree

Hide file tree

Showing 7 changed files with 74 additions and 30 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,8 @@
 
 S3method(crosstab,data.frame)
 S3method(crosstab,default)
+S3method(tabyl,data.frame)
+S3method(tabyl,default)
 export(clean_names)
 export(convert_to_NA)
 export(crosstab)

diff --git a/NEWS.md b/NEWS.md
@@ -8,11 +8,13 @@ NEWS
 
 ### Major
 * `crosstab()` can be called in a `%>%` pipeline, e.g., `mtcars %>% crosstab(cyl, gear)`.  Thanks to [@chrishaid](https://github.com/chrishaid) [(#34)](https://github.com/sfirke/janitor/pull/34)
+* `tabyl()` can also be called in a `%>%` pipeline, e.g., `mtcars %>% tabyl(cyl)` [(#35)](https://github.com/sfirke/janitor/issues/35)
 * added `use_first_valid_of()` function [(#32)](https://github.com/sfirke/janitor/issues/32)
 
 ### Minor
 
 * `crosstab()` returns 0 instead of NA when there are no instances of a variable combination.
+* A call like `tabyl(df$vecname)` retains the more descriptive `$` symbol in the column name of the result; if you want a syntactically valid R name in the result, call it as `df %>% tabyl(vecname)`
 * Single and double quotation marks are handled by `clean_names()`
 
 ## Bug fixes

diff --git a/R/tabyl.R b/R/tabyl.R
@@ -12,7 +12,7 @@
 #' tabyl(mtcars$cyl, sort = TRUE)
 #' # called with magrittr pipe:
 #' library(dplyr)
-#' mtcars %>% .$cyl %>% tabyl()
+#' mtcars %>% tabyl(cyl)
 #' # illustrating show_na functionality:
 #' my_cars <- rbind(mtcars, rep(NA, 11))
 #' tabyl(my_cars$cyl)
@@ -22,9 +22,9 @@
 tabyl <- function(...) UseMethod("tabyl")
 
 #' @inheritParams tabyl
-#' @describeIn Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
+#' @describeIn tabyl Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values.  A fully-featured alternative to \code{table()}.
 #' @export
-tabyl.default <- function(vec, sort = FALSE, show_na = TRUE){
+tabyl.default <- function(vec, sort = FALSE, show_na = TRUE, ...) {
 
   # catch and adjust input variable name.
   if(is.null(names(vec))) {
@@ -54,12 +54,10 @@ tabyl.default <- function(vec, sort = FALSE, show_na = TRUE){
   result <- result %>%
     dplyr::mutate(percent = n / sum(n, na.rm = TRUE))
 
-  # these 4 lines sort the NA row to the bottom, necessary to retain factor sorting  
-  result$is_na <- is.na(result$vec)
-  result <- result %>%
-    dplyr::arrange(is_na) %>%
-    dplyr::select(-is_na)
-
+  # sort the NA row to the bottom, necessary to retain factor sorting  
+  result <- result[order(is.na(result$vec)), ]
+  result$is_na <- NULL
+
   # reassign correct variable name
   names(result)[1] <- var_name
 

diff --git a/man/tabyl.Rd b/man/tabyl.Rd
diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R
@@ -7,7 +7,7 @@ context("tabyl")
 cyl_tbl <- tabyl(mtcars$cyl)
 
 test_that("counts are accurate", {
-  expect_equal(cyl_tbl$mtcars_cyl, c(4, 6, 8))
+  expect_equal(cyl_tbl$`mtcars$cyl`, c(4, 6, 8))
   expect_equal(cyl_tbl$n, c(11, 7, 14))
 })
 
@@ -22,8 +22,8 @@ test_res <- tabyl(test_df$grp)
 test_res_na <- tabyl(test_df_na$grp)
 
 test_that("names are right", {
-  expect_equal(names(cyl_tbl), c("mtcars_cyl", "n", "percent"))
-  expect_equal(names(test_res_na), c("test_df_na_grp", "n", "percent", "valid_percent"))
+  expect_equal(names(cyl_tbl), c("mtcars$cyl", "n", "percent"))
+  expect_equal(names(test_res_na), c("test_df_na$grp", "n", "percent", "valid_percent"))
 })
 
 test_that("NAs handled correctly", {
@@ -33,7 +33,7 @@ test_that("NAs handled correctly", {
 
 test_that("show_NA = FALSE parameter works", {
   expect_equal(test_res %>%
-                 stats::setNames(c("test_df_na_grp", names(test_res)[-1])),
+                 stats::setNames(c("test_df_na$grp", names(test_res)[-1])),
                tabyl(test_df_na$grp, show_na = FALSE))
 })
 
@@ -65,23 +65,29 @@ sorted_with_fac <- data.frame(grp = factor(c("a", "c", "c"), levels = letters[1:
 sorted_with_fac <- tabyl(sorted_with_fac$grp, sort = TRUE)
 
 sorted_with_na_and_fac <- data.frame(grp = factor(c("a", "c", "c", NA), levels = letters[1:3]))
-sorted_with_na_and_fac <- tabyl(sorted_with_na_and_fac$grp, sort = TRUE)
+sorted_with_na_and_fac_res <- tabyl(sorted_with_na_and_fac$grp, sort = TRUE)
 
 test_that("sort parameter works", {
   expect_equal(sorted_test_df_na[[1]], c("b", "a", "c", NA))
   expect_equal(sorted_test_df_na[[4]], c(0.5, 0.25, 0.25, NA))
   expect_equal(sorted_with_fac[[1]], factor(c("c", "a", "b"), levels = letters[1:3]))
   expect_equal(sorted_with_fac[[2]], c(2, 1, NA))
-  expect_equal(sorted_with_na_and_fac[[1]], factor(c("c", "a", "b", NA), levels = letters[1:3]))
-  expect_equal(sorted_with_na_and_fac[[2]], c(2, 1, NA, 1))
+  expect_equal(sorted_with_na_and_fac_res[[1]], factor(c("c", "a", "b", NA), levels = letters[1:3]))
+  expect_equal(sorted_with_na_and_fac_res[[2]], c(2, 1, NA, 1))
 })
 
-# bad inputs
-
-test_that("failure occurs when passed a list", {
-  expect_error(tabyl(list(1, 2)), "input must be a logical, numeric, or character vector")
+# piping
+test_that("piping in a data.frame works", {
+  expect_equal(tabyl(mtcars$cyl) %>%
+                 setNames(., c("cyl", names(.)[2:3])),
+               mtcars %>% tabyl(cyl))
+  expect_equal(tabyl(sorted_with_na_and_fac$grp, sort = TRUE) %>% # complete levels + correct sorting work for factors with empty categories
+                 setNames(., c("grp", names(.)[-1])), sorted_with_na_and_fac %>% tabyl(grp, sort = TRUE))
 })
 
-test_that("a piped name of dot turns into x", {
-  expect_equal(mtcars %>% .$gear %>% tabyl %>% names(.) %>% .[1], "x")
-})
+# bad inputs
+
+test_that("failure occurs when passed unsupported types", {
+  expect_error(tabyl(matrix(1:10, nrow = 5)), "input must be a vector of type logical, numeric, character, list, or factor")
+  expect_error(tabyl(complex(10)), "input must be a vector of type logical, numeric, character, list, or factor")
+})
diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd
@@ -51,6 +51,8 @@ names(clean_df) # they are clean
 + It can (optionally) display `NA` values
     + When `NA` values are present, it will calculate an additional column `valid_percent` in the style of SPSS
 + It can (optionally) sort on counts
++ It can be called with `%>%` in a pipeline
++ When called on a factor, it will include missing levels in the result (levels not present in the vector)
 
 ```{r}
 x <- c("a", "b", "c", "c", NA)
@@ -61,14 +63,20 @@ Compare to:
 table(x)
 ```
 
+Called with a pipe:
+```{r}
+mtcars %>% tabyl(cyl)
+```
+
+
 ## Crosstabulate two variables with `crosstab()`
 `crosstab()` generates a crosstab table.  There are many R crosstab functions already; this one is distinguished by:
 
 + It returns a data.frame
 + It is simple.
     + It calculates frequencies by default but can calculate row, column, and table-wise percentages.
     + It can (optionally) display `NA` values
-+ It can be called with `%>%` in a pipeline.
++ It can be called with `%>%` in a pipeline
 
 Usage:
 ```{r}

diff --git a/vignettes/introduction.md b/vignettes/introduction.md
@@ -1,6 +1,6 @@
 Intro to janitor functions
 ================
-2016-07-28
+2016-07-31
 
 -   [Major functions](#major-functions)
     -   [Clean data.frame names with `clean_names()`](#clean-data.frame-names-with-clean_names)
@@ -59,6 +59,8 @@ names(clean_df) # they are clean
 -   It can (optionally) display `NA` values
     -   When `NA` values are present, it will calculate an additional column `valid_percent` in the style of SPSS
 -   It can (optionally) sort on counts
+-   It can be called with `%>%` in a pipeline
+-   When called on a factor, it will include missing levels in the result (levels not present in the vector)
 
 ``` r
 x <- c("a", "b", "c", "c", NA)
@@ -79,6 +81,16 @@ table(x)
 #> 1 1 2
 ```
 
+Called with a pipe:
+
+``` r
+mtcars %>% tabyl(cyl)
+#>   cyl  n percent
+#> 1   4 11 0.34375
+#> 2   6  7 0.21875
+#> 3   8 14 0.43750
+```
+
 Crosstabulate two variables with `crosstab()`
 ---------------------------------------------
 
@@ -88,7 +100,7 @@ Crosstabulate two variables with `crosstab()`
 -   It is simple.
     -   It calculates frequencies by default but can calculate row, column, and table-wise percentages.
     -   It can (optionally) display `NA` values
--   It can be called with `%>%` in a pipeline.
+-   It can be called with `%>%` in a pipeline
 
 Usage: