From 20fb66b5a2d8a3b0d985fc162dc9490696bcf07e Mon Sep 17 00:00:00 2001 From: Sam Firke Date: Sun, 31 Jul 2016 12:30:05 -0400 Subject: [PATCH] tabyl() is pipable closes #35 --- NAMESPACE | 2 ++ NEWS.md | 2 ++ R/tabyl.R | 16 +++++++--------- man/tabyl.Rd | 24 ++++++++++++++++++++---- tests/testthat/test-tabyl.R | 34 ++++++++++++++++++++-------------- vignettes/introduction.Rmd | 10 +++++++++- vignettes/introduction.md | 16 ++++++++++++++-- 7 files changed, 74 insertions(+), 30 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index f2a568f6..4998e650 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ S3method(crosstab,data.frame) S3method(crosstab,default) +S3method(tabyl,data.frame) +S3method(tabyl,default) export(clean_names) export(convert_to_NA) export(crosstab) diff --git a/NEWS.md b/NEWS.md index 30595220..30a24f77 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,11 +8,13 @@ NEWS ### Major * `crosstab()` can be called in a `%>%` pipeline, e.g., `mtcars %>% crosstab(cyl, gear)`. Thanks to [@chrishaid](https://github.com/chrishaid) [(#34)](https://github.com/sfirke/janitor/pull/34) +* `tabyl()` can also be called in a `%>%` pipeline, e.g., `mtcars %>% tabyl(cyl)` [(#35)](https://github.com/sfirke/janitor/issues/35) * added `use_first_valid_of()` function [(#32)](https://github.com/sfirke/janitor/issues/32) ### Minor * `crosstab()` returns 0 instead of NA when there are no instances of a variable combination. +* A call like `tabyl(df$vecname)` retains the more-descriptive `$` symbol in the column name of the result - if you want a legal R name in the result, call it as `df %>% tabyl(vecname)` * Single and double quotation marks are handled by `clean_names()` ## Bug fixes diff --git a/R/tabyl.R b/R/tabyl.R index dca4bfe0..c9bd0a7e 100644 --- a/R/tabyl.R +++ b/R/tabyl.R @@ -12,7 +12,7 @@ #' tabyl(mtcars$cyl, sort = TRUE) #' # called with magrittr pipe: #' library(dplyr) -#' mtcars %>% .$cyl %>% tabyl() +#' mtcars %>% tabyl(cyl) #' # illustrating show_na functionality: #' my_cars <- rbind(mtcars, rep(NA, 11)) #' tabyl(my_cars$cyl) @@ -22,9 +22,9 @@ tabyl <- function(...) UseMethod("tabyl") #' @inheritParams tabyl -#' @describeIn Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values. A fully-featured alternative to \code{table()}. +#' @describeIn tabyl Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values. A fully-featured alternative to \code{table()}. #' @export -tabyl.default <- function(vec, sort = FALSE, show_na = TRUE){ +tabyl.default <- function(vec, sort = FALSE, show_na = TRUE, ...) { # catch and adjust input variable name. if(is.null(names(vec))) { @@ -54,12 +54,10 @@ tabyl.default <- function(vec, sort = FALSE, show_na = TRUE){ result <- result %>% dplyr::mutate(percent = n / sum(n, na.rm = TRUE)) - # these 4 lines sort the NA row to the bottom, necessary to retain factor sorting - result$is_na <- is.na(result$vec) - result <- result %>% - dplyr::arrange(is_na) %>% - dplyr::select(-is_na) - + # sort the NA row to the bottom, necessary to retain factor sorting + result <- result[order(is.na(result$vec)), ] + result$is_na <- NULL + # reassign correct variable name names(result)[1] <- var_name diff --git a/man/tabyl.Rd b/man/tabyl.Rd index 45e9f7f2..69959b4c 100644 --- a/man/tabyl.Rd +++ b/man/tabyl.Rd @@ -2,29 +2,45 @@ % Please edit documentation in R/tabyl.R \name{tabyl} \alias{tabyl} -\title{Generate a table of a vector.} +\alias{tabyl.data.frame} +\alias{tabyl.default} +\title{Generate a frequency table from a vector.} \usage{ -tabyl(vec, sort = FALSE, show_na = TRUE) +tabyl(...) + +\method{tabyl}{default}(vec, sort = FALSE, show_na = TRUE, ...) + +\method{tabyl}{data.frame}(.data, ...) } \arguments{ +\item{...}{arguments passed to tabyl.default.} + \item{vec}{the vector to tabulate.} \item{sort}{should the resulting table be sorted in descending order?} \item{show_na}{should cases where the variable is NA be shown?} + +\item{.data}{a data.frame.} } \value{ Returns a data.frame (actually a \code{tbl_df}) with the frequencies of the tabulated variable. Includes counts, percentages, and valid percentages (calculated omitting \code{NA} values, if present in the vector and \code{show_na = TRUE}.) } \description{ -Get a frequency table of a variable as a data.frame, showing percentages and with or without including \code{NA} values. A fully-featured alternative to \code{table()}. +Create a frequency table of a variable, returned as a data.frame, showing percentages and with or without including \code{NA} values. A fully-featured alternative to \code{table()}. } +\section{Methods (by class)}{ +\itemize{ +\item \code{default}: Create a frequency table from a vector, returned as a data.frame, showing percentages and with or without including \code{NA} values. A fully-featured alternative to \code{table()}. + +\item \code{data.frame}: Create a frequency table from a variable in a data.frame, returned as a data.frame, showing percentages and with or without including \code{NA} values. A fully-featured alternative to \code{table()}. +}} \examples{ tabyl(mtcars$cyl) tabyl(mtcars$cyl, sort = TRUE) # called with magrittr pipe: library(dplyr) -mtcars \%>\% .$cyl \%>\% tabyl() +mtcars \%>\% tabyl(cyl) # illustrating show_na functionality: my_cars <- rbind(mtcars, rep(NA, 11)) tabyl(my_cars$cyl) diff --git a/tests/testthat/test-tabyl.R b/tests/testthat/test-tabyl.R index fc89a4f3..a669fb11 100644 --- a/tests/testthat/test-tabyl.R +++ b/tests/testthat/test-tabyl.R @@ -7,7 +7,7 @@ context("tabyl") cyl_tbl <- tabyl(mtcars$cyl) test_that("counts are accurate", { - expect_equal(cyl_tbl$mtcars_cyl, c(4, 6, 8)) + expect_equal(cyl_tbl$`mtcars$cyl`, c(4, 6, 8)) expect_equal(cyl_tbl$n, c(11, 7, 14)) }) @@ -22,8 +22,8 @@ test_res <- tabyl(test_df$grp) test_res_na <- tabyl(test_df_na$grp) test_that("names are right", { - expect_equal(names(cyl_tbl), c("mtcars_cyl", "n", "percent")) - expect_equal(names(test_res_na), c("test_df_na_grp", "n", "percent", "valid_percent")) + expect_equal(names(cyl_tbl), c("mtcars$cyl", "n", "percent")) + expect_equal(names(test_res_na), c("test_df_na$grp", "n", "percent", "valid_percent")) }) test_that("NAs handled correctly", { @@ -33,7 +33,7 @@ test_that("NAs handled correctly", { test_that("show_NA = FALSE parameter works", { expect_equal(test_res %>% - stats::setNames(c("test_df_na_grp", names(test_res)[-1])), + stats::setNames(c("test_df_na$grp", names(test_res)[-1])), tabyl(test_df_na$grp, show_na = FALSE)) }) @@ -65,23 +65,29 @@ sorted_with_fac <- data.frame(grp = factor(c("a", "c", "c"), levels = letters[1: sorted_with_fac <- tabyl(sorted_with_fac$grp, sort = TRUE) sorted_with_na_and_fac <- data.frame(grp = factor(c("a", "c", "c", NA), levels = letters[1:3])) -sorted_with_na_and_fac <- tabyl(sorted_with_na_and_fac$grp, sort = TRUE) +sorted_with_na_and_fac_res <- tabyl(sorted_with_na_and_fac$grp, sort = TRUE) test_that("sort parameter works", { expect_equal(sorted_test_df_na[[1]], c("b", "a", "c", NA)) expect_equal(sorted_test_df_na[[4]], c(0.5, 0.25, 0.25, NA)) expect_equal(sorted_with_fac[[1]], factor(c("c", "a", "b"), levels = letters[1:3])) expect_equal(sorted_with_fac[[2]], c(2, 1, NA)) - expect_equal(sorted_with_na_and_fac[[1]], factor(c("c", "a", "b", NA), levels = letters[1:3])) - expect_equal(sorted_with_na_and_fac[[2]], c(2, 1, NA, 1)) + expect_equal(sorted_with_na_and_fac_res[[1]], factor(c("c", "a", "b", NA), levels = letters[1:3])) + expect_equal(sorted_with_na_and_fac_res[[2]], c(2, 1, NA, 1)) }) -# bad inputs - -test_that("failure occurs when passed a list", { - expect_error(tabyl(list(1, 2)), "input must be a logical, numeric, or character vector") +# piping +test_that("piping in a data.frame works", { + expect_equal(tabyl(mtcars$cyl) %>% + setNames(., c("cyl", names(.)[2:3])), + mtcars %>% tabyl(cyl)) + expect_equal(tabyl(sorted_with_na_and_fac$grp, sort = TRUE) %>% # complete levels + correct sorting work for factors with empty categories + setNames(., c("grp", names(.)[-1])), sorted_with_na_and_fac %>% tabyl(grp, sort = TRUE)) }) -test_that("a piped name of dot turns into x", { - expect_equal(mtcars %>% .$gear %>% tabyl %>% names(.) %>% .[1], "x") -}) +# bad inputs + +test_that("failure occurs when passed unsupported types", { + expect_error(tabyl(matrix(1:10, nrow = 5)), "input must be a vector of type logical, numeric, character, list, or factor") + expect_error(tabyl(complex(10)), "input must be a vector of type logical, numeric, character, list, or factor") +}) \ No newline at end of file diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 59d0c0df..1bdada82 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -51,6 +51,8 @@ names(clean_df) # they are clean + It can (optionally) display `NA` values + When `NA` values are present, it will calculate an additional column `valid_percent` in the style of SPSS + It can (optionally) sort on counts ++ It can be called with `%>%` in a pipeline ++ When called on a factor, it will include missing levels in the result (levels not present in the vector) ```{r} x <- c("a", "b", "c", "c", NA) @@ -61,6 +63,12 @@ Compare to: table(x) ``` +Called with a pipe: +```{r} +mtcars %>% tabyl(cyl) +``` + + ## Crosstabulate two variables with `crosstab()` `crosstab()` generates a crosstab table. There many R crosstab functions already; this one is distinguished by: @@ -68,7 +76,7 @@ table(x) + It is simple. + It calculates frequencies by default but can calculate row, column, and table-wise percentages. + It can (optionally) display `NA` values -+ It can be called with `%>%` in a pipeline. ++ It can be called with `%>%` in a pipeline Usage: ```{r} diff --git a/vignettes/introduction.md b/vignettes/introduction.md index 4ecbb4de..6ec2131d 100644 --- a/vignettes/introduction.md +++ b/vignettes/introduction.md @@ -1,6 +1,6 @@ Intro to janitor functions ================ -2016-07-28 +2016-07-31 - [Major functions](#major-functions) - [Clean data.frame names with `clean_names()`](#clean-data.frame-names-with-clean_names) @@ -59,6 +59,8 @@ names(clean_df) # they are clean - It can (optionally) display `NA` values - When `NA` values are present, it will calculate an additional column `valid_percent` in the style of SPSS - It can (optionally) sort on counts +- It can be called with `%>%` in a pipeline +- When called on a factor, it will include missing levels in the result (levels not present in the vector) ``` r x <- c("a", "b", "c", "c", NA) @@ -79,6 +81,16 @@ table(x) #> 1 1 2 ``` +Called with a pipe: + +``` r +mtcars %>% tabyl(cyl) +#> cyl n percent +#> 1 4 11 0.34375 +#> 2 6 7 0.21875 +#> 3 8 14 0.43750 +``` + Crosstabulate two variables with `crosstab()` --------------------------------------------- @@ -88,7 +100,7 @@ Crosstabulate two variables with `crosstab()` - It is simple. - It calculates frequencies by default but can calculate row, column, and table-wise percentages. - It can (optionally) display `NA` values -- It can be called with `%>%` in a pipeline. +- It can be called with `%>%` in a pipeline Usage: