Merge branch 'main' into v2.2.1

sfirke · Dec 22, 2024 · 0c47b68 · 0c47b68
2 parents 54409b3 + 7eaa06d
commit 0c47b68
Show file tree

Hide file tree

Showing 195 changed files with 3,495 additions and 17,213 deletions.
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
@@ -23,7 +23,7 @@ If your proposed contribution addresses multiple issues, it should ideally be br
 * Make sure to track progress upstream (i.e., on our version of `janitor` at `sfirke/janitor`) by doing `git remote add upstream https://github.com/sfirke/janitor.git`. Before making changes make sure to pull changes in from upstream by doing either `git fetch upstream` then merge later or `git pull upstream` to fetch and merge in one step
 * Make your changes (bonus points for making changes on a new feature branch)
 * Push up to your account
-* Submit a pull request to the master branch at `sfirke/janitor`
+* Submit a pull request to the main branch at `sfirke/janitor`
 
 ### Prefer to discuss over email?
 Email Sam.  His email address is in the `DESCRIPTION` file of this repo.

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -8,6 +8,8 @@ on:
 
 name: R-CMD-check
 
+permissions: read-all
+
 jobs:
   R-CMD-check:
     runs-on: ${{ matrix.config.os }}
@@ -29,7 +31,7 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -49,3 +51,4 @@ jobs:
       - uses: r-lib/actions/check-r-package@v2
         with:
           upload-snapshots: true
+          build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -11,6 +11,8 @@ on:
 
 name: pkgdown
 
+permissions: read-all
+
 jobs:
   pkgdown:
     runs-on: ubuntu-latest
@@ -19,8 +21,10 @@ jobs:
       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -39,7 +43,7 @@ jobs:
 
       - name: Deploy to GitHub pages 🚀
         if: github.event_name != 'pull_request'
-        uses: JamesIves/github-pages-deploy-action@v4.4.1
+        uses: JamesIves/github-pages-deploy-action@v4.5.0
         with:
           clean: false
           branch: gh-pages

diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml
@@ -0,0 +1,77 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"]
+
+name: Style
+
+permissions: read-all
+
+jobs:
+  style:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - name: Install dependencies
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::styler, any::roxygen2
+          needs: styler
+
+      - name: Enable styler cache
+        run: styler::cache_activate()
+        shell: Rscript {0}
+
+      - name: Determine cache location
+        id: styler-location
+        run: |
+          cat(
+            "location=",
+            styler::cache_info(format = "tabular")$location,
+            "\n",
+            file = Sys.getenv("GITHUB_OUTPUT"),
+            append = TRUE,
+            sep = ""
+          )
+        shell: Rscript {0}
+
+      - name: Cache styler
+        uses: actions/cache@v4
+        with:
+          path: ${{ steps.styler-location.outputs.location }}
+          key: ${{ runner.os }}-styler-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-styler-
+            ${{ runner.os }}-
+
+      - name: Style
+        run: styler::style_pkg()
+        shell: Rscript {0}
+
+      - name: Commit and push changes
+        run: |
+          if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \
+              | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$'))
+          then
+            git config --local user.name "$GITHUB_ACTOR"
+            git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
+            git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)"
+            git pull --ff-only
+            git push origin
+          else
+            echo "No changes to commit."
+          fi
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -8,14 +8,16 @@ on:
 
 name: test-coverage
 
+permissions: read-all
+
 jobs:
   test-coverage:
     runs-on: ubuntu-latest
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-r@v2
         with:
@@ -26,27 +28,37 @@ jobs:
           extra-packages: |
             any::sf
             any::covr
+            any::xml2
           needs: coverage
 
       - name: Test coverage
         run: |
-          covr::codecov(
+          cov <- covr::package_coverage(
             quiet = FALSE,
             clean = FALSE,
-            install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
+            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
           )
+          covr::to_cobertura(cov)
         shell: Rscript {0}
 
+      - uses: codecov/codecov-action@v4
+        with:
+          fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}
+          file: ./cobertura.xml
+          plugin: noop
+          disable_search: true
+          token: ${{ secrets.CODECOV_TOKEN }}
+
       - name: Show testthat output
         if: always()
         run: |
           ## --------------------------------------------------------------------
-          find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
+          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
         shell: bash
 
       - name: Upload test results
         if: failure()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: coverage-test-failures
           path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,22 +1,27 @@
 Package: janitor
 Title: Simple Tools for Examining and Cleaning Dirty Data
 Version: 2.2.1
-Authors@R: c(person("Sam", "Firke", email = "[email protected]", role = c("aut", "cre")),
-    person("Bill", "Denney", email = "[email protected]", role = "ctb"),
-    person("Chris", "Haid", email = "[email protected]", role = "ctb"),
-    person("Ryan", "Knight", email = "[email protected]", role = "ctb"),
-    person("Malte", "Grosser", email = "[email protected]", role = "ctb"),
-    person("Jonathan", "Zadra", email = "[email protected]", role = "ctb"))
-Description: The main janitor functions can: perfectly format data.frame column
-    names; provide quick counts of variable combinations (i.e., frequency
-    tables and crosstabs); and explore duplicate records. Other janitor functions
-    nicely format the tabulation results. These tabulate-and-report functions
-    approximate popular features of SPSS and Microsoft Excel. This package
-    follows the principles of the "tidyverse" and works well with the pipe function
-    %>%. janitor was built with beginning-to-intermediate R users in mind and is
-    optimized for user-friendliness.
-URL: https://github.com/sfirke/janitor,
-    https://sfirke.github.io/janitor/
+Authors@R: c(
+    person("Sam", "Firke", , "[email protected]", role = c("aut", "cre")),
+    person("Bill", "Denney", , "[email protected]", role = "ctb"),
+    person("Chris", "Haid", , "[email protected]", role = "ctb"),
+    person("Ryan", "Knight", , "[email protected]", role = "ctb"),
+    person("Malte", "Grosser", , "[email protected]", role = "ctb"),
+    person("Jonathan", "Zadra", , "[email protected]", role = "ctb"),
+    person("Olivier", "Roy", role = "ctb"),
+    person("Josep", family = "Pueyo-Ros", email = "[email protected]", role = "ctb")
+  )
+Description: The main janitor functions can: perfectly format data.frame
+    column names; provide quick counts of variable combinations (i.e.,
+    frequency tables and crosstabs); and explore duplicate records. Other
+    janitor functions nicely format the tabulation results. These
+    tabulate-and-report functions approximate popular features of SPSS and
+    Microsoft Excel. This package follows the principles of the
+    "tidyverse" and works well with the pipe function %>%. janitor was
+    built with beginning-to-intermediate R users in mind and is optimized
+    for user-friendliness.
+License: MIT + file LICENSE
+URL: https://github.com/sfirke/janitor, https://sfirke.github.io/janitor/
 BugReports: https://github.com/sfirke/janitor/issues
 Depends:
     R (>= 3.1.2)
@@ -28,14 +33,11 @@ Imports:
     magrittr,
     purrr,
     rlang,
+    snakecase (>= 0.9.2),
     stringi,
     stringr,
-    snakecase (>= 0.9.2),
-    tidyselect (>= 1.0.0),
-    tidyr (>= 0.7.0)
-License: MIT + file LICENSE
-LazyData: true
-RoxygenNote: 7.2.3
+    tidyr (>= 1.0.0),
+    tidyselect (>= 1.0.0)
 Suggests:
     dbplyr,
     knitr,
@@ -45,6 +47,10 @@ Suggests:
     testthat (>= 3.0.0),
     tibble,
     tidygraph
-VignetteBuilder: knitr
-Encoding: UTF-8
+VignetteBuilder: 
+    knitr
 Config/testthat/edition: 3
+Encoding: UTF-8
+LazyData: true
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,11 @@ S3method(clean_names,tbl_graph)
 S3method(clean_names,tbl_lazy)
 S3method(describe_class,default)
 S3method(describe_class,factor)
+S3method(excel_time_to_numeric,POSIXct)
+S3method(excel_time_to_numeric,POSIXlt)
+S3method(excel_time_to_numeric,character)
+S3method(excel_time_to_numeric,logical)
+S3method(excel_time_to_numeric,numeric)
 S3method(fisher.test,default)
 S3method(fisher.test,tabyl)
 S3method(print,tabyl)
@@ -34,11 +39,13 @@ export(convert_to_datetime)
 export(crosstab)
 export(describe_class)
 export(excel_numeric_to_date)
+export(excel_time_to_numeric)
 export(find_header)
 export(fisher.test)
 export(get_dupes)
 export(get_one_to_one)
 export(make_clean_names)
+export(paste_skip_na)
 export(remove_constant)
 export(remove_empty)
 export(remove_empty_cols)
@@ -62,6 +69,7 @@ importFrom(lubridate,second)
 importFrom(lubridate,ymd)
 importFrom(lubridate,ymd_hms)
 importFrom(magrittr,"%>%")
+importFrom(rlang,"%||%")
 importFrom(rlang,dots_n)
 importFrom(rlang,expr)
 importFrom(rlang,syms)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,39 @@
+# janitor 2.2.1.9000 - unreleased development version
+
+## Breaking changes
+
+These are all minor breaking changes resulting from enhancements and are not expected to affect the vast majority of users.
+
+* When using `row_to_names()`, when all input values in `row_number` for a column are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`. If existing code relied on a column name of `NA`, it will now error. To fix this, rely on a column name of `"NA"`.
+
+* When `tabyl()` is called on a data.frame containing labels, it now displays the label attribute as the name of the first column in the resulting `tabyl` object (@olivroy, #394). This may break subsequent code that refers to the output of such a `tabyl` by column name. To maintain the previous behavior of ignoring variable labels, you can remove the labels with a function like `haven::zap_labels()` or `labelled::remove_labels()` before calling `tabyl()`.
+
+## New features
+
+* A new function `paste_skip_na()` pastes without including NA values (#537).
+
+* `row_to_names()` now accepts multiple rows as input, and merges them using a new `sep` argument (#536). The default is `sep = "_"`. When handling multiple `NA` values, `row_to_names()` ignores them and only merges non-NA values for column names. When all values are `NA`, `row_to_names()` creates a column name of `"NA"`, a character, rather than `NA`.
+
+* The new function `excel_time_to_numeric()` converts times from Excel that do not have accompanying dates into a number of seconds. (#245, thanks to **@billdenney** for the feature.)
+
+* A new argument `set_labels` to `clean_names()` stores the old names as labels in each column. Variable labels are visualized in RStudio's data viewer or used by default by some packages such as `gt` instead of variable names. Labels can also be used in ggplot labels thanks to the function `easy_labs()` in the `ggeasy` package. Read this wonderful [post](https://www.pipinghotdata.com/posts/2022-09-13-the-case-for-variable-labels-in-r/) for more info about column labels. (#563, thanks to **@jospueyo** for the feature).
+
+## Bug fixes
+
+* `adorn_totals("row")` now succeeds if the new `name` of the totals row is already a factor level of the input data.frame (#529, thanks @egozoglu for reporting).
+
+* `make_clean_names()` no longer accepts a data.frame or tibble as input; use `clean_names()` for that (fix #532, **@billdenney**).
+
+* `get_one_to_one()` no longer errors with near-equal values that become identical factor levels (fix #543, thanks to @olivroy for reporting).
+
+* `clean_names()` for sf objects now works in cases when the sf_column is not the last column name (fix #578, thanks to @ar-puuk for reporting and @billdenney for fixing).
+
+## Refactoring
+
+* Remove dplyr verbs superseded in dplyr 1.0.0 (#547, @olivroy)
+
+* Restyle the package and vignettes according to the [tidyverse style guide](https://style.tidyverse.org) (#548, @olivroy)
+
 # janitor 2.2.1 (2024-12-22)
 
 This is a trivial bugfix release whose only purpose is fixing a test that was failing on CRAN due to the way timezones are handled in Debian. In making that fix (PR #584), we made a small - technically breaking - improvement to a function that works with SAS dates. >99.9% of janitor users should be unaffected by this release.
@@ -6,7 +42,6 @@ This is a trivial bugfix release whose only purpose is fixing a test that was fa
 
 * `sas_numeric_to_date()` now warns for timezones other than "UTC" due to the way that SAS loads timezones, and the default timezone for `sas_numeric_to_date()` is now "UTC" instead of "" (#583, @billdenney)
 
-
 # janitor 2.2.0 (2023-02-02)
 
 ## Breaking changes