diff --git a/NAMESPACE b/NAMESPACE index 0f5511b7..f6f8acca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,6 +50,12 @@ export(top_levels) export(untabyl) export(use_first_valid_of) importFrom(dplyr,rename_all) +importFrom(lubridate,as_date) +importFrom(lubridate,as_datetime) +importFrom(lubridate,force_tz) +importFrom(lubridate,hour) +importFrom(lubridate,minute) +importFrom(lubridate,second) importFrom(lubridate,ymd) importFrom(lubridate,ymd_hms) importFrom(magrittr,"%>%") diff --git a/NEWS.md b/NEWS.md index e133c7a9..d03a9f9b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ## Breaking changes * Microsoft Excel incorrectly has a leap day on 29 February 1900 (see https://docs.microsoft.com/en-us/office/troubleshoot/excel/wrongly-assumes-1900-is-leap-year). `excel_numeric_to_date()` did not account for this error, and now it does (thanks **@billdenney** for fixing). Dates output from `excel_numeric_to_date()` before 1 March 1900 will now be one day later compared to previous versions (i.e. what was 1 Feb 1900 is now 2 Feb 1900), and dates that Excel presents as 29 Feb 1900 will become `as.POSIXct(NA)`. (thanks **@billdenney** for fixing) +* A minor breaking change is the time zone is always set for `excel_numeric_to_date()` and `convert_date()`. The default timezone is `Sys.timezone()` while before it was an empty string (`""`). ## Minor features diff --git a/R/excel_dates.R b/R/excel_dates.R index 0cec2730..d32b951a 100644 --- a/R/excel_dates.R +++ b/R/excel_dates.R @@ -42,16 +42,14 @@ #' excel_numeric_to_date(40000.521, include_time = TRUE, #' round_seconds = FALSE) # Time with fractional seconds is included #' @family Date-time cleaning - -# Converts a numeric value like 42414 into a date "2016-02-14" - -excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = "") { +#' @importFrom lubridate as_date as_datetime force_tz hour minute second +excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = Sys.timezone()) { if (all(is.na(date_num))) { # For NA input, return the expected type of NA output. if (include_time) { - return(as.POSIXlt(as.character(date_num))) + return(lubridate::as_datetime(date_num, tz=tz)) } else { - return(as.Date(as.character(date_num))) + return(lubridate::as_date(date_num)) } } else if (!is.numeric(date_num)) { stop("argument `date_num` must be of class numeric") @@ -77,18 +75,18 @@ excel_numeric_to_date <- function(date_num, date_system = "modern", include_time date_num_days[mask_before_excel_leap_day_bug] <- date_num_days[mask_before_excel_leap_day_bug] + 1 ret <- if (date_system == "mac pre-2011") { - as.Date(floor(date_num_days), origin = "1904-01-01") + lubridate::as_date(floor(date_num_days), origin = "1904-01-01") } else if (date_system == "modern") { - as.Date(floor(date_num_days), origin = "1899-12-30") + lubridate::as_date(floor(date_num_days), origin = "1899-12-30") } else { stop("argument 'date_system' must be one of 'mac pre-2011' or 'modern'") } if (include_time) { - ret <- as.POSIXlt(ret, tz = tz) - ret$sec <- date_num_seconds %% 60 - ret$min <- floor(date_num_seconds/60) %% 60 - ret$hour <- floor(date_num_seconds/3600) - ret <- as.POSIXct(ret, tz = tz) + ret <- lubridate::as_datetime(ret) + lubridate::second(ret) <- date_num_seconds %% 60 + lubridate::minute(ret) <- floor(date_num_seconds/60) %% 60 + lubridate::hour(ret) <- floor(date_num_seconds/3600) + ret <- lubridate::force_tz(ret, tzone=tz) } if (any(mask_excel_leap_day_bug)) { warning("NAs introduced by coercion, Excel leap day bug detected in `date_num`. 29 February 1900 does not exist.") diff --git a/man/excel_numeric_to_date.Rd b/man/excel_numeric_to_date.Rd index 9895c68e..01727bf7 100644 --- a/man/excel_numeric_to_date.Rd +++ b/man/excel_numeric_to_date.Rd @@ -9,7 +9,7 @@ excel_numeric_to_date( date_system = "modern", include_time = FALSE, round_seconds = TRUE, - tz = "" + tz = Sys.timezone() ) } \arguments{ diff --git a/tests/testthat/test-date-conversion.R b/tests/testthat/test-date-conversion.R index c1707ea1..1cde5ee9 100644 --- a/tests/testthat/test-date-conversion.R +++ b/tests/testthat/test-date-conversion.R @@ -59,12 +59,16 @@ test_that("time handling at the edge of the next date works correctly", { }) test_that("excel_numeric_to_date handles NA", { - expect_equal(excel_numeric_to_date(NA), - as.Date(NA_character_), - info="Return NA output of the correct class (Date) for NA input.") - expect_equal(excel_numeric_to_date(NA, include_time=TRUE), - as.POSIXct(NA_character_), - info="Return NA output of the correct class (POSIXct) for NA input.") + expect_equal( + excel_numeric_to_date(NA), + as.Date(NA_character_), + info="Return NA output of the correct class (Date) for NA input." + ) + expect_equal( + excel_numeric_to_date(NA, include_time=TRUE), + as.POSIXct(NA_character_, tz=Sys.timezone()), + info="Return NA output of the correct class (POSIXct) for NA input." + ) expect_equal( excel_numeric_to_date(c(43088, NA)), as.Date(floor(c(43088, NA)), origin = "1899-12-30"), @@ -86,16 +90,23 @@ test_that("excel_numeric_to_date returns a POSIXct object when include_time is r }) test_that("time zone setting works", { - expect_equal(attr(excel_numeric_to_date(43001.11, include_time = TRUE), "tzone"), - "") # blank by default - expect_equal(attr(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "America/New_York"), "tzone"), - "America/New_York") -# expect_warning(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "nonsense"), -# "unknown timezone 'nonsense'") - # this test should be written: - # providing a bad timezone value defaults to blank tz value "" and throws warning - # Then delete prior test as it will fail - + expect_equal( + attr(excel_numeric_to_date(43001.11, include_time = TRUE), "tzone"), + Sys.timezone(), + info="Defaults to the local timezone" + ) + expect_equal( + attr(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "America/New_York"), "tzone"), + "America/New_York" + ) + expect_equal( + attr(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "Europe/Zurich"), "tzone"), + "Europe/Zurich" + ) + expect_error( + excel_numeric_to_date(43001.11, include_time = TRUE, tz = "nonsense"), + info="Invalid timezone gives an error" + ) }) test_that("integer Excel dates do not overflow (ref issue #241)", { @@ -110,7 +121,7 @@ test_that("daylight savings time handling (issue #420)", { regexp="NAs introduced by coercion, possible daylight savings time issue with input. Consider `tz='UTC'`.", fixed=TRUE ), - as.POSIXct(NA_real_, tz="America/New_York") + as.POSIXct(NA_real_, tz="America/New_York", origin="1900-01-01") ) expect_equal( excel_numeric_to_date(43170.09, include_time=TRUE, tz="UTC"),