Skip to content

Commit

Permalink
Work around as.POSIX* cross-platform bugs by moving to lubridate
Browse files Browse the repository at this point in the history
  • Loading branch information
billdenney committed Jan 19, 2021
1 parent f357125 commit 1e60a7b
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 31 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ export(top_levels)
export(untabyl)
export(use_first_valid_of)
importFrom(dplyr,rename_all)
importFrom(lubridate,as_date)
importFrom(lubridate,as_datetime)
importFrom(lubridate,force_tz)
importFrom(lubridate,hour)
importFrom(lubridate,minute)
importFrom(lubridate,second)
importFrom(lubridate,ymd)
importFrom(lubridate,ymd_hms)
importFrom(magrittr,"%>%")
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Breaking changes

* Microsoft Excel incorrectly has a leap day on 29 February 1900 (see https://docs.microsoft.com/en-us/office/troubleshoot/excel/wrongly-assumes-1900-is-leap-year). `excel_numeric_to_date()` did not account for this error, and now it does (thanks **@billdenney** for fixing). Dates output from `excel_numeric_to_date()` before 1 March 1900 will now be one day later compared to previous versions (i.e. what was 1 Feb 1900 is now 2 Feb 1900), and dates that Excel presents as 29 Feb 1900 will become `as.POSIXct(NA)`. (thanks **@billdenney** for fixing)
* A minor breaking change is the time zone is always set for `excel_numeric_to_date()` and `convert_date()`. The default timezone is `Sys.timezone()` while before it was an empty string (`""`).

## Minor features

Expand Down
24 changes: 11 additions & 13 deletions R/excel_dates.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,14 @@
#' excel_numeric_to_date(40000.521, include_time = TRUE,
#' round_seconds = FALSE) # Time with fractional seconds is included
#' @family Date-time cleaning

# Converts a numeric value like 42414 into a date "2016-02-14"

excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = "") {
#' @importFrom lubridate as_date as_datetime force_tz hour minute second
excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = Sys.timezone()) {
if (all(is.na(date_num))) {
# For NA input, return the expected type of NA output.
if (include_time) {
return(as.POSIXlt(as.character(date_num)))
return(lubridate::as_datetime(date_num, tz=tz))
} else {
return(as.Date(as.character(date_num)))
return(lubridate::as_date(date_num))
}
} else if (!is.numeric(date_num)) {
stop("argument `date_num` must be of class numeric")
Expand All @@ -77,18 +75,18 @@ excel_numeric_to_date <- function(date_num, date_system = "modern", include_time
date_num_days[mask_before_excel_leap_day_bug] <- date_num_days[mask_before_excel_leap_day_bug] + 1
ret <-
if (date_system == "mac pre-2011") {
as.Date(floor(date_num_days), origin = "1904-01-01")
lubridate::as_date(floor(date_num_days), origin = "1904-01-01")
} else if (date_system == "modern") {
as.Date(floor(date_num_days), origin = "1899-12-30")
lubridate::as_date(floor(date_num_days), origin = "1899-12-30")
} else {
stop("argument 'date_system' must be one of 'mac pre-2011' or 'modern'")
}
if (include_time) {
ret <- as.POSIXlt(ret, tz = tz)
ret$sec <- date_num_seconds %% 60
ret$min <- floor(date_num_seconds/60) %% 60
ret$hour <- floor(date_num_seconds/3600)
ret <- as.POSIXct(ret, tz = tz)
ret <- lubridate::as_datetime(ret)
lubridate::second(ret) <- date_num_seconds %% 60
lubridate::minute(ret) <- floor(date_num_seconds/60) %% 60
lubridate::hour(ret) <- floor(date_num_seconds/3600)
ret <- lubridate::force_tz(ret, tzone=tz)
}
if (any(mask_excel_leap_day_bug)) {
warning("NAs introduced by coercion, Excel leap day bug detected in `date_num`. 29 February 1900 does not exist.")
Expand Down
2 changes: 1 addition & 1 deletion man/excel_numeric_to_date.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 28 additions & 17 deletions tests/testthat/test-date-conversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,16 @@ test_that("time handling at the edge of the next date works correctly", {
})

test_that("excel_numeric_to_date handles NA", {
expect_equal(excel_numeric_to_date(NA),
as.Date(NA_character_),
info="Return NA output of the correct class (Date) for NA input.")
expect_equal(excel_numeric_to_date(NA, include_time=TRUE),
as.POSIXct(NA_character_),
info="Return NA output of the correct class (POSIXct) for NA input.")
expect_equal(
excel_numeric_to_date(NA),
as.Date(NA_character_),
info="Return NA output of the correct class (Date) for NA input."
)
expect_equal(
excel_numeric_to_date(NA, include_time=TRUE),
as.POSIXct(NA_character_, tz=Sys.timezone()),
info="Return NA output of the correct class (POSIXct) for NA input."
)
expect_equal(
excel_numeric_to_date(c(43088, NA)),
as.Date(floor(c(43088, NA)), origin = "1899-12-30"),
Expand All @@ -86,16 +90,23 @@ test_that("excel_numeric_to_date returns a POSIXct object when include_time is r
})

test_that("time zone setting works", {
expect_equal(attr(excel_numeric_to_date(43001.11, include_time = TRUE), "tzone"),
"") # blank by default
expect_equal(attr(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "America/New_York"), "tzone"),
"America/New_York")
# expect_warning(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "nonsense"),
# "unknown timezone 'nonsense'")
# this test should be written:
# providing a bad timezone value defaults to blank tz value "" and throws warning
# Then delete prior test as it will fail

expect_equal(
attr(excel_numeric_to_date(43001.11, include_time = TRUE), "tzone"),
Sys.timezone(),
info="Defaults to the local timezone"
)
expect_equal(
attr(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "America/New_York"), "tzone"),
"America/New_York"
)
expect_equal(
attr(excel_numeric_to_date(43001.11, include_time = TRUE, tz = "Europe/Zurich"), "tzone"),
"Europe/Zurich"
)
expect_error(
excel_numeric_to_date(43001.11, include_time = TRUE, tz = "nonsense"),
info="Invalid timezone gives an error"
)
})

test_that("integer Excel dates do not overflow (ref issue #241)", {
Expand All @@ -110,7 +121,7 @@ test_that("daylight savings time handling (issue #420)", {
regexp="NAs introduced by coercion, possible daylight savings time issue with input. Consider `tz='UTC'`.",
fixed=TRUE
),
as.POSIXct(NA_real_, tz="America/New_York")
as.POSIXct(NA_real_, tz="America/New_York", origin="1900-01-01")
)
expect_equal(
excel_numeric_to_date(43170.09, include_time=TRUE, tz="UTC"),
Expand Down

0 comments on commit 1e60a7b

Please sign in to comment.