diff --git a/.gitignore b/.gitignore index 5b6a065..221b531 100755 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .Rhistory .RData .Ruserdata +.Renviron diff --git a/.renvignore b/.renvignore new file mode 100644 index 0000000..04ed523 --- /dev/null +++ b/.renvignore @@ -0,0 +1 @@ +notes/ \ No newline at end of file diff --git a/app/R/calc_sol_rad_theoretical.R b/app/R/calc_sol_rad_theoretical.R new file mode 100644 index 0000000..ad072e4 --- /dev/null +++ b/app/R/calc_sol_rad_theoretical.R @@ -0,0 +1,161 @@ +# library(TrenchR) + +# Wrapper to sum direct, diffuse, and reflected solar radiation output by `solar_radiation()` +solar_radiation_total <- function(doy, psi, tau, elev, rho) { + purrr::pmap(list(doy, psi, tau, elev, rho), solar_radiation) |> + purrr::map_dbl(sum) +} + + + +#' Calculate theoretical maximum solar radiation +#' +#' @param data data returned by either `az_daily()` or `az_hourly()` +#' @param freq indicator of whether `data` is hourly or daily (default is hourly) +#' @param tau passed to `TrenchR::solar_radiation()` (via +#' `solar_radiation_total()`). Atmospheric sensitivity from 0-1. While 1 is +#' the theoretical max, 0.7 seems like the max observed globaly. E.g. in +#' https://www.mdpi.com/2072-4292/13/9/1716 +#' @param rho passed to `TrenchR::solar_radiation()` (via +#' `solar_radiation_total()`). Albedo ranging from 0-1. Typical albedo values: +#' desert sand, 0.4; concrete, 0.55; bare soil, 0.17; asphalt, 0.04 (values +#' from https://en.wikipedia.org/wiki/Albedo). +#' +#' @return `data`, but with a `sol_rad_est` column indicating the estimated +#' theoretical maximum solar radiation for that date or datetime +#' +calc_sol_rad_theoretical <- function(data, freq = c("hourly", "daily"), tau = 0.7, rho = 0.4) { + # hourly and daily data have slightly different formats, so must deal with that + freq <- match.arg(freq) + if (freq == "hourly") { + date_col <- sym("date_datetime") + } else { + date_col <- sym("datetime") + } + + # Join location data ------------------------------------------------------ + data_joined <- + left_join(data, station_info, by = join_by(meta_station_id, meta_station_name)) |> + select(meta_station_id, meta_station_name, {{ date_col }}, latitude, longitude, elev_m, sol_rad_total) + + + # Expand grid ------------------------------------------------------------- + # For hourly data, to get better resolution calculate instantaneous solar + # radiation every few minutes and then sum. For daily data, calculate hourly + # and sum. + + if (freq == "hourly") { + + # NOTE: this calculation of midpoint is NOT ROBUST. Decimal values will get + # coerced to integers and then be wrong. Edit with caution! + n_segments <- 10L + start <- 0 + (1/2 * 1/n_segments) + midpts <- seq(start, by = 1/n_segments, length.out = n_segments) * 60 + + data_expand <- + data_joined |> + expand_grid( + minute = midpts + ) |> + mutate(grid_datetime = make_datetime( + year = year(date_datetime), + month = month(date_datetime), + day = day(date_datetime), + hour = hour(date_datetime), + min = as.integer(minute) + )) + } else { + data_expand <- + data_joined |> + expand_grid( + hour = 0:23 + ) |> + mutate(grid_datetime = make_datetime( + year = year(datetime), + month = month(datetime), + day = day(datetime), + hour = hour, + min = 30 #estimate at midpoint of hour + )) + } + + + # Calculate instantaneous solar radiation --------------------------------- + data_sol <- + data_expand |> + #Zenith angle + mutate(psi = zenith_angle( + doy = yday({{ date_col }}), + lat = latitude, + lon = longitude, + hour = as_hms(grid_datetime) |> as.duration() |> as.numeric("hours"), + offset = -7 + )) |> + #Total solar radiation + mutate( + sol_rad_est = solar_radiation_total( + doy = yday({{ date_col }}), + psi = psi * pi / 180, + tau = tau, + elev = elev_m, + rho = rho + ) # in W/m^2 + ) + + + # Integrate instantaneous to total ---------------------------------------- + # For hourly, need to multiply by 1/n() (∆x). For daily, just sum hourly for each date + + if (freq == "hourly") { + data_summed <- + data_sol |> + summarize( + sol_rad_est = sum(sol_rad_est * (1/n()), na.rm = TRUE), + .by = c(meta_station_id, meta_station_name, date_datetime) + ) |> + # Sensor totals are for the previous hour, so lag estimates to match + mutate(sol_rad_est = lag(sol_rad_est), .by = meta_station_id) + } else { + data_summed <- + data_sol |> + summarize( + sol_rad_est = sum(sol_rad_est, na.rm = TRUE), + .by = c(meta_station_id, meta_station_name, datetime) + ) + } + + data_summed |> + mutate( + #convert to same units as azmet data: 1 W*hr/m^2 = 0.0036 MJ/m^2, + sol_rad_est = sol_rad_est * 0.0036 + ) |> + right_join(data, by = join_by(meta_station_id, meta_station_name, {{ date_col }})) +} + +# hourly <- az_hourly(start = "2023-05-29 00", end = "2023-05-30 23") +# hourly_calced <- calc_sol_rad_theoretical(hourly) +# nrow(hourly) == nrow(hourly_calced) +# +# hourly_calced |> +# filter(!lte(sol_rad_total, sol_rad_est, tol = 0.5)) |> +# select(date_datetime, sol_rad_est, sol_rad_total) +# +# hourly_calced |> +# filter(meta_station_id %in% c("az01")) |> +# ggplot(aes(x = date_datetime)) + +# geom_line(aes(y = sol_rad_total, color = "Observed")) + +# geom_line(aes(y = sol_rad_est, color = "Theoretical max")) + +# facet_wrap(~meta_station_id) +# +# daily <- az_daily(start = "2022-10-10", end = "2023-10-10") +# daily_calced <- calc_sol_rad_theoretical(daily, "daily") +# daily_calced |> +# filter(!lte(sol_rad_total, sol_rad_est)) |> +# select(datetime, sol_rad_est, sol_rad_total) +# +# daily_calced |> +# filter(meta_station_id %in% c("az01")) |> +# ggplot(aes(x = datetime)) + +# geom_line(aes(y = sol_rad_total, color = "Observed")) + +# geom_line(aes(y = sol_rad_est, color = "Theoretical max")) + +# facet_wrap(~meta_station_id) diff --git a/app/R/check_daily.R b/app/R/check_daily.R index b59b0ce..a82ccb5 100755 --- a/app/R/check_daily.R +++ b/app/R/check_daily.R @@ -11,6 +11,8 @@ check_daily <- function(daily) { report <- data.validator::data_validation_report() + daily <- calc_sol_rad_theoretical(daily, freq = "daily") + data.validator::validate(daily, name = "Daily Data") |> # Internal consistency checks from 'NWS (1994) TSP 88-21-R2': data.validator::validate_if(gte(temp_air_meanC, dwpt_mean, na_pass = TRUE), @@ -32,6 +34,8 @@ check_daily <- function(daily) { ), description = "`wind_spd_mean_mps` ≤ `wind_spd_max_mps`" ) |> + validate_if(lte(sol_rad_total, sol_rad_est, na_pass = TRUE), + "`sol_rad_total` ≤ theoretical max") |> data.validator::validate_if( btwn( relative_humidity_mean, diff --git a/app/R/check_hourly.R b/app/R/check_hourly.R index e34c0ed..20c345d 100755 --- a/app/R/check_hourly.R +++ b/app/R/check_hourly.R @@ -9,6 +9,7 @@ #' check_hourly(hourly) check_hourly <- function(hourly) { hourly <- hourly |> + calc_sol_rad_theoretical() |> dplyr::group_by(meta_station_name) |> dplyr::arrange(dplyr::desc(date_datetime)) |> #make sure arranged in datetime order # Create a few new columns for temporal consistency checks from 'NWS (1994) TSP 88-21-R2': @@ -43,13 +44,15 @@ check_hourly <- function(hourly) { report <- data.validator::data_validation_report() data.validator::validate(hourly, name = "Hourly Data") |> data.validator::validate_if(# within rounding error - gte(temp_airC, dwpt - 0.2, na_pass = TRUE), + gte(temp_airC, na_pass = TRUE, tol = 0.2), "`temp_airC` ≥ `dwpt`") |> data.validator::validate_if(lte(wind_spd_mps, wind_spd_max_mps, na_pass = TRUE), "`wind_spd` ≤ `wind_spd_max`") |> validate_if(temp_airC_delta, "|∆`temp_airC`| ≤ 19.4") |> validate_if(relative_humidity_delta, "|∆`relative_humidity`| ≤ 50") |> validate_if(wind_spd_delta, "|∆`wind_spd_mps`| ≤ 10.3") |> + validate_if(lte(sol_rad_total, sol_rad_est, na_pass = TRUE, tol = 0.5), + "`sol_rad_total` ≤ theoretical max") |> #TODO would be nice if CSV would contain all 14+ hours in a row maybe? validate_if(sol_rad_total_14 | is.na(sol_rad_total_14), "`sol_rad_total` not < 0.1 for more than 14 hrs") |> diff --git a/app/R/helpers.R b/app/R/helpers.R index bc63a9d..d1b8ce6 100755 --- a/app/R/helpers.R +++ b/app/R/helpers.R @@ -1,14 +1,18 @@ #' @param na_pass logical; do NAs count as passing the validation step? -gte <- function(x,y, na_pass = FALSE) { - res <- x >= y +#' @param tol tolerance; passed on to `all.equal()` to evaluate the "equal" part +#' of greater than or equal to +gte <- function(x,y, na_pass = FALSE, tol = sqrt(.Machine$double.eps)) { + eq <- purrr::map2_lgl(x, y, \(x,y) isTRUE(all.equal(x,y, tolerance = tol, scale = 1))) + res <- x > y | eq if(isTRUE(na_pass)) { res <- ifelse(is.na(res), TRUE, res) } res } -lte <- function(x,y, na_pass = FALSE) { - res <- x <= y +lte <- function(x,y, na_pass = FALSE, tol = sqrt(.Machine$double.eps)) { + eq <- purrr::map2_lgl(x, y, \(x,y) isTRUE(all.equal(x,y, tolerance = tol, scale = 1))) + res <- x < y | eq if(isTRUE(na_pass)) { res <- ifelse(is.na(res), TRUE, res) } diff --git a/app/app.R b/app/app.R index f85f5ce..fdbeca7 100755 --- a/app/app.R +++ b/app/app.R @@ -15,6 +15,8 @@ library(plotly) library(slider) library(units) library(patchwork) +library(hms) +library(TrenchR) # Everything in R/ should be sourced automatically, but I found I needed to add # these to get it to work. Not sure why. diff --git a/app/testdata_hourly.csv b/app/testdata_hourly.csv index ac130ee..264a328 100644 --- a/app/testdata_hourly.csv +++ b/app/testdata_hourly.csv @@ -26,7 +26,7 @@ meta_bat_volt,meta_needs_review,meta_station_id,meta_station_name,meta_version,d 12.68,0,az02,Yuma Valley,1,2023-07-02T07:00:00Z,183,0700,2023,9.3,48.7,0,0,22.8,73,0,0,37,0,9.8,25.2,77.4,27.1,80.8,26.5,79.7,1.17,2.05,0.7,0.3,0,0,2023-07-02T06:00:10-07:00,18,1.8,0.8,0,0,317,3,0,0 13.01,0,az02,Yuma Valley,1,2023-07-02T08:00:00Z,183,0800,2023,6.2,43.2,0.2,0.01,26,78.7,0,0,22,0,25.33,30.2,86.4,26.9,80.4,26.5,79.7,0.95,3.33,3.8,1.7,0.9,0.4,2023-07-02T08:00:00-07:00,189,5.1,2.3,0.9,0.4,159,18,0.3,0.7 13.61,0,az02,Yuma Valley,1,2023-07-02T09:00:00Z,183,0900,2023,7.3,45.1,0.5,0.02,27.2,81,0,0,22,0,41.59,32,89.6,27.2,81,26.5,79.7,1.02,3.73,11.4,5.1,8.1,3.6,2023-07-02T08:43:30-07:00,177,14.3,6.4,8.1,3.6,166,14,3.5,7.8 -12.84,0,az02,Yuma Valley,1,2023-07-02T21:00:00Z,183,2100,2023,11.2,52.1,0.1,0,29.9,85.7,0,0,26,0,0,33.2,91.8,33.8,92.8,26.4,79.5,1.33,3.76,4,1.8,2,0.9,2023-07-02T20:41:20-07:00,269,5.6,2.5,2,0.9,241,21,0.8,1.8 +12.84,0,az02,Yuma Valley,1,2023-07-02T21:00:00Z,183,2100,2023,11.2,52.1,0.1,0,29.9,85.7,0,0,26,2.3,0,33.2,91.8,33.8,92.8,26.4,79.5,1.33,3.76,4,1.8,2,0.9,2023-07-02T20:41:20-07:00,269,5.6,2.5,2,0.9,241,21,0.8,1.8 12.8,0,az02,Yuma Valley,1,2023-07-02T22:00:00Z,183,2200,2023,12.2,53.9,0.1,0,28,82.4,0,0,32,0,0,31.1,88,33,91.4,26.4,79.5,1.42,3.08,3.1,1.4,1.1,0.5,2023-07-02T21:02:00-07:00,223,4,1.8,1.1,0.5,237,13,0.5,1.1 12.76,0,az02,Yuma Valley,1,2023-07-02T23:00:00Z,183,2300,2023,13.1,55.6,0,0,24.6,76.3,0,0,41,0,0,27.6,81.7,32.2,90,26.4,79.5,1.51,2.18,2,0.9,0,0,2023-07-02T22:16:40-07:00,87,2.7,1.2,0,0,82,3,0,0 12.76,0,az04,Safford,1,2023-07-01T21:00:00Z,182,2100,2023,-0.5,31.1,0.2,0.01,28.7,83.6,0,0,11,0,0,33.5,92.3,34.3,93.7,29.4,84.9,0.59,4.57,10.1,4.5,7.6,3.4,2023-07-01T20:54:30-07:00,315,12.8,5.7,0,3.4,314,13,3.3,7.4 diff --git a/notes/azmet_battery.qmd b/notes/azmet_battery.qmd new file mode 100644 index 0000000..64857e4 --- /dev/null +++ b/notes/azmet_battery.qmd @@ -0,0 +1,233 @@ +--- +title: "Battery Diagnostics Notes" +format: html +editor: visual +--- + +```{r} +library(tidyverse) +library(azmetr) +library(data.validator) +library(slider) +library(plotly) +source(here::here("app/R/helpers.R")) +``` + +Get example data + +```{r} +daily <- az_daily(start_date = "2022-08-10", end_date = "2023-08-10") +hourly <- az_hourly(start = "2023-01-15 00", end = "2023-01-17 23") + +daily +hourly +``` + +## Data Validations + +- Voltage within range + +- Voltage not changing dramatically from rolling mean (not sure by how much) + +### Use `slider` to calculate anomalies from rolling mean + +```{r} +daily <- + daily |> + arrange(desc(datetime)) |> + mutate( + #Calculate 10 day rolling mean of mean voltage (less than 10 days is allowed at start of timeseries) + meta_bat_mean_rollmean = slide_dbl(meta_bat_volt_mean, mean, .after = 10), + #Calculate anomaly from 10 day rolling mean + meta_bat_volt_anomaly = abs(meta_bat_volt_mean - meta_bat_mean_rollmean), + .by = meta_station_id + ) + +hourly <- + hourly |> + arrange(desc(date_datetime)) |> + mutate( + meta_bat_volt_rollmean = slide_dbl(meta_bat_volt, mean, .after = 10), + meta_bat_volt_anomaly = abs(meta_bat_volt - meta_bat_volt_rollmean), + .by = meta_station_id + ) +``` + +What's a reasonable threshold? + +```{r} +daily|> + select(meta_station_name, datetime, starts_with("meta_bat_")) |> + filter(meta_bat_volt_anomaly > 1) + +hourly |> + select(meta_station_name, date_datetime, starts_with("meta_bat_")) |> + filter(meta_bat_volt_anomaly > 2) +``` + +### Daily validations + +```{r} +report <- data.validator::data_validation_report() + +data.validator::validate(daily, name = "Daily Data") |> + validate_if(btwn(meta_bat_volt_mean, meta_bat_volt_min, meta_bat_volt_max), + description = "`meta_bat_volt_*` (min ≤ mean ≤ max)") |> + validate_if(meta_bat_volt_min >= 9.6, "`meta_bat_volt_min` ≥ 9.6") |> + validate_if(meta_bat_volt_max <= 16, "`meta_bat_volt_max` ≤ 16") |> + validate_if(lte(meta_bat_volt_anomaly, 1, na_pass = TRUE), "∆ voltage from 10-day mean ≤ 1") |> + add_results(report) + +report +get_results(report) +``` + +### Hourly validations + +```{r} +report <- data.validator::data_validation_report() + +data.validator::validate(hourly, "Hourly Data") |> + validate_if(meta_bat_volt >= 9.6, "`meta_bat_volt` ≥ 9.6") |> + validate_if(meta_bat_volt <= 16, "`meta_bat_volt` ≤ 16") |> + validate_if(lte(meta_bat_volt_anomaly, 2, na_pass = TRUE), "∆ voltage from rolling mean ≤ 1") |> #not sure how helpful this is with hourly data since spikes happen every morning with sunrise + add_results(report) + +report +get_results(report) +``` + +## Visualizations + +### Timeseries + +Hourly + +```{r} +h_time <- + ggplot(hourly, aes(x = date_datetime, y = meta_bat_volt)) + + geom_line(aes(color = meta_station_id)) + + geom_hline(aes(yintercept = 9.6), color = "red") + + geom_hline(aes(yintercept = 16), color = "orange") + + geom_hline(aes(yintercept = 20), color = "red") + + scale_y_continuous(limits = range(hourly$meta_bat_volt, na.rm = TRUE)) + + theme_bw() + + theme(legend.position = "none") + + labs(title= "hourly") +ggplotly(h_time) +``` + +Daily + +```{r} +h_daily <- + daily |> + ggplot(aes(x = datetime, y = meta_bat_volt_mean)) + + geom_ribbon(aes(ymin = meta_bat_volt_min, ymax = meta_bat_volt_max, fill = meta_station_id), alpha = 0.15) + + geom_line(aes(color = meta_station_id)) + + geom_hline(aes(yintercept = 9.6), color = "red") + + geom_hline(aes(yintercept = 16), color = "orange") + + geom_hline(aes(yintercept = 20), color = "red") + + scale_y_continuous(limits = c(min(daily$meta_bat_volt_min, na.rm = TRUE), max(daily$meta_bat_volt_max, na.rm = TRUE))) + + theme_bw() + + theme(legend.position = "none") + + labs(title = "daily") + +ggplotly(h_daily) +``` + +## Relationship with Sun and Temp + +### Solar radiation + +```{r} +hourly_sol <- + hourly |> + ggplot(aes(x = sol_rad_total, y = meta_bat_volt)) + + geom_point(aes(color = meta_station_id), alpha = 0.2) + + geom_smooth(se = FALSE) + + theme_bw() + + theme(legend.position = "none") + + labs(title = "hourly") +hourly_sol +``` + +```{r} +daily_sol <- + daily |> + ggplot(aes(x = sol_rad_total, y = meta_bat_volt_mean)) + + geom_point(aes(color = meta_station_id), alpha = 0.2) + + geom_smooth(se = FALSE) + + theme_bw() + + theme(legend.position = "none") + + labs(title = "daily") +daily_sol +``` + +### Air temp + +Hourly probably isn't very meaningful since it's just a few days in the winter + +```{r} +hourly_temp <- + hourly |> + ggplot(aes(x = temp_airC, y = meta_bat_volt)) + + geom_point(aes(color = meta_station_id), alpha = 0.2) + + geom_smooth(se = FALSE) + + theme_bw() + + theme(legend.position = "none") + + labs(title = "hourly") +hourly_temp +``` + +```{r} +daily_temp <- + daily |> + ggplot(aes(x = temp_air_meanC, y = meta_bat_volt_mean)) + + geom_point(aes(color = meta_station_id), alpha = 0.2) + + geom_smooth(se = FALSE) + + theme_bw() + + theme(legend.position = "none") + + labs(title = "daily") +daily_temp +``` + +slight negative slope with air temp + +## Modeling + +```{r} +library(mgcv) +library(gratia) +``` + +```{r} +m <- + gam( + meta_bat_volt_mean ~ s(sol_rad_total, k = 17) + + s(temp_air_meanC, k = 15) + + #interaction + ti(sol_rad_total, temp_air_meanC, bs = "cr", k = 20) + + s(id, bs = "re"), #random intercepts for sites + # family = scat(), #residuals are leptokurtic with gaussian(), but this is slower + family = gaussian(), + data = daily |> mutate(id = factor(meta_station_id)), #gam() requires factors + method = "REML" + ) + + +mgcv::k.check(m) +gratia::appraise(m) +summary(m) +overview(m) +``` + +```{r} +gratia::draw(m, unconditional = TRUE, overall_uncertainty = TRUE) +``` + +Battery voltage increases non-linearly with solar radiation (because it reaches max charge at some point?) and decreases with air temperature. +Significant interaction looks like its just that there is a more than additive effect when cold and low solar radiation. + +Could generate predictive intervals for these smooths and flag points that fall outside of them. +Would probably want to use more data for that though. diff --git a/notes/references.bib b/notes/references.bib new file mode 100644 index 0000000..dfd07a1 --- /dev/null +++ b/notes/references.bib @@ -0,0 +1,24 @@ + +@article{gupta2023, + title = {Estimation of Solar Radiation with Consideration of Terrestrial Losses at a Selected Location{\textemdash}A Review}, + author = {Gupta, Shubham and Singh, Amit Kumar and Mishra, Sachin and Vishnuram, Pradeep and Dharavat, Nagaraju and Rajamanickam, Narayanamoorthi and Kalyan, Ch Naga Sai and AboRas, Kareem M. and Sharma, Naveen Kumar and Bajaj, Mohit}, + year = {2023}, + month = {01}, + date = {2023-01}, + journal = {Sustainability}, + pages = {9962}, + volume = {15}, + number = {13}, + doi = {10.3390/su15139962}, + url = {https://www.mdpi.com/2071-1050/15/13/9962}, + note = {Number: 13 +Publisher: Multidisciplinary Digital Publishing Institute}, + langid = {en} +} + +@article{frouin, + title = {A Simple Analytical Formula to Compute Clear Sky Total and Photosynthetically Available Solar Irradiance at the Ocean Surface}, + author = {Frouin, Robert and Lingner, David W and Gautier, Catherine and Baker, Karen S and Smith, Ray C}, + doi = {10.1029/JC094iC07p09731}, + langid = {en} +}