02-Oceanic-pH.qmd

---
title: "Oceanic_pH"
author: "phoebe.woodworth-jefcoats@noaa.gov"
format:
  docx:
    reference-doc: SAFE-Reference-Doc.docx
---

## Oceanic pH at Station ALOHA

```{r}
#| include: false
### Load libraries
library(tidyverse)
library(lubridate)
library(here)
library(stringr)
# remotes::install_github("nmfs-fish-tools/nmfspalette")
library(nmfspalette)
library(plotly)
library(reticulate)

reticulate::use_miniconda('r-reticulate')
```

```{r}
#| include: false
# Set report year (RptYr), to make things easier
# Note that there's a one year lag for HOT data
RptYr <- 2023

# Set path to variable: Oceanic_pH
# This is where the data are and where the plots will go
Dir <- here("Oceanic_pH")
```

```{r}
#| include: false
### Load data
# Monthly(ish) pH is in the 8th column of this file generated by Matlab
pH_full <- read_delim(file = paste(Dir, '/pH_', RptYr, '.dat', sep = ""), col_names = TRUE)
# And dates are in this file
pH_dates <- read_csv(file = paste(Dir, '/TA_DIC_', RptYr, '_withDate.csv', sep = ""))
```

```{r}
#| include: false
### Remove erroneous outliers
pH <- pH_full$`pH Total`
outlier <- which(pH < 8)
pH[outlier] <- NA
```

```{r}
#| include: false
### Linear fit 
# Note that this assumes that observations are equally spaced in time, which they're  not
n_obs <- seq(1, length(pH), 1)
pH_lm <- lm(pH ~ n_obs)

# There are a few NA values, so we'll calculate new fitted values for the full number of dates.
# This is just to make plotting easier.
pH_lm_fit <- n_obs*pH_lm$coefficients[2] + pH_lm$coefficients[1]

# Change over time, keeping in mind that pH is a logrithmic scale
delta_pH_lm <- pH_lm_fit[length(n_obs)] - pH_lm_fit[1]
delta_pH_pct = (10^-pH_lm_fit[length(n_obs)] - 10^-pH_lm_fit[1])/10^-pH_lm_fit[1]*100
```

```{r}
#| include: false
### Annual values
yrs <- year(mdy(pH_dates$`date mmddyy`))
pH_dates <- bind_cols(pH_dates, yrs, pH)
pH_dates <- rename(pH_dates, Year = ...6)
pH_dates <- rename(pH_dates, pH = ...7)

ann_pH <- pH_dates %>%
  group_by(Year) %>%
  summarise(pH = mean(pH, na.rm = TRUE))
ann_mean_RptYr <- ann_pH$pH[which(ann_pH$Year == RptYr - 1)]

# Find years when the highest value is lower than the lowest value in the first year
ann_pHmin <- pH_dates %>%
  group_by(Year) %>%
  summarise(min_pH = min(pH, na.rm = TRUE))
yr1_min <- ann_pHmin$min_pH[1]

ann_pHmax <- pH_dates %>%
  group_by(Year) %>%
  summarise(max_pH = max(pH, na.rm = TRUE))
yrs_below <- ann_pHmax$Year[which(ann_pHmax$max_pH < yr1_min)]
n_yrs_below <- length(yrs_below)
ann_max_RptYr <- ann_pHmax$max_pH[which(ann_pHmax$Year == RptYr - 1)]
```

```{r}
#| echo: false
# Note that the above needs to be 'echo' and not 'include' so that the error checks print.

# This section includes some error checks to prompt fixing the text
if (any(summary(pH_lm)$coefficients[,4] > 0.05)) {
  print('The linear fit is not signficant. Remove text related to the linear trend.')
}

n_txt <- as.numeric(str_extract(n_yrs_below, "\\d$"))
if (n_txt > 0 & n_txt < 4) {
  print('Edit the text around the number of years that pH has been outside the range seen the first year.')
}

```

```{r}
#| include: false
# Write csv for portal
# Note that output csvs go in their own folder
ann_pH <- rename(ann_pH, year = Year)
write_csv(ann_pH, file = paste(here(), '/PelagicClimate_', RptYr, '/pH_', RptYr, '.csv', sep = ""))
```

```{r}
#| include: false
# Write csv for dashboard
# Note that dashboard output has its own folder
# Thanks to Emily Conklin for this chunk of code!
pH_dashboard <- pH_dates %>%
  mutate(Date_Time = as.character(`date mmddyy`)) %>%
  mutate(Date_Time_tmp = ifelse(str_length(Date_Time) == 5, "0", "")) %>%
  mutate(Date_Time = paste0(Date_Time_tmp, Date_Time)) %>%
  mutate(Date_Time = mdy(Date_Time)) %>%
  mutate(Year = year(Date_Time), Month = month(Date_Time), ID = "pH", Anom = NA, Value_lm = pH_lm_fit,
         Anom_lm = NA, Units = "pH") %>%
  mutate(Date_Time = Date_Time %>%
           format('%d-%b-%Y %H:%M') %>%
           toupper) %>%
  select(Date_Time, Year, Month, Value = pH, Anom, ID, Value_lm, Anom_lm, Units)


write_csv(pH_dashboard, file = here("Indicator_Dashboard", "Data", paste('pH_Dashboard_Data_', RptYr, '.csv', sep = "")))
```

```{r}
#| include: false
# Borrowing code from the dashboard for this chunk
# so that figures look the same across products
indicator_data <- pH_dashboard |>
  filter(Year <= RptYr)

# Create color palette for easy reference 
oceans <- nmfs_palette("oceans")(3) # 1 = report_year, 3 = previous years
crustacean <- nmfs_palette("crustacean")(4) # 1 = linear trend
coral <- nmfs_palette("coral")(3) # 3 = annual average for Rpt Yr
ann_grey <- "#D0D0D0" # annual means; in NMFS branding guide but not in package
waves <- nmfs_palette("waves")(3) # annual means; in NMFS branding guide but not in package
seagrass <- nmfs_palette("seagrass")(3)
pal <- c(oceans[3], coral[2], waves[2], coral[3], crustacean[2])

# Formatting
plot_title_font <- list(size = 14)
plot_height <- 350 #in pixels

# Calculate annual means 
ann_vals <- indicator_data |>
  group_by(Year) |>
  summarise(Value = mean(Value, na.rm = TRUE))

# Identify the current year, to overlay on plot
given_yr <- indicator_data |>
  filter(Year == RptYr - 1) # pH data have a one-year lag

given_yr_ann <- bind_cols(rep(ann_vals$Value[dim(ann_vals)[1]]),
                          given_yr$Date_Time)
given_yr_ann <- rename(given_yr_ann, Value = ...1)
given_yr_ann <- rename(given_yr_ann, Date_Time = ...2)

p1 <- plot_ly(indicator_data, x = dmy_hm(indicator_data$Date_Time), y = ~Value,
              type = "scatter", mode = "lines", line = list(color = pal[1]),
              name = ~ID[1], height = plot_height) |>
  add_trace(indicator_data, x = dmy_hm(indicator_data$Date_Time), y = ~Value_lm,
            type = "scatter", mode = "lines", line = list(color = pal[5]),
            name = "Long-term Trend") |>
  add_trace(ann_vals, x = ymd_hm(paste(ann_vals$Year, '0601 00:00', sep = "")), y = ann_vals$Value,
            type = "scatter", mode = "lines", line = list(color = pal[3]),
            name = "Annual Mean") |>
  add_trace(given_yr, x = dmy_hm(given_yr$Date_Time), y = given_yr$Value,
            type = "scatter", mode = "lines", line = list(color = pal[2]),
            name = ~ID[1], height = plot_height) |>
  add_trace(given_yr_ann, x = dmy_hm(given_yr_ann$Date_Time), y = given_yr_ann$Value,
            type = "scatter", mode = "lines", line = list(color = pal[4]),
            name = ~ID[1], height = plot_height)

#apply same layout parameters for all plots

#custom x axis (min, every decade, report year)
all_years <- unique(indicator_data$Year)
first_date <- as.character(parse_date_time(indicator_data$Date_Time[1], orders = "d-b-Y H:M"))
date_axis <- c(first_date, 
              all_years[which(all_years %% 10 == 0)], 
              RptYr - 1)

p1 <- p1 |> layout(title = list(text = "Indicator Time Series", x = 0.01, font = plot_title_font), #add title
                   xaxis = list(type = "date", tickformat = "%Y", tickmode = "array", tickvals = date_axis, tickangle = 90),
                   #xaxis = list(tick0 = min(indicator_data$Date_Time), dtick = "M24"),
                   yaxis = list(title = indicator_data$Units[1], hoverformat = '.3f', range = list(8.03, 8.14), tickvals = list(8.03, 8.04, 8.06, 8.08, 8.10, 8.12, 8.14)), #add units and round values in hover display; not sure of a better way to set axis limits and ticks...
                   paper_bgcolor = 'transparent', plot_bgcolor = 'transparent', #transparent background
                   hovermode = "x unified", #show data for all traces simultaneously
                   hoverlabel = list(namelength = -1)) #don't cutoff hoverinfo due to length
  
#return plot 
save_image(p1, paste(Dir, '/Oceanic_pH_ts_', RptYr, '.pdf', sep = ""))

# Handy reference, in case this doesn't work in the future
# from: https://stackoverflow.com/questions/64028289/export-plot-from-plotly-into-pdf
# install.packages('reticulate')
# reticulate::install_miniconda()
# reticulate::conda_install('r-reticulate', 'python-kaleido')
# reticulate::conda_install('r-reticulate', 'plotly', channel = 'plotly')
# reticulate::use_miniconda('r-reticulate')
# To export:
# library(plotly)
# kaleido(FINAL_plot, "FINAL_plot.pdf")
```

Rationale: Oceanic pH is a measure of how greenhouse gas emissions have already impacted the ocean. This indicator demonstrates that oceanic pH has decreased significantly over the past several decades (i.e., the ocean has become more acidic). Increasing ocean acidification limits the ability of marine organisms to build shells and other calcareous structures. Recent research has shown that pelagic organisms such as pteropods and other prey for commercially valuable fish species are already being negatively impacted by increasing acidification (Feely et al. 2016). The full impact of ocean acidification on the pelagic food web is an area of active research (Fabry et al. 2008).  
\
Status: The ocean is roughly `r signif(delta_pH_pct, 3)`% more acidic than it was 30 years ago at the start of this time series. Over this time, pH has declined by `r abs(signif(delta_pH_lm, 2))` at a constant rate. In `r RptYr-1`, the most recent year for which data are available, the average pH was `r signif(ann_mean_RptYr, 3)`. Additionally, for the `r n_yrs_below`th year, small variations seen over the course of the year are outside the range seen in the first year of the time series. The highest pH value reported for the most recent year (`r signif(ann_max_RptYr, 4)`) is lower than the lowest pH value reported in the first year of the time series (`r signif(yr1_min, 4)`).  
\
Description: Trends in surface (5 m) pH at Station ALOHA, north of Oahu (22.75°N, 158°W), collected by the Hawaiʻi Ocean Time-Series (HOT) from October 1988 to `r RptYr - 1` (`r RptYr` data are not yet available). Oceanic pH is a measure of ocean acidity, which increases as the ocean absorbs carbon dioxide from the atmosphere. Lower pH values represent greater acidity. Oceanic pH is calculated from total alkalinity (TA) and dissolved inorganic carbon (DIC). Total alkalinity represents the ocean’s capacity to resist acidification as it absorbs CO~2~ and the amount of CO~2~ absorbed is captured through measurements of DIC. The multi-decadal time series at Station ALOHA represents the best available documentation of the significant downward trend in oceanic pH since the time series began in 1988. Oceanic pH varies over both time and space, though the conditions at Station ALOHA are considered broadly representative of those across the Western and Central Pacific’s pelagic fishing grounds.  
\
Timeframe: Monthly.  
\
Region/Location: Station ALOHA: 22.75°N, 158°W.  
\
Measurement Platform: *In-situ* station.  
\
Data available at: <https://hahana.soest.hawaii.edu/hot/hot-dogs/bseries.html>.  
\
Sourced from: Fabry et al. (2008), Feely et al. (2016), and the Hawaiʻi Ocean Time-Series as described in Karl and Lukas (1996) and on its website (HOT 2024) using the methodology provided by Zeebe and Wolf-Gladrow (2001).

Graphics produced in part using Stawitz (2023).  
\
## Additional Information

The following variables (listed as operators on the web interface) were downloaded from <https://hahana.soest.hawaii.edu/hot/hot-dogs/bseries.html>:

- Alkalinitiy
- Dissolved Inorganic Carbon

Other options on the web interface were set to:

- Years: blank to download all
- Station #: 2 (station ALOHA)
- Value/range: 5 (for 5 m depth)
- Function Grouping: Depth
- Function: Horizon
- Grouping: none
- Output type: text

Data were manually copied and pasted into .csvs.  In this process, the 2-line header was combined 
into a 1-line header.  These variables were then matched by date in `pH_prep.Rmd` (which can be found in the [Oceanic_pH](https://github.com/pwoodworth-jefcoats/climate-indicators/tree/main/Oceanic_pH) folder) and used to run 
csysUI.m with the default options, and a comma-delimited input file (TA_DIC_`r RptYr`.csv):

- Temperature = 25 C (reasonable up until maybe the most recent year)
- Salinity = 35 (broadly reasonable)
- Pressure = 5 bar (because these values come from 5 m depth)
The output headers can be found here: 
<https://www.soest.hawaii.edu/oceanography/faculty/zeebe_files/CO2_System_in_Seawater/ReadMeUI.txt>

The resulting total pH values are in reasonable agreement with the calculated pH values that are 
available on HOT albeit with a large temporal gap.  The resulting total pH values (pH_`r RptYr`.dat) 
are used in this script.  The R + Matlab workflow is admittedly cumbersome and not an option 
for those without a Matlab license.  Please let me know if an R equivalent of csysUI.m exists.

A plot of the residuals from the linear model showed that they were evenly distributed between 
-0.03 and 0.03.