Environmental data: pH vs dissolved oxygen

Scatterplots and linear regressions examining the relationship between dissolved oxygen (DO) and pH. The goal is to see whether the DO data could be used as a proxy to fill pH data gaps, and to assess how reliable the data are. This is done for China Camp, EOS, and Richardson Bay. No pH or DO data are available at Fort Point.

# The workspace is intentionally not cleared here. rm(list = ls()) only removes
# objects from the environment it is called in (attached packages and options()
# persist), so it does not give a clean session, and it wipes the workspace of
# anyone who sources this script. Restart R to start from a clean session.

library(tidyverse)
library(ggpubr)
library(scales)
library(chron)
library(plotly)
library(taRifx)
library(aweek)
library(easypackages)
library(renv)
library(here)
library(ggthemes)
library(gridExtra)
library(patchwork)
library(tidyquant)
library(recipes) 
library(cranlogs)
library(knitr)
library(openair)

China Camp

#read in data
cc.ph <- read.csv("C:/Users/chels/Box Sync/Thesis/Data/Working data/Bouy data/cc_ph.csv", header = TRUE, sep = ",", fileEncoding = "UTF-8-BOM", stringsAsFactors = FALSE)

cc.do <- read.csv("C:/Users/chels/Box Sync/Thesis/Data/Working data/Bouy data/cc_do.csv", header = TRUE, sep = ",", fileEncoding = "UTF-8-BOM", stringsAsFactors = FALSE)

#format datetime column
# NOTE(review): no tz is given, so timestamps are interpreted in the session's
# local time zone -- confirm this matches how the buoy data were recorded.
cc.ph$datetime <- as.POSIXct(cc.ph$datetime, format = "%Y-%m-%d %H:%M:%S")
cc.do$datetime <- as.POSIXct(cc.do$datetime, format = "%Y-%m-%d %H:%M:%S")

#remove rows with NA in the datetime or do/ph column. Since we're trying to see the relationship between DO and pH, we need periods of time where both measurements were taken.
# Both conditions are combined with `|`. with(df, is.na(datetime), is.na(ph))
# only evaluates its first expression, so rows with a missing ph/do value were
# previously kept.
cc.ph <- cc.ph[!(is.na(cc.ph$datetime) | is.na(cc.ph$ph)), ]
cc.do <- cc.do[!(is.na(cc.do$datetime) | is.na(cc.do$do)), ]

#merge dataframes
# Inner join on datetime: only timestamps present in both tables are kept.
cc <- merge(cc.ph, cc.do[, c("datetime", "do")], by = "datetime")

#linear equation and R^2
# Fits ph ~ do on the supplied data frame and returns a single character string
# holding a plotmath expression: the fitted line (intercept and slope, 2
# significant digits) followed by r^2 (3 significant digits).
lm_eqn <- function(cc) {
  fit <- lm(ph ~ do, data = cc)
  estimates <- unname(coef(fit))

  # Values are formatted to text first so they appear in the label exactly as
  # formatted.
  label_values <- list(
    a = format(estimates[1], digits = 2),
    b = format(estimates[2], digits = 2),
    r2 = format(summary(fit)[["r.squared"]], digits = 3)
  )

  label <- substitute(
    italic(pH) == a + b %.% italic(dissolved.oxygen)*","~~italic(r)^2~"="~r2,
    label_values
  )

  # Returned as a string so the plotting layer can parse it (parse = TRUE).
  as.character(as.expression(label))
}


#graph
# Scatterplot of pH against dissolved oxygen with a linear fit and the fitted
# equation as a label. annotate() draws the label once; geom_text() with a
# constant label would redraw it for every row of the data.
ggplot(cc, aes(x = do, y = ph)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  annotate("text", x = 5.5, y = 8.75, label = lm_eqn(cc), parse = TRUE) +
  labs(title = "China Camp do vs pH",
       subtitle = "01/01/2017 - 12/31/2019",
       caption = "data courtesy of NERR")

## `geom_smooth()` using formula 'y ~ x'

Weaker relationship than I was expecting

EOS

#read in data
eos.ph <- read.csv("C:/Users/chels/Box Sync/Thesis/Data/Working data/Bouy data/eos_ph.csv", header = TRUE, sep = ",", fileEncoding = "UTF-8-BOM", stringsAsFactors = FALSE)

eos.do <- read.csv("C:/Users/chels/Box Sync/Thesis/Data/Working data/Bouy data/eos_do.csv", header = TRUE, sep = ",", fileEncoding = "UTF-8-BOM", stringsAsFactors = FALSE)

#format datetime column
# NOTE(review): no tz is given, so timestamps are interpreted in the session's
# local time zone -- confirm this matches how the buoy data were recorded.
eos.ph$datetime <- as.POSIXct(eos.ph$datetime, format = "%Y-%m-%d %H:%M:%S")
eos.do$datetime <- as.POSIXct(eos.do$datetime, format = "%Y-%m-%d %H:%M:%S")

#remove rows with NA in the datetime or do/ph column. Since we're trying to see the relationship between DO and pH, we need periods of time where both measurements were taken.
# Both conditions are combined with `|`. with(df, is.na(datetime), is.na(ph))
# only evaluates its first expression, so rows with a missing ph/odo value were
# previously kept.
eos.ph <- eos.ph[!(is.na(eos.ph$datetime) | is.na(eos.ph$ph)), ]
eos.do <- eos.do[!(is.na(eos.do$datetime) | is.na(eos.do$odo)), ]

#merge dataframes
# Inner join on datetime: only timestamps present in both tables are kept.
eos <- merge(eos.ph, eos.do[, c("datetime", "odo")], by = "datetime")

#linear equation and R^2
# Fits ph ~ odo on the supplied data frame and returns a single character string
# holding a plotmath expression: the fitted line (intercept and slope, 2
# significant digits) followed by r^2 (3 significant digits).
lm_eqn <- function(eos) {
  fit <- lm(ph ~ odo, data = eos)
  estimates <- unname(coef(fit))

  # Values are formatted to text first so they appear in the label exactly as
  # formatted.
  label_values <- list(
    a = format(estimates[1], digits = 2),
    b = format(estimates[2], digits = 2),
    r2 = format(summary(fit)[["r.squared"]], digits = 3)
  )

  label <- substitute(
    italic(pH) == a + b %.% italic(dissolved.oxygen)*","~~italic(r)^2~"="~r2,
    label_values
  )

  # Returned as a string so the plotting layer can parse it (parse = TRUE).
  as.character(as.expression(label))
}


#graph
# Scatterplot of pH against dissolved oxygen with a linear fit and the fitted
# equation as a label. annotate() draws the label once; geom_text() with a
# constant label would redraw it for every row of the data.
ggplot(eos, aes(x = odo, y = ph)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  annotate("text", x = 3, y = 8.25, label = lm_eqn(eos), parse = TRUE) +
  labs(title = "EOS do vs pH",
       subtitle = "01/01/2017 - 12/31/2019",
       caption = "data courtesy of CeNCOOS")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 597 rows containing non-finite values (stat_smooth).

## Warning: Removed 597 rows containing missing values (geom_point).

Looks better than the China Camp relationship. It looks like it's slightly influenced by the lower values around 6.5 pH, but those will likely be removed anyway.

Richardson Bay

#read in data
rb.ph <- read.csv("C:/Users/chels/Box Sync/Thesis/Data/Working data/Bouy data/rb_ph.csv", header = TRUE, sep = ",", fileEncoding = "UTF-8-BOM", stringsAsFactors = FALSE)

# Assigned to rb.do only. The previous chained assignment (rb.do<-cc.ph<-...)
# also overwrote the China Camp pH table with Richardson Bay DO data.
rb.do <- read.csv("C:/Users/chels/Box Sync/Thesis/Data/Working data/Bouy data/rb_do.csv", header = TRUE, sep = ",", fileEncoding = "UTF-8-BOM", stringsAsFactors = FALSE)

#format datetime column
# NOTE(review): no tz is given, so timestamps are interpreted in the session's
# local time zone -- confirm this matches how the buoy data were recorded.
rb.ph$datetime <- as.POSIXct(rb.ph$datetime, format = "%Y-%m-%d %H:%M:%S")
rb.do$datetime <- as.POSIXct(rb.do$datetime, format = "%Y-%m-%d %H:%M:%S")

#remove rows with NA in the datetime or do/ph column. Since we're trying to see the relationship between DO and pH, we need periods of time where both measurements were taken.
# Both conditions are combined with `|`. with(df, is.na(datetime), is.na(ph))
# only evaluates its first expression, so rows with a missing ph/do value were
# previously kept.
rb.ph <- rb.ph[!(is.na(rb.ph$datetime) | is.na(rb.ph$ph)), ]
rb.do <- rb.do[!(is.na(rb.do$datetime) | is.na(rb.do$do)), ]

#merge dataframes
# Inner join on datetime: only timestamps present in both tables are kept.
rb <- merge(rb.ph, rb.do[, c("datetime", "do")], by = "datetime")

#linear equation and R^2
# Fits ph ~ do on the supplied data frame and returns a single character string
# holding a plotmath expression: the fitted line (intercept and slope, 2
# significant digits) followed by r^2 (3 significant digits).
lm_eqn <- function(rb) {
  fit <- lm(ph ~ do, data = rb)
  estimates <- unname(coef(fit))

  # Values are formatted to text first so they appear in the label exactly as
  # formatted.
  label_values <- list(
    a = format(estimates[1], digits = 2),
    b = format(estimates[2], digits = 2),
    r2 = format(summary(fit)[["r.squared"]], digits = 3)
  )

  label <- substitute(
    italic(pH) == a + b %.% italic(DO)*","~~italic(r)^2~"="~r2,
    label_values
  )

  # Returned as a string so the plotting layer can parse it (parse = TRUE).
  as.character(as.expression(label))
}


#graph
# Scatterplot of pH against dissolved oxygen with a linear fit and the fitted
# equation as a label. annotate() draws the label once; geom_text() with a
# constant label would redraw it for every row of the data.
ggplot(rb, aes(x = do, y = ph)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  annotate("text", x = 5.5, y = 8.75, label = lm_eqn(rb), parse = TRUE) +
  labs(title = "Richardson Bay do vs pH",
       subtitle = "01/01/2017 - 12/31/2019",
       caption = "data courtesy of NERR")

## `geom_smooth()` using formula 'y ~ x'

No dissolved oxygen or pH data are available at Fort Point.

Environmental data: pH vs dissolved oxygen

Chelsey Wegener

12/8/2020