Goal: a table summarizing salinity, pH, and water temperature at my four sites. (Air temperature should probably be a separate table since one of the data sources I use is different than the buoys I use for the water data?) Summarizing environmental conditions during the course of my field work.

Columns:
- From hourly median data: max value, min value, overall median - From daily summary data: max daily range, min daily range, median daily range

Questions:
- Should I add an “events” column? For example, how many days with a daily max below 10 salinity?

rm(list=ls())

library(tidyverse)
library(ggpubr)
library(scales)
library(chron)
library(plotly)
library(taRifx)
library(aweek)
library(easypackages)
library(renv)
library(here)
library(ggthemes)
library(gridExtra)
library(patchwork)
library(tidyquant)
library(recipes) 
library(cranlogs)
library(knitr)
library(openair)
library(data.table)
library(gt)

####Setting up#### Reading in data. Need both hourly median datasets and daily summaries.

Hourly medians

#Salinity
cc.sal<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/cc_sal.csv",
    header = TRUE
  )
eos.sal<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/eos_sal.csv",
    header = TRUE
  )
rb.sal<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/rb_sal.csv",
    header = TRUE
  )
fp.sal<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/fp_sal.csv",
    header = TRUE
  )

#pH
cc.ph<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/cc_ph.csv",
    header = TRUE
  )
eos.ph<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/eos_ph.csv",
    header = TRUE
  )
rb.ph<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/rb_ph.csv",
    header = TRUE
  )
#no pH data at Fort Point

#water temperature
cc.wtemp<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/cc_watertemp.csv",
    header = TRUE
  )
eos.wtemp<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/eos_watertemp.csv",
    header = TRUE
  )
rb.wtemp<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/rb_watertemp.csv",
    header = TRUE
  )
fp.wtemp<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/fp_watertemp.csv",
    header = TRUE
  )

Daily summaries

cc.daily.sum<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/cc.daily.sum.csv",
    header = TRUE
  )
eos.daily.sum<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/eos.daily.sum.csv",
    header = TRUE
  )
rb.daily.sum<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/rb.daily.sum.csv",
    header = TRUE
  )
fp.daily.sum<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/fp.daily.sum.csv",
    header = TRUE
  )

Format date for all data sets

cc.sal$date<-as.Date(cc.sal$date, format = c("%Y-%m-%d"))
cc.ph$date<-as.Date(cc.ph$date, format = c("%Y-%m-%d"))
cc.wtemp$date<-as.Date(cc.wtemp$date, format = c("%Y-%m-%d"))
cc.daily.sum$date<-as.Date(cc.daily.sum$date, format = c("%Y-%m-%d"))

eos.sal$date<-as.Date(eos.sal$date, format = c("%Y-%m-%d"))
eos.ph$date<-as.Date(eos.ph$date, format = c("%Y-%m-%d"))
eos.wtemp$date<-as.Date(eos.wtemp$date, format = c("%Y-%m-%d"))
eos.daily.sum$date<-as.Date(eos.daily.sum$date, format = c("%Y-%m-%d"))


rb.sal$date<-as.Date(rb.sal$datetime, format = c("%Y-%m-%d"))
rb.ph$date<-as.Date(rb.ph$date, format = c("%Y-%m-%d"))
rb.wtemp$date<-as.Date(rb.wtemp$date, format = c("%Y-%m-%d"))
rb.daily.sum$date<-as.Date(rb.daily.sum$date, format = c("%Y-%m-%d"))


fp.sal$date<-as.Date(fp.sal$date, format = c("%Y-%m-%d"))
fp.wtemp$date<-as.Date(fp.wtemp$date, format = c("%Y-%m-%d"))
fp.daily.sum$date<-as.Date(fp.daily.sum$date, format = c("%Y-%m-%d"))

I’ve summarized between survey dates in the my dataset I use for the mixed modles, but here I’m going to summarize over the entire field time for this table (one month before the first survey to the last survey date.)

Subset data to focus on field dates. Should I do the same dates for all of them? They differ by max of one day

#salinity
cc.sal<- cc.sal[cc.sal$date >= "2018-05-14" & cc.sal$date < "2019-09-12",]
eos.sal<-eos.sal[eos.sal$date >= "2018-05-14" & eos.sal$date < "2019-09-12",]
rb.sal<- rb.sal[rb.sal$date >= "2018-05-15" & rb.sal$date < "2019-09-12",]
fp.sal<-fp.sal[fp.sal$date >= "2018-05-15" & fp.sal$date < "2019-09-12",]

#pH
cc.ph<- cc.ph[cc.ph$date >= "2018-05-14" & cc.ph$date < "2019-09-12",]
eos.ph<-eos.ph[eos.ph$date >= "2018-05-14" & eos.ph$date < "2019-09-12",]
rb.ph<- rb.ph[rb.ph$date >= "2018-05-15" & rb.ph$date < "2019-09-12",]
#no ph data at fort point

#watertemp
cc.wtemp<- cc.wtemp[cc.wtemp$date >= "2018-05-14" & cc.wtemp$date < "2019-09-12",]
eos.wtemp<-eos.wtemp[eos.wtemp$date >= "2018-05-14" & eos.wtemp$date < "2019-09-12",]
rb.wtemp<- rb.wtemp[rb.wtemp$date >= "2018-05-15" & rb.wtemp$date < "2019-09-12",]
fp.wtemp<-fp.wtemp[fp.wtemp$date >= "2018-05-15" & fp.wtemp$date < "2019-09-12",]


#daily summaries
cc.daily.sum<- cc.daily.sum[cc.daily.sum$date >= "2018-05-14" & cc.daily.sum$date < "2019-09-12",]
eos.daily.sum<- eos.daily.sum[eos.daily.sum$date >= "2018-05-14" & eos.daily.sum$date < "2019-09-12",]
rb.daily.sum<- rb.daily.sum[rb.daily.sum$date >= "2018-05-15" & rb.daily.sum$date < "2019-09-12",]
fp.daily.sum<- fp.daily.sum[fp.daily.sum$date >= "2018-05-15" & fp.daily.sum$date < "2019-09-12",]

####Summaries####

Similar to what I did in envir.field code used for my mixed model. Calculating the max, min, and median hourly data and max, min, median daily range from daily summary data. Creating a new df to use for table.

Salinity

#China Camp
#max, min, median hourly median salinity - hourly data
cc1<-as.data.frame(setDT(cc.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
cc2<-as.data.frame(setDT(cc.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])

cc.field.sum<-cbind(cc1, cc2)
cc.field.sum$site<-"China Camp"

#EOS#
#max, min, median hourly median salinity - hourly data
eos1<-as.data.frame(setDT(eos.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
eos2<-as.data.frame(setDT(eos.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])

eos.field.sum<-cbind(eos1, eos2)
eos.field.sum$site<-"Tiburon"

#Richardson Bay#
#max, min, median hourly median salinity - hourly data
rb1<-as.data.frame(setDT(rb.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
rb2<-as.data.frame(setDT(rb.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])

rb.field.sum<-cbind(rb1, rb2)
rb.field.sum$site<-"Richarson Bay"

#Fort Point#
#max, min, median hourly median salinity - hourly data
fp1<-as.data.frame(setDT(fp.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
fp2<-as.data.frame(setDT(fp.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])

fp.field.sum<-cbind(fp1, fp2)
fp.field.sum$site<-"Fort Point"

#Combine
site.summary<-rbind(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)

rm(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)

pH

#China Camp#
#max, min, median hourly median ph - hourly data
cc3<-as.data.frame(setDT(cc.ph)[, .(max.ph = max(ph, na.rm=TRUE), min.ph = min(ph, na.rm=TRUE), med.ph=median(ph, na.rm=TRUE))])

#max, min, median daily ph range - daily data
cc4<-as.data.frame(setDT(cc.daily.sum)[, .(max.ph.range = max(daily.ph.range, na.rm=TRUE), min.ph.range = min(daily.ph.range, na.rm=TRUE), med.ph.range=median(daily.ph.range, na.rm=TRUE))])

cc.field.sum<-cbind(cc3, cc4)

#EOS#
#max, min, median hourly median ph - hourly data
eos3<-as.data.frame(setDT(eos.ph)[, .(max.ph = max(ph, na.rm=TRUE), min.ph = min(ph, na.rm=TRUE), med.ph=median(ph, na.rm=TRUE))])

#max, min, median daily ph range - daily data
eos4<-as.data.frame(setDT(eos.daily.sum)[, .(max.ph.range = max(daily.ph.range, na.rm=TRUE), min.ph.range = min(daily.ph.range, na.rm=TRUE), med.ph.range=median(daily.ph.range, na.rm=TRUE))])

eos.field.sum<-cbind(eos3, eos4)

#Richardson Bay#
#max, min, median hourly median ph - hourly data
rb3<-as.data.frame(setDT(rb.ph)[, .(max.ph = max(ph, na.rm=TRUE), min.ph = min(ph, na.rm=TRUE), med.ph=median(ph, na.rm=TRUE))])

#max, min, median daily ph range - daily data
rb4<-as.data.frame(setDT(rb.daily.sum)[, .(max.ph.range = max(daily.ph.range, na.rm=TRUE), min.ph.range = min(daily.ph.range, na.rm=TRUE), med.ph.range=median(daily.ph.range, na.rm=TRUE))])

rb.field.sum<-cbind(rb3, rb4)


#Fort Point#
#no pH data at fort point, making a "blank" df
max.ph<-NA
min.ph<-NA
med.ph<-NA
max.ph.range<-NA
min.ph.range<-NA
med.ph.range<-NA
fp.field.sum<-data.frame(max.ph, min.ph, med.ph, max.ph.range, min.ph.range, med.ph.range)


#Combine
ph<-rbind(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
site.summary<-cbind(site.summary, ph)

rm(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)

Water temperature

#China Camp
#max, min, median hourly median water temperature - hourly data
cc1<-as.data.frame(setDT(cc.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
cc2<-as.data.frame(setDT(cc.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])

cc.field.sum<-cbind(cc1, cc2)
cc.field.sum$site<-"China Camp"

#EOS#
#max, min, median hourly median salinity - hourly data
eos1<-as.data.frame(setDT(eos.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
eos2<-as.data.frame(setDT(eos.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])

eos.field.sum<-cbind(eos1, eos2)
eos.field.sum$site<-"Tiburon"

#Richardson Bay#
#max, min, median hourly median salinity - hourly data
rb1<-as.data.frame(setDT(rb.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
rb2<-as.data.frame(setDT(rb.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])

rb.field.sum<-cbind(rb1, rb2)
rb.field.sum$site<-"Richarson Bay"

#Fort Point#
#max, min, median hourly median salinity - hourly data
fp1<-as.data.frame(setDT(fp.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])

#max, min, median daily salinity range - daily data
fp2<-as.data.frame(setDT(fp.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])

fp.field.sum<-cbind(fp1, fp2)
fp.field.sum$site<-"Fort Point"

#Combine
wt<-rbind(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
site.summary<-cbind(site.summary, wt)

rm(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)

#remove duplicate "site" column
site.summary<-site.summary[-c(7)]

####Table#### https://blog.rstudio.com/2020/04/08/great-looking-tables-gt-0-2/ https://yutannihilation.github.io/gt/reference/ https://www.allisonhorst.com/post/2020-03-02-gt-tables-examples/

Very basic table

site.summary%>% gt()
max.sal min.sal med.sal max.sal.range min.sal.range med.sal.range max.ph min.ph med.ph max.ph.range min.ph.range med.ph.range max.wtemp min.wtemp med.wtemp max.wtemp.range min.wtemp.range med.wtemp.range site
29.7000 1.7000 23.300 15.1000 0.900 5.225 9.00 7.300 7.90 1.30 0.00 0.20 30.90 7.250 17.900 13.600 0.4 3.350 China Camp
32.1150 2.9550 27.590 23.0350 2.665 6.630 8.36 7.285 7.85 0.45 0.01 0.08 20.75 9.895 15.800 7.725 0.0 2.620 Tiburon
32.3000 12.3000 29.400 4.5000 0.200 1.100 9.00 7.450 8.00 1.15 0.00 0.20 23.75 9.100 17.200 7.500 0.1 2.325 Richarson Bay
32.4915 8.9535 29.609 15.9365 0.000 1.931 NA NA NA NA NA NA 18.55 10.770 14.275 5.335 0.0 1.285 Fort Point

Aesthetics
Maybe I flip the orientation of this table because right now it’s going to be very long

#data
table<-
  site.summary %>%
  gt() %>%                   #gt() package
  fmt_number(                #format numbers in selected rows to display 2 decimal points
    columns = vars(
      max.sal,
      min.sal,
      med.sal,
      max.sal.range,
      min.sal.range,
      med.sal.range,
      max.ph,
      min.ph,
      med.ph,
      max.ph.range,
      min.ph.range,
      med.ph.range,
      max.wtemp,
      min.wtemp,
      med.wtemp,
      max.wtemp.range,
      min.wtemp.range,
      med.wtemp.range,
      
    ),
    decimals = 2
  ) %>%
  tab_header(title = md("Field site summary"),            #add title, subtitle, and note
             subtitle = "05/14/2017-09/12/2019") %>%
  tab_source_note(md("Data courtesy of CeNCOOS and NERR")) %>%
  cols_label(                                            #change titles of columns
    max.sal = "max",
    min.sal = "min",
    med.sal = "median",
    max.sal.range = "max daily range",
    min.sal.range = "min daily range",
    med.sal.range = "median daily range",
    max.ph = "max",
    min.ph = "min",
    med.ph = "median",
    max.ph.range = "max daily range",
    min.ph.range = "min daily range",
    med.ph.range = "median daily range",
    max.wtemp = "max",
    min.wtemp = "min",
    med.wtemp = "median",
    max.wtemp.range = "max daily range",
    min.wtemp.range = "min daily range",
    med.wtemp.range = "median daily range",
    site = "Site",
    
  ) %>%                                   #tiered lables
  tab_spanner(label = "Salinity", columns = matches("sal"),) %>%
  tab_spanner(label = "pH", columns = matches("ph"),) %>%
  tab_spanner(label = "Water Temperature", columns = matches("wtemp"),) %>%
  cols_move_to_start(columns = vars(site)) %>%                  #move column to front
  data_color(                                                   #cell colors
    columns = vars(max.sal),
    colors = scales::col_numeric(                       #because the column is numeric
      palette = c("blue", "navy blue"),           #color scheme/gradient
      domain = c(29, 33)                          #column scale endpoints
    )
  ) %>%
  data_color(
    columns = vars(min.sal),
    colors = scales::col_numeric(
      palette = c("light blue", "blue"),
      domain = c(1, 13)
    )
  ) %>%
  data_color(
    columns = vars(med.sal),
    colors = scales::col_numeric(
      palette = c("light blue", "blue"),
      domain = c(1, 35)
    )
  ) %>%
  data_color(
    columns = vars(min.ph),
    colors = scales::col_numeric(
      palette = c("red", "orange"),
      domain = c(6, 8)
    )
  ) %>%
  data_color(
    columns = vars(max.ph),
    colors = scales::col_numeric(
      palette = c("yellow", "orange"),
      domain = c(8, 9)
    )
  ) %>%
  data_color(
    columns = vars(med.ph),
    colors = scales::col_numeric(
      palette = c("yellow", "red"),
      domain = c(6, 9)
    )
  ) %>%
  data_color(
    columns = vars(min.wtemp),
    colors = scales::col_numeric(
      palette = c("purple", "blue"),
      domain = c(5, 15)
    )
  ) %>%
  data_color(
    columns = vars(max.wtemp),
    colors = scales::col_numeric(
      palette = c("light blue", "blue"),
      domain = c(18, 31)
    )
  ) %>%
  data_color(
    columns = vars(med.wtemp),
    colors = scales::col_numeric(
      palette = c("purple", "red"),
      domain = c(14, 18)
    )
  )
  
table  

Field site summary
05/14/2017-09/12/2019
Site Salinity pH Water Temperature
max min median max daily range min daily range median daily range max min median max daily range min daily range median daily range max min median max daily range min daily range median daily range
China Camp 29.70 1.70 23.30 15.10 0.90 5.22 9.00 7.30 7.90 1.30 0.00 0.20 30.90 7.25 17.90 13.60 0.40 3.35
Tiburon 32.12 2.96 27.59 23.04 2.66 6.63 8.36 7.29 7.85 0.45 0.01 0.08 20.75 9.89 15.80 7.72 0.00 2.62
Richarson Bay 32.30 12.30 29.40 4.50 0.20 1.10 9.00 7.45 8.00 1.15 0.00 0.20 23.75 9.10 17.20 7.50 0.10 2.33
Fort Point 32.49 8.95 29.61 15.94 0.00 1.93 NA NA NA NA NA NA 18.55 10.77 14.28 5.33 0.00 1.29
Data courtesy of CeNCOOS and NERR
Colors are awful but I like the general ideal of this graph

Save table

webshot::install_phantomjs()
## It seems that the version of `phantomjs` installed is greater than or equal to the requested version.To install the requested version or downgrade to another version, use `force = TRUE`.
gtsave(table, "C:/Users/chels/Box Sync/Thesis/Data/Working data/GitHubReady/Tables/field_table.png")

table %>%
   gtsave(
     "tab_1.html", inline_css = TRUE,
     path = tempdir()
   )