Goal: a table summarizing salinity, pH, and water temperature at my four sites. (Air temperature should probably be a separate table since one of the data sources I use is different than the buoys I use for the water data?) Summarizing environmental conditions during the course of my field work.
Columns:
- From hourly median data: max value, min value, overall median - From daily summary data: max daily range, min daily range, median daily range
Questions:
- Should I add an “events” column? For example, how many days with a daily max below 10 salinity?
rm(list=ls())
library(tidyverse)
library(ggpubr)
library(scales)
library(chron)
library(plotly)
library(taRifx)
library(aweek)
library(easypackages)
library(renv)
library(here)
library(ggthemes)
library(gridExtra)
library(patchwork)
library(tidyquant)
library(recipes)
library(cranlogs)
library(knitr)
library(openair)
library(data.table)
library(gt)
####Setting up#### Reading in data. Need both hourly median datasets and daily summaries.
Hourly medians
#Salinity
cc.sal<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/cc_sal.csv",
header = TRUE
)
eos.sal<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/eos_sal.csv",
header = TRUE
)
rb.sal<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/rb_sal.csv",
header = TRUE
)
fp.sal<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/fp_sal.csv",
header = TRUE
)
#pH
cc.ph<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/cc_ph.csv",
header = TRUE
)
eos.ph<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/eos_ph.csv",
header = TRUE
)
rb.ph<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/rb_ph.csv",
header = TRUE
)
#no pH data at Fort Point
#water temperature
cc.wtemp<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/cc_watertemp.csv",
header = TRUE
)
eos.wtemp<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/eos_watertemp.csv",
header = TRUE
)
rb.wtemp<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/rb_watertemp.csv",
header = TRUE
)
fp.wtemp<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/hourly_median/fp_watertemp.csv",
header = TRUE
)
Daily summaries
cc.daily.sum<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/cc.daily.sum.csv",
header = TRUE
)
eos.daily.sum<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/eos.daily.sum.csv",
header = TRUE
)
rb.daily.sum<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/rb.daily.sum.csv",
header = TRUE
)
fp.daily.sum<-read.csv(
"https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environmental/daily_summaries/fp.daily.sum.csv",
header = TRUE
)
Format date for all data sets
cc.sal$date<-as.Date(cc.sal$date, format = c("%Y-%m-%d"))
cc.ph$date<-as.Date(cc.ph$date, format = c("%Y-%m-%d"))
cc.wtemp$date<-as.Date(cc.wtemp$date, format = c("%Y-%m-%d"))
cc.daily.sum$date<-as.Date(cc.daily.sum$date, format = c("%Y-%m-%d"))
eos.sal$date<-as.Date(eos.sal$date, format = c("%Y-%m-%d"))
eos.ph$date<-as.Date(eos.ph$date, format = c("%Y-%m-%d"))
eos.wtemp$date<-as.Date(eos.wtemp$date, format = c("%Y-%m-%d"))
eos.daily.sum$date<-as.Date(eos.daily.sum$date, format = c("%Y-%m-%d"))
rb.sal$date<-as.Date(rb.sal$datetime, format = c("%Y-%m-%d"))
rb.ph$date<-as.Date(rb.ph$date, format = c("%Y-%m-%d"))
rb.wtemp$date<-as.Date(rb.wtemp$date, format = c("%Y-%m-%d"))
rb.daily.sum$date<-as.Date(rb.daily.sum$date, format = c("%Y-%m-%d"))
fp.sal$date<-as.Date(fp.sal$date, format = c("%Y-%m-%d"))
fp.wtemp$date<-as.Date(fp.wtemp$date, format = c("%Y-%m-%d"))
fp.daily.sum$date<-as.Date(fp.daily.sum$date, format = c("%Y-%m-%d"))
I’ve summarized between survey dates in the my dataset I use for the mixed modles, but here I’m going to summarize over the entire field time for this table (one month before the first survey to the last survey date.)
Subset data to focus on field dates. Should I do the same dates for all of them? They differ by max of one day
#salinity
cc.sal<- cc.sal[cc.sal$date >= "2018-05-14" & cc.sal$date < "2019-09-12",]
eos.sal<-eos.sal[eos.sal$date >= "2018-05-14" & eos.sal$date < "2019-09-12",]
rb.sal<- rb.sal[rb.sal$date >= "2018-05-15" & rb.sal$date < "2019-09-12",]
fp.sal<-fp.sal[fp.sal$date >= "2018-05-15" & fp.sal$date < "2019-09-12",]
#pH
cc.ph<- cc.ph[cc.ph$date >= "2018-05-14" & cc.ph$date < "2019-09-12",]
eos.ph<-eos.ph[eos.ph$date >= "2018-05-14" & eos.ph$date < "2019-09-12",]
rb.ph<- rb.ph[rb.ph$date >= "2018-05-15" & rb.ph$date < "2019-09-12",]
#no ph data at fort point
#watertemp
cc.wtemp<- cc.wtemp[cc.wtemp$date >= "2018-05-14" & cc.wtemp$date < "2019-09-12",]
eos.wtemp<-eos.wtemp[eos.wtemp$date >= "2018-05-14" & eos.wtemp$date < "2019-09-12",]
rb.wtemp<- rb.wtemp[rb.wtemp$date >= "2018-05-15" & rb.wtemp$date < "2019-09-12",]
fp.wtemp<-fp.wtemp[fp.wtemp$date >= "2018-05-15" & fp.wtemp$date < "2019-09-12",]
#daily summaries
cc.daily.sum<- cc.daily.sum[cc.daily.sum$date >= "2018-05-14" & cc.daily.sum$date < "2019-09-12",]
eos.daily.sum<- eos.daily.sum[eos.daily.sum$date >= "2018-05-14" & eos.daily.sum$date < "2019-09-12",]
rb.daily.sum<- rb.daily.sum[rb.daily.sum$date >= "2018-05-15" & rb.daily.sum$date < "2019-09-12",]
fp.daily.sum<- fp.daily.sum[fp.daily.sum$date >= "2018-05-15" & fp.daily.sum$date < "2019-09-12",]
####Summaries####
Similar to what I did in envir.field code used for my mixed model. Calculating the max, min, and median hourly data and max, min, median daily range from daily summary data. Creating a new df to use for table.
Salinity
#China Camp
#max, min, median hourly median salinity - hourly data
cc1<-as.data.frame(setDT(cc.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
cc2<-as.data.frame(setDT(cc.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])
cc.field.sum<-cbind(cc1, cc2)
cc.field.sum$site<-"China Camp"
#EOS#
#max, min, median hourly median salinity - hourly data
eos1<-as.data.frame(setDT(eos.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
eos2<-as.data.frame(setDT(eos.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])
eos.field.sum<-cbind(eos1, eos2)
eos.field.sum$site<-"Tiburon"
#Richardson Bay#
#max, min, median hourly median salinity - hourly data
rb1<-as.data.frame(setDT(rb.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
rb2<-as.data.frame(setDT(rb.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])
rb.field.sum<-cbind(rb1, rb2)
rb.field.sum$site<-"Richarson Bay"
#Fort Point#
#max, min, median hourly median salinity - hourly data
fp1<-as.data.frame(setDT(fp.sal)[, .(max.sal = max(salinity, na.rm=TRUE), min.sal = min(salinity, na.rm=TRUE), med.sal=median(salinity, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
fp2<-as.data.frame(setDT(fp.daily.sum)[, .(max.sal.range = max(daily.sal.range, na.rm=TRUE), min.sal.range = min(daily.sal.range, na.rm=TRUE), med.sal.range=median(daily.sal.range, na.rm=TRUE))])
fp.field.sum<-cbind(fp1, fp2)
fp.field.sum$site<-"Fort Point"
#Combine
site.summary<-rbind(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
rm(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
pH
#China Camp#
#max, min, median hourly median ph - hourly data
cc3<-as.data.frame(setDT(cc.ph)[, .(max.ph = max(ph, na.rm=TRUE), min.ph = min(ph, na.rm=TRUE), med.ph=median(ph, na.rm=TRUE))])
#max, min, median daily ph range - daily data
cc4<-as.data.frame(setDT(cc.daily.sum)[, .(max.ph.range = max(daily.ph.range, na.rm=TRUE), min.ph.range = min(daily.ph.range, na.rm=TRUE), med.ph.range=median(daily.ph.range, na.rm=TRUE))])
cc.field.sum<-cbind(cc3, cc4)
#EOS#
#max, min, median hourly median ph - hourly data
eos3<-as.data.frame(setDT(eos.ph)[, .(max.ph = max(ph, na.rm=TRUE), min.ph = min(ph, na.rm=TRUE), med.ph=median(ph, na.rm=TRUE))])
#max, min, median daily ph range - daily data
eos4<-as.data.frame(setDT(eos.daily.sum)[, .(max.ph.range = max(daily.ph.range, na.rm=TRUE), min.ph.range = min(daily.ph.range, na.rm=TRUE), med.ph.range=median(daily.ph.range, na.rm=TRUE))])
eos.field.sum<-cbind(eos3, eos4)
#Richardson Bay#
#max, min, median hourly median ph - hourly data
rb3<-as.data.frame(setDT(rb.ph)[, .(max.ph = max(ph, na.rm=TRUE), min.ph = min(ph, na.rm=TRUE), med.ph=median(ph, na.rm=TRUE))])
#max, min, median daily ph range - daily data
rb4<-as.data.frame(setDT(rb.daily.sum)[, .(max.ph.range = max(daily.ph.range, na.rm=TRUE), min.ph.range = min(daily.ph.range, na.rm=TRUE), med.ph.range=median(daily.ph.range, na.rm=TRUE))])
rb.field.sum<-cbind(rb3, rb4)
#Fort Point#
#no pH data at fort point, making a "blank" df
max.ph<-NA
min.ph<-NA
med.ph<-NA
max.ph.range<-NA
min.ph.range<-NA
med.ph.range<-NA
fp.field.sum<-data.frame(max.ph, min.ph, med.ph, max.ph.range, min.ph.range, med.ph.range)
#Combine
ph<-rbind(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
site.summary<-cbind(site.summary, ph)
rm(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
Water temperature
#China Camp
#max, min, median hourly median water temperature - hourly data
cc1<-as.data.frame(setDT(cc.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
cc2<-as.data.frame(setDT(cc.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])
cc.field.sum<-cbind(cc1, cc2)
cc.field.sum$site<-"China Camp"
#EOS#
#max, min, median hourly median salinity - hourly data
eos1<-as.data.frame(setDT(eos.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
eos2<-as.data.frame(setDT(eos.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])
eos.field.sum<-cbind(eos1, eos2)
eos.field.sum$site<-"Tiburon"
#Richardson Bay#
#max, min, median hourly median salinity - hourly data
rb1<-as.data.frame(setDT(rb.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
rb2<-as.data.frame(setDT(rb.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])
rb.field.sum<-cbind(rb1, rb2)
rb.field.sum$site<-"Richarson Bay"
#Fort Point#
#max, min, median hourly median salinity - hourly data
fp1<-as.data.frame(setDT(fp.wtemp)[, .(max.wtemp = max(water_temp, na.rm=TRUE), min.wtemp = min(water_temp, na.rm=TRUE), med.wtemp=median(water_temp, na.rm=TRUE))])
#max, min, median daily salinity range - daily data
fp2<-as.data.frame(setDT(fp.daily.sum)[, .(max.wtemp.range = max(daily.wt.range, na.rm=TRUE), min.wtemp.range = min(daily.wt.range, na.rm=TRUE), med.wtemp.range=median(daily.wt.range, na.rm=TRUE))])
fp.field.sum<-cbind(fp1, fp2)
fp.field.sum$site<-"Fort Point"
#Combine
wt<-rbind(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
site.summary<-cbind(site.summary, wt)
rm(cc.field.sum, eos.field.sum, rb.field.sum, fp.field.sum)
#remove duplicate "site" column
site.summary<-site.summary[-c(7)]
####Table#### https://blog.rstudio.com/2020/04/08/great-looking-tables-gt-0-2/ https://yutannihilation.github.io/gt/reference/ https://www.allisonhorst.com/post/2020-03-02-gt-tables-examples/
Very basic table
site.summary%>% gt()
| max.sal | min.sal | med.sal | max.sal.range | min.sal.range | med.sal.range | max.ph | min.ph | med.ph | max.ph.range | min.ph.range | med.ph.range | max.wtemp | min.wtemp | med.wtemp | max.wtemp.range | min.wtemp.range | med.wtemp.range | site |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 29.7000 | 1.7000 | 23.300 | 15.1000 | 0.900 | 5.225 | 9.00 | 7.300 | 7.90 | 1.30 | 0.00 | 0.20 | 30.90 | 7.250 | 17.900 | 13.600 | 0.4 | 3.350 | China Camp |
| 32.1150 | 2.9550 | 27.590 | 23.0350 | 2.665 | 6.630 | 8.36 | 7.285 | 7.85 | 0.45 | 0.01 | 0.08 | 20.75 | 9.895 | 15.800 | 7.725 | 0.0 | 2.620 | Tiburon |
| 32.3000 | 12.3000 | 29.400 | 4.5000 | 0.200 | 1.100 | 9.00 | 7.450 | 8.00 | 1.15 | 0.00 | 0.20 | 23.75 | 9.100 | 17.200 | 7.500 | 0.1 | 2.325 | Richarson Bay |
| 32.4915 | 8.9535 | 29.609 | 15.9365 | 0.000 | 1.931 | NA | NA | NA | NA | NA | NA | 18.55 | 10.770 | 14.275 | 5.335 | 0.0 | 1.285 | Fort Point |
Aesthetics
Maybe I flip the orientation of this table because right now it’s going to be very long
#data
table<-
site.summary %>%
gt() %>% #gt() package
fmt_number( #format numbers in selected rows to display 2 decimal points
columns = vars(
max.sal,
min.sal,
med.sal,
max.sal.range,
min.sal.range,
med.sal.range,
max.ph,
min.ph,
med.ph,
max.ph.range,
min.ph.range,
med.ph.range,
max.wtemp,
min.wtemp,
med.wtemp,
max.wtemp.range,
min.wtemp.range,
med.wtemp.range,
),
decimals = 2
) %>%
tab_header(title = md("Field site summary"), #add title, subtitle, and note
subtitle = "05/14/2017-09/12/2019") %>%
tab_source_note(md("Data courtesy of CeNCOOS and NERR")) %>%
cols_label( #change titles of columns
max.sal = "max",
min.sal = "min",
med.sal = "median",
max.sal.range = "max daily range",
min.sal.range = "min daily range",
med.sal.range = "median daily range",
max.ph = "max",
min.ph = "min",
med.ph = "median",
max.ph.range = "max daily range",
min.ph.range = "min daily range",
med.ph.range = "median daily range",
max.wtemp = "max",
min.wtemp = "min",
med.wtemp = "median",
max.wtemp.range = "max daily range",
min.wtemp.range = "min daily range",
med.wtemp.range = "median daily range",
site = "Site",
) %>% #tiered lables
tab_spanner(label = "Salinity", columns = matches("sal"),) %>%
tab_spanner(label = "pH", columns = matches("ph"),) %>%
tab_spanner(label = "Water Temperature", columns = matches("wtemp"),) %>%
cols_move_to_start(columns = vars(site)) %>% #move column to front
data_color( #cell colors
columns = vars(max.sal),
colors = scales::col_numeric( #because the column is numeric
palette = c("blue", "navy blue"), #color scheme/gradient
domain = c(29, 33) #column scale endpoints
)
) %>%
data_color(
columns = vars(min.sal),
colors = scales::col_numeric(
palette = c("light blue", "blue"),
domain = c(1, 13)
)
) %>%
data_color(
columns = vars(med.sal),
colors = scales::col_numeric(
palette = c("light blue", "blue"),
domain = c(1, 35)
)
) %>%
data_color(
columns = vars(min.ph),
colors = scales::col_numeric(
palette = c("red", "orange"),
domain = c(6, 8)
)
) %>%
data_color(
columns = vars(max.ph),
colors = scales::col_numeric(
palette = c("yellow", "orange"),
domain = c(8, 9)
)
) %>%
data_color(
columns = vars(med.ph),
colors = scales::col_numeric(
palette = c("yellow", "red"),
domain = c(6, 9)
)
) %>%
data_color(
columns = vars(min.wtemp),
colors = scales::col_numeric(
palette = c("purple", "blue"),
domain = c(5, 15)
)
) %>%
data_color(
columns = vars(max.wtemp),
colors = scales::col_numeric(
palette = c("light blue", "blue"),
domain = c(18, 31)
)
) %>%
data_color(
columns = vars(med.wtemp),
colors = scales::col_numeric(
palette = c("purple", "red"),
domain = c(14, 18)
)
)
table
| Field site summary | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 05/14/2017-09/12/2019 | ||||||||||||||||||
| Site | Salinity | pH | Water Temperature | |||||||||||||||
| max | min | median | max daily range | min daily range | median daily range | max | min | median | max daily range | min daily range | median daily range | max | min | median | max daily range | min daily range | median daily range | |
| China Camp | 29.70 | 1.70 | 23.30 | 15.10 | 0.90 | 5.22 | 9.00 | 7.30 | 7.90 | 1.30 | 0.00 | 0.20 | 30.90 | 7.25 | 17.90 | 13.60 | 0.40 | 3.35 |
| Tiburon | 32.12 | 2.96 | 27.59 | 23.04 | 2.66 | 6.63 | 8.36 | 7.29 | 7.85 | 0.45 | 0.01 | 0.08 | 20.75 | 9.89 | 15.80 | 7.72 | 0.00 | 2.62 |
| Richarson Bay | 32.30 | 12.30 | 29.40 | 4.50 | 0.20 | 1.10 | 9.00 | 7.45 | 8.00 | 1.15 | 0.00 | 0.20 | 23.75 | 9.10 | 17.20 | 7.50 | 0.10 | 2.33 |
| Fort Point | 32.49 | 8.95 | 29.61 | 15.94 | 0.00 | 1.93 | NA | NA | NA | NA | NA | NA | 18.55 | 10.77 | 14.28 | 5.33 | 0.00 | 1.29 |
| Data courtesy of CeNCOOS and NERR | ||||||||||||||||||
Save table
webshot::install_phantomjs()
## It seems that the version of `phantomjs` installed is greater than or equal to the requested version.To install the requested version or downgrade to another version, use `force = TRUE`.
gtsave(table, "C:/Users/chels/Box Sync/Thesis/Data/Working data/GitHubReady/Tables/field_table.png")
table %>%
gtsave(
"tab_1.html", inline_css = TRUE,
path = tempdir()
)