AIC: https://www.scribbr.com/statistics/akaike-information-criterion/
Fixed vs random factors: https://rstudio-pubs-static.s3.amazonaws.com/372492_3e05f38dd3f248e89cdedd317d603b9a.html#42_fixed_effects_model
Possible transformations for count data: https://www.rdocumentation.org/packages/dlookr/versions/0.3.13/topics/transform

Set up

rm(list=ls())

library(tidyverse)
library(ggpubr)
library(scales)
library(chron)
library(plotly)
library(taRifx)
library(aweek)
library(easypackages)
library(renv)
library(here)
library(ggthemes)
library(gridExtra)
library(patchwork)
library(tidyquant)
library(recipes) 
library(cranlogs)
library(knitr)
library(openair)

#new
library(AICcmodavg)
library(plm) #for random factors
library(lme4)

Read in data

#read in data
all<-read.csv(
      "https://raw.githubusercontent.com/Cmwegener/thesis/master/data/environment_field/envi.field.all.csv",
    header = TRUE
  )

Make month year column. Was originally going to do just month but since I have some duplicate months I decided to do month-year. NOTE: since you need a day to convert to date format, I decided just to keep month year column as a character and will just keep that in mind for the next step.

all$date<-as.Date(all$date, format = c("%Y-%m-%d"))
all$month.yr<-format(all$date, "%Y-%m")

Season column. Split year into quarters. Added year since there’s some duplicate months? Since it’s a character I’ll need to list all the month.yr per season.
2018-06 - 2018-08 summer.2018
2018-09 - 2018-11 fall.2018
2018-12 - 2019-02 winter.2018
2019-03 - 2019-05 spring.2019
2019-06 - 2019-08 summer.2019
2019-09 - 2019-11 fall.2019 incomplete season, only have field data for 2019-09 –> Think I should probably remove this term and put “NA” since I only have field data for that month. Doesn’t seem right to compare it to the other seasons with more months and data.

all$season[all$month.yr == "2018-06"] <- "summer.2018"
all$season[all$month.yr == "2018-07"] <- "summer.2018"
all$season[all$month.yr == "2018-08"] <- "summer.2018"

all$season[all$month.yr == "2018-09"] <- "fall.2018"
all$season[all$month.yr == "2018-10"] <- "fall.2018"
all$season[all$month.yr == "2018-11"] <- "fall.2018"

all$season[all$month.yr == "2018-12"] <- "winter.2018"
all$season[all$month.yr == "2019-01"] <- "winter.2018"
all$season[all$month.yr == "2019-02"] <- "winter.2018"

all$season[all$month.yr == "2019-03"] <- "spring.2019"
all$season[all$month.yr == "2019-04"] <- "spring.2019"
all$season[all$month.yr == "2019-05"] <- "spring.2019"

all$season[all$month.yr == "2019-06"] <- "summer.2019"
all$season[all$month.yr == "2019-07"] <- "summer.2019"
all$season[all$month.yr == "2019-08"] <- "summer.2019"

all$season[all$month.yr == "2019-09"] <- "fall.2019"

####Density ~ site ####
Lm density ~ site

lm1 <- lm(no.fuc.q ~ site, data =all)  
lm1
## 
## Call:
## lm(formula = no.fuc.q ~ site, data = all)
## 
## Coefficients:
## (Intercept)    sitehs.fp   sitend.eos    sitepc.cc  
##      48.544      -30.363       -9.630       -4.565
summary (lm1)
## 
## Call:
## lm(formula = no.fuc.q ~ site, data = all)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.044 -20.013  -5.981   5.319 119.521 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   48.544      8.313   5.840 2.63e-07 ***
## sitehs.fp    -30.363     11.756  -2.583   0.0124 *  
## sitend.eos    -9.630     11.950  -0.806   0.4237    
## sitepc.cc     -4.565     12.169  -0.375   0.7089    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.25 on 57 degrees of freedom
## Multiple R-squared:  0.1193, Adjusted R-squared:  0.07291 
## F-statistic: 2.573 on 3 and 57 DF,  p-value: 0.0629
anova (lm1)
## Analysis of Variance Table
## 
## Response: no.fuc.q
##           Df Sum Sq Mean Sq F value Pr(>F)  
## site       3   8534  2844.5  2.5728 0.0629 .
## Residuals 57  63020  1105.6                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot (lm1)

Where’d the fourth site go? It’s in the dataset

glm density ~site

glm1 <- glm(no.fuc.q ~ site, data =all)  
glm1
## 
## Call:  glm(formula = no.fuc.q ~ site, data = all)
## 
## Coefficients:
## (Intercept)    sitehs.fp   sitend.eos    sitepc.cc  
##      48.544      -30.363       -9.630       -4.565  
## 
## Degrees of Freedom: 60 Total (i.e. Null);  57 Residual
## Null Deviance:       71550 
## Residual Deviance: 63020     AIC: 606.5
summary (glm1)
## 
## Call:
## glm(formula = no.fuc.q ~ site, data = all)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -38.044  -20.013   -5.981    5.319  119.521  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   48.544      8.313   5.840 2.63e-07 ***
## sitehs.fp    -30.363     11.756  -2.583   0.0124 *  
## sitend.eos    -9.630     11.950  -0.806   0.4237    
## sitepc.cc     -4.565     12.169  -0.375   0.7089    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 1105.622)
## 
##     Null deviance: 71554  on 60  degrees of freedom
## Residual deviance: 63020  on 57  degrees of freedom
## AIC: 606.47
## 
## Number of Fisher Scoring iterations: 2
anova (glm1)
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: no.fuc.q
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev
## NULL                    60      71554
## site  3   8533.5        57      63020
plot (glm1)

lm desnity ~ site, site fixed factor

lm2 <- lm(no.fuc.q ~ factor(site), data =all)  
lm2
## 
## Call:
## lm(formula = no.fuc.q ~ factor(site), data = all)
## 
## Coefficients:
##        (Intercept)   factor(site)hs.fp  factor(site)nd.eos   factor(site)pc.cc  
##             48.544             -30.363              -9.630              -4.565
summary (lm2)
## 
## Call:
## lm(formula = no.fuc.q ~ factor(site), data = all)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.044 -20.013  -5.981   5.319 119.521 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          48.544      8.313   5.840 2.63e-07 ***
## factor(site)hs.fp   -30.363     11.756  -2.583   0.0124 *  
## factor(site)nd.eos   -9.630     11.950  -0.806   0.4237    
## factor(site)pc.cc    -4.565     12.169  -0.375   0.7089    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.25 on 57 degrees of freedom
## Multiple R-squared:  0.1193, Adjusted R-squared:  0.07291 
## F-statistic: 2.573 on 3 and 57 DF,  p-value: 0.0629
anova (lm2)
## Analysis of Variance Table
## 
## Response: no.fuc.q
##              Df Sum Sq Mean Sq F value Pr(>F)  
## factor(site)  3   8534  2844.5  2.5728 0.0629 .
## Residuals    57  63020  1105.6                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot (lm2)

lm desnity ~ site, site random factor - don’t understand output

lm3<-lmer(no.fuc.q~1+(1|site), data = all)
lm3
## Linear mixed model fit by REML ['lmerMod']
## Formula: no.fuc.q ~ 1 + (1 | site)
##    Data: all
## REML criterion at convergence: 597.6365
## Random effects:
##  Groups   Name        Std.Dev.
##  site     (Intercept) 10.61   
##  Residual             33.24   
## Number of obs: 61, groups:  site, 4
## Fixed Effects:
## (Intercept)  
##       37.31
summary (lm3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: no.fuc.q ~ 1 + (1 | site)
##    Data: all
## 
## REML criterion at convergence: 597.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0570 -0.5800 -0.3332  0.0286  3.6786 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  site     (Intercept)  112.6   10.61   
##  Residual             1104.7   33.24   
## Number of obs: 61, groups:  site, 4
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   37.308      6.804   5.483
anova (lm3)
## Analysis of Variance Table
##      npar Sum Sq Mean Sq F value
plot (lm3)

####density~month####
Lm density ~ month

lm1 <- lm(no.fuc.q ~ month.yr, data =all)  
lm1
## 
## Call:
## lm(formula = no.fuc.q ~ month.yr, data = all)
## 
## Coefficients:
##     (Intercept)  month.yr2018-07  month.yr2018-08  month.yr2018-09  
##          34.950           48.075           71.000           42.300  
## month.yr2018-10  month.yr2018-11  month.yr2018-12  month.yr2019-01  
##           3.683          -12.250          -13.050          -12.475  
## month.yr2019-02  month.yr2019-03  month.yr2019-04  month.yr2019-05  
##         -13.975          -15.725          -13.700           -6.375  
## month.yr2019-06  month.yr2019-07  month.yr2019-08  month.yr2019-09  
##         -22.700           -7.775           -7.400          -11.075
summary (lm1)
## 
## Call:
## lm(formula = no.fuc.q ~ month.yr, data = all)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -80.450  -7.050  -0.875   8.950  57.550 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       34.950     12.460   2.805 0.007406 ** 
## month.yr2018-07   48.075     17.621   2.728 0.009050 ** 
## month.yr2018-08   71.000     17.621   4.029 0.000213 ***
## month.yr2018-09   42.300     17.621   2.400 0.020570 *  
## month.yr2018-10    3.683     19.033   0.194 0.847422    
## month.yr2018-11  -12.250     21.582  -0.568 0.573121    
## month.yr2018-12  -13.050     17.621  -0.741 0.462797    
## month.yr2019-01  -12.475     17.621  -0.708 0.482629    
## month.yr2019-02  -13.975     17.621  -0.793 0.431899    
## month.yr2019-03  -15.725     17.621  -0.892 0.376936    
## month.yr2019-04  -13.700     17.621  -0.777 0.440953    
## month.yr2019-05   -6.375     17.621  -0.362 0.719213    
## month.yr2019-06  -22.700     17.621  -1.288 0.204258    
## month.yr2019-07   -7.775     17.621  -0.441 0.661163    
## month.yr2019-08   -7.400     17.621  -0.420 0.676525    
## month.yr2019-09  -11.075     17.621  -0.628 0.532855    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.92 on 45 degrees of freedom
## Multiple R-squared:  0.6094, Adjusted R-squared:  0.4793 
## F-statistic: 4.681 on 15 and 45 DF,  p-value: 2.764e-05
anova (lm1)
## Analysis of Variance Table
## 
## Response: no.fuc.q
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## month.yr  15  43608 2907.19  4.6813 2.764e-05 ***
## Residuals 45  27946  621.03                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot (lm1)

glm density ~month.yr

glm1 <- glm(no.fuc.q ~ month.yr, data =all)  
glm1
## 
## Call:  glm(formula = no.fuc.q ~ month.yr, data = all)
## 
## Coefficients:
##     (Intercept)  month.yr2018-07  month.yr2018-08  month.yr2018-09  
##          34.950           48.075           71.000           42.300  
## month.yr2018-10  month.yr2018-11  month.yr2018-12  month.yr2019-01  
##           3.683          -12.250          -13.050          -12.475  
## month.yr2019-02  month.yr2019-03  month.yr2019-04  month.yr2019-05  
##         -13.975          -15.725          -13.700           -6.375  
## month.yr2019-06  month.yr2019-07  month.yr2019-08  month.yr2019-09  
##         -22.700           -7.775           -7.400          -11.075  
## 
## Degrees of Freedom: 60 Total (i.e. Null);  45 Residual
## Null Deviance:       71550 
## Residual Deviance: 27950     AIC: 580.9
summary (glm1)
## 
## Call:
## glm(formula = no.fuc.q ~ month.yr, data = all)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -80.450   -7.050   -0.875    8.950   57.550  
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       34.950     12.460   2.805 0.007406 ** 
## month.yr2018-07   48.075     17.621   2.728 0.009050 ** 
## month.yr2018-08   71.000     17.621   4.029 0.000213 ***
## month.yr2018-09   42.300     17.621   2.400 0.020570 *  
## month.yr2018-10    3.683     19.033   0.194 0.847422    
## month.yr2018-11  -12.250     21.582  -0.568 0.573121    
## month.yr2018-12  -13.050     17.621  -0.741 0.462797    
## month.yr2019-01  -12.475     17.621  -0.708 0.482629    
## month.yr2019-02  -13.975     17.621  -0.793 0.431899    
## month.yr2019-03  -15.725     17.621  -0.892 0.376936    
## month.yr2019-04  -13.700     17.621  -0.777 0.440953    
## month.yr2019-05   -6.375     17.621  -0.362 0.719213    
## month.yr2019-06  -22.700     17.621  -1.288 0.204258    
## month.yr2019-07   -7.775     17.621  -0.441 0.661163    
## month.yr2019-08   -7.400     17.621  -0.420 0.676525    
## month.yr2019-09  -11.075     17.621  -0.628 0.532855    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 621.0266)
## 
##     Null deviance: 71554  on 60  degrees of freedom
## Residual deviance: 27946  on 45  degrees of freedom
## AIC: 580.87
## 
## Number of Fisher Scoring iterations: 2
anova (glm1)
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: no.fuc.q
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev
## NULL                        60      71554
## month.yr 15    43608        45      27946
plot (glm1)

####density ~ season####
Lm density ~ season

lm1 <- lm(no.fuc.q ~ season, data =all)  
lm1
## 
## Call:
## lm(formula = no.fuc.q ~ season, data = all)
## 
## Coefficients:
##       (Intercept)    seasonfall.2019  seasonspring.2019  seasonsummer.2018  
##             52.26             -28.38             -29.24              22.39  
## seasonsummer.2019  seasonwinter.2018  
##            -29.93             -30.47
summary (lm1)
## 
## Call:
## lm(formula = no.fuc.q ~ season, data = all)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -55.942 -10.717  -2.883   8.275  88.858 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         52.256      9.435   5.539 8.83e-07 ***
## seasonfall.2019    -28.381     17.008  -1.669   0.1009    
## seasonspring.2019  -29.239     12.481  -2.343   0.0228 *  
## seasonsummer.2018   22.386     12.481   1.794   0.0784 .  
## seasonsummer.2019  -29.931     12.481  -2.398   0.0199 *  
## seasonwinter.2018  -30.472     12.481  -2.442   0.0179 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 28.3 on 55 degrees of freedom
## Multiple R-squared:  0.3842, Adjusted R-squared:  0.3283 
## F-statistic: 6.864 on 5 and 55 DF,  p-value: 4.853e-05
anova (lm1)
## Analysis of Variance Table
## 
## Response: no.fuc.q
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## season     5  27494  5498.8  6.8641 4.853e-05 ***
## Residuals 55  44060   801.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot (lm1)

Season looks pretty good. The only season that didn’t show significance was Fall 2019 but I think I should remove that term anyway since I only have field data for one month of the three month “season”. Doesn’t seem fair to compare to the other full seasons with more data and months.
Summer 2018 showing a less sig than the other seasons, wonder if that has to do with the heat wave that season.

glm density ~season

glm2 <- glm(no.fuc.q ~ season, data =all)  
glm2
## 
## Call:  glm(formula = no.fuc.q ~ season, data = all)
## 
## Coefficients:
##       (Intercept)    seasonfall.2019  seasonspring.2019  seasonsummer.2018  
##             52.26             -28.38             -29.24              22.39  
## seasonsummer.2019  seasonwinter.2018  
##            -29.93             -30.47  
## 
## Degrees of Freedom: 60 Total (i.e. Null);  55 Residual
## Null Deviance:       71550 
## Residual Deviance: 44060     AIC: 588.6
summary (glm2)
## 
## Call:
## glm(formula = no.fuc.q ~ season, data = all)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -55.942  -10.717   -2.883    8.275   88.858  
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         52.256      9.435   5.539 8.83e-07 ***
## seasonfall.2019    -28.381     17.008  -1.669   0.1009    
## seasonspring.2019  -29.239     12.481  -2.343   0.0228 *  
## seasonsummer.2018   22.386     12.481   1.794   0.0784 .  
## seasonsummer.2019  -29.931     12.481  -2.398   0.0199 *  
## seasonwinter.2018  -30.472     12.481  -2.442   0.0179 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 801.0908)
## 
##     Null deviance: 71554  on 60  degrees of freedom
## Residual deviance: 44060  on 55  degrees of freedom
## AIC: 588.64
## 
## Number of Fisher Scoring iterations: 2
anova (glm2)
## Analysis of Deviance Table
## 
## Model: gaussian, link: identity
## 
## Response: no.fuc.q
## 
## Terms added sequentially (first to last)
## 
## 
##        Df Deviance Resid. Df Resid. Dev
## NULL                      60      71554
## season  5    27494        55      44060
plot (glm2)