Note: All packages used in the Rmarkdown script are automatically downloaded if needed, and unpacked, in the first (hidden) code chunk. These include packages hosted on Github and obtained through Bioconductor.
df <- read.csv(here("Data", "HRS_SpeciesData.csv"),
sep=";", header=TRUE, as.is=TRUE,
na.strings=c(NA, "na", " NA"))
df <- df |>
mutate_at(c('Time_Num', 'Time_con'), as.numeric)
df <- df %>%
mutate(Date = trimws(as.character(Date))) # Remove leading/trailing spaces
df <- df |>
mutate(across(c('Superfamily', "Taxon",
'Date'), as.factor))
df <- df %>%
filter(!(Taxon %in% c("Empty")))#Remove the rows with zero findings.
sum_abundance <- df %>%
filter(!(Taxon %in% c("empty", "Empty"))) %>%
group_by(Time_con, Date) %>%
summarise(Abundance = sum(Abundance), .groups = "drop")
#plot
plot_abundance <- sum_abundance %>%
ggplot(aes(x = Time_con, y = Abundance)) +
geom_point() +
geom_smooth(span = 0.85, se = TRUE, color = "cornflowerblue", linewidth = 1) +
theme_minimal() +
geom_vline(xintercept = 6, linetype = "dashed", color = "black", alpha = 0.6) +
geom_vline(xintercept = 12, linetype = "dashed", color = "black", alpha = 0.6) +
geom_vline(xintercept = 18, linetype = "dashed", color = "black", alpha = 0.6) +
geom_vline(xintercept = 22, linetype = "dashed", color = "black", alpha = 0.6) +
xlab("Time of day") +
ylab("Total abundance") +
scale_x_continuous(limits = c(2, 22), breaks = seq(2, 22, by = 2)) +
scale_y_continuous(limits = c(0, 400), breaks = seq(0, 400, by = 50)) +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(color = "black")
)
#Divide time into time periods
df <- df %>%
mutate(Time_Period = case_when(
Time_Num >= 0.08 & Time_Num < 0.33 ~ "Night", #Samples from 22-06
Time_Num >= 0.33 & Time_Num < 0.58 ~ "Morning", #Samples from 06-12
Time_Num >= 0.58 & Time_Num < 0.83 ~ "Afternoon", #Samples from 12-18
Time_Num >= 0.83 | Time_Num < 0.08 ~ "Evening", #Samples from 18-22
TRUE ~ NA_character_
))
df$Time_Period[is.na(df$Time_Period)] <- "Evening"
# Put time periods in correct order
time_order <- c("Morning", "Afternoon", "Evening", "Night")
# Summarize total abundance per day and time period
df_sum <- df %>%
group_by(Date, Time_Period) %>%
summarise(TotalAbundance = sum(Abundance, na.rm = TRUE)) %>%
ungroup()
#Turn into wide format
df_wide <- df_sum %>%
pivot_wider(names_from = Time_Period, values_from = TotalAbundance)
#Run spearman correlation test
cor_matrix <- cor(dplyr::select(df_wide, -Date), method = "spearman",
use = "pairwise.complete.obs")
cor_df <- as.data.frame(cor_matrix) %>%
rownames_to_column(var = "Time1")
cor_long <- cor_df %>%
pivot_longer(
cols = -Time1,
names_to = "Time2",
values_to = "rho"
)
cor_long <- cor_long %>%
mutate(
Time1 = factor(Time1, levels = c("Morning", "Afternoon", "Evening", "Night")),
Time2 = factor(Time2, levels = c("Morning", "Afternoon", "Evening", "Night"))
)
#plot
correlationplot <- ggplot(cor_long, aes(Time1, Time2, fill = rho)) +
geom_tile() +
geom_text(aes(label = round(rho, 2)), color = "black", size = 5) +
scale_fill_gradient2(low = "cornflowerblue", mid = "white", high = "firebrick", midpoint = 0) +
theme_minimal() +
labs(x = "", y = "", fill = "Spearman rho")
# Create wide-format dataset for heatmap
heatmap_data <- df %>%
group_by(Time_Period, Taxon) %>%
summarise(Abundance = sum(Abundance), .groups = "drop") %>%
pivot_wider(names_from = Time_Period,
values_from = Abundance,
values_fill = list(Abundance = 0)) %>%
dplyr::select(Taxon, all_of(time_order)) %>%
column_to_rownames("Taxon")
#Replace 0s with NA
heatmap_data_na <- heatmap_data
heatmap_data_na[heatmap_data_na == 0] <- NA
#Log-transform the data
log_data <- log1p(heatmap_data_na)
Taxon_mapping <- data.frame(
Taxon = rownames(log_data),
Taxon_ID = seq_len(nrow(log_data))
)
# Replace row names in log_data
rownames(log_data) <- Taxon_mapping$Taxon_ID
# Create the heatmap with NA shown as white
plot_heatmap <- Heatmap(
log_data,
col = viridis(100),
na_col = "white",
cluster_rows = FALSE,
cluster_columns = FALSE,
show_row_names = TRUE,
column_names_rot = 0,
column_names_centered = TRUE,
column_names_side = "bottom",
border = TRUE,
rect_gp = gpar(col = "grey90", lwd = 0.5),
row_names_gp = gpar(fontsize = 6),
heatmap_legend_param = list(
title = NULL,
legend_height = unit(4, "cm"),
title_position = "topcenter",
labels_gp = gpar(fontsize = 10)
)
)
NMDS_data <- df %>%
group_by(Time_Period, Date, Taxon) %>%
summarise(Abundance = sum(Abundance), .groups = "drop") %>%
pivot_wider(
id_cols = c(Time_Period, Date),
names_from = Taxon,
values_from = Abundance,
values_fill = 0
)
NMDS_data$Time_Period <- as.factor(NMDS_data$Time_Period)
NMDS_data$Time_Period <- factor(NMDS_data$Time_Period, levels = c("Morning", "Afternoon", "Evening", "Night"))
#Standardize data
NMDS_data <- NMDS_data %>%
mutate(row_sum = rowSums(across(where(is.numeric))),
across(where(is.numeric), ~ . / row_sum)) %>%
dplyr::select(-row_sum)
#Subset the dataframe on which to base the ordination
species_data <- NMDS_data[,3:85]
#Identify the columns that contains the diel data
Diel_data <- NMDS_data[,1:2]
NMDS_mod <- metaMDS(species_data, distance = "bray", k = 2, trymax = 1000)
## Run 0 stress 0.1056525
## Run 1 stress 0.1056526
## ... Procrustes: rmse 0.0001263438 max resid 0.0004470798
## ... Similar to previous best
## Run 2 stress 0.1117394
## Run 3 stress 0.1096904
## Run 4 stress 0.1082662
## Run 5 stress 0.1056525
## ... Procrustes: rmse 2.201555e-05 max resid 7.596146e-05
## ... Similar to previous best
## Run 6 stress 0.1056525
## ... Procrustes: rmse 2.724357e-05 max resid 9.643912e-05
## ... Similar to previous best
## Run 7 stress 0.108579
## Run 8 stress 0.1117396
## Run 9 stress 0.1096904
## Run 10 stress 0.1117397
## Run 11 stress 0.1159448
## Run 12 stress 0.1056526
## ... Procrustes: rmse 0.0001300821 max resid 0.0004606749
## ... Similar to previous best
## Run 13 stress 0.1236548
## Run 14 stress 0.1088512
## Run 15 stress 0.1075685
## Run 16 stress 0.11691
## Run 17 stress 0.1117395
## Run 18 stress 0.11691
## Run 19 stress 0.1082663
## Run 20 stress 0.1159449
## *** Best solution repeated 4 times
# Run twice to avoid local optimum.
NMDS_mod <- metaMDS(species_data, distance = "bray", k = 2, trymax = 1000,
previous.best = NMDS_mod)
## Starting from 2-dimensional configuration
## Run 0 stress 0.1056525
## Run 1 stress 0.1075685
## Run 2 stress 0.1082662
## Run 3 stress 0.1075684
## Run 4 stress 0.1056526
## ... Procrustes: rmse 0.000117399 max resid 0.0004148205
## ... Similar to previous best
## Run 5 stress 0.1082662
## Run 6 stress 0.1075683
## Run 7 stress 0.11691
## Run 8 stress 0.1056525
## ... Procrustes: rmse 2.19304e-05 max resid 7.749299e-05
## ... Similar to previous best
## Run 9 stress 0.1075682
## Run 10 stress 0.1117394
## Run 11 stress 0.1163086
## Run 12 stress 0.1236551
## Run 13 stress 0.1159449
## Run 14 stress 0.1265001
## Run 15 stress 0.1056526
## ... Procrustes: rmse 0.0001328137 max resid 0.0004720156
## ... Similar to previous best
## Run 16 stress 0.1247028
## Run 17 stress 0.1117396
## Run 18 stress 0.1096904
## Run 19 stress 0.1246587
## Run 20 stress 0.11691
## *** Best solution repeated 7 times
#extract the site scores
datascores = as.data.frame(scores(NMDS_mod)$sites)
datascores$Date = NMDS_data$Date
datascores$Time_Period = NMDS_data$Time_Period
#plot by time period and date
NMDS_plot <- ggplot(datascores, aes(x = NMDS1, y = NMDS2, color = Time_Period)) +
geom_point(size = 2, aes(color = Time_Period)) +
coord_fixed() +
theme_bw() +
stat_ellipse(aes(color = Time_Period), level=0.9) +
theme(legend.position="right",
legend.text=element_text(size=20),
legend.title=element_text(size=22),
legend.direction='vertical',
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text = element_text(size = 16),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank()) +
annotate("text", x = max(datascores$NMDS1),
y = min(datascores$NMDS2),
label = paste("Stress =", round(NMDS_mod$stress, 3)),
hjust = 0.4, vjust = 1.5, size = 6) +
scale_color_brewer(palette = "Dark2") +
labs(color = "Time of Day")
##Permanova with date as a random factor
permutations <- with(Diel_data, how(nperm=999, blocks = Date))
#Calculate distance matrix
dist_matrix <- vegdist(species_data, method = "bray")
###Fit permanova model
Permanova_mod <- adonis2(dist_matrix ~ Time_Period, data=Diel_data,
permutations=permutations, method="bray")
#Check assumption of homogeneity of multivariate dispersion
Assumption <- anova(betadisper(dist_matrix, Diel_data$Date))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 5 0.054840 0.0109680 1.8687 0.1563
## Residuals 16 0.093911 0.0058694
# Fit the pairwise comparisons
Pairwise_mod <- pairwise.adonis2(dist_matrix ~ Time_Period, data=Diel_data,
permutations=999, method="bray", p.adjust.m="Bonferroni")
## $parent_call
## [1] "dist_matrix ~ Time_Period , strata = Null , permutations 999"
##
## $Afternoon_vs_Evening
## Df SumOfSqs R2 F Pr(>F)
## Time_Period 1 0.14280 0.21578 2.7515 0.011 *
## Residual 10 0.51899 0.78422
## Total 11 0.66179 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Afternoon_vs_Morning
## Df SumOfSqs R2 F Pr(>F)
## Time_Period 1 0.13411 0.30754 3.9972 0.013 *
## Residual 9 0.30196 0.69246
## Total 10 0.43608 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Afternoon_vs_Night
## Df SumOfSqs R2 F Pr(>F)
## Time_Period 1 0.39615 0.5885 12.871 0.003 **
## Residual 9 0.27700 0.4115
## Total 10 0.67315 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Evening_vs_Morning
## Df SumOfSqs R2 F Pr(>F)
## Time_Period 1 0.09899 0.17573 1.9188 0.044 *
## Residual 9 0.46433 0.82427
## Total 10 0.56332 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Evening_vs_Night
## Df SumOfSqs R2 F Pr(>F)
## Time_Period 1 0.13497 0.235 2.7647 0.013 *
## Residual 9 0.43936 0.765
## Total 10 0.57433 1.000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Morning_vs_Night
## Df SumOfSqs R2 F Pr(>F)
## Time_Period 1 0.12754 0.36453 4.5891 0.009 **
## Residual 8 0.22234 0.63547
## Total 9 0.34988 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## attr(,"class")
## [1] "pwadstrata" "list"
# Observed counts
realized <- c(Morning = 729, Afternoon = 2572, Evening = 1197, Night = 1024)
# Expected counts
expected <- c(1381, 1381, 922, 1839)
# Chi-square test
chisq.test(x = realized, p = expected / sum(expected))
##
## Chi-squared test for given probabilities
##
## data: realized
## X-squared = 1778.5, df = 3, p-value < 2.2e-16
#load and prepare new dataset with environmental variables for only the 17 most common taxa
data <- read.csv(here("Data", "HRS_EnviData.csv"),
sep=";", header=TRUE, as.is=TRUE,
na.strings=c(NA, "na", " NA"))
data <- data |>
mutate(across(c('Superfamily', "Taxon", 'Order',
'Date', 'ID'), as.factor))
data <- data |>
mutate_at(c('RH', 'Temperature','Wind_Speed',
'Cloud_cover', 'Time_con', 'Dewpoint'), as.numeric)
#Create time-model
data$Dailyrythm_cos <- cos(2*pi*data$Time_con/24)
data$Dailyrythm_sin <- sin(2*pi*data$Time_con/24)
##Filter occurrence data
data <- data %>%
mutate(presence = as.integer(Abundance > 0))
#Filter taxa with almost only presence or absence
data %>%
group_by(Taxon) %>%
summarise(
ID_with_presence = n_distinct(ID[presence == 1]),
total_ID = n_distinct(ID),
proportion = ID_with_presence / total_ID
)
## # A tibble: 17 × 4
## Taxon ID_with_presence total_ID proportion
## <fct> <int> <int> <dbl>
## 1 Auchenorrhyncha 26 50 0.52
## 2 Braconidae 25 50 0.5
## 3 Cecidomyidae 50 50 1
## 4 Ceraphronidae 27 50 0.54
## 5 Chalcidoidea 45 50 0.9
## 6 Diapriidae 36 50 0.72
## 7 Dolichopodidae 23 50 0.46
## 8 Hybotidae 22 50 0.44
## 9 Ichneumonidae 39 50 0.78
## 10 Microlepidoptera 37 50 0.74
## 11 Mycetophilidae 35 50 0.7
## 12 Phoridae 35 50 0.7
## 13 Platygastridae 25 50 0.5
## 14 Psocoptera 30 50 0.6
## 15 Scelionidae 37 50 0.74
## 16 Sciaridae 49 50 0.98
## 17 Throscidae 20 50 0.4
#Drop Sciaridae and Cecidomyiidae, and Chalcidoidea as they have >=0.9 presence, and Hybotidae, Dolichopodidae, and Throscidae as they have <0.5
data_bin <- data %>%
filter(!Taxon %in% c("Sciaridae", "Cecidomyidae", "Chalcidoidea",
"Hybotidae", "Dolichopodidae", "Throscidae")) %>%
droplevels()
levels(data_bin$Taxon)
## [1] "Auchenorrhyncha" "Braconidae" "Ceraphronidae" "Diapriidae"
## [5] "Ichneumonidae" "Microlepidoptera" "Mycetophilidae" "Phoridae"
## [9] "Platygastridae" "Psocoptera" "Scelionidae"
binmod <- glmmTMB(presence ~ (Dailyrythm_sin + Dailyrythm_cos) * Taxon +
Temperature * Taxon +
Wind_Speed +
Cloud_cover +
Dewpoint +
(1 | Date),
data=data_bin,
family=binomial(link = "logit"))
#No interaction between other environmetal variables and taxa
Anova(binmod, type ='III')
## Analysis of Deviance Table (Type III Wald chisquare tests)
##
## Response: presence
## Chisq Df Pr(>Chisq)
## (Intercept) 0.0002 1 0.98925
## Dailyrythm_sin 3.4918 1 0.06167 .
## Dailyrythm_cos 1.4832 1 0.22327
## Taxon 21.0983 10 0.02042 *
## Temperature 0.0778 1 0.78033
## Wind_Speed 1.9074 1 0.16726
## Cloud_cover 0.9027 1 0.34207
## Dewpoint 0.4135 1 0.52022
## Dailyrythm_sin:Taxon 20.8681 10 0.02203 *
## Dailyrythm_cos:Taxon 22.8253 10 0.01141 *
## Taxon:Temperature 20.6542 10 0.02364 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(binmod)
## Family: binomial ( logit )
## Formula:
## presence ~ (Dailyrythm_sin + Dailyrythm_cos) * Taxon + Temperature *
## Taxon + Wind_Speed + Cloud_cover + Dewpoint + (1 | Date)
## Data: data_bin
##
## AIC BIC logLik deviance df.resid
## 576.0 782.9 -240.0 480.0 502
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Date (Intercept) 3.098e-09 5.566e-05
## Number of obs: 550, groups: Date, 6
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.02733 2.02810 0.014 0.9892
## Dailyrythm_sin -1.07514 0.57536 -1.869 0.0617 .
## Dailyrythm_cos 0.68902 0.56576 1.218 0.2233
## TaxonBraconidae -4.94902 2.79012 -1.774 0.0761 .
## TaxonCeraphronidae -5.98759 2.92361 -2.048 0.0406 *
## TaxonDiapriidae -3.35559 3.54616 -0.946 0.3440
## TaxonIchneumonidae -6.96869 4.56803 -1.526 0.1271
## TaxonMicrolepidoptera -0.21512 3.01759 -0.071 0.9432
## TaxonMycetophilidae 2.76977 3.49295 0.793 0.4278
## TaxonPhoridae -5.23752 4.17126 -1.256 0.2093
## TaxonPlatygastridae -4.04142 2.88012 -1.403 0.1606
## TaxonPsocoptera 4.35629 2.87184 1.517 0.1293
## TaxonScelionidae -0.31034 2.91083 -0.107 0.9151
## Temperature 0.03590 0.12873 0.279 0.7803
## Wind_Speed -0.25418 0.18405 -1.381 0.1673
## Cloud_cover -0.36671 0.38597 -0.950 0.3421
## Dewpoint 0.03950 0.06143 0.643 0.5202
## Dailyrythm_sin:TaxonBraconidae 0.64227 0.77111 0.833 0.4049
## Dailyrythm_sin:TaxonCeraphronidae 1.44234 0.80692 1.788 0.0739 .
## Dailyrythm_sin:TaxonDiapriidae -0.38379 0.85148 -0.451 0.6522
## Dailyrythm_sin:TaxonIchneumonidae 0.22629 0.91836 0.246 0.8054
## Dailyrythm_sin:TaxonMicrolepidoptera 1.24318 0.89291 1.392 0.1638
## Dailyrythm_sin:TaxonMycetophilidae -2.25390 1.18486 -1.902 0.0571 .
## Dailyrythm_sin:TaxonPhoridae -0.90547 0.90307 -1.003 0.3160
## Dailyrythm_sin:TaxonPlatygastridae -0.48578 0.79870 -0.608 0.5430
## Dailyrythm_sin:TaxonPsocoptera -1.35102 0.87888 -1.537 0.1242
## Dailyrythm_sin:TaxonScelionidae 0.02901 0.80611 0.036 0.9713
## Dailyrythm_cos:TaxonBraconidae -0.53606 0.73591 -0.728 0.4663
## Dailyrythm_cos:TaxonCeraphronidae 0.06643 0.76928 0.086 0.9312
## Dailyrythm_cos:TaxonDiapriidae -1.75553 0.79526 -2.208 0.0273 *
## Dailyrythm_cos:TaxonIchneumonidae 1.38145 1.19105 1.160 0.2461
## Dailyrythm_cos:TaxonMicrolepidoptera 3.18788 1.56431 2.038 0.0416 *
## Dailyrythm_cos:TaxonMycetophilidae 0.14501 0.87665 0.165 0.8686
## Dailyrythm_cos:TaxonPhoridae -0.90298 0.82312 -1.097 0.2726
## Dailyrythm_cos:TaxonPlatygastridae -1.31494 0.74764 -1.759 0.0786 .
## Dailyrythm_cos:TaxonPsocoptera -1.50199 0.72938 -2.059 0.0395 *
## Dailyrythm_cos:TaxonScelionidae -0.96054 0.74501 -1.289 0.1973
## TaxonBraconidae:Temperature 0.31901 0.18704 1.706 0.0881 .
## TaxonCeraphronidae:Temperature 0.41115 0.19753 2.082 0.0374 *
## TaxonDiapriidae:Temperature 0.30149 0.25022 1.205 0.2282
## TaxonIchneumonidae:Temperature 0.65355 0.34893 1.873 0.0611 .
## TaxonMicrolepidoptera:Temperature 0.19689 0.21342 0.923 0.3562
## TaxonMycetophilidae:Temperature -0.07823 0.22777 -0.343 0.7313
## TaxonPhoridae:Temperature 0.44340 0.29881 1.484 0.1378
## TaxonPlatygastridae:Temperature 0.24245 0.19195 1.263 0.2065
## TaxonPsocoptera:Temperature -0.27609 0.18734 -1.474 0.1406
## TaxonScelionidae:Temperature 0.08816 0.19841 0.444 0.6568
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check model assumptions
sim <- simulateResiduals(binmod)
plotQQunif(sim) # 1. QQ plot
plotResiduals(sim) # 2. Residuals vs predicted
testDispersion(sim, plot=TRUE) # 3. Dispersion test (produces a plot)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.99031, p-value = 0.912
## alternative hypothesis: two.sided
testZeroInflation(sim, plot=TRUE) # 4. Zero inflation / sums
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0.99664, p-value = 0.96
## alternative hypothesis: two.sided
#Make predictions on interactions between taxon and temperature, and taxon and diel rhythm
# 1. Make a time sequence with 300 steps between hour 0 and 24
time_seq <- seq(0, 24, length.out = 300)
# 2. Calculate mean and SD temps
temp_mean <- mean(data$Temperature, na.rm = TRUE)
temp_sd <- sd(data$Temperature, na.rm = TRUE)
# 3. Define the three values of temp for predictions
temp_seq <- c(temp_mean - temp_sd, temp_mean, temp_mean + temp_sd)
temp_seq
## [1] 12.03814 15.36258 18.68702
# 4. Define diel rhythm
Dailyrythm_cos <- cos(2 * pi * time_seq / 24)
Dailyrythm_sin <- sin(2 * pi * time_seq / 24)
diel_df <- data.frame(
hours = time_seq,
Dailyrythm_cos = Dailyrythm_cos,
Dailyrythm_sin = Dailyrythm_sin
)
# 5. Set all other variables to mean levels
pred_grid <- expand.grid(
Taxon = levels(model.frame(binmod)$Taxon),
Temperature = temp_seq,
Wind_Speed = mean(data_bin$Wind_Speed, na.rm = TRUE),
Cloud_cover = mean(data_bin$Cloud_cover, na.rm = TRUE),
Dewpoint = mean(data_bin$Dewpoint, na.rm = TRUE)
)
pred_grid <- merge(pred_grid, diel_df, all = TRUE)
# 5. Predict
pred_link <- predict(
binmod,
newdata = pred_grid,
type = "link",
se.fit = TRUE,
re.form = NA
)
pred_grid$fit_link <- pred_link$fit
pred_grid$se_link <- pred_link$se.fit
pred_grid$lower_link <- pred_grid$fit_link - 1.96 * pred_grid$se_link
pred_grid$upper_link <- pred_grid$fit_link + 1.96 * pred_grid$se_link
# Back-transform from logit to probability
inv_logit <- function(x) exp(x) / (1 + exp(x))
pred_grid$pred <- inv_logit(pred_grid$fit_link)
pred_grid$lower <- inv_logit(pred_grid$lower_link)
pred_grid$upper <- inv_logit(pred_grid$upper_link)
#Sort temepratures in falling order
pred_grid$Temperature_f <- factor(
pred_grid$Temperature,
levels = sort(unique(pred_grid$Temperature))
)
# 6. Plot
presencepredict <- ggplot(pred_grid,
aes(x = hours, y = pred, color = Temperature_f)) +
geom_ribbon(aes(ymin = lower, ymax = upper, fill = Temperature_f),
alpha = 0.2, color = NA, show.legend = FALSE) +
geom_line(linewidth = 1.5) + # deterministic line
facet_wrap(~ Taxon, scales = "free_y") +
scale_color_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
labels = c("12.0", "15.4", "18.7")) +
scale_fill_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
labels = c("12.0", "15.4", "18.7")) +
labs(x = "Hour of day",
y = "Predicted presence",
color = "Temperature (°C)") +
scale_x_continuous(limits = c(0, 24), breaks = seq(0, 24, by = 3)) +
theme_minimal() +
guides(color = guide_legend(reverse = TRUE)) +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
strip.text = element_text(size = 14),
legend.text = element_text(size = 12), # legend item labels
legend.title = element_text(size = 14), # legend heading
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line.x = element_line(color = "black"),
axis.line = element_line(color = "black")
)
#filter abundance conditional on presence
data_abu <- data %>%
filter(Abundance > 0)
#Divide model by taxonomic order
run_order_model <- function(order_name, data_abu, temp_seq, diel_df) {
mod <- glmmTMB(
Abundance ~
Dailyrythm_sin * Taxon +
Dailyrythm_cos * Taxon +
Temperature * Taxon +
Wind_Speed +
Cloud_cover +
Dewpoint +
(1|Date),
family = nbinom2(link = "log"),
data = subset(data_abu, Order == order_name)
)
# 2. Diagnostics
sim <- simulateResiduals(mod)
print(plotQQunif(sim))
print(plotResiduals(sim))
print(testDispersion(sim, plot = TRUE))
print(testZeroInflation(sim, plot = TRUE))
# 3. ANOVA + summary
model_anova <- Anova(mod, type = "III")
model_summary <- summary(mod)
# 4. Predicted effect of environmental variables (excluding temperature)
eff_cloud <- ggpredict(mod, terms = "Cloud_cover")
plot_cloud <- ggplot(eff_cloud, aes(x = x, y = predicted)) +
geom_line(linewidth = 0.8, color = "firebrick") +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill = "red", alpha = 0.3) +
theme_minimal() +
ylab("Predicted abundance") +
xlab("Cloud cover") +
ggtitle(order_name) +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
title = element_text(size = 14),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(color = "black")
)
eff_wind <- ggpredict(mod, terms = "Wind_Speed")
plot_wind <- ggplot(eff_wind, aes(x = x, y = predicted)) +
geom_line(linewidth = 0.8, color = "firebrick") +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill = "red", alpha = 0.3) +
theme_minimal() +
ylab("Predicted abundance") +
xlab("Wind speed") +
ggtitle(order_name) +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
title = element_text(size = 14),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(color = "black")
)
eff_dew <- ggpredict(mod, terms = "Dewpoint")
plot_dew <- ggplot(eff_dew, aes(x = x, y = predicted)) +
geom_line(linewidth = 0.8, color = "firebrick") +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
fill = "red", alpha = 0.3) +
theme_minimal() +
ylab("Predicted abundance") +
xlab("Dew point") +
ggtitle(order_name) +
theme(
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
title = element_text(size = 14),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line = element_line(color = "black")
)
#Visualize interaction between temperature and taxon, and taxon and diel rhythm. See earlier model on presence for step-by-step explanations.
pred_grid <- expand.grid(
Taxon = levels(model.frame(mod)$Taxon),
Temperature = temp_seq,
Wind_Speed = mean(data_abu$Wind_Speed, na.rm = TRUE),
Cloud_cover = mean(data_abu$Cloud_cover, na.rm = TRUE),
Dewpoint = mean(data_abu$Dewpoint, na.rm = TRUE)
)
pred_grid <- merge(pred_grid, diel_df, all = TRUE)
pred_link <- predict(
mod,
newdata = pred_grid,
type = "link",
se.fit=TRUE,
re.form = NA
)
pred_grid$fit <- pred_link$fit
pred_grid$lower_link <- pred_link$fit - 1.96 * pred_link$se.fit
pred_grid$upper_link <- pred_link$fit + 1.96 * pred_link$se.fit
# Back-transform to response scale
pred_grid$lower <- exp(pred_grid$lower_link)
pred_grid$upper <- exp(pred_grid$upper_link)
pred_grid$pred <- exp(pred_grid$fit)
pred_grid$Temperature_f <- factor(
pred_grid$Temperature,
levels = sort(unique(pred_grid$Temperature))
)
abundancepredict <- ggplot(pred_grid, aes(x = hours, y = pred, color = factor(Temperature_f))) +
geom_ribbon(aes(ymin = lower, ymax = upper, fill = factor(Temperature_f)),
alpha = 0.2, color = NA, show.legend = FALSE) +
geom_line(aes(group = Temperature_f), linewidth = 1.5) +
facet_wrap(~ Taxon, scales = "free_y", nrow = 5, ncol = 4) +
scale_color_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
labels = c("12.0", "15.4", "18.7")) +
scale_fill_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
labels = c("12.0", "15.4", "18.7")) +
labs(x = "Hour of day",
y = "Predicted abundance",
color = "Temperature (°C)") +
scale_x_continuous(limits = c(0, 24), breaks = seq(0, 24, by = 3)) +
theme_minimal() +
guides(color = guide_legend(reverse = TRUE)) +
theme(
axis.text = element_text(size = 12),
axis.title.y = element_text(size = 18),
axis.title.x = element_text(size = 18),
strip.text = element_text(size = 14),
legend.text = element_text(size = 16),
legend.title = element_text(size = 18),
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
axis.line.x = element_line(color = "black"),
axis.line = element_line(color = "black"))
return(list(
model = mod,
anova = model_anova,
summary = model_summary,
eff_cloud = eff_cloud,
eff_wind = eff_wind,
eff_dew = eff_dew,
plot_cloud = plot_cloud,
plot_wind = plot_wind,
plot_dew = plot_dew,
abundancepredict = abundancepredict
))
}
orders_to_run <- c("Diptera", "Hymenoptera", "Other")
results <- lapply(orders_to_run, run_order_model,
data_abu = data_abu, temp_seq = temp_seq, diel_df = diel_df)
## $rect
## $rect$w
## [1] 0.2221771
##
## $rect$h
## [1] 0.1051948
##
## $rect$left
## [1] 0.3889115
##
## $rect$top
## [1] 0.5525974
##
##
## $text
## $text$x
## [1] 0.419023 0.419023
##
## $text$y
## [1] 0.5175325 0.4824675
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 0.0027
## alternative hypothesis: both
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0989, p-value = 0.608
## alternative hypothesis: two.sided
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value < 2.2e-16
## alternative hypothesis: two.sided
## $rect
## $rect$w
## [1] 0.2221771
##
## $rect$h
## [1] 0.1051948
##
## $rect$left
## [1] 0.3889115
##
## $rect$top
## [1] 0.5525974
##
##
## $text
## $text$x
## [1] 0.419023 0.419023
##
## $text$y
## [1] 0.5175325 0.4824675
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 2.244e-05
## alternative hypothesis: both
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1218, p-value = 0.536
## alternative hypothesis: two.sided
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value < 2.2e-16
## alternative hypothesis: two.sided
## $rect
## $rect$w
## [1] 0.2221771
##
## $rect$h
## [1] 0.1051948
##
## $rect$left
## [1] 0.3889115
##
## $rect$top
## [1] 0.5525974
##
##
## $text
## $text$x
## [1] 0.419023 0.419023
##
## $text$y
## [1] 0.5175325 0.4824675
##
## Test for location of quantiles via qgam
##
## data: simulationOutput
## p-value = 0.0001562
## alternative hypothesis: both
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.5475, p-value = 0.104
## alternative hypothesis: two.sided
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value < 2.2e-16
## alternative hypothesis: two.sided
names(results) <- orders_to_run
plot_names <- c("plot_cloud", "plot_wind", "plot_dew", "abundancepredict")
for (ord in names(results)) {
cat("##", ord, "\n")
for (plt in plot_names) {
print(results[[ord]][[plt]])
}
}
## ## Diptera
## ## Hymenoptera
## ## Other
## @Book{,
## title = {An {R} Companion to Applied Regression},
## edition = {Third},
## author = {John Fox and Sanford Weisberg},
## year = {2019},
## publisher = {Sage},
## address = {Thousand Oaks {CA}},
## url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
## }
## @Article{,
## title = {Welcome to the {tidyverse}},
## author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
## year = {2019},
## journal = {Journal of Open Source Software},
## volume = {4},
## number = {43},
## pages = {1686},
## doi = {10.21105/joss.01686},
## }
## @Book{,
## author = {Hadley Wickham},
## title = {ggplot2: Elegant Graphics for Data Analysis},
## publisher = {Springer-Verlag New York},
## year = {2016},
## isbn = {978-3-319-24277-4},
## url = {https://ggplot2.tidyverse.org},
## }
## @Manual{,
## title = {here: A Simpler Way to Find Your Files},
## author = {Kirill Müller},
## year = {2020},
## note = {R package version 1.0.1},
## url = {https://CRAN.R-project.org/package=here},
## }
## @Article{,
## title = {Complex heatmaps reveal patterns and correlations in multidimensional genomic data},
## author = {Zuguang Gu and Roland Eils and Matthias Schlesner},
## journal = {Bioinformatics},
## doi = {10.1093/bioinformatics/btw313},
## year = {2016},
## }
##
## @Article{,
## title = {Complex Heatmap Visualization},
## author = {Zuguang Gu},
## doi = {10.1002/imt2.43},
## journal = {iMeta},
## year = {2022},
## }
## @Manual{,
## title = {{viridis(Lite)} - Colorblind-Friendly Color Maps for R},
## author = {{Garnier} and {Simon} and {Ross} and {Noam} and {Rudis} and {Robert} and {Camargo} and Antônio Pedro and {Sciaini} and {Marco} and {Scherer} and {Cédric}},
## year = {2023},
## note = {viridisLite package version 0.4.2},
## url = {https://sjmgarnier.github.io/viridis/},
## doi = {10.5281/zenodo.4678327},
## }
## @Manual{,
## title = {vegan: Community Ecology Package},
## author = {Jari Oksanen and Gavin L. Simpson and F. Guillaume Blanchet and Roeland Kindt and Pierre Legendre and Peter R. Minchin and R.B. O'Hara and Peter Solymos and M. Henry H. Stevens and Eduard Szoecs and Helene Wagner and Matt Barbour and Michael Bedward and Ben Bolker and Daniel Borcard and Gustavo Carvalho and Michael Chirico and Miquel {De Caceres} and Sebastien Durand and Heloisa Beatriz Antoniazi Evangelista and Rich FitzJohn and Michael Friendly and Brendan Furneaux and Geoffrey Hannigan and Mark O. Hill and Leo Lahti and Dan McGlinn and Marie-Helene Ouellette and Eduardo {Ribeiro Cunha} and Tyler Smith and Adrian Stier and Cajo J.F. {Ter Braak} and James Weedon},
## year = {2024},
## note = {R package version 2.6-6.1},
## url = {https://CRAN.R-project.org/package=vegan},
## }
## @Manual{,
## title = {pairwiseAdonis: Pairwise Multilevel Comparison using Adonis},
## author = {Pedro {Martinez Arbizu}},
## year = {2017},
## note = {R package version 0.4.1, commit cb190f7668a0c82c0b0853927db239e7b9ec3e83},
## url = {https://github.com/pmartinezarbizu/pairwiseAdonis},
## }
## @Book{,
## title = {Modern Applied Statistics with S},
## author = {W. N. Venables and B. D. Ripley},
## publisher = {Springer},
## edition = {Fourth},
## address = {New York},
## year = {2002},
## note = {ISBN 0-387-95457-0},
## url = {https://www.stats.ox.ac.uk/pub/MASS4/},
## }
## @Article{,
## title = {ggeffects: Tidy Data Frames of Marginal Effects from Regression Models.},
## volume = {3},
## doi = {10.21105/joss.00772},
## number = {26},
## journal = {Journal of Open Source Software},
## author = {Daniel Lüdecke},
## year = {2018},
## pages = {772},
## }
## @Manual{,
## title = {patchwork: The Composer of Plots},
## author = {Thomas Lin Pedersen},
## year = {2024},
## note = {R package version 1.3.0},
## url = {https://CRAN.R-project.org/package=patchwork},
## }
## @Article{,
## title = {{performance}: An {R} Package for Assessment, Comparison and Testing of Statistical Models},
## author = {Daniel Lüdecke and Mattan S. Ben-Shachar and Indrajeet Patil and Philip Waggoner and Dominique Makowski},
## year = {2021},
## journal = {Journal of Open Source Software},
## volume = {6},
## number = {60},
## pages = {3139},
## doi = {10.21105/joss.03139},
## }
## @Manual{jtools,
## title = {jtools: Analysis and Presentation of Social Scientific Data},
## author = {Jacob A. Long},
## year = {2022},
## note = {R package version 2.2.0},
## url = {https://cran.r-project.org/package=jtools},
## }
## @Book{,
## title = {An {R} Companion to Applied Regression},
## edition = {Third},
## author = {John Fox and Sanford Weisberg},
## year = {2019},
## publisher = {Sage},
## address = {Thousand Oaks {CA}},
## url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
## }
## @Manual{,
## title = {DHARMa: Residual Diagnostics for Hierarchical (Multi-Level / Mixed)
## Regression Models},
## author = {Florian Hartig},
## year = {2022},
## note = {R package version 0.4.6},
## url = {https://CRAN.R-project.org/package=DHARMa},
## }
## @Article{,
## author = {Mollie E. Brooks and Kasper Kristensen and Koen J. {van Benthem} and Arni Magnusson and Casper W. Berg and Anders Nielsen and Hans J. Skaug and Martin Maechler and Benjamin M. Bolker},
## title = {{glmmTMB} Balances Speed and Flexibility Among Packages for Zero-inflated Generalized Linear Mixed Modeling},
## year = {2017},
## journal = {The R Journal},
## doi = {10.32614/RJ-2017-066},
## pages = {378--400},
## volume = {9},
## number = {2},
## }