Note: All packages used in the Rmarkdown script are automatically downloaded if needed, and unpacked, in the first (hidden) code chunk. These include packages hosted on Github and obtained through Bioconductor.

Step 1. Load and prepare the data. In this example, the file is expected to be inside a folder named “Data” within the project directory. If your copy of the repository does not contain this folder, or if the file is stored elsewhere, you may need to adjust the path accordingly.

df <- read.csv(here("Data", "HRS_SpeciesData.csv"), 
                       sep=";", header=TRUE, as.is=TRUE,
                       na.strings=c(NA, "na", " NA"))
                       
df <- df |>
  mutate_at(c('Time_Num', 'Time_con'), as.numeric)

df <- df %>%
  mutate(Date = trimws(as.character(Date)))  # Remove leading/trailing spaces


df <- df |>
  mutate(across(c('Superfamily', "Taxon",
                  'Date'), as.factor)) 
                  
df <- df %>%
  filter(!(Taxon %in% c("Empty")))#Remove the rows with zero findings.

2. Plot total abundances of insects across time of day (Fig 2A)

sum_abundance <- df %>%
  filter(!(Taxon %in% c("empty", "Empty"))) %>%
  group_by(Time_con, Date) %>%
  summarise(Abundance = sum(Abundance), .groups = "drop")

#plot
plot_abundance <- sum_abundance %>%
  ggplot(aes(x = Time_con, y = Abundance)) +
    geom_point() +
  geom_smooth(span = 0.85, se = TRUE, color = "cornflowerblue", linewidth = 1) +
  theme_minimal() +
  geom_vline(xintercept = 6, linetype = "dashed", color = "black", alpha = 0.6) +
  geom_vline(xintercept = 12, linetype = "dashed", color = "black", alpha = 0.6) +
  geom_vline(xintercept = 18, linetype = "dashed", color = "black", alpha = 0.6) +
  geom_vline(xintercept = 22, linetype = "dashed", color = "black", alpha = 0.6) +
  xlab("Time of day") +
  ylab("Total abundance") +
  scale_x_continuous(limits = c(2, 22), breaks = seq(2, 22, by = 2)) +
  scale_y_continuous(limits = c(0, 400), breaks = seq(0, 400, by = 50)) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(color = "black")
  )

3. Plot spearman correlations on abundances between time periods (Fig. 2C)

#Divide time into time periods
df <- df %>%
  mutate(Time_Period = case_when(
    Time_Num >= 0.08 & Time_Num < 0.33  ~ "Night",   #Samples from 22-06
    Time_Num >= 0.33 & Time_Num < 0.58 ~ "Morning", #Samples from 06-12
    Time_Num >= 0.58 & Time_Num < 0.83 ~ "Afternoon", #Samples from 12-18
    Time_Num >= 0.83 | Time_Num < 0.08   ~ "Evening",  #Samples from 18-22
    TRUE ~ NA_character_
  ))

df$Time_Period[is.na(df$Time_Period)] <- "Evening"

# Put time periods in correct order
time_order <- c("Morning", "Afternoon", "Evening", "Night")

# Summarize total abundance per day and time period
df_sum <- df %>%
  group_by(Date, Time_Period) %>% 
  summarise(TotalAbundance = sum(Abundance, na.rm = TRUE)) %>%
  ungroup()

#Turn into wide format
df_wide <- df_sum %>%
  pivot_wider(names_from = Time_Period, values_from = TotalAbundance)

#Run spearman correlation test
cor_matrix <- cor(dplyr::select(df_wide, -Date), method = "spearman",
    use = "pairwise.complete.obs") 

cor_df <- as.data.frame(cor_matrix) %>%
  rownames_to_column(var = "Time1")

cor_long <- cor_df %>%
  pivot_longer(
    cols = -Time1,        
    names_to = "Time2",   
    values_to = "rho"     
  )

cor_long <- cor_long %>%
  mutate(
    Time1 = factor(Time1, levels = c("Morning", "Afternoon", "Evening", "Night")),
    Time2 = factor(Time2, levels = c("Morning", "Afternoon", "Evening", "Night"))
  )

#plot
correlationplot <- ggplot(cor_long, aes(Time1, Time2, fill = rho)) +
  geom_tile() +
  geom_text(aes(label = round(rho, 2)), color = "black", size = 5) +
  scale_fill_gradient2(low = "cornflowerblue", mid = "white", high = "firebrick", midpoint = 0) +
  theme_minimal() +
  labs(x = "", y = "", fill = "Spearman rho")

4. Plot community composition across time periods as a heatmap (Fig 2D)

# Create wide-format dataset for heatmap
heatmap_data <- df %>%
  group_by(Time_Period, Taxon) %>%
  summarise(Abundance = sum(Abundance), .groups = "drop") %>%
  pivot_wider(names_from = Time_Period, 
              values_from = Abundance, 
              values_fill = list(Abundance = 0)) %>%
    dplyr::select(Taxon, all_of(time_order)) %>%
  column_to_rownames("Taxon") 
 

#Replace 0s with NA
heatmap_data_na <- heatmap_data
heatmap_data_na[heatmap_data_na == 0] <- NA

#Log-transform the data
log_data <- log1p(heatmap_data_na)

Taxon_mapping <- data.frame(
  Taxon = rownames(log_data),
  Taxon_ID = seq_len(nrow(log_data))
)

# Replace row names in log_data
rownames(log_data) <- Taxon_mapping$Taxon_ID

# Create the heatmap with NA shown as white
plot_heatmap <- Heatmap(
  log_data,
  col = viridis(100),
  na_col = "white",
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  show_row_names = TRUE,
  column_names_rot = 0,
  column_names_centered = TRUE, 
  column_names_side = "bottom", 
  border = TRUE,
  rect_gp = gpar(col = "grey90", lwd = 0.5),
  row_names_gp = gpar(fontsize = 6),
  heatmap_legend_param = list(
    title = NULL,
    legend_height = unit(4, "cm"),  
    title_position = "topcenter",
    labels_gp = gpar(fontsize = 10)
  )
)

5. Create a NMDS-plot (Fig. 3) and fit a permanova model

NMDS_data <- df %>%
  group_by(Time_Period, Date, Taxon) %>%
  summarise(Abundance = sum(Abundance), .groups = "drop") %>%
  pivot_wider(
    id_cols = c(Time_Period, Date), 
    names_from = Taxon,                   
    values_from = Abundance,             
    values_fill = 0                      
  )

NMDS_data$Time_Period <- as.factor(NMDS_data$Time_Period)
NMDS_data$Time_Period <- factor(NMDS_data$Time_Period, levels = c("Morning", "Afternoon", "Evening", "Night"))

#Standardize data
NMDS_data <- NMDS_data %>%
  mutate(row_sum = rowSums(across(where(is.numeric))), 
         across(where(is.numeric), ~ . / row_sum)) %>%  
  dplyr::select(-row_sum) 

#Subset the dataframe on which to base the ordination 
species_data <- NMDS_data[,3:85]

#Identify the columns that contains the diel data
Diel_data <- NMDS_data[,1:2]

NMDS_mod <- metaMDS(species_data, distance = "bray", k = 2, trymax = 1000)
## Run 0 stress 0.1056525 
## Run 1 stress 0.1056526 
## ... Procrustes: rmse 0.0001263438  max resid 0.0004470798 
## ... Similar to previous best
## Run 2 stress 0.1117394 
## Run 3 stress 0.1096904 
## Run 4 stress 0.1082662 
## Run 5 stress 0.1056525 
## ... Procrustes: rmse 2.201555e-05  max resid 7.596146e-05 
## ... Similar to previous best
## Run 6 stress 0.1056525 
## ... Procrustes: rmse 2.724357e-05  max resid 9.643912e-05 
## ... Similar to previous best
## Run 7 stress 0.108579 
## Run 8 stress 0.1117396 
## Run 9 stress 0.1096904 
## Run 10 stress 0.1117397 
## Run 11 stress 0.1159448 
## Run 12 stress 0.1056526 
## ... Procrustes: rmse 0.0001300821  max resid 0.0004606749 
## ... Similar to previous best
## Run 13 stress 0.1236548 
## Run 14 stress 0.1088512 
## Run 15 stress 0.1075685 
## Run 16 stress 0.11691 
## Run 17 stress 0.1117395 
## Run 18 stress 0.11691 
## Run 19 stress 0.1082663 
## Run 20 stress 0.1159449 
## *** Best solution repeated 4 times
# Run twice to avoid local optimum.
NMDS_mod <- metaMDS(species_data, distance = "bray", k = 2, trymax = 1000, 
                    previous.best = NMDS_mod)
## Starting from 2-dimensional configuration
## Run 0 stress 0.1056525 
## Run 1 stress 0.1075685 
## Run 2 stress 0.1082662 
## Run 3 stress 0.1075684 
## Run 4 stress 0.1056526 
## ... Procrustes: rmse 0.000117399  max resid 0.0004148205 
## ... Similar to previous best
## Run 5 stress 0.1082662 
## Run 6 stress 0.1075683 
## Run 7 stress 0.11691 
## Run 8 stress 0.1056525 
## ... Procrustes: rmse 2.19304e-05  max resid 7.749299e-05 
## ... Similar to previous best
## Run 9 stress 0.1075682 
## Run 10 stress 0.1117394 
## Run 11 stress 0.1163086 
## Run 12 stress 0.1236551 
## Run 13 stress 0.1159449 
## Run 14 stress 0.1265001 
## Run 15 stress 0.1056526 
## ... Procrustes: rmse 0.0001328137  max resid 0.0004720156 
## ... Similar to previous best
## Run 16 stress 0.1247028 
## Run 17 stress 0.1117396 
## Run 18 stress 0.1096904 
## Run 19 stress 0.1246587 
## Run 20 stress 0.11691 
## *** Best solution repeated 7 times
#extract the site scores
datascores = as.data.frame(scores(NMDS_mod)$sites)  
datascores$Date = NMDS_data$Date
datascores$Time_Period = NMDS_data$Time_Period

#plot by time period and date
NMDS_plot <- ggplot(datascores, aes(x = NMDS1, y = NMDS2, color = Time_Period)) +
  geom_point(size = 2, aes(color = Time_Period)) +
  coord_fixed() + 
  theme_bw() +
  stat_ellipse(aes(color = Time_Period), level=0.9) +
  theme(legend.position="right",
        legend.text=element_text(size=20),
        legend.title=element_text(size=22),
        legend.direction='vertical',
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text = element_text(size = 16),
        panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank()) + 
  annotate("text", x = max(datascores$NMDS1), 
           y = min(datascores$NMDS2), 
           label = paste("Stress =", round(NMDS_mod$stress, 3)), 
           hjust = 0.4, vjust = 1.5, size = 6) +
  scale_color_brewer(palette = "Dark2") + 
  labs(color = "Time of Day")

##Permanova with date as a random factor
permutations <- with(Diel_data, how(nperm=999, blocks = Date))

#Calculate distance matrix
dist_matrix <- vegdist(species_data, method = "bray")

###Fit permanova model
Permanova_mod <- adonis2(dist_matrix ~ Time_Period, data=Diel_data,
                         permutations=permutations, method="bray")

#Check assumption of homogeneity of multivariate dispersion
Assumption <- anova(betadisper(dist_matrix, Diel_data$Date))
## Analysis of Variance Table
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq F value Pr(>F)
## Groups     5 0.054840 0.0109680  1.8687 0.1563
## Residuals 16 0.093911 0.0058694
# Fit the pairwise comparisons
Pairwise_mod <- pairwise.adonis2(dist_matrix ~ Time_Period, data=Diel_data,
                                 permutations=999, method="bray", p.adjust.m="Bonferroni")
## $parent_call
## [1] "dist_matrix ~ Time_Period , strata = Null , permutations 999"
## 
## $Afternoon_vs_Evening
##             Df SumOfSqs      R2      F Pr(>F)  
## Time_Period  1  0.14280 0.21578 2.7515  0.011 *
## Residual    10  0.51899 0.78422                
## Total       11  0.66179 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Afternoon_vs_Morning
##             Df SumOfSqs      R2      F Pr(>F)  
## Time_Period  1  0.13411 0.30754 3.9972  0.013 *
## Residual     9  0.30196 0.69246                
## Total       10  0.43608 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Afternoon_vs_Night
##             Df SumOfSqs     R2      F Pr(>F)   
## Time_Period  1  0.39615 0.5885 12.871  0.003 **
## Residual     9  0.27700 0.4115                 
## Total       10  0.67315 1.0000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Evening_vs_Morning
##             Df SumOfSqs      R2      F Pr(>F)  
## Time_Period  1  0.09899 0.17573 1.9188  0.044 *
## Residual     9  0.46433 0.82427                
## Total       10  0.56332 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Evening_vs_Night
##             Df SumOfSqs    R2      F Pr(>F)  
## Time_Period  1  0.13497 0.235 2.7647  0.013 *
## Residual     9  0.43936 0.765                
## Total       10  0.57433 1.000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $Morning_vs_Night
##             Df SumOfSqs      R2      F Pr(>F)   
## Time_Period  1  0.12754 0.36453 4.5891  0.009 **
## Residual     8  0.22234 0.63547                 
## Total        9  0.34988 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"

6. Chi-square test to compare expected to realized abundances per time period

# Observed counts
realized <- c(Morning = 729, Afternoon = 2572, Evening = 1197, Night = 1024)

# Expected counts
expected <- c(1381, 1381, 922, 1839)

# Chi-square test
chisq.test(x = realized, p = expected / sum(expected))
## 
##  Chi-squared test for given probabilities
## 
## data:  realized
## X-squared = 1778.5, df = 3, p-value < 2.2e-16

7. Plot taxon-specific diel models

#load and prepare new dataset with environmental variables for only the 17 most common taxa

data <- read.csv(here("Data", "HRS_EnviData.csv"), 
                       sep=";", header=TRUE, as.is=TRUE,
                       na.strings=c(NA, "na", " NA"))

data <- data |>
  mutate(across(c('Superfamily', "Taxon", 'Order',
                  'Date', 'ID'), as.factor))

data <- data |>
  mutate_at(c('RH', 'Temperature','Wind_Speed', 
                                       'Cloud_cover', 'Time_con', 'Dewpoint'), as.numeric)

7.1 Fit environmental occurence model (Fig. 4)

#Create time-model
data$Dailyrythm_cos <- cos(2*pi*data$Time_con/24) 
data$Dailyrythm_sin <- sin(2*pi*data$Time_con/24)

##Filter occurrence data
data <- data %>%
  mutate(presence = as.integer(Abundance > 0))

#Filter taxa with almost only presence or absence
data %>%
  group_by(Taxon) %>%
  summarise(
    ID_with_presence = n_distinct(ID[presence == 1]),
    total_ID = n_distinct(ID),
    proportion = ID_with_presence / total_ID
  )
## # A tibble: 17 × 4
##    Taxon            ID_with_presence total_ID proportion
##    <fct>                       <int>    <int>      <dbl>
##  1 Auchenorrhyncha                26       50       0.52
##  2 Braconidae                     25       50       0.5 
##  3 Cecidomyidae                   50       50       1   
##  4 Ceraphronidae                  27       50       0.54
##  5 Chalcidoidea                   45       50       0.9 
##  6 Diapriidae                     36       50       0.72
##  7 Dolichopodidae                 23       50       0.46
##  8 Hybotidae                      22       50       0.44
##  9 Ichneumonidae                  39       50       0.78
## 10 Microlepidoptera               37       50       0.74
## 11 Mycetophilidae                 35       50       0.7 
## 12 Phoridae                       35       50       0.7 
## 13 Platygastridae                 25       50       0.5 
## 14 Psocoptera                     30       50       0.6 
## 15 Scelionidae                    37       50       0.74
## 16 Sciaridae                      49       50       0.98
## 17 Throscidae                     20       50       0.4
#Drop Sciaridae and Cecidomyiidae, and Chalcidoidea as they have >=0.9 presence, and Hybotidae, Dolichopodidae, and Throscidae as they have <0.5

data_bin <- data %>%
  filter(!Taxon %in% c("Sciaridae", "Cecidomyidae", "Chalcidoidea",
                     "Hybotidae", "Dolichopodidae", "Throscidae")) %>%
  droplevels()

levels(data_bin$Taxon)
##  [1] "Auchenorrhyncha"  "Braconidae"       "Ceraphronidae"    "Diapriidae"      
##  [5] "Ichneumonidae"    "Microlepidoptera" "Mycetophilidae"   "Phoridae"        
##  [9] "Platygastridae"   "Psocoptera"       "Scelionidae"
binmod <- glmmTMB(presence ~ (Dailyrythm_sin + Dailyrythm_cos) * Taxon +
      Temperature * Taxon +
    Wind_Speed +
    Cloud_cover +
    Dewpoint +
  (1 | Date),
  data=data_bin,
  family=binomial(link = "logit"))

#No interaction between other environmetal variables and taxa

Anova(binmod, type ='III')
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: presence
##                        Chisq Df Pr(>Chisq)  
## (Intercept)           0.0002  1    0.98925  
## Dailyrythm_sin        3.4918  1    0.06167 .
## Dailyrythm_cos        1.4832  1    0.22327  
## Taxon                21.0983 10    0.02042 *
## Temperature           0.0778  1    0.78033  
## Wind_Speed            1.9074  1    0.16726  
## Cloud_cover           0.9027  1    0.34207  
## Dewpoint              0.4135  1    0.52022  
## Dailyrythm_sin:Taxon 20.8681 10    0.02203 *
## Dailyrythm_cos:Taxon 22.8253 10    0.01141 *
## Taxon:Temperature    20.6542 10    0.02364 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(binmod)
##  Family: binomial  ( logit )
## Formula:          
## presence ~ (Dailyrythm_sin + Dailyrythm_cos) * Taxon + Temperature *  
##     Taxon + Wind_Speed + Cloud_cover + Dewpoint + (1 | Date)
## Data: data_bin
## 
##      AIC      BIC   logLik deviance df.resid 
##    576.0    782.9   -240.0    480.0      502 
## 
## Random effects:
## 
## Conditional model:
##  Groups Name        Variance  Std.Dev. 
##  Date   (Intercept) 3.098e-09 5.566e-05
## Number of obs: 550, groups:  Date, 6
## 
## Conditional model:
##                                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                           0.02733    2.02810   0.014   0.9892  
## Dailyrythm_sin                       -1.07514    0.57536  -1.869   0.0617 .
## Dailyrythm_cos                        0.68902    0.56576   1.218   0.2233  
## TaxonBraconidae                      -4.94902    2.79012  -1.774   0.0761 .
## TaxonCeraphronidae                   -5.98759    2.92361  -2.048   0.0406 *
## TaxonDiapriidae                      -3.35559    3.54616  -0.946   0.3440  
## TaxonIchneumonidae                   -6.96869    4.56803  -1.526   0.1271  
## TaxonMicrolepidoptera                -0.21512    3.01759  -0.071   0.9432  
## TaxonMycetophilidae                   2.76977    3.49295   0.793   0.4278  
## TaxonPhoridae                        -5.23752    4.17126  -1.256   0.2093  
## TaxonPlatygastridae                  -4.04142    2.88012  -1.403   0.1606  
## TaxonPsocoptera                       4.35629    2.87184   1.517   0.1293  
## TaxonScelionidae                     -0.31034    2.91083  -0.107   0.9151  
## Temperature                           0.03590    0.12873   0.279   0.7803  
## Wind_Speed                           -0.25418    0.18405  -1.381   0.1673  
## Cloud_cover                          -0.36671    0.38597  -0.950   0.3421  
## Dewpoint                              0.03950    0.06143   0.643   0.5202  
## Dailyrythm_sin:TaxonBraconidae        0.64227    0.77111   0.833   0.4049  
## Dailyrythm_sin:TaxonCeraphronidae     1.44234    0.80692   1.788   0.0739 .
## Dailyrythm_sin:TaxonDiapriidae       -0.38379    0.85148  -0.451   0.6522  
## Dailyrythm_sin:TaxonIchneumonidae     0.22629    0.91836   0.246   0.8054  
## Dailyrythm_sin:TaxonMicrolepidoptera  1.24318    0.89291   1.392   0.1638  
## Dailyrythm_sin:TaxonMycetophilidae   -2.25390    1.18486  -1.902   0.0571 .
## Dailyrythm_sin:TaxonPhoridae         -0.90547    0.90307  -1.003   0.3160  
## Dailyrythm_sin:TaxonPlatygastridae   -0.48578    0.79870  -0.608   0.5430  
## Dailyrythm_sin:TaxonPsocoptera       -1.35102    0.87888  -1.537   0.1242  
## Dailyrythm_sin:TaxonScelionidae       0.02901    0.80611   0.036   0.9713  
## Dailyrythm_cos:TaxonBraconidae       -0.53606    0.73591  -0.728   0.4663  
## Dailyrythm_cos:TaxonCeraphronidae     0.06643    0.76928   0.086   0.9312  
## Dailyrythm_cos:TaxonDiapriidae       -1.75553    0.79526  -2.208   0.0273 *
## Dailyrythm_cos:TaxonIchneumonidae     1.38145    1.19105   1.160   0.2461  
## Dailyrythm_cos:TaxonMicrolepidoptera  3.18788    1.56431   2.038   0.0416 *
## Dailyrythm_cos:TaxonMycetophilidae    0.14501    0.87665   0.165   0.8686  
## Dailyrythm_cos:TaxonPhoridae         -0.90298    0.82312  -1.097   0.2726  
## Dailyrythm_cos:TaxonPlatygastridae   -1.31494    0.74764  -1.759   0.0786 .
## Dailyrythm_cos:TaxonPsocoptera       -1.50199    0.72938  -2.059   0.0395 *
## Dailyrythm_cos:TaxonScelionidae      -0.96054    0.74501  -1.289   0.1973  
## TaxonBraconidae:Temperature           0.31901    0.18704   1.706   0.0881 .
## TaxonCeraphronidae:Temperature        0.41115    0.19753   2.082   0.0374 *
## TaxonDiapriidae:Temperature           0.30149    0.25022   1.205   0.2282  
## TaxonIchneumonidae:Temperature        0.65355    0.34893   1.873   0.0611 .
## TaxonMicrolepidoptera:Temperature     0.19689    0.21342   0.923   0.3562  
## TaxonMycetophilidae:Temperature      -0.07823    0.22777  -0.343   0.7313  
## TaxonPhoridae:Temperature             0.44340    0.29881   1.484   0.1378  
## TaxonPlatygastridae:Temperature       0.24245    0.19195   1.263   0.2065  
## TaxonPsocoptera:Temperature          -0.27609    0.18734  -1.474   0.1406  
## TaxonScelionidae:Temperature          0.08816    0.19841   0.444   0.6568  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Check model assumptions
sim <- simulateResiduals(binmod)
plotQQunif(sim)                # 1. QQ plot

plotResiduals(sim)             # 2. Residuals vs predicted

testDispersion(sim, plot=TRUE) # 3. Dispersion test (produces a plot)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.99031, p-value = 0.912
## alternative hypothesis: two.sided
testZeroInflation(sim, plot=TRUE) # 4. Zero inflation / sums

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0.99664, p-value = 0.96
## alternative hypothesis: two.sided
#Make predictions on interactions between taxon and temperature, and taxon and diel rhythm

# 1. Make a time sequence with 300 steps between hour 0 and 24
time_seq <- seq(0, 24, length.out = 300) 

# 2. Calculate mean and SD temps
temp_mean <- mean(data$Temperature, na.rm = TRUE)
temp_sd   <- sd(data$Temperature, na.rm = TRUE)

# 3. Define the three values of temp for predictions
temp_seq <- c(temp_mean - temp_sd, temp_mean, temp_mean + temp_sd)
temp_seq
## [1] 12.03814 15.36258 18.68702
# 4. Define diel rhythm
Dailyrythm_cos <- cos(2 * pi * time_seq / 24)
Dailyrythm_sin <- sin(2 * pi * time_seq / 24)

diel_df <- data.frame(
  hours = time_seq,
  Dailyrythm_cos = Dailyrythm_cos,
  Dailyrythm_sin = Dailyrythm_sin
)

# 5. Set all other variables to mean levels
pred_grid <- expand.grid(
  Taxon = levels(model.frame(binmod)$Taxon),
  Temperature = temp_seq,
  Wind_Speed = mean(data_bin$Wind_Speed, na.rm = TRUE),
  Cloud_cover = mean(data_bin$Cloud_cover, na.rm = TRUE),
  Dewpoint = mean(data_bin$Dewpoint, na.rm = TRUE)
)

pred_grid <- merge(pred_grid, diel_df, all = TRUE)

# 5. Predict 
pred_link <- predict(
  binmod,
  newdata = pred_grid,
  type = "link",
  se.fit = TRUE,
  re.form = NA
)

pred_grid$fit_link <- pred_link$fit
pred_grid$se_link  <- pred_link$se.fit

pred_grid$lower_link <- pred_grid$fit_link - 1.96 * pred_grid$se_link
pred_grid$upper_link <- pred_grid$fit_link + 1.96 * pred_grid$se_link

# Back-transform from logit to probability
inv_logit <- function(x) exp(x) / (1 + exp(x))

pred_grid$pred  <- inv_logit(pred_grid$fit_link)
pred_grid$lower <- inv_logit(pred_grid$lower_link)
pred_grid$upper <- inv_logit(pred_grid$upper_link)

#Sort temepratures in falling order
pred_grid$Temperature_f <- factor(
  pred_grid$Temperature,
  levels = sort(unique(pred_grid$Temperature))
)

# 6. Plot
presencepredict <- ggplot(pred_grid, 
            aes(x = hours, y = pred, color = Temperature_f)) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = Temperature_f),
              alpha = 0.2, color = NA, show.legend = FALSE) +
geom_line(linewidth = 1.5) +       # deterministic line 
  facet_wrap(~ Taxon, scales = "free_y") +
  scale_color_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
                     labels = c("12.0", "15.4", "18.7")) +
  scale_fill_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
                    labels = c("12.0", "15.4", "18.7")) +
  labs(x = "Hour of day",
       y = "Predicted presence",
       color = "Temperature (°C)") +
  scale_x_continuous(limits = c(0, 24), breaks = seq(0, 24, by = 3)) +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    strip.text = element_text(size = 14),
    legend.text = element_text(size = 12),      # legend item labels
    legend.title = element_text(size = 14),     # legend heading
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.line.x = element_line(color = "black"),
    axis.line = element_line(color = "black")
  )

7.2 Fit environmental abundance models (Fig. 5 & 6)

#filter abundance conditional on presence
data_abu <- data %>%
  filter(Abundance > 0)

#Divide model by taxonomic order

run_order_model <- function(order_name, data_abu, temp_seq, diel_df) {

mod <- glmmTMB(
    Abundance ~
      Dailyrythm_sin * Taxon +
      Dailyrythm_cos * Taxon +
      Temperature * Taxon +
      Wind_Speed +
      Cloud_cover +
      Dewpoint +
      (1|Date),
    family = nbinom2(link = "log"),
    data = subset(data_abu, Order == order_name)
  )
  
  # 2. Diagnostics 
  sim <- simulateResiduals(mod)
  print(plotQQunif(sim))
  print(plotResiduals(sim))
  print(testDispersion(sim, plot = TRUE))
  print(testZeroInflation(sim, plot = TRUE))
  
  # 3. ANOVA + summary 
  model_anova <- Anova(mod, type = "III")
  model_summary <- summary(mod)
  
  # 4. Predicted effect of environmental variables (excluding temperature)
  
  eff_cloud <- ggpredict(mod, terms = "Cloud_cover")
  
  plot_cloud <- ggplot(eff_cloud, aes(x = x, y = predicted)) +
    geom_line(linewidth = 0.8, color = "firebrick") +
    geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
                fill = "red", alpha = 0.3) +
    theme_minimal() +
    ylab("Predicted abundance") +
    xlab("Cloud cover") +
    ggtitle(order_name) +
    theme(
      axis.text = element_text(size = 12),
      axis.title = element_text(size = 14),
      title = element_text(size = 14),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      axis.line = element_line(color = "black")
    )

  eff_wind <- ggpredict(mod, terms = "Wind_Speed")
  
  plot_wind <- ggplot(eff_wind, aes(x = x, y = predicted)) +
    geom_line(linewidth = 0.8, color = "firebrick") +
    geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
                fill = "red", alpha = 0.3) +
    theme_minimal() +
    ylab("Predicted abundance") +
    xlab("Wind speed") +
    ggtitle(order_name) +
    theme(
      axis.text = element_text(size = 12),
      axis.title = element_text(size = 14),
      title = element_text(size = 14),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      axis.line = element_line(color = "black")
    )
  
    eff_dew <- ggpredict(mod, terms = "Dewpoint")
  
  plot_dew <- ggplot(eff_dew, aes(x = x, y = predicted)) +
    geom_line(linewidth = 0.8, color = "firebrick") +
    geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
                fill = "red", alpha = 0.3) +
    theme_minimal() +
    ylab("Predicted abundance") +
    xlab("Dew point") +
    ggtitle(order_name) +
    theme(
      axis.text = element_text(size = 12),
      axis.title = element_text(size = 14),
      title = element_text(size = 14),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      axis.line = element_line(color = "black")
    )
  
  #Visualize interaction between temperature and taxon, and taxon and diel rhythm. See earlier model on presence for step-by-step explanations.
  
  pred_grid <- expand.grid(
  Taxon = levels(model.frame(mod)$Taxon),
  Temperature = temp_seq,
  Wind_Speed = mean(data_abu$Wind_Speed, na.rm = TRUE),
  Cloud_cover = mean(data_abu$Cloud_cover, na.rm = TRUE),
  Dewpoint = mean(data_abu$Dewpoint, na.rm = TRUE)
)

pred_grid <- merge(pred_grid, diel_df, all = TRUE)

pred_link <- predict(
  mod,
  newdata = pred_grid,
  type = "link",
  se.fit=TRUE,
  re.form = NA
)

pred_grid$fit <- pred_link$fit
pred_grid$lower_link <- pred_link$fit - 1.96 * pred_link$se.fit
pred_grid$upper_link <- pred_link$fit + 1.96 * pred_link$se.fit

# Back-transform to response scale
pred_grid$lower <- exp(pred_grid$lower_link)
pred_grid$upper <- exp(pred_grid$upper_link)
pred_grid$pred  <- exp(pred_grid$fit)

pred_grid$Temperature_f <- factor(
  pred_grid$Temperature,
  levels = sort(unique(pred_grid$Temperature))
)

abundancepredict <- ggplot(pred_grid, aes(x = hours, y = pred, color = factor(Temperature_f))) +
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = factor(Temperature_f)),
              alpha = 0.2, color = NA, show.legend = FALSE) +
  geom_line(aes(group = Temperature_f), linewidth = 1.5) +
  facet_wrap(~ Taxon, scales = "free_y", nrow = 5, ncol = 4) +
  scale_color_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
                     labels = c("12.0", "15.4", "18.7")) +
  scale_fill_manual(values = c("cornflowerblue", "goldenrod", "firebrick"),
                    labels = c("12.0", "15.4", "18.7")) +
  labs(x = "Hour of day",
       y = "Predicted abundance",
       color = "Temperature (°C)") +
  scale_x_continuous(limits = c(0, 24), breaks = seq(0, 24, by = 3)) +
  theme_minimal() +
  guides(color = guide_legend(reverse = TRUE)) +
  theme(
    axis.text = element_text(size = 12),
    axis.title.y = element_text(size = 18),
    axis.title.x = element_text(size = 18),
    strip.text = element_text(size = 14),
    legend.text = element_text(size = 16),     
    legend.title = element_text(size = 18),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    axis.line.x = element_line(color = "black"),
    axis.line = element_line(color = "black"))

return(list(
  model = mod,
  anova = model_anova,
  summary = model_summary,
  eff_cloud = eff_cloud,
  eff_wind = eff_wind,
  eff_dew = eff_dew,
  plot_cloud = plot_cloud,
  plot_wind = plot_wind,
  plot_dew = plot_dew,
  abundancepredict = abundancepredict
))
}
orders_to_run <- c("Diptera", "Hymenoptera", "Other")

results <- lapply(orders_to_run, run_order_model, 
                  data_abu = data_abu, temp_seq = temp_seq, diel_df = diel_df)

## $rect
## $rect$w
## [1] 0.2221771
## 
## $rect$h
## [1] 0.1051948
## 
## $rect$left
## [1] 0.3889115
## 
## $rect$top
## [1] 0.5525974
## 
## 
## $text
## $text$x
## [1] 0.419023 0.419023
## 
## $text$y
## [1] 0.5175325 0.4824675

## 
##  Test for location of quantiles via qgam
## 
## data:  simulationOutput
## p-value = 0.0027
## alternative hypothesis: both

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.0989, p-value = 0.608
## alternative hypothesis: two.sided

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value < 2.2e-16
## alternative hypothesis: two.sided

## $rect
## $rect$w
## [1] 0.2221771
## 
## $rect$h
## [1] 0.1051948
## 
## $rect$left
## [1] 0.3889115
## 
## $rect$top
## [1] 0.5525974
## 
## 
## $text
## $text$x
## [1] 0.419023 0.419023
## 
## $text$y
## [1] 0.5175325 0.4824675

## 
##  Test for location of quantiles via qgam
## 
## data:  simulationOutput
## p-value = 2.244e-05
## alternative hypothesis: both

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1218, p-value = 0.536
## alternative hypothesis: two.sided

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value < 2.2e-16
## alternative hypothesis: two.sided

## $rect
## $rect$w
## [1] 0.2221771
## 
## $rect$h
## [1] 0.1051948
## 
## $rect$left
## [1] 0.3889115
## 
## $rect$top
## [1] 0.5525974
## 
## 
## $text
## $text$x
## [1] 0.419023 0.419023
## 
## $text$y
## [1] 0.5175325 0.4824675

## 
##  Test for location of quantiles via qgam
## 
## data:  simulationOutput
## p-value = 0.0001562
## alternative hypothesis: both

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.5475, p-value = 0.104
## alternative hypothesis: two.sided

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value < 2.2e-16
## alternative hypothesis: two.sided
names(results) <- orders_to_run

plot_names <- c("plot_cloud", "plot_wind", "plot_dew", "abundancepredict")

for (ord in names(results)) {
  cat("##", ord, "\n")  
  for (plt in plot_names) {
    print(results[[ord]][[plt]])
  }
}
## ## Diptera

## ## Hymenoptera

## ## Other

References:

## @Book{,
##   title = {An {R} Companion to Applied Regression},
##   edition = {Third},
##   author = {John Fox and Sanford Weisberg},
##   year = {2019},
##   publisher = {Sage},
##   address = {Thousand Oaks {CA}},
##   url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
## }
## @Article{,
##   title = {Welcome to the {tidyverse}},
##   author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
##   year = {2019},
##   journal = {Journal of Open Source Software},
##   volume = {4},
##   number = {43},
##   pages = {1686},
##   doi = {10.21105/joss.01686},
## }
## @Book{,
##   author = {Hadley Wickham},
##   title = {ggplot2: Elegant Graphics for Data Analysis},
##   publisher = {Springer-Verlag New York},
##   year = {2016},
##   isbn = {978-3-319-24277-4},
##   url = {https://ggplot2.tidyverse.org},
## }
## @Manual{,
##   title = {here: A Simpler Way to Find Your Files},
##   author = {Kirill Müller},
##   year = {2020},
##   note = {R package version 1.0.1},
##   url = {https://CRAN.R-project.org/package=here},
## }
## @Article{,
##   title = {Complex heatmaps reveal patterns and correlations in multidimensional genomic data},
##   author = {Zuguang Gu and Roland Eils and Matthias Schlesner},
##   journal = {Bioinformatics},
##   doi = {10.1093/bioinformatics/btw313},
##   year = {2016},
## }
## 
## @Article{,
##   title = {Complex Heatmap Visualization},
##   author = {Zuguang Gu},
##   doi = {10.1002/imt2.43},
##   journal = {iMeta},
##   year = {2022},
## }
## @Manual{,
##   title = {{viridis(Lite)} - Colorblind-Friendly Color Maps for R},
##   author = {{Garnier} and {Simon} and {Ross} and {Noam} and {Rudis} and {Robert} and {Camargo} and Antônio Pedro and {Sciaini} and {Marco} and {Scherer} and {Cédric}},
##   year = {2023},
##   note = {viridisLite package version 0.4.2},
##   url = {https://sjmgarnier.github.io/viridis/},
##   doi = {10.5281/zenodo.4678327},
## }
## @Manual{,
##   title = {vegan: Community Ecology Package},
##   author = {Jari Oksanen and Gavin L. Simpson and F. Guillaume Blanchet and Roeland Kindt and Pierre Legendre and Peter R. Minchin and R.B. O'Hara and Peter Solymos and M. Henry H. Stevens and Eduard Szoecs and Helene Wagner and Matt Barbour and Michael Bedward and Ben Bolker and Daniel Borcard and Gustavo Carvalho and Michael Chirico and Miquel {De Caceres} and Sebastien Durand and Heloisa Beatriz Antoniazi Evangelista and Rich FitzJohn and Michael Friendly and Brendan Furneaux and Geoffrey Hannigan and Mark O. Hill and Leo Lahti and Dan McGlinn and Marie-Helene Ouellette and Eduardo {Ribeiro Cunha} and Tyler Smith and Adrian Stier and Cajo J.F. {Ter Braak} and James Weedon},
##   year = {2024},
##   note = {R package version 2.6-6.1},
##   url = {https://CRAN.R-project.org/package=vegan},
## }
## @Manual{,
##   title = {pairwiseAdonis: Pairwise Multilevel Comparison using Adonis},
##   author = {Pedro {Martinez Arbizu}},
##   year = {2017},
##   note = {R package version 0.4.1, commit cb190f7668a0c82c0b0853927db239e7b9ec3e83},
##   url = {https://github.com/pmartinezarbizu/pairwiseAdonis},
## }
## @Book{,
##   title = {Modern Applied Statistics with S},
##   author = {W. N. Venables and B. D. Ripley},
##   publisher = {Springer},
##   edition = {Fourth},
##   address = {New York},
##   year = {2002},
##   note = {ISBN 0-387-95457-0},
##   url = {https://www.stats.ox.ac.uk/pub/MASS4/},
## }
## @Article{,
##   title = {ggeffects: Tidy Data Frames of Marginal Effects from Regression Models.},
##   volume = {3},
##   doi = {10.21105/joss.00772},
##   number = {26},
##   journal = {Journal of Open Source Software},
##   author = {Daniel Lüdecke},
##   year = {2018},
##   pages = {772},
## }
## @Manual{,
##   title = {patchwork: The Composer of Plots},
##   author = {Thomas Lin Pedersen},
##   year = {2024},
##   note = {R package version 1.3.0},
##   url = {https://CRAN.R-project.org/package=patchwork},
## }
## @Article{,
##   title = {{performance}: An {R} Package for Assessment, Comparison and Testing of Statistical Models},
##   author = {Daniel Lüdecke and Mattan S. Ben-Shachar and Indrajeet Patil and Philip Waggoner and Dominique Makowski},
##   year = {2021},
##   journal = {Journal of Open Source Software},
##   volume = {6},
##   number = {60},
##   pages = {3139},
##   doi = {10.21105/joss.03139},
## }
## @Manual{jtools,
##   title = {jtools: Analysis and Presentation of Social Scientific Data},
##   author = {Jacob A. Long},
##   year = {2022},
##   note = {R package version 2.2.0},
##   url = {https://cran.r-project.org/package=jtools},
## }
## @Book{,
##   title = {An {R} Companion to Applied Regression},
##   edition = {Third},
##   author = {John Fox and Sanford Weisberg},
##   year = {2019},
##   publisher = {Sage},
##   address = {Thousand Oaks {CA}},
##   url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
## }
## @Manual{,
##   title = {DHARMa: Residual Diagnostics for Hierarchical (Multi-Level / Mixed)
## Regression Models},
##   author = {Florian Hartig},
##   year = {2022},
##   note = {R package version 0.4.6},
##   url = {https://CRAN.R-project.org/package=DHARMa},
## }
## @Article{,
##   author = {Mollie E. Brooks and Kasper Kristensen and Koen J. {van Benthem} and Arni Magnusson and Casper W. Berg and Anders Nielsen and Hans J. Skaug and Martin Maechler and Benjamin M. Bolker},
##   title = {{glmmTMB} Balances Speed and Flexibility Among Packages for Zero-inflated Generalized Linear Mixed Modeling},
##   year = {2017},
##   journal = {The R Journal},
##   doi = {10.32614/RJ-2017-066},
##   pages = {378--400},
##   volume = {9},
##   number = {2},
## }