This is the code for obtaining Figure 3 and Table 5 from the article The potential of semiochemical blends to monitor saproxylic beetle communities published in Journal of Insect Conservation

Note: All packages used in the Rmarkdown script are automatically downloaded if needed, and unpacked, in the first (hidden) code chunk.

Step 1: Load data from sampling events. In this example, the file is expected to be inside a folder named “Data” within the project directory. If your copy of the repository does not contain this folder, or if the file is stored elsewhere, you may need to adjust the path accordingly.

SpeciesMatrix <- read.csv(here("Data", "SpeciesMatrix.csv"), 
                         sep=";", header=TRUE, as.is=TRUE)

SpeciesMatrix <- SpeciesMatrix %>%
  select(-Trap_Count, -Month) %>%  # Explicitly remove Month
  mutate(across(c("Harbor", "Blend", "Year"), as.factor)) %>%
  group_by(Harbor, Blend, Year) %>%
  summarise(across(where(is.numeric), sum), .groups = "drop")  # Sum numeric columns

# Create a new Blend level that sums all Blend levels
all_blends <- SpeciesMatrix %>%
  ungroup() %>%  # Ensure no lingering groupings
  group_by(Year, Harbor) %>%  # Group by all factors except "Blend"
  summarise(across(where(is.numeric), sum), .groups = "drop") %>%  # Sum species counts
  mutate(Blend = "All")  # Add new Blend level

# Append the new "All" blend rows back to the dataset
SpeciesMatrixAll <- SpeciesMatrix %>%
  ungroup() %>%  # Ensure it's not grouped before binding
  bind_rows(all_blends)

# Turn into presence absence
presence_absence_matrix <- SpeciesMatrixAll %>%
  mutate(across(-c(Blend, Harbor, Year), ~ ifelse(. > 0, 1, 0)))  

# Combine sampling from every two consecutive years
combine_year_pairs <- function(presence_absence_matrix) {
  presence_absence_matrix %>%
    arrange(Harbor, Blend, Year) %>%  # Ensure chronological order
    group_by(Harbor, Blend) %>% 
    mutate(YearPair = paste(Year, lead(Year), sep = "-")) %>%  # Create Year Pairs
    mutate(across(where(is.numeric) & !all_of("Year"), ~ . + lead(.), .names = "paired_{.col}")) %>%  # Sum species findings
    select(Harbor, Blend, YearPair, starts_with("paired_")) %>%  # Keep only relevant columns
    rename_with(~ gsub("^paired_", "", .x))  # Remove prefix from column names
}

# Apply function
year_pairs_data <- combine_year_pairs(presence_absence_matrix)

#Make data binary (0/1)
year_pairs_data <- year_pairs_data %>%
  mutate(across(-c(YearPair), ~ ifelse(. > 0, 1, 0)))

# Remove rows where YearPair is "2023-NA"
year_pairs_data <- year_pairs_data %>%
  filter(YearPair != "2023-NA")

# Repeat for every three consecutive years
combine_three_years <- function(presence_absence_matrix) {
  presence_absence_matrix %>%
    arrange(Harbor, Blend, Year) %>%  # Ensure chronological order
    group_by(Harbor, Blend) %>%
    mutate(YearTriplet = paste(Year, lead(Year, 1), lead(Year, 2), sep = "-")) %>%  # Create three-year groups
    mutate(across(where(is.numeric) & !all_of("Year"), ~ . + lead(., 1) + lead(., 2), .names = "triplet_{.col}")) %>%  # Sum over three years
    select(Harbor, Blend, YearTriplet, starts_with("triplet_")) %>%  # Keep only relevant columns
    rename_with(~ gsub("^triplet_", "", .x)) %>%  # Remove prefix from column names
    filter(!grepl("NA", YearTriplet))  # Remove incomplete last groups
}

# Apply function
three_years_data <- combine_three_years(presence_absence_matrix)

three_years_data <- three_years_data %>%
  mutate(across(-c(YearTriplet), ~ ifelse(. > 0, 1, 0)))

total_species_data <- bind_rows(
  presence_absence_matrix, year_pairs_data, three_years_data
)

Step 2: Combine the species data with data on tree host information

species_tree_host <- read.csv(here("Data", "TreeHost.csv"), 
                              sep=";", header=TRUE, as.is=TRUE)

# Combine with species data

total_species_host <- total_species_data %>%
  pivot_longer(cols = -c(Harbor, Year, Blend, YearPair, YearTriplet), names_to = "Species", values_to = "Presence") %>%
  left_join(species_tree_host, by = "Species")  # Join tree host info with species names

# Add Year Category based on which year column is populated
total_species_host <- total_species_host %>%
  mutate(YearCategory = case_when(
    !is.na(YearTriplet) ~ "Three years",    # If YearTriplet is populated, assign "Three"
    !is.na(YearPair) ~ "Two years",         # If YearPair is populated, assign "Two"
    !is.na(Year) ~ "One year",             # If Year is populated, assign "One"
    TRUE ~ NA_character_              # In case none are populated (though shouldn't happen)
  ))

total_species_host
## # A tibble: 25,920 × 9
##    Harbor     Blend Year  YearPair YearTriplet Species         Presence TreeHost
##    <fct>      <chr> <fct> <chr>    <chr>       <chr>              <dbl> <chr>   
##  1 Gothenburg Ips   2017  <NA>     <NA>        Acanthocinus_a…        0 Con     
##  2 Gothenburg Ips   2017  <NA>     <NA>        Acanthocinus_g…        0 Con     
##  3 Gothenburg Ips   2017  <NA>     <NA>        Agrilus_angust…        0 Dec     
##  4 Gothenburg Ips   2017  <NA>     <NA>        Agrilus_betule…        0 Dec     
##  5 Gothenburg Ips   2017  <NA>     <NA>        Agrilus_bigutt…        0 Dec     
##  6 Gothenburg Ips   2017  <NA>     <NA>        Agrilus_latico…        0 Dec     
##  7 Gothenburg Ips   2017  <NA>     <NA>        Agrilus_sulcic…        0 Dec     
##  8 Gothenburg Ips   2017  <NA>     <NA>        Alosterna_taba…        0 Con     
##  9 Gothenburg Ips   2017  <NA>     <NA>        Anastrangalia_…        0 Con     
## 10 Gothenburg Ips   2017  <NA>     <NA>        Anisandrus_dis…        0 Dec     
## # ℹ 25,910 more rows
## # ℹ 1 more variable: YearCategory <chr>

Step 3: Create a dataset with species presence absence per harbor across all blends and years, using the original Speciesmatrix.

total_community_long <- SpeciesMatrix %>%
  select(-c(Year, Blend)) %>% 
  group_by(Harbor) %>%
  summarise(across(everything(), ~ sum(. > 0, na.rm = TRUE))) %>%  
  pivot_longer(cols = -Harbor, names_to = "Species", values_to = "Presence") %>%  # Convert to long format
  mutate(Presence = ifelse(Presence > 0, 1, 0)) %>%  
  left_join(species_tree_host, by = "Species")  # Join tree host info


# Divide all data into a dataset for coniferous species, and one for deciduous species

deciduous_community <- total_community_long %>%
  filter(TreeHost == "Dec") %>%
  select(-TreeHost)

coniferous_community <- total_community_long %>%
  filter(TreeHost == "Con")%>%
  select(-TreeHost)

deciduous_samples <- total_species_host %>%
  filter(TreeHost == "Dec")%>%
  select(-TreeHost)

coniferous_samples <- total_species_host %>%
  filter(TreeHost == "Con") %>%
  select(-TreeHost)

Step 4: Create two datasets with proportion of community sampled for each harbor:blend:year combination. One for species with deciduous tree hosts, one for species with coniferous tree hosts. Here the code for the coniferous dataset is shown

#Prepare and clean data
coniferous_community_wide <- coniferous_community %>%
  pivot_wider(names_from = Species, values_from = Presence, values_fill = list(Presence = 0))

coniferous_samples_wide <- coniferous_samples %>%
  pivot_wider(names_from = Species, values_from = Presence, values_fill = list(Presence = 0))

coniferous_samples_wide <- coniferous_samples_wide %>%
  select(-c(Year, YearPair, YearTriplet))

# Ensure both datasets are properly aligned
species_columns <- setdiff(names(coniferous_samples_wide), c("Harbor", "YearCategory", "Blend"))

community_columns <- setdiff(names(coniferous_community_wide), c("Harbor"))

# Add missing species as columns (fill with 0)
missing_species_in_sample <- setdiff(community_columns, species_columns)

# Add missing species with zeros in species matrix
for (species in missing_species_in_sample) {
  coniferous_samples_wide[[species]] <- 0
}

missing_species_in_community <- setdiff(species_columns, community_columns)

# Add missing species with zeros in community matrix
for (species in missing_species_in_community) {
  coniferous_samples_wide[[species]] <- 0
}

# Merge species matrix with the community data by "Harbor"
merged_data_coniferous <- coniferous_samples_wide %>%
  left_join(coniferous_community_wide, by = "Harbor", suffix = c("_sample", "_control"))

# Convert all non-zero values to 1
merged_data_coniferous <- merged_data_coniferous %>%
  mutate(across(where(is.numeric), ~ ifelse(. > 0, 1, .)))

# Calculate the proportion of control species found
proportion_matrix_coniferous <- merged_data_coniferous %>%
  rowwise() %>%     # Count the exact number of species that overlap (both 1 in sample & control)
  mutate(
    Overlapping_Species = sum((c_across(ends_with("_sample")) == 1) & 
                              (c_across(ends_with("_control")) == 1), na.rm = TRUE),
    
    # Calculate proportion (ensuring integer division)
    Proportion_Caught_con = Overlapping_Species / sum(c_across(ends_with("_control")) == 1, na.rm = TRUE)
  ) %>%
  ungroup() %>%  
  select(Harbor, YearCategory, Blend, Overlapping_Species, Proportion_Caught_con)  


# Make sure the 'Proportion_Caught' column is numeric for plotting
proportion_matrix_coniferous$Proportion_Caught_con <- as.numeric(proportion_matrix_coniferous$Proportion_Caught_con)


# Set YearCategory as a factor with specified levels
proportion_matrix_coniferous <- proportion_matrix_coniferous %>%
  mutate(YearCategory = factor(YearCategory,
                               levels = c("One year",
                                          "Two years",
                                          "Three years")))

# Set Blend as a factor with specified levels
proportion_matrix_coniferous <- proportion_matrix_coniferous %>%
  mutate(Blend = factor(Blend,
                               levels = c("Ips",
                                          "Monochamus",
                                          "Pissodes",
                                          "All")))

Step 5: Calculate the mean proportion of total community caught per harbor-blend-number of years of sampling combination, along with standard errors. Here we conitnue with the dataset of species associated with coniferous wood

#Calculate mean and SE
summary_stats_coniferous <- proportion_matrix_coniferous %>%
  group_by(YearCategory, Blend, Harbor) %>%
  summarise(
    Mean_Proportion = mean(Proportion_Caught_con, na.rm = TRUE),  # Mean of ProportionCaught
    SE_Proportion = sd(Proportion_Caught_con, na.rm = TRUE) / sqrt(n()),  # SE calculation
    n = n()  # Sample size
  )
## # A tibble: 36 × 6
## # Groups:   YearCategory, Blend [12]
##    YearCategory Blend      Harbor     Mean_Proportion SE_Proportion     n
##    <fct>        <fct>      <fct>                <dbl>         <dbl> <int>
##  1 One year     Ips        Gothenburg           0.357        0.0222     6
##  2 One year     Ips        Monsteras            0.548        0.0357     6
##  3 One year     Ips        Norrkoping           0.454        0.0497     6
##  4 One year     Monochamus Gothenburg           0.238        0.0275     6
##  5 One year     Monochamus Monsteras            0.379        0.0278     6
##  6 One year     Monochamus Norrkoping           0.340        0.0360     6
##  7 One year     Pissodes   Gothenburg           0.290        0.0225     6
##  8 One year     Pissodes   Monsteras            0.452        0.0565     6
##  9 One year     Pissodes   Norrkoping           0.336        0.0318     6
## 10 One year     All        Gothenburg           0.560        0.0306     6
## # ℹ 26 more rows

Step 6: Make one plot per harbor. Repeat all code from step 4 for the dataset on deciduous species

#Create harbor-specific datasets for plotting
gothenburg_data_con <- proportion_matrix_coniferous %>%
  filter(Harbor == "Gothenburg")
monsteras_data_con <- proportion_matrix_coniferous %>%
  filter(Harbor == "Monsteras")
norrkoping_data_con <- proportion_matrix_coniferous %>%
  filter(Harbor == "Norrkoping")

gothenburg_con_plot <- ggplot(data = gothenburg_data_con, 
                          aes(x = YearCategory, y = Proportion_Caught_con)) +
  geom_boxplot() + 
  facet_wrap(~ Blend) +
  theme_bw() +
  scale_y_continuous(limits = c(0, 1.0)) +
  theme(axis.title = element_text(size = 14), legend.title = element_text(size=14),
        axis.text.x = element_text(size = 12, angle = 30, hjust = 1), legend.text = element_text(size = 12),
        strip.text = element_text(size = 12),
        axis.title.x = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 16),
        plot.subtitle = element_text(hjust = 0.5, size = 13),
        legend.position = "none") +
  labs(subtitle = "Gothenburg",
       y = "Proportion sampled") 


monsteras_con_plot <- ggplot(data = monsteras_data_con, 
                              aes(x = YearCategory, y = Proportion_Caught_con)) +
  geom_boxplot() + 
  facet_wrap(~ Blend) +
  theme_bw() +
  scale_y_continuous(limits = c(0, 1.0)) +
  theme(axis.title = element_text(size = 14), legend.title = element_text(size=14),
        axis.text.x = element_text(size = 12, angle = 30, hjust = 1), legend.text = element_text(size = 12),
        strip.text = element_text(size = 12),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 16),
        plot.subtitle = element_text(hjust = 0.5, size = 13),
        legend.position = "none") +
  labs(subtitle = "Monsteras",
       title = "Species with coniferous hosts") 

norrkoping_con_plot <- ggplot(data = norrkoping_data_con, 
                             aes(x = YearCategory, y = Proportion_Caught_con)) +
  geom_boxplot() + 
  facet_wrap(~ Blend) +
  theme_bw() +
  scale_y_continuous(limits = c(0, 1.0)) +
  theme(axis.title = element_text(size = 14), legend.title = element_text(size=14),
        axis.text.x = element_text(size = 12, angle = 30, hjust = 1), legend.text = element_text(size = 12),
        strip.text = element_text(size = 12),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        plot.title = element_text(hjust = 0.5, size = 16),
        plot.subtitle = element_text(hjust = 0.5, size = 13),
        legend.position = "none") +
  labs(subtitle = "Norrkoping") 


Coniferous_plots <- (gothenburg_con_plot +
                       monsteras_con_plot +
                       norrkoping_con_plot)
## # A tibble: 36 × 6
## # Groups:   YearCategory, Blend [12]
##    YearCategory Blend      Harbor     Mean_Proportion SE_Proportion     n
##    <fct>        <fct>      <fct>                <dbl>         <dbl> <int>
##  1 One year     Ips        Gothenburg          0.15         0.0645      6
##  2 One year     Ips        Monsteras           0.243        0.0197      6
##  3 One year     Ips        Norrkoping          0.138        0.0294      6
##  4 One year     Monochamus Gothenburg          0.075        0.0403      6
##  5 One year     Monochamus Monsteras           0.0946       0.0152      6
##  6 One year     Monochamus Norrkoping          0.188        0.0464      6
##  7 One year     Pissodes   Gothenburg          0.117        0.0441      6
##  8 One year     Pissodes   Monsteras           0.158        0.0352      6
##  9 One year     Pissodes   Norrkoping          0.1          0.00645     6
## 10 One year     All        Gothenburg          0.258        0.0908      6
## # ℹ 26 more rows

Step 7: Combine the plots

References:

## @Manual{,
##   title = {here: A Simpler Way to Find Your Files},
##   author = {Kirill Müller},
##   year = {2020},
##   note = {R package version 1.0.1},
##   url = {https://CRAN.R-project.org/package=here},
## }
## @Article{,
##   title = {Welcome to the {tidyverse}},
##   author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
##   year = {2019},
##   journal = {Journal of Open Source Software},
##   volume = {4},
##   number = {43},
##   pages = {1686},
##   doi = {10.21105/joss.01686},
## }
## @Manual{,
##   title = {patchwork: The Composer of Plots},
##   author = {Thomas Lin Pedersen},
##   year = {2024},
##   note = {R package version 1.3.0},
##   url = {https://CRAN.R-project.org/package=patchwork},
## }