Note: All packages used in the Rmarkdown script are automatically downloaded if needed, and unpacked, in the first (hidden) code chunk.
SpeciesMatrix <- read.csv(here("Data", "SpeciesMatrix.csv"),
sep=";", header=TRUE, as.is=TRUE)
SpeciesMatrix <- SpeciesMatrix %>%
select(-Trap_Count, -Month) %>% # Explicitly remove Month
mutate(across(c("Harbor", "Blend", "Year"), as.factor)) %>%
group_by(Harbor, Blend, Year) %>%
summarise(across(where(is.numeric), sum), .groups = "drop") # Sum numeric columns
# Create a new Blend level that sums all Blend levels
all_blends <- SpeciesMatrix %>%
ungroup() %>% # Ensure no lingering groupings
group_by(Year, Harbor) %>% # Group by all factors except "Blend"
summarise(across(where(is.numeric), sum), .groups = "drop") %>% # Sum species counts
mutate(Blend = "All") # Add new Blend level
# Append the new "All" blend rows back to the dataset
SpeciesMatrixAll <- SpeciesMatrix %>%
ungroup() %>% # Ensure it's not grouped before binding
bind_rows(all_blends)
# Turn into presence absence
presence_absence_matrix <- SpeciesMatrixAll %>%
mutate(across(-c(Blend, Harbor, Year), ~ ifelse(. > 0, 1, 0)))
# Combine sampling from every two consecutive years
combine_year_pairs <- function(presence_absence_matrix) {
presence_absence_matrix %>%
arrange(Harbor, Blend, Year) %>% # Ensure chronological order
group_by(Harbor, Blend) %>%
mutate(YearPair = paste(Year, lead(Year), sep = "-")) %>% # Create Year Pairs
mutate(across(where(is.numeric) & !all_of("Year"), ~ . + lead(.), .names = "paired_{.col}")) %>% # Sum species findings
select(Harbor, Blend, YearPair, starts_with("paired_")) %>% # Keep only relevant columns
rename_with(~ gsub("^paired_", "", .x)) # Remove prefix from column names
}
# Apply function
year_pairs_data <- combine_year_pairs(presence_absence_matrix)
#Make data binary (0/1)
year_pairs_data <- year_pairs_data %>%
mutate(across(-c(YearPair), ~ ifelse(. > 0, 1, 0)))
# Remove rows where YearPair is "2023-NA"
year_pairs_data <- year_pairs_data %>%
filter(YearPair != "2023-NA")
# Repeat for every three consecutive years
combine_three_years <- function(presence_absence_matrix) {
presence_absence_matrix %>%
arrange(Harbor, Blend, Year) %>% # Ensure chronological order
group_by(Harbor, Blend) %>%
mutate(YearTriplet = paste(Year, lead(Year, 1), lead(Year, 2), sep = "-")) %>% # Create three-year groups
mutate(across(where(is.numeric) & !all_of("Year"), ~ . + lead(., 1) + lead(., 2), .names = "triplet_{.col}")) %>% # Sum over three years
select(Harbor, Blend, YearTriplet, starts_with("triplet_")) %>% # Keep only relevant columns
rename_with(~ gsub("^triplet_", "", .x)) %>% # Remove prefix from column names
filter(!grepl("NA", YearTriplet)) # Remove incomplete last groups
}
# Apply function
three_years_data <- combine_three_years(presence_absence_matrix)
three_years_data <- three_years_data %>%
mutate(across(-c(YearTriplet), ~ ifelse(. > 0, 1, 0)))
total_species_data <- bind_rows(
presence_absence_matrix, year_pairs_data, three_years_data
)
species_tree_host <- read.csv(here("Data", "TreeHost.csv"),
sep=";", header=TRUE, as.is=TRUE)
# Combine with species data
total_species_host <- total_species_data %>%
pivot_longer(cols = -c(Harbor, Year, Blend, YearPair, YearTriplet), names_to = "Species", values_to = "Presence") %>%
left_join(species_tree_host, by = "Species") # Join tree host info with species names
# Add Year Category based on which year column is populated
total_species_host <- total_species_host %>%
mutate(YearCategory = case_when(
!is.na(YearTriplet) ~ "Three years", # If YearTriplet is populated, assign "Three"
!is.na(YearPair) ~ "Two years", # If YearPair is populated, assign "Two"
!is.na(Year) ~ "One year", # If Year is populated, assign "One"
TRUE ~ NA_character_ # In case none are populated (though shouldn't happen)
))
total_species_host
## # A tibble: 25,920 × 9
## Harbor Blend Year YearPair YearTriplet Species Presence TreeHost
## <fct> <chr> <fct> <chr> <chr> <chr> <dbl> <chr>
## 1 Gothenburg Ips 2017 <NA> <NA> Acanthocinus_a… 0 Con
## 2 Gothenburg Ips 2017 <NA> <NA> Acanthocinus_g… 0 Con
## 3 Gothenburg Ips 2017 <NA> <NA> Agrilus_angust… 0 Dec
## 4 Gothenburg Ips 2017 <NA> <NA> Agrilus_betule… 0 Dec
## 5 Gothenburg Ips 2017 <NA> <NA> Agrilus_bigutt… 0 Dec
## 6 Gothenburg Ips 2017 <NA> <NA> Agrilus_latico… 0 Dec
## 7 Gothenburg Ips 2017 <NA> <NA> Agrilus_sulcic… 0 Dec
## 8 Gothenburg Ips 2017 <NA> <NA> Alosterna_taba… 0 Con
## 9 Gothenburg Ips 2017 <NA> <NA> Anastrangalia_… 0 Con
## 10 Gothenburg Ips 2017 <NA> <NA> Anisandrus_dis… 0 Dec
## # ℹ 25,910 more rows
## # ℹ 1 more variable: YearCategory <chr>
total_community_long <- SpeciesMatrix %>%
select(-c(Year, Blend)) %>%
group_by(Harbor) %>%
summarise(across(everything(), ~ sum(. > 0, na.rm = TRUE))) %>%
pivot_longer(cols = -Harbor, names_to = "Species", values_to = "Presence") %>% # Convert to long format
mutate(Presence = ifelse(Presence > 0, 1, 0)) %>%
left_join(species_tree_host, by = "Species") # Join tree host info
# Divide all data into a dataset for coniferous species, and one for deciduous species
deciduous_community <- total_community_long %>%
filter(TreeHost == "Dec") %>%
select(-TreeHost)
coniferous_community <- total_community_long %>%
filter(TreeHost == "Con")%>%
select(-TreeHost)
deciduous_samples <- total_species_host %>%
filter(TreeHost == "Dec")%>%
select(-TreeHost)
coniferous_samples <- total_species_host %>%
filter(TreeHost == "Con") %>%
select(-TreeHost)
#Prepare and clean data
coniferous_community_wide <- coniferous_community %>%
pivot_wider(names_from = Species, values_from = Presence, values_fill = list(Presence = 0))
coniferous_samples_wide <- coniferous_samples %>%
pivot_wider(names_from = Species, values_from = Presence, values_fill = list(Presence = 0))
coniferous_samples_wide <- coniferous_samples_wide %>%
select(-c(Year, YearPair, YearTriplet))
# Ensure both datasets are properly aligned
species_columns <- setdiff(names(coniferous_samples_wide), c("Harbor", "YearCategory", "Blend"))
community_columns <- setdiff(names(coniferous_community_wide), c("Harbor"))
# Add missing species as columns (fill with 0)
missing_species_in_sample <- setdiff(community_columns, species_columns)
# Add missing species with zeros in species matrix
for (species in missing_species_in_sample) {
coniferous_samples_wide[[species]] <- 0
}
missing_species_in_community <- setdiff(species_columns, community_columns)
# Add missing species with zeros in community matrix
for (species in missing_species_in_community) {
coniferous_samples_wide[[species]] <- 0
}
# Merge species matrix with the community data by "Harbor"
merged_data_coniferous <- coniferous_samples_wide %>%
left_join(coniferous_community_wide, by = "Harbor", suffix = c("_sample", "_control"))
# Convert all non-zero values to 1
merged_data_coniferous <- merged_data_coniferous %>%
mutate(across(where(is.numeric), ~ ifelse(. > 0, 1, .)))
# Calculate the proportion of control species found
proportion_matrix_coniferous <- merged_data_coniferous %>%
rowwise() %>% # Count the exact number of species that overlap (both 1 in sample & control)
mutate(
Overlapping_Species = sum((c_across(ends_with("_sample")) == 1) &
(c_across(ends_with("_control")) == 1), na.rm = TRUE),
# Calculate proportion (ensuring integer division)
Proportion_Caught_con = Overlapping_Species / sum(c_across(ends_with("_control")) == 1, na.rm = TRUE)
) %>%
ungroup() %>%
select(Harbor, YearCategory, Blend, Overlapping_Species, Proportion_Caught_con)
# Make sure the 'Proportion_Caught' column is numeric for plotting
proportion_matrix_coniferous$Proportion_Caught_con <- as.numeric(proportion_matrix_coniferous$Proportion_Caught_con)
# Set YearCategory as a factor with specified levels
proportion_matrix_coniferous <- proportion_matrix_coniferous %>%
mutate(YearCategory = factor(YearCategory,
levels = c("One year",
"Two years",
"Three years")))
# Set Blend as a factor with specified levels
proportion_matrix_coniferous <- proportion_matrix_coniferous %>%
mutate(Blend = factor(Blend,
levels = c("Ips",
"Monochamus",
"Pissodes",
"All")))
#Calculate mean and SE
summary_stats_coniferous <- proportion_matrix_coniferous %>%
group_by(YearCategory, Blend, Harbor) %>%
summarise(
Mean_Proportion = mean(Proportion_Caught_con, na.rm = TRUE), # Mean of ProportionCaught
SE_Proportion = sd(Proportion_Caught_con, na.rm = TRUE) / sqrt(n()), # SE calculation
n = n() # Sample size
)
## # A tibble: 36 × 6
## # Groups: YearCategory, Blend [12]
## YearCategory Blend Harbor Mean_Proportion SE_Proportion n
## <fct> <fct> <fct> <dbl> <dbl> <int>
## 1 One year Ips Gothenburg 0.357 0.0222 6
## 2 One year Ips Monsteras 0.548 0.0357 6
## 3 One year Ips Norrkoping 0.454 0.0497 6
## 4 One year Monochamus Gothenburg 0.238 0.0275 6
## 5 One year Monochamus Monsteras 0.379 0.0278 6
## 6 One year Monochamus Norrkoping 0.340 0.0360 6
## 7 One year Pissodes Gothenburg 0.290 0.0225 6
## 8 One year Pissodes Monsteras 0.452 0.0565 6
## 9 One year Pissodes Norrkoping 0.336 0.0318 6
## 10 One year All Gothenburg 0.560 0.0306 6
## # ℹ 26 more rows
#Create harbor-specific datasets for plotting
gothenburg_data_con <- proportion_matrix_coniferous %>%
filter(Harbor == "Gothenburg")
monsteras_data_con <- proportion_matrix_coniferous %>%
filter(Harbor == "Monsteras")
norrkoping_data_con <- proportion_matrix_coniferous %>%
filter(Harbor == "Norrkoping")
gothenburg_con_plot <- ggplot(data = gothenburg_data_con,
aes(x = YearCategory, y = Proportion_Caught_con)) +
geom_boxplot() +
facet_wrap(~ Blend) +
theme_bw() +
scale_y_continuous(limits = c(0, 1.0)) +
theme(axis.title = element_text(size = 14), legend.title = element_text(size=14),
axis.text.x = element_text(size = 12, angle = 30, hjust = 1), legend.text = element_text(size = 12),
strip.text = element_text(size = 12),
axis.title.x = element_blank(),
plot.title = element_text(hjust = 0.5, size = 16),
plot.subtitle = element_text(hjust = 0.5, size = 13),
legend.position = "none") +
labs(subtitle = "Gothenburg",
y = "Proportion sampled")
monsteras_con_plot <- ggplot(data = monsteras_data_con,
aes(x = YearCategory, y = Proportion_Caught_con)) +
geom_boxplot() +
facet_wrap(~ Blend) +
theme_bw() +
scale_y_continuous(limits = c(0, 1.0)) +
theme(axis.title = element_text(size = 14), legend.title = element_text(size=14),
axis.text.x = element_text(size = 12, angle = 30, hjust = 1), legend.text = element_text(size = 12),
strip.text = element_text(size = 12),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(hjust = 0.5, size = 16),
plot.subtitle = element_text(hjust = 0.5, size = 13),
legend.position = "none") +
labs(subtitle = "Monsteras",
title = "Species with coniferous hosts")
norrkoping_con_plot <- ggplot(data = norrkoping_data_con,
aes(x = YearCategory, y = Proportion_Caught_con)) +
geom_boxplot() +
facet_wrap(~ Blend) +
theme_bw() +
scale_y_continuous(limits = c(0, 1.0)) +
theme(axis.title = element_text(size = 14), legend.title = element_text(size=14),
axis.text.x = element_text(size = 12, angle = 30, hjust = 1), legend.text = element_text(size = 12),
strip.text = element_text(size = 12),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
plot.title = element_text(hjust = 0.5, size = 16),
plot.subtitle = element_text(hjust = 0.5, size = 13),
legend.position = "none") +
labs(subtitle = "Norrkoping")
Coniferous_plots <- (gothenburg_con_plot +
monsteras_con_plot +
norrkoping_con_plot)
## # A tibble: 36 × 6
## # Groups: YearCategory, Blend [12]
## YearCategory Blend Harbor Mean_Proportion SE_Proportion n
## <fct> <fct> <fct> <dbl> <dbl> <int>
## 1 One year Ips Gothenburg 0.15 0.0645 6
## 2 One year Ips Monsteras 0.243 0.0197 6
## 3 One year Ips Norrkoping 0.138 0.0294 6
## 4 One year Monochamus Gothenburg 0.075 0.0403 6
## 5 One year Monochamus Monsteras 0.0946 0.0152 6
## 6 One year Monochamus Norrkoping 0.188 0.0464 6
## 7 One year Pissodes Gothenburg 0.117 0.0441 6
## 8 One year Pissodes Monsteras 0.158 0.0352 6
## 9 One year Pissodes Norrkoping 0.1 0.00645 6
## 10 One year All Gothenburg 0.258 0.0908 6
## # ℹ 26 more rows
## @Manual{,
## title = {here: A Simpler Way to Find Your Files},
## author = {Kirill Müller},
## year = {2020},
## note = {R package version 1.0.1},
## url = {https://CRAN.R-project.org/package=here},
## }
## @Article{,
## title = {Welcome to the {tidyverse}},
## author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
## year = {2019},
## journal = {Journal of Open Source Software},
## volume = {4},
## number = {43},
## pages = {1686},
## doi = {10.21105/joss.01686},
## }
## @Manual{,
## title = {patchwork: The Composer of Plots},
## author = {Thomas Lin Pedersen},
## year = {2024},
## note = {R package version 1.3.0},
## url = {https://CRAN.R-project.org/package=patchwork},
## }