Chapter 5 Simple model - Squished toroid experiment analyses
5.1 Setup and Dependencies
library(tidyverse)
library(cowplot)
library(RColorBrewer)
library(khroma)
library(rstatix)
library(knitr)
library(kableExtra)
library(infer)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
# Check if Rmd is being compiled using bookdown
<- exists("bookdown_build") bookdown
<- "lattice-experiments"
experiment_slug <- paste(
working_directory "experiments",
"mabe2-exps",
experiment_slug,sep = "/"
)# Adjust working directory if being knitted for bookdown build.
if (bookdown) {
<- paste0(
working_directory
bookdown_wd_prefix,
working_directory
) }
# Configure our default graphing theme
theme_set(theme_cowplot())
# Create a directory to store plots
<- paste(
plot_dir
working_directory,"rmd_plots",
sep = "/"
)
dir.create(
plot_dir,showWarnings = FALSE
)
5.2 Max organism data analyses
<- 100000
max_generation <- paste(
max_org_data_path
working_directory,"data",
"combined_max_org_data.csv",
sep = "/"
)# Data file has time series
<- read_csv(max_org_data_path)
max_org_data_ts <- max_org_data_ts %>%
max_org_data_ts mutate(
landscape = as.factor(landscape),
structure = factor(
structure,levels = c(
"1_3600",
"2_1800",
"3_1200",
"4_900",
"15_240",
"30_120",
"60_60"
)
),%>%
) mutate(
valleys_crossed = case_when(
== "Valley crossing" ~ round(log(fitness, base = 1.5)),
landscape .default = 0
)
)# Get tibble with just final generation
<- max_org_data_ts %>%
max_org_data filter(generation == max_generation)
Check that replicate count for each condition matches expectations.
<- max_org_data %>%
run_summary group_by(landscape, structure) %>%
summarize(
n = n()
)print(run_summary, n = 30)
## # A tibble: 21 × 3
## # Groups: landscape [3]
## landscape structure n
## <fct> <fct> <int>
## 1 Multipath 1_3600 50
## 2 Multipath 2_1800 50
## 3 Multipath 3_1200 50
## 4 Multipath 4_900 50
## 5 Multipath 15_240 50
## 6 Multipath 30_120 50
## 7 Multipath 60_60 50
## 8 Single gradient 1_3600 50
## 9 Single gradient 2_1800 50
## 10 Single gradient 3_1200 50
## 11 Single gradient 4_900 50
## 12 Single gradient 15_240 50
## 13 Single gradient 30_120 50
## 14 Single gradient 60_60 50
## 15 Valley crossing 1_3600 50
## 16 Valley crossing 2_1800 50
## 17 Valley crossing 3_1200 50
## 18 Valley crossing 4_900 50
## 19 Valley crossing 15_240 50
## 20 Valley crossing 30_120 50
## 21 Valley crossing 60_60 50
5.2.1 Fitness in smooth gradient landscape
<- ggplot(
single_gradient_final_fitness_plt data = filter(max_org_data, landscape == "Single gradient"),
mapping = aes(
x = structure,
y = fitness,
fill = structure
)+
) geom_flat_violin(
position = position_nudge(x = .2, y = 0),
alpha = .8
+
) geom_point(
mapping = aes(color = structure),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
+
) geom_boxplot(
width = .1,
outlier.shape = NA,
alpha = 0.5
+
) theme(
legend.position = "none",
axis.text.x = element_text(
angle = 30,
hjust = 1
)
)
ggsave(
filename = paste0(plot_dir, "/single_gradient_final_fitness.pdf"),
plot = single_gradient_final_fitness_plt,
width = 15,
height = 10
)
single_gradient_final_fitness_plt
Max fitness over time
<- ggplot(
single_gradient_fitness_ts_plt data = filter(max_org_data_ts, landscape == "Single gradient"),
mapping = aes(
x = generation,
y = fitness,
color = structure,
fill = structure
)+
) stat_summary(fun = "mean", geom = "line") +
stat_summary(
fun.data = "mean_cl_boot",
fun.args = list(conf.int = 0.95),
geom = "ribbon",
alpha = 0.2,
linetype = 0
+
) theme(legend.position = "bottom")
ggsave(
plot = single_gradient_fitness_ts_plt,
filename = paste0(
plot_dir,"/single_gradient_fitness_ts.pdf"
),width = 15,
height = 10
)
single_gradient_fitness_ts_plt
Time to maximum fitness
# Find all rows with maximum fitness value, then get row with minimum generation,
# then project out expected generation to max (for runs that didn't finish)
= 50
max_possible_fit <- max_org_data_ts %>%
time_to_max_single_gradient filter(landscape == "Single gradient") %>%
group_by(rep, structure) %>%
slice_max(
fitness,n = 1
%>%
) slice_min(
generation,n = 1
%>%
) mutate(
proj_gen_max = (max_possible_fit / fitness) * generation
)
<- ggplot(
single_gradient_gen_max_proj_plt data = time_to_max_single_gradient,
mapping = aes(
x = structure,
y = proj_gen_max,
fill = structure
)+
) geom_flat_violin(
position = position_nudge(x = .2, y = 0),
alpha = .8
+
) geom_point(
mapping = aes(color = structure),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
+
) geom_boxplot(
width = .1,
outlier.shape = NA,
alpha = 0.5
+
) scale_y_log10(
guide = "axis_logticks"
+
) # scale_y_continuous(
# trans="pseudo_log",
# breaks = c(10, 100, 1000, 10000, 100000, 1000000)
# ,limits = c(10, 100, 1000, 10000, 100000, 1000000)
# ) +
geom_hline(
yintercept = max_generation,
linetype = "dashed"
+
) theme(
legend.position = "none",
axis.text.x = element_text(
angle = 30,
hjust = 1
)
)
ggsave(
filename = paste0(plot_dir, "/single_gradient_gen_max_proj.pdf"),
plot = single_gradient_gen_max_proj_plt,
width = 15,
height = 10
)
single_gradient_gen_max_proj_plt
Rank ordering of time to max fitness values
%>%
time_to_max_single_gradient group_by(structure) %>%
summarize(
reps = n(),
median_proj_gen = median(proj_gen_max),
mean_proj_gen = mean(proj_gen_max)
%>%
) arrange(
mean_proj_gen )
## # A tibble: 7 × 4
## structure reps median_proj_gen mean_proj_gen
## <fct> <int> <dbl> <dbl>
## 1 60_60 50 28000 27540
## 2 30_120 50 28000 27880
## 3 15_240 50 30000 30160
## 4 4_900 50 42000 42020
## 5 3_1200 50 47000 46900
## 6 2_1800 50 53000 53340
## 7 1_3600 50 69000 68700
kruskal.test(
formula = proj_gen_max ~ structure,
data = time_to_max_single_gradient
)
##
## Kruskal-Wallis rank sum test
##
## data: proj_gen_max by structure
## Kruskal-Wallis chi-squared = 341.17, df = 6, p-value < 2.2e-16
<- pairwise.wilcox.test(
wc_results x = time_to_max_single_gradient$proj_gen_max,
g = time_to_max_single_gradient$structure,
p.adjust.method = "holm",
exact = FALSE
)
<- kbl(wc_results$p.value) %>%
single_gradient_proj_gen_max_wc_table kable_styling()
save_kable(
single_gradient_proj_gen_max_wc_table,paste0(plot_dir, "/single_gradient_proj_gen_max_wc_table.pdf")
) single_gradient_proj_gen_max_wc_table
1_3600 | 2_1800 | 3_1200 | 4_900 | 15_240 | 30_120 | |
---|---|---|---|---|---|---|
2_1800 | 0 | NA | NA | NA | NA | NA |
3_1200 | 0 | 0 | NA | NA | NA | NA |
4_900 | 0 | 0 | 0 | NA | NA | NA |
15_240 | 0 | 0 | 0 | 0 | NA | NA |
30_120 | 0 | 0 | 0 | 0 | 0 | NA |
60_60 | 0 | 0 | 0 | 0 | 0 | 0.0001966 |
library(boot)
# Define sample mean function
<- function(x, d) {
samplemean return(mean(x[d]))
}
<- tibble(
summary_gen_to_max structure = character(),
proj_gen_max_mean = double(),
proj_gen_max_mean_ci_low = double(),
proj_gen_max_mean_ci_high = double()
)
<- levels(time_to_max_single_gradient$structure)
structures for (struct in structures) {
<- boot(
boot_result data = filter(
time_to_max_single_gradient,== struct
structure $proj_gen_max,
)statistic = samplemean,
R = 10000
)<- boot.ci(boot_result, conf = 0.99, type = "perc")
result_ci <- result_ci$t0
m <- result_ci$percent[4]
low <- result_ci$percent[5]
high
<- summary_gen_to_max %>%
summary_gen_to_max add_row(
structure = struct,
proj_gen_max_mean = m,
proj_gen_max_mean_ci_low = low,
proj_gen_max_mean_ci_high = high
)
}
<- median(
wm_median filter(time_to_max_single_gradient, structure == "well_mixed")$proj_gen_max
)
<- ggplot(
simple_time_to_max_plt data = summary_gen_to_max,
mapping = aes(
x = structure,
y = proj_gen_max_mean,
fill = structure,
color = structure
)+
) # geom_point() +
geom_col() +
geom_linerange(
aes(
ymin = proj_gen_max_mean_ci_low,
ymax = proj_gen_max_mean_ci_high
),color = "black",
linewidth = 0.75,
lineend = "round"
+
) # scale_y_log10(
# guide = "axis_logticks"
# ) +
geom_hline(
yintercept = max_generation,
linetype = "dashed"
+
) geom_hline(
yintercept = wm_median,
linetype = "dotted",
color = "orange"
+
) scale_color_discreterainbow() +
scale_fill_discreterainbow() +
coord_flip() +
theme(
legend.position = "none",
axis.text.x = element_text(
angle = 30,
hjust = 1
)
)
ggsave(
filename = paste0(plot_dir, "/simple_time_to_max.pdf"),
plot = simple_time_to_max_plt,
width = 8,
height = 4
)
simple_time_to_max_plt
5.2.2 Fitness in multi-path landscape
<- ggplot(
multipath_final_fitness_plt data = filter(max_org_data, landscape == "Multipath"),
mapping = aes(
x = structure,
y = fitness,
fill = structure
)+
) # geom_flat_violin(
# position = position_nudge(x = .2, y = 0),
# alpha = .8
# ) +
geom_point(
mapping = aes(color = structure),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
+
) geom_boxplot(
width = .3,
outlier.shape = NA,
alpha = 0.5
+
) scale_color_discreterainbow() +
scale_fill_discreterainbow() +
theme(
legend.position = "none",
axis.text.x = element_text(
angle = 30,
hjust = 1
)
)
ggsave(
filename = paste0(plot_dir, "/multipath_final_fitness.pdf"),
plot = multipath_final_fitness_plt,
width = 6,
height = 4
)
multipath_final_fitness_plt
Max fitness over time
<- ggplot(
multipath_fitness_ts_plt data = filter(max_org_data_ts, landscape == "Multipath"),
mapping = aes(
x = generation,
y = fitness,
color = structure,
fill = structure
)+
) stat_summary(fun = "mean", geom = "line") +
stat_summary(
fun.data = "mean_cl_boot",
fun.args = list(conf.int = 0.95),
geom = "ribbon",
alpha = 0.2,
linetype = 0
+
) theme(legend.position = "bottom")
ggsave(
plot = multipath_fitness_ts_plt,
filename = paste0(
plot_dir,"/multipath_fitness_ts.pdf"
),width = 15,
height = 10
)
multipath_fitness_ts_plt
Rank ordering of fitness values
%>%
max_org_data filter(landscape == "Multipath") %>%
group_by(structure) %>%
summarize(
reps = n(),
median_fitness = median(fitness),
mean_fitness = mean(fitness)
%>%
) arrange(
desc(mean_fitness)
)
## # A tibble: 7 × 4
## structure reps median_fitness mean_fitness
## <fct> <int> <dbl> <dbl>
## 1 1_3600 50 4.88 4.84
## 2 2_1800 50 4.88 4.78
## 3 3_1200 50 4.74 4.63
## 4 4_900 50 4.64 4.54
## 5 15_240 50 4.06 4.06
## 6 60_60 50 3.94 3.81
## 7 30_120 50 4 3.80
kruskal.test(
formula = fitness ~ structure,
data = filter(max_org_data, landscape == "Multipath")
)
##
## Kruskal-Wallis rank sum test
##
## data: fitness by structure
## Kruskal-Wallis chi-squared = 144.73, df = 6, p-value < 2.2e-16
<- pairwise.wilcox.test(
wc_results x = filter(max_org_data, landscape == "Multipath")$fitness,
g = filter(max_org_data, landscape == "Multipath")$structure,
p.adjust.method = "holm",
exact = FALSE
)
<- kbl(wc_results$p.value) %>%
mp_fitness_wc_table kable_styling()
save_kable(
mp_fitness_wc_table,paste0(plot_dir, "/multipath_fitness_wc_table.pdf")
) mp_fitness_wc_table
1_3600 | 2_1800 | 3_1200 | 4_900 | 15_240 | 30_120 | |
---|---|---|---|---|---|---|
2_1800 | 1.0000000 | NA | NA | NA | NA | NA |
3_1200 | 0.0389539 | 0.2309342 | NA | NA | NA | NA |
4_900 | 0.0000552 | 0.0022081 | 0.6036094 | NA | NA | NA |
15_240 | 0.0000000 | 0.0000001 | 0.0000387 | 0.0022081 | NA | NA |
30_120 | 0.0000000 | 0.0000000 | 0.0000000 | 0.0000003 | 0.4456978 | NA |
60_60 | 0.0000000 | 0.0000000 | 0.0000002 | 0.0000094 | 0.6036094 | 1 |
5.2.3 Valleys crossed in valley-crossing landscape
<- ggplot(
valleycrossing_valleys_plt data = filter(max_org_data, landscape == "Valley crossing"),
mapping = aes(
x = structure,
y = valleys_crossed,
fill = structure
)+
) # geom_flat_violin(
# position = position_nudge(x = .2, y = 0),
# alpha = .8
# ) +
geom_point(
mapping = aes(color = structure),
position = position_jitter(width = .15),
size = .5,
alpha = 0.8
+
) geom_boxplot(
width = .3,
outlier.shape = NA,
alpha = 0.5
+
) scale_color_discreterainbow() +
scale_fill_discreterainbow() +
theme(
legend.position = "none",
axis.text.x = element_text(
angle = 30,
hjust = 1
)
)ggsave(
filename = paste0(plot_dir, "/valleycrossing_valleys_crossed.pdf"),
plot = valleycrossing_valleys_plt,
width = 6,
height = 4
)
valleycrossing_valleys_plt
<- max_org_data %>%
vc filter(landscape == "Valley crossing") %>%
group_by(structure) %>%
summarize(
reps = n(),
median_valleys_crossed = median(valleys_crossed),
mean_valleys_crossed = mean(valleys_crossed),
min_valleys_crossed = min(valleys_crossed)
%>%
) arrange(
desc(mean_valleys_crossed)
) vc
## # A tibble: 7 × 5
## structure reps median_valleys_crossed mean_valleys_crossed
## <fct> <int> <dbl> <dbl>
## 1 1_3600 50 100 100
## 2 2_1800 50 100 100
## 3 3_1200 50 100 99.6
## 4 4_900 50 89.5 89.3
## 5 30_120 50 47 47.2
## 6 15_240 50 46 46.6
## 7 60_60 50 46 45.5
## # ℹ 1 more variable: min_valleys_crossed <dbl>
$min_valleys_crossed vc
## [1] 100 100 89 76 31 32 32
kruskal.test(
formula = valleys_crossed ~ structure,
data = filter(max_org_data, landscape == "Valley crossing")
)
##
## Kruskal-Wallis rank sum test
##
## data: valleys_crossed by structure
## Kruskal-Wallis chi-squared = 309.49, df = 6, p-value < 2.2e-16
<- pairwise.wilcox.test(
wc_results x = filter(max_org_data, landscape == "Valley crossing")$valleys_crossed,
g = filter(max_org_data, landscape == "Valley crossing")$structure,
p.adjust.method = "holm",
exact = FALSE
)
<- kbl(wc_results$p.value) %>%
vc_valleys_crossed_wc_table kable_styling()
save_kable(
vc_valleys_crossed_wc_table,paste0(plot_dir, "/valley_crossing_valleys_wc_table.pdf")
) vc_valleys_crossed_wc_table
1_3600 | 2_1800 | 3_1200 | 4_900 | 15_240 | 30_120 | |
---|---|---|---|---|---|---|
2_1800 | NaN | NA | NA | NA | NA | NA |
3_1200 | 0.796952 | 0.796952 | NA | NA | NA | NA |
4_900 | 0.000000 | 0.000000 | 0 | NA | NA | NA |
15_240 | 0.000000 | 0.000000 | 0 | 0 | NA | NA |
30_120 | 0.000000 | 0.000000 | 0 | 0 | 0.9787605 | NA |
60_60 | 0.000000 | 0.000000 | 0 | 0 | 0.9787605 | 0.796952 |