Chapter 5 Simple model - Squished toroid experiment analyses

5.1 Setup and Dependencies

library(tidyverse)
library(cowplot)
library(RColorBrewer)
library(khroma)
library(rstatix)
library(knitr)
library(kableExtra)
library(infer)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
# Check if Rmd is being compiled using bookdown
bookdown <- exists("bookdown_build")
experiment_slug <- "lattice-experiments"
working_directory <- paste(
  "experiments",
  "mabe2-exps",
  experiment_slug,
  sep = "/"
)
# Adjust working directory if being knitted for bookdown build.
if (bookdown) {
  working_directory <- paste0(
    bookdown_wd_prefix,
    working_directory
  )
}
# Configure our default graphing theme
theme_set(theme_cowplot())
# Create a directory to store plots
plot_dir <- paste(
  working_directory,
  "rmd_plots",
  sep = "/"
)

dir.create(
  plot_dir,
  showWarnings = FALSE
)

5.2 Max organism data analyses

max_generation <- 100000
max_org_data_path <- paste(
  working_directory,
  "data",
  "combined_max_org_data.csv",
  sep = "/"
)
# Data file has time series
max_org_data_ts <- read_csv(max_org_data_path)
max_org_data_ts <- max_org_data_ts %>%
  mutate(
    landscape = as.factor(landscape),
    structure = factor(
      structure,
      levels = c(
        "1_3600",
        "2_1800",
        "3_1200",
        "4_900",
        "15_240",
        "30_120",
        "60_60"
      )
    ),
  ) %>%
  mutate(
    valleys_crossed = case_when(
      landscape == "Valley crossing" ~ round(log(fitness, base = 1.5)),
      .default = 0
    )
  )
# Get tibble with just final generation
max_org_data <- max_org_data_ts %>%
  filter(generation == max_generation)

Check that replicate count for each condition matches expectations.

run_summary <- max_org_data %>%
  group_by(landscape, structure) %>%
  summarize(
    n = n()
  )
print(run_summary, n = 30)
## # A tibble: 21 × 3
## # Groups:   landscape [3]
##    landscape       structure     n
##    <fct>           <fct>     <int>
##  1 Multipath       1_3600       50
##  2 Multipath       2_1800       50
##  3 Multipath       3_1200       50
##  4 Multipath       4_900        50
##  5 Multipath       15_240       50
##  6 Multipath       30_120       50
##  7 Multipath       60_60        50
##  8 Single gradient 1_3600       50
##  9 Single gradient 2_1800       50
## 10 Single gradient 3_1200       50
## 11 Single gradient 4_900        50
## 12 Single gradient 15_240       50
## 13 Single gradient 30_120       50
## 14 Single gradient 60_60        50
## 15 Valley crossing 1_3600       50
## 16 Valley crossing 2_1800       50
## 17 Valley crossing 3_1200       50
## 18 Valley crossing 4_900        50
## 19 Valley crossing 15_240       50
## 20 Valley crossing 30_120       50
## 21 Valley crossing 60_60        50

5.2.1 Fitness in smooth gradient landscape

single_gradient_final_fitness_plt <- ggplot(
    data = filter(max_org_data, landscape == "Single gradient"),
    mapping = aes(
      x = structure,
      y = fitness,
      fill = structure
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping = aes(color = structure),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(
      angle = 30,
      hjust = 1
    )
  )

ggsave(
  filename = paste0(plot_dir, "/single_gradient_final_fitness.pdf"),
  plot = single_gradient_final_fitness_plt,
  width = 15,
  height = 10
)

single_gradient_final_fitness_plt

Max fitness over time

single_gradient_fitness_ts_plt <- ggplot(
    data = filter(max_org_data_ts, landscape == "Single gradient"),
    mapping = aes(
      x = generation,
      y = fitness,
      color = structure,
      fill = structure
    )
  ) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(
    fun.data = "mean_cl_boot",
    fun.args = list(conf.int = 0.95),
    geom = "ribbon",
    alpha = 0.2,
    linetype = 0
  ) +
  theme(legend.position = "bottom")

ggsave(
  plot = single_gradient_fitness_ts_plt,
  filename = paste0(
    plot_dir,
    "/single_gradient_fitness_ts.pdf"
  ),
  width = 15,
  height = 10
)

single_gradient_fitness_ts_plt

Time to maximum fitness

# Find all rows with maximum fitness value, then get row with minimum generation,
#  then project out expected generation to max (for runs that didn't finish)
max_possible_fit = 50
time_to_max_single_gradient <- max_org_data_ts %>%
  filter(landscape == "Single gradient") %>%
  group_by(rep, structure) %>%
  slice_max(
    fitness,
    n = 1
  ) %>%
  slice_min(
    generation,
    n = 1
  ) %>%
  mutate(
    proj_gen_max = (max_possible_fit / fitness) * generation
  )
single_gradient_gen_max_proj_plt <- ggplot(
    data = time_to_max_single_gradient,
    mapping = aes(
      x = structure,
      y = proj_gen_max,
      fill = structure
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping = aes(color = structure),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_log10(
    guide = "axis_logticks"
  ) +
  # scale_y_continuous(
  #   trans="pseudo_log",
  #   breaks = c(10, 100, 1000, 10000, 100000, 1000000)
  #   ,limits = c(10, 100, 1000, 10000, 100000, 1000000)
  # ) +
  geom_hline(
    yintercept = max_generation,
    linetype = "dashed"
  ) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(
      angle = 30,
      hjust = 1
    )
  )

ggsave(
  filename = paste0(plot_dir, "/single_gradient_gen_max_proj.pdf"),
  plot = single_gradient_gen_max_proj_plt,
  width = 15,
  height = 10
)

single_gradient_gen_max_proj_plt

Rank ordering of time to max fitness values

time_to_max_single_gradient %>%
  group_by(structure) %>%
  summarize(
    reps = n(),
    median_proj_gen = median(proj_gen_max),
    mean_proj_gen = mean(proj_gen_max)
  ) %>%
  arrange(
    mean_proj_gen
  )
## # A tibble: 7 × 4
##   structure  reps median_proj_gen mean_proj_gen
##   <fct>     <int>           <dbl>         <dbl>
## 1 60_60        50           28000         27540
## 2 30_120       50           28000         27880
## 3 15_240       50           30000         30160
## 4 4_900        50           42000         42020
## 5 3_1200       50           47000         46900
## 6 2_1800       50           53000         53340
## 7 1_3600       50           69000         68700
kruskal.test(
  formula = proj_gen_max ~ structure,
  data = time_to_max_single_gradient
)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  proj_gen_max by structure
## Kruskal-Wallis chi-squared = 341.17, df = 6, p-value < 2.2e-16
wc_results <- pairwise.wilcox.test(
  x = time_to_max_single_gradient$proj_gen_max,
  g = time_to_max_single_gradient$structure,
  p.adjust.method   = "holm",
  exact = FALSE
)

single_gradient_proj_gen_max_wc_table <- kbl(wc_results$p.value) %>%
  kable_styling()

save_kable(
  single_gradient_proj_gen_max_wc_table,
  paste0(plot_dir, "/single_gradient_proj_gen_max_wc_table.pdf")
)
single_gradient_proj_gen_max_wc_table
1_3600 2_1800 3_1200 4_900 15_240 30_120
2_1800 0 NA NA NA NA NA
3_1200 0 0 NA NA NA NA
4_900 0 0 0 NA NA NA
15_240 0 0 0 0 NA NA
30_120 0 0 0 0 0 NA
60_60 0 0 0 0 0 0.0001966
library(boot)
# Define sample mean function
samplemean <- function(x, d) {
  return(mean(x[d]))
}

summary_gen_to_max <- tibble(
  structure = character(),
  proj_gen_max_mean = double(),
  proj_gen_max_mean_ci_low = double(),
  proj_gen_max_mean_ci_high = double()
)

structures <- levels(time_to_max_single_gradient$structure)
for (struct in structures) {
  boot_result <- boot(
    data = filter(
      time_to_max_single_gradient,
      structure == struct
    )$proj_gen_max,
    statistic = samplemean,
    R = 10000
  )
  result_ci <- boot.ci(boot_result, conf = 0.99, type = "perc")
  m <- result_ci$t0
  low <- result_ci$percent[4]
  high <- result_ci$percent[5]

  summary_gen_to_max <- summary_gen_to_max %>%
    add_row(
      structure = struct,
      proj_gen_max_mean = m,
      proj_gen_max_mean_ci_low = low,
      proj_gen_max_mean_ci_high = high
    )
}

wm_median <- median(
  filter(time_to_max_single_gradient, structure == "well_mixed")$proj_gen_max
)

simple_time_to_max_plt <- ggplot(
    data = summary_gen_to_max,
    mapping = aes(
      x = structure,
      y = proj_gen_max_mean,
      fill = structure,
      color = structure
    )
  ) +
  # geom_point() +
  geom_col() +
  geom_linerange(
    aes(
      ymin = proj_gen_max_mean_ci_low,
      ymax = proj_gen_max_mean_ci_high
    ),
    color = "black",
    linewidth = 0.75,
    lineend = "round"
  ) +
  # scale_y_log10(
  #   guide = "axis_logticks"
  # ) +
  geom_hline(
    yintercept = max_generation,
    linetype = "dashed"
  ) +
  geom_hline(
    yintercept = wm_median,
    linetype = "dotted",
    color = "orange"
  ) +
  scale_color_discreterainbow() +
  scale_fill_discreterainbow() +
  coord_flip() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(
      angle = 30,
      hjust = 1
    )
  )

ggsave(
  filename = paste0(plot_dir, "/simple_time_to_max.pdf"),
  plot = simple_time_to_max_plt,
  width = 8,
  height = 4
)

simple_time_to_max_plt

5.2.2 Fitness in multi-path landscape

multipath_final_fitness_plt <- ggplot(
    data = filter(max_org_data, landscape == "Multipath"),
    mapping = aes(
      x = structure,
      y = fitness,
      fill = structure
    )
  ) +
  # geom_flat_violin(
  #   position = position_nudge(x = .2, y = 0),
  #   alpha = .8
  # ) +
  geom_point(
    mapping = aes(color = structure),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .3,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_color_discreterainbow() +
  scale_fill_discreterainbow() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(
      angle = 30,
      hjust = 1
    )
  )

ggsave(
  filename = paste0(plot_dir, "/multipath_final_fitness.pdf"),
  plot = multipath_final_fitness_plt,
  width = 6,
  height = 4
)

multipath_final_fitness_plt

Max fitness over time

multipath_fitness_ts_plt <- ggplot(
    data = filter(max_org_data_ts, landscape == "Multipath"),
    mapping = aes(
      x = generation,
      y = fitness,
      color = structure,
      fill = structure
    )
  ) +
  stat_summary(fun = "mean", geom = "line") +
  stat_summary(
    fun.data = "mean_cl_boot",
    fun.args = list(conf.int = 0.95),
    geom = "ribbon",
    alpha = 0.2,
    linetype = 0
  ) +
  theme(legend.position = "bottom")

ggsave(
  plot = multipath_fitness_ts_plt,
  filename = paste0(
    plot_dir,
    "/multipath_fitness_ts.pdf"
  ),
  width = 15,
  height = 10
)

multipath_fitness_ts_plt

Rank ordering of fitness values

max_org_data %>%
  filter(landscape == "Multipath") %>%
  group_by(structure) %>%
  summarize(
    reps = n(),
    median_fitness = median(fitness),
    mean_fitness = mean(fitness)
  ) %>%
  arrange(
    desc(mean_fitness)
  )
## # A tibble: 7 × 4
##   structure  reps median_fitness mean_fitness
##   <fct>     <int>          <dbl>        <dbl>
## 1 1_3600       50           4.88         4.84
## 2 2_1800       50           4.88         4.78
## 3 3_1200       50           4.74         4.63
## 4 4_900        50           4.64         4.54
## 5 15_240       50           4.06         4.06
## 6 60_60        50           3.94         3.81
## 7 30_120       50           4            3.80
kruskal.test(
  formula = fitness ~ structure,
  data = filter(max_org_data, landscape == "Multipath")
)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  fitness by structure
## Kruskal-Wallis chi-squared = 144.73, df = 6, p-value < 2.2e-16
wc_results <- pairwise.wilcox.test(
  x = filter(max_org_data, landscape == "Multipath")$fitness,
  g = filter(max_org_data, landscape == "Multipath")$structure,
  p.adjust.method   = "holm",
  exact = FALSE
)

mp_fitness_wc_table <- kbl(wc_results$p.value) %>%
  kable_styling()

save_kable(
  mp_fitness_wc_table,
  paste0(plot_dir, "/multipath_fitness_wc_table.pdf")
)
mp_fitness_wc_table
1_3600 2_1800 3_1200 4_900 15_240 30_120
2_1800 1.0000000 NA NA NA NA NA
3_1200 0.0389539 0.2309342 NA NA NA NA
4_900 0.0000552 0.0022081 0.6036094 NA NA NA
15_240 0.0000000 0.0000001 0.0000387 0.0022081 NA NA
30_120 0.0000000 0.0000000 0.0000000 0.0000003 0.4456978 NA
60_60 0.0000000 0.0000000 0.0000002 0.0000094 0.6036094 1

5.2.3 Valleys crossed in valley-crossing landscape

valleycrossing_valleys_plt <- ggplot(
    data = filter(max_org_data, landscape == "Valley crossing"),
    mapping = aes(
      x = structure,
      y = valleys_crossed,
      fill = structure
    )
  ) +
  # geom_flat_violin(
  #   position = position_nudge(x = .2, y = 0),
  #   alpha = .8
  # ) +
  geom_point(
    mapping = aes(color = structure),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .3,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_color_discreterainbow() +
  scale_fill_discreterainbow() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(
      angle = 30,
      hjust = 1
    )
  )
ggsave(
  filename = paste0(plot_dir, "/valleycrossing_valleys_crossed.pdf"),
  plot = valleycrossing_valleys_plt,
  width = 6,
  height = 4
)

valleycrossing_valleys_plt

vc <- max_org_data %>%
  filter(landscape == "Valley crossing") %>%
  group_by(structure) %>%
  summarize(
    reps = n(),
    median_valleys_crossed = median(valleys_crossed),
    mean_valleys_crossed = mean(valleys_crossed),
    min_valleys_crossed = min(valleys_crossed)
  ) %>%
  arrange(
    desc(mean_valleys_crossed)
  )
vc
## # A tibble: 7 × 5
##   structure  reps median_valleys_crossed mean_valleys_crossed
##   <fct>     <int>                  <dbl>                <dbl>
## 1 1_3600       50                  100                  100  
## 2 2_1800       50                  100                  100  
## 3 3_1200       50                  100                   99.6
## 4 4_900        50                   89.5                 89.3
## 5 30_120       50                   47                   47.2
## 6 15_240       50                   46                   46.6
## 7 60_60        50                   46                   45.5
## # ℹ 1 more variable: min_valleys_crossed <dbl>
vc$min_valleys_crossed
## [1] 100 100  89  76  31  32  32
kruskal.test(
  formula = valleys_crossed ~ structure,
  data = filter(max_org_data, landscape == "Valley crossing")
)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  valleys_crossed by structure
## Kruskal-Wallis chi-squared = 309.49, df = 6, p-value < 2.2e-16
wc_results <- pairwise.wilcox.test(
  x = filter(max_org_data, landscape == "Valley crossing")$valleys_crossed,
  g = filter(max_org_data, landscape == "Valley crossing")$structure,
  p.adjust.method   = "holm",
  exact = FALSE
)

vc_valleys_crossed_wc_table <- kbl(wc_results$p.value) %>%
  kable_styling()

save_kable(
  vc_valleys_crossed_wc_table,
  paste0(plot_dir, "/valley_crossing_valleys_wc_table.pdf")
)
vc_valleys_crossed_wc_table
1_3600 2_1800 3_1200 4_900 15_240 30_120
2_1800 NaN NA NA NA NA NA
3_1200 0.796952 0.796952 NA NA NA NA
4_900 0.000000 0.000000 0 NA NA NA
15_240 0.000000 0.000000 0 0 NA NA
30_120 0.000000 0.000000 0 0 0.9787605 NA
60_60 0.000000 0.000000 0 0 0.9787605 0.796952