Chapter 5 Exploitation rate diagnostic experiments

experiment_slug <- "2023-12-28-phylo-sampling-diag"

working_directory <- paste0(
  "experiments/",
  experiment_slug,
  "/analysis/"
)

if (exists("bookdown_wd_prefix")) {
  working_directory <- paste0(
    bookdown_wd_prefix,
    working_directory
  )
}

5.1 Dependencies

library(tidyverse)
## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(RColorBrewer)
library(khroma)
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(knitr)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")
print(version)
##                _                           
## platform       aarch64-apple-darwin20      
## arch           aarch64                     
## os             darwin20                    
## system         aarch64, darwin20           
## status                                     
## major          4                           
## minor          2.1                         
## year           2022                        
## month          06                          
## day            23                          
## svn rev        82513                       
## language       R                           
## version.string R version 4.2.1 (2022-06-23)
## nickname       Funny-Looking Kid

5.2 Setup

# Configure our default graphing theme
theme_set(theme_cowplot())
# Create a directory to store plots
plot_directory <- paste0(working_directory, "plots/")
dir.create(plot_directory, showWarnings=FALSE)
# Constants
focal_diagnostic <- "exploitation-rate"

5.2.1 Load experiment summary data

summary_data_loc <- paste0(working_directory, "data/aggregate.csv")
summary_data <- read_csv(summary_data_loc)
## Rows: 1080 Columns: 58
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): DIAGNOSTIC, EVAL_FIT_EST_MODE, EVAL_MODE, SELECTION, STOP_MODE
## dbl (53): ACCURACY, CREDIT, DIAGNOSTIC_DIMENSIONALITY, EVAL_MAX_PHYLO_SEARCH...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary_data <- summary_data %>%
  mutate(
    evals_per_gen = case_when(
      EVAL_MODE == "cohort-full-compete" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "cohort" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "down-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "full" ~ 1.0,
      EVAL_MODE == "indiv-rand-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "phylo-informed-sample" ~ TEST_DOWNSAMPLE_RATE
    ),
    EVAL_FIT_EST_MODE = case_when(
      EVAL_FIT_EST_MODE == "ancestor-opt" ~ "ancestor",
      EVAL_FIT_EST_MODE == "relative-opt" ~ "relative",
      .default = EVAL_FIT_EST_MODE
    ),
    .keep = "all"
  ) %>%
  mutate(
    eval_label = case_when(
      # Clean up down-sample label
      EVAL_MODE == "down-sample" & EVAL_FIT_EST_MODE != "none" ~ paste("down-sample", EVAL_FIT_EST_MODE, sep="-"),
      .default = EVAL_MODE
    ),
  ) %>%
  mutate(
    evals_per_gen = as.factor(evals_per_gen),
    DIAGNOSTIC = as.factor(DIAGNOSTIC),
    SELECTION = as.factor(SELECTION),
    EVAL_MODE = as.factor(EVAL_MODE),
    NUM_COHORTS = as.factor(NUM_COHORTS),
    TEST_DOWNSAMPLE_RATE = as.factor(TEST_DOWNSAMPLE_RATE),
    EVAL_FIT_EST_MODE = factor(
      EVAL_FIT_EST_MODE,
      levels = c(
        "none",
        "ancestor",
        "relative"
      ),
      labels = c(
        "None",
        "Ancestor",
        "Relative"
      )
    )
  )

# Grab just the exploitation rate data
exploit_summary_data <- filter(
  summary_data,
  DIAGNOSTIC == "exploitation-rate"
)

5.2.2 Load experiment time series data

ts_data_loc <- paste0(working_directory, "data/time_series.csv")
ts_data <- read_csv(ts_data_loc)
## Rows: 108000 Columns: 28
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): DIAGNOSTIC, EVAL_FIT_EST_MODE, EVAL_MODE, SELECTION
## dbl (24): NUM_COHORTS, SEED, TEST_DOWNSAMPLE_RATE, ave_depth, deleterious_st...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ts_data <- ts_data %>%
  mutate(
    evals_per_gen = case_when(
      EVAL_MODE == "cohort-full-compete" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "cohort" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "down-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "full" ~ 1.0,
      EVAL_MODE == "indiv-rand-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "phylo-informed-sample" ~ TEST_DOWNSAMPLE_RATE
    ),
    EVAL_FIT_EST_MODE = case_when(
      EVAL_FIT_EST_MODE == "ancestor-opt" ~ "ancestor",
      EVAL_FIT_EST_MODE == "relative-opt" ~ "relative",
      .default = EVAL_FIT_EST_MODE
    ),
    .keep = "all"
  ) %>%
  mutate(
    eval_label = case_when(
      EVAL_MODE == "down-sample" & EVAL_FIT_EST_MODE != "none" ~ paste("down-sample", EVAL_FIT_EST_MODE, sep="-"),
      .default = EVAL_MODE
    )
  ) %>%
  mutate(
    evals_per_gen = as.factor(evals_per_gen),
    DIAGNOSTIC = as.factor(DIAGNOSTIC),
    SELECTION = as.factor(SELECTION),
    EVAL_MODE = as.factor(EVAL_MODE),
    NUM_COHORTS = as.factor(NUM_COHORTS),
    TEST_DOWNSAMPLE_RATE = as.factor(TEST_DOWNSAMPLE_RATE),
    EVAL_FIT_EST_MODE = factor(
      EVAL_FIT_EST_MODE,
      levels = c(
        "none",
        "ancestor",
        "relative"
      ),
      labels = c(
        "None",
        "Ancestor",
        "Relative"
      )
    )
  )

# Grab just the exploitation rate data
exploit_ts_data <- ts_data %>%
  filter(DIAGNOSTIC == "exploitation-rate")

Summarize time series data:

ts_summary_data <- ts_data %>%
  group_by(SEED, DIAGNOSTIC, SELECTION, evals_per_gen, eval_label) %>%
  summarize(
    n = n(),
    avg_num_unique_selected = mean(num_unique_selected),
    total_optimal_trait_coverage_loss = sum(optimal_trait_coverage_loss)
  )
## `summarise()` has grouped output by 'SEED', 'DIAGNOSTIC', 'SELECTION',
## 'evals_per_gen'. You can override using the `.groups` argument.

5.2.3 Plotting helper functions

The following function assist with exploratory plotting of different measurements from summary and time series data. Note that for these plots, standard lexicase reference is rendered at equivalent number of generations (instead of evaluations).

build_plot_summary_data <- function(data, diagnostic, selection, response) {
  diag_data <- data %>% filter(DIAGNOSTIC == diagnostic)

  full_median <- median(
    filter(
      diag_data,
      eval_label == "full" & SELECTION == selection
    )[[response]]
  )

  plot <- diag_data %>%
    filter(
      eval_label != "full" & SELECTION == selection
    ) %>%
    ggplot(
      aes_string(
        x = "eval_label",
        y = response,
        fill = "eval_label"
      )
    ) +
    geom_hline(
      yintercept = full_median,
      size = 1.0,
      alpha = 0.7,
      color = "black",
      linetype="dashed"
    ) +
    geom_flat_violin(
      position = position_nudge(x = .2, y = 0),
      alpha = .8,
      adjust = 1.5
    ) +
    geom_point(
      mapping = aes(color = eval_label),
      position = position_jitter(width = .15),
      size = .5,
      alpha = 0.8
    ) +
    geom_boxplot(
      width = .1,
      outlier.shape = NA,
      alpha = 0.5
    ) +
    scale_y_continuous(
      # limits = c(-0.5, 100)
    ) +
    scale_fill_bright() +
    scale_color_bright() +
    facet_grid(
      SELECTION ~ evals_per_gen,
      # nrow=2,
      labeller = label_both
    ) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(
        angle = 30,
        hjust = 1
      ),
      panel.border = element_rect(color = "gray", size = 2)
    )

  return(plot)
}

build_plot_time_series_single_sampling <- function(
  data,
  diagnostic,
  selection,
  sampling_level,
  response
) {

  diag_data <- data %>% filter(
    DIAGNOSTIC == diagnostic &
    SELECTION == selection &
    evals_per_gen == sampling_level
  ) %>%
  mutate(
    sampling_level_label = sampling_level
  )

  full_diag_data <- data %>% filter(
    DIAGNOSTIC == diagnostic & SELECTION == selection & eval_label == "full"
  ) %>%
  mutate(
    # Ensure that median line will sit in same facet
    sampling_level_label = sampling_level
  )

  plot <- diag_data %>%
    filter(
      eval_label != "full"
    ) %>%
    ggplot(
      aes_string(
        x = "ts_step",
        # x = "evaluations",
        y = {{ response }}
      )
    ) +
    stat_summary(
      geom = "line",
      fun = mean,
      aes(
        color = eval_label
      )
    ) +
    stat_summary(
      geom = "ribbon",
      fun.data = "mean_cl_boot",
      fun.args = list(conf.int = 0.95),
      alpha = 0.2,
      linetype = 0,
      aes(
        color = eval_label,
        fill = eval_label
      )
    ) +
    scale_fill_bright() +
    scale_color_bright() +
    # facet_wrap(
    #   ~ sampling_level_label,
    #   ncol = 1,
    #   labeller = label_both
    # ) +
    theme(
      legend.position = "right"
    ) +
    stat_summary(
      data = full_diag_data,
      geom = "line",
      fun = median,
      linetype = "dashed",
      color = "black"
    )

  return(plot)
}

build_plot_time_series <- function(
  data,
  diagnostic,
  selection,
  response
) {
  # Build 1% sampling plot and 10% sampling plot
  p_01 <- data  %>% build_plot_time_series_single_sampling(
    diagnostic,
    selection,
    "0.01",
    response
  )
  p_10 <- data %>% build_plot_time_series_single_sampling(
    diagnostic,
    selection,
    "0.1",
    response
  )

  title <- ggdraw() +
    draw_label(
      paste0(diagnostic, " - ", selection),
      fontface = 'bold',
      x = 0,
      hjust = 0
    ) +
    theme(
      # add margin on the left of the drawing canvas,
      # so title is aligned with left edge of first plot
      plot.margin = margin(0, 0, 0, 7)
    )

  plot <- plot_grid(
    title,
    p_01 + labs(title = "1% subsampling") + theme(legend.position = "none"),
    p_10 + labs(title = "10% subsampling") + theme(legend.position = "bottom"),
    nrow = 3,
    ncol = 1,
    rel_heights = c(0.075, 1, 1)
  )

  return(plot)
}

5.3 Aggregate score

5.3.1 Final - Lexicase selection

p <- summary_data %>% build_plot_summary_data(
  "exploitation-rate",
  "lexicase",
  "elite_true_agg_score"
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ggsave(
  filename = paste0(plot_directory, "exploit-score-final-lex.pdf"),
  plot = p + labs(title = "Exploitation rate - Lexicase selection"),
  width = 15,
  height = 10
)
## Warning: Using the `size` aesthetic with geom_polygon was deprecated in ggplot2 3.4.0.
## ℹ Please use the `linewidth` aesthetic instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

5.3.2 Final - Tournament selection

p <- summary_data %>% build_plot_summary_data(
  "exploitation-rate",
  "tournament",
  "elite_true_agg_score"
)
ggsave(
  filename = paste0(plot_directory, "exploit-score-final-tourn.pdf"),
  plot = p + labs(title = "Exploitation rate - Tournament selection"),
  width = 15,
  height = 10
)

5.3.3 Statistical analysis

First, we’ll create a table of median / mean values for easy reference.

exploit_summary_data %>%
  group_by(DIAGNOSTIC, SELECTION, evals_per_gen, eval_label) %>%
  summarize(
    score_median = median(elite_true_agg_score),
    score_mean = mean(elite_true_agg_score),
    replicates = n()
  ) %>%
  kable()
## `summarise()` has grouped output by 'DIAGNOSTIC', 'SELECTION', 'evals_per_gen'.
## You can override using the `.groups` argument.
DIAGNOSTIC SELECTION evals_per_gen eval_label score_median score_mean replicates
exploitation-rate lexicase 0.01 down-sample 9933.1800 9933.2455 20
exploitation-rate lexicase 0.01 down-sample-ancestor 920.1625 913.6102 20
exploitation-rate lexicase 0.01 indiv-rand-sample 2117.1200 2137.2725 20
exploitation-rate lexicase 0.01 phylo-informed-sample 2157.9350 2162.8605 20
exploitation-rate lexicase 0.1 down-sample 9967.3500 9968.1275 20
exploitation-rate lexicase 0.1 down-sample-ancestor 6976.3600 6985.9325 20
exploitation-rate lexicase 0.1 indiv-rand-sample 9360.5800 9360.2230 20
exploitation-rate lexicase 0.1 phylo-informed-sample 9301.3500 9308.4105 20
exploitation-rate lexicase 1 full 9981.7200 9982.2910 20
exploitation-rate tournament 0.01 down-sample 9650.1650 9650.6660 20
exploitation-rate tournament 0.01 down-sample-ancestor 1023.4150 1011.8228 20
exploitation-rate tournament 0.01 indiv-rand-sample 9969.7650 9969.2945 20
exploitation-rate tournament 0.01 phylo-informed-sample 9970.8950 9970.1455 20
exploitation-rate tournament 0.1 down-sample 9972.3050 9972.0210 20
exploitation-rate tournament 0.1 down-sample-ancestor 9988.9200 9988.9365 20
exploitation-rate tournament 0.1 indiv-rand-sample 9999.8250 9999.8240 20
exploitation-rate tournament 0.1 phylo-informed-sample 9999.7700 9999.7800 20
exploitation-rate tournament 1 full 10000.0000 10000.0000 20

Next, we run a Kruskal-Wallis test to check for differences. For these tests, we only compare within a single subsampling level (evals_per_gen) and within the same selection scheme.

kw_test <- exploit_summary_data %>%
  filter(eval_label != "full") %>%
  group_by(SELECTION, evals_per_gen) %>%
  kruskal_test(elite_true_agg_score ~ eval_label) %>%
  mutate(sig = (p < 0.05)) %>%
  unite(
    "comparison_group",
    SELECTION,
    evals_per_gen,
    sep = "_",
    remove = FALSE
  )
kable(kw_test)
comparison_group SELECTION evals_per_gen .y. n statistic df p method sig
lexicase_0.01 lexicase 0.01 elite_true_agg_score 80 67.04167 3 0 Kruskal-Wallis TRUE
lexicase_0.1 lexicase 0.1 elite_true_agg_score 80 68.10074 3 0 Kruskal-Wallis TRUE
tournament_0.01 tournament 0.01 elite_true_agg_score 80 66.76541 3 0 Kruskal-Wallis TRUE
tournament_0.1 tournament 0.1 elite_true_agg_score 80 67.17274 3 0 Kruskal-Wallis TRUE

Perform pairwise wilcoxon rank-sum tests for all significant comparison groups.

# Grab group names of significant comparisons
sig_kw_groups <- filter(kw_test, p < 0.05)$comparison_group

wrs_test <- exploit_summary_data %>%
  unite(
    "comparison_group",
    SELECTION,
    evals_per_gen,
    sep = "_",
    remove = FALSE
  ) %>%
  filter(
    eval_label != "full" & comparison_group %in% sig_kw_groups
  ) %>%
  group_by(SELECTION, evals_per_gen) %>%
  pairwise_wilcox_test(elite_true_agg_score ~ eval_label) %>%
  adjust_pvalue(method = "holm") %>%
  add_significance("p.adj")

kable(wrs_test)
SELECTION evals_per_gen .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
lexicase 0.01 elite_true_agg_score down-sample down-sample-ancestor 20 20 400 0.00e+00 0.00e+00 ****
lexicase 0.01 elite_true_agg_score down-sample indiv-rand-sample 20 20 400 0.00e+00 0.00e+00 ****
lexicase 0.01 elite_true_agg_score down-sample phylo-informed-sample 20 20 400 0.00e+00 0.00e+00 ****
lexicase 0.01 elite_true_agg_score down-sample-ancestor indiv-rand-sample 20 20 0 0.00e+00 0.00e+00 ****
lexicase 0.01 elite_true_agg_score down-sample-ancestor phylo-informed-sample 20 20 0 0.00e+00 0.00e+00 ****
lexicase 0.01 elite_true_agg_score indiv-rand-sample phylo-informed-sample 20 20 155 2.31e-01 5.13e-01 ns
lexicase 0.1 elite_true_agg_score down-sample down-sample-ancestor 20 20 400 0.00e+00 0.00e+00 ****
lexicase 0.1 elite_true_agg_score down-sample indiv-rand-sample 20 20 400 0.00e+00 0.00e+00 ****
lexicase 0.1 elite_true_agg_score down-sample phylo-informed-sample 20 20 400 0.00e+00 0.00e+00 ****
lexicase 0.1 elite_true_agg_score down-sample-ancestor indiv-rand-sample 20 20 0 0.00e+00 0.00e+00 ****
lexicase 0.1 elite_true_agg_score down-sample-ancestor phylo-informed-sample 20 20 0 0.00e+00 0.00e+00 ****
lexicase 0.1 elite_true_agg_score indiv-rand-sample phylo-informed-sample 20 20 288 1.70e-02 6.80e-02 ns
tournament 0.01 elite_true_agg_score down-sample down-sample-ancestor 20 20 400 1.00e-07 7.00e-07 ****
tournament 0.01 elite_true_agg_score down-sample indiv-rand-sample 20 20 0 0.00e+00 0.00e+00 ****
tournament 0.01 elite_true_agg_score down-sample phylo-informed-sample 20 20 0 0.00e+00 0.00e+00 ****
tournament 0.01 elite_true_agg_score down-sample-ancestor indiv-rand-sample 20 20 0 1.00e-07 7.00e-07 ****
tournament 0.01 elite_true_agg_score down-sample-ancestor phylo-informed-sample 20 20 0 1.00e-07 7.00e-07 ****
tournament 0.01 elite_true_agg_score indiv-rand-sample phylo-informed-sample 20 20 177 5.47e-01 5.47e-01 ns
tournament 0.1 elite_true_agg_score down-sample down-sample-ancestor 20 20 0 0.00e+00 0.00e+00 ****
tournament 0.1 elite_true_agg_score down-sample indiv-rand-sample 20 20 0 1.00e-07 7.00e-07 ****
tournament 0.1 elite_true_agg_score down-sample phylo-informed-sample 20 20 0 1.00e-07 7.00e-07 ****
tournament 0.1 elite_true_agg_score down-sample-ancestor indiv-rand-sample 20 20 0 1.00e-07 7.00e-07 ****
tournament 0.1 elite_true_agg_score down-sample-ancestor phylo-informed-sample 20 20 0 1.00e-07 7.00e-07 ****
tournament 0.1 elite_true_agg_score indiv-rand-sample phylo-informed-sample 20 20 251 1.71e-01 5.13e-01 ns

5.3.4 Over time - Lexicase

p <- ts_data  %>% build_plot_time_series(
  "exploitation-rate",
  "lexicase",
  "max_agg_score"
)
ggsave(
  filename = paste0(plot_directory, "exploit-score-ts-lex.pdf"),
  plot = p,
  width = 15,
  height = 10
)

5.3.5 Over time - Tournament

p <- ts_data  %>% build_plot_time_series(
  "exploitation-rate",
  "tournament",
  "max_agg_score"
)
ggsave(
  filename = paste0(plot_directory, "exploit-score-ts-tourn.pdf"),
  plot = p,
  width = 15,
  height = 10
)

5.4 Number unique individual selected

build_plot_summary_data(
  ts_summary_data,
  focal_diagnostic,
  "lexicase",
  "avg_num_unique_selected"
)

Average number selected by standard lexicase?

mean(filter(
  ts_summary_data,
  SELECTION == "lexicase" &
  DIAGNOSTIC == "exploitation-rate" &
  evals_per_gen == "1"
)$avg_num_unique_selected)
## [1] 216.6185
mean(filter(
  ts_summary_data,
  SELECTION == "lexicase" &
  DIAGNOSTIC == "exploitation-rate" &
  evals_per_gen == "0.1",
  eval_label == "down-sample"
)$avg_num_unique_selected)
## [1] 28.099
mean(filter(
  ts_summary_data,
  SELECTION == "lexicase" &
  DIAGNOSTIC == "exploitation-rate" &
  evals_per_gen == "0.01",
  eval_label == "down-sample"
)$avg_num_unique_selected)
## [1] 120.7455

5.5 Manuscript figures

Figures customized / cleaned up for the manuscript.

build_final_score_manuscript_plot <- function(
  selection,
  subsample_rate
) {
  # Extract median values for max aggregate score at same evaluation level
  # as sampling regimes
  max_eval <- max(
    filter(exploit_ts_data, evals_per_gen == subsample_rate)$evaluations
  )
  full_eval_steps <- as.numeric(
    levels(
      as.factor(
        filter(exploit_ts_data, eval_label == "full" & evaluations >= max_eval)$evaluations # nolint: line_length_linter.
      )
    )
  )
  full_eval <- full_eval_steps[which.min( full_eval_steps  - max_eval )]
  full_median_score_evals <- median(
    filter(
      exploit_ts_data,
      SELECTION == selection & eval_label == "full" & evaluations == full_eval
    )$max_agg_score
  )

  plot <- exploit_summary_data %>%
    filter(
      eval_label != "full" &
      SELECTION == selection &
      evals_per_gen == subsample_rate
    ) %>%
    ggplot(
      aes(
        x = eval_label,
        y = elite_true_agg_score,
        fill = eval_label
      )
    ) +
    geom_hline(
      yintercept = full_median_score_evals,
      size = 1.0,
      alpha = 0.7,
      color = "black",
      linetype="dashed"
    ) +
    geom_flat_violin(
      position = position_nudge(x = .2, y = 0),
      alpha = .8,
      adjust = 1.5
    ) +
    geom_point(
      mapping = aes(color = eval_label),
      position = position_jitter(width = .15),
      size = .5,
      alpha = 0.8
    ) +
    geom_boxplot(
      width = .1,
      outlier.shape = NA,
      alpha = 0.5
    ) +
    scale_y_continuous(
      name = "Aggregate score",
      limits = c(0, 10010)
    ) +
    scale_x_discrete(
      name = "Subsampling regimes",
      breaks = c("down-sample", "down-sample-ancestor", "indiv-rand-sample", "phylo-informed-sample"),
      labels = c("DS", "DS+EST", "IRS", "ABS")
    ) +
    scale_fill_bright() +
    scale_color_bright() +
    theme(
      legend.position = "none",
      # axis.text.x = element_text(
      #   angle = 30,
      #   hjust = 1
      # ),
    )
  return(plot)
}

Build time series manuscript plot:

build_score_over_time_manuscript_plot <- function(
  selection,
  subsample_rate
) {

  max_eval <- max(
    filter(exploit_ts_data, evals_per_gen == subsample_rate)$evaluations
  )
  full_eval_steps <- as.numeric(
    levels(
      as.factor(
        filter(exploit_ts_data, eval_label == "full" & evaluations >= max_eval)$evaluations # nolint: line_length_linter.
      )
    )
  )
  full_eval <- full_eval_steps[which.min( full_eval_steps  - max_eval )]

  data <- exploit_ts_data %>%
    filter(
      SELECTION == selection &
      evals_per_gen == subsample_rate
    ) %>%
    mutate(
      sampling_level_label = subsample_rate
    )

  full_diag_data <- exploit_ts_data %>%
    filter(
      SELECTION == selection & eval_label == "full" & evaluations <= full_eval
    ) %>%
    mutate(
      # Ensure that median line will sit in same facet
      sampling_level_label = subsample_rate
    )

  plot <- data %>%
    filter(
      eval_label != "full"
    ) %>%
    ggplot(
      aes(
        x = evaluations,
        y = max_agg_score
      )
    ) +
    stat_summary(
      geom = "line",
      fun = mean,
      aes(
        color = eval_label
      )
    ) +
    stat_summary(
      geom = "ribbon",
      fun.data = "mean_cl_boot",
      fun.args = list(conf.int = 0.95),
      alpha = 0.2,
      linetype = 0,
      aes(
        color = eval_label,
        fill = eval_label
      )
    ) +
    scale_y_continuous(
      name = "Aggregate score",
      limits = c(0, 10010)
    ) +
    scale_x_continuous(
      name = "Evaluations"
    ) +
    scale_fill_bright(
      labels=c(
        "Down-sampling (DS), no estimation",
        "Down-sampling + Estimation (DS+EST)",
        "Individualized random sampling (IRS)",
        "Ancestor-based sampling (ABS)"
      )
    ) +
    scale_color_bright(
      labels=c(
        "Down-sampling (DS), no estimation",
        "Down-sampling + Estimation (DS+EST)",
        "Individualized random sampling (IRS)",
        "Ancestor-based sampling (ABS)"
      )
    ) +
    theme(
      legend.position = "none"
    ) +
    stat_summary(
      data = full_diag_data,
      geom = "line",
      fun = median,
      linetype = "dashed",
      color = "black"
    )

  return(plot)
}

Build plots of final scores (after fixed number of evaluations)

plot_final_lex_01 <- build_final_score_manuscript_plot(
  "lexicase",
  "0.01"
)
plot_final_lex_10 <- build_final_score_manuscript_plot(
  "lexicase",
  "0.1"
)
plot_final_tourn_01 <- build_final_score_manuscript_plot(
  "tournament",
  "0.01"
)
plot_final_tourn_10 <- build_final_score_manuscript_plot(
  "tournament",
  "0.1"
)

Build time series plots (with evaluations on x-axis)

plot_ts_lex_01 <- build_score_over_time_manuscript_plot(
  "lexicase",
  "0.01"
)

plot_ts_lex_10 <- build_score_over_time_manuscript_plot(
  "lexicase",
  "0.1"
)

plot_ts_tourn_01 <- build_score_over_time_manuscript_plot(
  "tournament",
  "0.01"
)

plot_ts_tourn_10 <- build_score_over_time_manuscript_plot(
  "tournament",
  "0.1"
)

5.5.1 Lexicase selection manuscript figure

txt_size <- 16
legend <- get_legend(
  plot_ts_lex_01 +
    guides(
      color = guide_legend(nrow = 2, title = "Subsampling regime:"),
      fill = guide_legend(nrow = 2, title = "Subsampling regime:")
    ) +
    theme(
      legend.position = "bottom",
      legend.text = element_text(size = txt_size - 2),
      legend.title = element_text(size = txt_size)
    )
)

grid <- plot_grid(
  plot_ts_lex_01 +
    labs(title = "1% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_lex_01 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  plot_ts_lex_10 +
    labs(title = "10% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_lex_10 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  nrow = 2,
  ncol = 2,
  align = "h",
  labels = c("a", "b", "c", "d"),
  label_size = 18,
  rel_widths = c(1.3, 1, 1.3, 1)
)

grid

lex_fig <- plot_grid(
  grid,
  legend,
  nrow = 2,
  ncol = 1,
  rel_heights = c(1, 0.05)
)

# lex_fig
save_plot(
  filename = paste0(plot_directory, "2023-12-28-exploit-lex-fig.pdf"),
  plot = lex_fig,
  base_width = 10,
  base_height = 8,
  dpi = 600
)

5.5.2 Tournament selection manuscript figures

legend <- get_legend(
  plot_ts_tourn_01 +
    guides(
      color = guide_legend(nrow = 2, title = "Subsampling regime:"),
      fill = guide_legend(nrow = 2, title = "Subsampling regime:")
    ) +
    theme(
      legend.position = "bottom",
      legend.text = element_text(size = txt_size - 2),
      legend.title = element_text(size = txt_size)
    )
)

grid <- plot_grid(
  plot_ts_tourn_01 +
    labs(title = "1% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_tourn_01 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  plot_ts_tourn_10 +
    labs(title = "10% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_tourn_10 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  nrow = 2,
  ncol = 2,
  align = "h",
  labels = c("a", "b", "c", "d"),
  label_size = 18,
  rel_widths = c(1.3, 1, 1.3, 1)
)

tourn_fig <- plot_grid(
  grid,
  legend,
  nrow = 2,
  ncol = 1,
  rel_heights = c(1, 0.05)
)

tourn_fig

save_plot(
  filename = paste0(plot_directory, "2023-12-28-exploit-tourn-fig.pdf"),
  plot = tourn_fig,
  base_width = 10,
  base_height = 8,
  dpi = 600
)