Chapter 5 Exploitation rate diagnostic experiments

experiment_slug <- "2023-12-28-phylo-sampling-diag"

working_directory <- paste0(
  "experiments/",
  experiment_slug,
  "/analysis/"
)

if (exists("bookdown_wd_prefix")) {
  working_directory <- paste0(
    bookdown_wd_prefix,
    working_directory
  )
}

5.1 Dependencies

library(tidyverse)

## ── Attaching core tidyverse packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(cowplot)

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp

library(RColorBrewer)
library(khroma)
library(rstatix)

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(knitr)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")

print(version)

##                _                           
## platform       aarch64-apple-darwin20      
## arch           aarch64                     
## os             darwin20                    
## system         aarch64, darwin20           
## status                                     
## major          4                           
## minor          2.1                         
## year           2022                        
## month          06                          
## day            23                          
## svn rev        82513                       
## language       R                           
## version.string R version 4.2.1 (2022-06-23)
## nickname       Funny-Looking Kid

5.2 Setup

# Configure our default graphing theme
theme_set(theme_cowplot())
# Create a directory to store plots
plot_directory <- paste0(working_directory, "plots/")
dir.create(plot_directory, showWarnings=FALSE)
# Constants
focal_diagnostic <- "exploitation-rate"

5.2.1 Load experiment summary data

summary_data_loc <- paste0(working_directory, "data/aggregate.csv")
summary_data <- read_csv(summary_data_loc)

## Rows: 1080 Columns: 58
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): DIAGNOSTIC, EVAL_FIT_EST_MODE, EVAL_MODE, SELECTION, STOP_MODE
## dbl (53): ACCURACY, CREDIT, DIAGNOSTIC_DIMENSIONALITY, EVAL_MAX_PHYLO_SEARCH...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

summary_data <- summary_data %>%
  mutate(
    evals_per_gen = case_when(
      EVAL_MODE == "cohort-full-compete" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "cohort" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "down-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "full" ~ 1.0,
      EVAL_MODE == "indiv-rand-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "phylo-informed-sample" ~ TEST_DOWNSAMPLE_RATE
    ),
    EVAL_FIT_EST_MODE = case_when(
      EVAL_FIT_EST_MODE == "ancestor-opt" ~ "ancestor",
      EVAL_FIT_EST_MODE == "relative-opt" ~ "relative",
      .default = EVAL_FIT_EST_MODE
    ),
    .keep = "all"
  ) %>%
  mutate(
    eval_label = case_when(
      # Clean up down-sample label
      EVAL_MODE == "down-sample" & EVAL_FIT_EST_MODE != "none" ~ paste("down-sample", EVAL_FIT_EST_MODE, sep="-"),
      .default = EVAL_MODE
    ),
  ) %>%
  mutate(
    evals_per_gen = as.factor(evals_per_gen),
    DIAGNOSTIC = as.factor(DIAGNOSTIC),
    SELECTION = as.factor(SELECTION),
    EVAL_MODE = as.factor(EVAL_MODE),
    NUM_COHORTS = as.factor(NUM_COHORTS),
    TEST_DOWNSAMPLE_RATE = as.factor(TEST_DOWNSAMPLE_RATE),
    EVAL_FIT_EST_MODE = factor(
      EVAL_FIT_EST_MODE,
      levels = c(
        "none",
        "ancestor",
        "relative"
      ),
      labels = c(
        "None",
        "Ancestor",
        "Relative"
      )
    )
  )

# Grab just the exploitation rate data
exploit_summary_data <- filter(
  summary_data,
  DIAGNOSTIC == "exploitation-rate"
)

5.2.2 Load experiment time series data

ts_data_loc <- paste0(working_directory, "data/time_series.csv")
ts_data <- read_csv(ts_data_loc)

## Rows: 108000 Columns: 28
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): DIAGNOSTIC, EVAL_FIT_EST_MODE, EVAL_MODE, SELECTION
## dbl (24): NUM_COHORTS, SEED, TEST_DOWNSAMPLE_RATE, ave_depth, deleterious_st...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

ts_data <- ts_data %>%
  mutate(
    evals_per_gen = case_when(
      EVAL_MODE == "cohort-full-compete" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "cohort" ~ 1.0 / NUM_COHORTS,
      EVAL_MODE == "down-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "full" ~ 1.0,
      EVAL_MODE == "indiv-rand-sample" ~ TEST_DOWNSAMPLE_RATE,
      EVAL_MODE == "phylo-informed-sample" ~ TEST_DOWNSAMPLE_RATE
    ),
    EVAL_FIT_EST_MODE = case_when(
      EVAL_FIT_EST_MODE == "ancestor-opt" ~ "ancestor",
      EVAL_FIT_EST_MODE == "relative-opt" ~ "relative",
      .default = EVAL_FIT_EST_MODE
    ),
    .keep = "all"
  ) %>%
  mutate(
    eval_label = case_when(
      EVAL_MODE == "down-sample" & EVAL_FIT_EST_MODE != "none" ~ paste("down-sample", EVAL_FIT_EST_MODE, sep="-"),
      .default = EVAL_MODE
    )
  ) %>%
  mutate(
    evals_per_gen = as.factor(evals_per_gen),
    DIAGNOSTIC = as.factor(DIAGNOSTIC),
    SELECTION = as.factor(SELECTION),
    EVAL_MODE = as.factor(EVAL_MODE),
    NUM_COHORTS = as.factor(NUM_COHORTS),
    TEST_DOWNSAMPLE_RATE = as.factor(TEST_DOWNSAMPLE_RATE),
    EVAL_FIT_EST_MODE = factor(
      EVAL_FIT_EST_MODE,
      levels = c(
        "none",
        "ancestor",
        "relative"
      ),
      labels = c(
        "None",
        "Ancestor",
        "Relative"
      )
    )
  )

# Grab just the exploitation rate data
exploit_ts_data <- ts_data %>%
  filter(DIAGNOSTIC == "exploitation-rate")

Summarize time series data:

ts_summary_data <- ts_data %>%
  group_by(SEED, DIAGNOSTIC, SELECTION, evals_per_gen, eval_label) %>%
  summarize(
    n = n(),
    avg_num_unique_selected = mean(num_unique_selected),
    total_optimal_trait_coverage_loss = sum(optimal_trait_coverage_loss)
  )

## `summarise()` has grouped output by 'SEED', 'DIAGNOSTIC', 'SELECTION',
## 'evals_per_gen'. You can override using the `.groups` argument.

5.2.3 Plotting helper functions

The following function assist with exploratory plotting of different measurements from summary and time series data. Note that for these plots, standard lexicase reference is rendered at equivalent number of generations (instead of evaluations).

build_plot_summary_data <- function(data, diagnostic, selection, response) {
  diag_data <- data %>% filter(DIAGNOSTIC == diagnostic)

  full_median <- median(
    filter(
      diag_data,
      eval_label == "full" & SELECTION == selection
    )[[response]]
  )

  plot <- diag_data %>%
    filter(
      eval_label != "full" & SELECTION == selection
    ) %>%
    ggplot(
      aes_string(
        x = "eval_label",
        y = response,
        fill = "eval_label"
      )
    ) +
    geom_hline(
      yintercept = full_median,
      size = 1.0,
      alpha = 0.7,
      color = "black",
      linetype="dashed"
    ) +
    geom_flat_violin(
      position = position_nudge(x = .2, y = 0),
      alpha = .8,
      adjust = 1.5
    ) +
    geom_point(
      mapping = aes(color = eval_label),
      position = position_jitter(width = .15),
      size = .5,
      alpha = 0.8
    ) +
    geom_boxplot(
      width = .1,
      outlier.shape = NA,
      alpha = 0.5
    ) +
    scale_y_continuous(
      # limits = c(-0.5, 100)
    ) +
    scale_fill_bright() +
    scale_color_bright() +
    facet_grid(
      SELECTION ~ evals_per_gen,
      # nrow=2,
      labeller = label_both
    ) +
    theme(
      legend.position = "none",
      axis.text.x = element_text(
        angle = 30,
        hjust = 1
      ),
      panel.border = element_rect(color = "gray", size = 2)
    )

  return(plot)
}

build_plot_time_series_single_sampling <- function(
  data,
  diagnostic,
  selection,
  sampling_level,
  response
) {

  diag_data <- data %>% filter(
    DIAGNOSTIC == diagnostic &
    SELECTION == selection &
    evals_per_gen == sampling_level
  ) %>%
  mutate(
    sampling_level_label = sampling_level
  )

  full_diag_data <- data %>% filter(
    DIAGNOSTIC == diagnostic & SELECTION == selection & eval_label == "full"
  ) %>%
  mutate(
    # Ensure that median line will sit in same facet
    sampling_level_label = sampling_level
  )

  plot <- diag_data %>%
    filter(
      eval_label != "full"
    ) %>%
    ggplot(
      aes_string(
        x = "ts_step",
        # x = "evaluations",
        y = {{ response }}
      )
    ) +
    stat_summary(
      geom = "line",
      fun = mean,
      aes(
        color = eval_label
      )
    ) +
    stat_summary(
      geom = "ribbon",
      fun.data = "mean_cl_boot",
      fun.args = list(conf.int = 0.95),
      alpha = 0.2,
      linetype = 0,
      aes(
        color = eval_label,
        fill = eval_label
      )
    ) +
    scale_fill_bright() +
    scale_color_bright() +
    # facet_wrap(
    #   ~ sampling_level_label,
    #   ncol = 1,
    #   labeller = label_both
    # ) +
    theme(
      legend.position = "right"
    ) +
    stat_summary(
      data = full_diag_data,
      geom = "line",
      fun = median,
      linetype = "dashed",
      color = "black"
    )

  return(plot)
}

build_plot_time_series <- function(
  data,
  diagnostic,
  selection,
  response
) {
  # Build 1% sampling plot and 10% sampling plot
  p_01 <- data  %>% build_plot_time_series_single_sampling(
    diagnostic,
    selection,
    "0.01",
    response
  )
  p_10 <- data %>% build_plot_time_series_single_sampling(
    diagnostic,
    selection,
    "0.1",
    response
  )

  title <- ggdraw() +
    draw_label(
      paste0(diagnostic, " - ", selection),
      fontface = 'bold',
      x = 0,
      hjust = 0
    ) +
    theme(
      # add margin on the left of the drawing canvas,
      # so title is aligned with left edge of first plot
      plot.margin = margin(0, 0, 0, 7)
    )

  plot <- plot_grid(
    title,
    p_01 + labs(title = "1% subsampling") + theme(legend.position = "none"),
    p_10 + labs(title = "10% subsampling") + theme(legend.position = "bottom"),
    nrow = 3,
    ncol = 1,
    rel_heights = c(0.075, 1, 1)
  )

  return(plot)
}

5.3 Aggregate score

5.3.1 Final - Lexicase selection

p <- summary_data %>% build_plot_summary_data(
  "exploitation-rate",
  "lexicase",
  "elite_true_agg_score"
)

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

ggsave(
  filename = paste0(plot_directory, "exploit-score-final-lex.pdf"),
  plot = p + labs(title = "Exploitation rate - Lexicase selection"),
  width = 15,
  height = 10
)

## Warning: Using the `size` aesthetic with geom_polygon was deprecated in ggplot2 3.4.0.
## ℹ Please use the `linewidth` aesthetic instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

5.3.2 Final - Tournament selection

p <- summary_data %>% build_plot_summary_data(
  "exploitation-rate",
  "tournament",
  "elite_true_agg_score"
)
ggsave(
  filename = paste0(plot_directory, "exploit-score-final-tourn.pdf"),
  plot = p + labs(title = "Exploitation rate - Tournament selection"),
  width = 15,
  height = 10
)

5.3.3 Statistical analysis

First, we’ll create a table of median / mean values for easy reference.

exploit_summary_data %>%
  group_by(DIAGNOSTIC, SELECTION, evals_per_gen, eval_label) %>%
  summarize(
    score_median = median(elite_true_agg_score),
    score_mean = mean(elite_true_agg_score),
    replicates = n()
  ) %>%
  kable()

## `summarise()` has grouped output by 'DIAGNOSTIC', 'SELECTION', 'evals_per_gen'.
## You can override using the `.groups` argument.

DIAGNOSTIC	SELECTION	evals_per_gen	eval_label	score_median	score_mean	replicates
exploitation-rate	lexicase	0.01	down-sample	9933.1800	9933.2455	20
exploitation-rate	lexicase	0.01	down-sample-ancestor	920.1625	913.6102	20
exploitation-rate	lexicase	0.01	indiv-rand-sample	2117.1200	2137.2725	20
exploitation-rate	lexicase	0.01	phylo-informed-sample	2157.9350	2162.8605	20
exploitation-rate	lexicase	0.1	down-sample	9967.3500	9968.1275	20
exploitation-rate	lexicase	0.1	down-sample-ancestor	6976.3600	6985.9325	20
exploitation-rate	lexicase	0.1	indiv-rand-sample	9360.5800	9360.2230	20
exploitation-rate	lexicase	0.1	phylo-informed-sample	9301.3500	9308.4105	20
exploitation-rate	lexicase	1	full	9981.7200	9982.2910	20
exploitation-rate	tournament	0.01	down-sample	9650.1650	9650.6660	20
exploitation-rate	tournament	0.01	down-sample-ancestor	1023.4150	1011.8228	20
exploitation-rate	tournament	0.01	indiv-rand-sample	9969.7650	9969.2945	20
exploitation-rate	tournament	0.01	phylo-informed-sample	9970.8950	9970.1455	20
exploitation-rate	tournament	0.1	down-sample	9972.3050	9972.0210	20
exploitation-rate	tournament	0.1	down-sample-ancestor	9988.9200	9988.9365	20
exploitation-rate	tournament	0.1	indiv-rand-sample	9999.8250	9999.8240	20
exploitation-rate	tournament	0.1	phylo-informed-sample	9999.7700	9999.7800	20
exploitation-rate	tournament	1	full	10000.0000	10000.0000	20

Next, we run a Kruskal-Wallis test to check for differences. For these tests, we only compare within a single subsampling level (evals_per_gen) and within the same selection scheme.

kw_test <- exploit_summary_data %>%
  filter(eval_label != "full") %>%
  group_by(SELECTION, evals_per_gen) %>%
  kruskal_test(elite_true_agg_score ~ eval_label) %>%
  mutate(sig = (p < 0.05)) %>%
  unite(
    "comparison_group",
    SELECTION,
    evals_per_gen,
    sep = "_",
    remove = FALSE
  )
kable(kw_test)

comparison_group	SELECTION	evals_per_gen	.y.	n	statistic	df	method	sig
lexicase_0.01	lexicase	0.01	elite_true_agg_score	80	67.04167	3	Kruskal-Wallis	TRUE
lexicase_0.1	lexicase	0.1	elite_true_agg_score	80	68.10074	3	Kruskal-Wallis	TRUE
tournament_0.01	tournament	0.01	elite_true_agg_score	80	66.76541	3	Kruskal-Wallis	TRUE
tournament_0.1	tournament	0.1	elite_true_agg_score	80	67.17274	3	Kruskal-Wallis	TRUE

Perform pairwise wilcoxon rank-sum tests for all significant comparison groups.

# Grab group names of significant comparisons
sig_kw_groups <- filter(kw_test, p < 0.05)$comparison_group

wrs_test <- exploit_summary_data %>%
  unite(
    "comparison_group",
    SELECTION,
    evals_per_gen,
    sep = "_",
    remove = FALSE
  ) %>%
  filter(
    eval_label != "full" & comparison_group %in% sig_kw_groups
  ) %>%
  group_by(SELECTION, evals_per_gen) %>%
  pairwise_wilcox_test(elite_true_agg_score ~ eval_label) %>%
  adjust_pvalue(method = "holm") %>%
  add_significance("p.adj")

kable(wrs_test)

SELECTION	evals_per_gen	.y.	group1	group2	n1	n2	statistic	p	p.adj	p.adj.signif
lexicase	0.01	elite_true_agg_score	down-sample	down-sample-ancestor	20	20	400	0.00e+00	0.00e+00	****
lexicase	0.01	elite_true_agg_score	down-sample	indiv-rand-sample	20	20	400	0.00e+00	0.00e+00	****
lexicase	0.01	elite_true_agg_score	down-sample	phylo-informed-sample	20	20	400	0.00e+00	0.00e+00	****
lexicase	0.01	elite_true_agg_score	down-sample-ancestor	indiv-rand-sample	20	20	0	0.00e+00	0.00e+00	****
lexicase	0.01	elite_true_agg_score	down-sample-ancestor	phylo-informed-sample	20	20	0	0.00e+00	0.00e+00	****
lexicase	0.01	elite_true_agg_score	indiv-rand-sample	phylo-informed-sample	20	20	155	2.31e-01	5.13e-01	ns
lexicase	0.1	elite_true_agg_score	down-sample	down-sample-ancestor	20	20	400	0.00e+00	0.00e+00	****
lexicase	0.1	elite_true_agg_score	down-sample	indiv-rand-sample	20	20	400	0.00e+00	0.00e+00	****
lexicase	0.1	elite_true_agg_score	down-sample	phylo-informed-sample	20	20	400	0.00e+00	0.00e+00	****
lexicase	0.1	elite_true_agg_score	down-sample-ancestor	indiv-rand-sample	20	20	0	0.00e+00	0.00e+00	****
lexicase	0.1	elite_true_agg_score	down-sample-ancestor	phylo-informed-sample	20	20	0	0.00e+00	0.00e+00	****
lexicase	0.1	elite_true_agg_score	indiv-rand-sample	phylo-informed-sample	20	20	288	1.70e-02	6.80e-02	ns
tournament	0.01	elite_true_agg_score	down-sample	down-sample-ancestor	20	20	400	1.00e-07	7.00e-07	****
tournament	0.01	elite_true_agg_score	down-sample	indiv-rand-sample	20	20	0	0.00e+00	0.00e+00	****
tournament	0.01	elite_true_agg_score	down-sample	phylo-informed-sample	20	20	0	0.00e+00	0.00e+00	****
tournament	0.01	elite_true_agg_score	down-sample-ancestor	indiv-rand-sample	20	20	0	1.00e-07	7.00e-07	****
tournament	0.01	elite_true_agg_score	down-sample-ancestor	phylo-informed-sample	20	20	0	1.00e-07	7.00e-07	****
tournament	0.01	elite_true_agg_score	indiv-rand-sample	phylo-informed-sample	20	20	177	5.47e-01	5.47e-01	ns
tournament	0.1	elite_true_agg_score	down-sample	down-sample-ancestor	20	20	0	0.00e+00	0.00e+00	****
tournament	0.1	elite_true_agg_score	down-sample	indiv-rand-sample	20	20	0	1.00e-07	7.00e-07	****
tournament	0.1	elite_true_agg_score	down-sample	phylo-informed-sample	20	20	0	1.00e-07	7.00e-07	****
tournament	0.1	elite_true_agg_score	down-sample-ancestor	indiv-rand-sample	20	20	0	1.00e-07	7.00e-07	****
tournament	0.1	elite_true_agg_score	down-sample-ancestor	phylo-informed-sample	20	20	0	1.00e-07	7.00e-07	****
tournament	0.1	elite_true_agg_score	indiv-rand-sample	phylo-informed-sample	20	20	251	1.71e-01	5.13e-01	ns

5.3.4 Over time - Lexicase

p <- ts_data  %>% build_plot_time_series(
  "exploitation-rate",
  "lexicase",
  "max_agg_score"
)
ggsave(
  filename = paste0(plot_directory, "exploit-score-ts-lex.pdf"),
  plot = p,
  width = 15,
  height = 10
)

5.3.5 Over time - Tournament

p <- ts_data  %>% build_plot_time_series(
  "exploitation-rate",
  "tournament",
  "max_agg_score"
)
ggsave(
  filename = paste0(plot_directory, "exploit-score-ts-tourn.pdf"),
  plot = p,
  width = 15,
  height = 10
)

5.4 Number unique individual selected

build_plot_summary_data(
  ts_summary_data,
  focal_diagnostic,
  "lexicase",
  "avg_num_unique_selected"
)

Average number selected by standard lexicase?

mean(filter(
  ts_summary_data,
  SELECTION == "lexicase" &
  DIAGNOSTIC == "exploitation-rate" &
  evals_per_gen == "1"
)$avg_num_unique_selected)

## [1] 216.6185

mean(filter(
  ts_summary_data,
  SELECTION == "lexicase" &
  DIAGNOSTIC == "exploitation-rate" &
  evals_per_gen == "0.1",
  eval_label == "down-sample"
)$avg_num_unique_selected)

## [1] 28.099

mean(filter(
  ts_summary_data,
  SELECTION == "lexicase" &
  DIAGNOSTIC == "exploitation-rate" &
  evals_per_gen == "0.01",
  eval_label == "down-sample"
)$avg_num_unique_selected)

## [1] 120.7455

5.5 Manuscript figures

Figures customized / cleaned up for the manuscript.

build_final_score_manuscript_plot <- function(
  selection,
  subsample_rate
) {
  # Extract median values for max aggregate score at same evaluation level
  # as sampling regimes
  max_eval <- max(
    filter(exploit_ts_data, evals_per_gen == subsample_rate)$evaluations
  )
  full_eval_steps <- as.numeric(
    levels(
      as.factor(
        filter(exploit_ts_data, eval_label == "full" & evaluations >= max_eval)$evaluations # nolint: line_length_linter.
      )
    )
  )
  full_eval <- full_eval_steps[which.min( full_eval_steps  - max_eval )]
  full_median_score_evals <- median(
    filter(
      exploit_ts_data,
      SELECTION == selection & eval_label == "full" & evaluations == full_eval
    )$max_agg_score
  )

  plot <- exploit_summary_data %>%
    filter(
      eval_label != "full" &
      SELECTION == selection &
      evals_per_gen == subsample_rate
    ) %>%
    ggplot(
      aes(
        x = eval_label,
        y = elite_true_agg_score,
        fill = eval_label
      )
    ) +
    geom_hline(
      yintercept = full_median_score_evals,
      size = 1.0,
      alpha = 0.7,
      color = "black",
      linetype="dashed"
    ) +
    geom_flat_violin(
      position = position_nudge(x = .2, y = 0),
      alpha = .8,
      adjust = 1.5
    ) +
    geom_point(
      mapping = aes(color = eval_label),
      position = position_jitter(width = .15),
      size = .5,
      alpha = 0.8
    ) +
    geom_boxplot(
      width = .1,
      outlier.shape = NA,
      alpha = 0.5
    ) +
    scale_y_continuous(
      name = "Aggregate score",
      limits = c(0, 10010)
    ) +
    scale_x_discrete(
      name = "Subsampling regimes",
      breaks = c("down-sample", "down-sample-ancestor", "indiv-rand-sample", "phylo-informed-sample"),
      labels = c("DS", "DS+EST", "IRS", "ABS")
    ) +
    scale_fill_bright() +
    scale_color_bright() +
    theme(
      legend.position = "none",
      # axis.text.x = element_text(
      #   angle = 30,
      #   hjust = 1
      # ),
    )
  return(plot)
}

Build time series manuscript plot:

build_score_over_time_manuscript_plot <- function(
  selection,
  subsample_rate
) {

  max_eval <- max(
    filter(exploit_ts_data, evals_per_gen == subsample_rate)$evaluations
  )
  full_eval_steps <- as.numeric(
    levels(
      as.factor(
        filter(exploit_ts_data, eval_label == "full" & evaluations >= max_eval)$evaluations # nolint: line_length_linter.
      )
    )
  )
  full_eval <- full_eval_steps[which.min( full_eval_steps  - max_eval )]

  data <- exploit_ts_data %>%
    filter(
      SELECTION == selection &
      evals_per_gen == subsample_rate
    ) %>%
    mutate(
      sampling_level_label = subsample_rate
    )

  full_diag_data <- exploit_ts_data %>%
    filter(
      SELECTION == selection & eval_label == "full" & evaluations <= full_eval
    ) %>%
    mutate(
      # Ensure that median line will sit in same facet
      sampling_level_label = subsample_rate
    )

  plot <- data %>%
    filter(
      eval_label != "full"
    ) %>%
    ggplot(
      aes(
        x = evaluations,
        y = max_agg_score
      )
    ) +
    stat_summary(
      geom = "line",
      fun = mean,
      aes(
        color = eval_label
      )
    ) +
    stat_summary(
      geom = "ribbon",
      fun.data = "mean_cl_boot",
      fun.args = list(conf.int = 0.95),
      alpha = 0.2,
      linetype = 0,
      aes(
        color = eval_label,
        fill = eval_label
      )
    ) +
    scale_y_continuous(
      name = "Aggregate score",
      limits = c(0, 10010)
    ) +
    scale_x_continuous(
      name = "Evaluations"
    ) +
    scale_fill_bright(
      labels=c(
        "Down-sampling (DS), no estimation",
        "Down-sampling + Estimation (DS+EST)",
        "Individualized random sampling (IRS)",
        "Ancestor-based sampling (ABS)"
      )
    ) +
    scale_color_bright(
      labels=c(
        "Down-sampling (DS), no estimation",
        "Down-sampling + Estimation (DS+EST)",
        "Individualized random sampling (IRS)",
        "Ancestor-based sampling (ABS)"
      )
    ) +
    theme(
      legend.position = "none"
    ) +
    stat_summary(
      data = full_diag_data,
      geom = "line",
      fun = median,
      linetype = "dashed",
      color = "black"
    )

  return(plot)
}

Build plots of final scores (after fixed number of evaluations)

plot_final_lex_01 <- build_final_score_manuscript_plot(
  "lexicase",
  "0.01"
)
plot_final_lex_10 <- build_final_score_manuscript_plot(
  "lexicase",
  "0.1"
)
plot_final_tourn_01 <- build_final_score_manuscript_plot(
  "tournament",
  "0.01"
)
plot_final_tourn_10 <- build_final_score_manuscript_plot(
  "tournament",
  "0.1"
)

Build time series plots (with evaluations on x-axis)

plot_ts_lex_01 <- build_score_over_time_manuscript_plot(
  "lexicase",
  "0.01"
)

plot_ts_lex_10 <- build_score_over_time_manuscript_plot(
  "lexicase",
  "0.1"
)

plot_ts_tourn_01 <- build_score_over_time_manuscript_plot(
  "tournament",
  "0.01"
)

plot_ts_tourn_10 <- build_score_over_time_manuscript_plot(
  "tournament",
  "0.1"
)

5.5.1 Lexicase selection manuscript figure

txt_size <- 16
legend <- get_legend(
  plot_ts_lex_01 +
    guides(
      color = guide_legend(nrow = 2, title = "Subsampling regime:"),
      fill = guide_legend(nrow = 2, title = "Subsampling regime:")
    ) +
    theme(
      legend.position = "bottom",
      legend.text = element_text(size = txt_size - 2),
      legend.title = element_text(size = txt_size)
    )
)

grid <- plot_grid(
  plot_ts_lex_01 +
    labs(title = "1% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_lex_01 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  plot_ts_lex_10 +
    labs(title = "10% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_lex_10 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  nrow = 2,
  ncol = 2,
  align = "h",
  labels = c("a", "b", "c", "d"),
  label_size = 18,
  rel_widths = c(1.3, 1, 1.3, 1)
)

grid

lex_fig <- plot_grid(
  grid,
  legend,
  nrow = 2,
  ncol = 1,
  rel_heights = c(1, 0.05)
)

# lex_fig
save_plot(
  filename = paste0(plot_directory, "2023-12-28-exploit-lex-fig.pdf"),
  plot = lex_fig,
  base_width = 10,
  base_height = 8,
  dpi = 600
)

5.5.2 Tournament selection manuscript figures

legend <- get_legend(
  plot_ts_tourn_01 +
    guides(
      color = guide_legend(nrow = 2, title = "Subsampling regime:"),
      fill = guide_legend(nrow = 2, title = "Subsampling regime:")
    ) +
    theme(
      legend.position = "bottom",
      legend.text = element_text(size = txt_size - 2),
      legend.title = element_text(size = txt_size)
    )
)

grid <- plot_grid(
  plot_ts_tourn_01 +
    labs(title = "1% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_tourn_01 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  plot_ts_tourn_10 +
    labs(title = "10% Subsampling") +
    theme(
      axis.text.x = element_text(size = txt_size-2),
      axis.text.y = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size),
      axis.title.y = element_text(size = txt_size)
    ),
  plot_final_tourn_10 +
    theme(
      axis.text.y = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(0, 0, 0, 1, "cm"),
      axis.text.x = element_text(size = txt_size),
      axis.title.x = element_text(size = txt_size)
    ),
  nrow = 2,
  ncol = 2,
  align = "h",
  labels = c("a", "b", "c", "d"),
  label_size = 18,
  rel_widths = c(1.3, 1, 1.3, 1)
)

tourn_fig <- plot_grid(
  grid,
  legend,
  nrow = 2,
  ncol = 1,
  rel_heights = c(1, 0.05)
)

tourn_fig

save_plot(
  filename = paste0(plot_directory, "2023-12-28-exploit-tourn-fig.pdf"),
  plot = tourn_fig,
  base_width = 10,
  base_height = 8,
  dpi = 600
)