Chapter 7 Directed digital evolution experiment

Supplemental information and data analyses for the initial (baseline) directed evolution modeling experiment.

7.1 Overview

experiment_slug <- "2021-11-11-selection"

working_directory <- paste0("experiments/",experiment_slug,"/analysis/")

7.2 Analysis dependencies

Load all required R libraries

library(tidyverse)
library(ggplot2)
library(cowplot)
library(RColorBrewer)
library(khroma)
source("https://gist.githubusercontent.com/benmarwick/2a1bb0133ff568cbe28d/raw/fb53bd97121f7f9ce947837ef1a4c65a73bffb3f/geom_flat_violin.R")

These analyses were knit with the following environment:

print(version)

##                _                           
## platform       x86_64-pc-linux-gnu         
## arch           x86_64                      
## os             linux-gnu                   
## system         x86_64, linux-gnu           
## status                                     
## major          4                           
## minor          2.1                         
## year           2022                        
## month          06                          
## day            23                          
## svn rev        82513                       
## language       R                           
## version.string R version 4.2.1 (2022-06-23)
## nickname       Funny-Looking Kid

7.3 Setup

Load experiment summary data.

exp_summary_data_loc <- paste0(working_directory,"data/experiment_summary.csv")
exp_summary_data <- read.csv(exp_summary_data_loc, na.strings="NONE")

exp_summary_data$SELECTION_METHOD <- factor(
  exp_summary_data$SELECTION_METHOD,
  levels=c(
    "elite",
    "elite-10",
    "tournament",
    "lexicase",
    "non-dominated-elite",
    "random",
    "none"
  ),
  labels=c(
    "elite",
    "elite-10",
    "tourn",
    "lex",
    "nde",
    "random",
    "none"
  )
)

Load time series data.

times_series_data_loc <- paste0(working_directory,"data/evaluation_time_series_corrected.csv")
times_series_data <- read.csv(times_series_data_loc, na.strings="NONE")

# Specify experimental condition for each datum.
times_series_data$SELECTION_METHOD <- factor(
  times_series_data$SELECTION_METHOD,
  levels=c(
    "elite",
    "elite-10",
    "tournament",
    "lexicase",
    "non-dominated-elite",
    "non-dominated-tournament",
    "random",
    "none"
  ),
  labels=c(
    "elite",
    "elite-10",
    "tourn",
    "lex",
    "nde",
    "ndt",
    "random",
    "none"
  )
)

times_series_data$epoch_offset <- times_series_data$epoch+1

Load task coverage per population data.

task_coverage_per_pop_data_loc <- paste0(working_directory,"data/max_coverage_per_pop_cnt.csv")
task_coverage_per_pop_data <- read.csv(task_coverage_per_pop_data_loc, na.strings="NONE")

# Specify experimental condition for each datum.
task_coverage_per_pop_data$SELECTION_METHOD <- factor(
  task_coverage_per_pop_data$SELECTION_METHOD,
  levels=c(
    "elite",
    "elite-10",
    "tournament",
    "lexicase",
    "non-dominated-elite",
    "non-dominated-tournament",
    "random",
    "none"
  ),
  labels=c(
    "elite",
    "elite-10",
    "tourn",
    "lex",
    "nde",
    "ndt",
    "random",
    "none"
  )
)

Miscellaneous setup

# Configure our default graphing theme
theme_set(theme_cowplot())
# Palette
scale_fill_fun <- scale_fill_bright
scale_color_fun <- scale_color_bright
alpha <- 0.05
# Create a directory to store plots
plot_directory <- paste0(working_directory, "plots/")
dir.create(plot_directory, showWarnings=FALSE)

p_label <- function(p_value) {
  threshold = 0.0001
  if (p_value < threshold) {
    return(paste0("p < ", threshold))
  } else {
    return(paste0("p = ", p_value))
  }
}


selection_method_breaks <- c("elite", "elite-10", "tourn", "lex", "nde", "random", "none")
selection_method_labels <- c("ELITE", "TOP-10", "TOURN", "LEX", "NDE", "RAND", "NONE")

7.4 Best single-population task coverage

Best single-population task coverage at the end of the experiment.

max_trait_cov_fig <-
  ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=max_trait_coverage,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8,
    adjust=1.5
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0.1, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Task Coverage",
    limits=c(-0.5,18.5),
    breaks=seq(0,18,2)
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun() +
  scale_color_fun() +
  theme(
    legend.position="none"
  )

max_trait_cov_fig

ggsave(
  plot=max_trait_cov_fig,
  paste0(plot_directory, "max_trait_coverage.pdf")
)

## Saving 7 x 5 in image

Statistical results:

kruskal.test(
  formula=max_trait_coverage~SELECTION_METHOD,
  data=exp_summary_data
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  max_trait_coverage by SELECTION_METHOD
## Kruskal-Wallis chi-squared = 244.55, df = 6, p-value < 2.2e-16

# Kruskal-wallis is significant, so we do a post-hoc wilcoxon rank-sum.
pairwise.wilcox.test(
  x=exp_summary_data$max_trait_coverage,
  g=exp_summary_data$SELECTION_METHOD,
  p.adjust.method="bonferroni",
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  exp_summary_data$max_trait_coverage and exp_summary_data$SELECTION_METHOD 
## 
##          elite   elite-10 tourn   lex     nde     random
## elite-10 1.0000  -        -       -       -       -     
## tourn    0.0404  0.0167   -       -       -       -     
## lex      2.3e-11 9.6e-10  3.2e-14 -       -       -     
## nde      0.0001  0.0234   6.8e-10 2.2e-07 -       -     
## random   1.2e-14 6.2e-13  1.7e-10 < 2e-16 < 2e-16 -     
## none     1.0e-14 4.8e-12  1.1e-08 < 2e-16 < 2e-16 0.0392
## 
## P value adjustment method: bonferroni

7.4.1 Best single-population task coverage time series

To speed up graphing, we plot a low-resolution version of the time series.

max_trait_cov_ot_fig <-
  ggplot(
    # times_series_data,
    filter(times_series_data, (epoch_offset%%100)==0 | epoch_offset==1),
    aes(
      x=epoch_offset,
      y=max_trait_coverage,
      fill=SELECTION_METHOD,
      color=SELECTION_METHOD
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_x_continuous(
    name="Cycles"
  ) +
  scale_y_continuous(
    name="Task Coverage",
    limits=c(-0.5,18.5),
    breaks=seq(0,18,2)
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="none"
  )
max_trait_cov_ot_fig

## Warning: Computation failed in `stat_summary()`:

ggsave(
  plot=max_trait_cov_ot_fig,
  filename=paste0(plot_directory, "2021-11-11-best-pop-task-cov-ts.pdf"),
  width=10,
  height=6
)

## Warning: Computation failed in `stat_summary()`:

7.4.1.1 First 30 cycles of the experiment

max_trait_cov_ot_early_fig <-
  ggplot(
    # times_series_data,
    filter(times_series_data, (epoch_offset <= 30)),
    aes(
      x=epoch_offset,
      y=max_trait_coverage,
      fill=SELECTION_METHOD,
      color=SELECTION_METHOD
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_x_continuous(
    name="Cycles"
  ) +
  scale_y_continuous(
    name="Task Coverage",
    limits=c(-0.5,18.5),
    breaks=seq(0,18,2)
  ) +
  scale_fill_fun(
    name="Selection Method",

    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",

    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="bottom"
  )
max_trait_cov_ot_early_fig

## Warning: Computation failed in `stat_summary()`:

ggsave(
  filename=paste0(plot_directory, "2021-11-11-best-pop-task-cov-ts-early.pdf"),
  width=10,
  height=6
)

## Warning: Computation failed in `stat_summary()`:

After just 10 cycles, we observed significant gains from using NDE and LEX selection protocols.

early_data <- filter(times_series_data, epoch_offset==10)
kruskal.test(
  formula=max_trait_coverage~SELECTION_METHOD,
  data=early_data
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  max_trait_coverage by SELECTION_METHOD
## Kruskal-Wallis chi-squared = 166.2, df = 6, p-value < 2.2e-16

# Kruskal-wallis is significant, so we do a post-hoc wilcoxon rank-sum.
pairwise.wilcox.test(
  x=early_data$max_trait_coverage,
  g=early_data$SELECTION_METHOD,
  p.adjust.method="bonferroni",
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  early_data$max_trait_coverage and early_data$SELECTION_METHOD 
## 
##          elite   elite-10 tourn   lex     nde     random 
## elite-10 1.00000 -        -       -       -       -      
## tourn    1.00000 1.00000  -       -       -       -      
## lex      2.1e-06 0.00087  2.3e-06 -       -       -      
## nde      0.14106 1.00000  0.63684 0.00709 -       -      
## random   3.3e-07 5.5e-10  5.2e-11 1.2e-13 1.7e-11 -      
## none     2.0e-06 3.2e-09  3.9e-10 3.6e-13 9.5e-11 1.00000
## 
## P value adjustment method: bonferroni

7.5 Metapopulation task coverage

Metapopulation task coverage at the end of the experiment.

total_trait_cov_fig <-
  ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=total_trait_coverage,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8,
    adjust=1.5
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0.1, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Task Coverage",
    limits=c(-0.5,18.5),
    breaks=seq(0,18,2)
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
  ) +
  scale_color_fun(
  ) +
  theme(
    legend.position="none"
  )

total_trait_cov_fig

ggsave(
  plot=total_trait_cov_fig,
  paste0(plot_directory, "2021-11-11-metapop-task-cov.pdf")
)

## Saving 7 x 5 in image

Statistical results:

kruskal.test(
  formula=total_trait_coverage~SELECTION_METHOD,
  data=exp_summary_data
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  total_trait_coverage by SELECTION_METHOD
## Kruskal-Wallis chi-squared = 244.66, df = 6, p-value < 2.2e-16

# Kruskal-wallis is significant, so we do a post-hoc wilcoxon rank-sum.
pairwise.wilcox.test(
  x=exp_summary_data$total_trait_coverage,
  g=exp_summary_data$SELECTION_METHOD,
  p.adjust.method="bonferroni",
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  exp_summary_data$total_trait_coverage and exp_summary_data$SELECTION_METHOD 
## 
##          elite   elite-10 tourn   lex     nde     random 
## elite-10 1.0000  -        -       -       -       -      
## tourn    0.0513  0.0079   -       -       -       -      
## lex      < 2e-16 < 2e-16  < 2e-16 -       -       -      
## nde      1.5e-07 1.1e-05  2.1e-13 < 2e-16 -       -      
## random   8.3e-12 5.9e-11  6.1e-07 < 2e-16 < 2e-16 -      
## none     0.0261  0.0017   1.0000  < 2e-16 6.7e-16 5.4e-10
## 
## P value adjustment method: bonferroni

# All sig p<0.5, except tourn vs none

7.5.1 Metapopulation task coverage time series

To speed up plotting, we graph a low-resolution version of this time series.

metapop_task_cov_ot_fig <-
  ggplot(
    # times_series_data,
    filter(times_series_data, (epoch_offset%%100)==0 | epoch_offset==1),
    aes(
      x=epoch_offset,
      y=total_trait_coverage,
      fill=SELECTION_METHOD,
      color=SELECTION_METHOD
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
    scale_x_continuous(
    name="Cycles"
  ) +
  scale_y_continuous(
    name="Task Coverage",
    limits=c(-0.5,18.5),
    breaks=seq(0,18,2)
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="bottom"
  )
metapop_task_cov_ot_fig

## Warning: Computation failed in `stat_summary()`:

ggsave(
  plot=metapop_task_cov_ot_fig,
  filename=paste0(plot_directory, "2021-11-11-metapop-task-cov-ts.pdf"),
  width=10,
  height=6
)

## Warning: Computation failed in `stat_summary()`:

7.5.1.1 First 30 cycles of the experiment

metapop_task_cov_ot_early_fig <-
  ggplot(
    # times_series_data,
    filter(times_series_data, (epoch_offset <= 30)),
    aes(
      x=epoch_offset,
      y=total_trait_coverage,
      fill=SELECTION_METHOD,
      color=SELECTION_METHOD
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_x_continuous(
    name="Cycles"
  ) +
  scale_y_continuous(
    name="Task Coverage",
    limits=c(-0.5,18.5),
    breaks=seq(0,18,2)
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="bottom"
  )
metapop_task_cov_ot_early_fig

## Warning: Computation failed in `stat_summary()`:

ggsave(
  filename=paste0(plot_directory, "2021-11-11-metapop-task-cov-ts-early.pdf"),
  width=10,
  height=6
)

## Warning: Computation failed in `stat_summary()`:

After just 10 cycles, we observed significant gains from using NDE and LEX selection protocols.

early_data <- filter(times_series_data, epoch_offset==10)
kruskal.test(
  formula=total_trait_coverage~SELECTION_METHOD,
  data=early_data
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  total_trait_coverage by SELECTION_METHOD
## Kruskal-Wallis chi-squared = 180.78, df = 6, p-value < 2.2e-16

# Kruskal-wallis is significant, so we do a post-hoc wilcoxon rank-sum.
pairwise.wilcox.test(
  x=early_data$total_trait_coverage,
  g=early_data$SELECTION_METHOD,
  p.adjust.method="bonferroni",
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  early_data$total_trait_coverage and early_data$SELECTION_METHOD 
## 
##          elite   elite-10 tourn   lex     nde     random 
## elite-10 0.06728 -        -       -       -       -      
## tourn    0.01593 1.00000  -       -       -       -      
## lex      3.7e-14 9.8e-11  1.3e-11 -       -       -      
## nde      1.7e-12 1.9e-07  1.8e-08 0.03737 -       -      
## random   1.00000 0.00627  0.00051 7.7e-16 4.9e-15 -      
## none     1.00000 0.93717  0.28402 8.7e-15 2.6e-13 0.38944
## 
## P value adjustment method: bonferroni

7.6 Metapopulation task profile diversity

We measured the “phenotypic” diversity within evolved metapopulations in three ways:

the number of task profiles (richness)
the spread of task profiles as the average cosine distance from the centroid profile
the Shannon entropy of task profiles

7.6.1 Number of different task profiles

num_pop_task_profiles_fig <-
  ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=num_pop_trait_profiles,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="# Different Task Profiles"
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
  ) +
  scale_color_fun(
  ) +
  theme(
    legend.position="none"
  )
num_pop_task_profiles_fig

ggsave(
  plot=num_pop_task_profiles_fig,
  paste0(plot_directory, "2021-11-11-num-task-profiles.pdf")
)

## Saving 7 x 5 in image

Statistical results:

kruskal.test(
  formula=num_pop_trait_profiles~SELECTION_METHOD,
  data=exp_summary_data
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  num_pop_trait_profiles by SELECTION_METHOD
## Kruskal-Wallis chi-squared = 239.62, df = 6, p-value < 2.2e-16

# Kruskal-wallis is significant, so we do a post-hoc wilcoxon rank-sum.
pairwise.wilcox.test(
  x=exp_summary_data$num_pop_trait_profiles,
  g=exp_summary_data$SELECTION_METHOD,
  p.adjust.method="bonferroni",
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  exp_summary_data$num_pop_trait_profiles and exp_summary_data$SELECTION_METHOD 
## 
##          elite   elite-10 tourn   lex     nde     random 
## elite-10 1       -        -       -       -       -      
## tourn    1       1        -       -       -       -      
## lex      < 2e-16 < 2e-16  < 2e-16 -       -       -      
## nde      5.3e-16 < 2e-16  < 2e-16 3.2e-06 -       -      
## random   1       1        1       < 2e-16 < 2e-16 -      
## none     6.2e-06 2.2e-06  5.1e-06 < 2e-16 < 2e-16 1.5e-07
## 
## P value adjustment method: bonferroni

7.6.1.1 Number of different task profiles over time

num_task_profiles_ot_fig <-
  ggplot(
    filter(times_series_data, (updates_elapsed%%10000)==0 | updates_elapsed==1),
    aes(
      x=updates_elapsed,
      y=num_pop_trait_profiles,
      fill=SELECTION_METHOD,
      color=SELECTION_METHOD
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_x_continuous(
    name="Updates elapsed"
  ) +
  scale_y_continuous(
    name="# Different Task Profiles"
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="bottom"
  )
num_task_profiles_ot_fig

## Warning: Computation failed in `stat_summary()`:

ggsave(
  num_task_profiles_ot_fig,
  filename=paste0(plot_directory, "2021-11-11-num-task-profiles-ts.png"),
  width=10,
  height=6
)

## Warning: Computation failed in `stat_summary()`:

7.6.2 Task profile spread

task_profile_spread_fig <-
  ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=avg_cosine_dist_from_centroid,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Avg. Task Spread"
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
  ) +
  scale_color_fun(
  ) +
  theme(
    legend.position="none"
  )
task_profile_spread_fig

ggsave(
  plot=task_profile_spread_fig,
  paste0(plot_directory, "2021-11-11-task-profile-spread.pdf")
)

## Saving 7 x 5 in image

Statistical results:

kruskal.test(
  formula=avg_cosine_dist_from_centroid~SELECTION_METHOD,
  data=exp_summary_data
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  avg_cosine_dist_from_centroid by SELECTION_METHOD
## Kruskal-Wallis chi-squared = 292.39, df = 6, p-value < 2.2e-16

# Kruskal-wallis is significant, so we do a post-hoc wilcoxon rank-sum.
pairwise.wilcox.test(
  x=exp_summary_data$avg_cosine_dist_from_centroid,
  g=exp_summary_data$SELECTION_METHOD,
  p.adjust.method="bonferroni",
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  exp_summary_data$avg_cosine_dist_from_centroid and exp_summary_data$SELECTION_METHOD 
## 
##          elite   elite-10 tourn   lex     nde     random 
## elite-10 1.00000 -        -       -       -       -      
## tourn    0.00069 0.13725  -       -       -       -      
## lex      < 2e-16 < 2e-16  < 2e-16 -       -       -      
## nde      < 2e-16 < 2e-16  < 2e-16 2.5e-16 -       -      
## random   1.8e-06 7.7e-08  1.0e-10 < 2e-16 8.5e-13 -      
## none     < 2e-16 < 2e-16  < 2e-16 < 2e-16 1.00000 2.6e-14
## 
## P value adjustment method: bonferroni

7.6.3 Task profile entropy

ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=pop_trait_profile_entropy,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Shannon entropy of task profiles"
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
  ) +
  scale_color_fun(
  ) +
  theme(
    legend.position="none"
  )

ggsave(
  paste0(plot_directory, "2021-11-11-task-profile-entropy.pdf")
)

## Saving 7 x 5 in image

Statistical results:

kruskal.test(
  formula=pop_trait_profile_entropy~SELECTION_METHOD,
  data=exp_summary_data
)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  pop_trait_profile_entropy by SELECTION_METHOD
## Kruskal-Wallis chi-squared = 237.28, df = 6, p-value < 2.2e-16

pairwise.wilcox.test(
  x=exp_summary_data$pop_trait_profile_entropy,
  g=exp_summary_data$SELECTION_METHOD,
  p.adjust.method="bonferroni",
)

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  exp_summary_data$pop_trait_profile_entropy and exp_summary_data$SELECTION_METHOD 
## 
##          elite   elite-10 tourn   lex     nde     random 
## elite-10 1       -        -       -       -       -      
## tourn    1       1        -       -       -       -      
## lex      < 2e-16 < 2e-16  < 2e-16 -       -       -      
## nde      4.9e-16 4.1e-16  3.8e-16 1.3e-07 -       -      
## random   1       1        1       < 2e-16 < 2e-16 -      
## none     3.9e-05 2.2e-05  4.7e-08 < 2e-16 < 2e-16 9.1e-06
## 
## P value adjustment method: bonferroni

7.6.3.1 Task entropy over time

ggplot(
    filter(times_series_data, (updates_elapsed%%10000)==0 | updates_elapsed==1),
    aes(
      x=updates_elapsed,
      y=pop_trait_profile_entropy,
      fill=SELECTION_METHOD,
      color=SELECTION_METHOD
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_x_continuous(
    name="Updates elapsed"
  ) +
  scale_y_continuous(
    name="Task profile entropy"
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="bottom"
  )

## Warning: Computation failed in `stat_summary()`:

ggsave(
  filename=paste0(plot_directory, "2021-11-11-pop-trait-profile-entropy-ts.png"),
  width=10,
  height=6
)

## Warning: Computation failed in `stat_summary()`:

7.7 Task coverage per N populations

We analyzed the (maximum) number of tasks added to metapopulation task coverage for a given number (N) of member populations considered. That is, for each N, we solved the maximum set coverage problem for task coverage: what is the maximum number of tasks that can be covered given N populations from this metapopulation?

ggplot(
    task_coverage_per_pop_data,
    aes(
      x=n_pops,
      y=max_tasks_covered,
      fill=SELECTION_METHOD,
      color=SELECTION_METHOD
    )
  ) +
  stat_summary(geom="line", fun=mean) +
  stat_summary(
    geom="ribbon",
    fun.data="mean_cl_boot",
    fun.args=list(conf.int=0.95),
    alpha=0.2,
    linetype=0
  ) +
  scale_y_continuous(
    name="Maximum task coverage"
  ) +
  scale_x_continuous(
    name="Number of populations",
    limits=c(0, 15)
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="bottom"
  )

## Warning: Removed 28350 rows containing non-finite values (stat_summary).
## Removed 28350 rows containing non-finite values (stat_summary).

## Warning: Computation failed in `stat_summary()`:

ggsave(
  paste0(plot_directory, "2021-11-11-task-cov-per-n-pops.pdf"),
  width=10,
  height=6
)

## Warning: Removed 28350 rows containing non-finite values (stat_summary).

## Warning: Removed 28350 rows containing non-finite values (stat_summary).

## Warning: Computation failed in `stat_summary()`:

ggplot(
    filter(
      task_coverage_per_pop_data,
      n_pops > 0 & n_pops <= 5
    ),
    aes(
      x=SELECTION_METHOD,
      y=max_tasks_covered,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Maximum task coverage"
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  facet_wrap(
    ~n_pops,
    nrow=1,
    labeller=label_both
  ) +
  theme(
    legend.position="bottom",
    axis.text.x = element_blank()
  )

ggsave(
  paste0(plot_directory, "2021-11-11-task-cov-per-n-pops-alt.pdf"),
  width=10,
  height=6
)

7.8 Average number of different populations selected per generation

ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=avg_unique_selected,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Avg. number selected"
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_color_fun(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  theme(
    legend.position="none"
  )

ggsave(
  paste0(plot_directory, "2021-11-11-num-selected.pdf")
)

## Saving 7 x 5 in image

mean(filter(exp_summary_data, SELECTION_METHOD=="elite")$avg_unique_selected)

## [1] 1

mean(filter(exp_summary_data, SELECTION_METHOD=="elite-10")$avg_unique_selected)

## [1] 10

mean(filter(exp_summary_data, SELECTION_METHOD=="tourn")$avg_unique_selected)

## [1] 50.14939

mean(filter(exp_summary_data, SELECTION_METHOD=="nde")$avg_unique_selected)

## [1] 83.29294

mean(filter(exp_summary_data, SELECTION_METHOD=="lex")$avg_unique_selected)

## [1] 12.36869

mean(filter(exp_summary_data, SELECTION_METHOD=="random")$avg_unique_selected)

## [1] 60.87775

7.8.1 Entropy of selected population IDs

ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=avg_entropy_selected,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(height=0, width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_fill_fun(
  ) +
  scale_color_fun(
  ) +
  theme(
    legend.position="none"
  )

7.9 Average number of organisms in populations at end of maturation period

ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=avg_num_orgs,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Average number of organisms",
    limits=c(950, 1000)
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
  ) +
  scale_color_fun(
  ) +
  theme(
    legend.position="none"
  )

ggsave(
  paste0(plot_directory, "2021-11-11-num-orgs.pdf")
)

## Saving 7 x 5 in image

7.10 Average generations per maturation period

ggplot(
    exp_summary_data,
    aes(
      x=SELECTION_METHOD,
      y=avg_gens,
      fill=SELECTION_METHOD
    )
  ) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    alpha = .8
  ) +
  geom_point(
    mapping=aes(color=SELECTION_METHOD),
    position = position_jitter(width = .15),
    size = .5,
    alpha = 0.8
  ) +
  geom_boxplot(
    width = .1,
    outlier.shape = NA,
    alpha = 0.5
  ) +
  scale_y_continuous(
    name="Average generations per maturation period"
  ) +
  scale_x_discrete(
    name="Selection Method",
    breaks=selection_method_breaks,
    labels=selection_method_labels
  ) +
  scale_fill_fun(
  ) +
  scale_color_fun(
  ) +
  theme(
    legend.position="none"
  )

ggsave(
  paste0(plot_directory, "2021-11-11-avg-gens.pdf")
)

## Saving 7 x 5 in image

median(exp_summary_data$total_gens_approx) # Used for determining how many generations to run EC for

## [1] 57365.67

7.11 Representative task profiles

Visualized task profiles of two example metapopulations.

Elite selection

knitr::include_graphics("https://raw.githubusercontent.com/amlalejini/directed-digital-evolution/main/docs/media/metapop-profiles/profile_70007_elite.png")

Lexicase selection

knitr::include_graphics("https://raw.githubusercontent.com/amlalejini/directed-digital-evolution/main/docs/media/metapop-profiles/profile_70110_lex.png")

7.12 Manuscript figures

Without time series:

grid <- plot_grid(
  max_trait_cov_fig +
    theme(
      axis.title.x=element_blank(),
      axis.text=element_text(size=10),
      plot.title=element_text(size=12),
      axis.text.x = element_text(size = 9),
      axis.title.y = element_text(size = 10)
    ) +
    ggtitle("Best population task coverage"),
  total_trait_cov_fig +
    theme(
      axis.title.x=element_blank(),
      plot.title=element_text(size=12),
      axis.text=element_text(size=10),
      axis.text.x = element_text(size = 9),
      axis.title.y = element_text(size = 10)
    ) +
    ggtitle("Metapopulation task coverage"),
  num_pop_task_profiles_fig +
    theme(
      axis.text=element_text(size=10),
      plot.title=element_text(size=12),
      axis.text.x = element_text(size = 9),
      axis.title.y = element_text(size = 10)
    ) +
    ggtitle("Diversity of task profiles"),
  task_profile_spread_fig +
    theme(
      axis.text=element_text(size=10),
      plot.title=element_text(size=12),
      axis.text.x = element_text(size = 9),
      axis.title.y = element_text(size = 10)
    ) +
    ggtitle("Spread of task profiles"),
  nrow=2,
  ncol=2,
  labels="auto"
)
grid

save_plot(
  filename=paste0(plot_directory, "2021-11-11-selection-figure.pdf"),
  plot=grid,
  base_height=5,
  base_asp=2.5,
  dpi=600
)

With time series:

legend <- cowplot::get_legend(
    max_trait_cov_ot_fig +
      guides(
        color=guide_legend(nrow=1),
        fill=guide_legend(nrow=1)
      ) +
      theme(
        legend.position = "bottom",
        legend.box="horizontal",
        legend.justification="center"
      )
  )

## Warning: Computation failed in `stat_summary()`:

max_trait_cov_row <- plot_grid(
  max_trait_cov_ot_fig +
    ggtitle("Best population task coverage (over time)") +
    theme(
      legend.position="none"
      # plot.title=element_text(size=12),
      # axis.text=element_text(size=10),
      # axis.text.x = element_text(size = 9),
      # axis.title.y = element_text(size = 10)
    ),
  max_trait_cov_fig +
    ggtitle("Best population task coverage (final)"),
    theme(
      legend.position="none"
      # plot.title=element_text(size=12),
      # axis.text=element_text(size=10),
      # axis.text.x = element_text(size = 9),
      # axis.title.y = element_text(size = 10)
    ),
  nrow=1,
  ncol=2,
  align="h",
  labels=c("a", "b")
  # rel_widths=c(3,2),
)

## Warning: Computation failed in `stat_summary()`:

## Warning in as_grob.default(plot): Cannot convert object of class themegg into a
## grob.

## Warning: Graphs cannot be horizontally aligned unless the axis parameter is set.
## Placing graphs unaligned.

# max_trait_cov_row

total_trait_cov_row <- plot_grid(
  metapop_task_cov_ot_fig +
    ggtitle("Metapopulation task coverage (over time)") +
    theme(
      legend.position="none"
      # plot.title=element_text(size=12),
      # axis.text=element_text(size=10),
      # axis.text.x = element_text(size = 9),
      # axis.title.y = element_text(size = 10)
    ),
  total_trait_cov_fig +
    ggtitle("Metapopulation task coverage (final)"),
    theme(
      legend.position="none"
      # plot.title=element_text(size=12),
      # axis.text=element_text(size=10),
      # axis.text.x = element_text(size = 9),
      # axis.title.y = element_text(size = 10)
    ),
  nrow=1,
  ncol=2,
  align="h",
  labels=c("c", "d")
  # rel_widths=c(3,2),
)

## Warning: Computation failed in `stat_summary()`:

## Warning in as_grob.default(plot): Cannot convert object of class themegg into a
## grob.

## Warning: Graphs cannot be horizontally aligned unless the axis parameter is set.
## Placing graphs unaligned.

# total_trait_cov_row

diversity_row <-  plot_grid(
  num_pop_task_profiles_fig +
    ggtitle("Diversity of task profiles") +
    theme(
      legend.position="none"
      # plot.title=element_text(size=12),
      # axis.text=element_text(size=10),
      # axis.text.x = element_text(size = 9),
      # axis.title.y = element_text(size = 10)
    ),
  task_profile_spread_fig +
    ggtitle("Spread of task profiles"),
    theme(
      legend.position="none"
      # plot.title=element_text(size=12),
      # axis.text=element_text(size=10),
      # axis.text.x = element_text(size = 9),
      # axis.title.y = element_text(size = 10)
    ),
  nrow=1,
  ncol=2,
  align="h",
  labels=c("e", "f")
  # rel_widths=c(3,2),
)

## Warning in as_grob.default(plot): Cannot convert object of class themegg into a
## grob.

## Warning in as_grob.default(plot): Graphs cannot be horizontally aligned unless
## the axis parameter is set. Placing graphs unaligned.

# diversity_row

grid <- plot_grid(
  max_trait_cov_row,
  total_trait_cov_row,
  diversity_row,
  legend,
  nrow=4,
  ncol=1,
  rel_heights=c(1, 1, 1, 0.1)

)
grid

save_plot(
  filename=paste0(plot_directory, "2021-11-11-selection-figure-with-timeseries.pdf"),
  plot=grid,
  base_width=12,
  base_height=10,
  # base_asp=1.6,
  dpi=600
)