To be sure that MAP-Elites is helping us explore a meaningfully large portion of the range of possible programs we could evolve with a given representation, we want to show that the distribution of our metrics among solutions to the problem is different among different selection schemes.
First let’s load in the data
library(readr)
library(dplyr)
library(ggplot2)
library(dgof)
data <- read_csv("../data/scopegp_trait_50000_data.csv")
data$problem <- as.factor(data$problem)
data$selection_method <- as.factor(data$selection_method)
# We're not including the symbolic regression problem (Dow) because nothing solved it
data <- data %>% filter(problem!="SYMREG")
data <- data %>% filter(problem!="COLLATZ")
Make sure we have all the data:
de_dup <- data %>% distinct(run_id, .keep_all = TRUE)
summary(de_dup)
## run_id agent_id selection_method problem
## Min. : 1 Min. :0.000000 LEX :118 COLLATZ : 0
## 1st Qu.:118 1st Qu.:0.000000 MAPE :109 LOGIC :120
## Median :366 Median :0.000000 RAND :120 SMALLEST:119
## Mean :331 Mean :0.006452 TOURN:118 SQUARES :120
## 3rd Qu.:542 3rd Qu.:0.000000 SUM :106
## Max. :660 Max. :1.000000 SYMREG : 0
## update instruction_entropy scope_count fitness
## Min. :50000 Min. :0.0000 Min. : 1.000 Min. : 0
## 1st Qu.:50000 1st Qu.:0.9186 1st Qu.: 1.000 1st Qu.: 0
## Median :50000 Median :2.9223 Median : 2.000 Median : 1208
## Mean :50000 Mean :2.6366 Mean : 2.796 Mean : 35828
## 3rd Qu.:50000 3rd Qu.:4.4177 3rd Qu.: 4.000 3rd Qu.: 10000
## Max. :50000 Max. :4.6840 Max. :12.000 Max. :200000
## scope_count_bin inst_ent_bin
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 3.000
## Median : 1.000 Median :10.000
## Mean : 1.796 Mean : 8.871
## 3rd Qu.: 3.000 3rd Qu.:15.000
## Max. :11.000 Max. :16.000
Now let’s see if the distributions are different using a Kolmogrov-Smirnov test
single_problem <- function(df, prob, cutoff) {
lex <- data %>% filter(problem==prob, selection_method=="LEX", fitness>=cutoff)
mape <- data %>% filter(problem==prob, selection_method=="MAPE", fitness>=cutoff)
rand <- data %>% filter(problem==prob, selection_method=="RAND", fitness>=cutoff)
tourn <- data %>% filter(problem==prob, selection_method=="TOURN")
if (length(mape$instruction_entropy) == 0) {
print("MAPE DID NOT SOLVE PROBLEM")
return()
}
if (length(lex$instruction_entropy) > 0) {
print("MAPE vs. Lex instruction entropy")
print(ks.test(mape$instruction_entropy, lex$instruction_entropy))
print("MAPE vs. Lex scope count")
print(ks.test(mape$scope_count, lex$scope_count))
} else {
print("Lexicase did not solve problem")
}
if (length(rand$instruction_entropy) > 0) {
print("MAPE vs. Rand instruction entropy")
print(ks.test(mape$instruction_entropy, rand$instruction_entropy))
print("MAPE vs. Rand scope count")
print(ks.test(mape$scope_count, rand$scope_count))
} else {
print("Random did not solve problem")
}
if (length(tourn$instruction_entropy) > 0) {
print("MAPE vs. Tourn instruction entropy")
print(ks.test(mape$instruction_entropy, tourn$instruction_entropy))
print("MAPE vs. Tourn scope count")
print(ks.test(mape$scope_count, tourn$scope_count))
} else {
print("Tournament did not solve problem")
}
}
probs = c("LOGIC", "SQUARES", "SMALLEST", "SUM")
min_fs = c(10, 10000, 200000, 200000)
for (i in 1:length(probs)) {
print(probs[i])
single_problem(data, probs[i], min_fs[i])
}
## [1] "LOGIC"
## [1] "MAPE vs. Lex instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and lex$instruction_entropy
## D = 0.22445, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Lex scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and lex$scope_count
## D = 0.60791, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "Random did not solve problem"
## [1] "MAPE vs. Tourn instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and tourn$instruction_entropy
## D = 0.14578, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Tourn scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and tourn$scope_count
## D = 0.68851, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "SQUARES"
## [1] "MAPE vs. Lex instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and lex$instruction_entropy
## D = 0.26963, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Lex scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and lex$scope_count
## D = 0.78875, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "Random did not solve problem"
## [1] "MAPE vs. Tourn instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and tourn$instruction_entropy
## D = 0.20966, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Tourn scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and tourn$scope_count
## D = 0.70846, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "SMALLEST"
## [1] "MAPE vs. Lex instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and lex$instruction_entropy
## D = 0.39574, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Lex scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and lex$scope_count
## D = 0.70962, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "Random did not solve problem"
## [1] "MAPE vs. Tourn instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and tourn$instruction_entropy
## D = 0.22254, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Tourn scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and tourn$scope_count
## D = 0.73713, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "SUM"
## [1] "MAPE vs. Lex instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and lex$instruction_entropy
## D = 0.54654, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Lex scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and lex$scope_count
## D = 0.51001, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "Random did not solve problem"
## [1] "MAPE vs. Tourn instruction entropy"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$instruction_entropy and tourn$instruction_entropy
## D = 0.47622, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
## [1] "MAPE vs. Tourn scope count"
##
## Two-sample Kolmogorov-Smirnov test
##
## data: mape$scope_count and tourn$scope_count
## D = 0.5927, p-value < 2.2e-16
## alternative hypothesis: two-sided
That was a lot of comparisons so we should do a bonferroni correction for multiple comparisons. Unfortunately, these p-values are so low that R is just reporting “0” if we try to probe deeper into that p < 2.2e-16. To perform our bonferroni correction, we will round our p-values up (which is safe to do because it only makes our test more conservative) to 2.2e-16 (0.00000000000000022). Because we performed 48 hypothesis tests, a Bonferroni correction would lower our original alpha value of .05 to .05/48 = .001. Thus, p-values must be less than this value for us to consider them significant. Since 0.00000000000000022 is well below .001, we can safely say that all non-map-elites distributions are significantly different from all map-elites distributions.
Let’s plot the distributions we were just comparing, as a sanity check:
filtered_data = data.frame()
for (i in 1:length(probs)) {
filtered_data <- rbind(filtered_data, data %>% filter(problem==probs[i], selection_method!="RAND", fitness>=min_fs[i]))
filtered_data <- rbind(filtered_data, data %>% filter(problem==probs[i], selection_method=="RAND"))
}
filtered_data$selection_method <- factor(filtered_data$selection_method, c("MAPE", "TOURN", "LEX", "RAND"))
levels(filtered_data$selection_method) <- c("MAP-Elites", "Tournament", "Lexicase", "Random")
levels(filtered_data$problem) <- c("Collatz", "Logic", "Smallest", "Squares", "Sum", "Symreg")
ggplot(filtered_data, aes(instruction_entropy, colour = selection_method)) +
geom_freqpoly(stat = "density", bw=.2) + theme_classic() + facet_wrap(~problem) +
scale_x_continuous("Instruction entropy") + scale_y_continuous("Density") + scale_color_discrete("Selection method") +
theme(legend.position = "bottom", axis.title = element_text(size=18), panel.grid.major = element_blank(),
axis.text = element_text(size=14), strip.text = element_text(size=18), legend.text = element_text(size=14),
legend.title = element_text(size=14), panel.grid.minor = element_blank())
ggsave("inst_ent_density.pdf", width = 8, height=8)
ggplot(filtered_data, aes(scope_count, colour = selection_method)) +
geom_freqpoly(stat="density", bw=.5) + theme_classic() + facet_wrap(~problem) +
scale_x_continuous("Scope count") + scale_y_continuous("Density") + scale_color_discrete("Selection method")+
theme(legend.position = "bottom", axis.title = element_text(size=18), panel.grid.major = element_blank(),
axis.text = element_text(size=14), strip.text = element_text(size=18), legend.text = element_text(size=14),
legend.title = element_text(size=14), panel.grid.minor = element_blank())
ggsave("scope_count_density.pdf", width = 8, height=8)
MAP-Elites is clearly exploring a wider range of program structures than the other selection schemes.
Let’s look at how these two metrics relate to each other.
ggplot(data=filtered_data, aes(x=scope_count_bin, y=inst_ent_bin, z=fitness)) + stat_summary_2d(binwidth = 1, fun = function(x) length(x)) + theme_classic() + facet_grid(problem~selection_method) + scale_fill_gradient("Solution count", trans="log10") +
scale_y_continuous("Instruction entropy", breaks=c(0,15), labels=c(0,4.7)) + scale_x_continuous("Scope count") +
theme(legend.position = "bottom", axis.title = element_text(size=18),
axis.text = element_text(size=14), strip.text = element_text(size=16), legend.text = element_text(size=10),
legend.title = element_text(size=14), panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
ggsave("heatmaps.pdf", width = 8, height=8)