Chapter 8 Graph property correlations
We screened for graph properties correlated with community transitionability scores.
8.1 Dependencies and setup
library(tidyverse)
library(Hmisc)
library(broom)
library(knitr)
library(kableExtra)# Check if Rmd is being compiled using bookdown
bookdown <- exists("bookdown_build")experiment_slug <- "2024-03-08-varied-interaction-matrices"
working_directory <- paste(
"experiments",
experiment_slug,
"analysis",
sep = "/"
)
# Adjust working directory if being knitted for bookdown build.
if (bookdown) {
working_directory <- paste0(
bookdown_wd_prefix,
working_directory
)
}
plot_dir <- paste(
working_directory,
"plots",
sep = "/"
)
data_path <- paste(
working_directory,
"data",
"world_summary_final_update_with-graph-props.csv",
sep = "/"
)
data <- read_csv(data_path)Set cowplot theme as default plotting theme.
theme_set(theme_cowplot())8.2 Data preprocessing
max_update <- max(data$update)
# Ensure that we just have measurements from final update.
data <- data %>%
filter(update == max_update) %>%
mutate(
interaction_matrix = as.factor(interaction_matrix),
graph_type = as.factor(graph_type),
summary_mode = as.factor(summary_mode),
update = as.numeric(update),
SEED = as.factor(SEED),
graph_file = str_split_i(DIFFUSION_SPATIAL_STRUCTURE_FILE, "/", -1)
) %>%
mutate(
graph_file = as.factor(graph_file)
)
# write_csv(
# data,
# "world_summary_final_update.csv"
# )
# For each row, assign graph properties
properties <- c(
"graph_prop_density",
"graph_prop_degree_mean",
"graph_prop_degree_median",
"graph_prop_degree_variance",
"graph_prop_girth",
"graph_prop_degree_assortivity_coef",
"graph_prop_num_bridges",
"graph_prop_max_clique_size",
"graph_prop_transitivity",
"graph_prop_avg_clustering",
"graph_prop_num_connected_components",
"graph_prop_num_articulation_points",
"graph_prop_avg_node_connectivity",
"graph_prop_edge_connectivity",
"graph_prop_node_connectivity",
"graph_prop_diameter",
"graph_prop_radius",
"graph_prop_kemeny_constant",
"graph_prop_global_efficiency",
"graph_prop_wiener_index",
"graph_prop_longest_shortest_path"
)
# (3) Pivot longer
long_data <- data %>%
mutate(
graph_prop_diameter = case_when(
graph_prop_diameter == "error" ~ "-1",
.default = graph_prop_diameter
),
graph_prop_radius = case_when(
graph_prop_radius == "error" ~ "-1",
.default = graph_prop_radius
),
graph_prop_kemeny_constant = case_when(
graph_prop_kemeny_constant == "error" ~ "-1",
.default = graph_prop_kemeny_constant
)
) %>%
mutate(
graph_prop_diameter = as.numeric(graph_prop_diameter),
graph_prop_radius = as.numeric(graph_prop_radius),
graph_prop_kemeny_constant = as.numeric(graph_prop_kemeny_constant)
) %>%
select(
!c(
DIFFUSION_SPATIAL_STRUCTURE_FILE,
GROUP_REPRO_SPATIAL_STRUCTURE_FILE,
INTERACTION_SOURCE
)
) %>%
filter(
summary_mode == "ranked_threshold"
) %>%
pivot_longer(
cols = properties,
names_to = "graph_property",
values_to = "graph_property_value"
) %>%
filter(
(!is.na(graph_property_value)) & graph_property_value != "Inf" &
(!(graph_property == "graph_prop_diameter" & (graph_property_value == "-1"))) &
(!(graph_property == "graph_prop_radius" & (graph_property_value == "-1"))) &
(!(graph_property == "graph_prop_kemeny_constant" & (graph_property_value == "-1")))
) %>%
mutate(
graph_property_value = as.numeric(graph_property_value),
graph_property = str_remove(graph_property, "graph_prop_")
) %>%
mutate(
graph_property = as.factor(graph_property)
)
# write_csv(long_data, "test.csv")8.3 Plot relationships between transitionability and graph properties
rel_plot <- long_data %>%
ggplot(
aes(
x = graph_property_value,
y = logged_mult_score
)
) +
geom_point(aes(color = graph_type)) +
geom_smooth(
method = "lm",
color = "black"
) +
facet_grid(
interaction_matrix ~ graph_property,
scales = "free"
)
rel_plot
ggsave(
plot = rel_plot,
filename = paste(
plot_dir,
"property_relationships.pdf",
sep = "/"
),
width = 40,
height = 20
)8.4 Measure correlations
# Reference for running correlations over tidy data:
# https://dominicroye.github.io/en/2019/tidy-correlation-tests-in-r/
cor_fun <- function(data) {
cor.test(
data$graph_property_value,
data$logged_mult_score,
method = "spearman",
exact = FALSE
) %>% tidy()
}
nested <- long_data %>%
select(
c(
interaction_matrix,
graph_property,
graph_property_value,
logged_mult_score
)
) %>%
group_by(interaction_matrix, graph_property) %>%
nest() %>%
mutate(
model = map(data, cor_fun)
)
full_corr <- select(nested, -data) %>% unnest()
full_corr <- full_corr %>%
mutate(
abs_estimate = abs(estimate)
) %>%
arrange(
desc(abs_estimate)
) %>%
group_by(
interaction_matrix
) %>%
mutate(
p.value.adj = p.adjust(p.value, method = "holm")
) %>%
filter(
p.value.adj <= 0.05
)
full_corr_table <- kable(full_corr) %>%
kable_styling(latex_options = "striped")
save_kable(
full_corr_table,
paste(
plot_dir,
"correlation_table.pdf",
sep = "/"
)
)8.4.1 Top three significant correlations per interaction matrix
Break correlations down by interaction matrix
interaction_matrices <- levels(long_data$interaction_matrix)
for (mat_type in interaction_matrices) {
mat_data <- filter(long_data, interaction_matrix == mat_type)
nested <- mat_data %>%
select(
c(
interaction_matrix,
graph_property,
graph_property_value,
logged_mult_score
)
) %>%
group_by(interaction_matrix, graph_property) %>%
nest() %>%
mutate(
model = map(data, cor_fun)
)
im_corr <- select(nested, -data) %>% unnest()
im_corr <- im_corr %>%
mutate(
abs_estimate = abs(estimate)
) %>%
arrange(
desc(abs_estimate)
) %>%
ungroup() %>%
group_by(
interaction_matrix
) %>%
mutate(
p.value.adj = p.adjust(p.value, method = "holm")
) %>%
filter(
p.value.adj < 0.05
)
im_corr_table <- kable(im_corr) %>%
kable_styling(latex_options = "striped")
save_kable(
im_corr_table,
paste(
plot_dir,
paste0("correlation_table_", mat_type, ".pdf"),
sep = "/"
)
)
top_corr <- im_corr %>%
slice_max(
abs_estimate,
n = 3
)
top_corr_table <- kable(top_corr) %>%
kable_styling(latex_options = "striped")
save_kable(
top_corr_table,
paste(
plot_dir,
paste0("t3_correlation_table_", mat_type, ".pdf"),
sep = "/"
)
)
}8.4.2 Distrubition of correlations >= 0.5 strength
# Look at distribution of correlations >= 0.5 strength
corr_str_thresh <- 0.5
full_corr_thresh <- full_corr %>%
mutate(
direction = case_when(
estimate < 0 ~ "Negative",
estimate >= 0 ~ "Positive"
)
) %>%
mutate(
direction = as.factor(direction)
) %>%
filter(abs_estimate >= corr_str_thresh & p.value.adj <= 0.05)
corr_counts <- full_corr_thresh %>%
dplyr::group_by(graph_property, direction) %>%
dplyr::summarize(
n = n()
)
# If something has one direction, but not other, fill in 0 for other.
# For property in properties
correlation_dirs_plot <- corr_counts %>%
ggplot(
aes(
x = graph_property,
fill = direction,
y = n
)
) +
geom_bar(stat = "identity", position = position_dodge(), alpha = 0.75) +
coord_flip()
ggsave(
plot = correlation_dirs_plot,
filename = paste(
plot_dir,
"most_moderate_correlations.pdf",
sep = "/"
)
)
correlation_dirs_plot