Chapter 8 Graph property correlations
We screened for graph properties correlated with community transitionability scores.
8.1 Dependencies and setup
library(tidyverse)
library(Hmisc)
library(broom)
library(knitr)
library(kableExtra)
# Check if Rmd is being compiled using bookdown
<- exists("bookdown_build") bookdown
<- "2024-03-08-varied-interaction-matrices"
experiment_slug <- paste(
working_directory "experiments",
experiment_slug,"analysis",
sep = "/"
)# Adjust working directory if being knitted for bookdown build.
if (bookdown) {
<- paste0(
working_directory
bookdown_wd_prefix,
working_directory
)
}
<- paste(
plot_dir
working_directory,"plots",
sep = "/"
)
<- paste(
data_path
working_directory,"data",
"world_summary_final_update_with-graph-props.csv",
sep = "/"
)<- read_csv(data_path) data
Set cowplot theme as default plotting theme.
theme_set(theme_cowplot())
8.2 Data preprocessing
<- max(data$update)
max_update # Ensure that we just have measurements from final update.
<- data %>%
data filter(update == max_update) %>%
mutate(
interaction_matrix = as.factor(interaction_matrix),
graph_type = as.factor(graph_type),
summary_mode = as.factor(summary_mode),
update = as.numeric(update),
SEED = as.factor(SEED),
graph_file = str_split_i(DIFFUSION_SPATIAL_STRUCTURE_FILE, "/", -1)
%>%
) mutate(
graph_file = as.factor(graph_file)
)# write_csv(
# data,
# "world_summary_final_update.csv"
# )
# For each row, assign graph properties
<- c(
properties "graph_prop_density",
"graph_prop_degree_mean",
"graph_prop_degree_median",
"graph_prop_degree_variance",
"graph_prop_girth",
"graph_prop_degree_assortivity_coef",
"graph_prop_num_bridges",
"graph_prop_max_clique_size",
"graph_prop_transitivity",
"graph_prop_avg_clustering",
"graph_prop_num_connected_components",
"graph_prop_num_articulation_points",
"graph_prop_avg_node_connectivity",
"graph_prop_edge_connectivity",
"graph_prop_node_connectivity",
"graph_prop_diameter",
"graph_prop_radius",
"graph_prop_kemeny_constant",
"graph_prop_global_efficiency",
"graph_prop_wiener_index",
"graph_prop_longest_shortest_path"
)
# (3) Pivot longer
<- data %>%
long_data mutate(
graph_prop_diameter = case_when(
== "error" ~ "-1",
graph_prop_diameter .default = graph_prop_diameter
),graph_prop_radius = case_when(
== "error" ~ "-1",
graph_prop_radius .default = graph_prop_radius
),graph_prop_kemeny_constant = case_when(
== "error" ~ "-1",
graph_prop_kemeny_constant .default = graph_prop_kemeny_constant
)%>%
) mutate(
graph_prop_diameter = as.numeric(graph_prop_diameter),
graph_prop_radius = as.numeric(graph_prop_radius),
graph_prop_kemeny_constant = as.numeric(graph_prop_kemeny_constant)
%>%
) select(
!c(
DIFFUSION_SPATIAL_STRUCTURE_FILE,
GROUP_REPRO_SPATIAL_STRUCTURE_FILE,
INTERACTION_SOURCE
)%>%
) filter(
== "ranked_threshold"
summary_mode %>%
) pivot_longer(
cols = properties,
names_to = "graph_property",
values_to = "graph_property_value"
%>%
) filter(
!is.na(graph_property_value)) & graph_property_value != "Inf" &
(!(graph_property == "graph_prop_diameter" & (graph_property_value == "-1"))) &
(!(graph_property == "graph_prop_radius" & (graph_property_value == "-1"))) &
(!(graph_property == "graph_prop_kemeny_constant" & (graph_property_value == "-1")))
(%>%
) mutate(
graph_property_value = as.numeric(graph_property_value),
graph_property = str_remove(graph_property, "graph_prop_")
%>%
) mutate(
graph_property = as.factor(graph_property)
)# write_csv(long_data, "test.csv")
8.3 Plot relationships between transitionability and graph properties
<- long_data %>%
rel_plot ggplot(
aes(
x = graph_property_value,
y = logged_mult_score
)+
) geom_point(aes(color = graph_type)) +
geom_smooth(
method = "lm",
color = "black"
+
) facet_grid(
~ graph_property,
interaction_matrix scales = "free"
)
rel_plot
ggsave(
plot = rel_plot,
filename = paste(
plot_dir,"property_relationships.pdf",
sep = "/"
),width = 40,
height = 20
)
8.4 Measure correlations
# Reference for running correlations over tidy data:
# https://dominicroye.github.io/en/2019/tidy-correlation-tests-in-r/
<- function(data) {
cor_fun cor.test(
$graph_property_value,
data$logged_mult_score,
datamethod = "spearman",
exact = FALSE
%>% tidy()
)
}
<- long_data %>%
nested select(
c(
interaction_matrix,
graph_property,
graph_property_value,
logged_mult_score
)%>%
) group_by(interaction_matrix, graph_property) %>%
nest() %>%
mutate(
model = map(data, cor_fun)
)
<- select(nested, -data) %>% unnest()
full_corr <- full_corr %>%
full_corr mutate(
abs_estimate = abs(estimate)
%>%
) arrange(
desc(abs_estimate)
%>%
) group_by(
interaction_matrix%>%
) mutate(
p.value.adj = p.adjust(p.value, method = "holm")
%>%
) filter(
<= 0.05
p.value.adj
)
<- kable(full_corr) %>%
full_corr_table kable_styling(latex_options = "striped")
save_kable(
full_corr_table,paste(
plot_dir,"correlation_table.pdf",
sep = "/"
) )
8.4.1 Top three significant correlations per interaction matrix
Break correlations down by interaction matrix
<- levels(long_data$interaction_matrix)
interaction_matrices
for (mat_type in interaction_matrices) {
<- filter(long_data, interaction_matrix == mat_type)
mat_data
<- mat_data %>%
nested select(
c(
interaction_matrix,
graph_property,
graph_property_value,
logged_mult_score
)%>%
) group_by(interaction_matrix, graph_property) %>%
nest() %>%
mutate(
model = map(data, cor_fun)
)
<- select(nested, -data) %>% unnest()
im_corr <- im_corr %>%
im_corr mutate(
abs_estimate = abs(estimate)
%>%
) arrange(
desc(abs_estimate)
%>%
) ungroup() %>%
group_by(
interaction_matrix%>%
) mutate(
p.value.adj = p.adjust(p.value, method = "holm")
%>%
) filter(
< 0.05
p.value.adj
)
<- kable(im_corr) %>%
im_corr_table kable_styling(latex_options = "striped")
save_kable(
im_corr_table,paste(
plot_dir,paste0("correlation_table_", mat_type, ".pdf"),
sep = "/"
)
)
<- im_corr %>%
top_corr slice_max(
abs_estimate,n = 3
)<- kable(top_corr) %>%
top_corr_table kable_styling(latex_options = "striped")
save_kable(
top_corr_table,paste(
plot_dir,paste0("t3_correlation_table_", mat_type, ".pdf"),
sep = "/"
)
)
}
8.4.2 Distrubition of correlations >= 0.5 strength
# Look at distribution of correlations >= 0.5 strength
<- 0.5
corr_str_thresh <- full_corr %>%
full_corr_thresh mutate(
direction = case_when(
< 0 ~ "Negative",
estimate >= 0 ~ "Positive"
estimate
)%>%
) mutate(
direction = as.factor(direction)
%>%
) filter(abs_estimate >= corr_str_thresh & p.value.adj <= 0.05)
<- full_corr_thresh %>%
corr_counts ::group_by(graph_property, direction) %>%
dplyr::summarize(
dplyrn = n()
)
# If something has one direction, but not other, fill in 0 for other.
# For property in properties
<- corr_counts %>%
correlation_dirs_plot ggplot(
aes(
x = graph_property,
fill = direction,
y = n
)+
) geom_bar(stat = "identity", position = position_dodge(), alpha = 0.75) +
coord_flip()
ggsave(
plot = correlation_dirs_plot,
filename = paste(
plot_dir,"most_moderate_correlations.pdf",
sep = "/"
)
) correlation_dirs_plot