# --- Load Libraries -----------------------------------------------------------
library(recount3) # manipulating recount data
library(tidyverse) # data manipulation/plotting
library(factoextra) # clustering
library(DGCA) # differential correlation
library(WGCNA) # weighted gene correlation network analysis
library(patchwork) # plot arrangement
# --- Load Data ----------------------------------------------------------------
gtex_brain <- readRDS("./consults/ad_concord/results/gtex_brain.rds")
counts <- assay(
gtex_brain[,gtex_brain$gtex.smtsd=="Brain - Amygdala"],
"counts")
meta <- data.frame(
colData(gtex_brain[,gtex_brain$gtex.smtsd=="Brain - Amygdala"]))
features <- data.frame(
rowRanges(gtex_brain[,gtex_brain$gtex.smtsd=="Brain - Amygdala"]))
rm(gtex_brain)
gc()
# --- Set Variables ------------------------------------------------------------
# Filtering variables
filterTypes = "central"
filterCentralType = "median"
filterCentralPercentile = 0.3
# sample outlier variables
dist_method="euclidean"
hclust_method="average"
# removing sample outlier variables
cutHeight = 50
minSize = 10
# chosing power variables
powerVector=c(c(1:10), seq(from = 12, to=20, by=2))
# module building variables
power = 5
deepSplit = 2
minClusterSize = 30
hclustMethod="ward.D2"
cutHeight=0.25
# trait correlation variables
numeric_vars = "numeric.age"
# module statistics variables
numeric_var = "numeric_age"
gene_sig_thresh = 0.2
mm_thresh = 0.8
# --- Sample Outlier Removal ---------------------------------------------------
# filter out low variance genes
filt <- filterGenes(counts,
filterTypes = filterTypes,
filterCentralType = filterCentralType,
filterCentralPercentile = filterCentralPercentile)
# transpose so that the samples are rows and
# the genes are columns
filt <- t(filt)
# apply ward's clustering
hc <- hclust(d = dist(filt,method = dist_method),
method = hclust_method)
# sample dendrogram
hc_dendro <- fviz_dend(hc)
# cut tree
clust = WGCNA::cutreeStatic(hc,
cutHeight = cutHeight,
minSize = minSize)
# filter out outliers
keepSamples <- (clust==1)
filt <- filt[keepSamples,]
meta <- meta[keepSamples,]
# --- Soft-Thresholding Power --------------------------------------------------
# pick soft thresholding power
sft = WGCNA::pickSoftThreshold(
filt,
powerVector = powerVector,
verbose = 5)
# plot power v. fit
power_v_fit <- ggplot(sft$fitIndices,
aes(
x=sft$fitIndices[,1],
y=-sign(sft$fitIndices[,3])*sft$fitIndices[,2],
label=sft$fitIndices[,1]
))+
geom_point(size=0)+
theme_bw()+
labs(
x="Soft Thresholding Power",
y="Scale Free Topology Model Fit (R^2)"
)+
geom_hline(yintercept = 0.9,color="red")+
geom_text(hjust=0, vjust=0)
# plot power v. connectivity
power_v_connectivity <- ggplot(sft$fitIndices,
aes(
x=sft$fitIndices[,1],
y=sft$fitIndices[,5],
label=sft$fitIndices[,1]
))+
geom_point(size=0)+
theme_bw()+
labs(
x="Soft Thresholding Power",
y="Mean Connectivity"
)+
geom_text(hjust=0, vjust=0)
combined <- power_v_fit|power_v_connectivity
# --- Build Modules ------------------------------------------------------------
# calculate adjacency matrix
adjacency <- adjacency(
filt,
power = power)
# calculate dissimilarity matrix
TOM <- TOMsimilarity(adjacency)
rownames(TOM) <- rownames(adjacency)
colnames(TOM) <- colnames(adjacency)
dissTOM <- 1-TOM
# Call the hierarchical clustering function
geneTree <- hclust(
as.dist(dissTOM),
method = hclustMethod)
# Module identification using dynamic tree cut:
dynamicMods <- cutreeDynamic(
dendro = geneTree,
distM = dissTOM,
deepSplit = deepSplit,
pamRespectsDendro = FALSE,
minClusterSize = minClusterSize)
# Convert numeric lables into colors
dynamicColors <- labels2colors(dynamicMods)
# Calculate eigengenes
MEList = moduleEigengenes(filt, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs)
# Cluster module eigengenes
METree = hclust(
as.dist(MEDiss),
method = hclustMethod)
# Plot the result
# Plot the cut line into the dendrogram
plot(METree,
main = "Clustering of module eigengenes",
xlab = "",
sub = "")+
abline(h=cutHeight,
col = "red")
module_eigengene_dendrogram<- grDevices::recordPlot()
plot.new()
# Call an automatic merging function
merge = mergeCloseModules(
filt,
dynamicColors,
cutHeight = cutHeight,
verbose = 3)
# The merged module colors
mergedColors = merge$colors
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs
# module dendrogram plot with dynamic and merged clusters
plotDendroAndColors(
geneTree,
cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE,
hang = 0.03,
addGuide = TRUE,
guideHang = 0.05)
module_dendrogram<- grDevices::recordPlot()
plot.new()
# Rename to moduleColors
moduleColors <- mergedColors
# Construct numerical labels corresponding to the colors
colorOrder <- c("grey", standardColors(50))
moduleLabels <- match(moduleColors, colorOrder)-1
# --- Trait Information --------------------------------------------------------
# Define numbers of genes and samples
nGenes <- ncol(filt)
nSamples <- nrow(filt)
# Recalculate MEs with color labels
MEs0 <- WGCNA::moduleEigengenes(
filt,
moduleColors)$eigengenes
MEs <- WGCNA::orderMEs(MEs0)
moduleTraitCor <- WGCNA::cor(
MEs,
meta %>% select(numeric_vars),
use = "p")
moduleTraitPvalue <- WGCNA::corPvalueStudent(
moduleTraitCor,
nSamples)
module_trait_df <- merge(
moduleTraitCor,
moduleTraitPvalue,
by="row.names")
colnames(module_trait_df) <- c(
paste0(numeric_vars,".correlation"),
paste0(numeric_vars,".p.value")
)
# Will display correlations and their p-values
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) <- dim(moduleTraitCor)
# Display the correlation values within a heatmap plot
dev.off()
par(mar = c(6, 8.5, 3, 3))
WGCNA::labeledHeatmap(Matrix = moduleTraitCor,
xLabels = colnames(meta %>%
dplyr::select(numeric_vars)),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
cex.text = .6,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
trait_heatmap<- grDevices::recordPlot()
plot.new()
# --- Gene/Connectivity/Module Membership Significance -------------------------
ADJ1=abs(cor(filt,use="p"))^6
Alldegrees1=intramodularConnectivity(ADJ1, moduleColors)
GS1=as.numeric(cor(meta[numeric_var],
filt,
use="p"))
GeneSignificance=abs(GS1)
conn_gs_plot <- do.call(
patchwork::wrap_plots,
lapply(unique(moduleColors), function(module) {
restrict1 = (moduleColors==module)
df <- data.frame(
connectivity=Alldegrees1$kWithin[restrict1],
gene_significance=GeneSignificance[restrict1]
)
mm_gs_corr <- round(
cor(df$connectivity,
df$gene_significance),
3)
mm_gs_corr_pval <- cor.test(
df$connectivity,
df$gene_significance)$p.value
ggplot(df,
aes(
x=connectivity,
y=gene_significance
))+
geom_point(color=module)+
theme_bw()+
geom_smooth(method = "lm",
color=module)+
labs(
x="Intramodular Connectivity",
y="Gene Significance"
) +
labs(
title=paste0(stringr::str_to_title(module),
" Module"),
subtitle = paste0(
"correlation: ",
as.character(mm_gs_corr),
"\n",
"p-value: ",
as.character(mm_gs_corr_pval))
)
}))
datME=moduleEigengenes(filt,
moduleColors)$eigengenes
datKME=signedKME(filt,
datME,
outputColumnName="MM.")
conn_mm_plot <- do.call(
patchwork::wrap_plots,
lapply(unique(moduleColors), function(module) {
restrict1 = (moduleColors==module)
df <- data.frame(
connectivity=Alldegrees1$kWithin[restrict1],
module_membership=(datKME[restrict1, paste("MM.", module, sep="")])^6
)
mm_gs_corr <- round(
cor(df$connectivity,
df$module_membership),
3)
mm_gs_corr_pval <- cor.test(
df$connectivity,
df$module_membership)$p.value
ggplot(df,
aes(
x=connectivity,
y=module_membership
))+
geom_point(color=module)+
theme_bw()+
geom_smooth(method = "lm",
color=module)+
labs(
x="Intramodular Connectivity",
y="Module Membership "
) +
labs(
title=paste0(stringr::str_to_title(module),
" Module"),
subtitle = paste0(
"correlation: ",
as.character(mm_gs_corr),
"\n",
"p-value: ",
as.character(mm_gs_corr_pval))
)
})
)
sigGenes <- lapply(
unique(moduleColors),
function(module){
FilterGenes= abs(GS1)> gene_sig_thresh & abs(datKME[paste0("MM.",module)])>mm_thresh
return(colnames(filt)[FilterGenes])
})
names(sigGenes) <- unique(moduleColors)