Post mashR pipeline

Previous pipeline: https://github.com/RajLabMSSM/mashR

GTEx paper: https://www.nature.com/articles/s41588-018-0268-8

# Read the summary statistics that were prepared as input for mash.
mash_rds <- readRDS(paste0(fastqtl_to_mash_output, "QTLSumStats.mash.rds"))
# names(mash_rds)

# Effect sizes (b) and z-scores (z) stored under the "strong" entries.
maxb <- mash_rds$strong.b
maxz <- mash_rds$strong.z

# Read the posterior summaries written by the mashr/flashr workflow.
mash_posterior <- readRDS(
  paste0(
    mashr_flashr_workflow_output,
    "QTLSumStats.mash.EZ.FL_PC3.V_simple.posterior.rds"
  )
)
# names(mash_posterior)
# head(mash_posterior$lfsr)
# dim(mash_posterior$lfsr)

pm.mash <- mash_posterior$PosteriorMean
lfsr.all <- mash_posterior$lfsr

# Standard error taken as effect size divided by z-score, then used to
# rescale the posterior means element-wise.
# NOTE(review): presumably the posterior means are on the z-score scale and
# maxb/maxz share row and column order with pm.mash -- confirm.
standard.error <- maxb / maxz
pm.mash.beta <- pm.mash * standard.error

lfsr table at 5%

lfsr = local false sign rate, a measure proposed by Stephens that is analogous to the FDR (false discovery rate).

Number of significant sclusters:

# Keep only the rows with lfsr below 0.05 in at least one condition.
# The row mask is built once so both matrices are filtered by exactly the
# same rows; na.rm = TRUE stops a missing lfsr from turning the mask into NA
# (an NA in a logical row index would insert an all-NA row).
sig.rows <- rowSums(lfsr.all < 0.05, na.rm = TRUE) > 0

# drop = FALSE keeps the matrix shape even when a single row survives;
# without it the result collapses to a vector and later column indexing
# such as lfsr.mash[, i] fails.
pm.mash.beta <- pm.mash.beta[sig.rows, , drop = FALSE]
lfsr.mash <- lfsr.all[sig.rows, , drop = FALSE] # lfsr.mash holds the significant results.

# Number of significant rows.
nrow(lfsr.mash)

[1] 4614

# Build a table of the significant lfsr values annotated with gene
# identifiers, write it (and the full lfsr matrix) to disk, and display it.
lfsr.mash_symbol <- as.data.frame(lfsr.mash)
rownames(lfsr.mash_symbol) <- gsub("_ENS", ":ENS", rownames(lfsr.mash_symbol))

# Derive the Ensembl ID from the row name: keep everything after the first
# ":" and then everything before the first "_".
lfsr.mash_symbol$ensembl <- gsub("(.*?):(.*)", "\\2", rownames(lfsr.mash_symbol))
lfsr.mash_symbol$ensembl <- gsub("(.*?)_(.*)", "\\1", lfsr.mash_symbol$ensembl)

## Get conversion table for Gencode 30
gencode_30 <- read.table("~/pd-omics/katia/ens.geneid.gencode.v30", header = TRUE)
# Strip everything from the first "." onwards so gene_id matches the
# ensembl key built above.
gencode_30$ensembl <- gsub("(.*?)\\.(.*)", "\\1", gencode_30$gene_id)

# merge() does not keep row names, so store them in a column first.
lfsr.mash_symbol$scluster <- rownames(lfsr.mash_symbol)
# Default merge(): rows whose ensembl value has no match in gencode_30 are
# dropped.
lfsr.mash_symbol <- merge(lfsr.mash_symbol, gencode_30, by = "ensembl")

# Truncate every column name at its first "_". This also turns "gene_id"
# into "gene"; that column is discarded by the explicit column selection
# below, so no separate removal step is needed.
colnames(lfsr.mash_symbol) <- gsub("(.*?)_(.*)", "\\1", colnames(lfsr.mash_symbol))

lfsr.mash_symbol <- lfsr.mash_symbol[, c("ensembl", "GeneSymbol", "scluster", "MFG", "STG", "SVZ", "THA")]
lfsr.mash_symbol$scluster <- gsub("_s", "_rs", lfsr.mash_symbol$scluster)

# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
write.table(lfsr.all, file = paste0(work_dir, "mash_lfsr_sQTL_all.txt"), sep = "\t", quote = FALSE)
write.table(lfsr.mash_symbol, file = paste0(work_dir, "mash_lfsr_sQTL_5per.txt"), sep = "\t", quote = FALSE)

createDT(lfsr.mash_symbol)

Number of unique genes:

# Count the distinct gene symbols: each value is counted at its first
# occurrence only.
sum(!duplicated(lfsr.mash_symbol$GeneSymbol))

[1] 2240

Pairwise plot

Pairwise sharing by magnitude of sQTLs among tissues. For each pair of tissues, we considered the top sQTLs that were significant (lfsr < 0.05) in at least one of the two tissues, and plotted the proportion of these that are shared in magnitude—that is, have effect estimates that are the same sign and within a factor of 2 in size of one another.

# lfsr threshold below which an effect counts as significant.
thresh <- 0.05

# Square matrix with one row and one column per column of lfsr.mash;
# entry [i, j] receives the sharing proportion for that pair.
shared.fold.size <- matrix(NA, nrow = ncol(lfsr.mash), ncol = ncol(lfsr.mash))
colnames(shared.fold.size) <- rownames(shared.fold.size) <- colnames(maxz)

# seq_len() yields an empty sequence for zero columns, whereas 1:0 would
# iterate over c(1, 0) and fail on the subscripts.
for (i in seq_len(ncol(lfsr.mash))) {
  # Rows significant in condition i; independent of j, so computed once
  # per outer iteration.
  sig.row <- which(lfsr.mash[, i] < thresh)
  for (j in seq_len(ncol(lfsr.mash))) {
    sig.col <- which(lfsr.mash[, j] < thresh)
    # Rows significant in at least one of the two conditions.
    a <- union(sig.row, sig.col)
    # A ratio strictly between 0.5 and 2 means the two effects have the
    # same sign and are within a factor of 2 of each other.
    quotient <- pm.mash.beta[a, i] / pm.mash.beta[a, j]
    shared.fold.size[i, j] <- mean(quotient > 0.5 & quotient < 2)
  }
}

# Plot heatmap of sharing by magnitude
# Generate the heatmap using the “levelplot” function from the lattice package.

# 64-step colour ramp running from blue ("#4575B4") to red ("#D73027").
clrs <- colorRampPalette(
  c(
    "#4575B4", "#91BFDB", "#E0F3F8", "#FFFFBF",
    "#FEE090", "#FC8D59", "#D73027"
  )
)(64)

# Work on a copy and blank the lower triangle so each pair is shown once.
lat <- shared.fold.size
lat[lower.tri(lat)] <- NA
n <- nrow(lat)

# Shorten the axis labels to the text before their first "_".
rownames(lat) <- gsub("(.*?)_(.*)", "\\1", rownames(lat))
colnames(lat) <- gsub("(.*?)_(.*)", "\\1", colnames(lat))

# Panel function: draw the coloured cells, then print each cell's value
# rounded to two decimals on top of it (nothing for NA cells).
myPanel <- function(x, y, z, ...) {
  panel.levelplot(x, y, z, ...)
  cell.value <- round(z, 2)
  cell.label <- ifelse(is.na(cell.value), "", cell.value)
  panel.text(x, y, cell.label)
}

# To save the figure, open a PDF device before the plot and close it after:
#pdf(file = "/Users/katia/OneDrive/Documentos/MountSinai/Projects/Microglia/Figures4paper/pairwise_numb_005_sQTL.pdf", width = 5, height = 5)
# Rows are passed in reverse order (n down to 1).
print(
  levelplot(
    lat[n:1, ],
    col.regions = clrs,
    xlab = "",
    ylab = "",
    colorkey = TRUE,
    panel = myPanel,
    main = "Pairwise sharing by magnitude"
  )
)
#dev.off()

Sharing by sign

Compute overall sharing by sign and magnitude

# Load the helper script; het.norm() used below is expected to come from it.
source(paste0(code_folder, "normfuncs.R"))

# Overall sharing by sign: proportion of het.norm() output entries that are
# positive.
# NOTE(review): presumably het.norm() scales each row by its
# largest-magnitude effect -- confirm in normfuncs.R.
signall <- mean(het.norm(pm.mash.beta) > 0)
# Recorded results: sQTL = 97 %, eQTL = 89 %
signall

[1] 0.9736671

Sharing by magnitude

# Overall sharing by magnitude: proportion of het.norm() output entries
# that exceed 0.5.
magall <- mean(het.norm(pm.mash.beta) > 0.5)
# Recorded results: sQTL = 78%, eQTL = 53%
magall

[1] 0.7787711

# Print the R version, platform and package versions used for this run.
sessionInfo()

R version 3.6.2 (2019-12-12) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Catalina 10.15.5

Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages: [1] stats graphics grDevices utils datasets methods base

other attached packages: [1] ggsci_2.9 rmeta_3.0 dplyr_1.0.0 lattice_0.20-38

loaded via a namespace (and not attached): [1] Rcpp_1.0.4.6 later_1.0.0 pillar_1.4.4 compiler_3.6.2
[5] tools_3.6.2 digest_0.6.25 jsonlite_1.6.1 evaluate_0.14
[9] lifecycle_0.2.0 tibble_3.0.1 gtable_0.3.0 pkgconfig_2.0.3
[13] rlang_0.4.6 shiny_1.4.0 crosstalk_1.0.0 yaml_2.2.0
[17] xfun_0.11 fastmap_1.0.1 stringr_1.4.0 knitr_1.26
[21] generics_0.0.2 vctrs_0.3.1 htmlwidgets_1.5.1 grid_3.6.2
[25] DT_0.13 tidyselect_1.1.0 glue_1.4.1 R6_2.4.1
[29] rmarkdown_2.0 purrr_0.3.4 ggplot2_3.3.2 magrittr_1.5
[33] promises_1.1.0 scales_1.1.1 ellipsis_0.3.1 htmltools_0.4.0
[37] xtable_1.8-4 mime_0.8 colorspace_1.4-1 httpuv_1.5.2
[41] stringi_1.4.6 munsell_0.5.0 crayon_1.3.4

Post mashR pipeline

sQTL

Katia de Paiva Lopes
Raj Lab
Department of Neuroscience
Icahn School of Medicine at Mount Sinai
NYC, New York

2020-07-27

lfsr table at 5%

Pairwise plot

Post mashR pipeline

sQTL

Katia de Paiva Lopes Raj Lab Department of Neuroscience Icahn School of Medicine at Mount Sinai NYC, New York

2020-07-27

lfsr table at 5%

Pairwise plot

Sharing by sign

Sharing by magnitude

Katia de Paiva Lopes
Raj Lab
Department of Neuroscience
Icahn School of Medicine at Mount Sinai
NYC, New York