Previous pipeline: https://github.com/RajLabMSSM/mashR
GTEx paper: https://www.nature.com/articles/s41588-018-0268-8
mash_rds = readRDS(paste0(fastqtl_to_mash_output, "QTLSumStats.mash.rds"))
# names(mash_rds)
maxb <- mash_rds$strong.b
maxz <- mash_rds$strong.z
mash_posterior = readRDS(paste0(mashr_flashr_workflow_output, "QTLSumStats.mash.EZ.FL_PC3.V_simple.posterior.rds"))
# names(mash_posterior)
# head(mash_posterior$lfsr)
# dim(mash_posterior$lfsr)
pm.mash <- mash_posterior$PosteriorMean
lfsr.all <- mash_posterior$lfsr
standard.error <- maxb/maxz
pm.mash.beta <- pm.mash*standard.error
lfsr = local false sign rate. Method proposed by Stephens, it is analogous to FDR. Number of significative genes:
pm.mash.beta <- pm.mash.beta[rowSums(lfsr.all<0.10)>0,]
lfsr.mash <- lfsr.all[rowSums(lfsr.all<0.10)>0,] # lfsr.mash have the significant results.
# Means they have lfsr less than 0.10 in at least one condition.
dim(lfsr.mash)[1]
[1] 5722
lfsr.mash_symbol = as.data.frame(lfsr.mash)
lfsr.mash_symbol$ensembl = gsub("(.*?)_(.*)", "\\1", rownames(lfsr.mash_symbol))
## Get conversion table for Gencode 30
gencode_30 = read.table("~/pd-omics/katia/ens.geneid.gencode.v30", header = T)
gencode_30$ensembl = gsub("(.*?)\\.(.*)", "\\1", gencode_30$gene_id)
lfsr.mash_symbol = merge(lfsr.mash_symbol, gencode_30, by = "ensembl")
rownames(lfsr.mash_symbol) <- rownames(lfsr.mash)
lfsr.mash_symbol$ensembl <- NULL
colnames(lfsr.mash_symbol) = gsub("(.*?)_(.*)","\\1",colnames(lfsr.mash_symbol))
lfsr.mash_symbol = lfsr.mash_symbol[,c("gene", "GeneSymbol", "MFG", "STG", "THA", "SVZ")]
rownames(lfsr.mash_symbol) = gsub("_s", "_rs", rownames(lfsr.mash_symbol))
write.table(lfsr.all, file = paste0(work_dir, "mash_lfsr_all.txt"), sep = "\t", quote = F)
write.table(lfsr.mash_symbol, file = paste0(work_dir, "mash_lfsr_10per.txt"), sep = "\t", quote = F)
createDT(lfsr.mash_symbol)
Pairwise sharing by magnitude of eQtLs among tissues. For each pair of tissues, we considered the top eQTLs that were significant (lfsr < 0.10) in at least one of the two tissues, and plotted the proportion of these that are shared in magnitude—that is, have effect estimates that are the same sign and within a factor of 2 in size of one another.
thresh <- 0.10
shared.fold.size <- matrix(NA,nrow = ncol(lfsr.mash),ncol=ncol(lfsr.mash))
colnames(shared.fold.size) <- rownames(shared.fold.size) <- colnames(maxz)
for (i in 1:ncol(lfsr.mash))
for (j in 1:ncol(lfsr.mash)) {
sig.row=which(lfsr.mash[,i]<thresh)
sig.col=which(lfsr.mash[,j]<thresh)
a=(union(sig.row,sig.col))
quotient=(pm.mash.beta[a,i]/pm.mash.beta[a,j])
shared.fold.size[i,j] = mean(quotient > 0.5 & quotient < 2)
}
# Plot heatmap of sharing by magnitude
# Generate the heatmap using the “levelplot” function from the lattice package.
clrs <- colorRampPalette(rev(c("#D73027","#FC8D59","#FEE090","#FFFFBF",
"#E0F3F8","#91BFDB","#4575B4")))(64)
lat <- shared.fold.size
lat[lower.tri(lat)] <- NA
n <- nrow(lat)
rownames(lat) <- gsub("(.*?)_(.*)", "\\1", rownames(lat))
colnames(lat) <- gsub("(.*?)_(.*)", "\\1", colnames(lat))
print(levelplot(lat[n:1,],col.regions = clrs,xlab = "",ylab = "",colorkey = TRUE))
# I am reading the input again to garantee the values!
source(paste0(code_folder, "normfuncs.R"))
mash_rds = readRDS(paste0(fastqtl_to_mash_output, "QTLSumStats.mash.rds"))
b.stat <- mash_rds$strong.b
z.stat <- mash_rds$strong.z
mash_posterior = readRDS(paste0(mashr_flashr_workflow_output, "QTLSumStats.mash.EZ.FL_PC3.V_simple.posterior.rds"))
# names(mash_posterior)
# head(mash_posterior$lfsr)
# dim(mash_posterior$lfsr)
posterior.means <- mash_posterior$PosteriorMean
lfsr <- mash_posterior$lfsr
mar.var <- mash_posterior$PosteriorSD
colnames(lfsr) <-
colnames(mar.var) <-
colnames(posterior.means) <-
colnames(z.stat)
standard.error <- b.stat/z.stat
posterior.betas <- standard.error * posterior.means
pm.beta.norm <- het.norm(posterior.betas)
Each dot shows original (raw) effect estimate for a single tissue, then after mashR. Grey bars indicating ±2 s.e. In each case, mash combines information across all tissues, using the background information (patterns of sharing) learned from data on all eQTLs to produce more precise estimates.
tissue.colors = pal_futurama()(4)
create.metaplot <- function (j) {
plot.new()
pm.beta.norm <- pm.beta.norm # Shuffle columns.
z.shuffle <- z.stat
b.shuffle <- b.stat
post.var <- mar.var
post.bshuffle <- posterior.betas
sem.shuffle <- standard.error
lfsr <- lfsr
plot.title <- strsplit(rownames(z.stat)[j], "[.]")[[1]][1]
x <- as.numeric(post.bshuffle[j,])
layout(t(1:2))
metaplot(as.numeric(b.shuffle[j,]),as.numeric(sem.shuffle[j,]),xlab = "",
ylab = "",colors = meta.colors(box = as.character(tissue.colors)),
labels = gsub("(.*?)_(.*)","\\1",colnames(z.stat)),
xlim = c(-1,1))
title(plot.title)
# Transform to posterior sd of beta.
sd <- as.numeric(sem.shuffle[j,]) * sqrt(as.numeric(post.var[j,]))
metaplot(x,sd,xlab = "",ylab = "",
colors = meta.colors(box = as.character(tissue.colors)),
labels = gsub("(.*?)_(.*)","\\1",colnames(z.stat)),
xlim = c(-1,1))
title(plot.title)
}
Cathepsin B. This gene encodes a member of the C1 family of peptidases. Alternative splicing of this gene results in multiple transcript variants. At least one of these variants encodes a preproprotein that is proteolytically processed to generate multiple protein products. These products include the cathepsin B light and heavy chains, which can dimerize to form the double chain form of the enzyme. This enzyme is a lysosomal cysteine protease with both endopeptidase and exopeptidase activity that may play a role in protein turnover. It is also known as amyloid precursor protein secretase and is involved in the proteolytic processing of amyloid precursor protein (APP). Incomplete proteolytic processing of APP has been suggested to be a causative factor in Alzheimer’s disease, the most common cause of dementia. Overexpression of the encoded protein has been associated with esophageal adenocarcinoma and other tumors. Multiple pseudogenes of this gene have been identified. [provided by RefSeq, Nov 2015]
MS4A6A (Membrane Spanning 4-Domains A6A) is a Protein Coding gene. Diseases associated with MS4A6A include Polycystic Lipomembranous Osteodysplasia With Sclerosing Leukoencephalopathy and Alzheimer Disease.
MS4A14 (Membrane Spanning 4-Domains A14) is a Protein Coding gene. Diseases associated with MS4A14 include Lagophthalmos and Superficial Keratitis.
Example of a gene with max value of posterior beta for the tissue: MFG.
ALDOC (Aldolase, Fructose-Bisphosphate C) is a Protein Coding gene. Among its related pathways are Innate Immune System and Glucose metabolism. Gene Ontology (GO) annotations related to this gene include cytoskeletal protein binding and fructose-bisphosphate aldolase activity.
#posterior.betas[posterior.betas[,1]==max(posterior.betas[,1]),]
create.metaplot(posterior.betas[,1]==max(posterior.betas[,1]))
Example of a gene with max value of posterior beta for the tissue: STG.
INTS4P1 (Integrator Complex Subunit 4 Pseudogene 1) is a Pseudogene.
create.metaplot(posterior.betas[,2]==max(posterior.betas[,2]))
# rownames(z.stat)[posterior.betas[,2]==max(posterior.betas[,2])]
Example of a gene with max value of posterior beta for the tissue: SVZ.
NUDT9 (Nudix Hydrolase 9) is a Protein Coding gene. Diseases associated with NUDT9 include Type 1 Diabetes Mellitus 10 and Arrhythmogenic Right Ventricular Dysplasia. Among its related pathways are Metabolism and Metabolism of nucleotides. Gene Ontology (GO) annotations related to this gene include hydrolase activity and ADP-sugar diphosphatase activity.
create.metaplot(posterior.betas[,3]==max(posterior.betas[,3]))
# rownames(z.stat)[posterior.betas[,3]==max(posterior.betas[,3])]
Example of a gene with max value of posterior beta for the tissue: THA.
TM7SF2 (Transmembrane 7 Superfamily Member 2) is a Protein Coding gene. Among its related pathways are Regulation of cholesterol biosynthesis by SREBP and Metabolism. Gene Ontology (GO) annotations related to this gene include oxidoreductase activity, acting on the CH-CH group of donors, NAD or NADP as acceptor and delta14-sterol reductase activity.
create.metaplot(posterior.betas[,4]==max(posterior.betas[,4]))
# rownames(z.stat)[posterior.betas[,4]==max(posterior.betas[,4])]
Complete table. Number of gene-SNP pairs:
[1] 18799
## Get conversion table for Gencode 30 from previous chunck
posterior.betas_symbol = as.data.frame(posterior.betas)
colnames(posterior.betas_symbol) = gsub("(.*?)_(.*)","\\1",colnames(posterior.betas_symbol))
posterior.betas_symbol$ensembl = gsub("(.*?)_(.*)", "\\1", rownames(posterior.betas_symbol))
posterior.betas_symbol$snp = gsub("(.*?)_(.*)", "\\2", rownames(posterior.betas_symbol))
posterior.betas_symbol = merge(posterior.betas_symbol, gencode_30, by="ensembl")
posterior.betas_symbol$gene_id = NULL
posterior.betas_symbol = posterior.betas_symbol[,c("ensembl", "snp", "GeneSymbol", "MFG", "STG", "THA", "SVZ")]
posterior.betas_symbol$snp = gsub("s", "rs", posterior.betas_symbol$snp)
write.table(posterior.betas, file = paste0(work_dir, "posterior.beta_all.txt"), sep = "\t", quote = F)
write.table(posterior.betas_symbol, file = paste0(work_dir, "posterior.beta_all_symbol.txt"), sep = "\t", quote = F)
createDT(posterior.betas_symbol)
R version 3.6.1 (2019-07-05) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Catalina 10.15.4
Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages: [1] stats graphics grDevices utils datasets methods base
other attached packages: [1] ggsci_2.9 rmeta_3.0 dplyr_0.8.5 lattice_0.20-38
loaded via a namespace (and not attached): [1] Rcpp_1.0.4 later_1.0.0 pillar_1.4.3 compiler_3.6.1
[5] tools_3.6.1 digest_0.6.25 jsonlite_1.6.1 evaluate_0.14
[9] tibble_2.1.3 lifecycle_0.2.0 gtable_0.3.0 pkgconfig_2.0.3
[13] rlang_0.4.5 shiny_1.4.0 crosstalk_1.0.0 yaml_2.2.0
[17] xfun_0.11 fastmap_1.0.1 stringr_1.4.0 knitr_1.26
[21] htmlwidgets_1.5.1 grid_3.6.1 DT_0.13 tidyselect_0.2.5 [25] glue_1.4.0 R6_2.4.1 rmarkdown_2.0 ggplot2_3.3.0
[29] purrr_0.3.3 magrittr_1.5 promises_1.1.0 scales_1.1.0
[33] htmltools_0.4.0 assertthat_0.2.1 xtable_1.8-4 mime_0.8
[37] colorspace_1.4-1 httpuv_1.5.2 stringi_1.4.6 munsell_0.5.0
[41] crayon_1.3.4