Previous pipeline: https://github.com/RajLabMSSM/mashR
GTEx paper: https://www.nature.com/articles/s41588-018-0268-8
mash_rds = readRDS(paste0(fastqtl_to_mash_output, "QTLSumStats.mash.rds"))
#names(mash_rds)
maxb <- mash_rds$strong.b
maxz <- mash_rds$strong.z
mash_posterior = readRDS(paste0(mashr_flashr_workflow_output, "QTLSumStats.mash.EZ.FL_PC3.V_simple.posterior.rds"))
# names(mash_posterior)
# head(mash_posterior$lfsr)
# dim(mash_posterior$lfsr)
pm.mash <- mash_posterior$PosteriorMean
lfsr.all <- mash_posterior$lfsr
standard.error <- maxb/maxz
pm.mash.beta <- pm.mash*standard.error
#pm.mash.beta <- pm.mash.beta[rowSums(lfsr.all<0.10)>0,]
Threshold = lfsr < 0.05
# Compute number of “tissue-specific” effects for each tissue
# “tissue-specific” to mean that the effect is at least 2-fold larger in one tissue than in any other tissue.
source(paste0(code_folder, "normfuncs.R"))
thresh <- 0.05
lfsr.mash = lfsr.all
nsig <- rowSums(lfsr.mash < thresh) # ensembl | number of columns its significative
pm.mash.beta.norm <- het.norm(effectsize = pm.mash.beta)
pm.mash.beta.norm <- pm.mash.beta.norm[nsig > 0,]
lfsr.mash <- as.matrix(lfsr.mash[nsig > 0,])
colnames(lfsr.mash) <- colnames(maxz)
a <- which(rowSums(pm.mash.beta.norm > 0.5) == 1)
lfsr.fold <- as.matrix(lfsr.mash[a,])
pm <- as.matrix(pm.mash.beta.norm[a,])
tspec <- NULL
for(i in 1:ncol(pm))
tspec[i] <- sum(pm[,i] > 0.5)
tspec <- as.matrix(tspec)
rownames(tspec) <- colnames(maxz)
rownames(tspec) <- gsub("(.*?)_(.*)", "\\1", rownames(tspec))
colnames(tspec) <- c("number_TS")
createDT(tspec)
Plot number of “tissue-specific” effects for each tissue. “Tissue-specific” to mean that the effect is at least 2-fold larger in one tissue than in any other tissue.
# Plot number of “tissue-specific” effects for each tissue
colnames(lfsr.fold) <- gsub("(.*?)_(.*)", "\\1", colnames(lfsr.fold))
tissue.colors = pal_futurama()(4)
#pdf(file = "/Users/katia/OneDrive/Documentos/MountSinai/Projects/Microglia/Figures4paper/tissu_spec_005_sQTL.pdf", width = 3, height = 4)
barplot(as.numeric(t(tspec)),las = 1,cex.names = 0.75,col = tissue.colors,
names = colnames(lfsr.fold),
xlab = "Region",
ylab = "Tissue-specific effects")
#dev.off()
Number of total “tissue-specific” effects:
[1] 670
What are the sCluster-SNP pairs from the barplot?
colnames(pm) <- gsub("(.*?)_(.*)", "\\1", colnames(pm))
gene_snp_list = list()
for(i in 1:ncol(pm))
gene_snp_list[[colnames(pm)[i]]] <- rownames(pm)[pm[,i] > 0.5]
## Get conversion table for Gencode 30
gencode_30 = read.table("~/pd-omics/katia/ens.geneid.gencode.v30", header = T)
gencode_30$ensembl = gsub("(.*?)\\.(.*)", "\\1", gencode_30$gene_id)
sClusters
MFG = as.data.frame(gene_snp_list$MFG)
colnames(MFG) = c("scluster_snp")
MFG$ensembl = MFG$scluster_snp
MFG$ensembl = gsub("_ENS", ":ENS", MFG$ensembl)
MFG$ensembl = gsub("(.*?):(.*)", "\\2", MFG$ensembl)
MFG$ensembl = gsub("(.*?)_(.*)", "\\1", MFG$ensembl)
MFG_symbol = merge(MFG, gencode_30, by = "ensembl")
MFG_symbol$gene_id = NULL
MFG_symbol$rs_id = MFG_symbol$scluster_snp
MFG_symbol$rs_id = gsub("_s", ":rs", MFG_symbol$scluster_snp)
MFG_symbol$rs_id = gsub("(.*?):(.*)", "\\2", MFG_symbol$rs_id)
MFG_symbol$scluster_snp = gsub("_s", "_rs", MFG_symbol$scluster_snp)
write.table(MFG_symbol, file = paste0(work_dir, "ts_effect_5per_lists/ts_effect_2fold_MFG_sQTL.txt"), quote = F, row.names = F, sep = "\t")
createDT(MFG_symbol)
sClusters
STG = as.data.frame(gene_snp_list$STG)
colnames(STG) = c("scluster_snp")
STG$ensembl = STG$scluster_snp
STG$ensembl = gsub("_ENS", ":ENS", STG$ensembl)
STG$ensembl = gsub("(.*?):(.*)", "\\2", STG$ensembl)
STG$ensembl = gsub("(.*?)_(.*)", "\\1", STG$ensembl)
STG_symbol = merge(STG, gencode_30, by = "ensembl")
STG_symbol$gene_id = NULL
STG_symbol$rs_id = STG_symbol$scluster_snp
STG_symbol$rs_id = gsub("_s", ":rs", STG_symbol$scluster_snp)
STG_symbol$rs_id = gsub("(.*?):(.*)", "\\2", STG_symbol$rs_id)
STG_symbol$scluster_snp = gsub("_s", "_rs", STG_symbol$scluster_snp)
write.table(STG_symbol, file = paste0(work_dir, "ts_effect_5per_lists/ts_effect_2fold_STG_sQTL.txt"), quote = F, row.names = F, sep = "\t")
createDT(STG_symbol)
sClusters
THA = as.data.frame(gene_snp_list$THA)
colnames(THA) = c("scluster_snp")
THA$ensembl = THA$scluster_snp
THA$ensembl = gsub("_ENS", ":ENS", THA$ensembl)
THA$ensembl = gsub("(.*?):(.*)", "\\2", THA$ensembl)
THA$ensembl = gsub("(.*?)_(.*)", "\\1", THA$ensembl)
THA_symbol = merge(THA, gencode_30, by = "ensembl")
THA_symbol$gene_id = NULL
THA_symbol$rs_id = THA_symbol$scluster_snp
THA_symbol$rs_id = gsub("_s", ":rs", THA_symbol$scluster_snp)
THA_symbol$rs_id = gsub("(.*?):(.*)", "\\2", THA_symbol$rs_id)
THA_symbol$scluster_snp = gsub("_s", "_rs", THA_symbol$scluster_snp)
write.table(THA_symbol, file = paste0(work_dir, "ts_effect_5per_lists/ts_effect_2fold_THA_sQTL.txt"), quote = F, row.names = F, sep = "\t")
createDT(THA_symbol)
sClusters
SVZ = as.data.frame(gene_snp_list$SVZ)
colnames(SVZ) = c("scluster_snp")
SVZ$ensembl = SVZ$scluster_snp
SVZ$ensembl = gsub("_ENS", ":ENS", SVZ$ensembl)
SVZ$ensembl = gsub("(.*?):(.*)", "\\2", SVZ$ensembl)
SVZ$ensembl = gsub("(.*?)_(.*)", "\\1", SVZ$ensembl)
SVZ_symbol = merge(SVZ, gencode_30, by = "ensembl")
SVZ_symbol$gene_id = NULL
SVZ_symbol$rs_id = SVZ_symbol$scluster_snp
SVZ_symbol$rs_id = gsub("_s", ":rs", SVZ_symbol$scluster_snp)
SVZ_symbol$rs_id = gsub("(.*?):(.*)", "\\2", SVZ_symbol$rs_id)
SVZ_symbol$scluster_snp = gsub("_s", "_rs", SVZ_symbol$scluster_snp)
write.table(SVZ_symbol, file = paste0(work_dir, "ts_effect_5per_lists/ts_effect_2fold_SVZ_sQTL.txt"), quote = F, row.names = F, sep = "\t")
createDT(SVZ_symbol)
R version 3.6.2 (2019-12-12) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Catalina 10.15.5
Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages: [1] stats graphics grDevices utils datasets methods base
other attached packages: [1] ggsci_2.9 rmeta_3.0 dplyr_1.0.0 lattice_0.20-38
loaded via a namespace (and not attached): [1] Rcpp_1.0.4.6 later_1.0.0 pillar_1.4.4 compiler_3.6.2
[5] tools_3.6.2 digest_0.6.25 jsonlite_1.6.1 evaluate_0.14
[9] lifecycle_0.2.0 tibble_3.0.1 gtable_0.3.0 pkgconfig_2.0.3
[13] rlang_0.4.6 shiny_1.4.0 crosstalk_1.0.0 yaml_2.2.0
[17] xfun_0.11 fastmap_1.0.1 stringr_1.4.0 knitr_1.26
[21] generics_0.0.2 vctrs_0.3.1 htmlwidgets_1.5.1 grid_3.6.2
[25] DT_0.13 tidyselect_1.1.0 glue_1.4.1 R6_2.4.1
[29] rmarkdown_2.0 purrr_0.3.4 ggplot2_3.3.2 magrittr_1.5
[33] promises_1.1.0 scales_1.1.1 ellipsis_0.3.1 htmltools_0.4.0
[37] xtable_1.8-4 mime_0.8 colorspace_1.4-1 httpuv_1.5.2
[41] stringi_1.4.6 munsell_0.5.0 crayon_1.3.4