Dream tool | coef = sex
expression_dir = "~/ad-omics_hydra/microglia_omics/expression_tables/added_pilot_314s/expr_4brain_regions/"
work_plots = "~/pd-omics/katia/scripts/GitHub_scripts/glia_omics/3rd_pass_mic_255s/sex_related/"
metadata_path = "~/pd-omics/katia/Microglia/mic_255s/metadata_files/"
load(paste0(expression_dir, "Expression_filt_255s.Rdata"))
# dim(genes_counts_exp_3rd) # 19376 255
load(paste0(metadata_path, "metadata_255filt_eng_29jun2020.Rdata"))
# str(metadata3rd_pass)
metadata <- metadata3rd_pass
genes_counts_exp <- genes_counts_exp_3rd
sexByDonor = unique(metadata[,c("donor_id", "sex")])
#createDT(sexByDonor)
as.data.frame(t(as.matrix(unclass( table(sexByDonor$sex, useNA = "ifany"))))) %>%
kable(row.names = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
f | m |
---|---|
58 | 42 |
I’ve removed 619 genes in our dataset, from x and y chromosomes.
position = read.table("/sc/hydra/projects/PBG/REFERENCES/GRCh38/Gencode/release_30/gencode.v30.primary_assembly.annotation.reflat", stringsAsFactors = F, header = F)
position = position[, c(1,3)]
colnames(position) = c("ensembl", "chr")
y_ens = position$ensembl[position$chr %in% "chrY"]
x_ens = position$ensembl[position$chr %in% "chrX"]
genes_counts_nosex = genes_counts_exp[!rownames(genes_counts_exp) %in% c(x_ens, y_ens), ]
params = BiocParallel::MulticoreParam(workers=20, progressbar=T)
register(params)
registerDoParallel(20)
metadata$cause_of_death_categories[metadata$cause_of_death_categories %in% NA] <- "Other"
#table(metadata$cause_of_death_categories)
metadata$C1 = metadata$C1 %>% replace_na(median(metadata$C1, na.rm = T))
metadata$C2 = metadata$C2 %>% replace_na(median(metadata$C2, na.rm = T))
metadata$C3 = metadata$C3 %>% replace_na(median(metadata$C3, na.rm = T))
metadata$C4 = metadata$C4 %>% replace_na(median(metadata$C4, na.rm = T))
# Matching the names from the DE analysis to use the same code
genes_counts4deg = genes_counts_nosex
metadata4deg = metadata
# all(colnames(genes_counts_exp) == metadata$donor_tissue) # Check the order of columns - TRUE
# The dream model operates directly on the results of voom.
# The only change compared to the standard limma workflow is to replace lmFit with dream.
# Check variance partition version
# packageVersion("variancePartition") # Must be 1.17.7
# The variable to be tested should be a fixed effect
form <- ~ sex + (1|donor_id) + age + tissue + (1|cause_of_death_categories) + C1 + C2 + C3 + C4 + picard_pct_mrna_bases + picard_summed_median + picard_pct_ribosomal_bases
# estimate weights using linear mixed model of dream
vobjDream = suppressWarnings( voomWithDreamWeights( genes_counts4deg, form, metadata4deg ) ) # supressing messages because of Biocparallel was generating a lot of messages
# Fit the dream model on each gene
# By default, uses the Satterthwaite approximation for the hypothesis test
fitmm = suppressWarnings (dream( vobjDream, form, metadata4deg ))
# Examine design matrix
# createDT(fitmm$design, 3)
res_dream <- data.frame(topTable(fitmm, coef='sexm',
number=nrow(genes_counts4deg), sort.by = "p"), check.names = F)
The t-statistics are not directly comparable since they have different degrees of freedom. In order to be able to compare test statistics, we report z.std which is the p-value transformed into a signed z-score. This can be used for downstream analysis.
res = res_dream
p = ggplot(res, aes(P.Value))
p + geom_density(color="darkblue", fill="lightblue") +
theme_classic() +
ggtitle("FDR Distribution")
p = ggplot(res, aes(logFC))
p + geom_density(color = "darkblue", fill = "lightblue") +
theme_classic() +
ggtitle("Fold Change Distribution")
plot.data = res
plot.data$id = rownames(plot.data)
data = data.frame(plot.data)
data$P.Value = -log10(data$P.Value)
data$fifteen = as.factor(abs(data$adj.P.Val < 0.05))
ma = ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c ("> 0.05", "< 0.05")) +
labs(title = "MA plot", color = "labels") +
theme_classic()
#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)
vp = ggplot(data, aes(logFC, P.Value, color = fifteen))
vp + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
labs(title = "Gene Level Volcano Plot", color = "FDR") +
#theme(plot.title = element_text(hjust = 0.5)) +
theme_classic() +
xlim(-10,10) + ylim(0, 10) + ylab("-log10 pvalue")
## Get conversion table for Gencode 30
gencode_30 = read.table("~/pd-omics/katia/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
res$ensembl = rownames(res)
res_name = merge(res, gencode_30, by="ensembl")
rownames(res_name) = res_name$ensembl
res_name = res_name[order(res_name$adj.P.Val), ]
res_name = res_name[, c("symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")]
deg_lists = res_name[which(res_name$adj.P.Val<0.05),]
createDT(res_name)
LogFC > 0 = UP regulated in MALE.
The pvalues in the boxplots are different from the DEG results. We are using the ggpubr library to compare the means. Test = Wilcoxon.
top_6 = head(res_name)
top_6$ensembl = rownames(top_6)
genes_voom = genes_counts_voom_3rd[rownames(genes_counts_voom_3rd) %in% rownames(genes_counts_nosex),]
#genes_voom = genes_counts_voom[, rownames(metadata4deg)] # Voom 1st pass only for the samples used in this comparison
gene2check = as.data.frame( genes_voom[rownames(top_6) ,])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "donor_tissue")
ggplot(gene2check_charac, aes(x = sex, y = value, fill = sex)) +
geom_boxplot(notch = T, na.rm = T, outlier.color = NA) +
# geom_jitter() +
# scale_fill_manual(values = c("magenta", "orange")) +
theme_classic() +
labs(x = "Gender", y = "Gene expression") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "left", method = "wilcox.test") +
facet_wrap(~symbol, scales = "free_y")
R version 3.6.2 (2019-12-12) Platform: x86_64-pc-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core)
Matrix products: default BLAS/LAPACK: /hpc/packages/minerva-centos7/intel/parallel_studio_xe_2019/compilers_and_libraries_2019.0.117/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages: [1] parallel grid stats graphics grDevices utils datasets [8] methods base
other attached packages: [1] tidyr_1.0.2 dplyr_0.8.5 doParallel_1.0.15
[4] iterators_1.0.12 ggfortify_0.4.9 gridExtra_2.3
[7] RColorBrewer_1.1-2 kableExtra_1.1.0 reshape2_1.4.3
[10] pheatmap_1.0.12 readxl_1.3.1 ggpubr_0.2.5
[13] magrittr_1.5 gplots_3.0.3 statmod_1.4.34
[16] edgeR_3.28.1 BiocParallel_1.20.1 variancePartition_1.17.7 [19] Biobase_2.46.0 BiocGenerics_0.32.0 scales_1.1.0
[22] foreach_1.4.8 limma_3.42.2 factoextra_1.0.6
[25] ggplot2_3.3.0 ggsci_2.9 ggeasy_0.1.1
loaded via a namespace (and not attached): [1] nlme_3.1-145 bitops_1.0-6 pbkrtest_0.4-8.6
[4] webshot_0.5.2 progress_1.2.2 httr_1.4.1
[7] numDeriv_2016.8-1.1 tools_3.6.2 DT_0.12
[10] R6_2.4.1 KernSmooth_2.23-16 colorspace_1.4-1
[13] withr_2.1.2 tidyselect_1.0.0 prettyunits_1.1.1
[16] compiler_3.6.2 rvest_0.3.5 xml2_1.2.5
[19] labeling_0.3 caTools_1.18.0 readr_1.3.1
[22] stringr_1.4.0 digest_0.6.25 minqa_1.2.4
[25] rmarkdown_2.1 colorRamps_2.3 pkgconfig_2.0.3
[28] htmltools_0.4.0 lme4_1.1-21 fastmap_1.0.1
[31] highr_0.8 htmlwidgets_1.5.1 rlang_0.4.5
[34] rstudioapi_0.11 shiny_1.4.0 farver_2.0.3
[37] jsonlite_1.6.1 crosstalk_1.0.0 gtools_3.8.1
[40] Matrix_1.2-18 Rcpp_1.0.3 munsell_0.5.0
[43] lifecycle_0.2.0 stringi_1.4.6 yaml_2.2.1
[46] MASS_7.3-51.5 plyr_1.8.6 promises_1.1.0
[49] gdata_2.18.0 ggrepel_0.8.2 crayon_1.3.4
[52] lattice_0.20-40 splines_3.6.2 hms_0.5.3
[55] locfit_1.5-9.1 knitr_1.28 pillar_1.4.3
[58] boot_1.3-24 ggsignif_0.6.0 codetools_0.2-16
[61] glue_1.3.1 evaluate_0.14 BiocManager_1.30.10 [64] httpuv_1.5.2 vctrs_0.2.4 nloptr_1.2.2
[67] cellranger_1.1.0 gtable_0.3.0 purrr_0.3.3
[70] assertthat_0.2.1 xfun_0.12 mime_0.9
[73] xtable_1.8-4 later_1.0.0 viridisLite_0.3.0
[76] tibble_2.1.3 lmerTest_3.1-1 ellipsis_0.3.0