255 samples | Transcripts from RSEM | Dream | coef = age.
## Loading required package: limma
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x purrr::accumulate() masks foreach::accumulate()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::when() masks foreach::when()
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'variancePartition'
## The following object is masked from 'package:limma':
##
## classifyTestsF
expression_path = "~/ad-omics_hydra/microglia_omics/expression_tables/added_pilot_314s/"
metadata_path = "~/pd-omics/katia/Microglia/mic_255s/metadata_files/"
load(paste0(metadata_path, "metadata_255filt_eng_29jun2020.Rdata"))
# str(metadata3rd_pass)
load(paste0(expression_path, "tx_matrix.RData"))
# dim(tx_counts)
# dim(tx_tpm)
tx_counts_filt = tx_counts
colnames(tx_counts_filt) = gsub("(.*?)\\.(.*)", "\\1", colnames(tx_counts_filt))
tx_counts_filt = tx_counts_filt[, as.character(metadata3rd_pass$donor_tissue)]
#Filter genes:
x = data.frame(tx_counts_filt, check.names = F)
cpm = cpm(x)
keep.exp = rowSums(cpm > 1) >= 0.3*ncol(x)
tx_counts_filt = tx_counts_filt[keep.exp,] # final filtered transcript table
#save(tx_counts_filt, file = paste0(expression_path, "/expr_255s_tx/tx_matrix_255s_filt.RData"))
metadata <- metadata3rd_pass
ageByDonor = unique(metadata[,c("donor_id", "age", "sex")])
ggplot(ageByDonor, aes(x=age, fill=age)) +
geom_histogram(bins = 25, colour='black', position = "stack", fill="#5c88da99") +
labs(x="Age", y="Donors") +
scale_y_continuous(breaks = (1:20)) +
scale_x_continuous(breaks=seq(20,120,10)) +
theme_classic()
To use cause_of_death in the model, I got the NAs and change it for “Other” category.
To use C1-C4 I calculated the median for the missing samples.
params = BiocParallel::MulticoreParam(workers=20, progressbar=T)
register(params)
registerDoParallel(20)
metadata$cause_of_death_categories[metadata$cause_of_death_categories %in% NA] <- "Other"
# table(metadata$cause_of_death_categories)
metadata$C1 = metadata$C1 %>% replace_na(median(metadata$C1, na.rm = T))
metadata$C2 = metadata$C2 %>% replace_na(median(metadata$C2, na.rm = T))
metadata$C3 = metadata$C3 %>% replace_na(median(metadata$C3, na.rm = T))
metadata$C4 = metadata$C4 %>% replace_na(median(metadata$C4, na.rm = T))
# Matching the names from the DE analysis
genes_counts4deg = tx_counts_filt
metadata4deg = metadata
# all(colnames(tx_counts_filt) == as.character(metadata4deg$donor_tissue)) # Check the order of columns - TRUE
# The dream model operates directly on the results of voom.
# The only change compared to the standard limma workflow is to replace lmFit with dream.
# Check variance partition version
# packageVersion("variancePartition") # Must be 1.17.7
# The variable to be tested should be a fixed effect
form <- ~ sex + (1|donor_id) + age + tissue + (1|cause_of_death_categories) + C1 + C2 + C3 + C4 + picard_pct_mrna_bases + picard_summed_median + picard_pct_ribosomal_bases
# estimate weights using linear mixed model of dream
vobjDream = suppressWarnings( voomWithDreamWeights( genes_counts4deg, form, metadata4deg ) ) # supressing messages because of Biocparallel was generating a lot of messages
# Fit the dream model on each gene
# By default, uses the Satterthwaite approximation for the hypothesis test
fitmm = suppressWarnings (dream( vobjDream, form, metadata4deg ))
# Examine design matrix
# createDT(fitmm$design, 3)
res_age <- data.frame(topTable(fitmm, coef='age',
number=nrow(genes_counts4deg), sort.by = "p"), check.names = F)
save(vobjDream, form, fitmm, res_age, genes_counts4deg, metadata4deg, file = "dream_DE_res.RData")
The voom here comes from the Dream object.
genes_voom = as.data.frame(vobjDream$E)
res.pca = prcomp(t(genes_voom))
library(ggfortify); library(ggplot2)
autoplot(res.pca, data = metadata4deg, colour = 'age') +
scale_colour_viridis_c() +
theme_classic()
I’m applying the removeBatchEffect function to get the residuals.
allResiduals <- removeBatchEffect(x = genes_voom,
batch = metadata4deg$tissue,
batch2 = paste(metadata4deg$sex, metadata4deg$cause_of_death_categories),
# design = model.matrix(~ age, data = metadata4deg), #force to not regress age. It didn't change the PCA.
covariates = as.matrix(metadata4deg[, c("picard_pct_mrna_bases", "picard_summed_median", "picard_pct_ribosomal_bases", "C1","C2","C3","C4" )]))
res.pca = prcomp(t(allResiduals))
autoplot(res.pca, data = metadata4deg, colour = 'age') +
scale_colour_viridis_c() +
theme_classic()
R version 4.0.3 (2020-10-10) Platform: x86_64-pc-linux-gnu (64-bit) Running under: CentOS Linux 7 (Core)
Matrix products: default BLAS: /usr/local/lib64/R/lib/libRblas.so LAPACK: /usr/local/lib64/R/lib/libRlapack.so
locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages: [1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages: [1] ggfortify_0.4.11 variancePartition_1.20.0 Biobase_2.50.0
[4] BiocGenerics_0.36.0 scales_1.1.1 forcats_0.5.1
[7] stringr_1.4.0 dplyr_1.0.5 purrr_0.3.4
[10] readr_1.4.0 tidyr_1.1.3 tibble_3.1.0
[13] tidyverse_1.3.0 BiocParallel_1.24.1 doParallel_1.0.16
[16] iterators_1.0.13 foreach_1.5.1 ggplot2_3.3.3
[19] edgeR_3.32.1 limma_3.46.0
loaded via a namespace (and not attached): [1] nlme_3.1-149 bitops_1.0-6 fs_1.5.0 pbkrtest_0.5.1
[5] lubridate_1.7.10 progress_1.2.2 httr_1.4.2 tools_4.0.3
[9] backports_1.2.1 bslib_0.2.4 DT_0.17 utf8_1.2.1
[13] R6_2.5.0 KernSmooth_2.23-17 DBI_1.1.1 colorspace_2.0-0
[17] withr_2.4.1 gridExtra_2.3 tidyselect_1.1.0 prettyunits_1.1.1 [21] compiler_4.0.3 cli_2.3.1 rvest_1.0.0 xml2_1.3.2
[25] labeling_0.4.2 sass_0.3.1 caTools_1.18.1 digest_0.6.27
[29] minqa_1.2.4 rmarkdown_2.7 colorRamps_2.3 pkgconfig_2.0.3
[33] htmltools_0.5.1.1 lme4_1.1-26 highr_0.8 dbplyr_2.1.0
[37] htmlwidgets_1.5.3 rlang_0.4.10 readxl_1.3.1 rstudioapi_0.13
[41] farver_2.1.0 jquerylib_0.1.3 generics_0.1.0 jsonlite_1.7.2
[45] crosstalk_1.1.1 gtools_3.8.2 magrittr_2.0.1 Matrix_1.3-2
[49] Rcpp_1.0.6 munsell_0.5.0 fansi_0.4.2 lifecycle_1.0.0
[53] stringi_1.5.3 yaml_2.2.1 MASS_7.3-53.1 plyr_1.8.6
[57] gplots_3.1.1 grid_4.0.3 crayon_1.4.1 lattice_0.20-41
[61] haven_2.3.1 splines_4.0.3 hms_1.0.0 locfit_1.5-9.4
[65] knitr_1.31 pillar_1.5.1 boot_1.3-25 reshape2_1.4.4
[69] codetools_0.2-16 reprex_1.0.0 glue_1.4.2 evaluate_0.14
[73] modelr_0.1.8 vctrs_0.3.6 nloptr_1.2.2.2 cellranger_1.1.0
[77] gtable_0.3.0 assertthat_0.2.1 xfun_0.22 broom_0.7.5
[81] viridisLite_0.3.0 statmod_1.4.35 ellipsis_0.3.1