255 samples (from 314) | Different brain regions from the same donor | Multi disease cohort.
work_dir = "~/pd-omics/katia/Microglia/mic_255s/"
expression_dir = "~/ad-omics_hydra/microglia_omics/expression_tables/added_pilot_314s/expr_4brain_regions/"
metadata_path = "~/pd-omics/katia/Microglia/mic_255s/metadata_files/"
This metadata contains biological and technical covariates.
load(paste0(expression_dir, "Expression_filt_255s.Rdata"))
# dim(genes_counts_exp_3rd) # 19376 255
load(paste0(metadata_path, "metadata_255filt_eng_29jun2020.Rdata"))
# str(metadata3rd_pass)
metadata <- metadata3rd_pass
metadata$cause_of_death_categories[metadata$cause_of_death_categories %in% NA] <- "Other"
#table(metadata$cause_of_death_categories)
metadata$C1 = metadata$C1 %>% replace_na(median(metadata$C1, na.rm = T))
metadata$C2 = metadata$C2 %>% replace_na(median(metadata$C2, na.rm = T))
metadata$C3 = metadata$C3 %>% replace_na(median(metadata$C3, na.rm = T))
metadata$C4 = metadata$C4 %>% replace_na(median(metadata$C4, na.rm = T))
genes_counts_voom <- genes_counts_voom_3rd
#pca_voom = prcomp(t(genes_counts_voom)) #Same PCA from Analysis_clean
pca_voom = prcomp(genes_counts_voom)
# createDT(pca_voom$rotation[1:10, 1:5])
pcva_voom_selec = pca_voom$rotation[, 1:15] #15 firsts PCs
#autoplot(pca_voom)
Linear regression between the first 15 PCs and selected covariates. Colors = adj.r.square Numbers = p-values adjusted by Bonferroni.
var <- get_pca_var(pca_voom) # description of PCs
ind <- get_pca_ind(pca_voom) # PCs for individuals
# To include our batch
names(metadata) = tolower(names(metadata))
# Transforms characters columns in factors columns
indx <- sapply(metadata, is.character)
metadata[indx] <- lapply(metadata[indx], function(x) as.factor(x))
metadata$ph = as.numeric(as.character(metadata$ph))
#str(metadata) #shows class for all columns
#Covariates
covariates = c( "diagnosis",
"main_diagnosis",
"tissue",
"sex",
"age",
"pmd_minutes",
"ph",
"cause_of_death_categories",
"smoking",
"alcohol_dependence_daily_use",
"suicide_attempts",
"autoimmune_diseases",
"infection_2weeks",
"opiates",
"benzodiazepines",
"psychopharmaca",
"featurecounts_percent_assigned",
"featurecounts_assigned",
"picard_pct_ribosomal_bases",
"picard_pct_mrna_bases",
"picard_pct_pf_reads_aligned",
"picard_summed_median",
"picard_summed_mean",
"picard_percent_duplication",
"star_uniquely_mapped_percent",
"star_uniquely_mapped",
"fastqc_percent_duplicates",
"fastqc_percent_gc",
"fastqc_avg_sequence_length",
"fastqc_percent_fails",
"fastqc_total_sequences",
"lane",
"batch")
matrix_rsquared = matrix(NA, nrow = length(covariates), ncol = 15) #Number of factors
matrix_pvalue = matrix(NA, nrow = length(covariates), ncol = 15)
for (x in 1:length(covariates)){
for (y in 1:15){
matrix_rsquared[x,y] <- summary( lm(var$cor[,y] ~ metadata[,covariates[x]]) )$adj.r.squared
matrix_pvalue[x,y] <- glance(summary( lm(var$cor[,y] ~ metadata[,covariates[x]]) ))$p.value #To insert pvalues in the heatmap
}
}
rownames(matrix_rsquared) = covariates
rownames(matrix_pvalue) = covariates
matrix_pvalue = matrix(p.adjust(as.vector(as.matrix(matrix_pvalue)), method='bonferroni'),ncol=ncol(matrix_pvalue))
matrix_pvalue = formatC(matrix_pvalue, format = "e", digits = 2)
# png(paste0(work_dir, "LinearReg_15pcs_covariates.png"), width = 10, height = 10, res = 600, units = "in")
# pdf(paste0(work_dir, "LinearReg_15pcs_covariates.pdf"), width = 10, height = 10)
heatmap.2(as.matrix(matrix_rsquared), col = colorRampPalette(RColorBrewer::brewer.pal(6,"RdPu"))(40),
scale="none",
cellnote = matrix_pvalue,
notecol="black",
notecex = 0.6,
margins=c(10,12), # ("margin.Y", "margin.X")
trace='none',
#dendrogram=c("row"),
density.info='none',
denscol="black",
breaks = seq(0, 1, length.out = 41),
Colv = FALSE,
xlab = "PCs",
ylab = "Covariates",
key = TRUE,
keysize = 1,
key.title = "teste",
key.xlab = "Adjusted R2",
key.ylab = NULL,
key.xtickfun = NULL,
key.ytickfun = NULL,
key.par=list()
# main = "Linear regression between PCs and covariates"
)
#while (!is.null(dev.list())) dev.off()
# dev.off()
# Colored by adj.r.squared
# Numbers inside cell = pvalues
# png(paste0(work_dir, "LinearReg_15pcs_covariates_clean.png"), width = 8, height = 10, res = 600, units = "in")
# pdf(paste0(work_dir, "LinearReg_15pcs_covariates_clean.pdf"), width = 8, height = 10)
heatmap.2(as.matrix(matrix_rsquared), col = colorRampPalette(RColorBrewer::brewer.pal(6,"RdPu"))(40),
scale="none",
# cellnote = matrix_pvalue,
notecol="black",
notecex = 0.6,
margins=c(10,12), # ("margin.Y", "margin.X")
trace='none',
dendrogram=c("row"),
density.info='none',
denscol="black",
breaks = seq(0, 1, length.out = 41),
Colv = FALSE,
xlab = "PCs",
ylab = "Covariates",
keysize = 1,
key.title = "teste",
key.xlab = "Adjusted R2"
# main = "Linear regression between sva_network and covariates"
)
# dev.off()
# Colored by adj.r.squared
R version 3.6.2 (2019-12-12) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Catalina 10.15.7
Matrix products: default BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages: [1] stats4 parallel stats graphics grDevices utils datasets [8] methods base
other attached packages: [1] forcats_0.4.0 stringr_1.4.0
[3] dplyr_1.0.0 purrr_0.3.4
[5] readr_1.3.1 tidyr_1.1.0
[7] tibble_3.0.1 tidyverse_1.3.0
[9] broom_0.5.6 edgeR_3.28.0
[11] DESeq2_1.26.0 SummarizedExperiment_1.16.1 [13] DelayedArray_0.12.1 BiocParallel_1.20.1
[15] matrixStats_0.55.0 GenomicRanges_1.38.0
[17] GenomeInfoDb_1.22.0 IRanges_2.20.1
[19] S4Vectors_0.24.1 pamr_1.56.1
[21] survival_3.1-8 cluster_2.1.0
[23] factoextra_1.0.6 gplots_3.0.3
[25] ggfortify_0.4.10 variancePartition_1.17.7
[27] Biobase_2.46.0 BiocGenerics_0.32.0
[29] scales_1.1.1 foreach_1.4.8
[31] limma_3.42.0 ggplot2_3.3.2
loaded via a namespace (and not attached): [1] minqa_1.2.4 colorspace_1.4-1 ellipsis_0.3.1
[4] colorRamps_2.3 htmlTable_1.13.3 XVector_0.26.0
[7] fs_1.3.1 base64enc_0.1-3 rstudioapi_0.11
[10] ggrepel_0.8.2 bit64_0.9-7 fansi_0.4.1
[13] lubridate_1.7.9 AnnotationDbi_1.48.0 xml2_1.2.2
[16] codetools_0.2-16 splines_3.6.2 doParallel_1.0.15
[19] geneplotter_1.64.0 knitr_1.26 jsonlite_1.6.1
[22] Formula_1.2-3 nloptr_1.2.1 pbkrtest_0.4-8.6
[25] annotate_1.64.0 dbplyr_1.4.2 png_0.1-7
[28] httr_1.4.1 compiler_3.6.2 backports_1.1.8
[31] assertthat_0.2.1 Matrix_1.2-18 cli_2.0.2
[34] acepack_1.4.1 htmltools_0.4.0 prettyunits_1.1.1
[37] tools_3.6.2 gtable_0.3.0 glue_1.4.1
[40] GenomeInfoDbData_1.2.2 reshape2_1.4.4 Rcpp_1.0.4.6
[43] cellranger_1.1.0 vctrs_0.3.1 gdata_2.18.0
[46] nlme_3.1-142 iterators_1.0.12 xfun_0.11
[49] rvest_0.3.5 lme4_1.1-21 lifecycle_0.2.0
[52] gtools_3.8.1 XML_3.98-1.20 zlibbioc_1.32.0
[55] MASS_7.3-51.6 hms_0.5.3 RColorBrewer_1.1-2
[58] yaml_2.2.0 memoise_1.1.0 gridExtra_2.3
[61] rpart_4.1-15 latticeExtra_0.6-29 stringi_1.4.6
[64] RSQLite_2.1.5 genefilter_1.68.0 checkmate_1.9.4
[67] caTools_1.18.0 boot_1.3-23 rlang_0.4.6
[70] pkgconfig_2.0.3 bitops_1.0-6 evaluate_0.14
[73] lattice_0.20-38 htmlwidgets_1.5.1 bit_1.1-14
[76] tidyselect_1.1.0 plyr_1.8.6 magrittr_1.5
[79] R6_2.4.1 generics_0.0.2 Hmisc_4.3-0
[82] DBI_1.1.0 haven_2.2.0 pillar_1.4.4
[85] foreign_0.8-72 withr_2.2.0 RCurl_1.95-4.12
[88] nnet_7.3-12 modelr_0.1.5 crayon_1.3.4
[91] KernSmooth_2.23-16 rmarkdown_2.0 jpeg_0.1-8.1
[94] progress_1.2.2 readxl_1.3.1 locfit_1.5-9.1
[97] grid_3.6.2 data.table_1.12.8 blob_1.2.0
[100] reprex_0.3.0 digest_0.6.25 xtable_1.8-4
[103] munsell_0.5.0