# Chunk option: show the source code of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
# `root.dir` is a knitr *package* option, not a chunk option, so it has to be
# set through `opts_knit`; passed to `opts_chunk$set()` it is not applied.
# All paths below are built with here::here(), so they resolve the same way
# regardless of the working directory.
knitr::opts_knit$set(root.dir = here::here())
library(dplyr)
library(ggplot2)
library(googledrive)
library(data.table)
# Fix the RNG seed so that any stochastic step is reproducible.
set.seed(2019)
#### Import utility functions ####
# Project helpers (e.g. merge_data(), used further down) live in code/utils.R.
source(here::here("code/utils.R"))
Meta-analysis of genetic correlation studies between Parkinson’s Disease and other phenotypes.
### phenomix is currently under development (not yet released) ####
# devtools::install_github("neurogenomics/phenomix")
library(phenomix)
# Presumably loads the DEGAS dataset through phenomix -- TODO confirm what
# object type get_DEGAS() returns.
degas <- phenomix::get_DEGAS()
# NOTE(review): `find_neighors` looks like a misspelling of `find_neighbors`,
# and it is called without passing `degas` -- confirm against the phenomix API.
knn <- phenomix::find_neighors()
# Optional export of the `knn` result (left disabled):
# data.table::fwrite(knn, here::here("data/DEGAS/DEGAS_contributionGene_corr.csv"))
I began to explore this dataset but then realized it does not contain any Parkinson’s Disease GWAS.
#### Import LD Hub correlation matrices (sheets "rG" and "rP") ####
# Both sheets come from the same workbook, so its path is defined once.
# NOTE(review): the previous code read "rG" from data/LD_Hub/ but "rP" from
# data/ directly. The LD_Hub/ sub-directory is assumed to be the correct
# location -- confirm against the repository layout.
ld_hub_xlsx <- here::here(
  "data/LD_Hub/LD-Hub_genetic_correlation_221x221_no_ENIGMA.xlsx"
)

#' Read one sheet of the LD Hub workbook as a matrix
#'
#' @param sheet_name Name of the worksheet to read (e.g. "rG" or "rP").
#' @param path Path to the LD Hub .xlsx workbook.
#' @return A matrix whose row names come from the "NA." column of the sheet,
#'   with every "." in row/column names replaced by "-" and every "/" cell
#'   replaced by `NA`.
read_ld_hub_sheet <- function(sheet_name, path = ld_hub_xlsx) {
  corr_tbl <- xlsx::read.xlsx(path, sheetName = sheet_name) %>%
    tibble::column_to_rownames("NA.")
  # Replace "." with "-" in the labels on both axes (presumably undoing the
  # name mangling applied on import -- TODO confirm).
  colnames(corr_tbl) <- gsub("[.]", "-", colnames(corr_tbl))
  row.names(corr_tbl) <- gsub("[.]", "-", row.names(corr_tbl))
  # "/" is used as a placeholder in the sheet; treat it as missing.
  corr_tbl[corr_tbl == "/"] <- NA
  Matrix::as.matrix(corr_tbl, sparse = TRUE)
}

rg <- read_ld_hub_sheet("rG")
rp <- read_ld_hub_sheet("rP")
#### Check for Parkinson's ####
# Print every column name of `rp` that contains "Parkinson" (case-insensitive).
grep(
  pattern = "Parkinson",
  x = colnames(rp),
  ignore.case = TRUE,
  value = TRUE
)
You can download the meta-analysis tables from Google Drive and process them however you like.
# Download the meta-analysis workbook from Google Drive to a local file
# (overwriting any earlier copy), then list the worksheets it contains.
table_s2_path <- here::here("data/metaanalysis/TableS2.xlsx")
googledrive::drive_download(
  "https://docs.google.com/spreadsheets/d/19jz9l2P7W2f1PWT9t3x0L8h6VQL021RdRI7D4FAOjX0/edit?usp=sharing#gid=1366974120",
  path = table_s2_path,
  overwrite = TRUE
)
sheets <- readxl::excel_sheets(table_s2_path)
Alternatively, you can use the function `merge_data` we’ve provided in `code/utils.R` to automatically download, merge and harmonise the data.
# Let merge_data() (from code/utils.R) fetch the workbook and combine every
# sheet matching "corr" into a single table.
table_s2_url <- "https://docs.google.com/spreadsheets/d/19jz9l2P7W2f1PWT9t3x0L8h6VQL021RdRI7D4FAOjX0/edit#gid=1366974120"
corr_data <- merge_data(
  googledrive_url = table_s2_url,
  file_name = "TableS2.xlsx",
  sheet_search = "corr"
)
## ! Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to
## the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googledrive package is using a cached token for
## 'brian_schilder@alumni.brown.edu'.
## File downloaded:
## • 'TableS2' <id: 19jz9l2P7W2f1PWT9t3x0L8h6VQL021RdRI7D4FAOjX0>
## Saved locally as:
## • '/Users/schilder/Desktop/Mount_Sinai/PD_omics_review/data/metaanalysis/TableS2.xlsx'
## [1] "Bryois2020_corr"
## Warning: Missing target_cols:
## - Dataset
## [1] "Nalls2019_corr"
## Warning: Missing target_cols:
## - Dataset
## [1] "Agarwal2020_corr"
## [1] "DeGAs_corr"
## [1] "dPRS_corr"
## [1] "Yao2021_corr"
## Warning: Missing target_cols:
## - Dataset
## Harmonising labels:
## - Trait1
## - Trait2
## Dropping dataset-specific columns.
## 5,514 rows merged.
## Saving merged data: ==> /Users/schilder/Desktop/Mount_Sinai/PD_omics_review/data/metaanalysis/merged_corr.csv
Get the top N traits per study that are most highly correlated with Parkinson’s Disease.
# Number of top-ranked rows to keep per (Sheet, Trait1) group; ties are kept,
# so a group can end up with more than this many rows.
max_traits <- 10

# Step 1: per-sheet statistics, computed after dropping self-correlations.
sheet_stats <- corr_data %>%
  dplyr::group_by(Sheet) %>%
  subset(Trait1 != Trait2) %>%
  #### Correct for the number of correlations ? ####
  dplyr::mutate(
    # Absolute correlation divided by the number of rows in the sheet.
    corr_q = abs(corr) / n(),
    # Absolute correlation standardised within the sheet.
    corr_z = scale(abs(corr))[, 1],
    # Sign of the correlation: "+", "-" or "0".
    valence = ifelse(corr > 0, "+", ifelse(corr < 0, "-", 0)),
    # Number of rows (tests) in the sheet.
    n_tests = n()
  ) %>%
  dplyr::ungroup()

# Step 2: within each (Sheet, Trait1) group, tag traits with the group id and
# keep the rows with the largest corr_q.
top_corr <- sheet_stats %>%
  dplyr::group_by(Sheet, Trait1) %>%
  dplyr::mutate(
    id1 = paste0(Trait1, "@", dplyr::cur_group_id()),
    id2 = paste0(Trait2, "@", dplyr::cur_group_id())
  ) %>%
  dplyr::slice_max(
    order_by = corr_q,
    n = max_traits,
    with_ties = TRUE
  ) %>%
  dplyr::select(
    Sheet, Trait1, Trait2, id1, id2,
    corr, corr_q, corr_z, valence, Reference, Source, n_tests
  ) %>%
  dplyr::ungroup() %>%
  # dplyr::arrange(dplyr::desc(abs(corr_q))) %>%
  data.table::data.table()

# Turn Trait2 into an ordered factor whose levels are the reversed order of
# first appearance.
trait2_levels <- rev(unique(top_corr$Trait2))
top_corr$Trait2 <- factor(
  x = top_corr$Trait2,
  levels = trait2_levels,
  ordered = TRUE
)

# Display the table with the createDT() helper.
createDT(top_corr)
# Export the top correlations.
# NOTE(review): this used to write the full `corr_data` table even though the
# output file is named "merged_top_corr.csv"; the full merge is already saved
# by merge_data() as merged_corr.csv. Writing `top_corr` matches the file name
# -- confirm this is the intended content.
export_cols <- c("Sheet", "Trait1", "Trait2", "corr", "Reference", "Source")
data.table::fwrite(
  # `top_corr` is a data.table, so select columns by name with `with = FALSE`.
  x = top_corr[, export_cols, with = FALSE],
  file = here::here("data/metaanalysis", "merged_top_corr.csv")
)
# Horizontal bar chart of the top correlations, one facet row per sheet.

# y-axis labels: each Trait2 level with everything from "@" onward removed.
trait_labels <- setNames(
  gsub("@.*", "", levels(top_corr$Trait2)),
  levels(top_corr$Trait2)
)

gg_cor <- ggplot(data = top_corr, aes(x = corr, y = Trait2, fill = corr)) +
  geom_col() +
  geom_point(show.legend = FALSE, alpha = .5) +
  # `rows =` replaces the deprecated `facets =` argument; the formula itself
  # is unchanged. The strip label is the sheet name (with a line break before
  # any "(") followed by the number of tests in that sheet.
  facet_grid(
    rows = paste0(
      gsub("[(]", "\n(", Sheet),
      "\n (tests = ", formatC(n_tests, big.mark = ","), ")"
    ) ~ .,
    space = "free",
    scales = "free",
    drop = FALSE
  ) +
  scale_x_continuous(limits = c(-1, 1)) +
  scale_y_discrete(labels = trait_labels) +
  scale_fill_gradient(low = "red", high = "blue", limits = c(-1, 1)) +
  labs(
    title = "Genetic correlations with Parkinson's Disease",
    x = "Correlation"
  ) +
  theme_bw() +
  theme(
    strip.background = element_rect(fill = "black"),
    strip.text = element_text(color = "white", angle = 0),
    strip.text.y = element_text(color = "white", angle = 0),
    panel.grid.minor.x = element_blank()
  )
print(gg_cor)
# Save the figure as both PDF and PNG with identical dimensions.
plot_dir <- here::here("plots/metaanalysis")
# ggsave() may fail when the target directory is missing, so create it first
# (no-op when it already exists).
dir.create(plot_dir, recursive = TRUE, showWarnings = FALSE)
for (plot_ext in c("pdf", "png")) {
  ggplot2::ggsave(
    filename = file.path(
      plot_dir,
      paste0("correlation_metaanalysis.", plot_ext)
    ),
    plot = gg_cor, dpi = 400, height = 8, width = 9
  )
}
# Print the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] data.table_1.14.2 googledrive_2.0.0 ggplot2_3.3.5 dplyr_1.0.7
## [5] BiocStyle_2.22.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.7 here_1.0.1 assertthat_0.2.1
## [4] rprojroot_2.0.2 digest_0.6.28 utf8_1.2.2
## [7] R6_2.5.1 cellranger_1.1.0 evaluate_0.14
## [10] httr_1.4.2 highr_0.9 pillar_1.6.4
## [13] rlang_0.4.12 curl_4.3.2 readxl_1.3.1
## [16] rstudioapi_0.13 jquerylib_0.1.4 magick_2.7.3
## [19] DT_0.19 rmarkdown_2.11 textshaping_0.3.6
## [22] labeling_0.4.2 stringr_1.4.0 htmlwidgets_1.5.4
## [25] munsell_0.5.0 compiler_4.1.0 xfun_0.28
## [28] pkgconfig_2.0.3 askpass_1.1 systemfonts_1.0.3
## [31] htmltools_0.5.2 openssl_1.4.5 tidyselect_1.1.1
## [34] tibble_3.1.6 bookdown_0.24 fansi_0.5.0
## [37] crayon_1.4.2 withr_2.4.2 rappdirs_0.3.3
## [40] grid_4.1.0 jsonlite_1.7.2 gtable_0.3.0
## [43] lifecycle_1.0.1 DBI_1.1.1 magrittr_2.0.1
## [46] scales_1.1.1 cli_3.1.0 stringi_1.7.5
## [49] farver_2.1.0 fs_1.5.0 bslib_0.3.1
## [52] ellipsis_0.3.2 ragg_1.2.0 generics_0.1.1
## [55] vctrs_0.3.8 tools_4.1.0 glue_1.5.0
## [58] purrr_0.3.4 crosstalk_1.2.0 fastmap_1.1.0
## [61] yaml_2.2.1 colorspace_2.0-2 gargle_1.2.0
## [64] BiocManager_1.30.16 knitr_1.36 sass_0.4.0