vignettes/finemapping_portal.Rmd
finemapping_portal.Rmd
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## ⠊⠉⠡⣀⣀⠊⠉⠡⣀⣀⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠
## ⠌⢁⡐⠉⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠉⢂⡈⠑⣀⠉⢄⡈⠡⣀
## ⠌⡈⡐⢂⢁⠒⡈⡐⢂⢁⠒⡈⡐⢂⢁⠑⡈⡈⢄⢁⠡⠌⡈⠤⢁⠡⠌⡈⠤⢁⠡⠌⡈⡠⢁⢁⠊⡈⡐⢂
##
## ── 🦇 🦇 🦇 e c h o l o c a t o R 🦇 🦇 🦇 ─────────────────────────────────
##
## ── v2.0.3 ──────────────────────────────────────────────────────────────────────
## ⠌⡈⡐⢂⢁⠒⡈⡐⢂⢁⠒⡈⡐⢂⢁⠑⡈⡈⢄⢁⠡⠌⡈⠤⢁⠡⠌⡈⠤⢁⠡⠌⡈⡠⢁⢁⠊⡈⡐⢂
## ⠌⢁⡐⠉⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠊⢂⡐⠑⣀⠉⢂⡈⠑⣀⠉⢄⡈⠡⣀
## ⠊⠉⠡⣀⣀⠊⠉⠡⣀⣀⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠⠊⠉⠢⣀⡠
## ⓞ If you use echolocatoR or any of the echoverse subpackages, please cite:
## ▶ Brian M Schilder, Jack Humphrey, Towfique
## Raj (2021) echolocatoR: an automated
## end-to-end statistical and functional
## genomic fine-mapping pipeline,
## Bioinformatics; btab658,
## https://doi.org/10.1093/bioinformatics/btab658
## ⓞ Please report any bugs/feature requests on GitHub:
## ▶
## https://github.com/RajLabMSSM/echolocatoR/issues
## ⓞ Contributions are welcome!:
## ▶
## https://github.com/RajLabMSSM/echolocatoR/pulls
##
## ────────────────────────────────────────────────────────────────────────────────
Here we take advantage of the fine-mapping results files already available on the echolocatoR Fine-mapping Portal.
## Keep the demo small: use only the first three entries of the Locus column
loci <- echodata::topSNPs_Nalls2019$Locus[seq_len(3)]
We can query multiple studies, loci, and even data types at once.
When `as_datatable=TRUE`, all file paths and metadata are conveniently organized into a data.table.
## Store downloaded files in this R session's temporary directory
results_dir <- tempdir()
## Query the portal for the fine-mapping and LD files of the selected loci;
## with as_datatable = TRUE the file paths and metadata come back as a table
local_files <- echodata::portal_query(
  dataset_types = "GWAS",
  phenotypes = "parkinson",
  file_types = c("multi_finemap", "LD"),
  loci = loci,
  LD_panels = "UKB",
  results_dir = results_dir,
  as_datatable = TRUE
)
## Fetching echolocatoR Fine-mapping Portal study metadata.
## + 1 dataset(s) remain after filtering.
## + Searching for multi_finemap files...
## OK (HTTP 200).+ Searching for LD files...
## OK (HTTP 200).+ 6 unique files identified.
## + Downloading 4 files...
## + Returning table with local file paths.
Next, we can gather all of the fine-mapping results previously generated by `finemap_loci()`.
`merge_finemapping_results()` recursively searches for the
correct files within a hierarchical folder structure and imports only
the multi-finemap files.
## Gather every multi-finemap file found under results_dir into one table
merged_DT <- echodata::merge_finemapping_results(
  dataset = results_dir,
  minimum_support = 0,
  include_leadSNPs = TRUE,
  consensus_thresh = 2
)
## + Gathering all fine-mapping results from storage...
## + 2 multi-finemap files found.
## + Importing results... ASXL3
## + Importing results... BIN3
## Identifying Consensus SNPs...
## + support_thresh = 2
## + Calculating mean Posterior Probability (mean.PP)...
## + 4 fine-mapping methods used.
## + 17 Credible Set SNPs identified.
## + 6 Consensus SNPs identified.
## + Saving merged results ==> /tmp/RtmpJQtSZ8/file217973ffe623merged_results.csv.gz
## Print a summary report (loci, SNPs, lead/credible-set/consensus SNP counts)
## for the merged fine-mapping results
echodata::results_report(merged_DT)
## echolocatoR results report (all loci):
## + Overall report:
## ++ 2 Loci.
## ++ 10605 SNPs.
## + Lead SNP report:
## ++ 2 lead SNPs.
## ++ Lead SNP mean PP = 0.5
## + Union Credible Set report:
## ++ 17 UCS SNPs.
## ++ UCS mean PP = 0.35
## ++ 2 UCS SNPs that are also lead SNPs
## + Consensus SNP report:
## ++ 6 Consensus SNPs.
## ++ Consensus SNP mean PP = 0.54
## ++ 1 Consensus SNPs that are also lead SNPs
Next, we import a subset of the LD matrices for only the lead SNP.
## Keep only the rows of the query table that describe LD files
ld_files <- local_files[file_type == "LD", ]
## Read each LD file from its local path; naming the paths by locus means the
## resulting list can be indexed by locus name
ld_matrices <- lapply(
  stats::setNames(ld_files$local_file, ld_files$locus),
  data.table::fread
)
## Preview the first rows of the table for one locus
knitr::kable(head(ld_matrices$ASXL3))
SNP | rs1941685 | rs1941685.1 |
---|---|---|
rs12968480 | 0.0396723 | 0.0396723 |
rs12967667 | 0.0424208 | 0.0424208 |
rs9945156 | 0.0424096 | 0.0424096 |
rs1851700 | 0.0419115 | 0.0419115 |
rs117840441 | 0.0664300 | 0.0664300 |
rs1523592 | 0.0424137 | 0.0424137 |
Now let’s plot one locus as an example.
## Use the first locus present in the merged results as the example
locus <- unique(merged_DT$Locus)[1]
## Fine-mapping rows and LD table belonging to that locus
dat <- merged_DT[Locus == locus, ]
LD_matrix <- ld_matrices[[locus]]
## Per-locus directory inside the session's temporary directory
locus_dir <- file.path(tempdir(), locus)
plt <- echoplot::plot_locus(
  dat = dat,
  LD_matrix = LD_matrix,
  LD_reference = "UKB",
  locus_dir = locus_dir,
  nott_epigenome = TRUE,
  nott_regulatory_rects = TRUE,
  nott_show_placseq = TRUE,
  zoom = "20x"
)
## Record the R version, platform, and package versions used for this run
utils::sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] echolocatoR_2.0.3 BiocStyle_2.26.0
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 rtracklayer_1.58.0
## [3] GGally_2.1.2 R.methodsS3_1.8.2
## [5] ragg_1.2.4 tidyr_1.2.1
## [7] echoLD_0.99.8 ggplot2_3.3.6
## [9] bit64_4.0.5 knitr_1.40
## [11] irlba_2.3.5.1 DelayedArray_0.24.0
## [13] R.utils_2.12.1 data.table_1.14.4
## [15] rpart_4.1.19 KEGGREST_1.38.0
## [17] RCurl_1.98-1.9 AnnotationFilter_1.22.0
## [19] generics_0.1.3 BiocGenerics_0.44.0
## [21] GenomicFeatures_1.50.2 RSQLite_2.2.18
## [23] proxy_0.4-27 bit_4.0.4
## [25] tzdb_0.3.0 xml2_1.3.3
## [27] SummarizedExperiment_1.28.0 assertthat_0.2.1
## [29] viridis_0.6.2 xfun_0.34
## [31] hms_1.1.2 jquerylib_0.1.4
## [33] evaluate_0.17 fansi_1.0.3
## [35] restfulr_0.0.15 progress_1.2.2
## [37] dbplyr_2.2.1 readxl_1.4.1
## [39] Rgraphviz_2.41.2 igraph_1.3.5
## [41] DBI_1.1.3 htmlwidgets_1.5.4
## [43] reshape_0.8.9 downloadR_0.99.5
## [45] stats4_4.2.1 purrr_0.3.5
## [47] ellipsis_0.3.2 dplyr_1.0.10
## [49] backports_1.4.1 bookdown_0.29
## [51] biomaRt_2.54.0 deldir_1.0-6
## [53] MatrixGenerics_1.10.0 vctrs_0.5.0
## [55] Biobase_2.58.0 ensembldb_2.22.0
## [57] cachem_1.0.6 withr_2.5.0
## [59] BSgenome_1.66.1 checkmate_2.1.0
## [61] GenomicAlignments_1.34.0 prettyunits_1.1.1
## [63] cluster_2.1.4 ape_5.6-2
## [65] dir.expiry_1.5.1 lazyeval_0.2.2
## [67] crayon_1.5.2 basilisk.utils_1.9.4
## [69] crul_1.3 pkgconfig_2.0.3
## [71] GenomeInfoDb_1.34.1 nlme_3.1-160
## [73] ProtGenerics_1.30.0 XGR_1.1.8
## [75] nnet_7.3-18 pals_1.7
## [77] rlang_1.0.6 lifecycle_1.0.3
## [79] filelock_1.0.2 httpcode_0.3.0
## [81] BiocFileCache_2.6.0 echotabix_0.99.8
## [83] dichromat_2.0-0.1 cellranger_1.1.0
## [85] coloc_5.1.0.1 rprojroot_2.0.3
## [87] matrixStats_0.62.0 graph_1.76.0
## [89] Matrix_1.5-1 osfr_0.2.9
## [91] boot_1.3-28 base64enc_0.1-3
## [93] png_0.1-7 viridisLite_0.4.1
## [95] rjson_0.2.21 rootSolve_1.8.2.3
## [97] bitops_1.0-7 R.oo_1.25.0
## [99] ggnetwork_0.5.10 Biostrings_2.66.0
## [101] blob_1.2.3 mixsqp_0.3-43
## [103] stringr_1.4.1 echoplot_0.99.5
## [105] dnet_1.1.7 readr_2.1.3
## [107] jpeg_0.1-9 S4Vectors_0.36.0
## [109] echodata_0.99.15 scales_1.2.1
## [111] memoise_2.0.1 magrittr_2.0.3
## [113] plyr_1.8.7 hexbin_1.28.2
## [115] zlibbioc_1.44.0 compiler_4.2.1
## [117] echoconda_0.99.8 BiocIO_1.8.0
## [119] RColorBrewer_1.1-3 catalogueR_1.0.0
## [121] Rsamtools_2.14.0 cli_3.4.1
## [123] XVector_0.38.0 echoannot_0.99.10
## [125] patchwork_1.1.2 htmlTable_2.4.1
## [127] Formula_1.2-4 MASS_7.3-58.1
## [129] tidyselect_1.2.0 stringi_1.7.8
## [131] textshaping_0.3.6 highr_0.9
## [133] yaml_2.3.6 supraHex_1.35.0
## [135] latticeExtra_0.6-30 ggrepel_0.9.1
## [137] grid_4.2.1 sass_0.4.2
## [139] VariantAnnotation_1.44.0 tools_4.2.1
## [141] lmom_2.9 parallel_4.2.1
## [143] rstudioapi_0.14 foreign_0.8-83
## [145] piggyback_0.1.4 gridExtra_2.3
## [147] gld_2.6.6 digest_0.6.30
## [149] snpStats_1.47.1 BiocManager_1.30.19
## [151] Rcpp_1.0.9 GenomicRanges_1.50.0
## [153] OrganismDbi_1.40.0 httr_1.4.4
## [155] AnnotationDbi_1.60.0 RCircos_1.2.2
## [157] ggbio_1.46.0 biovizBase_1.46.0
## [159] colorspace_2.0-3 XML_3.99-0.12
## [161] fs_1.5.2 reticulate_1.26
## [163] IRanges_2.32.0 splines_4.2.1
## [165] RBGL_1.74.0 expm_0.999-6
## [167] pkgdown_2.0.6 echofinemap_0.99.4
## [169] basilisk_1.9.12 Exact_3.2
## [171] mapproj_1.2.9 systemfonts_1.0.4
## [173] jsonlite_1.8.3 susieR_0.12.27
## [175] R6_2.5.1 Hmisc_4.7-1
## [177] pillar_1.8.1 htmltools_0.5.3
## [179] glue_1.6.2 fastmap_1.1.0
## [181] DT_0.26 BiocParallel_1.32.0
## [183] class_7.3-20 codetools_0.2-18
## [185] maps_3.4.1 mvtnorm_1.1-3
## [187] utf8_1.2.2 lattice_0.20-45
## [189] bslib_0.4.0 tibble_3.1.8
## [191] curl_4.3.3 DescTools_0.99.47
## [193] zip_2.2.2 openxlsx_4.2.5.1
## [195] interp_1.1-3 survival_3.4-0
## [197] rmarkdown_2.17 desc_1.4.2
## [199] munsell_0.5.0 e1071_1.7-12
## [201] GenomeInfoDbData_1.2.9 reshape2_1.4.4
## [203] gtable_0.3.1