::opts_chunk$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))
knitr# knitr::opts_knit$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))
library(dplyr)
library(tidyr)
library(ggplot2)
set.seed(2019)
Plot genes that come from entering “Parkinson’s” into Geneshot, which gathers associated gene names from PubMed literature.
<- function(PD_genes,
replace_ENSP gene_col="Gene"){
<- which(startsWith(PD_genes[[gene_col]], "ENSP"))
inds if(length(inds)>0){
<- PD_genes[[gene_col]][inds]
ensembl_ids <- AnnotationDbi::mapIds(EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75,
conversion keys = ensembl_ids,
keytype = "PROTEINID",
column = "SYMBOL")
<- unname(conversion)
PD_genes[inds,gene_col] <- PD_genes[!is.na(PD_genes[[gene_col]]),]
PD_genes
} return(PD_genes)
}
<- data.table::fread(here::here("data/Geneshot/geneshot.parkinsons_disease.tsv")) %>%
PD_genes ::mutate(Zscore=scales::rescale(`Fraction of publications from total gene publication`)) %>%
dplyr::select(Rank, Gene, Zscore)
dplyr<- replace_ENSP(PD_genes) PD_genes
## Warning: Unable to map 8 of 38 requested IDs.
<- data.table::fread(here::here("data/Geneshot/geneshot.parkinsons_disease.tsv"))
gene_counts <- data.table::fread(here::here("data/Geneshot/geneshot_predictions_AutoRIF.parkinsons_disease.tsv")) %>%
PD_pred_genes ::mutate(Zscore=scales::rescale(Score),
dplyrType="Geneshot") %>%
::select(ID=Rank,Name=Gene,Zscore,
dplyrGenes="Genes contributing most to score:similarity score",
%>%
Type) ::mutate(Type_ID=paste(Type,ID,sep="_")) %>%
dplyrmerge(gene_counts, all.x = T, by.x = "Name", by.y = "Gene")
<- replace_ENSP(PD_pred_genes, gene_col = "Name")
PD_pred_genes
<- PD_pred_genes %>%
PD_pred_genes_melt ::separate(col="Genes",
tidyrsep = ";",
into = paste0("gene",1:9)) %>%
::melt.data.table(id.vars = c("ID","Name","Zscore","Type","Type_ID","Rank","Publication count",
data.table"Fraction of publications from total gene publication")) %>%
::separate(col="value",sep = ":", into=c("gene2","similarity")) %>%
tidyrsubset(!is.na(gene2))
## Warning: Expected 9 pieces. Missing pieces filled with `NA` in 38 rows [7,
## 20, 28, 29, 32, 38, 40, 50, 69, 72, 88, 89, 93, 94, 107, 110, 111, 121, 122,
## 132, ...].
<- reshape2::dcast(PD_pred_genes_melt,
EXP formula = Name ~ gene2,
value.var = "similarity",
fill = 0) %>%
::column_to_rownames("Name") %>%
tibbleas.matrix() %>%
as("sparseMatrix")
library(network)
##
## 'network' 1.17.1 (2021-06-12), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information
library(ggnetwork)
library(knn.covertree)
# seurat <- Seurat::CreateSeuratObject(as(t(EXP), "sparseMatrix"),
# meta.data = data.frame(PD_pred_genes,
# row.names = PD_pred_genes$Name))
# seurat <- Seurat::NormalizeData(seurat)
# seurat <- Seurat::FindVariableFeatures(seurat)
# seurat <- Seurat::ScaleData(seurat)
# seurat <- Seurat::RunPCA(seurat, npcs=50)
# seurat <- Seurat::FindNeighbors(seurat, force.recalc = T)
# seurat <- Seurat::RunUMAP(seurat, dims=1:20)
# seurat <- Seurat::FindClusters(seurat)
# Seurat::DimPlot(seurat, group.by = "Zscore") + Seurat::NoLegend()
#### UMAP ####
<- uwot::umap(X = as.matrix(EXP), n_components = 2,
umap_res ret_extra = c("model","nn","fgraph"))# %>% data.frame()
<- umap_res$embedding %>%
umap_df `colnames<-`(paste0("UMAP",1:ncol(umap_res$embedding)))
#### KNN ####
<- knn.covertree::find_knn(umap_df, k=3)
knn_res # dim(knn_res$dist_mat)
<- igraph::graph_from_adjacency_matrix(knn_res$dist_mat,#umap_res$fgraph,
adjGraph mode = "undirected",
weighted = T)
<- igraph::cluster_louvain(adjGraph) clust
#### Annotated UMAP ####
<- merge(PD_pred_genes,
umap_dat cbind(data.frame(umap_df),
Name = row.names(EXP),
cluster=clust$membership),
by="Name") %>%
::arrange(desc(Zscore))
dplyr
<- umap_dat %>%
label_genes ::group_by(cluster) %>%
dplyr::slice_head(n = 5)
dplyr
#### plot ####
<- ggplot(umap_dat, aes(x=UMAP1, y=UMAP2, color=Zscore,
gg_umap # size=`Publication count`,
size=Zscore*2,
label=Name)) +
stat_density_2d(aes(fill = ..level..), geom = "polygon",adjust = .2, contour_var = "ndensity") +
scale_fill_distiller(palette=4, direction=-1) +
scale_x_continuous(expand = c(0.1, 0.1)) +
scale_y_continuous(expand = c(0.1, 0.1)) +
# stat_density_2d_filled(show.cegend = F, contour_var = "ndensity",
# # linejoin = "bevel",
# # alpha=.8,
# breaks = seq(0.1, 1.0, length.out = 19)) +
# scale_fill_manual(values = c("black",pals::inferno(n = 18))) +
geom_point(alpha=.8) +
scale_color_gradientn(colours = c("magenta","blue")) +
::geom_label_repel(data = label_genes, max.overlaps = 20,na.rm = T,
ggrepelaes(x=UMAP1, y=UMAP2, label=Name),
fill = alpha(c("white"),0.9),
show.legend = F) +
theme_void() +
theme(plot.background = element_rect(fill = "black"),
panel.background = element_rect(fill = "black"),
legend.text = element_text(color="white"),
legend.title = element_text(color="white"))
print(gg_umap)
#### Save ####
# ggsave(here::here("plots/geneshot_UMAP.jpg"), gg_umap, dpi = 300, height = 8, width = 10)
# ggsave(here::here("plots/geneshot_UMAP.pdf"),gg_umap, dpi = 300, height = 8, width = 10)
::ggplotly(gg_umap) plotly
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
Except this is like the one case where Rayshader doesn’t work…
library(rayshader)
::clear3d()
rglplot_gg(gg_umap, width = 5, height = 5,
multicore = TRUE, scale = 250,
zoom = 0.7, theta = 10, phi = 30, windowsize = c(800, 800))
Experimented with similarity networks of gene-gene data. A bit unncessary since UMAP constructs networks anyways.
library(BUS)
#### Construct similarity matrix using correlation (corr) or mutual information (MI)
<- BUS::gene.similarity(EXP, measure="corr",net.trim="none")
sM <- BUS::gene.pvalue(EXP, measure="corr",net.trim="none")
pM <- BUS::pred.network(pM, sM)
netM ==0] <- NA
netM[netM
#### Alternative plotting method with iGraph ####
# adjGraph <- igraph::graph_from_adjacency_matrix(sM, weighted = T, mode = "undirected")
# clusts <- igraph::cluster_fast_greedy(adjGraph)
# plot(adjGraph)
#### Construct network object ####
<- reshape2::melt(netM) %>% `colnames<-`(c("from","to","weight"))
edges <- predictions[,c("Gene","Rank","Score")]
nodes <- network(edges, vertex.attr = nodes, matrix.type = "edgelist", ignore.eval = F)
net # help(layout)
<- ggnetwork(net,
netF layout = "fruchtermanreingold",
cell.jitter = 0)
<- netF[!is.na(netF$weight),]
netF <- netF[netF$weight>.005,]
netF
ggplot(netF,
aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(aes(alpha=weight), color = "black", na.rm = T) +
geom_nodes(aes(size=Score), color = "grey") +
geom_nodetext(aes(color = Score, label = Gene, size=Score),
fontface = "bold", nudge_y = .05) +
theme_blank()
Turn genes into word cloud.
library(ggwordcloud)
ggplot(PD_genes[1:100,], aes(label = Gene, size = Zscore, color=Zscore)) +
geom_text_wordcloud_area() +
theme_minimal()
::sessionInfo() utils
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggwordcloud_0.5.0 knn.covertree_1.0 ggnetwork_0.5.10 network_1.17.1
## [5] ggplot2_3.3.5 tidyr_1.1.3 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-2 RcppEigen_0.3.3.9.1
## [3] rjson_0.2.20 ellipsis_0.3.2
## [5] rprojroot_2.0.2 XVector_0.33.0
## [7] GenomicRanges_1.45.0 farver_2.1.0
## [9] ggrepel_0.9.1 bit64_4.0.5
## [11] AnnotationDbi_1.55.1 RSpectra_0.16-0
## [13] fansi_0.5.0 xml2_1.3.2
## [15] codetools_0.2-18 cachem_1.0.5
## [17] knitr_1.33 jsonlite_1.7.2
## [19] Rsamtools_2.9.1 dbplyr_2.1.1
## [21] png_0.1-7 uwot_0.1.10.9000
## [23] compiler_4.1.0 httr_1.4.2
## [25] assertthat_0.2.1 Matrix_1.3-4
## [27] fastmap_1.1.0 lazyeval_0.2.2
## [29] htmltools_0.5.1.1 prettyunits_1.1.1
## [31] tools_4.1.0 igraph_1.2.6
## [33] coda_0.19-4 gtable_0.3.0
## [35] glue_1.4.2 GenomeInfoDbData_1.2.6
## [37] reshape2_1.4.4 rappdirs_0.3.3
## [39] Rcpp_1.0.7 Biobase_2.53.0
## [41] statnet.common_4.5.0 jquerylib_0.1.4
## [43] vctrs_0.3.8 Biostrings_2.61.2
## [45] rtracklayer_1.53.1 crosstalk_1.1.1
## [47] xfun_0.25 stringr_1.4.0
## [49] lifecycle_1.0.0 restfulr_0.0.13
## [51] ensembldb_2.17.4 XML_3.99-0.7
## [53] zlibbioc_1.39.0 MASS_7.3-54
## [55] scales_1.1.1 hms_1.1.0
## [57] MatrixGenerics_1.5.3 ProtGenerics_1.25.1
## [59] parallel_4.1.0 SummarizedExperiment_1.23.1
## [61] AnnotationFilter_1.17.1 RColorBrewer_1.1-2
## [63] yaml_2.2.1 curl_4.3.2
## [65] memoise_2.0.0 sass_0.4.0
## [67] biomaRt_2.49.4 stringi_1.7.3
## [69] RSQLite_2.2.7 highr_0.9
## [71] S4Vectors_0.31.0 BiocIO_1.3.0
## [73] GenomicFeatures_1.45.1 BiocGenerics_0.39.1
## [75] filelock_1.0.2 BiocParallel_1.27.3
## [77] GenomeInfoDb_1.29.3 rlang_0.4.11
## [79] pkgconfig_2.0.3 matrixStats_0.60.0
## [81] bitops_1.0-7 evaluate_0.14
## [83] lattice_0.20-44 purrr_0.3.4
## [85] htmlwidgets_1.5.3 GenomicAlignments_1.29.0
## [87] labeling_0.4.2 bit_4.0.4
## [89] tidyselect_1.1.1 here_1.0.1
## [91] RcppAnnoy_0.0.19 plyr_1.8.6
## [93] magrittr_2.0.1 R6_2.5.0
## [95] IRanges_2.27.0 generics_0.1.0
## [97] DelayedArray_0.19.1 DBI_1.1.1
## [99] pillar_1.6.2 withr_2.4.2
## [101] KEGGREST_1.33.0 RCurl_1.98-1.4
## [103] tibble_3.1.3 crayon_1.4.1
## [105] utf8_1.2.2 plotly_4.9.4.9000
## [107] BiocFileCache_2.1.1 rmarkdown_2.10
## [109] progress_1.2.2 grid_4.1.0
## [111] isoband_0.2.5 data.table_1.14.0
## [113] blob_1.2.2 digest_0.6.27
## [115] EnsDb.Hsapiens.v75_2.99.0 stats4_4.1.0
## [117] munsell_0.5.0 viridisLite_0.4.0
## [119] bslib_0.2.5.1