knitr::opts_chunk$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))
# knitr::opts_knit$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))

library(dplyr)
library(tidyr)
library(ggplot2)
set.seed(2019)

Geneshot

Plot genes that come from entering “Parkinson’s” into Geneshot, which gathers associated gene names from PubMed literature.

replace_ENSP <- function(PD_genes,
                         gene_col="Gene"){ 
  inds <- which(startsWith(PD_genes[[gene_col]], "ENSP"))
  if(length(inds)>0){
    ensembl_ids <- PD_genes[[gene_col]][inds]
    conversion <- AnnotationDbi::mapIds(EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75,
                                        keys = ensembl_ids,
                                        keytype = "PROTEINID",
                                        column = "SYMBOL")
    PD_genes[inds,gene_col] <- unname(conversion)
    PD_genes <- PD_genes[!is.na(PD_genes[[gene_col]]),]
  } 
  return(PD_genes)
}

Associated genes

PD_genes <- data.table::fread(here::here("data/Geneshot/geneshot.parkinsons_disease.tsv")) %>% 
  dplyr::mutate(Zscore=scales::rescale(`Fraction of publications from total gene publication`)) %>%
  dplyr::select(Rank, Gene, Zscore) 
PD_genes <- replace_ENSP(PD_genes)

## Warning: Unable to map 8 of 38 requested IDs.

Predicted genes

gene_counts <- data.table::fread(here::here("data/Geneshot/geneshot.parkinsons_disease.tsv"))
PD_pred_genes <- data.table::fread(here::here("data/Geneshot/geneshot_predictions_AutoRIF.parkinsons_disease.tsv")) %>%
  dplyr::mutate(Zscore=scales::rescale(Score),
                Type="Geneshot") %>%  
  dplyr::select(ID=Rank,Name=Gene,Zscore,
                Genes="Genes contributing most to score:similarity score",
                Type) %>%
  dplyr::mutate(Type_ID=paste(Type,ID,sep="_")) %>% 
  merge(gene_counts, all.x = T, by.x = "Name", by.y = "Gene")

PD_pred_genes <- replace_ENSP(PD_pred_genes, gene_col = "Name")
  
PD_pred_genes_melt <- PD_pred_genes %>%
  tidyr::separate(col="Genes", 
                  sep = ";",
                  into = paste0("gene",1:9)) %>% 
  data.table::melt.data.table(id.vars = c("ID","Name","Zscore","Type","Type_ID","Rank","Publication count",
                                          "Fraction of publications from total gene publication")) %>% 
  tidyr::separate(col="value",sep = ":", into=c("gene2","similarity")) %>%
  subset(!is.na(gene2))

## Warning: Expected 9 pieces. Missing pieces filled with `NA` in 38 rows [7,
## 20, 28, 29, 32, 38, 40, 50, 69, 72, 88, 89, 93, 94, 107, 110, 111, 121, 122,
## 132, ...].

Geneshot UMAP

Density plots tutorial

Constuct matrix

EXP <- reshape2::dcast(PD_pred_genes_melt,
                       formula = Name ~ gene2, 
                       value.var = "similarity", 
                       fill = 0) %>%
  tibble::column_to_rownames("Name")  %>%
  as.matrix() %>%
  as("sparseMatrix")

Run UMAP

library(network)

## 
## 'network' 1.17.1 (2021-06-12), part of the Statnet Project
## * 'news(package="network")' for changes since last version
## * 'citation("network")' for citation information
## * 'https://statnet.org' for help, support, and other information

library(ggnetwork)
library(knn.covertree)
 

# seurat <- Seurat::CreateSeuratObject(as(t(EXP), "sparseMatrix"), 
#                                      meta.data = data.frame(PD_pred_genes,
#                                                             row.names = PD_pred_genes$Name))
# seurat <- Seurat::NormalizeData(seurat)
# seurat <- Seurat::FindVariableFeatures(seurat)
# seurat <- Seurat::ScaleData(seurat)
# seurat <- Seurat::RunPCA(seurat, npcs=50)
# seurat <- Seurat::FindNeighbors(seurat, force.recalc = T)
# seurat <- Seurat::RunUMAP(seurat, dims=1:20)
# seurat <- Seurat::FindClusters(seurat)
# Seurat::DimPlot(seurat, group.by = "Zscore") + Seurat::NoLegend()

#### UMAP ####
umap_res <- uwot::umap(X = as.matrix(EXP), n_components = 2,
                       ret_extra = c("model","nn","fgraph"))# %>% data.frame() 
umap_df <- umap_res$embedding %>% 
  `colnames<-`(paste0("UMAP",1:ncol(umap_res$embedding)))

#### KNN ####
knn_res <- knn.covertree::find_knn(umap_df, k=3)
# dim(knn_res$dist_mat)
adjGraph <- igraph::graph_from_adjacency_matrix(knn_res$dist_mat,#umap_res$fgraph, 
                                                mode = "undirected", 
                                                weighted = T)
clust <- igraph::cluster_louvain(adjGraph)

Plot UMAP

2D

#### Annotated UMAP ####
umap_dat <- merge(PD_pred_genes, 
                  cbind(data.frame(umap_df),
                        Name = row.names(EXP),
                        cluster=clust$membership),
                  by="Name") %>% 
  dplyr::arrange(desc(Zscore)) 

 
label_genes <- umap_dat %>% 
  dplyr::group_by(cluster) %>%
  dplyr::slice_head(n = 5)

#### plot ####
gg_umap <- ggplot(umap_dat, aes(x=UMAP1, y=UMAP2, color=Zscore, 
                                # size=`Publication count`, 
                                size=Zscore*2,
                                label=Name)) +   
  stat_density_2d(aes(fill = ..level..), geom = "polygon",adjust = .2, contour_var = "ndensity") +
   scale_fill_distiller(palette=4, direction=-1) +
  scale_x_continuous(expand = c(0.1, 0.1)) +
  scale_y_continuous(expand = c(0.1, 0.1)) + 
   # stat_density_2d_filled(show.cegend = F, contour_var = "ndensity",
   #                       # linejoin = "bevel",
   #                       # alpha=.8,
   #                       breaks = seq(0.1, 1.0, length.out = 19)) +
    # scale_fill_manual(values = c("black",pals::inferno(n = 18))) +
   geom_point(alpha=.8) + 
  scale_color_gradientn(colours = c("magenta","blue")) +
  ggrepel::geom_label_repel(data = label_genes, max.overlaps = 20,na.rm = T,
                            aes(x=UMAP1, y=UMAP2, label=Name),
                            fill = alpha(c("white"),0.9),
                            show.legend = F) +
  theme_void() + 
   theme(plot.background = element_rect(fill = "black"),
         panel.background = element_rect(fill = "black"),
         legend.text = element_text(color="white"),
         legend.title = element_text(color="white"))  
print(gg_umap)

#### Save ####
# ggsave(here::here("plots/geneshot_UMAP.jpg"), gg_umap, dpi = 300, height = 8, width = 10)
# ggsave(here::here("plots/geneshot_UMAP.pdf"),gg_umap, dpi = 300, height = 8, width = 10)

2D interactive

plotly::ggplotly(gg_umap)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomLabelRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

3D

Except this is like the one case where Rayshader doesn’t work…

library(rayshader) 

rgl::clear3d()
plot_gg(gg_umap, width = 5, height = 5,
        multicore = TRUE, scale = 250, 
        zoom = 0.7, theta = 10, phi = 30, windowsize = c(800, 800))

Similarity networks

Experimented with similarity networks of gene-gene data. A bit unncessary since UMAP constructs networks anyways.

library(BUS)

#### Construct similarity matrix using correlation (corr) or mutual information (MI)
sM <- BUS::gene.similarity(EXP, measure="corr",net.trim="none")
pM <- BUS::gene.pvalue(EXP, measure="corr",net.trim="none") 
netM <- BUS::pred.network(pM, sM)
netM[netM==0] <- NA

#### Alternative plotting method with iGraph ####
# adjGraph <- igraph::graph_from_adjacency_matrix(sM, weighted = T, mode = "undirected")
# clusts <- igraph::cluster_fast_greedy(adjGraph)
# plot(adjGraph)

#### Construct network object ####
edges <- reshape2::melt(netM) %>% `colnames<-`(c("from","to","weight"))
nodes <- predictions[,c("Gene","Rank","Score")]
net <- network(edges, vertex.attr = nodes, matrix.type = "edgelist", ignore.eval = F) 
# help(layout)

netF <- ggnetwork(net,  
                  layout = "fruchtermanreingold",
                  cell.jitter = 0)
netF <- netF[!is.na(netF$weight),]
netF <- netF[netF$weight>.005,]

ggplot(netF, 
       aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(aes(alpha=weight), color = "black", na.rm = T) +
  geom_nodes(aes(size=Score), color = "grey") +
  geom_nodetext(aes(color = Score, label = Gene, size=Score),
                fontface = "bold", nudge_y = .05) +
  theme_blank()

Wordcloud

Turn genes into word cloud.

ggwordcloud tutorial

library(ggwordcloud) 

ggplot(PD_genes[1:100,], aes(label = Gene, size = Zscore, color=Zscore)) +
  geom_text_wordcloud_area() + 
  theme_minimal()

Session info

utils::sessionInfo()

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggwordcloud_0.5.0 knn.covertree_1.0 ggnetwork_0.5.10  network_1.17.1   
## [5] ggplot2_3.3.5     tidyr_1.1.3       dplyr_1.0.7      
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-2            RcppEigen_0.3.3.9.1        
##   [3] rjson_0.2.20                ellipsis_0.3.2             
##   [5] rprojroot_2.0.2             XVector_0.33.0             
##   [7] GenomicRanges_1.45.0        farver_2.1.0               
##   [9] ggrepel_0.9.1               bit64_4.0.5                
##  [11] AnnotationDbi_1.55.1        RSpectra_0.16-0            
##  [13] fansi_0.5.0                 xml2_1.3.2                 
##  [15] codetools_0.2-18            cachem_1.0.5               
##  [17] knitr_1.33                  jsonlite_1.7.2             
##  [19] Rsamtools_2.9.1             dbplyr_2.1.1               
##  [21] png_0.1-7                   uwot_0.1.10.9000           
##  [23] compiler_4.1.0              httr_1.4.2                 
##  [25] assertthat_0.2.1            Matrix_1.3-4               
##  [27] fastmap_1.1.0               lazyeval_0.2.2             
##  [29] htmltools_0.5.1.1           prettyunits_1.1.1          
##  [31] tools_4.1.0                 igraph_1.2.6               
##  [33] coda_0.19-4                 gtable_0.3.0               
##  [35] glue_1.4.2                  GenomeInfoDbData_1.2.6     
##  [37] reshape2_1.4.4              rappdirs_0.3.3             
##  [39] Rcpp_1.0.7                  Biobase_2.53.0             
##  [41] statnet.common_4.5.0        jquerylib_0.1.4            
##  [43] vctrs_0.3.8                 Biostrings_2.61.2          
##  [45] rtracklayer_1.53.1          crosstalk_1.1.1            
##  [47] xfun_0.25                   stringr_1.4.0              
##  [49] lifecycle_1.0.0             restfulr_0.0.13            
##  [51] ensembldb_2.17.4            XML_3.99-0.7               
##  [53] zlibbioc_1.39.0             MASS_7.3-54                
##  [55] scales_1.1.1                hms_1.1.0                  
##  [57] MatrixGenerics_1.5.3        ProtGenerics_1.25.1        
##  [59] parallel_4.1.0              SummarizedExperiment_1.23.1
##  [61] AnnotationFilter_1.17.1     RColorBrewer_1.1-2         
##  [63] yaml_2.2.1                  curl_4.3.2                 
##  [65] memoise_2.0.0               sass_0.4.0                 
##  [67] biomaRt_2.49.4              stringi_1.7.3              
##  [69] RSQLite_2.2.7               highr_0.9                  
##  [71] S4Vectors_0.31.0            BiocIO_1.3.0               
##  [73] GenomicFeatures_1.45.1      BiocGenerics_0.39.1        
##  [75] filelock_1.0.2              BiocParallel_1.27.3        
##  [77] GenomeInfoDb_1.29.3         rlang_0.4.11               
##  [79] pkgconfig_2.0.3             matrixStats_0.60.0         
##  [81] bitops_1.0-7                evaluate_0.14              
##  [83] lattice_0.20-44             purrr_0.3.4                
##  [85] htmlwidgets_1.5.3           GenomicAlignments_1.29.0   
##  [87] labeling_0.4.2              bit_4.0.4                  
##  [89] tidyselect_1.1.1            here_1.0.1                 
##  [91] RcppAnnoy_0.0.19            plyr_1.8.6                 
##  [93] magrittr_2.0.1              R6_2.5.0                   
##  [95] IRanges_2.27.0              generics_0.1.0             
##  [97] DelayedArray_0.19.1         DBI_1.1.1                  
##  [99] pillar_1.6.2                withr_2.4.2                
## [101] KEGGREST_1.33.0             RCurl_1.98-1.4             
## [103] tibble_3.1.3                crayon_1.4.1               
## [105] utf8_1.2.2                  plotly_4.9.4.9000          
## [107] BiocFileCache_2.1.1         rmarkdown_2.10             
## [109] progress_1.2.2              grid_4.1.0                 
## [111] isoband_0.2.5               data.table_1.14.0          
## [113] blob_1.2.2                  digest_0.6.27              
## [115] EnsDb.Hsapiens.v75_2.99.0   stats4_4.1.0               
## [117] munsell_0.5.0               viridisLite_0.4.0          
## [119] bslib_0.2.5.1

geneshot

Author

2021-08-22