knitr::opts_chunk$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))
# knitr::opts_knit$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))

library(dplyr)
library(ggplot2)
library(orthogene)
set.seed(2019)

Cell-types / Tissues/ Pathways

Use the genes from GeneShot to then check for similarity with other term-associated gene sets.

CellMeSH

Preprocess data

cellmarker <- data.table::fread(here::here("data/UNCURL/PD_terms/cellmarker.csv"), select = 1:4) %>% 
  dplyr::mutate(Zscore=scales::rescale(-log(`P-value`),c(0,1))) %>%
  dplyr::select(ID=`Cell Ontology ID`, Name=`Cell`, Zscore, Genes=`Overlapping Genes`)%>%
  dplyr::mutate(Type="CellMarker")


cellmesh <- data.table::fread(here::here("data/UNCURL/PD_terms/cellmesh.csv"), select = 1:4) %>% 
  dplyr::mutate(Zscore=scales::rescale(`Log-likelihood`,c(0,1))) %>%
  dplyr::select(ID=`MeSH ID`, Name=`Cell Name`, Zscore, Genes=`Overlapping Genes`)%>%
  dplyr::mutate(Type="CellMeSH")

cellmesh_anatomy <- data.table::fread(here::here("data/UNCURL/PD_terms/cellmesh-anatomy.csv"), select = 1:4) %>%
  dplyr::mutate(Zscore=scales::rescale(`Log-likelihood`,c(0,1))) %>%
  dplyr::select(ID=`MeSH ID`, Name=`Cell Name`, Zscore, Genes=`Overlapping Genes`) %>%
  # Lots of celltypes in the "anatomy" section
  subset(!grepl("neuron|astrocyte|fibroblast|epithelial|endothelial|macrophages|monocytes|lymphocytes|leukocytes",
                tolower(Name))) %>%
  dplyr::mutate(Type="Anatomy")

GO <- data.table::fread(here::here("data/UNCURL/PD_terms/geneontology.csv"), select = 1:4) %>%
  dplyr::mutate(Zscore=scales::rescale(-log10(FDR),c(0,1))) %>%
  subset(FDR<.05) %>%
  dplyr::select(ID=`GO ID`, Name, Zscore, Genes=`Overlapping Genes`) %>%
  subset(!grepl("neuron|dendrite|axon",tolower(Name))) %>%
  dplyr::mutate(Type="GO")

annot <- rbind(cellmarker, cellmesh, cellmesh_anatomy, GO) %>% 
   dplyr::mutate(Type_ID=paste(Type, ID, sep="_"))%>% 
   dplyr::arrange(desc(Zscore)) %>% 
   dplyr::group_by(Type) %>%  
   dplyr::slice_head(n = 20) %>%
  data.table::data.table()
annot %>% dplyr::group_by(Type) %>% count()

## # A tibble: 4 × 2
## # Groups:   Type [4]
##   Type           n
##   <chr>      <int>
## 1 Anatomy       10
## 2 CellMarker    19
## 3 CellMeSH      19
## 4 GO            20

{
  annot_genes <- tidyr::separate(annot,col = "Genes", sep = ", ", 
                               into=paste0("gene",1:100)) %>%
  data.table::melt.data.table(id.vars = c("Type_ID","Type","ID","Name","Zscore"), 
                              value.name = "Gene")  %>%
    subset(Type=="GO" & !is.na(Gene)) %>%
    dplyr::select(-variable)
    
  gene_map <- orthogene::map_genes(genes = annot_genes$Gene, 
                                   mthreshold = 1,
                                   drop_na = TRUE) 
  gene_dict <- setNames(gene_map$name, gene_map$input)
  annot_genes$Gene <- gene_dict[annot_genes$Gene] 
}

## Warning: Expected 100 pieces. Additional pieces discarded in 48 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 18, 30, 31, 32, 33, 34, 35, 36, 37, 38, ...].

## Warning: Expected 100 pieces. Missing pieces filled with `NA` in 20 rows [11,
## 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 66, 67].

## Using stored `gprofiler_orgs`.

## Mapping species name: hsapiens

## 1 organism identified from search: hsapiens

## Extracting genes from name.

## 1,963 / 1,963 (100%) genes mapped.

Import Geneshot data

Import data produced in geneshot.Rmd.

PD_pred_genes_melt <- readRDS(here::here("data/Geneshot/PD_pred_genes_melt.rds"))

Integrate all levels

#### Merge with gene-level data ####
annot_fused <- rbind(annot_genes, 
                     PD_pred_genes_melt[1:100,] %>% 
                       dplyr::mutate(Type="Geneshot",) %>%
                       dplyr::select(Type, ID, Name, Zscore, Gene=gene2, Type_ID)
                     ) %>% 
  dplyr::mutate(Type_ID=paste(Type,ID,sep="_"),
                pseudo=1) %>% 
  # subset(Type!="Geneshot") %>%
  subset(!is.na(Gene))
data.table::fwrite(annot_fused, here::here("data/UNCURL/PD_terms/annot_fused.csv"))

### 
annot_unique <- annot_fused %>% 
  dplyr::select(-Gene) %>% unique() 
  
annot_mat <- data.table::dcast(data = annot_fused, 
                               formula = Type_ID  ~ Gene, 
                               value.var = "Zscore", 
                               fun.aggregate = mean,
                               fill = 0) %>% 
  tibble::column_to_rownames("Type_ID") 

data.table::fwrite(annot_mat, here::here("data/UNCURL/PD_terms/annot_fused.mat.csv"), row.names = TRUE)

Multimodal wordcloud

library(ggwordcloud)

annot <- rbind(cellmarker, cellmesh, cellmesh_anatomy, GO[1:20,] )
annot$Type <- factor(annot$Type, levels = c("CellMarker","CellMeSH","Anatomy","GO","Geneshot"),
                     ordered = T)

gg_annot <- ggplot(annot, aes(label = Name, size = Zscore, color=Type,  x=Type)) +
  geom_text_wordcloud_area() + 
  theme_minimal()
print(gg_annot)

Plot PD annotations

Plot the PD annotations (Tissues, celltypes, GO).

Run UMAP

umap_res <- uwot::umap(X = annot_mat, 
                       n_components = 5, 
                       ret_extra = c("model","nn","fgraph"))
##### high dimensional fuzzy graph ####
umap_dist <- as.matrix(umap_res$fgraph) %>% 
  `row.names<-`(row.names(annot_mat)) %>%
  `colnames<-`(row.names(annot_mat))

Plot UMAP

#### Embeddings  ####
umap_df <- data.frame(umap_res$embedding,  
                      row.names = row.names(annot_mat) ) %>%
  `colnames<-`(paste0("UMAP",1:ncol(umap_res$embedding))) %>%
  tibble::rownames_to_column("Type_ID") %>%
  merge(annot_unique, by="Type_ID")


## Labels 
umap_labels <- umap_df %>% 
  # dplyr::select(-Gene, -variable) %>% unique() %>%
  dplyr::group_by(Type) %>%
  dplyr::slice_max(order_by = Zscore, n = 10, with_ties = F) %>%
  data.frame()
  
gg_umap <- ggplot(umap_df, aes(x=UMAP1, y=UMAP2, shape=Type, color=Type)) +
  geom_point(aes(size=Zscore), alpha=.5) +
  # geom_text(data = umap_labels, aes(x = UMAP1, y=UMAP2, label=Name))
  ggrepel::geom_label_repel(data = umap_labels,
                            aes(x=UMAP1, y=UMAP2, size=Zscore, color=Type, label=Name)) +
  theme_bw()

print(gg_umap)

## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Construct similarity matrix

library(minet)# BiocManager::install("minet")  
#### Construct similarity matrix using mutual information #### 
# miM <- minet::build.mim(t(annot_mat))
miM <- minet::build.mim(t(umap_dist))
# Prune edges
# miM <- minet::aracne(as.matrix(umap_dist))
miM <- minet::clr(miM)
# sM <- BUS::gene.similarity(umap_res, measure="corr", net.trim = "none")
# pM <- BUS::gene.pvalue(umap_res, measure="MI", net.trim = "none")
# netM <- BUS::pred.network(pM$multi.perm.p.value, sM)
 
edges <- reshape2::melt(miM) %>% `colnames<-`(c("from","to","weight"))
edges <- edges[!is.na(edges$weight),]
edges <- edges[edges$weight>0,]
hist(edges$weight, 50)

Construct network

# %>% tibble::column_to_rownames("Type_ID")
net <- network::network(edges,  
                        # vertex.attr = data.frame(umap_df[!duplicated(umap_df$Type_ID),],  row.names = umap_df[!duplicated(umap_df$Type_ID),]$Type_ID)[unique(edges$from),],
                        matrix.type = "edgelist", 
                        layout = "fruchtermanreingold",
                        ignore.eval = F) 
# net.sum <- network::summary.network(net)
# ?gplot.layout {sna}   
netF <- ggnetwork::ggnetwork(net, 
                  # layout="circle",
                  layout = "fruchtermanreingold"
                  # cell.jitter = 0.75
                  )  %>%
  merge(umap_df, by.x = "vertex.names", by.y = "Type_ID")

# netF <- dplyr::group_by(netF, Type) %>% 
#   dplyr::slice_max(order_by = Zscore, n = 100, with_ties = F)
# dplyr::group_by(netF, Type) %>% count()
hubs <-  netF %>%  
  dplyr::mutate(Type_ID=paste(Type,ID,sep="_")) %>% 
  dplyr::group_by(Type, vertex.names, Name) %>% 
  dplyr::summarise(Type_ID, n=n(), x=max(x), y=max(y), Zscore=mean(Zscore)) %>%
  dplyr::group_by(Type) %>%
  dplyr::arrange(desc(n), desc(Zscore))  %>%
  dplyr::slice_max(order_by = n, n = 20, with_ties = F) %>%
    dplyr::relocate(Type_ID, 1) 
hubs%>% group_by(Type) %>% count()

Plot network

ggnetwork

library(ggnetwork)
gg_net <- ggplot(netF, 
       aes(x = x, y = y, xend = xend, yend = yend, label=Name)) +
  geom_edges(aes(alpha=weight),# curvature = .3,
             # curvature = .3, ncp = 100
             color = "black"
             ) +
  geom_nodes(aes(size=Zscore, shape=Type, fill=Type, color=Type), alpha=.8) +
  # geom_nodetext(aes(color = Type, label = Name, size=Zscore),
                # fontface = "bold", nudge_y = .02, show.legend = F) +
  ggrepel::geom_label_repel(data = hubs, 
                           aes(x=x, y=y, label=Name, size=Zscore, color=Type), 
                           inherit.aes = F, alpha=.8, max.overlaps = 20) +
  theme_blank() 
print(gg_net)

ggraph

Has edge bundling!….But can be quite slow with medium-large networks.

Converting between hclust/networks.

library(ggraph)
library(igraph)

hist(edges$weight)
edges <- subset(edges, from %in% hubs$vertex.names | to %in% hubs$vertex.names)
nodes <- annot_unique %>% dplyr::relocate(Type_ID, 1) %>% unique()
graph <- graph_from_data_frame(netF, 
                               directed = T,
                               # vertices = nodes[!duplicated(nodes$Type_ID),]
                               )
# graph <- graph_from_adjacency_matrix(umap_dist, weighted = T)
# phy <- umap_res$embedding %>% 
#   `colnames<-`(paste("UMAP",1:ncol(umap_res$embedding))) %>%
#   `row.names<-`(row.names(annot_mat)) %>%
#   dist() %>%
#   hclust()  %>%
#   ape::as.phylo()

# library(network)
# graph_net<- ape::as.network.phylo(phy,nodes)
# graph_net = graph.edgelist(phy$edge)
# graph_net <- ape::as.igraph.phylo(phy)
# igraph::vertex.attributes(graph_net) <- nodes
 
#  
# ggraph(net, layout = "dendrogram",circular=T) +
#    geom_node_point(alpha=.8) +
#    # geom_edge_link() 
#    geom_edge_diagonal() +
#     coord_fixed()
# 
# ggraph(net, layout = "treemap") + 
#   geom_node_tile(aes(fill = depth), size = 0.25)
#   
# ggraph(net, layout = 'circlepack') + 
#   geom_node_circle(aes(fill = depth), size = 0.25, n = 50) + 
#   coord_fixed()

#### Simple graph ####
ggg <- ggraph(graph, layout = "kk") + 
    geom_edge_link(aes(alpha=weight)) +
   geom_node_point(aes(color=Type, shape=Type, size=Zscore), alpha=.8)+
  geom_node_label(data=hubs, aes(label=Name, color=Type, size=Zscore),
                   alpha=.8, repel = T) + 
  theme_graph()
print(ggg)

Edge bundling

# Create a graph of the flare class system
library(tidygraph)
# vertices <- umap_df %>% dplyr::select(name=Type_ID, size=Zscore, shortName=Type)
# edges <- reshape2::melt(miM) %>% `colnames<-`(c("from","to","weight"))
# edges <- edges[!is.na(edges$weight),]
# edges <- edges[edges$weight>0,]

tgraph <- tidygraph::tbl_graph(edges, nodes=nodes, node_key="Type_ID")

importFrom <- match(edges$from, nodes$Type_ID)
importTo <- match(edges$to, nodes$Type_ID)

# Use class inheritance for layout but plot class imports as bundles
gg_bundle <- ggraph(tgraph, 'kk') +
  geom_conn_bundle(aes(colour = stat(index)),
    data = get_con(importFrom, importTo),
    edge_alpha = 0.25, tension = 1
  ) +
  geom_node_point() +
  scale_edge_colour_distiller('', direction = 1, guide = 'edge_direction') +
  geom_node_point(aes(color=Type, shape=Type, size=Zscore), alpha=.8)+
  geom_node_label(data=hubs, aes(label=Name, color=Type, size=Zscore),
                   alpha=.8, repel = T) + 
  # coord_fixed() +
  ggforce::theme_no_axes()

Animated UMAP

Session info

utils::sessionInfo()

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggwordcloud_0.5.0 orthogene_0.99.0  ggplot2_3.3.5     dplyr_1.0.7      
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2                sass_0.4.0               
##  [3] tidyr_1.1.3               jsonlite_1.7.2           
##  [5] viridisLite_0.4.0         carData_3.0-4            
##  [7] here_1.0.1                gprofiler2_0.2.0         
##  [9] bslib_0.2.5.1             assertthat_0.2.1         
## [11] highr_0.9                 cellranger_1.1.0         
## [13] ggrepel_0.9.1             yaml_2.2.1               
## [15] pillar_1.6.2              backports_1.2.1          
## [17] lattice_0.20-44           glue_1.4.2               
## [19] digest_0.6.27             ggsignif_0.6.2           
## [21] colorspace_2.0-2          htmltools_0.5.1.1        
## [23] Matrix_1.3-4              pkgconfig_2.0.3          
## [25] broom_0.7.9               haven_2.4.3              
## [27] purrr_0.3.4               patchwork_1.1.1          
## [29] scales_1.1.1              RSpectra_0.16-0          
## [31] openxlsx_4.2.4            rio_0.5.27               
## [33] tibble_3.1.3              generics_0.1.0           
## [35] farver_2.1.0              car_3.0-11               
## [37] ellipsis_0.3.2            ggpubr_0.4.0             
## [39] withr_2.4.2               lazyeval_0.2.2           
## [41] cli_3.0.1                 magrittr_2.0.1           
## [43] crayon_1.4.1              readxl_1.3.1             
## [45] evaluate_0.14             fansi_0.5.0              
## [47] rstatix_0.7.0             homologene_1.4.68.19.3.27
## [49] forcats_0.5.1             foreign_0.8-81           
## [51] tools_4.1.0               data.table_1.14.0        
## [53] hms_1.1.0                 lifecycle_1.0.0          
## [55] stringr_1.4.0             plotly_4.9.4.9000        
## [57] munsell_0.5.0             zip_2.2.0                
## [59] compiler_4.1.0            jquerylib_0.1.4          
## [61] rlang_0.4.11              grid_4.1.0               
## [63] RCurl_1.98-1.4            rstudioapi_0.13          
## [65] RcppAnnoy_0.0.19          htmlwidgets_1.5.3        
## [67] bitops_1.0-7              labeling_0.4.2           
## [69] rmarkdown_2.10            codetools_0.2-18         
## [71] gtable_0.3.0              abind_1.4-5              
## [73] DBI_1.1.1                 curl_4.3.2               
## [75] R6_2.5.0                  knitr_1.33               
## [77] uwot_0.1.10.9000          utf8_1.2.2               
## [79] rprojroot_2.0.2           stringi_1.7.3            
## [81] parallel_4.1.0            Rcpp_1.0.7               
## [83] vctrs_0.3.8               png_0.1-7                
## [85] tidyselect_1.1.1          xfun_0.25

multimodal_networks

Author

2021-08-22

Cell-types / Tissues/ Pathways

CellMeSH

Preprocess data

Import Geneshot data

Integrate all levels

Multimodal wordcloud

Plot PD annotations

Run UMAP

Plot UMAP

Construct similarity matrix

Construct network

Plot network

ggnetwork

ggraph

Edge bundling

Animated UMAP

Session info

multimodal_networks

Author

2021-08-22

Cell-types / Tissues/ Pathways

CellMeSH

Preprocess data

Import Geneshot data

Integrate all levels

Multi-modal network

Multimodal wordcloud

Plot PD annotations

Run UMAP

Plot UMAP

Construct similarity matrix

Construct network

Plot network

ggnetwork

ggraph

Edge bundling

Animated UMAP

Session info