::opts_chunk$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))
knitr# knitr::opts_knit$set(echo = TRUE, root.dir=here::here("/Desktop/PD_omics"))
library(dplyr)
library(ggplot2)
library(orthogene)
set.seed(2019)
Use the genes from GeneShot to then check for similarity with other term-associated gene sets.
<- data.table::fread(here::here("data/UNCURL/PD_terms/cellmarker.csv"), select = 1:4) %>%
cellmarker ::mutate(Zscore=scales::rescale(-log(`P-value`),c(0,1))) %>%
dplyr::select(ID=`Cell Ontology ID`, Name=`Cell`, Zscore, Genes=`Overlapping Genes`)%>%
dplyr::mutate(Type="CellMarker")
dplyr
<- data.table::fread(here::here("data/UNCURL/PD_terms/cellmesh.csv"), select = 1:4) %>%
cellmesh ::mutate(Zscore=scales::rescale(`Log-likelihood`,c(0,1))) %>%
dplyr::select(ID=`MeSH ID`, Name=`Cell Name`, Zscore, Genes=`Overlapping Genes`)%>%
dplyr::mutate(Type="CellMeSH")
dplyr
<- data.table::fread(here::here("data/UNCURL/PD_terms/cellmesh-anatomy.csv"), select = 1:4) %>%
cellmesh_anatomy ::mutate(Zscore=scales::rescale(`Log-likelihood`,c(0,1))) %>%
dplyr::select(ID=`MeSH ID`, Name=`Cell Name`, Zscore, Genes=`Overlapping Genes`) %>%
dplyr# Lots of celltypes in the "anatomy" section
subset(!grepl("neuron|astrocyte|fibroblast|epithelial|endothelial|macrophages|monocytes|lymphocytes|leukocytes",
tolower(Name))) %>%
::mutate(Type="Anatomy")
dplyr
<- data.table::fread(here::here("data/UNCURL/PD_terms/geneontology.csv"), select = 1:4) %>%
GO ::mutate(Zscore=scales::rescale(-log10(FDR),c(0,1))) %>%
dplyrsubset(FDR<.05) %>%
::select(ID=`GO ID`, Name, Zscore, Genes=`Overlapping Genes`) %>%
dplyrsubset(!grepl("neuron|dendrite|axon",tolower(Name))) %>%
::mutate(Type="GO")
dplyr
<- rbind(cellmarker, cellmesh, cellmesh_anatomy, GO) %>%
annot ::mutate(Type_ID=paste(Type, ID, sep="_"))%>%
dplyr::arrange(desc(Zscore)) %>%
dplyr::group_by(Type) %>%
dplyr::slice_head(n = 20) %>%
dplyr::data.table()
data.table%>% dplyr::group_by(Type) %>% count() annot
## # A tibble: 4 × 2
## # Groups: Type [4]
## Type n
## <chr> <int>
## 1 Anatomy 10
## 2 CellMarker 19
## 3 CellMeSH 19
## 4 GO 20
{<- tidyr::separate(annot,col = "Genes", sep = ", ",
annot_genes into=paste0("gene",1:100)) %>%
::melt.data.table(id.vars = c("Type_ID","Type","ID","Name","Zscore"),
data.tablevalue.name = "Gene") %>%
subset(Type=="GO" & !is.na(Gene)) %>%
::select(-variable)
dplyr
<- orthogene::map_genes(genes = annot_genes$Gene,
gene_map mthreshold = 1,
drop_na = TRUE)
<- setNames(gene_map$name, gene_map$input)
gene_dict $Gene <- gene_dict[annot_genes$Gene]
annot_genes }
## Warning: Expected 100 pieces. Additional pieces discarded in 48 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 18, 30, 31, 32, 33, 34, 35, 36, 37, 38, ...].
## Warning: Expected 100 pieces. Missing pieces filled with `NA` in 20 rows [11,
## 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 66, 67].
## Using stored `gprofiler_orgs`.
## Mapping species name: hsapiens
## 1 organism identified from search: hsapiens
## Extracting genes from name.
## 1,963 / 1,963 (100%) genes mapped.
Import data produced in geneshot.Rmd
.
<- readRDS(here::here("data/Geneshot/PD_pred_genes_melt.rds")) PD_pred_genes_melt
#### Merge with gene-level data ####
<- rbind(annot_genes,
annot_fused 1:100,] %>%
PD_pred_genes_melt[::mutate(Type="Geneshot",) %>%
dplyr::select(Type, ID, Name, Zscore, Gene=gene2, Type_ID)
dplyr%>%
) ::mutate(Type_ID=paste(Type,ID,sep="_"),
dplyrpseudo=1) %>%
# subset(Type!="Geneshot") %>%
subset(!is.na(Gene))
::fwrite(annot_fused, here::here("data/UNCURL/PD_terms/annot_fused.csv"))
data.table
###
<- annot_fused %>%
annot_unique ::select(-Gene) %>% unique()
dplyr
<- data.table::dcast(data = annot_fused,
annot_mat formula = Type_ID ~ Gene,
value.var = "Zscore",
fun.aggregate = mean,
fill = 0) %>%
::column_to_rownames("Type_ID")
tibble
::fwrite(annot_mat, here::here("data/UNCURL/PD_terms/annot_fused.mat.csv"), row.names = TRUE) data.table
Now let’s connect these terms at multiple levels using the gene names.
library(ggwordcloud)
<- rbind(cellmarker, cellmesh, cellmesh_anatomy, GO[1:20,] )
annot $Type <- factor(annot$Type, levels = c("CellMarker","CellMeSH","Anatomy","GO","Geneshot"),
annotordered = T)
<- ggplot(annot, aes(label = Name, size = Zscore, color=Type, x=Type)) +
gg_annot geom_text_wordcloud_area() +
theme_minimal()
print(gg_annot)
Plot the PD annotations (Tissues, celltypes, GO).
<- uwot::umap(X = annot_mat,
umap_res n_components = 5,
ret_extra = c("model","nn","fgraph"))
##### high dimensional fuzzy graph ####
<- as.matrix(umap_res$fgraph) %>%
umap_dist `row.names<-`(row.names(annot_mat)) %>%
`colnames<-`(row.names(annot_mat))
#### Embeddings ####
<- data.frame(umap_res$embedding,
umap_df row.names = row.names(annot_mat) ) %>%
`colnames<-`(paste0("UMAP",1:ncol(umap_res$embedding))) %>%
::rownames_to_column("Type_ID") %>%
tibblemerge(annot_unique, by="Type_ID")
## Labels
<- umap_df %>%
umap_labels # dplyr::select(-Gene, -variable) %>% unique() %>%
::group_by(Type) %>%
dplyr::slice_max(order_by = Zscore, n = 10, with_ties = F) %>%
dplyrdata.frame()
<- ggplot(umap_df, aes(x=UMAP1, y=UMAP2, shape=Type, color=Type)) +
gg_umap geom_point(aes(size=Zscore), alpha=.5) +
# geom_text(data = umap_labels, aes(x = UMAP1, y=UMAP2, label=Name))
::geom_label_repel(data = umap_labels,
ggrepelaes(x=UMAP1, y=UMAP2, size=Zscore, color=Type, label=Name)) +
theme_bw()
print(gg_umap)
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
library(minet)# BiocManager::install("minet")
#### Construct similarity matrix using mutual information ####
# miM <- minet::build.mim(t(annot_mat))
<- minet::build.mim(t(umap_dist))
miM # Prune edges
# miM <- minet::aracne(as.matrix(umap_dist))
<- minet::clr(miM)
miM # sM <- BUS::gene.similarity(umap_res, measure="corr", net.trim = "none")
# pM <- BUS::gene.pvalue(umap_res, measure="MI", net.trim = "none")
# netM <- BUS::pred.network(pM$multi.perm.p.value, sM)
<- reshape2::melt(miM) %>% `colnames<-`(c("from","to","weight"))
edges <- edges[!is.na(edges$weight),]
edges <- edges[edges$weight>0,]
edges hist(edges$weight, 50)
# %>% tibble::column_to_rownames("Type_ID")
<- network::network(edges,
net # vertex.attr = data.frame(umap_df[!duplicated(umap_df$Type_ID),], row.names = umap_df[!duplicated(umap_df$Type_ID),]$Type_ID)[unique(edges$from),],
matrix.type = "edgelist",
layout = "fruchtermanreingold",
ignore.eval = F)
# net.sum <- network::summary.network(net)
# ?gplot.layout {sna}
<- ggnetwork::ggnetwork(net,
netF # layout="circle",
layout = "fruchtermanreingold"
# cell.jitter = 0.75
%>%
) merge(umap_df, by.x = "vertex.names", by.y = "Type_ID")
# netF <- dplyr::group_by(netF, Type) %>%
# dplyr::slice_max(order_by = Zscore, n = 100, with_ties = F)
# dplyr::group_by(netF, Type) %>% count()
<- netF %>%
hubs ::mutate(Type_ID=paste(Type,ID,sep="_")) %>%
dplyr::group_by(Type, vertex.names, Name) %>%
dplyr::summarise(Type_ID, n=n(), x=max(x), y=max(y), Zscore=mean(Zscore)) %>%
dplyr::group_by(Type) %>%
dplyr::arrange(desc(n), desc(Zscore)) %>%
dplyr::slice_max(order_by = n, n = 20, with_ties = F) %>%
dplyr::relocate(Type_ID, 1)
dplyr%>% group_by(Type) %>% count() hubs
library(ggnetwork)
<- ggplot(netF,
gg_net aes(x = x, y = y, xend = xend, yend = yend, label=Name)) +
geom_edges(aes(alpha=weight),# curvature = .3,
# curvature = .3, ncp = 100
color = "black"
+
) geom_nodes(aes(size=Zscore, shape=Type, fill=Type, color=Type), alpha=.8) +
# geom_nodetext(aes(color = Type, label = Name, size=Zscore),
# fontface = "bold", nudge_y = .02, show.legend = F) +
::geom_label_repel(data = hubs,
ggrepelaes(x=x, y=y, label=Name, size=Zscore, color=Type),
inherit.aes = F, alpha=.8, max.overlaps = 20) +
theme_blank()
print(gg_net)
Has edge bundling!….But can be quite slow with medium-large networks.
Converting between hclust/networks.
library(ggraph)
library(igraph)
hist(edges$weight)
<- subset(edges, from %in% hubs$vertex.names | to %in% hubs$vertex.names)
edges <- annot_unique %>% dplyr::relocate(Type_ID, 1) %>% unique()
nodes <- graph_from_data_frame(netF,
graph directed = T,
# vertices = nodes[!duplicated(nodes$Type_ID),]
)# graph <- graph_from_adjacency_matrix(umap_dist, weighted = T)
# phy <- umap_res$embedding %>%
# `colnames<-`(paste("UMAP",1:ncol(umap_res$embedding))) %>%
# `row.names<-`(row.names(annot_mat)) %>%
# dist() %>%
# hclust() %>%
# ape::as.phylo()
# library(network)
# graph_net<- ape::as.network.phylo(phy,nodes)
# graph_net = graph.edgelist(phy$edge)
# graph_net <- ape::as.igraph.phylo(phy)
# igraph::vertex.attributes(graph_net) <- nodes
#
# ggraph(net, layout = "dendrogram",circular=T) +
# geom_node_point(alpha=.8) +
# # geom_edge_link()
# geom_edge_diagonal() +
# coord_fixed()
#
# ggraph(net, layout = "treemap") +
# geom_node_tile(aes(fill = depth), size = 0.25)
#
# ggraph(net, layout = 'circlepack') +
# geom_node_circle(aes(fill = depth), size = 0.25, n = 50) +
# coord_fixed()
#### Simple graph ####
<- ggraph(graph, layout = "kk") +
ggg geom_edge_link(aes(alpha=weight)) +
geom_node_point(aes(color=Type, shape=Type, size=Zscore), alpha=.8)+
geom_node_label(data=hubs, aes(label=Name, color=Type, size=Zscore),
alpha=.8, repel = T) +
theme_graph()
print(ggg)
# Create a graph of the flare class system
library(tidygraph)
# vertices <- umap_df %>% dplyr::select(name=Type_ID, size=Zscore, shortName=Type)
# edges <- reshape2::melt(miM) %>% `colnames<-`(c("from","to","weight"))
# edges <- edges[!is.na(edges$weight),]
# edges <- edges[edges$weight>0,]
<- tidygraph::tbl_graph(edges, nodes=nodes, node_key="Type_ID")
tgraph
<- match(edges$from, nodes$Type_ID)
importFrom <- match(edges$to, nodes$Type_ID)
importTo
# Use class inheritance for layout but plot class imports as bundles
<- ggraph(tgraph, 'kk') +
gg_bundle geom_conn_bundle(aes(colour = stat(index)),
data = get_con(importFrom, importTo),
edge_alpha = 0.25, tension = 1
+
) geom_node_point() +
scale_edge_colour_distiller('', direction = 1, guide = 'edge_direction') +
geom_node_point(aes(color=Type, shape=Type, size=Zscore), alpha=.8)+
geom_node_label(data=hubs, aes(label=Name, color=Type, size=Zscore),
alpha=.8, repel = T) +
# coord_fixed() +
::theme_no_axes() ggforce
::sessionInfo() utils
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggwordcloud_0.5.0 orthogene_0.99.0 ggplot2_3.3.5 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.2 sass_0.4.0
## [3] tidyr_1.1.3 jsonlite_1.7.2
## [5] viridisLite_0.4.0 carData_3.0-4
## [7] here_1.0.1 gprofiler2_0.2.0
## [9] bslib_0.2.5.1 assertthat_0.2.1
## [11] highr_0.9 cellranger_1.1.0
## [13] ggrepel_0.9.1 yaml_2.2.1
## [15] pillar_1.6.2 backports_1.2.1
## [17] lattice_0.20-44 glue_1.4.2
## [19] digest_0.6.27 ggsignif_0.6.2
## [21] colorspace_2.0-2 htmltools_0.5.1.1
## [23] Matrix_1.3-4 pkgconfig_2.0.3
## [25] broom_0.7.9 haven_2.4.3
## [27] purrr_0.3.4 patchwork_1.1.1
## [29] scales_1.1.1 RSpectra_0.16-0
## [31] openxlsx_4.2.4 rio_0.5.27
## [33] tibble_3.1.3 generics_0.1.0
## [35] farver_2.1.0 car_3.0-11
## [37] ellipsis_0.3.2 ggpubr_0.4.0
## [39] withr_2.4.2 lazyeval_0.2.2
## [41] cli_3.0.1 magrittr_2.0.1
## [43] crayon_1.4.1 readxl_1.3.1
## [45] evaluate_0.14 fansi_0.5.0
## [47] rstatix_0.7.0 homologene_1.4.68.19.3.27
## [49] forcats_0.5.1 foreign_0.8-81
## [51] tools_4.1.0 data.table_1.14.0
## [53] hms_1.1.0 lifecycle_1.0.0
## [55] stringr_1.4.0 plotly_4.9.4.9000
## [57] munsell_0.5.0 zip_2.2.0
## [59] compiler_4.1.0 jquerylib_0.1.4
## [61] rlang_0.4.11 grid_4.1.0
## [63] RCurl_1.98-1.4 rstudioapi_0.13
## [65] RcppAnnoy_0.0.19 htmlwidgets_1.5.3
## [67] bitops_1.0-7 labeling_0.4.2
## [69] rmarkdown_2.10 codetools_0.2-18
## [71] gtable_0.3.0 abind_1.4-5
## [73] DBI_1.1.1 curl_4.3.2
## [75] R6_2.5.0 knitr_1.33
## [77] uwot_0.1.10.9000 utf8_1.2.2
## [79] rprojroot_2.0.2 stringi_1.7.3
## [81] parallel_4.1.0 Rcpp_1.0.7
## [83] vctrs_0.3.8 png_0.1-7
## [85] tidyselect_1.1.1 xfun_0.25