# read in data
all_res_file <- "data/COLOC/all_microglia_coloc_res.1kgp3_ld.tsv.gz"

if( !file.exists(all_res_file)){
  
  all_ld_files <- list.files("data/COLOC/LD", full.names = TRUE, pattern = "coloc_results.snp.1kgp3_ld.tsv.gz")
  
  eqtl_ld <- all_ld_files[ !grepl("sQTL", all_ld_files)]
  
  sqtl_ld <- all_ld_files[ grepl("sQTL", all_ld_files)]
  
  names(eqtl_ld) <- apply(str_split_fixed(basename(eqtl_ld), "_", 7)[,c(4,5)], MARGIN = 1, FUN = function(x){ paste(x, collapse = "_")})
  
  names(sqtl_ld) <- apply(str_split_fixed(basename(sqtl_ld), "_", 7)[,c(5,6)], MARGIN = 1, FUN = function(x){ paste(x, collapse = "_")})
  
  sqtl_ld_res <- map_df(sqtl_ld, ~{read_tsv(.x) %>% mutate(Locus = as.character(Locus)) }, .id = "GWAS" ) %>% mutate(QTL = "sQTL")
  eqtl_ld_res <- map_df(eqtl_ld, ~{read_tsv(.x) %>% mutate(Locus = as.character(Locus))}, .id = "GWAS" ) %>% mutate(QTL = "eQTL")
  
  all_ld_res <- bind_rows(sqtl_ld_res, eqtl_ld_res)
  
  all_ld_res <- rename(all_ld_res, Gene = gene, SNP = snp)
  
  all_ld_res$geneid <- map_chr(str_split(all_ld_res$Gene, ":"), ~{ .x[ length(.x) ] })
  all_ld_res$geneid <- str_split_fixed(all_ld_res$geneid, "\\.", 2)[,1]
  
  # have separate junction column
  all_ld_res$QTL_junction <- map_chr(str_split(all_ld_res$Gene, ":"), ~{ paste0(.x[1], ":", .x[2], "-", .x[3])  })
  
  gene_meta <- read_tsv("~/GENCODE/gencode.v30.tx2gene.tsv.gz") %>% 
    janitor::clean_names() %>%
    select(genename, geneid) %>% distinct()
  
  # remove tags
  gene_meta$geneid <- str_split_fixed(gene_meta$geneid, "\\.", 2)[,1]
  
  # match on gene symbols
  all_ld_res$genename <- gene_meta$genename[ match(all_ld_res$geneid, gene_meta$geneid) ]
  
  # coalesce - if no gene symbol found use ID
  all_ld_res$QTL_Gene <- coalesce(all_ld_res$genename, all_ld_res$geneid)
  
  # add geneid back to make sure
  all_ld_res$QTL_Ensembl <- gene_meta$geneid[match(all_ld_res$QTL_Gene, gene_meta$genename)] 
  
  # make junction NA if eQTL
  all_ld_res$QTL_junction <- ifelse(all_ld_res$QTL == "sQTL", all_ld_res$QTL_junction, ".")
  
  
  write_tsv(all_ld_res, path = "data/COLOC/all_microglia_coloc_res.1kgp3_ld.tsv.gz")
}else{
  all_ld_res <- read_tsv(all_res_file)
}
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   GWAS = col_character(),
##   Locus = col_character(),
##   SNP = col_character(),
##   A1 = col_character(),
##   A2 = col_character(),
##   gwas.type = col_character(),
##   Gene = col_character(),
##   QTL_chr = col_character(),
##   qtl.type = col_character(),
##   Dataset = col_character(),
##   old_locus_name = col_character(),
##   QTL = col_character(),
##   geneid = col_character(),
##   QTL_junction = col_character(),
##   genename = col_character(),
##   QTL_Gene = col_character(),
##   QTL_Ensembl = col_character()
## )
## See spec(...) for full column specifications.
## fine-mapped SNPs
all_finemap <- data.table::fread("data/Fine_Mapping/brian/microgliaQTL_multiGWAS.finemapping_merged.labelled.tsv")

## coloc

all_coloc <- read_tsv("data/microglia_qtls_coloc_filtered_harmonised.tsv.gz")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   disease = col_character(),
##   Dataset = col_character(),
##   Locus = col_character(),
##   GWAS_SNP = col_character(),
##   GWAS_chr = col_character(),
##   QTL = col_character(),
##   type = col_character(),
##   QTL_SNP = col_character(),
##   QTL_chr = col_character(),
##   QTL_junction = col_character(),
##   Gene = col_character(),
##   QTL_Ensembl = col_character(),
##   cell_type = col_character(),
##   gene_snp = col_character(),
##   mashr_lsfr = col_logical(),
##   distance_filter = col_character()
## )
## See spec(...) for full column specifications.
disease_fancy_table <- 
  tibble(
    disease = c("AD", "PD", "BPD", "SCZ", "ALS", "MS"),
    disease_full = c("Alzheimer's Disease", "Parkinson's Disease", "Bipolar Disorder", "Schizophrenia", "Amyotrophic Lateral Sclerosis", "Multiple Sclerosis" )
  )

gwas_fancy_table <- 
  tibble(
    Dataset = c("Nalls23andMe_2019", "Marioni_2018", "Jansen_2018", "Lambert_2013", "Kunkle_2019", "IMSGC_2019", "Ripke_2014", "Stahl_2019", "Daner_2020", "Nicolas_2018"),
    GWAS_full = c("Nalls et al 2019", "Marioni et al 2018", "Jansen et al 2018", "Lambert et al 2013", "Kunkle et al 2019", "IMSGC et al 2019", "Ripke et al 2014", "Stahl et al 2019", "Daner et al 2020", "Nicolas et al 2018")
  )

all_coloc <- all_coloc %>%
    left_join(disease_fancy_table, by = "disease") %>%
    left_join(gwas_fancy_table, by = "Dataset")
# for a given Locus in a given GWAS and QTL gene
# produce LocusZoom-like plots for the GWAS and QTL
# plot the 2 distributions against each other
locus_zoom_plot <- function(qtl, locus, gene, gwas, label = TRUE, layout = "shiny"){
  stopifnot(qtl %in% c("eQTL", "sQTL"))
  stopifnot(gwas %in% unique(all_ld_res$GWAS))
  ld_loc <- filter(all_ld_res, GWAS == gwas)#, Dataset == gwas )
  
  coloc_loc <- filter(all_coloc, Dataset == gwas, gene == Gene, Locus == locus, type == qtl) 
  stopifnot(nrow(coloc_loc) == 1)
  
  coloc_h4 <- coloc_loc$PP.H4.abf
  
  
  coloc_h4 <- signif(coloc_h4, digits = 2)
  stopifnot(length(coloc_h4) > 0)
  
  if(!(all(is.na(ld_loc$Dataset)))){
    ld_loc <- filter(ld_loc, Dataset == gwas)
  }
  
  stopifnot(locus %in% ld_loc$Locus)
  ld_loc <- filter(ld_loc, Locus == locus, Gene == gene) #%>%
   # rename(SNP = snp)
  
  finemap_loc <- 
    filter(all_finemap, Dataset == gwas, Locus == locus, GWAS == gwas, !is.na(label) ) 
  
  ld_loc$label_name <- ifelse(ld_loc$SNP %in% finemap_loc$SNP, ld_loc$SNP, "")
  
  # add in lead QTL SNP
  lead_qtl_snp <- filter(ld_loc, SNP == coloc_loc$QTL_SNP)
  if(nrow(lead_qtl_snp) == 0){return(NULL)}  
  ld_loc$label_name <- ifelse(ld_loc$SNP == lead_qtl_snp$SNP, ld_loc$SNP, ld_loc$label_name)
  
  # lead GWAS SNP - from LD - not from COLOC as COLOC P value is sometimes the combined P rather than the raw GWAS P (Ripke)
  if( gwas == "Ripke_2014"){
  lead_gwas_snp <- arrange(ld_loc, gwas.pvalues) %>% head(1)
  }else{ lead_gwas_snp <- coloc_loc %>%
     select(SNP = GWAS_SNP, gwas.pvalues = GWAS_P, pos = GWAS_pos_hg19)
  }
  # 
 # return(ld_loc)
  colour_guide_string <- bquote(R^2~with~.(lead_gwas_snp$SNP))
  
  
  compare_plot <-
    ld_loc %>%
    ggplot(aes(x = -log10(qtl.pvalues), y = -log10(gwas.pvalues), colour = lead_gwas_ld)) + 
    scale_colour_distiller(palette = "Spectral", limits = c(0,1), breaks = c(0,1), labels = c(0,1) ) +
    labs(x = -log[10]~(P[QTL]), y = expression(-log[10]~(P[GWAS])), colour = colour_guide_string, title = paste0("COLOC PP H4 = ", coloc_h4)) + 
    theme_classic() + 
    theme(legend.position = "bottom") +
    guides(colour = guide_colourbar(barwidth = 3, ticks = FALSE, barheight = 0.5, label.position = "top",label.vjust = -1) ) +
    scale_x_continuous(expand = c(0, 0), limits = c(0, -log10(lead_qtl_snp$qtl.pvalues) + 1)) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, -log10(lead_gwas_snp$gwas.pvalues) + 1))
  
  if(label == TRUE){
    compare_plot <- compare_plot + 
      geom_text_repel(data = filter(ld_loc, gwas.pvalues < 0.1 & qtl.pvalues < 0.1), 
                    aes(label = label_name), colour = "black", force = 10, size = 2.5, box.padding = unit(0.5, "lines"), min.segment.length = 0 )
  }
  
  compare_plot <- compare_plot + rasterise(geom_point(size = 0.8), dpi = 300) 
  
  #return(compare_plot)
  gwas_plot <-
    ld_loc %>%
    ggplot(aes(x = pos / 1e6, y = -log10(gwas.pvalues), colour = lead_gwas_ld)) + 
    scale_colour_distiller(palette = "Spectral" ) +
    labs(y = expression(-log[10]~(P)), x = "", colour = colour_guide_string, title = coloc_loc$disease_full, subtitle = coloc_loc$GWAS_full ) + 
    geom_text_repel(data = lead_gwas_snp, aes(label = SNP), colour = "black", direction = "x", hjust = 0, nudge_x = 0.3, size = 2.5  ) +
    rasterise(geom_point(size = 0.8), dpi = 300) + 
    theme_classic() +
    guides(colour = FALSE) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, -log10(lead_gwas_snp$gwas.pvalues) + 1))
  
  qtl_plot <-
    ld_loc %>%
    ggplot(aes(x = pos / 1e6, y = -log10(qtl.pvalues), colour = lead_gwas_ld)) + 
    scale_colour_distiller(palette = "Spectral" ) +
    labs(y = -log[10]~(P), x = "Mbp", colour = colour_guide_string, title = paste0("MiGA ", qtl, " - ", gene)) + 
    geom_text_repel(data = lead_qtl_snp, aes(label = SNP), colour = "black", direction = "x", hjust = 0, nudge_x = 0.3, size  = 2.5 ) +
    rasterise(geom_point(size = 0.8), dpi = 300) + 
    theme_classic() +
    guides(colour = FALSE) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, -log10(lead_qtl_snp$qtl.pvalues) + 1))
  
  if( layout == "shiny"){
    multiplot <- 
    (gwas_plot + qtl_plot + plot_layout(nrow = 2)) / (plot_spacer() + compare_plot + plot_spacer() + plot_layout(ncol=1, heights = c(0.1, 0.9, 0)) ) +  
    #plot_annotation(subtitle = paste0("Locus: ", locus, " Gene: ", gene), subsubtitle = paste0("GWAS: ", gwas) ) +
    plot_layout( ncol = 2, widths = c(1,1.1))  &
    theme(axis.text = element_text(colour = "black"), plot.title = element_text(size = 10), plot.subtitle = element_text(size = 7.5)) 
  }
  
  if(layout == "figure"){
     multiplot <- 
    gwas_plot + qtl_plot + compare_plot + plot_layout(nrow = 1, ncol =3)
  }
  
  return(multiplot)
}

usp <- locus_zoom_plot(qtl = "eQTL", locus = "ECHDC3", gene = "USP6NL", gwas = "Marioni_2018", layout = "figure")

#ggsave(plot = usp, filename = "plots/USP6NL_Marioni_locus_zoom.pdf", width = 6, height = 4.5)
# bin1 <- locus_zoom_plot(qtl = "eQTL", locus = "BIN1", gene = "BIN1", gwas = "Kunkle_2019")
# 
# ggsave(plot = bin1, filename = "plots/BIN1_Kunkle_locus_zoom.pdf", width = 6, height = 4.5)
# #locus_zoom_plot(qtl = "eQTL", locus = "PICALM", gene = "PICALM", gwas = "Kunkle_2019")
# 
# #locus_zoom_plot(qtl = "eQTL", locus = "ECHDC3", gene = "USP6NL", gwas = "Kunkle_2019")

usp <- locus_zoom_plot(qtl = "eQTL", locus = "ECHDC3", gene = "USP6NL", gwas = "Marioni_2018", layout = "figure")

ggsave(plot = usp, filename = "plots/USP6NL_Marioni_locus_zoom.pdf", width = 9, height = 3)

p2ry <- locus_zoom_plot(qtl = "eQTL", locus = "MED12L", gene = "P2RY12", gwas = "Nalls23andMe_2019", layout = "figure")

ggsave(plot = p2ry, filename = "plots/P2RY12_Nalls_locus_zoom.pdf", width = 9, height = 3)
all_loci <- all_coloc %>% 
  dplyr::filter(! Dataset %in% c("Nicolas_2018","Daner_2020", "Wray_2018"), QTL == "MiGA eQTL", PP.H4.abf > 0.5 ) %>% 
  dplyr::filter(Gene != "") %>%  
  dplyr::select(Dataset, Locus, Gene, disease) %>% 
  distinct() %>% 
  filter(paste(Dataset, Locus) %in% 
  paste(all_finemap$Dataset, all_finemap$Locus)) %>%
  split(.$disease)#filter(Gene %in% all_coloc$Gene)
map_locus_zoom <- function(dataset, qtl_type = "eQTL"){
 plots <- map(1:nrow(dataset), ~{
   df <- dataset[.x,]
   print(df)
  p <- locus_zoom_plot(qtl = qtl_type, locus = df$Locus, gene = df$Gene, gwas = df$Dataset, layout = "figure")
  return(p)
 })
 return(plots)
}

plots <- map_locus_zoom(all_loci$AD[1:4,])
## # A tibble: 1 x 4
##   Dataset     Locus Gene  disease
##   <chr>       <chr> <chr> <chr>  
## 1 Jansen_2018 BIN1  BIN1  AD     
## # A tibble: 1 x 4
##   Dataset     Locus  Gene   disease
##   <chr>       <chr>  <chr>  <chr>  
## 1 Jansen_2018 ECHDC3 USP6NL AD     
## # A tibble: 1 x 4
##   Dataset     Locus  Gene       disease
##   <chr>       <chr>  <chr>      <chr>  
## 1 Jansen_2018 ECHDC3 AL512631.1 AD     
## # A tibble: 1 x 4
##   Dataset     Locus  Gene   disease
##   <chr>       <chr>  <chr>  <chr>  
## 1 Jansen_2018 PICALM PICALM AD
plots
## [[1]]
## Warning: Removed 1 rows containing missing values (geom_text_repel).

## 
## [[2]]
## Warning: Removed 1 rows containing missing values (geom_text_repel).

## 
## [[3]]
## Warning: Removed 1 rows containing missing values (geom_text_repel).

## 
## [[4]]
## Warning: Removed 1 rows containing missing values (geom_text_repel).

#locus_zoom_plot(qtl = "eQTL", gene = "PCCB", locus = "42", gwas = "Ripke_2014")