In [1]:
suppressMessages(library(ComplexHeatmap))
suppressMessages(library(RColorBrewer))
suppressMessages(library(tidyverse))
suppressMessages(library(dplyr))
suppressMessages(library(ggplot2))
suppressMessages(library(cowplot))
In [2]:
# Load the differential footprinting statistics table. read.table() already
# returns a data.frame, so no further conversion is needed.
df <- read.table("./DiffFootprinting/All_statistics.txt", header = TRUE)

# Cell types for which the table is expected to carry a
# Protection_Score_<cell type> / TC_<cell type> column pair
cell_types <- c("CLP", "CMP", "GMP", "HSC", "LMPP", "MEP", "MPP", "pDC")

# Fail early and name the absent columns; otherwise the sums below would
# stop with an uninformative "replacement has 0 rows" error.
required_cols <- c(paste0("Protection_Score_", cell_types),
                   paste0("TC_", cell_types))
missing_cols <- setdiff(required_cols, colnames(df))
if (length(missing_cols) > 0) {
    stop("All_statistics.txt is missing column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
}

## Sum the protection score and the TC score for each cell type.
## `[[ ]]` matches column names exactly, whereas `$` would silently accept a
## partial name match.
for (cell_type in cell_types) {
    df[[cell_type]] <- df[[paste0("Protection_Score_", cell_type)]] +
        df[[paste0("TC_", cell_type)]]
}
In [3]:
# For some TFs the JASPAR database includes several motif variants (names
# containing "var"); drop them, then use the motif name as the row name.
# tibble is attached by library(tidyverse), so column_to_rownames() needs no
# package beyond those the notebook already loads.
df <- df %>%
    dplyr::filter(!grepl("var", Motif)) %>%
    tibble::column_to_rownames("Motif")

# Keep only TFs with more than 1000 binding sites, and retain just the
# per-cell-type score columns
df <- subset(df, Num > 1000,
             select = c("CLP", "CMP", "GMP",
                        "HSC", "LMPP", "MEP",
                        "MPP", "pDC"))
In [4]:
# Number of most variable TFs to show in the heatmap
n_top_tfs <- 50

# Row-wise standard deviation of each TF's score across the cell types
tf_sd <- apply(as.matrix(df), 1, sd)

# Keep the n_top_tfs rows with the largest SD. Ties at the cutoff are kept and
# the original row order is preserved, as top_n() did. Base indexing is used
# so the motif row names survive whatever dplyr version is installed. Rows
# with zero or missing SD are dropped: z-scoring them yields NaN/NA, which
# breaks the hierarchical clustering below.
sd_rank <- rank(-tf_sd, ties.method = "min", na.last = "keep")
keep <- which(sd_rank <= n_top_tfs & tf_sd > 0)
df <- df[keep, , drop = FALSE]

# z-score each TF (row) across cell types; scale() works column-wise, hence
# the double transpose
df <- t(scale(t(df)))

options(repr.plot.width = 6, repr.plot.height = 8)

# Clustered heatmap of the scaled scores, with a thin black border per cell
p <- Heatmap(as.matrix(df),
             name = "TF Activity",
             cluster_columns = TRUE,
             cluster_rows = TRUE,
             show_row_names = TRUE,
             rect_gp = gpar(col = "black", lwd = 0.5)
            )

p
In [ ]: