rgt-hint 转录因子预测结果可视化

引言


这里主要对 rgt 预测差异转录因子的输出结果进行重新绘图。

Lineplots 结果是具体预测的 motif 在全基因组上的 footprint 结合信息, 差异的结果则储存在 differential_statistics.txt。
绘图

绘制差异结果的火山图:
setwd(“~/JunJun/8.atacSeq_proj/6.footprint-data/CTL_siNCoR/”)

library(ggplot2)
library(ggrepel)
library(dplyr)

de <- read.delim(“differential_statistics.txt”) %>%
mutate(type = if_else(P_values < 0.05,”sigTF”,”nonsigTF”))

check

head(de)

Motif Num Protection_Score_CTL Protection_Score_siNCoR TC_CTL TC_siNCoR TF_Activity

1 MA1645.1.NKX2-2 4477 0.4885421 0.3008693 0.7111927 0.4536498 -0.44521579

2 MA1114.1.PBX3 4827 0.2360842 0.1123626 0.7566007 0.6198544 -0.26046793

3 MA1493.1.HES6 2399 0.3567710 0.3080597 0.8004308 0.6267882 -0.22235388

4 MA0467.1.Crx 2767 0.3652523 0.3291425 0.5681782 0.5047526 -0.09953544

5 MA1649.1.ZBTB12 3235 -0.4410400 -0.3347849 0.6667690 0.5216612 -0.03885269

6 MA1653.1.ZNF148 36771 1.0893183 0.8442928 1.0740597 0.8580158 -0.46106941

Z_score P_values type

1 -1.50718866 0.1317623 nonsigTF

2 -0.85208810 0.3941652 nonsigTF

3 -0.71693883 0.4734118 nonsigTF

4 -0.28143485 0.7783769 nonsigTF

5 -0.06625888 0.9471717 nonsigTF

6 -1.56340425 0.1179575 nonsigTF

plot

ggplot(de,aes(x = TF_Activity,y = -log10(P_values))) +
geom_point(aes(color = type)) +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text = element_text(colour = “black”)) +
geom_hline(yintercept = -log10(0.05),lty = “dashed”) +
scale_color_manual(values = c(“sigTF” = “red”,”nonsigTF” = “grey50”)) +
geom_label_repel(data = . %>% filter(P_values < 0.05),
aes(label = Motif))

换个展示方式,比如按 TF activity 进行排序:
de <- de %>% arrange(desc(TF_Activity))
de$x <- rownames(de) %>% as.numeric()

ggplot(de,aes(x = x,y = TF_Activity)) +
geom_point(aes(color = type)) +
theme_bw() +
theme(panel.grid = element_blank(),
axis.text = element_text(colour = “black”)) +
geom_hline(yintercept = 0,lty = “dashed”) +
scale_color_manual(values = c(“sigTF” = “red”,”nonsigTF” = “grey50”)) +
geom_label_repel(data = . %>% filter(P_values < 0.05),
aes(label = Motif)) +
xlab(“Rank by TF Activity”)


footprint 的折线图我们可以把显著的(p < 0.05)都画出来, 但是输出结果没有关于 motif 的数据,所以画不了 motif。

批量读取数据然后合并,再绘图:

==============================================================================

plot footprinting

==============================================================================

sigTF <- de %>% filter(P_values < 0.05)

TF_name <- sigTF$Motif

loop plot footprinting

x = 1

lapply(seq_along(TF_name),function(x){
# check names
var <- str_detect(TF_name[x],pattern = “var”)

if(var){
tmp_name <- str_replace(TF_name[x],pattern = “\(|\)”,replacement = “_”)
tmp_name <- str_replace(tmp_name,pattern = “\)”,replacement = “”)
}else{
tmp_name <- TF_name[x]
}

# load data
ft <- read.delim(paste0(“Lineplots/”,tmp_name,”.txt”)) %>%
mutate(pos = -100:99,TF = tmp_name) %>%
pivot_longer(cols = c(“CTL”,”siNCoR”),
names_to = “group”,
values_to = “Occupancy”)

}) %>% do.call(“rbind”,.) -> ft_df

check

head(ft_df,3)

A tibble: 3 × 4

pos TF group Occupancy

1 -100 MA0862.1.GMEB2 CTL 0.685

2 -100 MA0862.1.GMEB2 siNCoR 0.832

3 -99 MA0862.1.GMEB2 CTL 0.723

plot

ggplot(ft_df,aes(x = pos,y = Occupancy,color = group)) +
geom_line() +
theme_bw() +
theme(panel.grid = element_blank(),
strip.background = element_rect(fill = “grey90”),
strip.text = element_text(face = “bold.italic”),
axis.text = element_text(colour = “black”)) +
facet_wrap(~TF,scales = “free”,ncol = 5)

我们选第一个看看 KLF4 是一样的,我们看看已知的这个 motif 的情况:
library(ATACseqQC)
library(MotifDb)
library(ggseqlogo)

MT <- query(MotifDb, andStrings = c(“hsapiens”,”KLF4″))
ggseqlogo(as.list(MT))

可以看到 Hsapiens-jaspar2022 数据库的 motif 数据和重新预测的很像。


路漫漫其修远兮,吾将上下而求索。
欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群 (微信交流群需收取 20 元入群费用,一旦交费,拒不退还!(防止骗子和便于管理)) 。QQ 群可免费加入, 记得进群按格式修改备注哦。

声明:文中观点不代表本站立场。本文传送门:https://eyangzhen.com/415552.html

(0)
联系我们
联系我们
分享本页
返回顶部