ggcirclize 绘制标签和连接

1 引言

主要讲讲 geom_trackgenomiclabel 和 geom_trackgenomiclink

2 geom_trackgenomiclabel

geom_trackgenomiclabel 用来展示需要显示的基因名称等标签,适用于展示感兴趣的部分基因。标签绘制在对应染色体的 sector 里面。因此如果该染色体基因过多,则可能会重叠。

library(ggcirclize)
library(ggplot2)
library(circlize)


# Load the bundled hg19 annotation object and keep its two components that the
# examples below pass around: chromosome sizes and cytoband records.
data("hg19_chrom_info")
chrom_hg19 <- hg19_chrom_info[["chromsize"]]
cytoband_hg19 <- hg19_chrom_info[["cytoband"]]

# Simulate random genomic regions and attach one unique label to each region.
set.seed(111)
bed <- generateRandomBed(nr = 50)
# The number of rows returned is not guaranteed to equal `nr` (this seed yields
# 55 rows), so derive the label count from the data instead of hard-coding it.
# sample(x) is a full permutation without replacement, i.e. the same draw as
# sample(x, length(x), replace = FALSE).
bed$label <- sample(paste0("gene ", seq_len(nrow(bed))))

# check
head(bed, 3)
#    chr     start       end      value1   label
# 1 chr1  11430681  19926061 -0.47423780  gene 3
# 2 chr1  45779013 130923003 -0.24698111  gene 8
# 3 chr1 133984842 239572762 -0.08683706 gene 20

绘制标签:

# Basic label track: every label is drawn inside the sector of its chromosome.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(chrom_data = chrom_hg19)
图片

修改连接线的长度:

# Same label track, with the connector line length changed via `link_r`.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_r = 0.05
  )
图片

调整位置:

# Reposition the track: connector lines and strip labels both set to "bottom".
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_pos = "bottom",
    strip.label.pos = "bottom"
  )
图片

修改连接线的颜色:

# Colour the connector lines with 24 random colours
# (rand_color() is unseeded here, so colours differ between runs).
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_col = circlize::rand_color(24)
  )
图片

修改标签颜色:

# Colour both the connector lines and the label text with random colours;
# this example also moves the track outwards to radius 0.9.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.9, r1 = 0.9,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24)
  )
图片

添加染色体:

# Label track plus an outer cytoband ring filled by stain type.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel(
    aes(label = label),
    chrom_data = chrom_hg19,
    # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
    strip.label = FALSE,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24)
  ) +
  geom_trackgenomicrect(
    data = cytoband_hg19,
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA, chrom_data = chrom_hg19
  ) +
  scale_fill_manual(
    values = c(
      "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
      "gpos100" = "black", "gvar" = "black", "acen" = "red",
      "stalk" = "blue"
    )
  )
图片

下面举例说明:如果一条染色体上需要标记的标签过多,geom_trackgenomiclabel 会有什么表现:

# Build a deliberately crowded example: 10 short (500 bp) regions packed into
# the first 100 kb, placed at the same coordinates on both chr2 and chr12.
# No seed is set here; reproducibility relies on an earlier set.seed() call.
st <- sample(seq(1, 100000, by = 500), 10, replace = FALSE)
bed1 <- data.frame(
  chr = rep("chr2", 10),
  start = st, end = st + 500, label = paste0("gene ", 1:10)
)
bed2 <- data.frame(
  chr = rep("chr12", 10),
  start = st, end = st + 500, label = paste0("gene ", 1:10)
)

# Note: this overwrites the `bed` object used by the earlier examples.
bed <- rbind(bed1, bed2)

绘图:

# Crowded labels on chr2/chr12 only (chromosomes absent from the data are
# dropped by default), with the cytoband ring restricted to the same two.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24)
  ) +
  geom_trackgenomicrect(
    # Call dplyr::filter() directly so the snippet does not depend on a
    # `%>%` pipe being attached by one of the loaded packages.
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA, chrom_data = chrom_hg19
  ) +
  scale_fill_manual(
    values = c(
      "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
      "gpos100" = "black", "gvar" = "black", "acen" = "red",
      "stalk" = "blue"
    )
  )
图片

默认 keep.all.chrom = FALSE, 即只显示数据里包含的染色体,假如我们设置显示所有染色体:

# Same crowded labels, but keeping every chromosome in the layout
# (keep.all.chrom = TRUE), which leaves chr2/chr12 much less room.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24),
    keep.all.chrom = TRUE
  ) +
  geom_trackgenomicrect(
    # Call dplyr::filter() directly so the snippet does not depend on a
    # `%>%` pipe being attached by one of the loaded packages.
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA, chrom_data = chrom_hg19,
    keep.all.chrom = TRUE
  ) +
  scale_fill_manual(
    values = c(
      "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
      "gpos100" = "black", "gvar" = "black", "acen" = "red",
      "stalk" = "blue"
    )
  )
图片

这样你就会发现基因名重叠了。

针对这种情况你可以使用 geom_trackgenomiclabel2, 它会利用整个弧形空间进行调整文字位置。

# geom_trackgenomiclabel2() variant, which adjusts label positions using the
# whole arc to avoid overlap; only chr2/chr12 are shown here.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel2(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE
  ) +
  geom_trackgenomicrect(
    # Call dplyr::filter() directly so the snippet does not depend on a
    # `%>%` pipe being attached by one of the loaded packages.
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA, chrom_data = chrom_hg19
  ) +
  scale_fill_manual(
    values = c(
      "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
      "gpos100" = "black", "gvar" = "black", "acen" = "red",
      "stalk" = "blue"
    )
  )
图片

保留所有染色体:

# geom_trackgenomiclabel2() with every chromosome kept in the layout.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel2(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE,
    keep.all.chrom = TRUE
  ) +
  geom_trackgenomicrect(
    # Call dplyr::filter() directly so the snippet does not depend on a
    # `%>%` pipe being attached by one of the loaded packages.
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    keep.all.chrom = TRUE,
    color = NA, chrom_data = chrom_hg19
  ) +
  scale_fill_manual(
    values = c(
      "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
      "gpos100" = "black", "gvar" = "black", "acen" = "red",
      "stalk" = "blue"
    )
  )
图片

3 geom_trackgenomiclink

geom_trackgenomiclink 用来展示不同区域之间的联系。至少需要 4 列信息,也就是两个位点的坐标信息。如果是单个位点的话,绘制的就是线,否则就是带状形式。

准备数据:

# Build a table of 20 random region pairs: two independent random bed sets are
# each subsampled to 20 rows, then paired row-by-row.
set.seed(123)
bed1 <- generateRandomBed(nr = 100)
bed1 <- bed1[sample(nrow(bed1), 20), ]
bed2 <- generateRandomBed(nr = 100)
bed2 <- bed2[sample(nrow(bed2), 20), ]

# Keep only chr/start/end from each side and give the six columns distinct names.
bed_link <- cbind(bed1[, 1:3], bed2[, 1:3])
colnames(bed_link) <- c("chr1", "start1", "end1", "chr2", "start2", "end2")
# Extra columns used later for continuous (value) and discrete (group) mappings.
bed_link$value <- rnorm(nrow(bed_link))
bed_link$group <- sample(LETTERS[1:6], nrow(bed_link), replace = TRUE)

# check
head(bed_link, 3)
#     chr1    start1      end1  chr2    start2      end2      value group
# 41  chr6 102324459 147617643 chr16  59796357  69153026 -0.2506477     A
# 89 chr17  65167455  77619820  chrX  46532875  81569809  1.5202752     B
# 63 chr11  13366995  32331617  chr5 172937687 180577320 -2.0932565     B

# Stack both region sets into one long table (chr/start/end/value1); it feeds
# the rectangle and point tracks of the combined figure.
bed_c <- rbind(bed1, bed2)

# check
head(bed_c, n = 3)
#      chr     start       end     value1
# 41  chr6 102324459 147617643 -0.5041883
# 89 chr17  65167455  77619820 -0.1026496
# 63 chr11  13366995  32331617  0.4248215

单个位点:

# Single-position links: only the start coordinate of each side is mapped,
# so each link is drawn as a line rather than a ribbon.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1,
    chr1 = chr2, gstart1 = start2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19
  )
图片

或者都是 end:

# Single-position links again, this time anchored on the end coordinates.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gend0 = end1,
    chr1 = chr2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19
  )
图片

添加箭头:

# Line links coloured by group, with closed arrow heads on both ends.
ggcirclize(
  bed_link,
  aes(
    end = 360, color = group, fill = group,
    chr0 = chr1, gstart0 = start1,
    chr1 = chr2, gstart1 = start2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19,
    # arrow()'s argument is `ends`; the original `end =` only worked through
    # partial argument matching.
    curve.arrow = arrow(type = "closed", ends = "both")
  )
图片

区间连接:

# Interval links: both start and end are mapped on each side, so every link is
# drawn as a ribbon.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19
  )
图片

保留所有染色体,可以看到有些染色体是没有连接的:

# Ribbon links with every chromosome kept in the layout, including those that
# take part in no link.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19,
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    keep.all.chrom = TRUE
  )
图片

映射颜色:

# Ribbon links with fill mapped to the discrete `group` column.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1, fill = group),
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE
  )
图片

添加箭头:

# Group-filled ribbon links with arrows enabled at both the start and the end.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1, fill = group),
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE,
    start.arrow = TRUE, end.arrow = TRUE
  )
图片

映射连续变量:

# Ribbon links with fill mapped to the continuous `value` column.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1, fill = value),
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE,
    start.arrow = TRUE, end.arrow = TRUE
  )
图片

修改线条:

# Ribbon links with a fixed light-grey fill and a thick dashed outline.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    fill = "grey90",
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE,
    start.arrow = TRUE, end.arrow = TRUE,
    linetype = "dashed", linewidth = 1.5
  )
图片

组合一下:

# Combined figure, from the outside in: cytoband ring (r 0.95-1), value points
# (r 0.75-0.95), region rectangles (r 0.55-0.75) and links (r = 0.5).
# `T`/`F` are spelled out as TRUE/FALSE because they are reassignable variables.
ggcirclize(bed_link, aes(end = 360)) +
  # Cytoband ring filled by stain type.
  geom_trackgenomicrect(
    data = cytoband_hg19,
    aes(
      r0 = 0.95, r1 = 1,
      chr = chr, gstart = start, gend = end,
      fill = stain
    ),
    keep.all.chrom = TRUE,
    color = NA, chrom_data = chrom_hg19
  ) +
  scale_fill_manual(
    values = c(
      "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
      "gpos100" = "black", "gvar" = "black", "acen" = "red",
      "stalk" = "blue"
    )
  ) +
  # Start a fresh fill scale so the links can map `group` to fill without
  # clashing with the manual stain scale above.
  ggnewscale::new_scale_fill() +
  # Links between the paired regions, filled by group.
  geom_trackgenomiclink(
    aes(
      chr0 = chr1, gstart0 = start1, gend0 = end1,
      chr1 = chr2, gstart1 = start2, gend1 = end2,
      r = 0.5, fill = group
    ),
    strip.label = FALSE,
    keep.all.chrom = TRUE,
    chrom_data = chrom_hg19
  ) +
  # Orange rectangles marking every region from both bed sets.
  geom_trackgenomicrect(
    data = bed_c,
    aes(
      start = 0, r0 = 0.55, r1 = 0.75,
      chr = chr, gstart = start, gend = end
    ),
    fill = "orange", strip.label = FALSE, add.xaxis = FALSE,
    keep.all.chrom = TRUE,
    chrom_data = chrom_hg19
  ) +
  # Points showing `value1` for the same regions, coloured by chromosome.
  geom_trackgenomicpoint(
    data = bed_c,
    aes(
      start = 0, r0 = 0.75, r1 = 0.95,
      chr = chr, gstart = start, gend = end,
      value = value1, color = chr
    ),
    keep.all.chrom = TRUE, strip.label = FALSE,
    add.xaxis = FALSE,
    chrom_data = chrom_hg19
  )
图片

4 结尾

路漫漫其修远兮,吾将上下而求索。


欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群 (微信交流群需收取 20 元入群费用,一旦交费,拒不退还!(防止骗子和便于管理)) 。QQ 群可免费加入, 记得进群按格式修改备注哦。

老俊俊微信:

知识星球:

声明:文中观点不代表本站立场。本文传送门:https://eyangzhen.com/384692.html

(0)
联系我们
联系我们
分享本页
返回顶部