1引言
主要讲讲 geom_trackgenomiclabel 和 geom_trackgenomiclink。
2geom_trackgenomiclabel
geom_trackgenomiclabel 用来展示需要显示的基因名称等标签,适用于展示感兴趣的部分基因。标签绘制在对应染色体的 sector 里面。因此如果该染色体基因过多,则可能会重叠。
library(ggcirclize)
library(ggplot2)
library(circlize)
# Load the `hg19_chrom_info` dataset and pull out its chromosome-size and
# cytoband components for use as `chrom_data` / rectangle data below.
data("hg19_chrom_info")
chrom_hg19 <- hg19_chrom_info$chromsize
cytoband_hg19 <- hg19_chrom_info$cytoband

# Fix the RNG seed so the simulated regions and labels are reproducible.
set.seed(111)
bed <- generateRandomBed(nr = 50)
# generateRandomBed() may return a row count different from `nr`, so derive
# the number of labels from the actual table instead of hard-coding it.
bed$label <- sample(
  paste0("gene ", seq_len(nrow(bed))),
  nrow(bed),
  replace = FALSE
)
# check
head(bed, 3)
# chr start end value1 label
# 1 chr1 11430681 19926061 -0.47423780 gene 3
# 2 chr1 45779013 130923003 -0.24698111 gene 8
# 3 chr1 133984842 239572762 -0.08683706 gene 20
绘制标签:
# Basic label track: one label per row of `bed`, taken from its `label`
# column, positioned using the hg19 chromosome sizes.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(chrom_data = chrom_hg19)
修改连接线的长度:
# Same label track, with `link_r = 0.05`; per the accompanying text this
# changes the length of the connector lines.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_r = 0.05
  )
调整位置:
# Label track with both the connector (`link_pos`) and the strip label
# (`strip.label.pos`) set to "bottom".
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_pos = "bottom",
    strip.label.pos = "bottom"
  )
修改连接线的颜色:
# Label track whose connector lines use 24 randomly generated colours.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.7, r1 = 0.7,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_col = circlize::rand_color(24)
  )
修改标签颜色:
# Label track at r0 = r1 = 0.9 with random colours for both the connector
# lines (`link_col`) and the label text (`label.col`).
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.9, r1 = 0.9,
    chr = chr, gstart = start, gend = end,
    label = label
  )
) +
  geom_trackgenomiclabel(
    chrom_data = chrom_hg19,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24)
  )
添加染色体:
# Label track combined with a cytoband rectangle track (r0 = 0.95, r1 = 1)
# filled by the `stain` column. Uses FALSE rather than the reassignable `F`.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24)
  ) +
  geom_trackgenomicrect(
    data = cytoband_hg19,
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA,
    chrom_data = chrom_hg19
  ) +
  # Fixed fill colour per cytoband stain category.
  # NOTE(review): there is no entry for "gpos75" -- confirm whether the
  # cytoband data contains that stain and, if so, which colour it should get.
  scale_fill_manual(values = c(
    "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
    "gpos100" = "black", "gvar" = "black", "acen" = "red",
    "stalk" = "blue"
  ))
下面举例说明:如果某条染色体上需要标记的标签过多,看看 geom_trackgenomiclabel 会有什么表现:
# Build a deliberately crowded example: 10 distinct start positions drawn
# from a 500-bp grid within the first 100 kb, reused on chr2 and chr12.
st <- sample(seq(1, 100000, 500), 10, replace = FALSE)
bed1 <- data.frame(
  chr = rep("chr2", 10),
  start = st,
  end = st + 500,
  label = paste0("gene ", 1:10)
)
bed2 <- data.frame(
  chr = rep("chr12", 10),
  start = st,
  end = st + 500,
  label = paste0("gene ", 1:10)
)
# Stack both chromosomes into the table used by the plots that follow.
bed <- rbind(bed1, bed2)
绘图:
# Crowded-label example, with the cytoband track limited to chr2 and chr12.
# dplyr::filter() is called directly: the original used `%>%`, but no
# visible library() call attaches magrittr or dplyr.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24)
  ) +
  geom_trackgenomicrect(
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA,
    chrom_data = chrom_hg19
  ) +
  # Fixed fill colour per cytoband stain category.
  scale_fill_manual(values = c(
    "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
    "gpos100" = "black", "gvar" = "black", "acen" = "red",
    "stalk" = "blue"
  ))
默认 keep.all.chrom = FALSE, 即只显示数据里包含的染色体,假如我们设置显示所有染色体:
# Same crowded-label plot with `keep.all.chrom = TRUE` on both layers.
# dplyr::filter() is called directly: the original used `%>%`, but no
# visible library() call attaches magrittr or dplyr.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE,
    link_col = circlize::rand_color(24),
    label.col = circlize::rand_color(24),
    keep.all.chrom = TRUE
  ) +
  geom_trackgenomicrect(
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA,
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE
  ) +
  # Fixed fill colour per cytoband stain category.
  scale_fill_manual(values = c(
    "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
    "gpos100" = "black", "gvar" = "black", "acen" = "red",
    "stalk" = "blue"
  ))
这样你就会发现基因名重叠了。
针对这种情况,你可以使用 geom_trackgenomiclabel2,它会利用整个弧形空间来调整文字位置。
# Crowded-label example drawn with geom_trackgenomiclabel2 instead of
# geom_trackgenomiclabel. dplyr::filter() is called directly: the original
# used `%>%`, but no visible library() call attaches magrittr or dplyr.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel2(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE
  ) +
  geom_trackgenomicrect(
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    color = NA,
    chrom_data = chrom_hg19
  ) +
  # Fixed fill colour per cytoband stain category.
  scale_fill_manual(values = c(
    "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
    "gpos100" = "black", "gvar" = "black", "acen" = "red",
    "stalk" = "blue"
  ))
保留所有染色体:
# geom_trackgenomiclabel2 version with `keep.all.chrom = TRUE` on both
# layers. dplyr::filter() is called directly: the original used `%>%`, but
# no visible library() call attaches magrittr or dplyr.
ggcirclize(
  bed,
  aes(
    end = 360, r0 = 0.8, r1 = 0.8,
    chr = chr, gstart = start, gend = end
  )
) +
  geom_trackgenomiclabel2(
    aes(label = label),
    chrom_data = chrom_hg19,
    strip.label = FALSE,
    keep.all.chrom = TRUE
  ) +
  geom_trackgenomicrect(
    data = dplyr::filter(cytoband_hg19, chr %in% c("chr2", "chr12")),
    aes(r0 = 0.95, r1 = 1, fill = stain),
    keep.all.chrom = TRUE,
    color = NA,
    chrom_data = chrom_hg19
  ) +
  # Fixed fill colour per cytoband stain category.
  scale_fill_manual(values = c(
    "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
    "gpos100" = "black", "gvar" = "black", "acen" = "red",
    "stalk" = "blue"
  ))
3geom_trackgenomiclink
geom_trackgenomiclink 用来展示不同区域之间的联系。至少需要 4 列信息,也就是两个位点的坐标信息。如果是单个位点的话,绘制的就是线,否则就是带状形式。
准备数据:
# Fix the RNG seed so the simulated links are reproducible.
set.seed(123)
# Two independently generated random BED tables, each sampled down to 20 rows.
bed1 <- generateRandomBed(nr = 100)
bed1 <- bed1[sample(nrow(bed1), 20), ]
bed2 <- generateRandomBed(nr = 100)
bed2 <- bed2[sample(nrow(bed2), 20), ]
# Pair the tables row by row: the first three columns of each become one
# end of a link.
bed_link <- cbind(bed1[, 1:3], bed2[, 1:3])
colnames(bed_link) <- c("chr1", "start1", "end1", "chr2", "start2", "end2")
# Attach a continuous value and a categorical group (A-F) to every link.
bed_link$value <- rnorm(nrow(bed_link))
bed_link$group <- sample(LETTERS[1:6], nrow(bed_link), replace = TRUE)
# check
head(bed_link, 3)
# chr1 start1 end1 chr2 start2 end2 value group
# 41 chr6 102324459 147617643 chr16 59796357 69153026 -0.2506477 A
# 89 chr17 65167455 77619820 chrX 46532875 81569809 1.5202752 B
# 63 chr11 13366995 32331617 chr5 172937687 180577320 -2.0932565 B
# Stack both region tables for the rectangle/point tracks drawn later.
bed_c <- rbind(bed1, bed2)
# check
head(bed_c, 3)
# chr start end value1
# 41 chr6 102324459 147617643 -0.5041883
# 89 chr17 65167455 77619820 -0.1026496
# 63 chr11 13366995 32331617 0.4248215
单个位点:
# Links defined by one coordinate per end: the start position of each region.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1,
    chr1 = chr2, gstart1 = start2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19
  )
或者都是 end:
# Links defined by one coordinate per end: the end position of each region.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gend0 = end1,
    chr1 = chr2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19
  )
添加箭头:
# Single-coordinate links coloured by `group`, with closed arrowheads at both
# ends. `ends` is spelled out in full: the original `end = "both"` only
# worked through partial argument matching in arrow().
ggcirclize(
  bed_link,
  aes(
    end = 360, color = group, fill = group,
    chr0 = chr1, gstart0 = start1,
    chr1 = chr2, gstart1 = start2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19,
    curve.arrow = arrow(type = "closed", ends = "both")
  )
区间连接:
# Interval links: both start and end coordinates are mapped for each side.
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19
  )
保留所有染色体,可以看到有些染色体是没有连接的:
# Interval links with `keep.all.chrom = TRUE` (TRUE instead of the
# reassignable `T`).
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE
  )
映射颜色:
# Interval links filled by the categorical `group` column
# (TRUE instead of the reassignable `T`).
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1, fill = group),
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE
  )
添加箭头:
# Group-filled interval links with `start.arrow` and `end.arrow` switched on
# (TRUE instead of the reassignable `T`).
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1, fill = group),
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE,
    start.arrow = TRUE,
    end.arrow = TRUE
  )
映射连续变量:
# Interval links filled by the continuous `value` column, with arrows at
# both ends (TRUE instead of the reassignable `T`).
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1, fill = value),
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE,
    start.arrow = TRUE,
    end.arrow = TRUE
  )
修改线条:
# Interval links with a constant "grey90" fill and a dashed outline of
# width 1.5 (TRUE instead of the reassignable `T`).
ggcirclize(
  bed_link,
  aes(
    end = 360,
    chr0 = chr1, gstart0 = start1, gend0 = end1,
    chr1 = chr2, gstart1 = start2, gend1 = end2
  )
) +
  geom_trackgenomiclink(
    aes(r = 1),
    fill = "grey90",
    chrom_data = chrom_hg19,
    keep.all.chrom = TRUE,
    start.arrow = TRUE,
    end.arrow = TRUE,
    linetype = "dashed",
    linewidth = 1.5
  )
组合一下:
# Combined figure: cytoband ring, group-filled links, region rectangles and
# a point track. Uses TRUE/FALSE instead of the reassignable `T`/`F`.
ggcirclize(bed_link, aes(end = 360)) +
  # Cytoband rectangles between r0 = 0.95 and r1 = 1, filled by stain.
  geom_trackgenomicrect(
    data = cytoband_hg19,
    aes(
      r0 = 0.95, r1 = 1,
      chr = chr, gstart = start, gend = end,
      fill = stain
    ),
    keep.all.chrom = TRUE,
    color = NA,
    chrom_data = chrom_hg19
  ) +
  scale_fill_manual(values = c(
    "gneg" = "white", "gpos25" = "grey75", "gpos50" = "grey50",
    "gpos100" = "black", "gvar" = "black", "acen" = "red",
    "stalk" = "blue"
  )) +
  # Start a second fill scale so the links can map `fill` to `group`.
  ggnewscale::new_scale_fill() +
  geom_trackgenomiclink(
    aes(
      chr0 = chr1, gstart0 = start1, gend0 = end1,
      chr1 = chr2, gstart1 = start2, gend1 = end2,
      r = 0.5, fill = group
    ),
    strip.label = FALSE,
    keep.all.chrom = TRUE,
    chrom_data = chrom_hg19
  ) +
  # Orange rectangles for every region in `bed_c` (r0 = 0.55, r1 = 0.75).
  geom_trackgenomicrect(
    data = bed_c,
    aes(
      start = 0, r0 = 0.55, r1 = 0.75,
      chr = chr, gstart = start, gend = end
    ),
    fill = "orange",
    strip.label = FALSE,
    add.xaxis = FALSE,
    keep.all.chrom = TRUE,
    chrom_data = chrom_hg19
  ) +
  # Points for `value1`, coloured by chromosome (r0 = 0.75, r1 = 0.95).
  geom_trackgenomicpoint(
    data = bed_c,
    aes(
      start = 0, r0 = 0.75, r1 = 0.95,
      chr = chr, gstart = start, gend = end,
      value = value1, color = chr
    ),
    keep.all.chrom = TRUE,
    strip.label = FALSE,
    add.xaxis = FALSE,
    chrom_data = chrom_hg19
  )
4结尾
路漫漫其修远兮,吾将上下而求索。
欢迎加入生信交流群。加我微信我也拉你进 微信群聊 老俊俊生信交流群 (微信交流群需收取 20 元入群费用,一旦交费,拒不退还!(防止骗子和便于管理)) 。QQ 群可免费加入, 记得进群按格式修改备注哦。
老俊俊微信:
知识星球:
声明:文中观点不代表本站立场。本文传送门:https://eyangzhen.com/384692.html