论文
A high-quality genome compendium of the human gut microbiome of Inner Mongolians
2023Naturemicrobiology--Ahigh-qualitygenomecompendiumofthehumangutmicrobiomeofInnerMongolians4.pdf
论文中大部分作图数据都有,争取把论文中的图都复现一下
今天的推文我们试着复现一下论文中的Figure2b
今天推文的主要知识点有两个:四个柱子各自单独配色,以及如何调整四个图例的前后顺序
数据的部分截图
读取数据
library(readxl)
library(tidyverse)
# Read the "Fig2b" sheet of the supplementary workbook into `df`.
df <- "data/20230305/41564_2022_1270_MOESM5_ESM.xlsx" %>%
  read_excel(sheet = "Fig2b")
# Preview the first rows.
head(df)
每一列单独统计频率
# Count rows per phylum: strip the "p__" prefix, keep the five phyla listed
# below under their own name, and pool every other value into "Others".
df01 <- df %>%
  transmute(Phylum = str_replace(Phylum, "p__", "")) %>%
  mutate(group01 = if_else(
    Phylum %in% c("Actinobacteriota", "Firmicutes_A", "Bacteroidota",
                  "Firmicutes", "Proteobacteria"),
    Phylum,
    "Others"
  )) %>%
  count(group01, name = "value") %>%
  # Give the grouping column an explicit factor level order.
  mutate(group01 = factor(
    group01,
    levels = c("Others", "Proteobacteria", "Firmicutes",
               "Bacteroidota", "Firmicutes_A", "Actinobacteriota")
  ))
# Print the summary table.
df01
以上代码需要针对 Phylum、Class、Order、Family 四个分类层级各运行一次(其余三个层级的代码见下文)
这里统计的和论文中的内容有些出入,暂时搞不清楚问题出在哪里
# Count rows per class: strip the "c__" prefix, keep the five classes listed
# below under their own name, and pool every other value into "Others".
df02 <- df %>%
  transmute(Class = str_replace(Class, "c__", "")) %>%
  mutate(group02 = if_else(
    Class %in% c("Negativicutes", "Clostridia", "Bacteroidia",
                 "Bacilli", "Gammaproteobacteria"),
    Class,
    "Others"
  )) %>%
  count(group02, name = "value") %>%
  # Give the grouping column an explicit factor level order.
  mutate(group02 = factor(
    group02,
    levels = c("Others", "Gammaproteobacteria", "Bacilli",
               "Bacteroidia", "Clostridia", "Negativicutes")
  ))
# Count rows per order: strip the "o__" prefix, keep the five orders listed
# below under their own name, and pool every other value into "Others".
df03 <- df %>%
  transmute(Order = str_replace(Order, "o__", "")) %>%
  mutate(group03 = if_else(
    Order %in% c("Lachnospirales", "Oscillospirales", "Bacteroidales",
                 "Christensenellales", "Lactobacillales"),
    Order,
    "Others"
  )) %>%
  count(group03, name = "value") %>%
  # Give the grouping column an explicit factor level order.
  mutate(group03 = factor(
    group03,
    levels = c("Others", "Lactobacillales", "Christensenellales",
               "Bacteroidales", "Oscillospirales", "Lachnospirales")
  ))
# Count rows per family: strip the "f__" prefix, keep the five families listed
# below under their own name, and pool every other value into "Others".
df04 <- df %>%
  transmute(Family = str_replace(Family, "f__", "")) %>%
  mutate(group04 = if_else(
    Family %in% c("Lachnospiraceae", "Oscillospiraceae", "Ruminococcaceae",
                  "Acutalibacteraceae", "Bacteroidaceae"),
    Family,
    "Others"
  )) %>%
  count(group04, name = "value") %>%
  # Give the grouping column an explicit factor level order.
  mutate(group04 = factor(
    group04,
    levels = c("Others", "Bacteroidaceae", "Acutalibacteraceae",
               "Ruminococcaceae", "Oscillospiraceae", "Lachnospiraceae")
  ))
作图代码
# Draw four proportion-stacked bars (x = 1..4), one per summary table
# (df01..df04). ggnewscale::new_scale_fill() is inserted between layers so
# that every bar gets its own fill scale and therefore its own legend.
#
# For each scale_fill_manual() call below:
#   * `values` is unnamed, so the colours are paired with `breaks` by position;
#   * `breaks` lists the categories in the order they appear in the legend;
#   * guide_legend(order = k) sets the order of the four legends themselves.
ggplot() +
  # Bar 1: phylum.
  geom_col(data = df01,
           aes(x = 1, y = value, fill = group01),
           position = "fill") +
  scale_fill_manual(values = c("#827f88", "#3288bd", "#f36c44",
                               "#e4e569", "#b9b9dd", "#000000"),
                    breaks = c("Actinobacteriota", "Firmicutes_A",
                               "Bacteroidota", "Firmicutes",
                               "Proteobacteria", "Others"),
                    name = "Phylum",
                    guide = guide_legend(order = 1)) +
  ggnewscale::new_scale_fill() +
  # Bar 2: class.
  geom_col(data = df02,
           aes(x = 2, y = value, fill = group02),
           position = "fill") +
  scale_fill_manual(values = c("#7ed0de", "#5f50a1", "#add8a4",
                               "#fddf8a", "#8a95ab", "#b57c82"),
                    breaks = c("Negativicutes", "Clostridia",
                               "Bacteroidia", "Bacilli",
                               "Gammaproteobacteria", "Others"),
                    name = "Class",
                    guide = guide_legend(order = 2)) +
  ggnewscale::new_scale_fill() +
  # Bar 3: order.
  geom_col(data = df03,
           aes(x = 3, y = value, fill = group03),
           position = "fill") +
  scale_fill_manual(values = c("#134b5f", "#9ba791", "#eb9486",
                               "#adc0e3", "#cc141d", "#1d933a"),
                    breaks = c("Lachnospirales", "Oscillospirales",
                               "Bacteroidales", "Christensenellales",
                               "Lactobacillales", "Others"),
                    name = "Order",
                    guide = guide_legend(order = 3)) +
  ggnewscale::new_scale_fill() +
  # Bar 4: family.
  geom_col(data = df04,
           aes(x = 4, y = value, fill = group04),
           position = "fill") +
  scale_fill_manual(values = c("#59a691", "#505d75", "#c9b014",
                               "#9d1b45", "#ee8354", "#bb7b53"),
                    breaks = c("Lachnospiraceae", "Oscillospiraceae",
                               "Ruminococcaceae", "Acutalibacteraceae",
                               "Bacteroidaceae", "Others"),
                    name = "Family",
                    guide = guide_legend(order = 4)) +
  # Label the four numeric x positions with the rank names. The argument is
  # spelled out as `labels` rather than relying on partial matching of `label`.
  scale_x_continuous(breaks = c(1, 2, 3, 4),
                     labels = c("Phylum", "Class", "Order", "Family")) +
  theme_bw() +
  theme(panel.grid = element_blank(),
        legend.key.size = unit(3, "mm")) +
  labs(x = NULL, y = "Proportion")
推文记录的是自己的学习笔记,很可能存在错误,请大家批判着看
示例数据和代码可以给推文打赏一元获取
声明:文中观点不代表本站立场。本文传送门:https://eyangzhen.com/57192.html