跟着Nature microbiology学作图:R语言ggplot2堆积柱形图柱子单独配色/多个图例排序

论文

A high-quality genome compendium of the human gut microbiome of Inner Mongolians

https://www.nature.com/articles/s41564-022-01270-1

2023Naturemicrobiology--Ahigh-qualitygenomecompendiumofthehumangutmicrobiomeofInnerMongolians4.pdf

论文中大部分作图数据都有,争取把论文中的图都复现一下

今天的推文我们试着复现一下论文中的Figure2b

图片
image.png

今天推文的主要知识点有两个:一是四个柱子各自单独配色,二是如何调整四个图例的先后顺序

数据的部分截图

图片
image.png

读取数据

library(readxl)
library(tidyverse)
# Load the "Fig2b" sheet of the supplementary workbook and preview the first
# rows to check the columns that were read.
df <- read_excel(
  path = "data/20230305/41564_2022_1270_MOESM5_ESM.xlsx",
  sheet = "Fig2b"
)
head(df)

每一列单独统计频率

# Phylum-level summary: number of rows of `df` per phylum group.
# The five phyla listed below keep their own name; every other value
# (missing values included) is pooled into "Others".
df01 <- df %>%
  mutate(
    # Drop the "p__" rank prefix only when it leads the name.
    Phylum = str_remove(Phylum, "^p__"),
    group01 = if_else(
      Phylum %in% c("Actinobacteriota", "Firmicutes_A", "Bacteroidota",
                    "Firmicutes", "Proteobacteria"),
      Phylum,
      "Others"
    )
  ) %>%
  # One row per group; `value` is the row count of that group.
  count(group01, name = "value") %>%
  mutate(
    # Explicit level order, "Others" first.
    group01 = factor(
      group01,
      levels = c("Others", "Proteobacteria", "Firmicutes",
                 "Bacteroidota", "Firmicutes_A", "Actinobacteriota")
    )
  )


df01
图片
image.png

以上代码需要针对门(Phylum)、纲(Class)、目(Order)、科(Family)四个分类水平各运行一次,每次只需替换对应的列名和分组名称

这里统计的和论文中的内容有些出入,暂时搞不清楚问题出在哪里

# Class-level summary: number of rows of `df` per class group.
# The five classes listed below keep their own name; every other value
# (missing values included) is pooled into "Others".
df02 <- df %>%
  mutate(
    # Drop the "c__" rank prefix only when it leads the name.
    Class = str_remove(Class, "^c__"),
    group02 = if_else(
      Class %in% c("Negativicutes", "Clostridia", "Bacteroidia",
                   "Bacilli", "Gammaproteobacteria"),
      Class,
      "Others"
    )
  ) %>%
  # One row per group; `value` is the row count of that group.
  count(group02, name = "value") %>%
  mutate(
    # Explicit level order, "Others" first.
    group02 = factor(
      group02,
      levels = c("Others", "Gammaproteobacteria", "Bacilli",
                 "Bacteroidia", "Clostridia", "Negativicutes")
    )
  )


# Order-level summary: number of rows of `df` per order group.
# The five orders listed below keep their own name; every other value
# (missing values included) is pooled into "Others".
df03 <- df %>%
  mutate(
    # Drop the "o__" rank prefix only when it leads the name.
    Order = str_remove(Order, "^o__"),
    group03 = if_else(
      Order %in% c("Lachnospirales", "Oscillospirales", "Bacteroidales",
                   "Christensenellales", "Lactobacillales"),
      Order,
      "Others"
    )
  ) %>%
  # One row per group; `value` is the row count of that group.
  count(group03, name = "value") %>%
  mutate(
    # Explicit level order, "Others" first.
    group03 = factor(
      group03,
      levels = c("Others", "Lactobacillales", "Christensenellales",
                 "Bacteroidales", "Oscillospirales", "Lachnospirales")
    )
  )


# Family-level summary: number of rows of `df` per family group.
# The five families listed below keep their own name; every other value
# (missing values included) is pooled into "Others".
df04 <- df %>%
  mutate(
    # Drop the "f__" rank prefix only when it leads the name.
    Family = str_remove(Family, "^f__"),
    group04 = if_else(
      Family %in% c("Lachnospiraceae", "Oscillospiraceae", "Ruminococcaceae",
                    "Acutalibacteraceae", "Bacteroidaceae"),
      Family,
      "Others"
    )
  ) %>%
  # One row per group; `value` is the row count of that group.
  count(group04, name = "value") %>%
  mutate(
    # Explicit level order, "Others" first.
    group04 = factor(
      group04,
      levels = c("Others", "Bacteroidaceae", "Acutalibacteraceae",
                 "Ruminococcaceae", "Oscillospiraceae", "Lachnospiraceae")
    )
  )

作图代码

# Four proportion-stacked bars (x = 1..4), one per summary table
# (df01..df04). Each bar gets its own fill scale: ggnewscale::new_scale_fill()
# closes the previous fill scale so the next layer can define a fresh one, and
# guide_legend(order = k) fixes the order in which the four legends appear.
#
# NOTE: `values` is unnamed while `breaks` is supplied; per the
# scale_fill_manual() documentation the values are then matched in order with
# the breaks, so the first colour goes to the first break (the reversed level
# vector starts with the last level, ends with "Others").
ggplot() +
  # Bar 1: Phylum
  geom_col(
    data = df01,
    mapping = aes(x = 1, y = value, fill = group01),
    position = "fill"
  ) +
  scale_fill_manual(
    values = c("#827f88", "#3288bd", "#f36c44",
               "#e4e569", "#b9b9dd", "#000000"),
    breaks = rev(c("Others", "Proteobacteria",
                   "Firmicutes", "Bacteroidota",
                   "Firmicutes_A",
                   "Actinobacteriota")),
    name = "Phylum",
    guide = guide_legend(order = 1)
  ) +
  ggnewscale::new_scale_fill() +
  # Bar 2: Class
  geom_col(
    data = df02,
    mapping = aes(x = 2, y = value, fill = group02),
    position = "fill"
  ) +
  scale_fill_manual(
    values = c("#7ed0de", "#5f50a1", "#add8a4",
               "#fddf8a", "#8a95ab", "#b57c82"),
    breaks = rev(c("Others", "Gammaproteobacteria",
                   "Bacilli", "Bacteroidia",
                   "Clostridia",
                   "Negativicutes")),
    name = "Class",
    guide = guide_legend(order = 2)
  ) +
  ggnewscale::new_scale_fill() +
  # Bar 3: Order
  geom_col(
    data = df03,
    mapping = aes(x = 3, y = value, fill = group03),
    position = "fill"
  ) +
  scale_fill_manual(
    values = c("#134b5f", "#9ba791", "#eb9486",
               "#adc0e3", "#cc141d", "#1d933a"),
    breaks = rev(c("Others", "Lactobacillales",
                   "Christensenellales", "Bacteroidales",
                   "Oscillospirales",
                   "Lachnospirales")),
    name = "Order",
    guide = guide_legend(order = 3)
  ) +
  ggnewscale::new_scale_fill() +
  # Bar 4: Family
  geom_col(
    data = df04,
    mapping = aes(x = 4, y = value, fill = group04),
    position = "fill"
  ) +
  scale_fill_manual(
    values = c("#59a691", "#505d75", "#c9b014",
               "#9d1b45", "#ee8354", "#bb7b53"),
    breaks = rev(c("Others", "Bacteroidaceae",
                   "Acutalibacteraceae",
                   "Ruminococcaceae",
                   "Oscillospiraceae",
                   "Lachnospiraceae")),
    name = "Family",
    guide = guide_legend(order = 4)
  ) +
  # Label the numeric bar positions with the rank names; `labels` is spelled
  # out in full instead of relying on partial argument matching.
  scale_x_continuous(
    breaks = c(1, 2, 3, 4),
    labels = c("Phylum", "Class", "Order", "Family")
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    legend.key.size = unit(3, "mm")
  ) +
  labs(x = NULL, y = "Proportion")
图片
image.png

推文记录的是自己的学习笔记,很可能存在错误,请大家批判着看

示例数据和代码可以给推文打赏一元获取

声明:文中观点不代表本站立场。本文传送门:https://eyangzhen.com/57192.html

联系我们
联系我们
分享本页
返回顶部