zl程序教程

您现在的位置是:首页 >  数据库

当前栏目

跟着Nature学数据分析:R语言iNEXT包估计物种数并使用ggplot2作图展示结果

2023-02-19 12:27:40 时间

论文

Environmental factors shaping the gut microbiome in a Dutch population

https://www.nature.com/articles/s41586-022-04567-7

数据和代码下载链接

https://github.com/GRONINGEN-MICROBIOME-CENTRE/DMP

论文中提供的是模拟数据集

这个分析的具体原理暂时还看不明白,当前只能试着把代码跑通

输入数据集部分截图

image.png

读取数据集

# Read the taxa table from the Mock_data directory using read.table()
# defaults.
inDFmeta <- read.table(file = "Mock_data/taxa.txt")

# Second handle on the same table; the filtering steps below use inDF.
inDF <- inDFmeta

对数据集进行过滤

原作者在这里自定义了一个很长的过滤函数 filterMetaGenomeDF()。这里先把这个函数单独保存到文件 filterMetaGenomeDF.R 中,再用 source() 加载。

# Load the filterMetaGenomeDF() helper from a file in the working directory.
source(file = "filterMetaGenomeDF.R")

对数据集过滤

# Pass 1: keep the seven taxonomic levels S/G/F/O/C/P/K for all domains.
# NOTE(review): the -1 cut-offs presumably disable the prevalence and
# relative-abundance filters -- confirm in filterMetaGenomeDF.R.
inDFmm <- filterMetaGenomeDF(
  inDF,
  presPerc = -1,
  minMedRelAb = -1,
  minMRelAb = -1,
  keepDomains = "All",
  keepLevels = c("S", "G", "F", "O", "C", "P", "K")
)

# Pass 2: restrict the result of pass 1 to level "S" only
# (presumably species -- confirm in filterMetaGenomeDF.R).
dag3S <- filterMetaGenomeDF(
  inDFmm,
  keepLevels = "S",
  presPerc = -1,
  minMRelAb = 0.0,
  minMedRelAb = -1
)

这个是物种水平的操作

对数据集进行操作

# Transpose through the generic t() rather than calling the S3 method
# t.data.frame() directly; dispatch picks the right method for dag3S.
# NOTE(review): presumably dag3S has samples in rows, so taxa end up in
# rows after transposing -- confirm against filterMetaGenomeDF() output.
dag3S.t <- t(dag3S)

# Presence/absence copy: every strictly positive entry becomes 1.
dag3S.t.pa <- dag3S.t
dag3S.t.pa[dag3S.t.pa > 0] <- 1

# Per-row total of the presence/absence matrix.
dag3S.t.pa.rs <- rowSums(dag3S.t.pa)

使用iNEXT包进行计算

iNEXT包的帮助文档 https://cran.r-project.org/web/packages/iNEXT/vignettes/Introduction.html

# install.packages("iNEXT")  # one-time setup, left commented out
library(iNEXT)

# Run iNEXT on the per-row presence totals, computed at 250 knots with the
# endpoint set to 1.25 times the summed totals.
# NOTE(review): the input is presence counts but is passed as
# datatype "abundance"; iNEXT also has incidence datatypes -- confirm this
# is intended.
D_abund <- iNEXT(
  dag3S.t.pa.rs,
  datatype = "abundance",
  knots = 250,
  endpoint = sum(dag3S.t.pa.rs) * 1.25
)

# Overwrite the reported reference size and rescale the m axis so that the
# observed total maps to 2000.
# NOTE(review): 2000 is presumably the number of samples in the mock data
# -- confirm.
D_abund$DataInfo$n <- 2000
D_abund$iNextEst$m <- D_abund$iNextEst$m / sum(dag3S.t.pa.rs) * 2000

作图代码

library(ggplot2)

# Base curve from the iNEXT result (type = 1, with the se band, no
# faceting, coloured by site).
gg.s <- ggiNEXT(
  D_abund,
  type = 1,
  se = TRUE,
  facet.var = "none",
  color.var = "site",
  grey = FALSE
)

# Classic theme, axis titles, and 18 pt text throughout.
gg.s <- gg.s +
  theme_classic() +
  labs(y = "Number of Species", x = "Sample size") +
  theme(text = element_text(size = 18))

print(gg.s)

image.png

属水平的操作

# Restrict the filtered table to level "G"
# (presumably genus -- confirm in filterMetaGenomeDF.R).
dag3G <- filterMetaGenomeDF(
  inDFmm,
  keepLevels = "G",
  presPerc = -1,
  minMRelAb = 0.0000,
  minMedRelAb = -1
)

# Transpose through the generic t() rather than calling the S3 method
# t.data.frame() directly; dispatch picks the right method for dag3G.
dag3G.t <- t(dag3G)

# Presence/absence copy: every strictly positive entry becomes 1.
dag3G.t.pa <- dag3G.t
dag3G.t.pa[dag3G.t.pa > 0] <- 1

# Per-row total of the presence/absence matrix.
dag3G.t.pa.rs <- rowSums(dag3G.t.pa)

# Run iNEXT at 250 knots with the endpoint set to 1.25 times the summed
# totals.
# NOTE(review): presence counts are passed as datatype "abundance" --
# confirm this is intended.
D_abundG <- iNEXT(
  dag3G.t.pa.rs,
  datatype = "abundance",
  knots = 250,
  endpoint = sum(dag3G.t.pa.rs) * 1.25
)

# Overwrite the reported reference size and rescale the m axis so that the
# observed total maps to 2000.
# NOTE(review): 2000 is presumably the number of samples -- confirm.
D_abundG$DataInfo$n <- 2000
D_abundG$iNextEst$m <- D_abundG$iNextEst$m / sum(dag3G.t.pa.rs) * 2000

# Base curve from the genus-level iNEXT result (type = 1, with the se
# band, no faceting, coloured by site).
gg.g <- ggiNEXT(
  D_abundG,
  type = 1,
  se = TRUE,
  facet.var = "none",
  color.var = "site",
  grey = FALSE
)

# Classic theme, axis titles, and 18 pt text throughout.
gg.g <- gg.g +
  theme_classic() +
  labs(y = "Number of Genera", x = "Sample size") +
  theme(text = element_text(size = 18))

print(gg.g)

image.png

把两个图拼接到一起

# Species panel: legend anchored at relative position (0.8, 0.2), a single
# manual colour "red", and the colour/shape/fill guides switched off.
p1 <- gg.s +
  theme(legend.position = c(0.8, 0.2)) +
  scale_color_manual(values = "red") +
  guides(color = "none", shape = "none", fill = "none")

# Genus panel: same adjustments, drawn in "darkgreen".
p2 <- gg.g +
  theme(legend.position = c(0.8, 0.2)) +
  scale_color_manual(values = "darkgreen") +
  guides(color = "none", shape = "none", fill = "none")

library(patchwork)

# Combine the two panels and tag them with lowercase letters followed by a
# period; as a top-level expression the result is auto-printed.
(p1 + p2) +
  plot_annotation(tag_levels = "a", tag_suffix = ".")

image.png