zl程序教程

您现在的位置是:首页 >  其它

当前栏目

geo(三)

Geo
2023-06-13 09:16:58 时间

1.GO富集分析

rm(list = ls())  
load(file = 'step4output.Rdata')
library(clusterProfiler)
library(ggthemes)
library(org.Hs.eg.db)
library(dplyr)
library(ggplot2)
library(stringr)
library(enrichplot)

1)输入数据

gene_up = deg$ENTREZID[deg$change == 'up'] 
gene_down = deg$ENTREZID[deg$change == 'down'] 
gene_diff = c(gene_up,gene_down)

!如何避免运行限速步骤

# 初阶版本
if(F){
  a = 1 #假装是限速步骤
  #保存运行结果,下次运行到这里时直接加载结果
  save(a,file = "a.Rdata")
}
load("a.Rdata")
# 高阶版本
f = "a.Rdata"
if(!file.exists(f)){
  #只有f文件(a.Rdata)在工作目录下不存在时才运行,否则跳过这段代码
  a = 1 #假装是限速步骤
  print("bye")
  #保存运行结果,下次运行到这里时直接加载结果
  save(a,file = f)
}
load(f)
# 完美版本
q = "124"
f = paste0(q,"a.Rdata")
if(!file.exists(f)){
  #只有f文件(a.Rdata)在工作目录下不存在时才运行,否则跳过这段代码
  a = 1 #假装是限速步骤
  print("bye")
  #保存运行结果,下次运行到这里时直接加载结果
  save(a,file = f)
}
load(f)

2)富集

#以下步骤耗时很长,设置了存在即跳过
f = paste0(gse_number,"_GO.Rdata")
if(!file.exists(f)){
  ego <- enrichGO(gene = gene_diff,
                  OrgDb= org.Hs.eg.db,
                  ont = "ALL",
                  readable = TRUE) #直接将entrezid id 翻译为symbol id
  ego_BP <- enrichGO(gene = gene_diff,
                  OrgDb= org.Hs.eg.db,
                  ont = "BP",
                  readable = TRUE)
  #ont参数:One of "BP", "MF", and "CC" subontologies, or "ALL" for all three.
  save(ego,ego_BP,file = f)
}
load(f)

3)可视化

#条带图
barplot(ego)
barplot(ego, split = "ONTOLOGY", font.size = 10, 
        showCategory = 5) + 
  facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y") 

#气泡图
dotplot(ego)

dotplot(ego, split = "ONTOLOGY", font.size = 10, 
        showCategory = 5) + 
  facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y") 

#(3)展示top通路的共同基因,要放大看。

#gl 用于设置下图的颜色
gl = deg$logFC
names(gl)=deg$ENTREZID
#Gene-Concept Network,要放大看
cnetplot(ego,
         #layout = "star",
         color.params = list(foldChange = gl),
         showCategory = 3)

2.KEGG富集

可能会没有结果,因为数据库里的数据很少

# 2.KEGG pathway analysis----
#上调、下调、差异、所有基因
#(1)输入数据
gene_up = deg[deg$change == 'up','ENTREZID'] 
gene_down = deg[deg$change == 'down','ENTREZID'] 
gene_diff = c(gene_up,gene_down)

#(2)对上调/下调/所有差异基因进行富集分析
f2 = paste0(gse_number,"_KEGG.Rdata")
if(!file.exists(f2)){
  kk.up <- enrichKEGG(gene         = gene_up,
                      organism     = 'hsa')
  kk.down <- enrichKEGG(gene         =  gene_down,
                        organism     = 'hsa')
  kk.diff <- enrichKEGG(gene         = gene_diff,
                        organism     = 'hsa')
  save(kk.diff,kk.down,kk.up,file = f2)
}
load(f2)

#(3)看看富集到了吗?https://mp.weixin.qq.com/s/NglawJgVgrMJ0QfD-YRBQg
table(kk.diff@result$p.adjust<0.05)
table(kk.up@result$p.adjust<0.05)
table(kk.down@result$p.adjust<0.05)

富集不到的补救措施:

#(4)双向图
# 富集分析所有图表默认都是用p.adjust,富集不到可以退而求其次用p值,在文中说明即可
source("kegg_plot_function.R")
g_kegg <- kegg_plot(kk.up,kk.down)
g_kegg
#g_kegg +scale_y_continuous(labels = c(4,2,0,2,4,6))

3.辅助资料

# GSEA:https://www.yuque.com/docs/share/a67a180f-dd2b-4f6f-96c2-68a4b86fe862?#

# 富集分析学习更多:http://yulab-smu.top/clusterProfiler-book/index.html

# 弦图:https://www.yuque.com/xiaojiewanglezenmofenshen/dbwkg1/dgs65p

# GOplot:https://mp.weixin.qq.com/s/LonwdDhDn8iFUfxqSJ2Wew

# 网上的资料和宝藏无穷无尽,学好R语言慢慢发掘~

4.问题数据和常见错误分析

>代码和ppt来源于生信技能树