生物信息数据分析教程视频——17-多种算法评估肿瘤免疫细胞浸润水平
R基础:生信分析的R语言基础教程都在这里了,包括语法,绘图和数据分析。
生物信息数据分析教程视频——01-TCGA数据库RNAseq数据下载与整理
生物信息数据分析教程视频——02-TCGA数据库miRNA数据下载与整理
生物信息数据分析教程视频——03-有关TCGA数据库临床数据的问题
生物信息数据分析教程视频——04-TCGA数据库中SNV和CNV数据的下载
生物信息数据分析教程视频——05-TCGA数据库中甲基化数据的下载和整理
生物信息数据分析教程视频——06-GEO数据库中芯片数据的下载和整理
生物信息数据分析教程视频——07-TCGA数据库:基因的表达探索
生物信息数据分析教程视频——08-TCGA+GTEx数据库的数据整理
生物信息数据分析教程视频——09-TCGA+GTEx数据库联合表达分析
生物信息数据分析教程视频——10-TCGA数据库:miRNA的表达探索
生物信息数据分析教程视频——12-基因之间的相关性分析及可视化
生物信息数据分析教程视频——13-3种R包(DESeq2、edgeR和limma)进行RNAseq的差异表达分析与比较
生物信息数据分析教程视频——15-clusterProfiler包+ClueGO做富集分析
生物信息数据分析教程视频——16-单样本基因集富集分析(ssGSEA)用于肿瘤相关免疫细胞浸润水平评估 http://mpvideo.qpic.cn/0b2ezeadqaaanaafvdb5cjrvbsodhdeqaoaa.f10002.mp4?dis_k=434c024ce3cb08025f1257e30ff4b4a4&dis_t=1671187731&vid=wxv_2606760913223974912&format_id=10002&support_redirect=0&mmversion=false
参考视频: http://mpvideo.qpic.cn/0bf2nyabqaaa3iabiz7dnnqfa3wddbxaagaa.f10002.mp4?dis_k=dc98b18769e0ad4768ae32e8f35e4b2e&dis_t=1671187731&vid=wxv_1944485866878402561&format_id=10002&support_redirect=0&mmversion=false
实际应用:
estimate代码:
# https://mp.weixin.qq.com/s/X2kybql--KPgjUumWX4z9Q
# setwd("H:/MedBioInfoCloud/analysis/TCGA/new/conventionalAnalysis")
options(stringsAsFactors = F)
library(TCGAbiolinks)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(corrplot)
library(pheatmap)
library(estimate)
FilePath <- dir("H:/MedBioInfoCloud/analysis/TCGA/new/processedTCGAdata/TCGA-STAR_Exp/",
"STARdata.Rdata$",full.names = T)
opt <- "output/009-CellFractions/estimate/"
ifelse(dir.exists(opt),FALSE,dir.create(opt,recursive = T))
record_files <- dir(opt,"RecordInformation.txt$",full.names = T)
ifelse(length(record_files)>0,file.remove(record_files),
"There is no record file that can be removed")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/filterGeneTypeExpr.R")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/del_dup_sample.R")
###TCGA数据库中33中癌症类型
project <- getGDCprojects()$project_id
project <- project[grep("TCGA-",project)]
# proj = "TCGA-LUAD"
for(proj in project[1]){
message("===============================")
message(proj)
load(FilePath[grep(proj,FilePath)])#STARdata
tpm <- STARdata[["tpm"]]
tpm <- filterGeneTypeExpr(expr = tpm,
fil_col = "gene_type",
filter = FALSE)
##过滤不表达的基因
tpm <- tpm[apply(tpm,1,var) !=0,]
##正常组织样本ID
SamN <- TCGAquery_SampleTypes(barcode = colnames(tpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
##肿瘤组织样本ID
SamT <- setdiff(colnames(tpm),SamN)
###去除重复样本
tur_exp <- del_dup_sample(tpm[,SamT],col_rename = T)
# ###long2转换
# tur_exp <- log2(tur_exp + 1)
# dat=log2(edgeR::cpm(pd_mat)+1)
# scores = estimate_RNAseq(dat,pro)
diffgene2ent <- bitr(rownames(tur_exp),
fromType="SYMBOL", toType="ENTREZID",
OrgDb="org.Hs.eg.db")
tur_exp <- tur_exp[diffgene2ent$SYMBOL,]
rownames(tur_exp) <- diffgene2ent$ENTREZID
colnames(tur_exp) <- gsub("-","_",colnames(tur_exp))
head(tur_exp)[,1:3]
###输出表达数据,作为filterCommonGenes的输入
input.f <- paste0(opt,"sample_input.txt")
write.table(tur_exp,file = input.f,row.names = T,sep = "\t",quote = F)
output.f <- paste0(opt,"estimate_input.gct")##定义输出
filterCommonGenes(input.f= input.f, output.f=output.f, id="EntrezID")#过滤掉细胞中共有的基因
### 计算免疫浸润打分 ###
estimate_path <- paste0(opt,"sample_estimate.gct")
estimateScore(output.f, estimate_path, platform="illumina")#计算肿瘤微环境的评分
# ### 可视化 ###
# # 绘制单个样本
# output.dir <- paste0(opt,"figures/")##定义输出
# plotPurity(scores= estimate_path, samples=colnames(tur_exp)[1],
# platform="illumina",
# output.dir = opt)
# # 绘制多个样本
# plotPurity(scores= estimate_path,
# platform="illumina", output.dir = opt_path)
estimate_scores = read.table(estimate_path,skip = 2,header = T)
rownames(estimate_scores)=estimate_scores[,1]
estimate_scores= t(estimate_scores[,3:ncol(estimate_scores)]) %>% as.data.frame()
head(estimate_scores)
estimate_scores$Tumour_purity <- cos(0.6049872018+0.0001467884*estimate_scores$ESTIMATEScore)
save(estimate_scores,file = paste0(opt,proj,"-estimate.Rdata"))
file.remove(dir(opt,"(.gct)|(.txt)$",full.names = T))
}
###============老版本数据
message("===============================")
message(proj)
load("H:/data_analysis/TCGA/old/TCGA/RNASeqTPM/TCGA-LUAD-protein_coding-TPM.Rdata")
tpm <- filterGeneTypeExpr(expr = tpmdata,
fil_col = "gene_type",
filter = FALSE)
##过滤不表达的基因
tpm <- tpm[apply(tpm,1,var) !=0,]
##正常组织样本ID
SamN <- TCGAquery_SampleTypes(barcode = colnames(tpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
##肿瘤组织样本ID
SamT <- setdiff(colnames(tpm),SamN)
###去除重复样本
tur_exp <- del_dup_sample(tpm[,SamT],col_rename = T)
# ###long2转换
# tur_exp <- log2(tur_exp + 1)
diffgene2ent <- bitr(rownames(tur_exp),
fromType="SYMBOL", toType="ENTREZID",
OrgDb="org.Hs.eg.db")
tur_exp <- tur_exp[diffgene2ent$SYMBOL,]
rownames(tur_exp) <- diffgene2ent$ENTREZID
colnames(tur_exp) <- gsub("-","_",colnames(tur_exp))
head(tur_exp)[,1:3]
###输出表达数据,作为filterCommonGenes的输入
input.f <- paste0(opt,"sample_input.txt")
write.table(tur_exp,file = input.f,row.names = T,sep = "\t",quote = F)
output.f <- paste0(opt,"estimate_input.gct")##定义输出
filterCommonGenes(input.f= input.f, output.f=output.f, id="EntrezID")#过滤掉细胞中共有的基因
### 计算免疫浸润打分 ###
estimate_path <- paste0(opt,"sample_estimate.gct")
estimateScore(output.f, estimate_path, platform="illumina")#计算肿瘤微环境的评分
estimate_scores = read.table(estimate_path,skip = 2,header = T)
rownames(estimate_scores)=estimate_scores[,1]
estimate_scores= t(estimate_scores[,3:ncol(estimate_scores)]) %>% as.data.frame()
head(estimate_scores)
estimate_scores$Tumour_purity <- cos(0.6049872018+0.0001467884*estimate_scores$ESTIMATEScore)
save(estimate_scores,file = paste0(opt,proj,"-estimate.Rdata"))
file.remove(dir(opt,"(.gct)|(.txt)$",full.names = T))
CIBERSORTx代码:
# https://mp.weixin.qq.com/s/X2kybql--KPgjUumWX4z9Q
# setwd("H:/MedBioInfoCloud/analysis/TCGA/new/conventionalAnalysis")
options(stringsAsFactors = F)
library(TCGAbiolinks)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(pheatmap)
#https://rdrr.io/github/singha53/amritr/src/R/supportFunc_cibersort.R
# install.packages("devtools")
# devtools::install_github("BioInfoCloud/csCIBERSORTx")
library(csCIBERSORTx)
FilePath <- dir("H:/MedBioInfoCloud/analysis/TCGA/new/processedTCGAdata/TCGA-STAR_Exp/",
"STARdata.Rdata$",full.names = T)
opt <- "output/009-CellFractions/CIBERSORT/"
ifelse(dir.exists(opt),FALSE,dir.create(opt,recursive = T))
record_files <- dir(opt,"RecordInformation.txt$",full.names = T)
ifelse(length(record_files)>0,file.remove(record_files),
"There is no record file that can be removed")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/filterGeneTypeExpr.R")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/del_dup_sample.R")
###TCGA数据库中33中癌症类型
project <- getGDCprojects()$project_id
project <- project[grep("TCGA-",project)]
# proj = "TCGA-LUAD"
for(proj in project[2:33]){
message("===============================")
message(proj)
load(FilePath[grep(proj,FilePath)])#STARdata
tpm <- STARdata[["tpm"]]
tpm <- filterGeneTypeExpr(expr = tpm,
fil_col = "gene_type",
filter = FALSE)
##过滤不表达的基因
tpm <- tpm[apply(tpm,1,var) !=0,]
##正常组织样本ID
SamN <- TCGAquery_SampleTypes(barcode = colnames(tpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
##肿瘤组织样本ID
SamT <- setdiff(colnames(tpm),SamN)
###去除重复样本
tur_exp <- del_dup_sample(tpm[,SamT],col_rename = T)
# ###long2转换,该方法不需要log转化内部函数会有判断
# tur_exp <- log2(tur_exp + 1)
# head(tur_exp)[,1:3]
file <- paste0(opt,proj,"-CIBERSORT-Result.txt")
cibr <- CIBERSORT2(LM22_matrix = LM22,
mixture = tur_exp,
opt_path = file,
perm=100,
QN=TRUE)
# 提取CIBERSORT的打分结果
CIBERSORT_Score <- cibr[,1:(ncol(cibr)-3)]#因为绘图不需要最后三列,所以去掉最后三列,取22列
# 生成标准化函数normalize,构建一个function
normalize <- function(x){# (x-x的最小值)/(X的最大值-X的最小值)
return((x-min(x))/(max(x)-min(x)))}
# 使用normalize标准化CIBERSORT打分
CIBERSORT_Score <- normalize(CIBERSORT_Score)
# 将处理后CIBERSORT的打分写出到文件
save(CIBERSORT_Score, file = paste0(opt,proj,"-CIBERSORT-Result.Rdata"))
# # 构建热图注释数据框
# annotation <- data.frame(rownames(CIBERSORT_Score))
# colnames(annotation) <- "sample"#设定列名
# rownames(annotation) <- rownames(CIBERSORT_Score)#设定行名
# annotation_row <- cbind(annotation, cibr[,(ncol(cibr)-2):ncol(cibr)])#合并列,CIBERSORT_Result的最后三列
#
# # 绘制热图
# pheatmap(CIBERSORT_Score,
# show_colnames = T,
# cluster_rows = F,
# cluster_cols = F,#cluster_cols = F 对列不进行聚类
# annotation_row = annotation_row,#注释数据框放在行
# cellwidth=10,
# cellheight=40,
# fontsize=5,
# fontsize_row=5,
# scale="row",
# fontsize_col=3)
}
MCPcounter代码:
# https://mp.weixin.qq.com/s/X2kybql--KPgjUumWX4z9Q
# setwd("H:/MedBioInfoCloud/analysis/TCGA/new/conventionalAnalysis")
options(stringsAsFactors = F)
library(TCGAbiolinks)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(pheatmap)
# devtools::install_github('ebecht/MCPcounter',ref='master', subdir='Source')
library(MCPcounter)
library(preprocessCore)
# https://github.com/ebecht/MCPcounter
FilePath <- dir("H:/MedBioInfoCloud/analysis/TCGA/new/processedTCGAdata/TCGA-STAR_Exp/",
"STARdata.Rdata$",full.names = T)
opt <- "output/009-CellFractions/MCPcounter/"
ifelse(dir.exists(opt),FALSE,dir.create(opt,recursive = T))
record_files <- dir(opt,"RecordInformation.txt$",full.names = T)
ifelse(length(record_files)>0,file.remove(record_files),
"There is no record file that can be removed")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/filterGeneTypeExpr.R")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/del_dup_sample.R")
###TCGA数据库中33中癌症类型
project <- getGDCprojects()$project_id
project <- project[grep("TCGA-",project)]
# proj = "TCGA-LUAD"
# 读取探针注释结果
probesets = read.table(curl('http://raw.githubusercontent.com/ebecht/MCPcounter/master/Signatures/probesets.txt'),
sep='\t',stringsAsFactors=FALSE,colClasses='character')
# 读取基因名注释结果
genes = read.table(curl('http://raw.githubusercontent.com/ebecht/MCPcounter/master/Signatures/genes.txt'),
sep='\t',
stringsAsFactors=FALSE,
header=TRUE,
colClasses='character',
check.names=FALSE)
library(csGeneset)
# Probe annotation.
probesets <- annoMCPcounter[["probesets"]]
# gene annotation
genes <- annoMCPcounter[["genes"]]
?MCPcounter.estimate
for(proj in project[1]){
message("===============================")
message(proj)
load(FilePath[grep(proj,FilePath)])#STARdata
tpm <- STARdata[["tpm"]]
tpm <- filterGeneTypeExpr(expr = tpm,
fil_col = "gene_type",
filter = FALSE)
##过滤不表达的基因
tpm <- tpm[apply(tpm,1,var) !=0,]
##正常组织样本ID
SamN <- TCGAquery_SampleTypes(barcode = colnames(tpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
##肿瘤组织样本ID
SamT <- setdiff(colnames(tpm),SamN)
###去除重复样本
tur_exp <- del_dup_sample(tpm[,SamT],col_rename = T)
# ###long2转换
# tur_exp <- log2(tur_exp + 1)
#
head(tur_exp)[,1:3]
# 进行MCPcounter分析
MCPcounterScore <- MCPcounter.estimate(tur_exp, featuresType = "HUGO_symbols",#featuresType就是根据input来变化的
probesets = probesets,genes = genes)
# 生成标准化函数normalize,构建一个function
normalize <- function(x){# (x-x的最小值)/(X的最大值-X的最小值)
return((x-min(x))/(max(x)-min(x)))}
# 使用normalize函数进行标准化
MCPcounterScore <- normalize(data.matrix(MCPcounterScore))
# 将结果写出到文件
write.table(MCPcounterScore, file = "MCPcounterScore.txt",sep="\t", quote=F, col.names=T)
# 构建包含注释信息的数据框
annotation_col <- data.frame(colnames(MCPcounterScore))
colnames(annotation_col) <- "sample"
rownames(annotation_col) <- colnames(MCPcounterScore)
# 绘制热图
pheatmap(MCPcounterScore,show_colnames = F,cluster_rows = F,
cluster_cols = T,annotation_col = annotation_col,#annotation_col 注释放在列, cluster_cols = T对列也就是样本进行聚类
cellwidth=15,cellheight=15,fontsize=5,
filename = 'figures/MCPcounter-heatmap.tiff')
}
xCell代码:
# https://mp.weixin.qq.com/s/X2kybql--KPgjUumWX4z9Q
# setwd("H:/MedBioInfoCloud/analysis/TCGA/new/conventionalAnalysis")
options(stringsAsFactors = F)
library(TCGAbiolinks)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(pheatmap)
# devtools::install_github('dviraran/xCell')
library(xCell)
FilePath <- dir("H:/MedBioInfoCloud/analysis/TCGA/new/processedTCGAdata/TCGA-STAR_Exp/",
"STARdata.Rdata$",full.names = T)
opt <- "output/009-CellFractions/xCell/"
ifelse(dir.exists(opt),FALSE,dir.create(opt,recursive = T))
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/filterGeneTypeExpr.R")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/del_dup_sample.R")
###TCGA数据库中33中癌症类型
project <- getGDCprojects()$project_id
project <- project[grep("TCGA-",project)]
# proj = "TCGA-LUAD"
for(proj in project){
message("===============================")
message(proj)
load(FilePath[grep(proj,FilePath)])#STARdata
tpm <- STARdata[["tpm"]]
tpm <- filterGeneTypeExpr(expr = tpm,
fil_col = "gene_type",
filter = FALSE)
##过滤不表达的基因
tpm <- tpm[apply(tpm,1,var) !=0,]
##正常组织样本ID
SamN <- TCGAquery_SampleTypes(barcode = colnames(tpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
##肿瘤组织样本ID
SamT <- setdiff(colnames(tpm),SamN)
###去除重复样本
tur_exp <- del_dup_sample(tpm[,SamT],col_rename = T)
# 进行xCell分析
xCell <- xCellAnalysis(tur_exp)
dim(xCell)
rownames(xCell)
## 如果是RNA-seq数据,则
xCell_RNAseq <- xCellAnalysis(tur_exp,rnaseq = T)
dim(xCell_RNAseq)
# 将结果写出到文件
save(xCell_RNAseq, file = paste0(opt,proj,"-xCell.Rdata"),sep="\t", quote=F, col.names=T)
}
ImmuCellAI代码:
# https://mp.weixin.qq.com/s/X2kybql--KPgjUumWX4z9Q
# setwd("H:/MedBioInfoCloud/analysis/TCGA/new/conventionalAnalysis")
options(stringsAsFactors = F)
library(TCGAbiolinks)
# devtools::install_github("lydiaMyr/ImmuCellAI@main")
# https://github.com/lydiaMyr/ImmuCellAI
library(ImmuCellAI)
# https://github.com/lydiaMyr/ImmuCellAI-mouse
# devtools::install_github("lydiaMyr/ImmuCellAI-mouse@main")
data("train_data")
data("train_tag")
data("marker_exp")
data("marker_exp_T")
data("paper_marker")
data("train_data")
data("train_tag")
data("compensation_matrix")
data("immune_infiltate_marker")
FilePath <- dir("H:/MedBioInfoCloud/analysis/TCGA/new/processedTCGAdata/TCGA-STAR_Exp/",
"STARdata.Rdata$",full.names = T)
opt <- "output/009-CellFractions/ImmuCellAI/"
ifelse(dir.exists(opt),FALSE,dir.create(opt,recursive = T))
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/filterGeneTypeExpr.R")
source("H:/MedBioInfoCloud/analysis/TCGA/new/00-fun/del_dup_sample.R")
###TCGA数据库中33中癌症类型
project <- getGDCprojects()$project_id
project <- project[grep("TCGA-",project)]
# proj = "TCGA-LUAD"
for(proj in project){
message("===============================")
message(proj)
load(FilePath[grep(proj,FilePath)])#STARdata
tpm <- STARdata[["tpm"]]
tpm <- filterGeneTypeExpr(expr = tpm,
fil_col = "gene_type",
filter = FALSE)
##过滤不表达的基因
tpm <- tpm[apply(tpm,1,var) !=0,]
##正常组织样本ID
SamN <- TCGAquery_SampleTypes(barcode = colnames(tpm),
typesample = c("NT","NB","NBC","NEBV","NBM"))
##肿瘤组织样本ID
SamT <- setdiff(colnames(tpm),SamN)
###去除重复样本
tur_exp <- del_dup_sample(tpm[,SamT],col_rename = T)
dim(tur_exp)
# group <- data.frame(group = rep(c("High","low"),each = 258)) %>% t()
# colnames(group) <- colnames(tur_exp)
# tur_exp <- rbind(group,tur_exp)
#The prediction process of ImmuCellAI is updated, which simulated the flow cytometry process to predict cell type abundance by hierarchical strategy. All 24 cell types were divided into two layers, layer1:DC, B cell, Monocyte, Macrophage, NK, Neutrophil, CD4 T, CD8 T, NKT, Tgd; layer2:CD4 naive, CD8 naive, Tc, Tex, Tr1, nTreg, iTreg, Th1, Th2, Th17, Tfh, Tcm, Tem, MAIT.
#Parameters
#sample_expression: Sample expression profile in FPKM, TPM format by RNA-seq or log2-transformed signal by microarray.
#datatype: One of "rnaseq" and "microarray"
#group_tag: One of 0 and 1, if there is the need to perform the comparision between different groups. If the value is 1, users need to add a group tag row in the input epxression matrix to explain the group of each sample.
#response_tag: One of 0 and 1, if there is the need to predict the ICB response of each sample.
ImmuCellAIscorelist <- ImmuCellAI_new(tur_exp,"rnaseq",group_tag =0,0)
ImmuCellAIscore <- ImmuCellAIscorelist[["Sample_abundance"]]
#output
# The output of the function is a list with three variables including sample immune cell abundance, group comparison result and ICB response prediction result.
# 将结果写出到文件
save(ImmuCellAIscore, file = paste0(opt,proj,"-ImmuCellAI.Rdata"),sep="\t", quote=F, col.names=T)
}
相关文章
- 『C语言』getchar() & putchar() 〖input & output〗
- 效率工具推荐(第15期)
- 『C语言』系统日期&时间
- 效率工具推荐(第16期)
- 『C语言』题集 of ⑩
- 效率工具推荐(第17期)
- C4D Arnold阿诺德渲染器R21-R26/2023支持win mac M1
- 效率工具推荐(第18期)
- SAP ERP实施项目,到底是公司适应系统还是系统适应公司?
- 开发说这个需求没法实现,到底在说啥?
- 产品经理 | 什么是网关?
- 「B端」用户使用文档网站的几种实现方案
- 『C语言』深度走入取整 & 4种函数
- 那些适用于跨境电商的ERP系统
- SAP 异常现象之同一个IDoc可以被POST两次触发2张不同的物料凭证
- 『C语言』递归思想
- HLA基因泛癌分析发6分+SCI
- 【Proteus】梦开始的地方〔LED灯〕
- 肿瘤微环境分层模型如何再创9分+SCI
- 『单片机原理』程序存储器的结构