您现在的位置是：首页 > 数据库

当前栏目

单细胞水平看小鼠胰腺导管腺癌进展中的细胞异质性

数据

2023-04-18 14:38:59 时间

下面11月份学徒的投稿

文章标题：《Cellular heterogeneity during mouse pancreatic ductal adenocarcinoma progression at single-cell resolution》

文章链接：https://insight.jci.org/articles/view/129212

文章概述：

胰腺导管腺癌 (PDA) 是癌症相关死亡的主要原因，可用的治疗选择有限。通过单细胞 RNA 测序技术在基因工程小鼠模型的 PDA 进展的不同阶段对细胞异质性进行分析。发现**除了具有增加炎症特征的不同巨噬细胞群外，癌细胞的上皮向间充质转变伴随着肿瘤进展；**正常小鼠胰腺中存在三种不同的成纤维细胞分子亚型，最终在晚期 PDA 中产生两种不同的成纤维细胞群；癌细胞和成纤维细胞可能受到表观遗传机制的动态调节。

文章复现：

此次复现的图为文章Figure 1 的B-E图，是3个单细胞样品各自独立的降维聚类分群并且进行生物学命名：

image-20211104152226887

可以看出文章中的是将Normal，early-KIC，late-KIC分别进行降维聚类分群及细胞亚群注释。接下来进行文章复现：

数据概览：

文章中处理的数据集是GSE125588，其中包括5个样本：

（1）数据读取质控过滤：

数据读取之前需要将每个样本的标准10X文件整理成如下格式：

image-20211105174422303

# 加载R包 -------------------------------------------------------------------
rm(list = ls())
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(clustree)) 
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr)) 
suppressPackageStartupMessages(library(data.table)) 
suppressPackageStartupMessages(library(stringr)) 

# 加载数据 -------------------------------------------------------------------
dir = '../data/GSE125588/'
samples=list.files(dir)
samples
sceList = lapply(samples,function(pro){ 
  # pro=samples[1]
  folder=file.path(dir,pro) 
  print(pro)
  print(folder)
  print(list.files(folder))
  sce=CreateSeuratObject(counts = Read10X(folder),
                         project =  pro )
  
  return(sce)
})
names(sceList)  
samples
names(sceList) =  samples
sce.all <- merge(sceList[[1]], y= sceList[ -1 ] ,
                 add.cell.ids=samples) 
# 查看数据
as.data.frame(sce.all@assays$RNA@counts[1:10, 1:2])
head(sce.all@meta.data, 10)
tail(sce.all@meta.data, 10)
dim(sce.all)
# [1] 27998 11636
table(sce.all$orig.ident)
# early.KIC  late.KIC  late.KPC late.KPfC    normal 
# 3534       938      1377      3143      2644 

# 添加分组信息
# sce.all$group <- ifelse(sce.all$orig.ident %in% c(),"a组","b组")
dir.create("../subdata")
save(sce.all,file = "../subdata/sce.all.Rdata")

# 质控和过滤 -------------------------------------------------------------------
# 1. 计算线粒体基因比例
# 人和鼠的基因名字稍微不一样 
mito_genes=rownames(sce.all)[grep("^mt-", rownames(sce.all))] 
mito_genes #13个线粒体基因
sce.all=PercentageFeatureSet(sce.all, "^mt-", col.name = "percent_mito")
fivenum(sce.all@meta.data$percent_mito)
# 2. 计算核糖体基因比例
ribo_genes=rownames(sce.all)[grep("^Rp[sl]", rownames(sce.all),ignore.case = T)]
ribo_genes
sce.all=PercentageFeatureSet(sce.all, "^Rp[sl]", col.name = "percent_ribo")
fivenum(sce.all@meta.data$percent_ribo)
# 3. 计算红血细胞基因比例
hb_genes <- rownames(sce.all)[grep("^Hb[^(p)]", rownames(sce.all),ignore.case = T)]
hb_genes
sce.all=PercentageFeatureSet(sce.all, "^Hb[^(P)]", col.name = "percent_hb")
fivenum(sce.all@meta.data$percent_hb)

# 过滤指标1:最少表达基因数的细胞&最少表达细胞数的基因
selected_c <- WhichCells(sce.all, expression = nFeature_RNA >300) # 每个细胞中基因表达>300
selected_f <- rownames(sce.all)[Matrix::rowSums(sce.all@assays$RNA@counts > 0 ) > 3] # 至少在3个细胞中表达 
sce.all.filt <- subset(sce.all, features = selected_f, cells = selected_c)
# 过滤指标2:线粒体/核糖体基因比例(根据上面的violin图)
selected_mito <- WhichCells(sce.all.filt, expression = percent_mito < 15)
selected_ribo <- WhichCells(sce.all.filt, expression = percent_ribo > 1)
selected_hb <- WhichCells(sce.all.filt, expression = percent_hb < 0.5)
sce.all.filt <- subset(sce.all.filt, cells = selected_mito)
sce.all.filt <- subset(sce.all.filt, cells = selected_ribo)
sce.all.filt <- subset(sce.all.filt, cells = selected_hb)
dim(sce.all.filt)
# [1] 17935  9621
table(sce.all$orig.ident)
# early.KIC  late.KIC  late.KPC late.KPfC    normal 
# 3534       938      1377      3143      2644 
table(sce.all.filt$orig.ident)
# early.KIC  late.KIC  late.KPC late.KPfC    normal 
# 3379       765       917      2632      1928

(2)拆分细胞亚群：

sce.all.list <- SplitObject(sce, split.by = "orig.ident") # "orig.ident" 中包含了样本信息
sce.all.list 
names(sce.all.list)
for (i in names(sce.all.list)) {
  print(i)
  epi_mat = sce.all.list[[i]]@assays$RNA@counts
  epi_phe = sce.all.list[[i]]@meta.data
  sce=CreateSeuratObject(counts = epi_mat, 
                         meta.data = epi_phe )
  sce
  table(sce@meta.data$orig.ident) 
  save(sce,file = paste0("../subcluster/",i,'.Rdata'))
}

(3)进行降维聚类分群

# 循环进行降维聚类分群 --------------------------------------------------------------

names <- c("normal","early.KIC","late.KIC")
for (name in names) {
  # name <- "normal"
  print(name)
  # 降维聚类分析 ------------------------------------------------------------------
  load(paste0("../subcluster/",name,".Rdata"))
 
  #数据预处理
  sce <- NormalizeData(sce)
  sce = ScaleData(sce, 
                  features = rownames(sce),
                  # vars.to.regress = c("nFeature_RNA", "percent_mito")
  )  #加上vars.to.regress参数后运行so slow!!!
  # 降维处理
  sce <- FindVariableFeatures(sce, nfeatures = 3000)
  sce <- RunPCA(sce, npcs = 30)
  sce <- RunTSNE(sce, dims = 1:30)
  sce <- RunUMAP(sce, dims = 1:30)
  
    # CCA 聚类分析------------------------------------------------------------------
 
  seob <- sce
  seob <- FindNeighbors(seob, dims = 1:30)
  seob <- FindClusters(seob, graph.name = "RNA_snn", 
                         resolution = c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5,0.8,1),
                         algorithm = 1)
  # CCA分群数据----
  
  #聚类树可视化
  p2_tree <- clustree(seob@meta.data, prefix = "RNA_snn_res.") 
  p2_tree
  ggsave(p2_tree, filename = paste0("../picture/2_PCA_UMAP_TSNE/",name,"_cluster_tree_by_CCA.pdf"), width = 10, height = 8)
  
  #umap可视化
  cluster_umap <- plot_grid(ncol = 4, 
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.0.01", label = T) & NoAxes(), 
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.0.05", label = T)& NoAxes(),
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.0.1", label = T) & NoAxes(),
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.0.2", label = T)& NoAxes(),
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.0.3", label = T)& NoAxes(),
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.0.5", label = T) & NoAxes(),
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.0.8", label = T) & NoAxes(), 
                   DimPlot(seob, reduction = "umap", group.by = "RNA_snn_res.1", label = T) & NoAxes()
  )
  ggsave(cluster_umap, filename = paste0("../picture/2_PCA_UMAP_TSNE/",name,"_cluster_umap_all_res_by_CCA.pdf"), width = 16, height = 8)
  
  cluster_tsne <- plot_grid(ncol = 4, 
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.0.01", label = T) & NoAxes(), 
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.0.05", label = T)& NoAxes(),
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.0.1", label = T) & NoAxes(),
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.0.2", label = T)& NoAxes(),
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.0.3", label = T)& NoAxes(),
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.0.5", label = T) & NoAxes(),
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.0.8", label = T) & NoAxes(), 
                    DimPlot(seob, reduction = "tsne", group.by = "RNA_snn_res.1", label = T) & NoAxes()
  )
  ggsave(cluster_tsne, 
         filename = paste0("../picture/2_PCA_UMAP_TSNE/",name,"_cluster_tsne_all_res_by_CCA.pdf"), 
         width = 16, height = 8)
  
  sce <- seob
  save(sce,file = paste0("../subdata/",name,"_after.Rdata"))
}

Normal

elay_KIC

Late-KIC

细胞亚群注释：

# 细胞亚群注释 Normal------------------------------------------------------------------
rm(list = ls())
load("../subdata/normal_after.Rdata")
p_umap <- DimPlot(sce,label = T)
p_umap
# paper -------------------------------------------------------------------
genes_to_check = c("Amy1","Amy2a2","Pyy","Sst","Sox9","Krt18","Col1a1","Col1a2","Cd14","Adgre1","Cd3d","Cd3e","Cd19","Cd79b")
# 该基因名称（大小写转换）----
genes_to_check=str_to_title(genes_to_check)
genes_to_check
p_all_markers <- DotPlot(sce, features = genes_to_check,
                         group.by = "RNA_snn_res.1",assay='RNA'  )  + coord_flip()
p_all_markers
ggsave(plot=p_all_markers, filename="../picture/Annotation/normal_check_all_marker_by_CCA_res_0.8.pdf")
p1 <- plot_grid(p_all_markers,p_umap,rel_widths = c(1.5,1))
p1
ggsave(p1,filename = '../picture/Annotation/normal_paper_markers_umap_by_CCA_res.0.8.pdf',units = "cm",width = 35,height = 18)
# 注释细胞亚群 ------------------------------------------------------------------
###### step6: 细胞类型注释 (根据marker gene) ######  
sce.all <- sce
celltype=data.frame(ClusterID=0:11,
                    celltype='NA')
celltype[celltype$ClusterID %in% c( 0,1,2,11 ),2]='Acinar cells'   
celltype[celltype$ClusterID %in% c( 9 ),2]='Islet & Ductal cell'
celltype[celltype$ClusterID %in% c( 3),2]='Fibroblasts_1'
celltype[celltype$ClusterID %in% c( 4),2]='Fibroblasts_2'  
celltype[celltype$ClusterID %in% c( 10 ),2]='Fibroblasts_3' 
celltype[celltype$ClusterID %in% c( 8 ),2]='Macrophages'
celltype[celltype$ClusterID %in% c( 6,7),2]='T cells' 
celltype[celltype$ClusterID %in% c( 5 ),2]='B cells'   
head(celltype)
celltype 
table(celltype$celltype)
sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.1 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)
# 可视化 ---------------------------------------------------------------------
p1 <- DimPlot(sce.all,group.by = "celltype",label = T,reduction = "tsne",pt.size = 1.5)
ggsave(p1,filename = "../picture/Annotation/normal_tsne.pdf")
p2 <- VlnPlot(sce.all,group.by = "celltype",features = genes_to_check,stack = T,flip = T)
ggsave(p2,filename = "../picture/Annotation/normal_vlnplot.pdf")
sce.normal <- sce.all
save(sce.normal,p1,p2,file = "../picture/Annotation/figure.Rdata")

# 细胞亚群注释 early.KIC------------------------------------------------------------------
rm(list = ls())
load("../subdata/early.KIC_after.Rdata")
p_umap <- DimPlot(sce,label = T)
p_umap
# paper -------------------------------------------------------------------
genes_to_check = c("Amy1","Amy2a2","Pyy","Sst","Ins1","Ins2","Sox9","Krt18","Col1a1","Col1a2","Cd52","Cd14","Cdh5","Pecam1")
# 该基因名称（大小写转换）----
genes_to_check=str_to_title(genes_to_check)
genes_to_check
p_all_markers <- DotPlot(sce, features = genes_to_check,
                         group.by = "RNA_snn_res.1",assay='RNA'  )  + coord_flip()
p_all_markers
ggsave(plot=p_all_markers, filename="../picture/Annotation/early.KIC_check_all_marker_by_CCA_res_0.8.pdf")

p1 <- plot_grid(p_all_markers,p_umap,rel_widths = c(1.5,1))
p1
ggsave(p1,filename = '../picture/Annotation/early.KIC_paper_markers_umap_by_CCA_res.0.8.pdf',units = "cm",width = 35,height = 18)
# 注释细胞亚群 ------------------------------------------------------------------
###### step6: 细胞类型注释 (根据marker gene) ######  
sce.all <- sce
celltype=data.frame(ClusterID=0:15,
                    celltype='NA')
celltype[celltype$ClusterID %in% c( 4,15),2]='Acinar cells'   
celltype[celltype$ClusterID %in% c( 11,13),2]='Islet & Ductal cell'
celltype[celltype$ClusterID %in% c( 0,1,5),2]='Fibroblasts_1'
celltype[celltype$ClusterID %in% c( 8),2]='Fibroblasts_2'  
celltype[celltype$ClusterID %in% c( 14 ),2]='Fibroblasts_3' 
celltype[celltype$ClusterID %in% c( 3,7,6,9),2]='Macrophages'
celltype[celltype$ClusterID %in% c( 2),2]='Cancer cell' 
celltype[celltype$ClusterID %in% c( 12 ),2]='Beta cells'   
celltype[celltype$ClusterID %in% c( 10 ),2]='Endothelial cells'
head(celltype)
celltype 
table(celltype$celltype)
sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.1 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)

# 可视化 ---------------------------------------------------------------------

p3 <- DimPlot(sce.all,group.by = "celltype",label = T,reduction = "tsne",pt.size = 1.5)
p3
ggsave(p3,filename = "../picture/Annotation/early.KIC_tsne.pdf")
p4 <- VlnPlot(sce.all,group.by = "celltype",features = genes_to_check,stack = T,flip = T)
ggsave(p4,filename = "../picture/Annotation/early.KIC_vlnPlot.pdf")
sce.early <- sce.all
save(sce.early,p3,p4,file = "../picture/Annotation/figure2.Rdata")
# 细胞亚群注释 late.KIC------------------------------------------------------------------
rm(list = ls())
load("../subdata/late.KIC_after.Rdata")
p_umap <- DimPlot(sce,label = T)
p_umap
# paper -------------------------------------------------------------------
genes_to_check = c("Sox9","Krt18","Col1a1","Col1a2","Adgre1","Cd3d","Cd14","Cd79b","Hba-a1","Hba-a2")
# 该基因名称（大小写转换）----
genes_to_check
p_all_markers <- DotPlot(sce, features = genes_to_check,
                         group.by = "RNA_snn_res.1",assay='RNA'  )  + coord_flip()
p_all_markers
ggsave(plot=p_all_markers, filename="../picture/Annotation/late.KIC_check_all_marker_by_CCA_res_0.8.pdf")

p1 <- plot_grid(p_all_markers,p_umap,rel_widths = c(1.5,1))
p1
ggsave(p1,filename = '../picture/Annotation/late.KIC_paper_markers_umap_by_CCA_res.0.8.pdf',units = "cm",width = 35,height = 18)

# 注释细胞亚群 ------------------------------------------------------------------
###### step6: 细胞类型注释 (根据marker gene) ######  
sce.all <- sce
celltype=data.frame(ClusterID=0:11,
                    celltype='NA')
celltype[celltype$ClusterID %in% c( 0,3,6 ),2]='Cancer cells-1'   
celltype[celltype$ClusterID %in% c( 11,13),2]='Cancer cells-2'
celltype[celltype$ClusterID %in% c( 7 ),2]='Fibroblasts_1'
celltype[celltype$ClusterID %in% c( 8 ),2]='Fibroblasts_3' 
celltype[celltype$ClusterID %in% c( 1,2,4,9,10),2]='Macrophages'
celltype[celltype$ClusterID %in% c( 5),2]='Lymphocytes' 
celltype[celltype$ClusterID %in% c( 12 ),2]='Red blood cells'   
head(celltype)
celltype 
table(celltype$celltype)
sce.all@meta.data$celltype = "NA"
for(i in 1:nrow(celltype)){
  sce.all@meta.data[which(sce.all@meta.data$RNA_snn_res.1 == celltype$ClusterID[i]),'celltype'] <- celltype$celltype[i]}
table(sce.all@meta.data$celltype)

# 可视化 ---------------------------------------------------------------------

p5 <- DimPlot(sce.all,group.by = "celltype",label = T,reduction = "tsne",pt.size = 1.5)
p5
ggsave(p5,filename = "../picture/Annotation/late.KIC_tsne.pdf")
p6 <- VlnPlot(sce.all,group.by = "celltype",features = genes_to_check,stack = T,flip = T)
p6
ggsave(p6,filename = "../picture/Annotation/late.KIC_vlnPlot.pdf")
sce.late <- sce.all
save(sce.late,p5,p6,file = "../picture/Annotation/figure3.Rdata")

# 拼图 ----------------------------------------------------------------------
rm(list = ls())
load("../picture/Annotation/figure.Rdata")
load("../picture/Annotation/figure2.Rdata")
load("../picture/Annotation/figure3.Rdata")
ps <- p1/p2|p3/p4|p5/p6
ps
ggsave(ps,filename = "../picture/Annotation/umap_vlnplot.pdf")

至此，这篇文章的复现就完成了。

大家觉得我们的复现咋样呢？

虽然说上面的代码都是复制粘贴即可运行，但是如果要更好地完成上面的图表，通常是需要掌握5个R包，分别是: scater,monocle,Seurat,scran,M3Drop，需要熟练掌握它们的对象，：一些单细胞转录组R包的对象 而且分析流程也大同小异：

step1: 创建对象
step2: 质量控制
step3: 表达量的标准化和归一化
step4: 去除干扰因素(多个样本整合)
step5: 判断重要的基因
step6: 多种降维算法
step7: 可视化降维结果
step8: 多种聚类算法
step9: 聚类后找每个细胞亚群的标志基因
step10: 继续分类

猜你喜欢

MarkDown使用
从最简单的斐波那契数列来学习动态规划（JavaScript版本）
日本16岁编程少年，课余打造一款新冠感染追踪App
2020，企业该怎么生存？这五大技术趋势告诉你答案 | 埃森哲报告
idea相关配置及插件安装
linux tcpdump 抓包常用命令使用
IDEA 神器推荐: 一键帮你生成所需IDEA~
java 文件上传：MultipartFile 类型转换为file类型
wireshark 报文颜色
9个很棒的CSS边框技巧，对你的样式设计有重要帮助
wireshark 显示过滤表达式
rm删除文件空间就释放了吗？天真！
wireshark 抓包使用
2020年您应该知道的7种前端JavaScript趋势和工具
mybatis plus 获取新增实体的主键
通过 Feign 进行文件上传
为什么你会被限制登录网页版微信？
mybatis plus 更新值为null的字段
杭州女程序员自述：疫情之下被迫离职，仲裁说理被公司索赔百万
idea 配置 service 服务，多模块同时启动

zl程序教程

当前栏目

单细胞水平看小鼠胰腺导管腺癌进展中的细胞异质性

文章概述：

文章复现：

细胞亚群注释：

相关文章