zl程序教程

您现在的位置是:首页 >  大数据

当前栏目

超大数据集处理 心机成纤维 heart fibroblast

数据 处理 超大
2023-09-14 09:09:45 时间

1. 不同模态的数据


getwd()
dir.create("D:/yll/heart_fibroblast/project/gse183852_second")
setwd("D:/yll/heart_fibroblast/project/gse183852_second")
getwd()
library(stringr)
load("D:/yll/heart_fibroblast/gse183852/GSE183852_DCM_Integrated.Robj")
DefaultAssay(RefMerge)
Assays(RefMerge)
RefMerge
head(RefMerge@meta.data)
table(RefMerge$Names)
table(RefMerge$condition)
library(Seurat)
######################################for-integration-data#########3
dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration")
setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/")
getwd()
DefaultAssay(RefMerge)
table(RefMerge$Names)
RefMerge@commands
dim(RefMerge@assays$SCT)
dim(RefMerge@assays$prediction.score.celltype.l1)
dim(RefMerge@assays$prediction.score.celltype.l2)
dim(RefMerge@assays$RNA)
#DimPlot(RefMerge,assay="prediction.score.celltype.l1" )
DimPlot(RefMerge,label = T,repel = T)
################marker genes
DefaultAssay(RefMerge)
table(RefMerge$Names)
Idents(RefMerge)=RefMerge$Names
library(dplyr)
library(tibble)


markers_for_fibroblast_compared_with_other_celltypes=FindMarkers(RefMerge,ident.1 = "Fibroblasts",logfc.threshold = 0.1,
                                                                 only.pos = T,min.pct = 0.01)
markers_for_fibroblast_compared_with_other_celltypes=markers_for_fibroblast_compared_with_other_celltypes %>% rownames_to_column(var="gene")
head(markers_for_fibroblast_compared_with_other_celltypes)
openxlsx::write.xlsx(markers_for_fibroblast_compared_with_other_celltypes,file = "markers_for_fibroblast_compared_with_other_celltypes.xlsx")
dim(markers_for_fibroblast_compared_with_other_celltypes)
#################plot
p=DimPlot(RefMerge,group.by = "Names",label = T,repel = T)
pdf("all_cell_types_umap.pdf",height = 6,width = 8)
print(p)
dev.off()

p=DimPlot(RefMerge,group.by = "Names",label = T,split.by = "condition",repel = T)
pdf("all_cell_types_umap_split.pdf",height = 6,width = 12)
print(p)
dev.off()

#####################gene plot###########
head(RefMerge@meta.data)
table(RefMerge$CondTech)

#FeaturePlot(All.merge,features = "HAS1")
VlnPlot(RefMerge,features = 'HAS1',group.by = "Names")

VlnPlot(RefMerge,features = 'DNM3OS',split.by = "condition",
        group.by = "Names")
library(ggplot2)
p=FeaturePlot(RefMerge,label = T,
              split.by = "condition",
              repel = T,features = "DNM3OS")& theme(legend.position = "right") 
pdf("Feature_dnm30s_umap_split.pdf",height = 6,width = 12)
print(p)
dev.off()

p=FeaturePlot(RefMerge,label = T,repel = T,features = "DNM3OS")
pdf("Feature_dnm30s_umap.pdf")
print(p)
dev.off()

#############################fibroblast _df_markers
dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib")
setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib")

table(subset_data$seurat_clusters)
head(subset_data@meta.data)
table(subset_data$id)
table(subset_data$CondTech)
#提取fibro
if (T) {
  library(stringr)
  
  #step1 
  subset_data=subset(RefMerge,idents = "Fibroblasts")
  dim(subset_data)
  DefaultAssay(subset_data)
  gc()
  Idents(subset_data)=subset_data$id
  
  ##reference
  reference.sn=subset(subset_data,idents = 'reference')
  reference.sn=CreateSeuratObject(counts = reference.sn@assays$RNA@counts, project = "fib.sn")
  reference.sn$samples=str_replace_all(colnames(reference.sn),pattern ="[A|T|C|G]",replacement = "")
  Idents(reference.sn)=reference.sn$samples
  table(reference.sn$samples) %>%length()
  reference.sn$tech="sn"
  #DefaultAssay(reference.sn)="RNA"
  #reference.sn=SCTransform(reference.sn, verbose = FALSE)
  #reference.sn=RunPCA( reference.sn,verbose = FALSE)
  
  ##query
  subset_data.fib.sc=subset(subset_data,idents = "query")
  subset_data.fib.sc=CreateSeuratObject(counts = subset_data.fib.sc@assays$RNA@counts, project = "fib.sc")
  #DefaultAssay(subset_data.fib.sc)="RNA"
  subset_data.fib.sc$samples=str_replace_all(colnames(subset_data.fib.sc),pattern ="[A|T|C|G]",replacement = "")
  Idents(subset_data.fib.sc)=subset_data.fib.sc$samples
  table(subset_data.fib.sc$samples) %>%length()
  subset_data.fib.sc$tech="sc"  
  
  
  getwd()  
  
  dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca")
  setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca")
  
  table(reference.sn$samples) %>%length()
  head(subset_data@meta.data)
  table(subset_data$CondTech,subset_data$condition)
  table(subset_data$tech)
  table(subset_data$condition)
  table(colnames(reference.sn) %in% 
          colnames(subset_data[,subset_data$condition=="DCM"]))
  reference.sn$condition=ifelse(colnames(reference.sn) %in% 
                                  colnames(subset_data[,subset_data$condition=="DCM"]),"DCM","Donor")
  table(reference.sn$condition)
  subset_data.fib.sc$condition=ifelse(colnames(subset_data.fib.sc) %in% 
                                        colnames(subset_data[,subset_data$condition=="DCM"]),"DCM","Donor")
  table(subset_data.fib.sc$condition)
  
  getwd()
  #https://satijalab.org/seurat/archive/v3.0/integration.html
  #save(subset_data.fib.sc,reference.sn,file = "D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca/subset_fib.RData")
  load("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca/subset_fib.RData")
  
  
  library(future)
  library(future.apply)
  plan("multiprocess", workers = 1)
  options(future.globals.maxSize = 999999900000 * 1024^2)
  
  hcabm40k=merge(reference.sn,subset_data.fib.sc)
  head(hcabm40k@meta.data)

  getwd()
  dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct")
  setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct")
  #https://satijalab.org/seurat/archive/v3.0/integration.html  
  pbmc.list <- SplitObject(hcabm40k, split.by = "samples")
  
  for (i in names(pbmc.list)) {
    pbmc.list[[i]] <- SCTransform(pbmc.list[[i]], verbose = FALSE)
  }
  pbmc.features <- SelectIntegrationFeatures(object.list = pbmc.list, nfeatures = 3000)
  pbmc.list <- PrepSCTIntegration(object.list = pbmc.list, 
                                  anchor.features = pbmc.features)

    
  # This command returns dataset 5.  We can also specify multiple refs. (i.e. c(5,6)) 
  reference_dataset <- which(names(pbmc.list) %in% c('HDM6_-1', 'WM-LVD3_',
                                                      "HDM5_-1","WM-13-96_"))
  
  pbmc.anchors <- FindIntegrationAnchors(object.list = pbmc.list, normalization.method = "SCT", 
                                         anchor.features = pbmc.features, 
                                         reference = reference_dataset)
  
  names(pbmc.anchors)
  pbmc.anchors@object.list
  getwd()
save(pbmc.anchors,file = "D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct/anchors.rds")  
load()

#数据太大转到Linux服务器处理 "/home/data/t040413/heart_muscle"
fib.integrated <- IntegrateData(anchorset = pbmc.anchors, 
                                  normalization.method = "SCT")
  
  dim(fib.integrated)
  fib.integrated <- RunPCA(object = fib.integrated, verbose = FALSE)
  fib.integrated <- RunUMAP(object = fib.integrated, dims = 1:30)  
  save(fib.integrated,file = "fib.integrated.sct.rds")  

getwd()
  load("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct/fib.integrated.sct_PrepSCTFindMarkers.rds")
############################ fib diff
  subset_data=fib.integrated
  subset_data$group=subset_data$condition

  DefaultAssay(subset_data)
  Assays(subset_data)
  dim(subset_data@assays$RNA@counts)
  dim(subset_data@assays$SCT@counts)
  DefaultAssay(subset_data)="SCT"
  library(tibble)
      
  Idents(subset_data)=subset_data$group
  
  subset_data=PrepSCTFindMarkers(subset_data)
  markers_DCM_VS_Donor=FindMarkers(subset_data,ident.1 = "DCM",ident.2 = "Donor",
                                   min.pct = 0.01,logfc.threshold = 0.1)
  
  markers_DCM_VS_Donor=markers_DCM_VS_Donor %>%  rownames_to_column(var = "gene")
  openxlsx::write.xlsx(markers_DCM_VS_Donor,rownames=T,
                       file ="Fibro_integrated_markers_DCM_vs_Donor.xlsx" )
  dim(markers_DCM_VS_Donor)
  
  
  
  
  
  
  #########plot umap
  subset_data@meta.data %>%head()
  p=DimPlot(subset_data,repel = T)
  pdf("all_cell_types_umap.pdf")
  print(p)
  dev.off()
  
  p=DimPlot(subset_data,split.by = "group",repel = T)
  pdf("all_cell_types_umap_split.pdf",height = 6,width = 14)
  print(p)
  dev.off()
  
  
  
  
  #########plot gene
  p=FeaturePlot(subset_data,
                split.by = "group",
                repel = T,features = "DNM3OS")& theme(legend.position = "right") 
  pdf("FIB_Feature_dnm30s_umap_split.pdf",height = 6,width = 14)
  print(p)
  dev.off()
  
  p=FeaturePlot(subset_data,repel = T,features = "DNM3OS")
  pdf("FIB_Feature_dnm30s_umap.pdf")
  print(p)
  dev.off()
  
  
  

}  
  
    
  

2. 相同模态的数据

.libPaths(c("/home/data/refdir/Rlib/",  "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local/lib/R/library"))
library(Seurat)
library(dplyr)
library(tibble)
library(ggplot2)
library(dplyr)
library(tibble)
library(stringr)

load("./GSE183852_DCM_Integrated.Robj")




#DefaultAssay(RefMerge)="prediction.score.celltype.l2"
RefMerge$stim=str_replace_all(colnames(RefMerge),pattern ="[A|T|C|G]",replacement = "")
Idents(RefMerge)=RefMerge$stim
table(RefMerge$stim)


DefaultAssay(RefMerge)
table(RefMerge$Names)
table(RefMerge$tech)


Idents(RefMerge)=RefMerge$Names
Idents(RefMerge)=RefMerge$tech
All.merge=subset(RefMerge,idents = "SN")

DefaultAssay(All.merge)="SCT"
head(All.merge@meta.data)

#################################
All.merge=SCTransform(All.merge, verbose = FALSE)  %>% RunPCA(npcs = 50, verbose = FALSE)
library('harmony')
All.merge <- All.merge  %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All.merge , 'harmony') 
#######################cluster
dims = 1:30
All.merge  <- All.merge  %>% 
  RunUMAP(reduction = "harmony", dims = dims) %>% 
  RunTSNE(reduction = "harmony", dims = dims) %>% 
  FindNeighbors(reduction = "harmony", dims = dims)


save(All.merge , file="All.merge .rds")
getwd()
list.files()

#setwd("D:/yll/heart_fibroblast/project/gse183852")
load("./All.merge .rds")
#################################################################################
library(Seurat)
head(All.merge@meta.data)
table(All.merge$condition)

All.merge$group=All.merge$condition

DimPlot(All.merge,group.by = "Names",label = T)
RefMerge@meta.data %>%head()
colnames(RefMerge@meta.data)

p=DimPlot(All.merge,split.by = "group")
pdf("two_groups_umap.pdf",width=10,height=5)
print(p)
dev.off()

p=DimPlot(All.merge,split.by = "group",label = T)
pdf("two_groups_umap_split.pdf",width=14,height=7)
print(p)
dev.off()

#########
All.merge@meta.data %>%head() %>%dim()
table(All.merge$Names)
Idents(All.merge)=All.merge$Names

markers_for_fibroblast_compared_with_other_celltypes=FindMarkers(All.merge,ident.1 = "Fibroblasts", logfc.threshold = 0.4,only.pos = T)
markers_for_fibroblast_compared_with_other_celltypes %>% rownames_to_column(var="gene")
openxlsx::write.xlsx(markers_for_fibroblast_compared_with_other_celltypes,file = "markers_for_fibroblast_compared_with_other_celltypes_sn.xlsx")
#table(grepl("nm3os",rownames(All.merge)))
#dim(All.merge)
#rownames(All.merge)
getwd()

p=FeaturePlot(All.merge,features = "DNM3OS",label = T)
pdf("4_Dnm3os_indifferent_celltyes_umap.pdf",width=5,height=5)
print(p)
dev.off()

p=VlnPlot(All.merge,features = "DNM3OS",group.by = "group")
pdf("4_Dnm3osin_different_celltypes_vlnplot.pdf",width=6,height=5)
print(p)
dev.off()

#linux中运行
##################################################Fibroblast##################
.libPaths(c("/home/data/refdir/Rlib/",  "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local/lib/R/library"))
library(Seurat)
library(dplyr)
library(tibble)
library(ggplot2)
library(dplyr)
library(tibble)
library(stringr)
load("./All.merge .rds")

Idents(All.merge)=All.merge$Names
subset_data=subset(All.merge,idents = "Fibroblasts")
dim(subset_data)



subset_data=SCTransform(subset_data, verbose = FALSE)  %>% RunPCA(npcs = 50, verbose = FALSE)
library('harmony')
library(dplyr)
library(reshape2)
library(tibble)
subset_data <- subset_data  %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(subset_data , 'harmony') 
#######################cluster
dims = 1:30
subset_data  <- subset_data  %>% 
  RunUMAP(reduction = "harmony", dims = dims) %>% 
  RunTSNE(reduction = "harmony", dims = dims) %>% 
  FindNeighbors(reduction = "harmony", dims = dims)
#table(subset_data$group)
getwd()

#############################fib differential markers

Idents(subset_data)=subset_data$group
markers_DCM_VS_Donor=FindMarkers(subset_data,ident.1 = "DCM",ident.2 = "Donor")
markers_DCM_VS_Donor=markers_DCM_VS_Donor %>%  rownames_to_column(var = "gene")
openxlsx::write.xlsx(markers_DCM_VS_Donor,rownames=T,
                     file ="Fibro_markers_DCM_vs_Donor_sn.xlsx" )
save(subset_data,file="fibroblast_sn.rds")


#########plot umap
subset_data@meta.data %>%head()
p=DimPlot(subset_data,repel = T)
pdf("all_cell_types_umap.pdf")
print(p)
dev.off()

p=DimPlot(subset_data,split.by = "group",repel = T)
pdf("all_cell_types_umap_split.pdf",height = 6,width = 14)
print(p)
dev.off()




#########plot gene
p=FeaturePlot(subset_data,
              split.by = "group",
              repel = T,features = "DNM3OS")& theme(legend.position = "right") 
pdf("FIB_Feature_dnm30s_umap_split.pdf",height = 6,width = 14)
print(p)
dev.off()

p=FeaturePlot(subset_data,repel = T,features = "DNM3OS")
pdf("FIB_Feature_dnm30s_umap.pdf")
print(p)
dev.off()