超大数据集处理 心机成纤维 heart fibroblast
数据 处理 超大
2023-09-14 09:09:45 时间
1. 不同模态的数据
getwd()
dir.create("D:/yll/heart_fibroblast/project/gse183852_second")
setwd("D:/yll/heart_fibroblast/project/gse183852_second")
getwd()
library(stringr)
load("D:/yll/heart_fibroblast/gse183852/GSE183852_DCM_Integrated.Robj")
DefaultAssay(RefMerge)
Assays(RefMerge)
RefMerge
head(RefMerge@meta.data)
table(RefMerge$Names)
table(RefMerge$condition)
library(Seurat)
######################################for-integration-data#########3
dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration")
setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/")
getwd()
DefaultAssay(RefMerge)
table(RefMerge$Names)
RefMerge@commands
dim(RefMerge@assays$SCT)
dim(RefMerge@assays$prediction.score.celltype.l1)
dim(RefMerge@assays$prediction.score.celltype.l2)
dim(RefMerge@assays$RNA)
#DimPlot(RefMerge,assay="prediction.score.celltype.l1" )
DimPlot(RefMerge,label = T,repel = T)
################marker genes
DefaultAssay(RefMerge)
table(RefMerge$Names)
Idents(RefMerge)=RefMerge$Names
library(dplyr)
library(tibble)
markers_for_fibroblast_compared_with_other_celltypes=FindMarkers(RefMerge,ident.1 = "Fibroblasts",logfc.threshold = 0.1,
only.pos = T,min.pct = 0.01)
markers_for_fibroblast_compared_with_other_celltypes=markers_for_fibroblast_compared_with_other_celltypes %>% rownames_to_column(var="gene")
head(markers_for_fibroblast_compared_with_other_celltypes)
openxlsx::write.xlsx(markers_for_fibroblast_compared_with_other_celltypes,file = "markers_for_fibroblast_compared_with_other_celltypes.xlsx")
dim(markers_for_fibroblast_compared_with_other_celltypes)
#################plot
p=DimPlot(RefMerge,group.by = "Names",label = T,repel = T)
pdf("all_cell_types_umap.pdf",height = 6,width = 8)
print(p)
dev.off()
p=DimPlot(RefMerge,group.by = "Names",label = T,split.by = "condition",repel = T)
pdf("all_cell_types_umap_split.pdf",height = 6,width = 12)
print(p)
dev.off()
#####################gene plot###########
head(RefMerge@meta.data)
table(RefMerge$CondTech)
#FeaturePlot(All.merge,features = "HAS1")
VlnPlot(RefMerge,features = 'HAS1',group.by = "Names")
VlnPlot(RefMerge,features = 'DNM3OS',split.by = "condition",
group.by = "Names")
library(ggplot2)
p=FeaturePlot(RefMerge,label = T,
split.by = "condition",
repel = T,features = "DNM3OS")& theme(legend.position = "right")
pdf("Feature_dnm30s_umap_split.pdf",height = 6,width = 12)
print(p)
dev.off()
p=FeaturePlot(RefMerge,label = T,repel = T,features = "DNM3OS")
pdf("Feature_dnm30s_umap.pdf")
print(p)
dev.off()
#############################fibroblast _df_markers
dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib")
setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib")
table(subset_data$seurat_clusters)
head(subset_data@meta.data)
table(subset_data$id)
table(subset_data$CondTech)
#提取fibro
if (T) {
library(stringr)
#step1
subset_data=subset(RefMerge,idents = "Fibroblasts")
dim(subset_data)
DefaultAssay(subset_data)
gc()
Idents(subset_data)=subset_data$id
##reference
reference.sn=subset(subset_data,idents = 'reference')
reference.sn=CreateSeuratObject(counts = reference.sn@assays$RNA@counts, project = "fib.sn")
reference.sn$samples=str_replace_all(colnames(reference.sn),pattern ="[A|T|C|G]",replacement = "")
Idents(reference.sn)=reference.sn$samples
table(reference.sn$samples) %>%length()
reference.sn$tech="sn"
#DefaultAssay(reference.sn)="RNA"
#reference.sn=SCTransform(reference.sn, verbose = FALSE)
#reference.sn=RunPCA( reference.sn,verbose = FALSE)
##query
subset_data.fib.sc=subset(subset_data,idents = "query")
subset_data.fib.sc=CreateSeuratObject(counts = subset_data.fib.sc@assays$RNA@counts, project = "fib.sc")
#DefaultAssay(subset_data.fib.sc)="RNA"
subset_data.fib.sc$samples=str_replace_all(colnames(subset_data.fib.sc),pattern ="[A|T|C|G]",replacement = "")
Idents(subset_data.fib.sc)=subset_data.fib.sc$samples
table(subset_data.fib.sc$samples) %>%length()
subset_data.fib.sc$tech="sc"
getwd()
dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca")
setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca")
table(reference.sn$samples) %>%length()
head(subset_data@meta.data)
table(subset_data$CondTech,subset_data$condition)
table(subset_data$tech)
table(subset_data$condition)
table(colnames(reference.sn) %in%
colnames(subset_data[,subset_data$condition=="DCM"]))
reference.sn$condition=ifelse(colnames(reference.sn) %in%
colnames(subset_data[,subset_data$condition=="DCM"]),"DCM","Donor")
table(reference.sn$condition)
subset_data.fib.sc$condition=ifelse(colnames(subset_data.fib.sc) %in%
colnames(subset_data[,subset_data$condition=="DCM"]),"DCM","Donor")
table(subset_data.fib.sc$condition)
getwd()
#https://satijalab.org/seurat/archive/v3.0/integration.html
#save(subset_data.fib.sc,reference.sn,file = "D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca/subset_fib.RData")
load("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/rpca/subset_fib.RData")
library(future)
library(future.apply)
plan("multiprocess", workers = 1)
options(future.globals.maxSize = 999999900000 * 1024^2)
hcabm40k=merge(reference.sn,subset_data.fib.sc)
head(hcabm40k@meta.data)
getwd()
dir.create("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct")
setwd("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct")
#https://satijalab.org/seurat/archive/v3.0/integration.html
pbmc.list <- SplitObject(hcabm40k, split.by = "samples")
for (i in names(pbmc.list)) {
pbmc.list[[i]] <- SCTransform(pbmc.list[[i]], verbose = FALSE)
}
pbmc.features <- SelectIntegrationFeatures(object.list = pbmc.list, nfeatures = 3000)
pbmc.list <- PrepSCTIntegration(object.list = pbmc.list,
anchor.features = pbmc.features)
# This command returns dataset 5. We can also specify multiple refs. (i.e. c(5,6))
reference_dataset <- which(names(pbmc.list) %in% c('HDM6_-1', 'WM-LVD3_',
"HDM5_-1","WM-13-96_"))
pbmc.anchors <- FindIntegrationAnchors(object.list = pbmc.list, normalization.method = "SCT",
anchor.features = pbmc.features,
reference = reference_dataset)
names(pbmc.anchors)
pbmc.anchors@object.list
getwd()
save(pbmc.anchors,file = "D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct/anchors.rds")
load()
#数据太大转到Linux服务器处理 "/home/data/t040413/heart_muscle"
fib.integrated <- IntegrateData(anchorset = pbmc.anchors,
normalization.method = "SCT")
dim(fib.integrated)
fib.integrated <- RunPCA(object = fib.integrated, verbose = FALSE)
fib.integrated <- RunUMAP(object = fib.integrated, dims = 1:30)
save(fib.integrated,file = "fib.integrated.sct.rds")
getwd()
load("D:/yll/heart_fibroblast/project/gse183852_second/integration/fib/reference_baset_sct/fib.integrated.sct_PrepSCTFindMarkers.rds")
############################ fib diff
subset_data=fib.integrated
subset_data$group=subset_data$condition
DefaultAssay(subset_data)
Assays(subset_data)
dim(subset_data@assays$RNA@counts)
dim(subset_data@assays$SCT@counts)
DefaultAssay(subset_data)="SCT"
library(tibble)
Idents(subset_data)=subset_data$group
subset_data=PrepSCTFindMarkers(subset_data)
markers_DCM_VS_Donor=FindMarkers(subset_data,ident.1 = "DCM",ident.2 = "Donor",
min.pct = 0.01,logfc.threshold = 0.1)
markers_DCM_VS_Donor=markers_DCM_VS_Donor %>% rownames_to_column(var = "gene")
openxlsx::write.xlsx(markers_DCM_VS_Donor,rownames=T,
file ="Fibro_integrated_markers_DCM_vs_Donor.xlsx" )
dim(markers_DCM_VS_Donor)
#########plot umap
subset_data@meta.data %>%head()
p=DimPlot(subset_data,repel = T)
pdf("all_cell_types_umap.pdf")
print(p)
dev.off()
p=DimPlot(subset_data,split.by = "group",repel = T)
pdf("all_cell_types_umap_split.pdf",height = 6,width = 14)
print(p)
dev.off()
#########plot gene
p=FeaturePlot(subset_data,
split.by = "group",
repel = T,features = "DNM3OS")& theme(legend.position = "right")
pdf("FIB_Feature_dnm30s_umap_split.pdf",height = 6,width = 14)
print(p)
dev.off()
p=FeaturePlot(subset_data,repel = T,features = "DNM3OS")
pdf("FIB_Feature_dnm30s_umap.pdf")
print(p)
dev.off()
}
2. 相同模态的数据
.libPaths(c("/home/data/refdir/Rlib/", "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local/lib/R/library"))
library(Seurat)
library(dplyr)
library(tibble)
library(ggplot2)
library(dplyr)
library(tibble)
library(stringr)
load("./GSE183852_DCM_Integrated.Robj")
#DefaultAssay(RefMerge)="prediction.score.celltype.l2"
RefMerge$stim=str_replace_all(colnames(RefMerge),pattern ="[A|T|C|G]",replacement = "")
Idents(RefMerge)=RefMerge$stim
table(RefMerge$stim)
DefaultAssay(RefMerge)
table(RefMerge$Names)
table(RefMerge$tech)
Idents(RefMerge)=RefMerge$Names
Idents(RefMerge)=RefMerge$tech
All.merge=subset(RefMerge,idents = "SN")
DefaultAssay(All.merge)="SCT"
head(All.merge@meta.data)
#################################
All.merge=SCTransform(All.merge, verbose = FALSE) %>% RunPCA(npcs = 50, verbose = FALSE)
library('harmony')
All.merge <- All.merge %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(All.merge , 'harmony')
#######################cluster
dims = 1:30
All.merge <- All.merge %>%
RunUMAP(reduction = "harmony", dims = dims) %>%
RunTSNE(reduction = "harmony", dims = dims) %>%
FindNeighbors(reduction = "harmony", dims = dims)
save(All.merge , file="All.merge .rds")
getwd()
list.files()
#setwd("D:/yll/heart_fibroblast/project/gse183852")
load("./All.merge .rds")
#################################################################################
library(Seurat)
head(All.merge@meta.data)
table(All.merge$condition)
All.merge$group=All.merge$condition
DimPlot(All.merge,group.by = "Names",label = T)
RefMerge@meta.data %>%head()
colnames(RefMerge@meta.data)
p=DimPlot(All.merge,split.by = "group")
pdf("two_groups_umap.pdf",width=10,height=5)
print(p)
dev.off()
p=DimPlot(All.merge,split.by = "group",label = T)
pdf("two_groups_umap_split.pdf",width=14,height=7)
print(p)
dev.off()
#########
All.merge@meta.data %>%head() %>%dim()
table(All.merge$Names)
Idents(All.merge)=All.merge$Names
markers_for_fibroblast_compared_with_other_celltypes=FindMarkers(All.merge,ident.1 = "Fibroblasts", logfc.threshold = 0.4,only.pos = T)
markers_for_fibroblast_compared_with_other_celltypes %>% rownames_to_column(var="gene")
openxlsx::write.xlsx(markers_for_fibroblast_compared_with_other_celltypes,file = "markers_for_fibroblast_compared_with_other_celltypes_sn.xlsx")
#table(grepl("nm3os",rownames(All.merge)))
#dim(All.merge)
#rownames(All.merge)
getwd()
p=FeaturePlot(All.merge,features = "DNM3OS",label = T)
pdf("4_Dnm3os_indifferent_celltyes_umap.pdf",width=5,height=5)
print(p)
dev.off()
p=VlnPlot(All.merge,features = "DNM3OS",group.by = "group")
pdf("4_Dnm3osin_different_celltypes_vlnplot.pdf",width=6,height=5)
print(p)
dev.off()
#linux中运行
##################################################Fibroblast##################
.libPaths(c("/home/data/refdir/Rlib/", "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/usr/local/lib/R/library"))
library(Seurat)
library(dplyr)
library(tibble)
library(ggplot2)
library(dplyr)
library(tibble)
library(stringr)
load("./All.merge .rds")
Idents(All.merge)=All.merge$Names
subset_data=subset(All.merge,idents = "Fibroblasts")
dim(subset_data)
subset_data=SCTransform(subset_data, verbose = FALSE) %>% RunPCA(npcs = 50, verbose = FALSE)
library('harmony')
library(dplyr)
library(reshape2)
library(tibble)
subset_data <- subset_data %>% RunHarmony("stim", plot_convergence = TRUE)
harmony_embeddings <- Embeddings(subset_data , 'harmony')
#######################cluster
dims = 1:30
subset_data <- subset_data %>%
RunUMAP(reduction = "harmony", dims = dims) %>%
RunTSNE(reduction = "harmony", dims = dims) %>%
FindNeighbors(reduction = "harmony", dims = dims)
#table(subset_data$group)
getwd()
#############################fib differential markers
Idents(subset_data)=subset_data$group
markers_DCM_VS_Donor=FindMarkers(subset_data,ident.1 = "DCM",ident.2 = "Donor")
markers_DCM_VS_Donor=markers_DCM_VS_Donor %>% rownames_to_column(var = "gene")
openxlsx::write.xlsx(markers_DCM_VS_Donor,rownames=T,
file ="Fibro_markers_DCM_vs_Donor_sn.xlsx" )
save(subset_data,file="fibroblast_sn.rds")
#########plot umap
subset_data@meta.data %>%head()
p=DimPlot(subset_data,repel = T)
pdf("all_cell_types_umap.pdf")
print(p)
dev.off()
p=DimPlot(subset_data,split.by = "group",repel = T)
pdf("all_cell_types_umap_split.pdf",height = 6,width = 14)
print(p)
dev.off()
#########plot gene
p=FeaturePlot(subset_data,
split.by = "group",
repel = T,features = "DNM3OS")& theme(legend.position = "right")
pdf("FIB_Feature_dnm30s_umap_split.pdf",height = 6,width = 14)
print(p)
dev.off()
p=FeaturePlot(subset_data,repel = T,features = "DNM3OS")
pdf("FIB_Feature_dnm30s_umap.pdf")
print(p)
dev.off()
相关文章
- java 16进制数据格式化处理工具类,16进制byte数组转String
- 大数据Hadoop生态圈各个组件介绍(详情)
- 大数据学习之Linux基础[通俗易懂]
- 翻译模型:小数据集处理
- Pandas处理大数据的性能优化技巧
- 命令行上的数据科学第二版:十、多语言数据科学
- SQLServer 错误 41350 警告:在为加密启用的数据库中创建了具有持续性 SCHEMA_AND_DATA 的内存优化表。 不会对内存优化表中的数据加密。 故障 处理 修复 支持远程
- SQL Server中数据行批量插入脚本的存储实现
- 隐私计算:数据隐私保护的技术退路
- 分页处理MySQL实现亿级数据高效分页处理(mysql亿级数据)
- 数据MongoDB查询导出:简单高效的数据获取方式(mongodb查询导出)
- 时间处理Redis Java中数据过期时间的方法(redisjava过期)
- 格式MySQL中处理二进制数据的方法(mysql二进制数据)
- MySQL处理二进制数据的智能方式(mysql二进制数据)
- 处理MySQL中的二进制数据处理(mysql二进制数据)
- MySQL存储:处理二进制数据的方式(mysql二进制数据)
- MySQL存储与处理二进制数据(mysql二进制数据)
- KDD业界 | 滴滴出行研究院副院长叶杰平:如何利用大数据进行智能调度和供需预测
- Redis重启后数据丢失吗?(redis重启 丢失吗)
- SQL Server数据库缺失:恢复丢失的数据(sqlserver缺少)
- MongoDB,一款高效、可扩展的数据库管理系统赢得众多用户的青睐,带来更快、更灵活、更可靠的数据存储和处理。(mongodb好处)
- PHP与MSSQL联合 处理日期数据(php mssql 日期)
- 17410号 Oracle 实现平稳数据迁移(17410 oracle)
- 应对大数据时代,如何高效处理亿级数据量的MySQL数据库(mysql 上亿数据量)
- Oracle中精彩纷呈的数据之旅(oracle 中数据长度)
- phpcache类代码(php数据缓存类)
- C#TextBox扩展方法数据验证详细说明