您现在的位置是：首页 > 大数据

当前栏目

单细胞ATAC实战04: 联合scRNA-seq数据给细胞注释

数据实战联合注释单细胞 04 细胞 Seq

2023-06-13 09:18:25 时间

from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import scanpy as sc
import snapatac2 as snap
import scvi

import bioquest as bq
import sckit as sk

基因组注释文件

gff_file="~/DataHub/Genomics/GENCODE/hg38.v43.chr_patch_hapl_scaff.basic.annotation.gff3.gz"

简单合并scRNA-seq和scATAC-seq

adata = snap.read_dataset("PBMC.h5ads")

gene activity matrix

query = snap.pp.make_gene_matrix(adata, gff_file=gff_file)
query.obs_names = adata.obs_names

scRNA-seq数据

从网站下载的已经注释好的h5ad文件作为reference

reference = sc.read("10x-Multiome-Pbmc10k-RNA.h5ad")

简单合并scATAC-seq和scRNA-seq数据

data = reference.concatenate(query, batch_categories=["reference", "query"])

标准流程处理

data.layers["counts"] = data.X.copy()
#过滤
sc.pp.filter_genes(data, min_cells=5)
#文库大小
sc.pp.normalize_total(data, target_sum=1e4)
#log转化
sc.pp.log1p(data)
# 前5000高变基因
sc.pp.highly_variable_genes(
    data,
    n_top_genes = 5000,
    flavor="seurat_v3",
    layer="counts",
    batch_key="batch",
    subset=True
)

Data integration

First we setup the scvi-tools to pretrain the model.

scvi.model.SCVI.setup_anndata(data, layer="counts", batch_key="batch")
vae = scvi.model.SCVI(
    data,
    n_layers=2,
    n_latent=30,
    gene_likelihood="nb",
    dispersion="gene-batch",
)

训练vae模型

vae.train(max_epochs=1000, early_stopping=True,use_gpu=True)

Let’s plot the training history and make sure the model has converged.

converged 即1000个epochs前，模型的损失不在大幅度减小

ax = vae.history['elbo_train'][1:].plot()
vae.history['elbo_validation'].plot(ax=ax)

data.obs["celltype_scanvi"] = 'Unknown'
ref_idx = data.obs['batch'] == "reference"
data.obs["celltype_scanvi"][ref_idx] = data.obs['cell_type'][ref_idx]

训练lvae模型

lvae = scvi.model.SCANVI.from_scvi_model(
    vae,
    adata=data,
    labels_key="celltype_scanvi",
    unlabeled_category="Unknown",
)

lvae.train(max_epochs=1000, n_samples_per_label=100,use_gpu=True)
lvae.history['elbo_train'][1:].plot()

细胞注释

We now can perform the label transfer/prediction and obtain the joint embedding of reference and query data.

data.obs["C_scANVI"] = lvae.predict(data)
data.obsm["X_scANVI"] = lvae.get_latent_representation(data)

sc.pp.neighbors(data, use_rep="X_scANVI")
sc.tl.umap(data)
sc.pl.umap(data, color=['C_scANVI', "batch"], wspace=0.45)

obs = data.obs
obs = obs[obs['batch'] == 'query']
obs.index = list(map(lambda x: x.split("-query")[0], obs.index))

Save the predicted cell type labels back to the original cell by bin matrix.

atac=adata.to_adata()
adata.close()
atac.obs['cell_type'] = obs.loc[atac.obs.index]['C_scANVI']

refine the cell type labels at the leiden cluster level, and save the result.

根据leiden cluster手动注释细胞类型

from collections import Counter

cell_type_labels = np.unique(atac.obs['cell_type'])

count_table = {}
for cl, ct in zip(atac.obs['leiden'], atac.obs['cell_type']):
    if cl in count_table:
        count_table[cl].append(ct)
    else:
        count_table[cl] = [ct]

mat = []
for cl, counts in count_table.items():
    c = Counter(counts)
    c = np.array([c[ct] for ct in cell_type_labels])
    c = c / c.sum()
    mat.append(c)

import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

df_cm = pd.DataFrame(
    mat,
    index = count_table.keys(),
    columns = cell_type_labels,
)
sn.clustermap(df_cm, annot=True)

sk.label_helper(len(atac.obs.leiden.unique())-1)
new_cluster_names ='''
0,CD14 Mono
1,MAIT
2,CD4 Naive
3,NK
4,Memory B
5,CD14 Mono
6,CD14 Mono
7,Intermediate B
8,cDC
9,Naive B
10,Naive B
11,Treg
12,cDC
13,Treg
14,CD14 Mono
15,pDC
'''
sk.labeled(atac,cluster_names=new_cluster_names,reference_key='leiden')

sc.pl.umap(atac, color=['leiden', "CellType"], wspace=0.45,legend_loc="on data",legend_fontoutline=3)

atac.write_h5ad("atac_annot.h5ad",compression='lzf')
adata.close()

猜你喜欢

Access数据库不能写不能更新的解决方法
ue4封装接口_ue4导出模型
flutter系列之:移动端手势的具体使用
声纹识别技术的现状、局限与趋势丨雷锋网公开课
asp最常用的分页函数
科技、媒体、电信业：投资起区块链来比金融业更凶
报名 | 人工智能发展前沿论坛，倒计时1天！
技术陈硕：开创Linux技术新世界（陈硕linux）

zl程序教程

当前栏目

单细胞ATAC实战04: 联合scRNA-seq数据给细胞注释

简单合并scRNA-seq和scATAC-seq

Data integration

细胞注释

相关文章