跟着Nat. Biotechnol.学作图:R语言ComplexHeatmap热图展示TCGA-BRCA数据集概况
2023-06-13 09:16:32 时间
论文
Removing unwanted variation from large-scale RNA sequencing data with PRPS
https://www.nature.com/articles/s41587-022-01440-w#data-availability
数据链接
https://zenodo.org/record/6459560#.Y2D2NHZBzid
https://zenodo.org/record/6392171#.Y2D2SXZBzid
代码链接
https://github.com/RMolania/TCGA_PanCancer_UnwantedVariation
今天推文复现的图没有出现在论文正文中,而是论文所提供代码里的一个图
首先是示例数据集
image.png
这个数据集论文中并没有直接提供,需要运行一系列代码才能获取;获取这个数据集的代码这里就不介绍了,推文内容主要介绍如何绘图
读取数据集
# Read the sample annotation table; the first CSV column becomes the row names.
sample.info.01 <- read.csv(
  file = "example_data/sample_info_brca_tcga_rnaseq.csv",
  row.names = 1
)
# Quick inspection: table dimensions, then the first few rows.
dim(sample.info.01)
head(sample.info.01)
ComplexHeatmap的帮助文档
https://jokergoo.github.io/ComplexHeatmap-reference/book/
ComplexHeatmap安装
# Install ComplexHeatmap from Bioconductor only when it is not already
# available, so re-running the script does not reinstall the package each time.
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
  BiocManager::install("ComplexHeatmap")
}
作图代码
library(ComplexHeatmap)
# One fixed colour per year; the names are the year labels the heatmap maps on.
years.colors <- c(
  '2009' = 'green',
  '2010' = 'purple4',
  '2011' = 'blue',
  '2012' = 'brown',
  '2013' = 'tan1',
  '2014' = 'darkgreen',
  '2015' = 'black'
)
# Single-column heatmap of `year_mda`, with the sample order reversed.
H.year <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$year_mda),
  name = 'Time (years)',
  col = years.colors,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  heatmap_legend_param = list(
    color_bar = "discrete",
    ncol = 2,
    title_gp = grid::gpar(fontsize = 12)
  )
)
# Evaluating the object at top level renders the heatmap.
H.year
image.png
还可以把好多个热图拼到一起
完整代码
# Load the per-sample annotation CSV, using its first column as row names.
sample.info.01 <- read.csv(
  file = "example_data/sample_info_brca_tcga_rnaseq.csv",
  row.names = 1
)
# Show the table's dimensions and its leading rows as a sanity check.
dim(sample.info.01)
head(sample.info.01)
library(ComplexHeatmap)
### Time (years)
# One fixed colour per year; the names are the year labels the heatmap maps on.
years.colors <- c(
  '2009' = 'green',
  '2010' = 'purple4',
  '2011' = 'blue',
  '2012' = 'brown',
  '2013' = 'tan1',
  '2014' = 'darkgreen',
  '2015' = 'black'
)
# Single-column heatmap of `year_mda`, with the sample order reversed.
# H.year is the first (main) heatmap in the combined draw() call at the end of
# this script, and the main heatmap's row order is applied to every other
# panel. Row clustering is therefore switched off here as well, matching the
# `cluster_rows = FALSE` used by all the other heatmaps, so samples stay in
# their original (reversed) order.
H.year <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$year_mda),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  col = years.colors,
  name = 'Time (years)',
  heatmap_legend_param = list(
    color_bar = "discrete",
    ncol = 2,
    title_gp = grid::gpar(fontsize = 12)
  )
)
# Evaluating the object at top level renders the heatmap on its own.
H.year
### Plates
# Uses `sample.info.01`, the table read at the top of this script; the
# original referenced `sample.info`, which this script never defines.
# Number of distinct plate labels; it sizes the palette below
# (38 according to the original author's note).
n.plate <- length(unique(sample.info.01$plate_RNAseq))
# Interpolate the PRGn palette (with its 6th, middle colour dropped) to one
# colour per plate.
colfunc <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, 'PRGn')[-6]
)
color.plates <- colfunc(n.plate)
# Single-column heatmap of the plate label, with the sample order reversed.
H.plate <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$plate_RNAseq),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  col = color.plates,
  name = 'Plates',
  heatmap_legend_param = list(
    color_bar = "discrete",
    ncol = 4,
    title_gp = grid::gpar(fontsize = 12)
  )
)
### TSS
# Uses `sample.info.01`, the table read at the top of this script; the
# original referenced `sample.info`, which this script never defines.
# Number of distinct tissue-source-site labels; it sizes the palette below
# (40 according to the original author's note).
n.tss <- length(unique(sample.info.01$tss_RNAseq))
# Interpolate the BrBG palette (with its 6th, middle colour dropped) to one
# colour per site.
colfunc <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, 'BrBG')[-6]
)
color.tss <- colfunc(n.tss)
# Single-column heatmap of the site label, with the sample order reversed.
H.tss <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$tss_RNAseq),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  col = color.tss,
  name = 'Tissue source sites',
  heatmap_legend_param = list(
    color_bar = "discrete",
    ncol = 4,
    title_gp = grid::gpar(fontsize = 12)
  )
)
### Tissue
# Uses `sample.info.01`, the table read at the top of this script; the
# original referenced `sample.info`, which this script never defines.
# Single-column heatmap of the tissue type, with the sample order reversed.
# NOTE(review): the three colours and the three legend labels are both given
# positionally (unnamed). Confirm against the actual values of `Tissue.Type`
# that each label ends up next to the colour of the tissue it describes.
H.tissue <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$Tissue.Type),
  cluster_rows = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  col = c("#252525", 'blue', "#D9D9D9"),
  name = 'Tissues',
  heatmap_legend_param = list(
    color_bar = "discrete",
    direction = "vertical",
    ncol = 1,
    title_gp = grid::gpar(fontsize = 12),
    labels = c(
      'Primary tumor',
      'Metastatic tumor',
      'Adjacent normal'
    )
  )
)
### Purity
# Uses `sample.info.01`, the table read at the top of this script; the
# original referenced `sample.info`, which this script never defines.
# Single-column heatmap of the purity score (column `purity_HTseq_FPKM`),
# sample order reversed, coloured with ten steps of the viridis "plasma" scale.
H.purity <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$purity_HTseq_FPKM),
  column_names_gp = grid::gpar(fontsize = 12),
  cluster_rows = FALSE,
  name = 'Tumor purity score',
  col = viridis::plasma(n = 10),
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)
  )
)
### library size
# Uses `sample.info.01`, the table read at the top of this script; the
# original referenced `sample.info`, which this script never defines.
# Single-column heatmap of `libSize`, sample order reversed, coloured with ten
# steps of the viridis scale.
H.ls <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$libSize),
  cluster_rows = FALSE,
  name = 'Library size',
  column_names_gp = grid::gpar(fontsize = 12),
  col = viridis::viridis(n = 10),
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)
  )
)
### PAM50
# Uses `sample.info.01`, the table read at the top of this script; the
# original referenced `sample.info`, which this script never defines.
#
# The original snippet also used `pam50.colors` without defining it. When no
# such palette exists yet, build a named one covering every label found in
# either PAM50 column, so both heatmaps below share a single colour key.
# The colours are placeholders; define `pam50.colors` beforehand to override.
if (!exists("pam50.colors")) {
  pam50.levels <- sort(unique(c(
    as.character(sample.info.01$Call),
    as.character(sample.info.01$pam50.geneFu.fpkm)
  )))
  pam50.colors <- stats::setNames(
    grDevices::hcl.colors(length(pam50.levels), palette = "Dark 3"),
    pam50.levels
  )
}
# Single-column heatmap of the `Call` column, sample order reversed.
H.pam50.tcga <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$Call),
  cluster_rows = FALSE,
  name = 'PAM50 (TCGA calls)',
  column_names_gp = grid::gpar(fontsize = 12),
  col = pam50.colors,
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)
  )
)
### PAM50 genefu
# Same layout for the `pam50.geneFu.fpkm` column, using the same palette.
H.pam50.genefu <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$pam50.geneFu.fpkm),
  cluster_rows = FALSE,
  name = 'PAM50 (Genefu calls)',
  column_names_gp = grid::gpar(fontsize = 12),
  col = pam50.colors,
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)
  )
)
### Flow cell chemistry
# Uses `sample.info.01`, the table read at the top of this script; the
# original referenced `sample.info`, which this script never defines.
#
# The original snippet also used `FcCh.colors` without defining it. When no
# such palette exists yet, build a named one with one colour per distinct
# `FcCh` label. The colours are placeholders; define `FcCh.colors` beforehand
# to override.
if (!exists("FcCh.colors")) {
  fcch.levels <- sort(unique(as.character(sample.info.01$FcCh)))
  FcCh.colors <- stats::setNames(
    grDevices::hcl.colors(length(fcch.levels), palette = "Set 2"),
    fcch.levels
  )
}
# Single-column heatmap of the `FcCh` column, sample order reversed.
H.fcch <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$FcCh),
  cluster_rows = FALSE,
  name = 'Flow cell chemistry',
  column_names_gp = grid::gpar(fontsize = 12),
  col = FcCh.colors,
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12),
    direction = "horizontal"
  )
)
# Concatenate the nine single-column heatmaps left to right with `+` and draw
# them as one figure; legends are kept separate and placed on the right.
ComplexHeatmap::draw(
  object = H.year + H.fcch + H.plate + H.tss + H.tissue +
    H.pam50.tcga + H.pam50.genefu + H.ls + H.purity,
  heatmap_legend_side = 'right',
  merge_legends = FALSE
)
image.png
今天的推文没有对代码的细节进行研究,主要就是能够运行出结果,如果后续需要用到这个R包来作图,可以仔细研究这个R包的函数
相关文章
- 【视频】K近邻KNN算法原理与R语言结合新冠疫情对股票价格预测|数据分享|附代码数据
- CDGA|要成功实施数据治理项目,必须在组织内采用通用语言
- 8-TCL事务控制语言
- R语言、SAS潜类别(分类)轨迹模型LCTM分析体重指数 (BMI)数据可视化|附代码数据
- R语言广义相加模型 (GAMs)分析预测CO2时间序列数据|附代码数据
- 高级语言中的语句在汇编中是如何实现的
- R语言缺失数据变量选择LASSO回归:Bootstrap重(再)抽样插补和推算
- R语言代做编程辅导回归模型分析工资数据案例报告(附答案)
- 数据分享|R语言分析上海空气质量指数数据:kmean聚类、层次聚类、时间序列分析:arima模型、指数平滑法|附代码数据
- R语言中的时间序列分析模型:ARIMA-ARCH / GARCH模型分析股票价格|附代码数据
- R语言用逻辑回归、决策树和随机森林对信贷数据集进行分类预测|附代码数据
- 跟着Nature Communications学数据分析:R语言做随机森林模型并对变量重要性排序
- 2-R语言数据结构
- 数据分享|R语言分析上海空气质量指数数据:kmean聚类、层次聚类、时间序列分析:arima模型、指数平滑法|附代码数据
- 数据分享|R语言逻辑回归、Naive Bayes贝叶斯、决策树、随机森林算法预测心脏病|附代码数据
- R语言用灰色模型 GM (1,1)、神经网络预测房价数据和可视化
- R语言如何做马尔可夫转换模型markov switching model|附代码数据
- 面向现实世界场景,多语言大数据集PRESTO来了
- Go语言圣经-函数多返回值习题详解编程语言
- java语言概述详解编程语言
- Go语言使用匿名结构体解析JSON数据
- C语言连接Oracle数据库实现数据访问(c语言访问oracle)
- 《代码英雄》第三季(4):深入 Perl 语言的世界
- MySQL 中 DML 语句数据的操纵语言简介(mysql中dml语句)
- MySQL存储多语言数据的首选数据库系统(mysql 一个汉字多岁)