zl程序教程

您现在的位置是:首页 >  其他

当前栏目

ggplot2绘制森林图(有亚组和没亚组)

2023-03-07 09:12:31 时间

之前写了很多篇推文介绍森林图,包括了常见的forestplot/forestploter/ggforestplot等多个R包:

虽然写的很详细,有亚组和没亚组的都包括了,但是base r的语法对于新手来说确实很难理解,不如ggplot2系列清晰易懂,而且各种空格/NA等占位符的使用也不好理解。

所以今天介绍下如何使用ggplot2画森林图,相比于之前介绍的森林图画法,主要是数据不复杂,只要在图层上改改细节即可。

但是对于零基础的人来说,依然是有难度的

没有亚组的森林图

rm(list = ls())
tmp <- read.csv("../森林图/ggplot_forest.csv")

library(tidyverse)
str(tmp)
## 'data.frame': 16 obs. of  7 variables:
##  $ Study : chr  "Lotfollahzadeh" "Recio" "Cagir" "Bindea" ...
##  $ Number: int  658 38 23 34 56 32 78 68 23 45 ...
##  $ Pvalue: num  0.002 0.003 0.71 0.03 0.002 0.002 0.87 0.001 0.003 0.02 ...
##  $ OR    : chr  "0.75(0.61-0.93)" "0.71(0.55-0.92)" "0.86(0.58-1.28)" "0.71(0.54-0.94)" ...
##  $ mean  : num  0.75 0.71 0.86 0.71 1.13 0.68 0.81 0.6 1.24 0.71 ...
##  $ lower : num  0.61 0.55 0.58 0.54 1.02 0.48 0.61 0.42 1.07 0.51 ...
##  $ upper : num  0.93 0.92 1.28 0.94 1.29 0.96 1.06 0.87 1.45 0.9 ...

先把误差线画出来,可以参考这篇推文:R语言画误差线的5种方法

tmp <- tmp |> mutate(id = row_number())
p1 <- tmp |> mutate(type = ifelse(Pvalue < 0.05,"#4575b4","grey")
                    ) |> 
  ggplot()+
  geom_errorbarh(aes(y=id,xmin=lower, xmax=upper,color=type),
                 height=0.5, # 控制左右端点两条小竖线的长短
                 size=.8)+
  geom_point(aes(y = id, x = mean,color=type),size=3)+
  scale_color_identity()+
  geom_vline(aes(xintercept=1),linetype=2,size=0.8,color="grey")+
  theme_minimal()+
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank()
        )
p1
p2 <- tmp |> mutate(x="col1") |> 
ggplot(aes(x=x,y=id))+
  geom_text(aes(label=Study))+
  theme_minimal()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank()
        )+
  labs(x="Study")+
  scale_x_discrete(position = "top")
p2
p3 <- tmp |>  
  mutate(x = "col2") |> 
  ggplot(aes(x=x,y=id))+
  geom_text(aes(label=Number))+
  theme_minimal()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank()
        )+
  labs(x="Number")+
  scale_x_discrete(position = "top")
p4 <- tmp |>  
  mutate(x = "col3") |> 
  ggplot(aes(x=x,y=id))+
  geom_text(aes(label=Pvalue))+
  theme_minimal()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank()
        )+
  labs(x="Pvalue")+
  scale_x_discrete(position = "top")
p5 <- tmp |>  
  mutate(x = "col4") |> 
  ggplot(aes(x=x,y=id))+
  geom_text(aes(label=OR))+
  theme_minimal()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank()
        )+
  labs(x="OR(95%)")+
  scale_x_discrete(position = "top")
library(patchwork)
p2+p3+p4+p1+p5+plot_layout(widths = c(0.4,0.2,0.3,1,0.5))

有亚组的森林图

df <- read.csv("../森林图/ggplot2_forest.csv")

# 改个列名
colnames(df)[4:7] <- c("HR","mean","low","high")

df
##            Subgroup   AP   PA              HR mean  low high
## 1      All patients   NR 27.2 0.75(0.61-0.93) 0.75 0.61 0.93
## 2     Baseling ECOG                           0.00 0.00 0.00
## 3                 0   NR 27.2 0.71(0.55-0.92) 0.71 0.55 0.92
## 4                 1   NR 26.4 0.86(0.58-1.28) 0.86 0.58 1.28
## 5   Baseling BPI-SF                           0.00 0.00 0.00
## 6               0~1   NR 27.2 0.71(0.54-0.94) 0.71 0.54 0.94
## 7               2~3 25.5   NR 0.87(0.59-1.29) 0.87 0.59 1.29
## 8   Bone metastases                           0.00 0.00 0.00
## 9               Yes   NR 27.2 0.68(0.48-0.96) 0.68 0.48 0.96
## 10               No   NR 27.5 0.81(0.61-1.06) 0.81 0.61 1.06
## 11              Age                           0.00 0.00 0.00
## 12              <65   NR   NR  0.8(0.51-1.24) 0.80 0.51 1.24
## 13             <=65   NR 26.4  0.73(0.57-0.94 0.73 0.57 0.94
## 14             >=75   NR 23.8    0.71(0.51-1) 0.71 0.51 1.00
## 15     Baseline PSA                           0.00 0.00 0.00
## 16              Yes 26.9 23.8 0.72(0.43-0.94) 0.72 0.43 0.94
## 17               No   NR   NR  0.77(0.38-1.09 0.77 0.38 1.09
## 18     Baseline LDH                           0.00 0.00 0.00
## 19              Yes   NR 23.6 0.69(0.53-0.91) 0.69 0.53 0.91
## 20               No   NR 27.5 0.79(0.55-1.12) 0.79 0.55 1.12
## 21   Baseline ALK-P                           0.00 0.00 0.00
## 22              Yes   NR 23.6  0.79(0.6-1.04) 0.79 0.60 1.04
## 23               No   NR 27.5 0.66(0.46-0.94) 0.66 0.46 0.94
## 24           Region                           0.00 0.00 0.00
## 25    North America   NR 27.2 0.66(0.49-0.88) 0.66 0.49 0.88
## 26            Other   NR   NR 0.89(0.65-1.22) 0.89 0.65 1.22
df <- df |> mutate(id = factor(letters[1:26]))
p1 <- df |> mutate(type = ifelse(high<1 | low >1,"#4575b4","grey")) |> 
  ggplot()+
  geom_errorbarh(aes(y=fct_rev(id),xmin=low, xmax=high,color=type),
                 height=0.5, 
                 size=.8)+
  geom_point(aes(y = fct_rev(id), x = mean,color=type),size=3)+
  scale_color_identity()+
  geom_vline(aes(xintercept=1),linetype=2,size=0.8,color="grey")+
  scale_x_continuous(limits = c(0.3,1.3))+
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks = element_blank(),
        panel.grid = element_blank(),
        panel.background = element_blank()
        )
p1
library(GGally)
align <- ifelse(df$mean==0,"righr","inward")
p2 <- df |> mutate(x="col1") |> 
ggplot(aes(x=x,y=fct_rev(id)))+
  geom_text(aes(label=Subgroup),hjust=align,size=3)+
  geom_stripped_rows()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank(),
        panel.grid = element_blank(),
        panel.background = element_blank(),
        plot.margin = unit(rep(0,4),"cm")
        )+
  labs(x="Subgroup")+
  scale_x_discrete(position = "top")
p2
p3 <- df |> mutate(x="col1") |> 
ggplot(aes(x=x,y=fct_rev(id)))+
  geom_text(aes(label= HR),size=3)+
  geom_stripped_rows()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank(),
        panel.grid = element_blank(),
        panel.background = element_blank(),
        plot.margin = unit(rep(0,4),"cm")
        )+
  labs(x="HR(95%CI)")+
  scale_x_discrete(position = "top")
p3
p4 <- df |> mutate(x="col1") |> 
ggplot(aes(x=x,y=fct_rev(id)))+
  geom_text(aes(label= AP),size=3)+
  geom_stripped_rows()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank(),
        panel.grid = element_blank(),
        panel.background = element_blank(),
        plot.margin = unit(rep(0,4),"cm")
        )+
  labs(x="AP")+
  scale_x_discrete(position = "top")
p5 <- df |> mutate(x="col1") |> 
ggplot(aes(x=x,y=fct_rev(id)))+
  geom_text(aes(label= PA),size=3)+
  geom_stripped_rows()+
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title.y = element_blank(),
        panel.grid = element_blank(),
        panel.background = element_blank(),
        plot.margin = unit(rep(0,4),"cm")
        )+
  labs(x="PA")+
  scale_x_discrete(position = "top")
library(patchwork)

p2+p4+p5+p1+p3+plot_layout(widths = c(0.1,0.05,0.05,0.1,0.1))

这颜值已经算是不错了,但是和之前的相比还是有些差距,不过发文章的话也够用了。

最后大家思考一个问题:多因素回归的森林图和亚组分析的森林图是一样的吗?