Add p value and significant marker for ggplot based on ggpubr

Liang / 2018-06-07

The ‘ggpubr’ package provides some easy-to-use functions for creating and customizing ‘ggplot2’-based publication-ready plots. -A. Kassambara.


1. Preparation

Install the package from CRAN with install.packages("ggpubr"),


or you can install the latest version from GitHub:

# Install the development version of ggpubr from GitHub.
# devtools is installed first if it is not available yet; requireNamespace()
# checks availability without attaching the package.
if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
devtools::install_github("kassambara/ggpubr")

Load the package with library(ggpubr).


Load the ToothGrowth data with data("ToothGrowth") and preview it with head(ToothGrowth):

##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5

2. Comparing means in R

Reference here:

a short summary here

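| Method         | R function     | Description                       |
|----------------|----------------|-----------------------------------|
| T test         | t.test()       | two groups (parametric)           |
| Wilcoxon test  | wilcox.test()  | two groups (nonparametric)        |
| ANOVA          | anova()        | multiple groups (parametric)      |
| Kruskal-Wallis | kruskal.test() | multiple groups (nonparametric)   |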

3. Two functions

  • compare_means()
  • stat_compare_means()


# Usage of compare_means(). `group.by` and `ref.group` are optional and
# default to NULL; `ref.group` is described below.
compare_means(formula, data, method = "wilcox.test", paired = FALSE,
              group.by = NULL, ref.group = NULL, ...)

formula: a formula of the form x ~ group, where x is a numeric variable giving the data values and group is a factor with one or multiple levels giving the corresponding groups.

ref.group: a character string specifying the reference group. If specified, for a given grouping variable, each of the group levels will be compared to the reference group.

# Abbreviated usage of stat_compare_means(): only a subset of the arguments
# is listed. The examples in this document also pass `method` and `paired`.
stat_compare_means(mapping = NULL, 
                   comparisons = NULL,
                   hide.ns = FALSE,
                   label = NULL,
                   label.x = NULL,
                   label.y = NULL)

4. Compare two groups

# Compare tooth length between the two supplement groups (Wilcoxon by default).
compare_means(formula = len ~ supp, data = ToothGrowth)
## # A tibble: 1 x 8
##   .y.   group1 group2      p p.adj p.format p.signif method  
##   <chr> <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 len   OJ     VC     0.0645 0.064 0.064    ns       Wilcoxon
# Box plot of len by supp with the individual points added.
p <- ggboxplot(
  ToothGrowth,
  x = "supp", y = "len",
  color = "supp", add = "point"
)
# Add the p-value; the default comparison method is wilcox.test().
p + stat_compare_means()

Change the comparison method

# Use a t-test instead of the default Wilcoxon test for the p-value label.
p + stat_compare_means(method = "t.test")

Show the significance marker (p.signif) and change the label position

# Show the significance level (p.signif) instead of the p-value and place the
# label at x = 1.5, y = 40. The string form `label = "p.signif"` replaces the
# deprecated `..p.signif..` dot-dot notation and matches the other examples.
p + stat_compare_means(label = "p.signif",
                       label.x = 1.5,
                       label.y = 40)

5. Compare two paired groups

# Paired comparison of tooth length between the two supplement groups.
compare_means(len ~ supp, data = ToothGrowth, paired = TRUE)
## # A tibble: 1 x 8
##   .y.   group1 group2       p  p.adj p.format p.signif method  
##   <chr> <chr>  <chr>    <dbl>  <dbl> <chr>    <chr>    <chr>   
## 1 len   OJ     VC     0.00431 0.0043 0.0043   **       Wilcoxon
# Paired plot with gray connecting lines. `palette` must be a palette name
# ("jco", as used elsewhere in this document); "point" is not a palette.
ggpaired(ToothGrowth, x = "supp", y = "len",
         color = "supp", line.color = "gray",
         line.size = 0.4, palette = "jco") +
  stat_compare_means(paired = TRUE)

6. Multiple-group comparison

global test

# Global ANOVA test of tooth length across the dose groups.
compare_means(
  formula = len ~ dose,
  data = ToothGrowth,
  method = "anova"
)
## # A tibble: 1 x 6
##   .y.          p    p.adj p.format p.signif method
##   <chr>    <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 len   9.53e-16 9.50e-16 9.5e-16  ****     Anova
# Box plot of len by dose with the ANOVA p-value added.
ggboxplot(
  ToothGrowth,
  x = "dose", y = "len",
  color = "dose", add = "point"
) +
  stat_compare_means(method = "anova")

you can specify the comparison group

# Pairs of dose levels to compare against each other.
my_comparisons <- list(
  c("0.5", "1"),
  c("1", "2"),
  c("0.5", "2")
)
ggboxplot(
  ToothGrowth,
  x = "dose", y = "len",
  color = "dose", add = "point"
) +
  # p-value for each pair listed in my_comparisons
  stat_compare_means(comparisons = my_comparisons) +
  # global p-value, placed at y = 50
  stat_compare_means(label.y = 50)

7. Multiple-group comparison with a reference group

# Compare every dose level against the reference group "0.5" with a t-test.
compare_means(len ~ dose, data = ToothGrowth,
              ref.group = "0.5",
              method = "t.test")
## # A tibble: 2 x 8
##   .y.   group1 group2        p    p.adj p.format p.signif method
##   <chr> <chr>  <chr>     <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 len   0.5    1      1.27e- 7 1.30e- 7 1.3e-07  ****     T-test
## 2 len   0.5    2      4.40e-14 8.80e-14 4.4e-14  ****     T-test

Visualize the result

# Box plot of len by dose with a global ANOVA p-value and a significance
# marker for each dose level compared against the reference group "0.5".
ggboxplot(ToothGrowth, x = "dose", y = "len",
          color = "dose", add = "point") +
  stat_compare_means(method = "anova", label.y = 40) +  # add global p value
  stat_compare_means(label = "p.signif", method = "t.test",
                     ref.group = "0.5")  # add signif marker

Use all the data (".all.") as the reference (base mean) for the pairwise comparisons

# Load myeloma data from GitHub.
# NOTE(review): the URL was empty in the source; this is the location used by
# the original ggpubr tutorial. Confirm it is still reachable.
myeloma <- read.delim(
  "https://raw.githubusercontent.com/kassambara/data/master/myeloma.txt"
)

# Compare each molecular group against all samples pooled together (".all.").
compare_means(DEPDC1 ~ molecular_group, data = myeloma,
              ref.group = ".all.", method = "t.test")
## # A tibble: 7 x 8
##   .y.    group1 group2                 p    p.adj p.format p.signif method
##   <chr>  <chr>  <chr>              <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 DEPDC1 .all.  Cyclin D-1       2.88e-1  1.00e+0 0.29     ns       T-test
## 2 DEPDC1 .all.  Cyclin D-2       4.24e-1  1.00e+0 0.42     ns       T-test
## 3 DEPDC1 .all.  Hyperdiploid     2.73e-8  1.90e-7 2.7e-08  ****     T-test
## 4 DEPDC1 .all.  Low bone dis…    5.26e-6  3.20e-5 5.3e-06  ****     T-test
## 5 DEPDC1 .all.  MAF              2.54e-1  1.00e+0 0.25     ns       T-test
## 6 DEPDC1 .all.  MMSET            5.78e-1  1.00e+0 0.58     ns       T-test
## 7 DEPDC1 .all.  Proliferation    2.39e-5  1.20e-4 2.4e-05  ****     T-test
# Box plot of DEPDC1 by molecular group with a dashed line at the overall
# mean, a global ANOVA p-value, and a significance marker for each group
# compared against all samples (".all.").
ggboxplot(myeloma, x = "molecular_group", y = "DEPDC1",
          color = "molecular_group", add = "jitter",
          legend = "none") +
  rotate_x_text(angle = 45) +
  geom_hline(yintercept = mean(myeloma$DEPDC1),
             linetype = 2) +  # add base mean
  # Add global ANOVA p-value
  stat_compare_means(method = "anova", label.y = 1600) +
  # Pairwise comparison against all
  stat_compare_means(label = "p.signif", method = "t.test",
                     ref.group = ".all.")

8. Generate sub-figures (facets)

# Box plots of len by supp, split into one panel per dose level.
p <- ggboxplot(ToothGrowth, x = "supp", y = "len",
               color = "supp",
               add = "point",
               facet.by = "dose", short.panel.labs = FALSE)
# Use only p.format as label. Remove method name.
p + stat_compare_means(label = "p.format")

# Or use significance symbol as label
p + stat_compare_means(label = "p.signif", label.x = 1.5)

Put all groups in one figure

# Box plots of len by dose, colored by supp.
p <- ggboxplot(
  ToothGrowth,
  x = "dose", y = "len",
  color = "supp", palette = "jco",
  add = "jitter"
)
# Compare the supp groups at each dose (default label).
p + stat_compare_means(aes(group = supp))

# Same comparison, showing only the formatted p-value.
p + stat_compare_means(aes(group = supp), label = "p.format")

9. Other figure

# barplot add mean_se
# Bar plot of len by dose with mean_se added.
ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se") +
  stat_compare_means() +                           # global p-value
  stat_compare_means(ref.group = "0.5", label = "p.signif",
                     label.y = c(22, 29))          # compare to dose 0.5

# Line plot of len by dose with mean_se added.
ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se") +
  stat_compare_means() +                           # global p-value
  stat_compare_means(ref.group = "0.5", label = "p.signif",
                     label.y = c(22, 29))          # compare to dose 0.5

# Bar plot of len by dose with mean_se added, dodged and colored by supp;
# the supp groups are compared at each dose and shown as significance markers.
ggbarplot(
  ToothGrowth,
  x = "dose", y = "len",
  add = "mean_se",
  color = "supp", palette = "jco",
  position = position_dodge(0.8)
) +
  stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29)

# Line plot of len by dose with mean_se added, colored by supp; the supp
# groups are compared at each dose, with one label height per dose.
ggline(
  ToothGrowth,
  x = "dose", y = "len",
  add = "mean_se",
  color = "supp", palette = "jco"
) +
  stat_compare_means(
    aes(group = supp),
    label = "p.signif",
    label.y = c(16, 25, 29)
  )


Last modified on 2018-06-07