Add p value and significant marker for ggplot based on ggpubr

Liang / 2018-06-07


The ‘ggpubr’ package provides some easy-to-use functions for creating and customizing ‘ggplot2’- based publication ready plots. -A. Kassambara.

Reference

1. Preparation

install the package

install.packages("ggpubr")

or you can install the latest version form github

if(!require(devtools)) install.packages("devtools") # if havn`t install devtools before, install it first
devtools::install_github("kassambara/ggpubr")

load package

library(ggpubr)

load data

data("ToothGrowth")
head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5

2. Comparing means in R

Reference here: http://www.sthda.com/english/wiki/comparing-means-in-r

a short summary here

MethodsR functionDescribe
T testt.test()two groups (parametric)
Wilcoxon testwilcox.testtwo groups (nonparametric)
ANOVAanova()multiple groups (parametric)
Kruskal-Walliskruskal.test()multiple groups (nonparametric)

3. Two functions

  • compare_means()
  • stat_compare_means()

usage

compare_means(formula, data, method="wilcox.test", paired=FALSE, 
              group.by=NULL, ref.group = NULL, ...)

formula: a formula of the form x ~ group where x is a numeric variable giving the data values and group is a factor with one or multiple levels giving the corresponding groups

ref.group: a character string specifying the reference group. If specified, for a given grouping variable, each of the group levels will be compared to the reference group

stat_compare_means(mapping = NULL, 
                   comparisons = NULL,
                   hide.ns = FALSE,
                   label = NULL,
                   label.x = NULL,
                   label.y = NULL)

4. Compare two group

compare_means(len ~ supp, data = ToothGrowth) 
## # A tibble: 1 x 8
##   .y.   group1 group2      p p.adj p.format p.signif method  
##   <chr> <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 len   OJ     VC     0.0645 0.064 0.064    ns       Wilcoxon
p <- ggboxplot(ToothGrowth, x="supp",
               y = "len", color = "supp", add = "point")
# add p value
p + stat_compare_means() # the default method for comparision is wilcox.test() 

change comparision method

p + stat_compare_means(method = "t.test")

add the p.signif and change the location

p + stat_compare_means(aes(label = ..p.signif..),
                       label.x = 1.5, 
                       label.y = 40)

5. Compare two paired group

compare_means(len ~ supp, data = ToothGrowth, paired = TRUE)
## # A tibble: 1 x 8
##   .y.   group1 group2       p  p.adj p.format p.signif method  
##   <chr> <chr>  <chr>    <dbl>  <dbl> <chr>    <chr>    <chr>   
## 1 len   OJ     VC     0.00431 0.0043 0.0043   **       Wilcoxon
ggpaired(ToothGrowth, x="supp", y="len",
         color="supp", line.color="gray",
         line.size=0.4, palette = "point") + 
    stat_compare_means(paired = TRUE)

6. multiple group comparision

global test

compare_means(len ~ dose, data = ToothGrowth, method = "anova")
## # A tibble: 1 x 6
##   .y.          p    p.adj p.format p.signif method
##   <chr>    <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 len   9.53e-16 9.50e-16 9.5e-16  ****     Anova
ggboxplot(ToothGrowth, x = "dose", y = "len", color = "dose", add = "point") + 
    stat_compare_means(method = "anova")

you can specify the comparison group

my_comparisons <- list(c("0.5","1"), c("1", "2"),
                       c("0.5", "2"))
ggboxplot(ToothGrowth, x="dose", y="len",
          color="dose", add = "point") +
    stat_compare_means(comparisons = my_comparisons) + #add comparision group p value 
    stat_compare_means(label.y = 50) # add global p value 

7. multiple group comparision with a reference group

compare_means(len ~ dose, data = ToothGrowth,
              ref.group = "0.5", 
              method = "t.test")
## # A tibble: 2 x 8
##   .y.   group1 group2        p    p.adj p.format p.signif method
##   <chr> <chr>  <chr>     <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 len   0.5    1      1.27e- 7 1.30e- 7 1.3e-07  ****     T-test
## 2 len   0.5    2      4.40e-14 8.80e-14 4.4e-14  ****     T-test

Visualize the result

ggboxplot(ToothGrowth, x="dose", y="len",
          color="dose", add = "point") + 
    stat_compare_means(method="anova", label.y=40) + # add global p value 
    stat_compare_means(label="p.signif", method="t.test",
                       ref.group = "0.5") # add signif marker

use all data as the base-mean do the paired comparison

# Load myeloma data from GitHub
myeloma <- read.delim("https://raw.githubusercontent.com/kassambara/data/master/myeloma.txt")

compare_means(DEPDC1 ~ molecular_group,  data = myeloma,
              ref.group = ".all.", method = "t.test")
## # A tibble: 7 x 8
##   .y.    group1 group2                 p    p.adj p.format p.signif method
##   <chr>  <chr>  <chr>              <dbl>    <dbl> <chr>    <chr>    <chr> 
## 1 DEPDC1 .all.  Cyclin D-1       2.88e-1  1.00e+0 0.29     ns       T-test
## 2 DEPDC1 .all.  Cyclin D-2       4.24e-1  1.00e+0 0.42     ns       T-test
## 3 DEPDC1 .all.  Hyperdiploid     2.73e-8  1.90e-7 2.7e-08  ****     T-test
## 4 DEPDC1 .all.  Low bone dis…    5.26e-6  3.20e-5 5.3e-06  ****     T-test
## 5 DEPDC1 .all.  MAF              2.54e-1  1.00e+0 0.25     ns       T-test
## 6 DEPDC1 .all.  MMSET            5.78e-1  1.00e+0 0.58     ns       T-test
## 7 DEPDC1 .all.  Proliferation    2.39e-5  1.20e-4 2.4e-05  ****     T-test
ggboxplot(myeloma, x="molecular_group", y="DEPDC1",
          color="molecular_group", add="jitter",
          legend="none") + 
    rotate_x_text(angle = 45) + 
    geom_hline(yintercept = mean(myeloma$DEPDC1),
               linetype=2) + #  add base mean
     stat_compare_means(method = "anova", label.y = 1600)+        # Add global annova p-value
  stat_compare_means(label = "p.signif", method = "t.test",
                     ref.group = ".all.")                      # Pairwise comparison against all

8. generate sub figure

p <- ggboxplot(ToothGrowth, x = "supp", y = "len",
          color = "supp",
          add = "point",
          facet.by = "dose", short.panel.labs = FALSE)
# Use only p.format as label. Remove method name.
p + stat_compare_means(label = "p.format")

# Or use significance symbol as label
p + stat_compare_means(label =  "p.signif", label.x = 1.5)

put all figure in one

p <- ggboxplot(ToothGrowth, x = "dose", y = "len",
          color = "supp", palette = "jco",
          add = "jitter")
p + stat_compare_means(aes(group = supp))

# only p value 
p + stat_compare_means(aes(group = supp), label = "p.format")

9. Other figure

# barplot add mean_se
ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se")+
  stat_compare_means() +                                         # Global p-value
  stat_compare_means(ref.group = "0.5", label = "p.signif",
                     label.y = c(22, 29))                   # compare to ref.group

ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se")+
  stat_compare_means() +                                         # Global p-value
  stat_compare_means(ref.group = "0.5", label = "p.signif",
                     label.y = c(22, 29))  

ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se",
          color = "supp", palette = "jco", 
          position = position_dodge(0.8))+
  stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29)

ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se",
          color = "supp", palette = "jco")+
  stat_compare_means(aes(group = supp), label = "p.signif", 
                     label.y = c(16, 25, 29))

SessionInfo

sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2  ggpubr_0.1.8    magrittr_1.5    forcats_0.3.0  
##  [5] stringr_1.3.1   dplyr_0.7.7     purrr_0.2.5     readr_1.1.1    
##  [9] tidyr_0.8.1     tibble_1.4.2    ggplot2_3.0.0   tidyverse_1.2.1
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5 xfun_0.3         haven_1.1.2      lattice_0.20-35 
##  [5] colorspace_1.3-2 htmltools_0.3.6  yaml_2.2.0       utf8_1.1.4      
##  [9] rlang_0.2.2      pillar_1.3.0     glue_1.3.0       withr_2.1.2     
## [13] modelr_0.1.2     readxl_1.1.0     bindr_0.1.1      plyr_1.8.4      
## [17] ggsignif_0.4.0   munsell_0.5.0    blogdown_0.8     gtable_0.2.0    
## [21] cellranger_1.1.0 ggsci_2.9        rvest_0.3.2      evaluate_0.12   
## [25] labeling_0.3     knitr_1.20       fansi_0.4.0      broom_0.5.0     
## [29] Rcpp_0.12.19     scales_1.0.0     backports_1.1.2  jsonlite_1.5    
## [33] hms_0.4.2        digest_0.6.18    stringi_1.2.4    bookdown_0.7    
## [37] grid_3.5.0       rprojroot_1.3-2  cli_1.0.1        tools_3.5.0     
## [41] lazyeval_0.2.1   crayon_1.3.4     pkgconfig_2.0.2  xml2_1.2.0      
## [45] lubridate_1.7.4  assertthat_0.2.0 rmarkdown_1.10   httr_1.3.1      
## [49] rstudioapi_0.8   R6_2.3.0         nlme_3.1-137     compiler_3.5.0

Last modified on 2018-06-07