The ‘ggpubr’ package provides some easy-to-use functions for creating and customizing ‘ggplot2’- based publication ready plots. -A. Kassambara.
1. Preparation
install the package
install.packages("ggpubr")
or you can install the latest version form github
if(!require(devtools)) install.packages("devtools") # if havn`t install devtools before, install it first
devtools::install_github("kassambara/ggpubr")
load package
library(ggpubr)
load data
data("ToothGrowth")
head(ToothGrowth)
## len supp dose
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
2. Comparing means in R
Reference here: http://www.sthda.com/english/wiki/comparing-means-in-r
a short summary here
Methods | R function | Describe |
---|---|---|
T test | t.test() | two groups (parametric) |
Wilcoxon test | wilcox.test | two groups (nonparametric) |
ANOVA | anova() | multiple groups (parametric) |
Kruskal-Wallis | kruskal.test() | multiple groups (nonparametric) |
3. Two functions
- compare_means()
- stat_compare_means()
usage
compare_means(formula, data, method="wilcox.test", paired=FALSE,
group.by=NULL, ref.group = NULL, ...)
formula: a formula of the form x ~ group where x is a numeric variable giving the data values and group is a factor with one or multiple levels giving the corresponding groups
ref.group: a character string specifying the reference group. If specified, for a given grouping variable, each of the group levels will be compared to the reference group
stat_compare_means(mapping = NULL,
comparisons = NULL,
hide.ns = FALSE,
label = NULL,
label.x = NULL,
label.y = NULL)
4. Compare two group
compare_means(len ~ supp, data = ToothGrowth)
## # A tibble: 1 x 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 len OJ VC 0.0645 0.064 0.064 ns Wilcoxon
p <- ggboxplot(ToothGrowth, x="supp",
y = "len", color = "supp", add = "point")
# add p value
p + stat_compare_means() # the default method for comparision is wilcox.test()
change comparision method
p + stat_compare_means(method = "t.test")
add the p.signif and change the location
p + stat_compare_means(aes(label = ..p.signif..),
label.x = 1.5,
label.y = 40)
5. Compare two paired group
compare_means(len ~ supp, data = ToothGrowth, paired = TRUE)
## # A tibble: 1 x 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 len OJ VC 0.00431 0.0043 0.0043 ** Wilcoxon
ggpaired(ToothGrowth, x="supp", y="len",
color="supp", line.color="gray",
line.size=0.4, palette = "point") +
stat_compare_means(paired = TRUE)
6. multiple group comparision
global test
compare_means(len ~ dose, data = ToothGrowth, method = "anova")
## # A tibble: 1 x 6
## .y. p p.adj p.format p.signif method
## <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 len 9.53e-16 9.50e-16 9.5e-16 **** Anova
ggboxplot(ToothGrowth, x = "dose", y = "len", color = "dose", add = "point") +
stat_compare_means(method = "anova")
you can specify the comparison group
my_comparisons <- list(c("0.5","1"), c("1", "2"),
c("0.5", "2"))
ggboxplot(ToothGrowth, x="dose", y="len",
color="dose", add = "point") +
stat_compare_means(comparisons = my_comparisons) + #add comparision group p value
stat_compare_means(label.y = 50) # add global p value
7. multiple group comparision with a reference group
compare_means(len ~ dose, data = ToothGrowth,
ref.group = "0.5",
method = "t.test")
## # A tibble: 2 x 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 len 0.5 1 1.27e- 7 1.30e- 7 1.3e-07 **** T-test
## 2 len 0.5 2 4.40e-14 8.80e-14 4.4e-14 **** T-test
Visualize the result
ggboxplot(ToothGrowth, x="dose", y="len",
color="dose", add = "point") +
stat_compare_means(method="anova", label.y=40) + # add global p value
stat_compare_means(label="p.signif", method="t.test",
ref.group = "0.5") # add signif marker
use all data as the base-mean do the paired comparison
# Load myeloma data from GitHub
myeloma <- read.delim("https://raw.githubusercontent.com/kassambara/data/master/myeloma.txt")
compare_means(DEPDC1 ~ molecular_group, data = myeloma,
ref.group = ".all.", method = "t.test")
## # A tibble: 7 x 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 DEPDC1 .all. Cyclin D-1 2.88e-1 1.00e+0 0.29 ns T-test
## 2 DEPDC1 .all. Cyclin D-2 4.24e-1 1.00e+0 0.42 ns T-test
## 3 DEPDC1 .all. Hyperdiploid 2.73e-8 1.90e-7 2.7e-08 **** T-test
## 4 DEPDC1 .all. Low bone dis… 5.26e-6 3.20e-5 5.3e-06 **** T-test
## 5 DEPDC1 .all. MAF 2.54e-1 1.00e+0 0.25 ns T-test
## 6 DEPDC1 .all. MMSET 5.78e-1 1.00e+0 0.58 ns T-test
## 7 DEPDC1 .all. Proliferation 2.39e-5 1.20e-4 2.4e-05 **** T-test
ggboxplot(myeloma, x="molecular_group", y="DEPDC1",
color="molecular_group", add="jitter",
legend="none") +
rotate_x_text(angle = 45) +
geom_hline(yintercept = mean(myeloma$DEPDC1),
linetype=2) + # add base mean
stat_compare_means(method = "anova", label.y = 1600)+ # Add global annova p-value
stat_compare_means(label = "p.signif", method = "t.test",
ref.group = ".all.") # Pairwise comparison against all
8. generate sub figure
p <- ggboxplot(ToothGrowth, x = "supp", y = "len",
color = "supp",
add = "point",
facet.by = "dose", short.panel.labs = FALSE)
# Use only p.format as label. Remove method name.
p + stat_compare_means(label = "p.format")
# Or use significance symbol as label
p + stat_compare_means(label = "p.signif", label.x = 1.5)
put all figure in one
p <- ggboxplot(ToothGrowth, x = "dose", y = "len",
color = "supp", palette = "jco",
add = "jitter")
p + stat_compare_means(aes(group = supp))
# only p value
p + stat_compare_means(aes(group = supp), label = "p.format")
9. Other figure
# barplot add mean_se
ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se")+
stat_compare_means() + # Global p-value
stat_compare_means(ref.group = "0.5", label = "p.signif",
label.y = c(22, 29)) # compare to ref.group
ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se")+
stat_compare_means() + # Global p-value
stat_compare_means(ref.group = "0.5", label = "p.signif",
label.y = c(22, 29))
ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se",
color = "supp", palette = "jco",
position = position_dodge(0.8))+
stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29)
ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se",
color = "supp", palette = "jco")+
stat_compare_means(aes(group = supp), label = "p.signif",
label.y = c(16, 25, 29))
SessionInfo
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 ggpubr_0.1.8 magrittr_1.5 forcats_0.3.0
## [5] stringr_1.3.1 dplyr_0.7.7 purrr_0.2.5 readr_1.1.1
## [9] tidyr_0.8.1 tibble_1.4.2 ggplot2_3.0.0 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.3 haven_1.1.2 lattice_0.20-35
## [5] colorspace_1.3-2 htmltools_0.3.6 yaml_2.2.0 utf8_1.1.4
## [9] rlang_0.2.2 pillar_1.3.0 glue_1.3.0 withr_2.1.2
## [13] modelr_0.1.2 readxl_1.1.0 bindr_0.1.1 plyr_1.8.4
## [17] ggsignif_0.4.0 munsell_0.5.0 blogdown_0.8 gtable_0.2.0
## [21] cellranger_1.1.0 ggsci_2.9 rvest_0.3.2 evaluate_0.12
## [25] labeling_0.3 knitr_1.20 fansi_0.4.0 broom_0.5.0
## [29] Rcpp_0.12.19 scales_1.0.0 backports_1.1.2 jsonlite_1.5
## [33] hms_0.4.2 digest_0.6.18 stringi_1.2.4 bookdown_0.7
## [37] grid_3.5.0 rprojroot_1.3-2 cli_1.0.1 tools_3.5.0
## [41] lazyeval_0.2.1 crayon_1.3.4 pkgconfig_2.0.2 xml2_1.2.0
## [45] lubridate_1.7.4 assertthat_0.2.0 rmarkdown_1.10 httr_1.3.1
## [49] rstudioapi_0.8 R6_2.3.0 nlme_3.1-137 compiler_3.5.0
Last modified on 2018-06-07