Heatmap.2 and pheatmap in R practice

Liang / 2018-07-17


Part 1 heatmap.2

1. get the data

library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(RColorBrewer)

data <- read.table(header = T, text = "
                   ,var1,var2,var3,var4
measurement1,0.094,0.668,0.4153,0.4613
measurement2,0.1138,-0.3847,0.2671,0.1529
measurement3,0.1893,0.3303,0.5821,0.2632
measurement4,-0.0102,-0.4259,-0.5967,0.18
measurement5,0.1587,0.2948,0.153,-0.2208
measurement6,-0.4558,0.2244,0.6619,0.0457
measurement7,-0.6241,-0.3119,0.3642,0.2003
measurement8,-0.227,0.499,0.3067,0.3289
measurement9,0.7365,-0.0872,-0.069,-0.4252
measurement10,0.9761,0.4355,0.8663,0.8107", fill = T, sep =",", row.names = 1)
data
##                  var1    var2    var3    var4
## measurement1   0.0940  0.6680  0.4153  0.4613
## measurement2   0.1138 -0.3847  0.2671  0.1529
## measurement3   0.1893  0.3303  0.5821  0.2632
## measurement4  -0.0102 -0.4259 -0.5967  0.1800
## measurement5   0.1587  0.2948  0.1530 -0.2208
## measurement6  -0.4558  0.2244  0.6619  0.0457
## measurement7  -0.6241 -0.3119  0.3642  0.2003
## measurement8  -0.2270  0.4990  0.3067  0.3289
## measurement9   0.7365 -0.0872 -0.0690 -0.4252
## measurement10  0.9761  0.4355  0.8663  0.8107

transform the data to matrix

mat_data <- data.matrix(data)
mat_data
##                  var1    var2    var3    var4
## measurement1   0.0940  0.6680  0.4153  0.4613
## measurement2   0.1138 -0.3847  0.2671  0.1529
## measurement3   0.1893  0.3303  0.5821  0.2632
## measurement4  -0.0102 -0.4259 -0.5967  0.1800
## measurement5   0.1587  0.2948  0.1530 -0.2208
## measurement6  -0.4558  0.2244  0.6619  0.0457
## measurement7  -0.6241 -0.3119  0.3642  0.2003
## measurement8  -0.2270  0.4990  0.3067  0.3289
## measurement9   0.7365 -0.0872 -0.0690 -0.4252
## measurement10  0.9761  0.4355  0.8663  0.8107
str(mat_data)
##  num [1:10, 1:4] 0.094 0.1138 0.1893 -0.0102 0.1587 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:10] "measurement1" "measurement2" "measurement3" "measurement4" ...
##   ..$ : chr [1:4] "var1" "var2" "var3" "var4"
rownames(mat_data)
##  [1] "measurement1"  "measurement2"  "measurement3"  "measurement4" 
##  [5] "measurement5"  "measurement6"  "measurement7"  "measurement8" 
##  [9] "measurement9"  "measurement10"

2. choose color palettes and breaks

my_palette <- colorRampPalette(c("red", "yellow", "green"))(n=299) # n =299 define how many individuals colors used, the higher of the number of individual colors, the smoother the transitin will be
col_breaks = c(seq(-1,0,length = 100), # for red
               seq(0.01,0.8, length = 100), # for yellow
               seq(0.81, 1, length = 100) # for green
               )

3. plootting the heat map

heatmap.2(mat_data,
          cellnote = mat_data, # same data set for cell labels
          main = "correlation", # title
          notecol = "black", # change the font color of cell labels
          notecex = 0.7, # change the font size
          cexCol = 1, srtCol=45, # change the column labels size 
          cexRow = 0.7, srtRow = 45, # change the row labels size
          density.info = "none", # turn off density info in the color legend
          trace = "none", # turn off the trace inside
          #margins = c(3,7), # widens margins 
          col = my_palette, # use my_palette as color
          breaks = col_breaks, # use color transition at specified limits
          dendrogram = "row", # only draw the row dendrogram
          Colv = "NA", # turn off the column clustering
          
          # add row side bar
          RowSideColors = c(    # grouping row-variables into different
           rep("gray", 3),   # categories, Measurement 1-3: green
           rep("blue", 3),    # Measurement 4-6: blue
           rep("black", 4))    # Measurement 7-10: red
          )

# add the color legend
par(lend = 1)           # square line ends for the color legend
legend("topright",      # location of the legend on the heatmap plot
    legend = c("category1", "category2", "category3"), # category labels
    col = c("gray", "blue", "black"),  # color key
    lty= 1,             # line style
    lwd = 10,           # line width
    cex = 0.7)          # text size  

Part 2 pheatmap

random data

set.seed(42)
random_sting <- function(n){
  substr(paste(sample(letters), collapse = ""), 1, n)
}

mat <- matrix(rgamma(1000, shape = 1)*5, ncol = 50)

colnames(mat) <- paste(
  rep(1:3, each = ncol(mat) / 3), 
  replicate(ncol(mat), random_sting(5)),
  sep = ""
)

rownames(mat) <- replicate(nrow(mat), random_sting(3))

# see the data
mat[1:5,1:5]
##        1tshyr     1kxdlq   1nmrwk    1vjqot     1mwjly
## rcs 9.6964789  9.1728114 2.827695 0.3945351  8.0549350
## elh 0.9020955 15.5758530 4.328376 2.0908362 34.3081971
## dxc 2.6721643  3.1270386 1.765077 0.3404244  2.3428120
## nwd 0.1198261  0.3569485 4.980206 1.7912319  2.4935602
## fji 2.1388712  4.6040106 9.897896 0.1263967  0.3518315

split the data into 3 groups

col_groups <- substr(colnames(mat), 1, 1)
table(col_groups)
## col_groups
##  1  2  3 
## 18 16 16

increase the data

mat[,col_groups == "1"] <- mat[, col_groups == "1"]*5

see the data distribution

library(ggplot2)
#Set the theme for all the following plots.
theme_set(theme_bw(base_size = 16))

dat <- data.frame(values = as.numeric(mat))
ggplot(dat, aes(values)) + geom_density(bw = "SJ")

## make the heatmap

library(pheatmap)
library(RColorBrewer) # color palette
library(viridis) # color palette
## Loading required package: viridisLite
# Data frame with column annotations
mat_col <- data.frame(group = col_groups)
rownames(mat_col) <- colnames(mat)

# list with colors for each annotation
mat_colors <- list(group = brewer.pal(3, "Set1"))
names(mat_colors$group) <- unique(col_groups)

pheatmap(
  mat = mat,
  color = inferno(10), # inferno can be used to get the color
  border_color = NA, #color for cell borders
  cluster_rows = T, 
  cluster_cols = T,
  show_colnames = F,
  show_rownames = F,
  annotation_col = mat_col,
  annotation_colors = mat_colors,
  drop_levels = T, # unused levels are also shown in the legend
  fontsize = 14,
  main = "Default Heatmap"
)

see the breaks

quantile_breaks <- function(xs, n = 10){
  breaks <- quantile(xs, probs = seq(0, 1, length.out = n)) # produce the sample quantiles corresponding to the given probabilities
  breaks[!duplicated(breaks)]
}

mat_breaks <- quantile_breaks(mat, n = 11)

# see the new break
mat_breaks
##           0%          10%          20%          30%          40% 
## 1.699323e-03 7.035943e-01 1.594124e+00 2.489215e+00 3.777056e+00 
##          50%          60%          70%          80%          90% 
## 5.095493e+00 7.017412e+00 9.508040e+00 1.445645e+01 2.794179e+01 
##         100% 
## 1.715410e+02
pheatmap(
  mat               = mat,
  color             = inferno(length(mat_breaks) - 1),
  breaks            = mat_breaks,
  border_color      = NA,
  show_colnames     = FALSE,
  show_rownames     = FALSE,
  annotation_col    = mat_col,
  annotation_colors = mat_colors,
  drop_levels       = TRUE,
  fontsize          = 14,
  main              = "Quantile Color Scale"
)

Sorting the dendrograms

mat_cluster_cols <- hclust(dist(t(mat)))
plot(mat_cluster_cols, main = "Unsorted Dendrogram", xlab = "", sub = "")

flip the branches to sort the dendrogram. The most similar columns will appear clustered toward the left side of the plot. The columns that are more distant from each other will appear clustered toward the right side of the plot.

library(dendsort)

sort_hclust <- function(...) as.hclust(dendsort(as.dendrogram(...)))

mat_cluster_cols <- sort_hclust(mat_cluster_cols)
plot(mat_cluster_cols, main = "Sorted Dendrogram", xlab = "", sub = "")

get the new heatmap

mat_cluster_rows <- sort_hclust(hclust(dist(mat)))
pheatmap(
  mat               = mat,
  color             = inferno(length(mat_breaks) - 1), 
  breaks            = mat_breaks,
  border_color      = NA,
  cluster_cols      = mat_cluster_cols,
  cluster_rows      = mat_cluster_rows,
  show_colnames     = FALSE,
  show_rownames     = FALSE,
  annotation_col    = mat_col,
  annotation_colors = mat_colors,
  drop_levels       = TRUE,
  fontsize          = 14,
  main              = "Sorted Dendrograms"
)

Last modified on 2018-07-17