Part 1 heatmap.2
1. get the data
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer)
data <- read.table(header = T, text = "
,var1,var2,var3,var4
measurement1,0.094,0.668,0.4153,0.4613
measurement2,0.1138,-0.3847,0.2671,0.1529
measurement3,0.1893,0.3303,0.5821,0.2632
measurement4,-0.0102,-0.4259,-0.5967,0.18
measurement5,0.1587,0.2948,0.153,-0.2208
measurement6,-0.4558,0.2244,0.6619,0.0457
measurement7,-0.6241,-0.3119,0.3642,0.2003
measurement8,-0.227,0.499,0.3067,0.3289
measurement9,0.7365,-0.0872,-0.069,-0.4252
measurement10,0.9761,0.4355,0.8663,0.8107", fill = T, sep =",", row.names = 1)
data
## var1 var2 var3 var4
## measurement1 0.0940 0.6680 0.4153 0.4613
## measurement2 0.1138 -0.3847 0.2671 0.1529
## measurement3 0.1893 0.3303 0.5821 0.2632
## measurement4 -0.0102 -0.4259 -0.5967 0.1800
## measurement5 0.1587 0.2948 0.1530 -0.2208
## measurement6 -0.4558 0.2244 0.6619 0.0457
## measurement7 -0.6241 -0.3119 0.3642 0.2003
## measurement8 -0.2270 0.4990 0.3067 0.3289
## measurement9 0.7365 -0.0872 -0.0690 -0.4252
## measurement10 0.9761 0.4355 0.8663 0.8107
transform the data to matrix
mat_data <- data.matrix(data)
mat_data
## var1 var2 var3 var4
## measurement1 0.0940 0.6680 0.4153 0.4613
## measurement2 0.1138 -0.3847 0.2671 0.1529
## measurement3 0.1893 0.3303 0.5821 0.2632
## measurement4 -0.0102 -0.4259 -0.5967 0.1800
## measurement5 0.1587 0.2948 0.1530 -0.2208
## measurement6 -0.4558 0.2244 0.6619 0.0457
## measurement7 -0.6241 -0.3119 0.3642 0.2003
## measurement8 -0.2270 0.4990 0.3067 0.3289
## measurement9 0.7365 -0.0872 -0.0690 -0.4252
## measurement10 0.9761 0.4355 0.8663 0.8107
str(mat_data)
## num [1:10, 1:4] 0.094 0.1138 0.1893 -0.0102 0.1587 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:10] "measurement1" "measurement2" "measurement3" "measurement4" ...
## ..$ : chr [1:4] "var1" "var2" "var3" "var4"
rownames(mat_data)
## [1] "measurement1" "measurement2" "measurement3" "measurement4"
## [5] "measurement5" "measurement6" "measurement7" "measurement8"
## [9] "measurement9" "measurement10"
2. choose color palettes and breaks
my_palette <- colorRampPalette(c("red", "yellow", "green"))(n=299) # n =299 define how many individuals colors used, the higher of the number of individual colors, the smoother the transitin will be
col_breaks = c(seq(-1,0,length = 100), # for red
seq(0.01,0.8, length = 100), # for yellow
seq(0.81, 1, length = 100) # for green
)
3. plootting the heat map
heatmap.2(mat_data,
cellnote = mat_data, # same data set for cell labels
main = "correlation", # title
notecol = "black", # change the font color of cell labels
notecex = 0.7, # change the font size
cexCol = 1, srtCol=45, # change the column labels size
cexRow = 0.7, srtRow = 45, # change the row labels size
density.info = "none", # turn off density info in the color legend
trace = "none", # turn off the trace inside
#margins = c(3,7), # widens margins
col = my_palette, # use my_palette as color
breaks = col_breaks, # use color transition at specified limits
dendrogram = "row", # only draw the row dendrogram
Colv = "NA", # turn off the column clustering
# add row side bar
RowSideColors = c( # grouping row-variables into different
rep("gray", 3), # categories, Measurement 1-3: green
rep("blue", 3), # Measurement 4-6: blue
rep("black", 4)) # Measurement 7-10: red
)
# add the color legend
par(lend = 1) # square line ends for the color legend
legend("topright", # location of the legend on the heatmap plot
legend = c("category1", "category2", "category3"), # category labels
col = c("gray", "blue", "black"), # color key
lty= 1, # line style
lwd = 10, # line width
cex = 0.7) # text size
Part 2 pheatmap
random data
set.seed(42)
random_sting <- function(n){
substr(paste(sample(letters), collapse = ""), 1, n)
}
mat <- matrix(rgamma(1000, shape = 1)*5, ncol = 50)
colnames(mat) <- paste(
rep(1:3, each = ncol(mat) / 3),
replicate(ncol(mat), random_sting(5)),
sep = ""
)
rownames(mat) <- replicate(nrow(mat), random_sting(3))
# see the data
mat[1:5,1:5]
## 1tshyr 1kxdlq 1nmrwk 1vjqot 1mwjly
## rcs 9.6964789 9.1728114 2.827695 0.3945351 8.0549350
## elh 0.9020955 15.5758530 4.328376 2.0908362 34.3081971
## dxc 2.6721643 3.1270386 1.765077 0.3404244 2.3428120
## nwd 0.1198261 0.3569485 4.980206 1.7912319 2.4935602
## fji 2.1388712 4.6040106 9.897896 0.1263967 0.3518315
split the data into 3 groups
col_groups <- substr(colnames(mat), 1, 1)
table(col_groups)
## col_groups
## 1 2 3
## 18 16 16
increase the data
mat[,col_groups == "1"] <- mat[, col_groups == "1"]*5
see the data distribution
library(ggplot2)
#Set the theme for all the following plots.
theme_set(theme_bw(base_size = 16))
dat <- data.frame(values = as.numeric(mat))
ggplot(dat, aes(values)) + geom_density(bw = "SJ")
## make the heatmap
library(pheatmap)
library(RColorBrewer) # color palette
library(viridis) # color palette
## Loading required package: viridisLite
# Data frame with column annotations
mat_col <- data.frame(group = col_groups)
rownames(mat_col) <- colnames(mat)
# list with colors for each annotation
mat_colors <- list(group = brewer.pal(3, "Set1"))
names(mat_colors$group) <- unique(col_groups)
pheatmap(
mat = mat,
color = inferno(10), # inferno can be used to get the color
border_color = NA, #color for cell borders
cluster_rows = T,
cluster_cols = T,
show_colnames = F,
show_rownames = F,
annotation_col = mat_col,
annotation_colors = mat_colors,
drop_levels = T, # unused levels are also shown in the legend
fontsize = 14,
main = "Default Heatmap"
)
see the breaks
quantile_breaks <- function(xs, n = 10){
breaks <- quantile(xs, probs = seq(0, 1, length.out = n)) # produce the sample quantiles corresponding to the given probabilities
breaks[!duplicated(breaks)]
}
mat_breaks <- quantile_breaks(mat, n = 11)
# see the new break
mat_breaks
## 0% 10% 20% 30% 40%
## 1.699323e-03 7.035943e-01 1.594124e+00 2.489215e+00 3.777056e+00
## 50% 60% 70% 80% 90%
## 5.095493e+00 7.017412e+00 9.508040e+00 1.445645e+01 2.794179e+01
## 100%
## 1.715410e+02
pheatmap(
mat = mat,
color = inferno(length(mat_breaks) - 1),
breaks = mat_breaks,
border_color = NA,
show_colnames = FALSE,
show_rownames = FALSE,
annotation_col = mat_col,
annotation_colors = mat_colors,
drop_levels = TRUE,
fontsize = 14,
main = "Quantile Color Scale"
)
Sorting the dendrograms
mat_cluster_cols <- hclust(dist(t(mat)))
plot(mat_cluster_cols, main = "Unsorted Dendrogram", xlab = "", sub = "")
flip the branches to sort the dendrogram. The most similar columns will appear clustered toward the left side of the plot. The columns that are more distant from each other will appear clustered toward the right side of the plot.
library(dendsort)
sort_hclust <- function(...) as.hclust(dendsort(as.dendrogram(...)))
mat_cluster_cols <- sort_hclust(mat_cluster_cols)
plot(mat_cluster_cols, main = "Sorted Dendrogram", xlab = "", sub = "")
get the new heatmap
mat_cluster_rows <- sort_hclust(hclust(dist(mat)))
pheatmap(
mat = mat,
color = inferno(length(mat_breaks) - 1),
breaks = mat_breaks,
border_color = NA,
cluster_cols = mat_cluster_cols,
cluster_rows = mat_cluster_rows,
show_colnames = FALSE,
show_rownames = FALSE,
annotation_col = mat_col,
annotation_colors = mat_colors,
drop_levels = TRUE,
fontsize = 14,
main = "Sorted Dendrograms"
)
Last modified on 2018-07-17