Practice for 'R for Biologists'

Liang / 2018-07-16


Part 1 Basic

R displays 7 digits by default. You can display more digits with options(). More than 15 digits could be unreliable. This is a global option; remains in effect until further notice

options(digits = 15)
2/3; 2.1^3.1
## [1] 0.666666666666667
## [1] 9.97423999265871

built-in mathematical constants and functions

2*pi
## [1] 6.28318530717959
sin(2*pi)
## [1] -2.44929359829471e-16
# This is e
exp(1)               
## [1] 2.71828182845905
sqrt(2)
## [1] 1.4142135623731

cat() function concatenates strings, inside double quotes, and interpolated values of variables

x= 2.1
cat("x =", x, "\n")
## x = 2.1
cat("root2 =", sqrt(2), "\n", "root3 =", sqrt(3), "\n")
## root2 = 1.4142135623731 
##  root3 = 1.73205080756888

Part 2 Help

# To see a sequence of simple graphics pictures
demo(graphics)        
## 
## 
##  demo(graphics)
##  ---- ~~~~~~~~
## 
## > #  Copyright (C) 1997-2009 The R Core Team
## > 
## > require(datasets)
## 
## > require(grDevices); require(graphics)
## 
## > ## Here is some code which illustrates some of the differences between
## > ## R and S graphics capabilities.  Note that colors are generally specified
## > ## by a character string name (taken from the X11 rgb.txt file) and that line
## > ## textures are given similarly.  The parameter "bg" sets the background
## > ## parameter for the plot and there is also an "fg" parameter which sets
## > ## the foreground color.
## > 
## > 
## > x <- stats::rnorm(50)
## 
## > opar <- par(bg = "white")
## 
## > plot(x, ann = FALSE, type = "n")

## 
## > abline(h = 0, col = gray(.90))
## 
## > lines(x, col = "green4", lty = "dotted")
## 
## > points(x, bg = "limegreen", pch = 21)
## 
## > title(main = "Simple Use of Color In a Plot",
## +       xlab = "Just a Whisper of a Label",
## +       col.main = "blue", col.lab = gray(.8),
## +       cex.main = 1.2, cex.lab = 1.0, font.main = 4, font.lab = 3)
## 
## > ## A little color wheel.    This code just plots equally spaced hues in
## > ## a pie chart.    If you have a cheap SVGA monitor (like me) you will
## > ## probably find that numerically equispaced does not mean visually
## > ## equispaced.  On my display at home, these colors tend to cluster at
## > ## the RGB primaries.  On the other hand on the SGI Indy at work the
## > ## effect is near perfect.
## > 
## > par(bg = "gray")
## 
## > pie(rep(1,24), col = rainbow(24), radius = 0.9)

## 
## > title(main = "A Sample Color Wheel", cex.main = 1.4, font.main = 3)
## 
## > title(xlab = "(Use this as a test of monitor linearity)",
## +       cex.lab = 0.8, font.lab = 3)
## 
## > ## We have already confessed to having these.  This is just showing off X11
## > ## color names (and the example (from the postscript manual) is pretty "cute".
## > 
## > pie.sales <- c(0.12, 0.3, 0.26, 0.16, 0.04, 0.12)
## 
## > names(pie.sales) <- c("Blueberry", "Cherry",
## +              "Apple", "Boston Cream", "Other", "Vanilla Cream")
## 
## > pie(pie.sales,
## +     col = c("purple","violetred1","green3","cornsilk","cyan","white"))

## 
## > title(main = "January Pie Sales", cex.main = 1.8, font.main = 1)
## 
## > title(xlab = "(Don't try this at home kids)", cex.lab = 0.8, font.lab = 3)
## 
## > ## Boxplots:  I couldn't resist the capability for filling the "box".
## > ## The use of color seems like a useful addition, it focuses attention
## > ## on the central bulk of the data.
## > 
## > par(bg="cornsilk")
## 
## > n <- 10
## 
## > g <- gl(n, 100, n*100)
## 
## > x <- rnorm(n*100) + sqrt(as.numeric(g))
## 
## > boxplot(split(x,g), col="lavender", notch=TRUE)

## 
## > title(main="Notched Boxplots", xlab="Group", font.main=4, font.lab=1)
## 
## > ## An example showing how to fill between curves.
## > 
## > par(bg="white")
## 
## > n <- 100
## 
## > x <- c(0,cumsum(rnorm(n)))
## 
## > y <- c(0,cumsum(rnorm(n)))
## 
## > xx <- c(0:n, n:0)
## 
## > yy <- c(x, rev(y))
## 
## > plot(xx, yy, type="n", xlab="Time", ylab="Distance")

## 
## > polygon(xx, yy, col="gray")
## 
## > title("Distance Between Brownian Motions")
## 
## > ## Colored plot margins, axis labels and titles.    You do need to be
## > ## careful with these kinds of effects.    It's easy to go completely
## > ## over the top and you can end up with your lunch all over the keyboard.
## > ## On the other hand, my market research clients love it.
## > 
## > x <- c(0.00, 0.40, 0.86, 0.85, 0.69, 0.48, 0.54, 1.09, 1.11, 1.73, 2.05, 2.02)
## 
## > par(bg="lightgray")
## 
## > plot(x, type="n", axes=FALSE, ann=FALSE)

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="cornsilk", border="black")
## 
## > lines(x, col="blue")
## 
## > points(x, pch=21, bg="lightcyan", cex=1.25)
## 
## > axis(2, col.axis="blue", las=1)
## 
## > axis(1, at=1:12, lab=month.abb, col.axis="blue")
## 
## > box()
## 
## > title(main= "The Level of Interest in R", font.main=4, col.main="red")
## 
## > title(xlab= "1996", col.lab="red")
## 
## > ## A filled histogram, showing how to change the font used for the
## > ## main title without changing the other annotation.
## > 
## > par(bg="cornsilk")
## 
## > x <- rnorm(1000)
## 
## > hist(x, xlim=range(-4, 4, x), col="lavender", main="")

## 
## > title(main="1000 Normal Random Variates", font.main=3)
## 
## > ## A scatterplot matrix
## > ## The good old Iris data (yet again)
## > 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", font.main=4, pch=19)

## 
## > pairs(iris[1:4], main="Edgar Anderson's Iris Data", pch=21,
## +       bg = c("red", "green3", "blue")[unclass(iris$Species)])

## 
## > ## Contour plotting
## > ## This produces a topographic map of one of Auckland's many volcanic "peaks".
## > 
## > x <- 10*1:nrow(volcano)
## 
## > y <- 10*1:ncol(volcano)
## 
## > lev <- pretty(range(volcano), 10)
## 
## > par(bg = "lightcyan")
## 
## > pin <- par("pin")
## 
## > xdelta <- diff(range(x))
## 
## > ydelta <- diff(range(y))
## 
## > xscale <- pin[1]/xdelta
## 
## > yscale <- pin[2]/ydelta
## 
## > scale <- min(xscale, yscale)
## 
## > xadd <- 0.5*(pin[1]/scale - xdelta)
## 
## > yadd <- 0.5*(pin[2]/scale - ydelta)
## 
## > plot(numeric(0), numeric(0),
## +      xlim = range(x)+c(-1,1)*xadd, ylim = range(y)+c(-1,1)*yadd,
## +      type = "n", ann = FALSE)

## 
## > usr <- par("usr")
## 
## > rect(usr[1], usr[3], usr[2], usr[4], col="green3")
## 
## > contour(x, y, volcano, levels = lev, col="yellow", lty="solid", add=TRUE)
## 
## > box()
## 
## > title("A Topographic Map of Maunga Whau", font= 4)
## 
## > title(xlab = "Meters North", ylab = "Meters West", font= 3)
## 
## > mtext("10 Meter Contour Spacing", side=3, line=0.35, outer=FALSE,
## +       at = mean(par("usr")[1:2]), cex=0.7, font=3)
## 
## > ## Conditioning plots
## > 
## > par(bg="cornsilk")
## 
## > coplot(lat ~ long | depth, data = quakes, pch = 21, bg = "green3")

## 
## > par(opar)
# To see a sequence of 3D graphics
#demo(persp)           
 
# To list available demonstrations in base package of R
#demo() 
# To list all arithmetic operators in R
help(Arithmetic)      
 
# To get help on the log function
help(log)             
 
# To display R help on the plot() function
help(plot)            
 
# Same as help(plot)
?plot                 
 
# To see example of the usage of plot function
example(plot)         
## 
## plot> require(stats) # for lowess, rpois, rnorm
## 
## plot> plot(cars)

## 
## plot> lines(lowess(cars))
## 
## plot> plot(sin, -pi, 2*pi) # see ?plot.function

## 
## plot> ## Discrete Distribution Plot:
## plot> plot(table(rpois(100, 5)), type = "h", col = "red", lwd = 10,
## plot+      main = "rpois(100, lambda = 5)")

## 
## plot> ## Simple quantiles/ECDF, see ecdf() {library(stats)} for a better one:
## plot> plot(x <- sort(rnorm(47)), type = "s", main = "plot(x, type = \"s\")")

## 
## plot> points(x, cex = .5, col = "dark red")
# To list all R commands containing the string plot; could be too much info
help.search("plot")   
 
# Same as help.search("plot")
??plot   

: operator has higher predecence than the arithmetical operations.

sequence_1 = 0 : 10 - 2
print(sequence_1)
##  [1] -2 -1  0  1  2  3  4  5  6  7  8
sequence_2 = 0 : (10 - 2)
print(sequence_2)
## [1] 0 1 2 3 4 5 6 7 8
x = seq(from = 0, to = 10, by = 0.5)
square_root_x = sqrt(x)
 
plot(x, square_root_x)

Part 3 Graphics

x = seq(from =0, to =10, by=0.5)
square_root_x = sqrt(x)
plot(x, square_root_x)

# To pause between plots
par(ask = TRUE) 

# To add title
plot(x, square_root_x, main = "My First Plot")                         

# To add label to y-axis
plot(x, square_root_x, main = "My First Plot", ylab = "sqrt(x)")        

# To set limits of y-axis
plot(x, square_root_x, main = "My First Plot", ylab = "sqrt(x)", ylim = c(0, 10))  

# add color
plot(x, square_root_x, main = "my fisrt plot", ylab = "sqrt(x)", ylim = c(0,10), col = "blue")

# To over strike with both plotting characters, pch, and connecting lines
plot(x, square_root_x, main = "My First Plot", ylab = "sqrt(x)", ylim = c(0, 10),col = "blue", type = "o")  

# To set the line type
plot(x, square_root_x, main = "My First Plot", ylab = "sqrt(x)", ylim = c(0, 10), col = "blue", type = "o", lty = "dotted") 

# To set plotting character 
plot(x, square_root_x, main = "My First Plot", ylab = "sqrt(x)", ylim = c(0, 10), col = "blue", type = "o", lty = "dotted", pch = 22)   

plot() is a high-level plotting function that opens a new plotting window, using low-level functions, one can add to the open plot,some of the commonly used adding functions are: points(), lines(), abline(), legend(), text()

t = seq(from = 0, to = 10, by = 0.5)
        
# To pause between plots
#par(ask = TRUE)                                           
    
plot(t, sqrt(2 * t), main = "Adding to Plot", sub = "A function and its inverse", xlab = "", ylab = "", xlim = c(0, 10), ylim = c(0, 10), pch = 16, type = "o") 
 
# To add the inverse function
points(t, 0.5 * t * t, col = "red" , pch = 16, type = "o")    
 
# To add the line with intercept = 0, slope = 1
abline(0, 1, col = "gray", lwd = 3, lty = "dashed")      
 
# To add text
text(7, 7.3, "45-degree line", srt = 45)                 
legend("bottomright", legend = c("funtion", "inverse"), pch = 16, col = c("black", "red"))
 
# One can add graphs of mathematical functions using, for example:
x = seq(from =0, to =10, by=0.5)
lines(curve(sin(x) + cos(x) + 5,  add = TRUE))

curve() is also a high-level mathematical plotting function that opens a new plotting window, Variable in the mathematical formula must be x, add more graphs by setting add = TRUE

curve(sin(x), xlim = c(-4*pi, 4*pi), ylim = c(-2, 2), col = "red", ylab = "")
 
# To plot horizontal line using h
abline(h=0, lty= "dotted")    
 
# To mark the origin      
points(0, 0, pch = 3)                
 
curve(cos(x),  col = "blue", add = TRUE) 
curve(sin(x) + cos(x),  col = "purple", lwd = 3, add = TRUE)
 
title("sin(x) + cos(x)")
 
legend("bottomright", legend = c("sin(x)", "cos(x)", "sin(x) + cos(x)"), 
        lty = 1, col = c("red", "blue", "purple"))

par() command can set the multi plot environment. mfcol = c(nr,nc) partitions the graphic window as a matrix of nr rows and nc columns, the plots are then drawn in columns. mfrow = c(nr,nc) partitions the graphic window as a matrix of nr rows and nc columns, the plots are then drawn in rows. You can get fancy partitions of the plotting window with layout() function

# Prepare for 2x2 plots, to be filled by rows
par(mfrow = c(2, 2))          
 
t = seq(0, 10, 0.2)
plot(t, sin(t)) 
curve(sinh(x), -5, 5)
curve(tan(x), n = 500, -5, 5)
curve(round(x), n = 500, -5, 5) 

# Return to single plot
par(mfrow = c(1, 1))          
 
curve(sin(x), -5, 5) 

Part 4 Curve Fitting

Linear Model function lm(y~x) computes the least square fit line to data points; y as a function of x. Type ?formula for help with legal formulas. lm() can also be used to fit nonlinear models where the parameters enter into model linearly

# fit a line to two data points
x2 = c(1,2)
y2 = c(1.5,2.5)
plot(x2,y2, main = "Unique line through 2 points", xlim = c(0,4), ylim = c(0, 4))

# compute the least square line 

ls_fit_line = lm(y2~x2)
print(ls_fit_line)
## 
## Call:
## lm(formula = y2 ~ x2)
## 
## Coefficients:
## (Intercept)           x2  
##         0.5          1.0
# plot the least square line stored in ls_fit_line
abline(ls_fit_line, col = "red")

# plot the vector contain the intercept and slope 
print(coef(ls_fit_line))
## (Intercept)          x2 
##         0.5         1.0
# more information
print(summary(ls_fit_line))
## 
## Call:
## lm(formula = y2 ~ x2)
## 
## Residuals:
## ALL 2 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)      0.5         NA      NA       NA
## x2               1.0         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:              1,  Adjusted R-squared:            NaN 
## F-statistic:           NaN on 1 and 0 DF,  p-value: NA
# Compute least-square line fit for 3 data points
x3 = c(1, 2, 3)
y3 = c(1.5, 2.5, 2.8)
 
plot(x3, y3, main="Best-fit line through 3 points", xlim=c(0, 4), ylim = c(0, 4)) 
fit3 = lm(y3 ~ x3)
abline(fit3, col = "red")

print(fit3)
## 
## Call:
## lm(formula = y3 ~ x3)
## 
## Coefficients:
##    (Intercept)              x3  
## 0.966666666667  0.650000000000
print(summary(fit3))
## 
## Call:
## lm(formula = y3 ~ x3)
## 
## Residuals:
##               1               2               3 
## -0.116666666667  0.233333333333 -0.116666666667 
## 
## Coefficients:
##                   Estimate     Std. Error t value Pr(>|t|)
## (Intercept) 0.966666666667 0.436526695124 2.21445  0.27003
## x3          0.650000000000 0.202072594216 3.21667  0.19188
## 
## Residual standard error: 0.285773803325 on 1 degrees of freedom
## Multiple R-squared:  0.911870503597, Adjusted R-squared:  0.823741007194 
## F-statistic: 10.3469387755 on 1 and 1 DF,  p-value: 0.191883024573
# Compute the parabola through 3 data points
 
plot(x3, y3, main="Unique parabola through 3 points", xlim=c(0, 6), ylim = c(0, 6)) 
fit_parabola3 = lm(y3 ~ x3 + I(x3^2))
print(fit_parabola3 )
## 
## Call:
## lm(formula = y3 ~ x3 + I(x3^2))
## 
## Coefficients:
## (Intercept)           x3      I(x3^2)  
##       -0.20         2.05        -0.35
curve(coef(fit_parabola3)[1] + coef(fit_parabola3)[2]* x + coef(fit_parabola3)[3]*x^2, 0, 6, col = "red", add=TRUE)

# Compute a best-fit parabola for 4 data points
 
x4 = c(1, 2, 3, 4)
y4 = c(1.5, 2.5, 2.8, 1.7)
 
plot(x4, y4, main="Best-fit parabola through 4 points", xlim=c(0, 6), ylim = c(0, 6)) 
fit_parabola4 = lm(y4 ~ x4 + I(x4^2))
curve(coef(fit_parabola4)[1] + coef(fit_parabola4)[2]* x + coef(fit_parabola4)[3]*x^2, 0, 6, col = "red", add=TRUE)
 
# Add the predicted points
points(x4, predict(fit_parabola4), col = "green") 

print(fit_parabola4)
## 
## Call:
## lm(formula = y4 ~ x4 + I(x4^2))
## 
## Coefficients:
## (Intercept)           x4      I(x4^2)  
##      -0.725        2.715       -0.525

Last modified on 2018-07-16