Example R programs and commands
15. Covariance and correlation
# All lines preceded by the "#" character are my comments.
# All other left-justified lines are my input.
# All other indented lines are the R program output.
# Generate some normally distributed random points with independent x,y
# coordinates:
x <- rnorm(50,0,1) # 50 copies of N(0,1)
y <- rnorm(50,0,10) # 50 copies of N(0,10)
xy <- cbind(x,y) # 50 coordinate pairs (x,y) with independent x,y
# Rotation of coordinate makes them dependent:
rot <- matrix(c(1,-1,1,1),nrow=2,ncol=2)
xy0 <- xy %*% rot # rotate by 45 degrees
# Compute the covariance matrix Sigma^2:
cov(xy) # independent x,y
cov(xy0) # dependent x,y
# Correlation matrix is the normalized covariance matrix:
cor(xy) # independent x,y
cor(xy0) # dependent x,y
# The correlation coefficient is in both off-diagonal positions:
cor(xy)[1,2] # should be close to 0 due to independence
cor(xy)[2,1] # should be the same as the previous result
cor(xy0)[1,2] # should be close to 1 (or -1) due to strong dependence
cor(xy0)[2,1] # should be the same as the previous result
# 95% confidence interval for the correlation coefficient:
cor.test(x,y) # independent x,y
cor.test(xy0[,1],xy0[,2]) # dependent x,y columns extracted from xy0 matrix
# Coefficient of determination is the squared correlation coefficient:
cor(xy)[1,2]*cor(xy)[1,2] # should be close to 0 due to independence
cor(xy0)[1,2]*cor(xy0)[1,2] # should be close to 1 due to strong dependence
# Use R's cov() function to compute the covariance matrix in Example 13:
# Read 17 samples from a bivariate normal density:
# x1 x2 i (of 17 samples)
#---------- ---------- --
data <- scan()
1.033024 0.418857 1
0.322703 0.613913 2
-1.949918 0.505329 3
-3.910608 0.081451 4
-1.163915 0.904262 5
-4.944068 -0.086801 6
0.936824 0.889408 7
-2.697000 0.185941 8
-1.484696 0.318464 9
-2.969133 -0.019170 10
-0.565761 0.682739 11
0.314755 0.010014 12
3.181993 0.862641 13
-0.816990 0.440558 14
-2.053569 0.444235 15
-0.311087 0.797796 16
-1.131430 0.522798 17
# Impose structure: 3 rows (x1, x2, i) with 17 columns of data triplets.
x123 <- matrix(data, nrow=3) # ncol is computed; x123 is a 3x17 matrix
# NOTE: R fills columns of matrices first, in the top-to-bottom then
# left-to-right order. Thus the data is stored as the transpose of the table
# Extract the first two rows, omitting the third (index i), and transpose:
x12t <- t(x123[1:2,])
# Use R to compute the covariance matrix:
cov(x12t) # NOTE: cov() uses DF = 16 = 17-1 in the denominator
# Get the matrix A in the density formula rho(x)= sqrt(det A) exp(-pi x'Ax):
A <- solve(2*pi*cov(x12t))
# Correlation matrix is the normalized covariance matrix:
cor(x12t)
# The correlation coefficient is in both off-diagonal positions:
cc12 <- cor(x12t)[1,2]; cc12
cc21 <- cor(x12t)[2,1]; cc21 # Theorem: cc12 == cc21
# Coefficient of determination is the squared correlation coefficient:
cd <- cc12*cc21; cd # Theorem: cc12*cc21 == cc21^2 == cc21^2
# Alternatively, extract the vectors and use lm(x1~x2) to compute the
# covariance matrix, correlation matrix, correlation coefficient,
# and coefficient of determination (called "Multiple R-squared" in R):
x1 <- x123[1,]
x2 <- x123[2,]
cov(x1,x2) # alternative R method for covariance matrix given 2 vectors
cor(x1,x2) # alternative R method for correlation matrix given 2 vectors
cor(x1,x2)[1,2] # alt. R method for correlation coefficient of 2 vectors
cor.test(x1,x2) # 95% confidence interval for the correlation coefficient
cor(x1,x2)[1,2]*cor(x1,x2)[2,1] # coefficient of determination directly
summary(lm(x1~x2)) # computes coefficient of determination from regression
summary(lm(x2~x1)) # computes the same coefficient of determination
# Spearman rank correlation
data<-scan()
44 66
58 81
46 70
31 28
81 105
21 13
70 108
35 38
42 61
22 12
x<- data[1+2*(0:9)]; # first column
y<- data[2+2*(0:9)]; # second column
cor.test(x,y, method="spearman")