Example R programs and commands 15. Covariance and correlation # All lines preceded by the "#" character are my comments. # All other left-justified lines are my input. # All other indented lines are the R program output. # Generate some normally distributed random points with independent x,y # coordinates: x <- rnorm(50,0,1) # 50 copies of N(0,1) y <- rnorm(50,0,10) # 50 copies of N(0,10) xy <- cbind(x,y) # 50 coordinate pairs (x,y) with independent x,y # Rotation of coordinate makes them dependent: rot <- matrix(c(1,-1,1,1),nrow=2,ncol=2) xy0 <- xy %*% rot # rotate by 45 degrees # Compute the covariance matrix Sigma^2: cov(xy) # independent x,y cov(xy0) # dependent x,y # Correlation matrix is the normalized covariance matrix: cor(xy) # independent x,y cor(xy0) # dependent x,y # The correlation coefficient is in both off-diagonal positions: cor(xy)[1,2] # should be close to 0 due to independence cor(xy)[2,1] # should be the same as the previous result cor(xy0)[1,2] # should be close to 1 (or -1) due to strong dependence cor(xy0)[2,1] # should be the same as the previous result # 95% confidence interval for the correlation coefficient: cor.test(x,y) # independent x,y cor.test(xy0[,1],xy0[,2]) # dependent x,y columns extracted from xy0 matrix # Coefficient of determination is the squared correlation coefficient: cor(xy)[1,2]*cor(xy)[1,2] # should be close to 0 due to independence cor(xy0)[1,2]*cor(xy0)[1,2] # should be close to 1 due to strong dependence # Use R's cov() function to compute the covariance matrix in Example 13: # Read 17 samples from a bivariate normal density: # x1 x2 i (of 17 samples) #---------- ---------- -- data <- scan() 1.033024 0.418857 1 0.322703 0.613913 2 -1.949918 0.505329 3 -3.910608 0.081451 4 -1.163915 0.904262 5 -4.944068 -0.086801 6 0.936824 0.889408 7 -2.697000 0.185941 8 -1.484696 0.318464 9 -2.969133 -0.019170 10 -0.565761 0.682739 11 0.314755 0.010014 12 3.181993 0.862641 13 -0.816990 0.440558 14 -2.053569 0.444235 15 -0.311087 0.797796 16 -1.131430 0.522798 17 # Impose structure: 3 rows (x1, x2, i) with 17 columns of data triplets. x123 <- matrix(data, nrow=3) # ncol is computed; x123 is a 3x17 matrix # NOTE: R fills columns of matrices first, in the top-to-bottom then # left-to-right order. Thus the data is stored as the transpose of the table # Extract the first two rows, omitting the third (index i), and transpose: x12t <- t(x123[1:2,]) # Use R to compute the covariance matrix: cov(x12t) # NOTE: cov() uses DF = 16 = 17-1 in the denominator # Get the matrix A in the density formula rho(x)= sqrt(det A) exp(-pi x'Ax): A <- solve(2*pi*cov(x12t)) # Correlation matrix is the normalized covariance matrix: cor(x12t) # The correlation coefficient is in both off-diagonal positions: cc12 <- cor(x12t)[1,2]; cc12 cc21 <- cor(x12t)[2,1]; cc21 # Theorem: cc12 == cc21 # Coefficient of determination is the squared correlation coefficient: cd <- cc12*cc21; cd # Theorem: cc12*cc21 == cc21^2 == cc21^2 # Alternatively, extract the vectors and use lm(x1~x2) to compute the # covariance matrix, correlation matrix, correlation coefficient, # and coefficient of determination (called "Multiple R-squared" in R): x1 <- x123[1,] x2 <- x123[2,] cov(x1,x2) # alternative R method for covariance matrix given 2 vectors cor(x1,x2) # alternative R method for correlation matrix given 2 vectors cor(x1,x2)[1,2] # alt. R method for correlation coefficient of 2 vectors cor.test(x1,x2) # 95% confidence interval for the correlation coefficient cor(x1,x2)[1,2]*cor(x1,x2)[2,1] # coefficient of determination directly summary(lm(x1~x2)) # computes coefficient of determination from regression summary(lm(x2~x1)) # computes the same coefficient of determination # Spearman rank correlation data<-scan() 44 66 58 81 46 70 31 28 81 105 21 13 70 108 35 38 42 61 22 12 x<- data[1+2*(0:9)]; # first column y<- data[2+2*(0:9)]; # second column cor.test(x,y, method="spearman")