#####################################################################
#
# R code illustrating multicollinearity
# by Professor M. Zhu
#
#####################################################################

# Simulation overview:
#   The response is generated as y = 2 + 5 * x1 + noise, so the true
#   coefficient of x1 is 5. Two models are fitted on every replicate:
#     Experiment 1: y ~ x1 + x2, where x2 is drawn independently of x1.
#     Experiment 2: y ~ x1 + x3, where x3 = 3 * x1 - 2 + small noise.
#   The estimated x1 coefficient of each fit is stored in `a` and `b`
#   respectively, and the two sets of estimates are drawn as histograms
#   on a common x-axis with a reference line at the true value 5.

# number of repetitions of the experiment, and sample size per repetition
n_rep <- 200
n <- 50

# estimated x1 coefficients: a = Experiment 1, b = Experiment 2
a <- numeric(n_rep)
b <- numeric(n_rep)

for (i in seq_len(n_rep)) {
  # generate some data, with true coefficient for x1 = 5
  x1 <- rnorm(n, 0, 1)
  y <- 2 + 5 * x1 + rnorm(n, 0, 1)

  # Experiment 1 ...
  # create a second, un-used variable x2
  x2 <- rnorm(n, 0, 1)
  # fit model with both x1 and x2
  m <- lm(y ~ x1 + x2)
  # extract by name: no reliance on `$` partial matching or term position
  a[i] <- coef(m)[["x1"]]

  # Experiment 2 ...
  # create a second, un-used, but highly collinear variable x3
  x3 <- 3 * x1 - 2 + rnorm(n, 0, 0.25)
  # fit model with both x1 and x3
  m <- lm(y ~ x1 + x3)
  b[i] <- coef(m)[["x1"]]
}

# look at estimated coefficient for x1 over the repetitions
# NOTE: this leaves the graphics device in a 2 x 1 layout afterwards.
par(mfrow = c(2, 1))

# common x-axis limits so the two histograms are directly comparable
hi <- max(c(a, b)) + 1
lo <- min(c(a, b)) - 1

hist(a,
  main = "Experiment 1", xlab = "", ylab = "",
  xlim = c(lo, hi), border = FALSE, col = "dark gray"
)
abline(v = 5, col = "red", lwd = 2) # true coefficient

hist(b,
  main = "Experiment 2", xlab = "", ylab = "",
  xlim = c(lo, hi), border = FALSE, col = "dark gray"
)
abline(v = 5, col = "red", lwd = 2) # true coefficient