# This file contains an artificial neural network analysis of
# the Checker data from Will Welch's notes Chapt. 1
#
# Authors:
#     R.W. Oldford, 2004
#
#
# Get the neural net library (loaded first so that a missing package
# is reported before any data are fetched or plotted).
#
library(nnet)

# Just a handy function to capture the pathname for the files we will load.
#   x : character vector of file names on the course web site.
# Returns the corresponding full URL(s).
web441 <- function(x) {
  paste0("http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/", x)
}

# ------------------------------------------------------------------
# Helper functions.
#
# Every fit below is displayed and assessed in exactly the same way,
# so the repeated steps are collected here.  The default arguments
# refer to objects (checker.train, xgrid, x1.range, x2.range) that are
# created further down; defaults are only evaluated when a helper is
# called, by which time those objects exist.
# ------------------------------------------------------------------

# Set up an empty x1-x2 plot and add the observations in `data`:
# class 0 as a red "0" and class 1 as a blue "+".
checker.plot.points <- function(data = checker.train,
                                xlim = x1.range,
                                ylim = x2.range) {
  plot(0, 0, xlim = xlim, ylim = ylim,
       xlab = "x1", ylab = "x2", type = "n")
  points(data[data[, "y"] == 0, c("x1", "x2")],
         pch = "0", col = "red", cex = 1.2)
  points(data[data[, "y"] == 1, c("x1", "x2")],
         pch = "+", col = "blue", cex = 1.2)
}

# Compute the fitted probabilities of `net` over `grid` and put them
# in a matrix suitable for contour().  `grid` must come from
# expand.grid(x1 = ..., x2 = ...): its first column varies fastest, so
# filling the matrix by column gives rows indexed by x1 and columns
# indexed by x2.  The matrix dimensions are taken from the grid itself
# rather than being hard-coded.
checker.grid.probs <- function(net, grid = xgrid) {
  p <- predict(net, newdata = grid, type = "raw")
  matrix(p,
         nrow = length(unique(grid[, 1])),
         ncol = length(unique(grid[, 2])),
         byrow = FALSE)
}

# Open a new graphics window and draw the data together with the
# fitted-probability contours of `net`.
#   net   : a fitted nnet object
#   pHat  : matrix of fitted probabilities from checker.grid.probs()
#   label : text placed at the start of the plot title
# The solid contour is p = 0.5 (the classification boundary); the
# dashed contours are p = 1/11 and p = 10/11.
# dev.new() is used instead of quartz() so that the script also runs
# on machines other than Macs.
checker.plot.fit <- function(net, pHat, label,
                             grid = xgrid,
                             data = checker.train,
                             xlim = x1.range,
                             ylim = x2.range) {
  dev.new()
  checker.plot.points(data, xlim = xlim, ylim = ylim)
  title(paste(paste0(label, ". Value = "), prettyNum(net$value),
              " RSS = ", prettyNum(sum(net$residuals^2))))
  contour(x = unique(grid[, 1]), y = unique(grid[, 2]), pHat,
          levels = 0.5, add = TRUE, lty = 1, col = "brown", lwd = 2)
  contour(x = unique(grid[, 1]), y = unique(grid[, 2]), pHat,
          levels = c(1/11, 10/11), add = TRUE, lty = 2, col = "brown",
          lwd = 2)
}

# Cross-tabulate the observed y in `data` (rows) against whether the
# fitted probability from `net` exceeds 0.5 (columns).
checker.confusion <- function(net, data) {
  table(data[, "y"],
        predict(net, newdata = data, type = "raw") > 0.5)
}

# ------------------------------------------------------------------
# The data
# ------------------------------------------------------------------

# (in class we just loaded this file since the
#  machine wasn't connected to the internet).
#
# Get the data:
source(web441("checker.R"))

# plot it
x1.range <- range(checker.train[, "x1"])
x2.range <- range(checker.train[, "x2"])
checker.plot.points(checker.train)

#
# First, save the values of the observed "y"s in the training
# data (since I will need these for a calculation later)
#
checker.y <- checker.train[, "y"]

# For the neural net code, the response "y" needs to be
# turned into a "factor" (of known levels to match the classes)
#
checker.train[, "y"] <- factor(checker.train[, "y"])

# ------------------------------------------------------------------
# First neural net
# ------------------------------------------------------------------

#
# Build the neural net (two input variables; one output, y;
# one hidden layer of two nodes; penalty based on
# weight decay; max iterations = 1000)
checker.net <- nnet(y ~ x1 + x2,
                    data = checker.train,
                    size = 2, decay = 1.0E-2, maxit = 1000)
summary(checker.net)
# From this you can actually construct the weights for the neural net
#
# The weights are stored on
checker.net$wts
# as is the value of the fitting criterion
checker.net$value
# fitted values for the training data
checker.net$fitted.values
# and the residuals
checker.net$residuals
# which are just (here's where we use the original y's)
checker.y - checker.net$fitted.values
# The following shows this
(checker.y - checker.net$fitted.values) - checker.net$residuals

#
# Now we see what the net's predictions are on the usual grid:
#
xgrid <- expand.grid(x1 = seq(x1.range[1], x1.range[2], length = 21),
                     x2 = seq(x2.range[1], x2.range[2], length = 21))

# Compute the fitted probabilities over this grid so that we can
# plot them, and plot them.
pHat <- checker.grid.probs(checker.net)
checker.plot.fit(checker.net, pHat, "checker net")

#
# What happens when we repeat this?
#
checker.net1 <- nnet(y ~ x1 + x2,
                     data = checker.train,
                     size = 2, decay = 1.0E-2, maxit = 1000)
summary(checker.net1)
pHat1 <- checker.grid.probs(checker.net1)
checker.plot.fit(checker.net1, pHat1, "checker net 1")

#
# And again?
#
checker.net2 <- nnet(y ~ x1 + x2,
                     data = checker.train,
                     size = 2, decay = 1.0E-2, maxit = 1000)
summary(checker.net2)
pHat2 <- checker.grid.probs(checker.net2)
checker.plot.fit(checker.net2, pHat2, "checker net 2")

#
# Each of the above was a local minimum, determined by a random start.
# We could fix the start by ensuring that the seed of the random number
# generator is identical at each repetition (and hence the same start)
set.seed(12345)
checker.netfixed <- nnet(y ~ x1 + x2,
                         data = checker.train,
                         size = 2, decay = 1.0E-2, maxit = 1000)
summary(checker.netfixed)
pHatf <- checker.grid.probs(checker.netfixed)
checker.plot.fit(checker.netfixed, pHatf, "checker net fixed")

#
# Look at misclassification (observed y versus fitted probability > 0.5)
#
# On the training data
#
# checker.net
checker.net$value
checker.confusion(checker.net, checker.train)
# checker.net1
checker.net1$value
checker.confusion(checker.net1, checker.train)
# checker.net2
checker.net2$value
checker.confusion(checker.net2, checker.train)
# checker.netfixed
checker.netfixed$value
checker.confusion(checker.netfixed, checker.train)
#
# On the test data
#
# checker.net
checker.net$value
checker.confusion(checker.net, checker.test)
# checker.net1
checker.net1$value
checker.confusion(checker.net1, checker.test)
# checker.net2
checker.net2$value
checker.confusion(checker.net2, checker.test)
# checker.netfixed
checker.netfixed$value
checker.confusion(checker.netfixed, checker.test)

####################
#
#
# Build the neural net exactly as before, except that we
# add another input variable, the interaction term x1:x2
#
nn.int <- nnet(y ~ x1 + x2 + x1:x2,
               data = checker.train,
               size = 2, decay = 1.0E-2, maxit = 1000)
summary(nn.int)
pHati <- checker.grid.probs(nn.int)
checker.plot.fit(nn.int, pHati, "NN int")

#
# Repeat this twice
#
nn.int1 <- nnet(y ~ x1 + x2 + x1:x2,
                data = checker.train,
                size = 2, decay = 1.0E-2, maxit = 1000)
summary(nn.int1)
pHati1 <- checker.grid.probs(nn.int1)
checker.plot.fit(nn.int1, pHati1, "NN int 1")

#
# Again
#
nn.int2 <- nnet(y ~ x1 + x2 + x1:x2,
                data = checker.train,
                size = 2, decay = 1.0E-2, maxit = 1000)
summary(nn.int2)
pHati2 <- checker.grid.probs(nn.int2)
checker.plot.fit(nn.int2, pHati2, "NN int 2")

# Look at misclassification
#
# On the training data
#
# nn.int
nn.int$value
checker.confusion(nn.int, checker.train)
# nn.int1
nn.int1$value
checker.confusion(nn.int1, checker.train)
# nn.int2
nn.int2$value
checker.confusion(nn.int2, checker.train)
#
# On the test data
#
# nn.int
nn.int$value
checker.confusion(nn.int, checker.test)
# nn.int1
nn.int1$value
checker.confusion(nn.int1, checker.test)
# nn.int2
nn.int2$value
checker.confusion(nn.int2, checker.test)

#####################
#
#
# Changing the size of the hidden layer: size = 3
# (no interaction)
#
nn.3sz <- nnet(y ~ x1 + x2,
               data = checker.train,
               size = 3, decay = 1.0E-2, maxit = 1000)
summary(nn.3sz)
pHat3sz <- checker.grid.probs(nn.3sz)
checker.plot.fit(nn.3sz, pHat3sz, "NN 3sz")

#
# Repeat this twice
#
nn.3sz1 <- nnet(y ~ x1 + x2,
                data = checker.train,
                size = 3, decay = 1.0E-2, maxit = 1000)
summary(nn.3sz1)
pHat3sz1 <- checker.grid.probs(nn.3sz1)
checker.plot.fit(nn.3sz1, pHat3sz1, "NN 3sz1")

#
# Again
#
nn.3sz2 <- nnet(y ~ x1 + x2,
                data = checker.train,
                size = 3, decay = 1.0E-2, maxit = 1000)
summary(nn.3sz2)
pHat3sz2 <- checker.grid.probs(nn.3sz2)
checker.plot.fit(nn.3sz2, pHat3sz2, "NN 3sz2")

# Look at misclassification
#
# On the training data
#
# nn.3sz
nn.3sz$value
checker.confusion(nn.3sz, checker.train)
# nn.3sz1
nn.3sz1$value
checker.confusion(nn.3sz1, checker.train)
# nn.3sz2
nn.3sz2$value
checker.confusion(nn.3sz2, checker.train)
#
# On the test data
#
# nn.3sz
nn.3sz$value
checker.confusion(nn.3sz, checker.test)
# nn.3sz1
nn.3sz1$value
checker.confusion(nn.3sz1, checker.test)
# nn.3sz2
nn.3sz2$value
checker.confusion(nn.3sz2, checker.test)

#####################
#
#
# Size of the hidden layer = 4
# (no interaction)
#
nn.4sz <- nnet(y ~ x1 + x2,
               data = checker.train,
               size = 4, decay = 1.0E-2, maxit = 1000)
summary(nn.4sz)
pHat4sz <- checker.grid.probs(nn.4sz)
checker.plot.fit(nn.4sz, pHat4sz, "NN 4sz")

#
# Repeat this twice
#
nn.4sz1 <- nnet(y ~ x1 + x2,
                data = checker.train,
                size = 4, decay = 1.0E-2, maxit = 1000)
summary(nn.4sz1)
pHat4sz1 <- checker.grid.probs(nn.4sz1)
checker.plot.fit(nn.4sz1, pHat4sz1, "NN 4sz1")

#
# Again
#
nn.4sz2 <- nnet(y ~ x1 + x2,
                data = checker.train,
                size = 4, decay = 1.0E-2, maxit = 1000)
summary(nn.4sz2)
pHat4sz2 <- checker.grid.probs(nn.4sz2)
checker.plot.fit(nn.4sz2, pHat4sz2, "NN 4sz2")

# Look at misclassification
#
# On the training data
#
# nn.4sz
nn.4sz$value
checker.confusion(nn.4sz, checker.train)
# nn.4sz1
nn.4sz1$value
checker.confusion(nn.4sz1, checker.train)
# nn.4sz2
nn.4sz2$value
checker.confusion(nn.4sz2, checker.train)
#
# On the test data
#
# nn.4sz
nn.4sz$value
checker.confusion(nn.4sz, checker.test)
# nn.4sz1
nn.4sz1$value
checker.confusion(nn.4sz1, checker.test)
# nn.4sz2
nn.4sz2$value
checker.confusion(nn.4sz2, checker.test)

#####################
#
#
# Size of the hidden layer = 5
# (no interaction)
#
nn.5sz <- nnet(y ~ x1 + x2,
               data = checker.train,
               size = 5, decay = 1.0E-2, maxit = 1000)
summary(nn.5sz)
pHat5sz <- checker.grid.probs(nn.5sz)
checker.plot.fit(nn.5sz, pHat5sz, "NN 5sz")

#
# Repeat this twice
#
nn.5sz1 <- nnet(y ~ x1 + x2,
                data = checker.train,
                size = 5, decay = 1.0E-2, maxit = 1000)
summary(nn.5sz1)
pHat5sz1 <- checker.grid.probs(nn.5sz1)
checker.plot.fit(nn.5sz1, pHat5sz1, "NN 5sz1")

#
# Again
#
nn.5sz2 <- nnet(y ~ x1 + x2,
                data = checker.train,
                size = 5, decay = 1.0E-2, maxit = 1000)
summary(nn.5sz2)
pHat5sz2 <- checker.grid.probs(nn.5sz2)
checker.plot.fit(nn.5sz2, pHat5sz2, "NN 5sz2")

# Look at misclassification
#
# On the training data
#
# nn.5sz
nn.5sz$value
checker.confusion(nn.5sz, checker.train)
# nn.5sz1
nn.5sz1$value
checker.confusion(nn.5sz1, checker.train)
# nn.5sz2
nn.5sz2$value
checker.confusion(nn.5sz2, checker.train)
#
# On the test data
#
# nn.5sz
nn.5sz$value
checker.confusion(nn.5sz, checker.test)
# nn.5sz1
nn.5sz1$value
checker.confusion(nn.5sz1, checker.test)
# nn.5sz2
nn.5sz2$value
checker.confusion(nn.5sz2, checker.test)

#####################
#
#
# Size of the hidden layer = 5
# (with quadratic and interaction terms)
#
nn.5qsz <- nnet(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2,
                data = checker.train,
                size = 5, decay = 1.0E-2, maxit = 1000)
summary(nn.5qsz)
pHat5qsz <- checker.grid.probs(nn.5qsz)
checker.plot.fit(nn.5qsz, pHat5qsz, "NN 5qsz")

#
# Repeat this twice
#
nn.5qsz1 <- nnet(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2,
                 data = checker.train,
                 size = 5, decay = 1.0E-2, maxit = 1000)
summary(nn.5qsz1)
pHat5qsz1 <- checker.grid.probs(nn.5qsz1)
checker.plot.fit(nn.5qsz1, pHat5qsz1, "NN 5qsz1")

#
# Again
#
nn.5qsz2 <- nnet(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2,
                 data = checker.train,
                 size = 5, decay = 1.0E-2, maxit = 1000)
summary(nn.5qsz2)
pHat5qsz2 <- checker.grid.probs(nn.5qsz2)
checker.plot.fit(nn.5qsz2, pHat5qsz2, "NN 5qsz2")

# Look at misclassification
#
# On the training data
#
# nn.5qsz
nn.5qsz$value
checker.confusion(nn.5qsz, checker.train)
# nn.5qsz1
nn.5qsz1$value
checker.confusion(nn.5qsz1, checker.train)
# nn.5qsz2
nn.5qsz2$value
checker.confusion(nn.5qsz2, checker.train)
#
# On the test data
#
# nn.5qsz
nn.5qsz$value
checker.confusion(nn.5qsz, checker.test)
# nn.5qsz1
nn.5qsz1$value
checker.confusion(nn.5qsz1, checker.test)
# nn.5qsz2
nn.5qsz2$value
checker.confusion(nn.5qsz2, checker.test)