#  This file contains an artificial neural network analysis of
#  the Checker data from Will Welch's notes Chapt. 1
#      
#  Authors:
#       R.W. Oldford, 2004
#
#
# just a handy function to capture the pathname for the files we will load.
# Build the full URL of a file in the STAT 441 R-code directory.
#
# Args:
#   x: character vector of file names (e.g. "checker.R").
# Returns:
#   A character vector of URLs, one per element of `x`.
web441 <- function(x) {
  # paste0() is the idiomatic spelling of paste(..., sep = "").
  paste0("http://www.undergrad.math.uwaterloo.ca/~stat441/R-code/", x)
}

# (in class we just loaded this file since the 
#  machine wasn't connected to the internet).
#
# Get the data:
source(web441("checker.R"))
# NOTE(review): the rest of the script assumes checker.R defines the data
# frames `checker.train` and `checker.test` with columns x1, x2 and a 0/1
# response y -- confirm against that file.

# ---------------------------------------------------------------------
# Helpers.  The same plot and the same confusion table are produced for
# every fitted net, so they are written once here.  They read the
# globals checker.train, x1.range, x2.range, xgrid and n.grid, all of
# which are defined below before the helpers are first called.
# ---------------------------------------------------------------------

# Set up empty axes over the training range, optionally add a title,
# then plot the training points: class 0 as a red "0", class 1 as a
# blue "+".  (Works whether y is numeric or a factor with levels
# "0"/"1", since the comparison is made against 0 and 1 either way.)
draw.training.points <- function(main = NULL) {
  plot(0, 0, xlim = x1.range, ylim = x2.range,
       xlab = "x1", ylab = "x2", type = "n")
  title(main)
  points(checker.train[checker.train[, "y"] == 0, c("x1", "x2")],
         pch = "0", col = "red", cex = 1.2)
  points(checker.train[checker.train[, "y"] == 1, c("x1", "x2")],
         pch = "+", col = "blue", cex = 1.2)
  invisible(NULL)
}

# Fitted probabilities of `fit` over xgrid, arranged as an
# n.grid x n.grid matrix for contour().  expand.grid() varies x1
# fastest, so a column-major fill puts x1 along the rows and x2 along
# the columns, which is what contour() expects.
grid.probabilities <- function(fit) {
  matrix(predict(fit, newdata = xgrid, type = "raw"),
         nrow = n.grid, ncol = n.grid, byrow = FALSE)
}

# Open a new graphics window and show the training data together with
# the 0.5 contour (solid) and the 1/11 and 10/11 contours (dashed) of
# the fitted probabilities `probs`.  The title reports the value of the
# fitting criterion and the residual sum of squares of `fit`.
show.fit <- function(fit, probs, label) {
  # dev.new() opens the platform's default device; the original
  # quartz() call exists only on macOS.
  dev.new()
  draw.training.points(
    main = paste(paste0(label, ". Value = "), prettyNum(fit$value),
                 "  RSS = ", prettyNum(sum(fit$residuals^2)))
  )
  contour(x = unique(xgrid[, 1]), y = unique(xgrid[, 2]), probs,
          levels = 0.5,
          add = TRUE, lty = 1, col = "brown", lwd = 2)
  contour(x = unique(xgrid[, 1]), y = unique(xgrid[, 2]), probs,
          levels = c(1/11, 10/11),
          add = TRUE, lty = 2, col = "brown", lwd = 2)
  invisible(NULL)
}

# Cross-tabulate the observed y in `data` against the event
# "fitted probability > 0.5" for the net `fit`.
confusion <- function(fit, data) {
  table(data[, "y"],
        predict(fit, newdata = data, type = "raw") > 0.5)
}

# plot it
x1.range <- range(checker.train[, "x1"])
x2.range <- range(checker.train[, "x2"])
draw.training.points()


#
# Get the neural net library
#
library(nnet)

#
#  First, save the values of the observed "y"s in the training
#  data (since I will need these for a calculation later)
#
checker.y <- checker.train[, "y"]

# For the neural net code, the response "y" needs to be
# turned into a "factor" (of known levels to match the classes)
#

checker.train[, "y"] <- factor(checker.train[, "y"])

#
# Build the neural net  (two input variables; one output, y;
# one hidden layer of two nodes; penalty based on
# weight decay; max iterations = 1000)

checker.net <- nnet(y ~ x1 + x2, data = checker.train,
                    size = 2, decay = 1.0E-2, maxit = 1000)

summary(checker.net)

# From this you can actually construct the weights for the neural net
#
# The weights are stored in
checker.net$wts

#  as is the value of the fitting criterion
checker.net$value

#  fitted values for the training data
checker.net$fitted.values

# and the residuals
checker.net$residuals

# which are just (here's where we use the original y's)
checker.y - checker.net$fitted.values
# The following shows this
(checker.y - checker.net$fitted.values) - checker.net$residuals


#
#  Now we see what the net's predictions are on the usual grid:
#
n.grid <- 21
xgrid <- expand.grid(
  x1 = seq(x1.range[1], x1.range[2], length.out = n.grid),
  x2 = seq(x2.range[1], x2.range[2], length.out = n.grid)
)

# Compute the fitted probabilities over this grid (arranged as a matrix
# for the contour program) and plot them.

pHat <- grid.probabilities(checker.net)
show.fit(checker.net, pHat, "checker net")

#
#  What happens when we repeat this?
#

checker.net1 <- nnet(y ~ x1 + x2, data = checker.train,
                     size = 2, decay = 1.0E-2, maxit = 1000)

summary(checker.net1)

pHat1 <- grid.probabilities(checker.net1)
show.fit(checker.net1, pHat1, "checker net 1")

#
#  And again?
#

checker.net2 <- nnet(y ~ x1 + x2, data = checker.train,
                     size = 2, decay = 1.0E-2, maxit = 1000)

summary(checker.net2)

pHat2 <- grid.probabilities(checker.net2)
show.fit(checker.net2, pHat2, "checker net 2")


#
#  Each of the above was a local minimum, determined by a random start.
#  We could fix the start by ensuring that the seed of the random number
#  generator is identical at each repetition (and hence the same start)
set.seed(12345)


checker.netfixed <- nnet(y ~ x1 + x2, data = checker.train,
                         size = 2, decay = 1.0E-2, maxit = 1000)

summary(checker.netfixed)

pHatf <- grid.probabilities(checker.netfixed)
show.fit(checker.netfixed, pHatf, "checker net fixed")

#
# Look at misclassification probabilities
#
#  On the training data
#
# checker.net
checker.net$value
confusion(checker.net, checker.train)
# checker.net1
checker.net1$value
confusion(checker.net1, checker.train)
# checker.net2
checker.net2$value
confusion(checker.net2, checker.train)
# checker.netfixed
checker.netfixed$value
confusion(checker.netfixed, checker.train)

#
#  On the test data
#
# checker.net
checker.net$value
confusion(checker.net, checker.test)
# checker.net1
checker.net1$value
confusion(checker.net1, checker.test)
# checker.net2
checker.net2$value
confusion(checker.net2, checker.test)
# checker.netfixed
checker.netfixed$value
confusion(checker.netfixed, checker.test)


####################
#
#
# Build the neural net exactly as before, except that we
# add another input variable, the interaction term x1:x2
#

nn.int <- nnet(y ~ x1 + x2 + x1:x2, data = checker.train,
               size = 2, decay = 1.0E-2, maxit = 1000)

summary(nn.int)

# From this you can actually construct the weights for the neural net

# Compute the fitted probabilities over the grid and plot them.

pHati <- grid.probabilities(nn.int)
show.fit(nn.int, pHati, "NN int")

#
#  Repeat this twice
#

nn.int1 <- nnet(y ~ x1 + x2 + x1:x2, data = checker.train,
                size = 2, decay = 1.0E-2, maxit = 1000)

summary(nn.int1)

pHati1 <- grid.probabilities(nn.int1)
show.fit(nn.int1, pHati1, "NN int 1")

#
# Again
#
nn.int2 <- nnet(y ~ x1 + x2 + x1:x2, data = checker.train,
                size = 2, decay = 1.0E-2, maxit = 1000)

summary(nn.int2)

pHati2 <- grid.probabilities(nn.int2)
show.fit(nn.int2, pHati2, "NN int 2")

# Look at misclassification probabilities
#
#  On the training data
#
# nn.int
nn.int$value
confusion(nn.int, checker.train)
# nn.int1
nn.int1$value
confusion(nn.int1, checker.train)
# nn.int2
nn.int2$value
confusion(nn.int2, checker.train)

#
#  On the test data
#
# nn.int
nn.int$value
confusion(nn.int, checker.test)
# nn.int1
nn.int1$value
confusion(nn.int1, checker.test)
# nn.int2
nn.int2$value
confusion(nn.int2, checker.test)


#####################
#
#
#   Changing the size of the hidden layer
#   (no interaction)
#

nn.3sz <- nnet(y ~ x1 + x2, data = checker.train,
               size = 3, decay = 1.0E-2, maxit = 1000)

summary(nn.3sz)

# Compute the fitted probabilities over the grid and plot them.

pHat3sz <- grid.probabilities(nn.3sz)
show.fit(nn.3sz, pHat3sz, "NN 3sz")

#
#  Repeat this twice
#

nn.3sz1 <- nnet(y ~ x1 + x2, data = checker.train,
                size = 3, decay = 1.0E-2, maxit = 1000)

summary(nn.3sz1)

pHat3sz1 <- grid.probabilities(nn.3sz1)
show.fit(nn.3sz1, pHat3sz1, "NN 3sz1")

#
# Again
#

nn.3sz2 <- nnet(y ~ x1 + x2, data = checker.train,
                size = 3, decay = 1.0E-2, maxit = 1000)

summary(nn.3sz2)

pHat3sz2 <- grid.probabilities(nn.3sz2)
show.fit(nn.3sz2, pHat3sz2, "NN 3sz2")


# Look at misclassification probabilities
#
#  On the training data
#
# nn.3sz
nn.3sz$value
confusion(nn.3sz, checker.train)
# nn.3sz1
nn.3sz1$value
confusion(nn.3sz1, checker.train)
# nn.3sz2
nn.3sz2$value
confusion(nn.3sz2, checker.train)

#
#  On the test data
#
# nn.3sz
nn.3sz$value
confusion(nn.3sz, checker.test)
# nn.3sz1
nn.3sz1$value
confusion(nn.3sz1, checker.test)
# nn.3sz2
nn.3sz2$value
confusion(nn.3sz2, checker.test)


#####################
#
#
#   Size of the hidden layer = 4
#   (no interaction)
#

nn.4sz <- nnet(y ~ x1 + x2, data = checker.train,
               size = 4, decay = 1.0E-2, maxit = 1000)

summary(nn.4sz)

# Compute the fitted probabilities over the grid and plot them.

pHat4sz <- grid.probabilities(nn.4sz)
show.fit(nn.4sz, pHat4sz, "NN 4sz")

#
#  Repeat this twice
#

nn.4sz1 <- nnet(y ~ x1 + x2, data = checker.train,
                size = 4, decay = 1.0E-2, maxit = 1000)

summary(nn.4sz1)

pHat4sz1 <- grid.probabilities(nn.4sz1)
show.fit(nn.4sz1, pHat4sz1, "NN 4sz1")

#
# Again
#

nn.4sz2 <- nnet(y ~ x1 + x2, data = checker.train,
                size = 4, decay = 1.0E-2, maxit = 1000)

summary(nn.4sz2)

pHat4sz2 <- grid.probabilities(nn.4sz2)
show.fit(nn.4sz2, pHat4sz2, "NN 4sz2")


# Look at misclassification probabilities
#
#  On the training data
#
# nn.4sz
nn.4sz$value
confusion(nn.4sz, checker.train)
# nn.4sz1
nn.4sz1$value
confusion(nn.4sz1, checker.train)
# nn.4sz2
nn.4sz2$value
confusion(nn.4sz2, checker.train)

#
#  On the test data
#
# nn.4sz
nn.4sz$value
confusion(nn.4sz, checker.test)
# nn.4sz1
nn.4sz1$value
confusion(nn.4sz1, checker.test)
# nn.4sz2
nn.4sz2$value
confusion(nn.4sz2, checker.test)


#####################
#
#
#   Size of the hidden layer = 5
#   (no interaction)
#

nn.5sz <- nnet(y ~ x1 + x2, data = checker.train,
               size = 5, decay = 1.0E-2, maxit = 1000)

summary(nn.5sz)

# Compute the fitted probabilities over the grid and plot them.

pHat5sz <- grid.probabilities(nn.5sz)
show.fit(nn.5sz, pHat5sz, "NN 5sz")

#
#  Repeat this twice
#

nn.5sz1 <- nnet(y ~ x1 + x2, data = checker.train,
                size = 5, decay = 1.0E-2, maxit = 1000)

summary(nn.5sz1)

pHat5sz1 <- grid.probabilities(nn.5sz1)
show.fit(nn.5sz1, pHat5sz1, "NN 5sz1")

#
# Again
#

nn.5sz2 <- nnet(y ~ x1 + x2, data = checker.train,
                size = 5, decay = 1.0E-2, maxit = 1000)

summary(nn.5sz2)

pHat5sz2 <- grid.probabilities(nn.5sz2)
show.fit(nn.5sz2, pHat5sz2, "NN 5sz2")


# Look at misclassification probabilities
#
#  On the training data
#
# nn.5sz
nn.5sz$value
confusion(nn.5sz, checker.train)
# nn.5sz1
nn.5sz1$value
confusion(nn.5sz1, checker.train)
# nn.5sz2
nn.5sz2$value
confusion(nn.5sz2, checker.train)

#
#  On the test data
#
# nn.5sz
nn.5sz$value
confusion(nn.5sz, checker.test)
# nn.5sz1
nn.5sz1$value
confusion(nn.5sz1, checker.test)
# nn.5sz2
nn.5sz2$value
confusion(nn.5sz2, checker.test)


#####################
#
#
#   Size of the hidden layer = 5
#   (with quadratic and interaction terms)
#

nn.5qsz <- nnet(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2,
                data = checker.train,
                size = 5, decay = 1.0E-2, maxit = 1000)

summary(nn.5qsz)

# Compute the fitted probabilities over the grid and plot them.

pHat5qsz <- grid.probabilities(nn.5qsz)
show.fit(nn.5qsz, pHat5qsz, "NN 5qsz")

#
#  Repeat this twice
#

nn.5qsz1 <- nnet(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2,
                 data = checker.train,
                 size = 5, decay = 1.0E-2, maxit = 1000)

summary(nn.5qsz1)

pHat5qsz1 <- grid.probabilities(nn.5qsz1)
show.fit(nn.5qsz1, pHat5qsz1, "NN 5qsz1")

#
# Again
#

nn.5qsz2 <- nnet(y ~ x1 + x2 + I(x1^2) + I(x2^2) + x1:x2,
                 data = checker.train,
                 size = 5, decay = 1.0E-2, maxit = 1000)

summary(nn.5qsz2)

pHat5qsz2 <- grid.probabilities(nn.5qsz2)
show.fit(nn.5qsz2, pHat5qsz2, "NN 5qsz2")


# Look at misclassification probabilities
#
#  On the training data
#
# nn.5qsz
nn.5qsz$value
confusion(nn.5qsz, checker.train)
# nn.5qsz1
nn.5qsz1$value
confusion(nn.5qsz1, checker.train)
# nn.5qsz2
nn.5qsz2$value
confusion(nn.5qsz2, checker.train)

#
#  On the test data
#
# nn.5qsz
nn.5qsz$value
confusion(nn.5qsz, checker.test)
# nn.5qsz1
nn.5qsz1$value
confusion(nn.5qsz1, checker.test)
# nn.5qsz2
nn.5qsz2$value
confusion(nn.5qsz2, checker.test)