# This file contains multinomial and nnet discrimination with c > 2 classes # # Authors: # R.W. Oldford, 2004 # # # quartz() is the Mac OS call for a new window. # Change the call inside from quartz() to whatever the call is # for the OS you are using. e.g. X11() or pdf(), etc. newwindow <- function(x) {quartz()} # library(MASS) library(nnet) data(iris3) # # We need to construct an appropriate "data frame" for # the iris data to be used by the various modelling # functions. # There are several way in which we might # proceed. TWO DIFFERENT approaches will be shown here. # # # First some info common to the two approaches. # # Extract the separate Species of the data from the # original source where they are stored in a three way array. # Each species is a slice in that three way array, # and we would like these as separate matrices to # begin with. setosa <- iris3[,,1] versicolor <- iris3[,,2] virginica <- iris3[,,3] # Put these together into a single array iris <- rbind(setosa,versicolor,virginica) # Construct some Species labels # Species <- c(rep("Setosa",nrow(setosa)), rep("Versicolor",nrow(versicolor)), rep("Virginica",nrow(virginica)) ) # some meaningful row labels rownames(iris) <-c(paste("Setosa", 1:nrow(setosa)), paste("Versicolor", 1:nrow(versicolor)), paste("Virginica", 1:nrow(virginica))) # # And to be perverse, a more meaningful collection # of variate names # irisVars <- c("SepalLength", "SepalWidth", "PetalLength", "PetalWidth") # # which can replace the column names we started with # colnames(iris) <- irisVars # # Now to construct the data frame(s) # # METHOD 1 iris.df1 <- data.frame(iris, Species = Species) # METHOD 2 iris.df2 <- data.frame(cbind(iris, Species = factor(Species))) # plot the data pairs(iris.df1, main = "Anderson's Iris Data -- 3 species", col = c("red", "green3", "blue")[iris.df1[,"Species"]]) # or equivalently, pairs(iris.df2, main = "Anderson's Iris Data -- 3 species", col = c("red", "green3", "blue")[iris.df2[,"Species"]]) # or to avoid the Species appearing, newwindow() pairs(~ PetalWidth + PetalLength + SepalWidth + SepalLength, data = iris.df1, main = "Anderson's Iris Data -- 3 species", col = c("red", "green3", "blue")[iris.df1[,"Species"]]) # or how about, newwindow() pairs(cbind(iris.df1[,1:4], sqrt(iris.df1[,1]*iris.df1[,2]), sqrt(iris.df1[,3]*iris.df1[,4]) ) , labels = c(colnames(iris.df1[,1:4]), "Sqrt(SepalArea)", "Sqrt(PetalArea)"), main = "Anderson's Iris Data -- 3 species", col = c("red", "green3", "blue")[iris.df1[,"Species"]]) # # Select a training sample # (Say half the data) # Note that unlike other authors, I choose to randomly # sample from the whole data set rather than stratify by the # Species. The thinking here is to produce a training and # a test set according to how the data might arrive rather than # to have two sets which most resemble the whole dataset. # set.seed(2568) n <- nrow(iris.df1) train <- sort(sample(1:n, floor(n/2))) # Training data will be: iris.train1 <- iris.df1[train,] # or equivalently iris.train2 <- iris.df2[train,] # negate the indices to get the test data: iris.test1 <- iris.df1[-train,] # or equivalently iris.test2 <- iris.df2[-train,] # Training data look like: newwindow() pairs(~ PetalWidth + PetalLength + SepalWidth + SepalLength, data = iris.train1, main = "Iris Training Data -- 3 species", col = c("red", "green3", "blue")[iris.train1[,"Species"]]) # Test data look like: newwindow() pairs(~ PetalWidth + PetalLength + SepalWidth + SepalLength, data = iris.test1, main = "Iris Test Data -- 3 species", col = c("red", "green3", "blue")[iris.test1[,"Species"]]) # # Fit a multinomial model # iris.mn <- multinom(formula = Species ~ PetalWidth + PetalLength + SepalWidth + SepalLength, data = iris.train1, maxit = 1000 ) summary(iris.mn) phat <- predict(iris.mn, iris.train1[,1:4], type="probs") pred.tr <- apply(phat,1, function(x){ c("Setosa pred", "Versicolor pred", "Virginica pred")[x==max(x)] } ) table(c("Setosa", "Versicolor","Virginica")[iris.train1[,"Species"]], pred.tr) # # A picture # pred.class <- apply(phat,1, function(x){ c("Setosa", "Versicolor", "Virginica")[x==max(x)] } ) point.symbol <- apply(matrix( pred.class== iris.train1[,"Species"]), 1, function(x){if (x) 1 else 3} ) # # The above would have to be modified for iris.df2 because there # Species has numeric valules. Instead, something like: # point.symbol <- apply(matrix( pred.class == c("Setosa", "Versicolor", "Virginica")[iris.train2[,"Species"]]), 1, function(x){if (x) 1 else 3} ) # would be necessary. pred.col <- apply(phat,1, function(x){ c("red", "green3", "blue")[x==max(x)] } ) newwindow() pairs(~ PetalWidth + PetalLength + SepalWidth + SepalLength, data = iris.train1, main = "Iris Training Data -- predicted colours (mistakes are +)", col = pred.col , cex = 2, pch = point.symbol) # # Try on the test data # phat.te <- predict(iris.mn, iris.test1[,1:4], type="probs") pred.te <- apply(phat.te,1, function(x){ c("Setosa pred", "Versicolor pred", "Virginica pred")[x==max(x)] } ) # # Alternatively we could have used "class" for here instead of the apply # function. # predict(iris.mn, iris.test1[,1:4], type="class") table(c("Setosa", "Versicolor","Virginica")[iris.test1[,"Species"]], pred.te) # # A picture # pred.class <- apply(phat.te,1, function(x){ c("Setosa", "Versicolor", "Virginica")[x==max(x)] } ) # # See previous comment about iris.df2 to adjust the following # point.symbol <- apply(matrix( pred.class== iris.test1[,"Species"]), 1, function(x){if (x) 1 else 3} ) pred.col <- apply(phat.te,1, function(x){ c("red", "green3", "blue")[x==max(x)] } ) newwindow() pairs(~ PetalWidth + PetalLength + SepalWidth + SepalLength, data = iris.test1, main = "Iris Test Data -- predicted colours (mistakes are +)", col = pred.col , cex = 2, pch = point.symbol) # # Except where otherwise noted, # the above should work identically with iris.df2, etc. # i.e. with the second method of constructing the data frame. # # # # A new multinomial model # Include the two "area terms" # iris.mn1 <- multinom(formula = Species ~ PetalWidth * PetalLength + SepalWidth * SepalLength, data = iris.df1, maxit = 1000 ) summary(iris.mn1) phat1 <- predict(iris.mn1, iris.train1[,1:4], type="probs") pred1.tr <- apply(phat1,1, function(x){ c("Setosa pred", "Versicolor pred", "Virginica pred")[x==max(x)] } ) table(c("Setosa", "Versicolor","Virginica")[iris.train1[,"Species"]], pred1.tr) # # Try on the test data # phat1.te <- predict(iris.mn1, iris.test1[,1:4], type="probs") pred1.te <- apply(phat1.te,1, function(x){ c("Setosa pred", "Versicolor pred", "Virginica pred")[x==max(x)] } ) table(c("Setosa", "Versicolor","Virginica")[iris.test1[,"Species"]], pred1.te) # # A picture # pred.class <- apply(phat1.te,1, function(x){ c("Setosa", "Versicolor", "Virginica")[x==max(x)] } ) # # Note the difference for df2 again (as before) for the following: # point.symbol <- apply(matrix( pred.class== iris.test1[,"Species"]), 1, function(x){if (x) 1 else 3} ) pred.col <- apply(phat1.te,1, function(x){ c("red", "green3", "blue")[x==max(x)] } ) newwindow() pairs(~ PetalWidth + PetalLength + SepalWidth + SepalLength, data = iris.test1, main = "Iris Test Data -- predicted colours (mistakes are +)", col = pred.col , cex = 2, pch = point.symbol) ############################################################### # # A neural net model (Done in two ways ... see help("nnet") # # First way with iris.df1 iris.nnet1 <- nnet(Species ~ . , data = iris.df1, subset = train, size=4, skip = TRUE, decay=1.0E-2, maxit = 1000) # # Note the multiple outoput nodes and the direct connection # of the input to the output (skip layer). # Note also that the subset argument is used together # with the whole of the dataset ... see help("nnet") # summary(iris.nnet1) # # Look at the predicted values for the test data # table(iris.df1$Species[-train], predict(iris.nnet1, iris.df1[-train,], type = "class") ) # # IMPORTANT NOTE: The above does not seeem to work for iris.df2 # # Try it with iris.df2 iris.nnet2 <- nnet(Species ~ . , data = iris.df2, subset = train, size=4, skip = TRUE, decay=1.0E-2, maxit = 1000) # The above works but seems to converge at a much higher value # of the criterion. # # Indeed, summary(iris.nnet2) # shows that there is only one out put node. # And the following # # predict(iris.nnet2, iris.df2[-train,], type = "class") # # fails (hence commented out here). As does # # predict(iris.nnet2, iris.df2[-train,], type = "probs") # # The only predictions available are the following: predict(iris.nnet2, iris.df2[-train,], type = "raw") # which do not appear to be of much use (single output, all near 1) # # Instead, we have to build the model as follows with iris.df2 # # # First we need to produce multi-valued categorical data for # Species. # "targets" will be a matrix of three columns full of 1s and 0s # to indicate whether the targets <- class.ind(Species) # # The nnet is fit using the data from the data frame # just as if it were a plain matrix rather than the # richer data frame structure. # # IMPORTANT NOTE. We do not use the subset argument here. # Rather, we subset the data directly *before* # nnet handles it. iris.nnet2 <- nnet(iris.df2[train,1:4], targets[train,], size=4, skip = TRUE, decay=1.0E-2, maxit = 1000) # RELATED TO THE ABOVE NOTE: # The following fits the whole of the data, the # subset argument is ignored. Try it. # set.seed(12345) iris.nnet2a <- nnet(iris.df2[,1:4], targets, subset = train, # <-- ignored size=4, skip = TRUE, decay=1.0E-2, maxit = 1000) set.seed(12345) iris.nnet2b <- nnet(iris.df2[,1:4], targets, size=4, skip = TRUE, decay=1.0E-2, maxit = 1000) # # OK, back to the model for df2 # summary(iris.nnet2) # # Which has a network of the correct toplogy. # # To get predictions on its test data takes # a little more work; the following, for example, # fails: # # predict(iris.nnet2, iris.df2[-train,], type = "class") # # as does # # predict(iris.nnet2, iris.df2[-train,], type = "probs") # # We must instead use predict(iris.nnet2, iris.df2[-train,], type = "raw") # which contains the probability estimates of the three classes. # # Or, equivalently, # predict(iris.nnet2, iris.df2[-train,]) # # We can find out which column has the largest value # for each row: # pred <- max.col(predict(iris.nnet2, iris.df2[-train,], type = "raw")) # # Similarly for the true values we have # true <- max.col(targets[-train,]) # # And so, build our classification table as follows # table(c("Setosa", "Versicolor", "Virginica")[true], c("Pred Setosa", "Pred Versicolor", "Pred Virginica")[pred] )