#################################################################
#
# Example for how to run sga() and pga()
# by Mu Zhu
# Original version = Apr 2005
# This version     = Aug 2006; extra comments added Jan 2007
#
# Basically, the syntax is: pga(y,X,m,N,B,prior)
# where
# y = response vector
# X = predictor matrix
# m = population size (usually set to be ncol(X) or ncol(X)+1)
# N = number of generations per path
# B = number of parallel paths
# prior = density of 1's among initial population
#
#################################################################

# clear everything
rm(list=ls(all=TRUE))

# get functions; this is like calling library(pga) if
# such a library is packaged ...
source("main-func.s")
source("gcv-fitness.s")          # we always use GCV in practice

#################################################################
# BEGIN EXAMPLE 1 
#################################################################

# generate some data; similar to 
# Technometrics 48(4), 491-502, Sec 1.2, 2.1, 4.3 (variation 0)

set.seed(2006) 
sigma <- 1
N <- 120
d <- 100
truth <- c(20,50,80)
beta <- rep(0,d)
beta[truth] <- c(1,2,3) 
X <- matrix(rnorm(N*d), N, d)
y <- X %*% beta + sigma*rnorm(N)

# SGA
singleton <- sga(y,X,m=d,N=200,mutation=1/d, prior=0.25)
which(singleton$best==1)

# check convergence --- will use half the number of generations
# needed to achieve convergence in PGA
check <- numeric(5)
for (i in 1:5) {
 check[i] <- sga(y,X,m=d,N=200,mutation=1/d, prior=0.25, check=T)$ans
}
mean(check)                      # this turns out to be about 16

# PGA
junk <- pga(y,X,m=d,N=16,B=50,mutation=1/d,prior=0.25)
judge<-apply(junk,2,mean)
get.pga.ans(judge, 50)
get.topfew(judge, num=3)

# make plots
plot.movie(d, junk, truth, filename="pga100")
plot.order(judge, num=50, grid=T)
plot.bubble(d, judge, truth, num=50, grid=T)

#################################################################
# END EXAMPLE 1
#################################################################


#################################################################
# BEGIN EXAMPLE 2 #
#################################################################

# repeated simulation; exactly the same as 
# Technometrics 48(4), 491-502, Sec 4.3 (variation 0)

sigma <- 1
N <- 40
d <- 20
truth <- c(5,10,15)
beta <- rep(0,d)
beta[truth] <- c(1,2,3) 

SIM<-100
rslt.hard <-rslt.soft <- matrix(0, SIM, d)

for (i in 1:SIM) {
# generate some data
set.seed(i)
X <- matrix(rnorm(N*d), N, d)
y <- X %*% beta + sigma*rnorm(N)
# PGA
junk <- pga(y,X,m=d,N=8,B=25,mutation=1/d,prior=0.35)
judge<-apply(junk,2,mean)
ans<-get.pga.ans(judge, 25)
rslt.hard[i,ans]<-1
rslt.soft[i,]<-judge
}

rbind(
summary.hard(rslt.hard, truth, "PGAHard"),
summary.soft(rslt.soft, truth, "PGASoft")
)->junk

print(junk)

#################################################################
# END EXAMPLE 2
#################################################################