################################################################# # # Example for how to run sga() and pga() # by Mu Zhu # Original version = Apr 2005 # This version = Aug 2006; extra comments added Jan 2007 # # Basically, the syntax is: pga(y,X,m,N,B,prior) # where # y = response vector # X = predictor matrix # m = population size (usually set to be ncol(X) or ncol(X)+1) # N = number of generations per path # B = number of parallel paths # prior = density of 1's among initial population # ################################################################# # clear everything rm(list=ls(all=TRUE)) # get functions; this is like calling library(pga) if # such a library is packaged ... source("main-func.s") source("gcv-fitness.s") # we always use GCV in practice ################################################################# # BEGIN EXAMPLE 1 ################################################################# # generate some data; similar to # Technometrics 48(4), 491-502, Sec 1.2, 2.1, 4.3 (variation 0) set.seed(2006) sigma <- 1 N <- 120 d <- 100 truth <- c(20,50,80) beta <- rep(0,d) beta[truth] <- c(1,2,3) X <- matrix(rnorm(N*d), N, d) y <- X %*% beta + sigma*rnorm(N) # SGA singleton <- sga(y,X,m=d,N=200,mutation=1/d, prior=0.25) which(singleton$best==1) # check convergence --- will use half the number of generations # needed to achieve convergence in PGA check <- numeric(5) for (i in 1:5) { check[i] <- sga(y,X,m=d,N=200,mutation=1/d, prior=0.25, check=T)$ans } mean(check) # this turns out to be about 16 # PGA junk <- pga(y,X,m=d,N=16,B=50,mutation=1/d,prior=0.25) judge<-apply(junk,2,mean) get.pga.ans(judge, 50) get.topfew(judge, num=3) # make plots plot.movie(d, junk, truth, filename="pga100") plot.order(judge, num=50, grid=T) plot.bubble(d, judge, truth, num=50, grid=T) ################################################################# # END EXAMPLE 1 ################################################################# ################################################################# # BEGIN EXAMPLE 2 # ################################################################# # repeated simulation; exactly the same as # Technometrics 48(4), 491-502, Sec 4.3 (variation 0) sigma <- 1 N <- 40 d <- 20 truth <- c(5,10,15) beta <- rep(0,d) beta[truth] <- c(1,2,3) SIM<-100 rslt.hard <-rslt.soft <- matrix(0, SIM, d) for (i in 1:SIM) { # generate some data set.seed(i) X <- matrix(rnorm(N*d), N, d) y <- X %*% beta + sigma*rnorm(N) # PGA junk <- pga(y,X,m=d,N=8,B=25,mutation=1/d,prior=0.35) judge<-apply(junk,2,mean) ans<-get.pga.ans(judge, 25) rslt.hard[i,ans]<-1 rslt.soft[i,]<-judge } rbind( summary.hard(rslt.hard, truth, "PGAHard"), summary.soft(rslt.soft, truth, "PGASoft") )->junk print(junk) ################################################################# # END EXAMPLE 2 #################################################################