#################################################################
#
# Main Functions for PGA and SGA
# by Mu Zhu
# Apr 2005
#
#################################################################

pga <- function(y, X, m, N, B=100, mutation=1/ncol(X), 
  start=NULL, prior = 0.3, ...)
{
# performs parallel genetic algorithm for variable selection
# by Mu Zhu
# first version = Jan 2002
# packaged again Apr 2005
#
# Inputs:
# m        = population size, default set to roughly ncol(X)
# N        = number of generations per path
# B        = number of parallel paths
# mutation = mutation rate, default set to roughly 1/m
# start    = starting population, pretty much useless
# prior    = controls the density of 1's in the initial population

  b <- 0; p <- ncol(X)
  rslt <- matrix(0, B, p)
  cat("Starting PGA...\n")
  while (b < B)
  {
    print(b)
    b <- b + 1    
    tmp <- sga(y, X, m, N, start, mutation, prior)
    rslt[b,] <- tmp$combo.gene
  }
  return(rslt)
}
	
sga <- function(y, X, m, N, start=NULL, mutation=1/ncol(X), 
  prior=0.3, check.convg=F, thresh=0.05)
{
# performs single genetic algorithm for variable selection
# by Mu Zhu
# first version = Jan 2002
# packaged again Apr 2005

  X <- as.matrix(X)			
  n <- nrow(X); p <- ncol(X)
  perf <- convg <- numeric(N+1)

  if (check.convg)
  {
     entro<-numeric(N)
  }

  if (length(mutation) != N)
  {
  # possible to use a different mutation rate for each generation
    mutation <- rep(mutation[1], N)
  }

  if (is.null(start))
  {
    curr <- matrix(sample(0:1, size=(m*p), prob=c((1-prior), prior), replace=T),m,p)
  }
  else
  {
    curr <- start
  }

  for (i in 1:N)
  {
    rslt <- onegen(curr, y, X, m, mutation[i])
    curr <- rslt$new.gen

    # addition Jan 2005 by Mu Zhu
    if (check.convg)
    {
      pp <- apply(curr,2,mean)
      qq <- 1-pp  
      lp <- ifelse(pp==0, 0, log(pp,base=2))
      lq <- ifelse(qq==0, 0, log(qq,base=2))
      entro[i]<-entropy <- mean(-(pp*lp+qq*lq))
      print(entropy)
      if (i > 5 && mean(entro[(i-5):i]) < thresh) break
    }
    # addition ends

    convg[i] <- rslt$prev.best
    perf[i] <- rslt$prev.avg
  }
  final.score <- fitness(curr, y, X, m)
  convg[N+1] <- min(final.score)
  perf[N+1] <- mean(final.score)

  if (check.convg)      # modified Jan 2005 by Mu Zhu
  {
    cat("Convergence achieved with ", i, "generations...\n")
    return(list(ans=i, popn=curr, combo.gene=apply(curr,2,mean)))
  }
  else{                 # original return statement
   return(list(popn=curr, combo.gene=apply(curr,2,mean),
    best=curr[order(final.score),][1,],
    optval=min(final.score),
    convg, perf))
  }
}
	
onegen <- function(curr, y, X, m, mutation)
{
# one iteration of GA
# by Mu Zhu
# Jan 2002
  n <- nrow(X); p <- ncol(X)

  # evaluate the fitness of members of the current generation
  orig.score <- fitness(curr, y, X, m)

  # survival
  survived <- (1:m)[order(orig.score)][(1:(m/2))]
  new <- curr[survived,]
	
  # reproduction and mutation
  offspring <- get.offspring(new, p, m=m/2, mutation)
  combo <- rbind(new, offspring)
  return(list(prev.best=min(orig.score),
         prev.avg=mean(orig.score), 
         new.gen=combo))
}

get.offspring <- function(X, p, m, mutation)
{
# reproduction and mutation operations for GA
# by Mu Zhu
# Jan 2002
  rslt <- matrix(0, m, p)	
  for (i in 1:m)
  {
    # crossover/reproduction
    father <- sample(1:m, size=1)
    mother <- sample(1:m, size=1)
    breakpt <- sample(1:p, size=1)
    if (breakpt == 1)
    {
      tmp <- as.vector(X[mother,])
    }
    else if(breakpt == p)
    {
      tmp <- as.vector(X[father,])
    }
    else
    {
      tmp <- as.vector(
             c(X[father,][1:breakpt],
	         X[mother,][(breakpt+1):p]))
    }		
    # mutation
    u <- runif(1,0,1)
    if (u < mutation)
    {
        pos <- sample(1:p, size=1)
        tmp[pos] <- 1 - tmp[pos]
    }  
    rslt[i,] <- tmp
  }
  return(rslt)	
}	

plot.bubble <- function(p, j, truth=NULL, num, grid=F, filename=NULL, ...)
{
# make a bubble plot
# by Mu Zhu
# first version = Jan 2002
# packaged again Apr 2005
#
# Inputs:
# p        = dimension; how many variables in total
# j        = current generation summary to be plotted
# truth    = for simulation problems only; true variables are plotted with a different symbol
# filename = if NULL, plot to standard R graphics window
# num      = how many parallel paths are used
# grid     = whether horizontal grid is turned on or not in the plot

 if (!is.null(filename)) {
  postscript(file=paste(filename, "-combo", ".ps", sep=""), 
   width=8, height=8, horiz=F)
 }
 plot(1:p, j, type="n", 
  xlab="Variables", 
  ylab="Importance", 
  main=paste("Bubble Plot: B =", num, "Parallel Paths", sep=" "), 
  cex.lab=1.5,
  cex.main=1.5,
    ...)
 if (grid) abline(h=(1:9)/10, col="red", lty=2)
 if (!is.null(truth))
 {
  points((1:p)[-truth], j[-truth], pch=4)
  points((1:p)[truth], j[truth], pch=1, cex=1.5) 
 }
 else
 {
   points((1:p), j, pch=4)
 }
 if (!is.null(filename)) {
  dev.off()
 }
}

plot.movie <- function(p, info, truth=NULL, filename, ...)
{
# make a dynamic bubble plot
# by Mu Zhu
# first version = Jan 2002
# packaged again Apr 2005
#
# Inputs:
# p        = dimension; how many variables in total
# info     = information to be plotted
# truth    = for simulation problems only; true variables are plotted with a different symbol
# filename = can't be NULL for dynamic plots
 
  B <- nrow(info)
  postscript(file=paste(filename, "-movie", ".ps", sep=""), 
    width=8, height=8, horiz=F)
  j.all<-info[1,]  

  plot(1:p, j.all, type="n", 
   xlab="Variables", 
   ylab="Importance", 
   main=paste("Using B = 1 Parallel Paths"), 
   cex.lab=1.5,
   cex.main=1.5, 
   ylim=c(0,1),
   ...)

  if (!is.null(truth))
  {
   points((1:p)[-truth], j.all[-truth], pch=4)
   points((1:p)[truth], j.all[truth], pch=1, cex=1.5) 
  }
  else
  {
   points((1:p), j.all, pch=1)
  }

  for (i in 2:B)
  {
   j.all<-apply(info[1:i,], 2, mean)  

   plot(1:p, j.all, type="n", 
   xlab="Variables", 
   ylab="Importance", 
   main=paste("Using B =", i, "Parallel Paths", sep=" "), 
   cex.lab=1.5,
   cex.main=1.5,
   ylim=c(0,1),
    ...)

   if (!is.null(truth))
   {
    points((1:p)[-truth], j.all[-truth], pch=4)
    points((1:p)[truth], j.all[truth], pch=1, cex=1.5) 
   }
   else
   {
    points((1:p), j.all, pch=1)
   }

  }
  dev.off()
}


plot.order <- function(j.all, num, grid = F,  filename=NULL, ...)
{
# make an order plot
# by Mu Zhu
# first version = Jan 2002
# packaged again Apr 2005
#
# Inputs:
# j.all    = information to be plotted
# filename = if NULL, plots to standard R graphics device
# num      = number of parallel paths used
# grid     = whether horizontal grid lines should be turned on or not
  if (!is.null(filename)) {
   postscript(file=paste(filename, "-val.ps", sep=""), 
    height=8, width=8, horiz=F)
  }
  plot(j.all[order(-j.all)], 
    xlab="Ordered Variables", 
    ylab="Importance",
    main=paste("Order Plot: B=", num, "Parallel Paths", sep=" "), 
    cex.lab=1.5, cex.main=1.5, ...)
  if (grid) abline(h=(1:9)/10, col="red", lty=2)
  if (!is.null(filename)) {
    dev.off()
  }
}

get.gap <- function(obj, subset)
{
  tmp <- apply(obj,2,mean)
  return(min(tmp[subset])-max(tmp[-subset]))
}

get.pga.ans <- function(judge, B)
{
# make automatic variable selection decisions based on the largest
# gap in the bubble plot
# by Mu Zhu
# Jan 2005
 x <- sort(judge, decreasing=T); n<-length(x)
 gap <-   x[1:(n-1)] - x[2:n]; m<-max(gap)
 number <-  which(gap==m)
 if (m < 0.8225/sqrt(B)) cat("Gap not significant!\n")
 ans<-which(judge>=judge[order(judge,decreasing=T)][number])
 return(ans)
}

get.topfew <- function(judge, num=5)
{
# pick the top few numbers of variables in the bubble plot
# by Mu Zhu
# Jan 2005
 thresh <- judge[order(judge,decreasing=T)][num]
 ans <- which(judge >= thresh)
 return(ans)
}

summary.hard <- function(rslt, truth, name)
{
# summarizes simulation results; hard performanace metric
# by Mu Zhu
# Jan 2005
 q <- length(truth)
 extra <- apply(rslt[,-truth],1,sum)
 miss <- q - apply(rslt[,truth],1,sum)
 correct <- (extra==0) & (miss==0)
 x<-data.frame(
 perc.correct=mean(correct))
 dimnames(x)[[1]]<-name
 return(x)
}

summary.soft <- function(sv, truth, name)
{
# summarizes simulation results; soft performanace metric
# by Mu Zhu
# Feb 2005
 q <- ncol(sv); k<-length(truth)
 ind<-rep(0,q); ind[truth]<-1
 correct <- 0
 for (i in 1:nrow(sv))
 {
  ind.sorted <- ind[order(sv[i,], decreasing=T)]
  if (sum(which(ind.sorted==1)) == sum(1:k))
   correct <- correct + 1
 }
 x<-data.frame(
 perc.correct=correct/nrow(sv))
 dimnames(x)[[1]]<-name
 return(x)
}