###############################################################################################
##
## R Simulation Codes for "Bayesian Empirical Likelihood Inference with Complex Survey Data"
##
## A. Implement Bayesian Model Selection Procedure
## B. Quantile Regression Modelling with Survey Data
## C. Stratified PPS Sampling
##
## CREATED:     Tuesday, January 28, 2018
## MODIFIED:    Saturday, June 07, 2019
## AUTHORS:     Puying Zhao and Changbao Wu
## AFFILIATION: Department of Statistics and Actuarial Science, University of Waterloo
## EMAIL:       cbwu at uwaterloo dot ca
##
###############################################################################################

set.seed(7654321, kind = NULL)

library(MASS)
library(quantreg)
library(mvtnorm)

#########################################################################################
# R Function for Randomized Systematic PPS Sampling
#########################################################################################
syspps = function(x, n) {
  ##
  ## The population is first randomized!
  ##
  N = length(x)
  U = sample(N, N)
  xx = x[U]
  z = rep(0, N)
  for (i in 1:N)
    z[i] = n * sum(xx[1:i]) / sum(x)
  r = runif(1)
  s = numeric()
  for (i in 1:N) {
    if (z[i] >= r) {
      s = c(s, U[i])
      r = r + 1
    }
  }
  return(s[order(s)])
}

#########################################################################################
# Metropolis-Hastings MCMC
#########################################################################################
# Runs a Metropolis-Hastings MCMC chain for the Bayesian empirical likelihood
# posterior posterior_el() defined below. Proposal steps are sampled from a
# multivariate Gaussian distribution centred at the current state.
#########################################################################################
run_metropolis_MCMC <- function(startvalue, prop_sigma, iterations, burn_in, XY, ds, tau) {
  chain = array(dim = c(iterations + 1, length(startvalue)))
  chain[1, ] = startvalue
  for (i in 1:iterations) {
    proposal = mvrnorm(1, mu = chain[i, ], Sigma = prop_sigma)
    # Metropolis acceptance ratio on the log-posterior scale
    probab = exp(posterior_el(proposal, XY, ds, tau) - posterior_el(chain[i, ], XY, ds, tau))
    if (runif(1) < probab) {
      chain[i + 1, ] = proposal
    } else {
      chain[i + 1, ] = chain[i, ]
    }
  }
  chain <- chain[burn_in:iterations, ]
  if (length(startvalue) > 1)
    accept_rate <- length(unique(chain[, 1])) / (iterations - burn_in)
  else
    accept_rate <- length(unique(chain)) / (iterations - burn_in)
  # message(sprintf("Acceptance ratio = %.4f", accept_rate))
  val <- list("trace" = chain, "acceptance_ratio" = accept_rate)
  return(val)
}
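#########################################################################################
# Minimal stand-alone illustration of syspps() (not part of the simulation; kept
# commented out so the RNG stream under set.seed() above is left unchanged). All names
# below (x_demo, s_demo, pi_demo) are hypothetical. Over repeated draws, unit i is
# selected with probability close to pi_demo[i] = n * x[i] / sum(x).
#########################################################################################
# x_demo  <- rchisq(100, df = 2) + 0.1    # positive size measures for N = 100 units
# s_demo  <- syspps(x_demo, 10)           # sorted indices of the 10 sampled units
# pi_demo <- 10 * x_demo / sum(x_demo)    # first-order inclusion probabilities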
##############################################################################
# Estimate Marginal Likelihood
#
# Reference:
# Chib, S. and Jeliazkov, I. (2001). Marginal likelihood from the
# Metropolis-Hastings output. Journal of the American Statistical
# Association, 96, 270-281.
##############################################################################
marginal.likelihood <- function(trace, lik.fun, prior.fun, num.samples = 1000,
                                log = TRUE, mom, XY, ds, tau) {
  n = length(ds)
  # mean of the posterior draws serves as the high-density point theta.star
  theta.star <- colMeans(trace)
  V = cov(trace)
  lik.star <- lik.fun(theta.star, mom, XY, ds, tau)
  # get samples from the posterior
  g <- sample(1:nrow(trace), num.samples, replace = TRUE)
  theta.g <- trace[g, ]
  q.g <- dmvnorm(theta.g, mean = theta.star, sigma = V, log = FALSE)
  lik.g <- apply(theta.g, 1, lik.fun, mom, XY, ds, tau)
  alpha.g <- sapply(lik.g, function(l) min(1, exp(lik.star - l)))
  # get samples from the proposal
  theta.j <- rmvnorm(num.samples, mean = theta.star, sigma = V)
  lik.j <- apply(theta.j, 1, lik.fun, mom, XY, ds, tau)
  alpha.j <- sapply(lik.j, function(l) min(1, exp(l - lik.star)))
  # posterior ordinate estimate at theta.star (Chib-Jeliazkov identity)
  pi.hat <- mean(alpha.g * q.g) / mean(alpha.j)
  pi.star <- 0
  if (!is.null(prior.fun))
    pi.star <- prior.fun(theta.star)
  ln.m <- lik.star + pi.star - log(pi.hat)
  if (!log)
    ln.m <- exp(ln.m)
  list(ln.m = ln.m, ln.lik.star = lik.star, ln.pi.star = pi.star,
       ln.pi.hat = log(pi.hat))
}

#########################################################################################
# The Following Procedures Calculate the Empirical Likelihood Ratio for Estimating
# Equations
#########################################################################################
#
# The main function el.ee() calculates the empirical likelihood
# ratio for estimating equations.
#
# INPUTS:
#   mx         The survey weighted estimating functions of the form m1(pars, x, ds, tau),
#              which return an n x r matrix with typical element
#              m1_j(pars, x_i, ds_i, tau) for j = 1, ..., r and i = 1, ..., n
#   lam        Optional starting value for the Lagrange multiplier
#   maxit      Optional maximum number of iterations
#   gradtol    Optional tolerance for the convergence test
#   svdtol     Optional tolerance for detecting singularity while solving equations
#   itertrace  Optional flag to print results during iteration
#
# OUTPUTS:
#   -2LLR      Minus twice the log empirical likelihood ratio
#   Pval       Chi-squared p-value for -2LLR on r degrees of freedom
#   lambda     Lagrange multiplier
#   grad       Gradient of the log likelihood
#   hess       Hessian of the log likelihood
#   wts        Relative observation weights at the solution
#   nits       Number of iterations used

logelr.ee <- function(mx, lam) {
  mx <- as.matrix(mx)
  n <- nrow(mx)
  p <- ncol(mx)
  if (n <= p)
    stop("Need more observations than variables in logelr.")
  z <- mx
  arg <- 1 + z %*% lam
  return(-sum(llog(arg, 1/n)))
}

# Pseudo-logarithm and its first two derivatives: equal to log(z), 1/z and -1/z^2
# for z >= eps, with a quadratic extension below the threshold eps
llog <- function(z, eps) {
  ans <- z
  lo <- (z < eps)
  ans[lo] <- log(eps) - 1.5 + 2 * z[lo]/eps - 0.5 * (z[lo]/eps)^2
  ans[!lo] <- log(z[!lo])
  ans
}

llogp <- function(z, eps) {
  ans <- z
  lo <- (z < eps)
  ans[lo] <- 2/eps - z[lo]/eps^2
  ans[!lo] <- 1/z[!lo]
  ans
}

llogpp <- function(z, eps) {
  ans <- z
  lo <- (z < eps)
  ans[lo] <- -1/eps^2
  ans[!lo] <- -1/z[!lo]^2
  ans
}
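#########################################################################################
# Quick sanity check of the pseudo-logarithm family (illustrative only; commented out).
# llog() agrees with log() above the threshold eps and switches to the quadratic
# extension below it, so the EL criterion and its derivatives remain finite even when
# an entry of 1 + z %*% lam is near zero or negative. eps_demo is a hypothetical
# threshold playing the role of 1/n in el.ee() below.
#########################################################################################
# eps_demo <- 0.01
# llog(c(0.5, eps_demo/2), eps_demo)   # first entry equals log(0.5)
# llogp(0.5, eps_demo)                 # equals 1/0.5 = 2 above the threshold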
## EL for estimating equations; adapted from Owen's el.test()
## mx: m1(pars, x, ds, tau)
el.ee <- function(mx, lam, maxit = 25, gradtol = 1e-7, svdtol = 1e-9,
                  itertrace = FALSE, TINY = sqrt(.Machine$double.xmin)) {
  mx <- as.matrix(mx)
  n <- nrow(mx)
  p <- ncol(mx)
  if (n <= p)
    stop("Need more observations than length(mu) in el.test().")
  z <- mx
  scale <- mean(abs(z)) + TINY
  z <- z/scale
  if (!missing(lam)) {
    lam <- as.vector(lam)
    lam <- lam * scale
    if (logelr.ee(z, lam) > 0)
      lam <- rep(0, p)
  }
  if (missing(lam))
    lam <- rep(0, p)
  if (svdtol < TINY)
    svdtol <- TINY
  if (gradtol < TINY)
    gradtol <- TINY
  # step-length weights for mixing the Newton and gradient directions
  nwts <- c(3^-c(0:3), rep(0, 12))
  gwts <- 2^(-c(0:(length(nwts) - 1)))
  gwts <- (gwts^2 - nwts^2)^0.5
  gwts[12:16] <- gwts[12:16] * 10^-c(1:5)
  nits <- 0
  gsize <- gradtol + 1
  while (nits < maxit && gsize > gradtol) {
    arg <- 1 + z %*% lam
    wts1 <- as.vector(llogp(arg, 1/(n)))
    wts2 <- as.vector(-llogpp(arg, 1/(n)))^0.5
    grad <- as.matrix(-z * wts1)
    grad <- as.vector(rowsum(grad, rep(1, nrow(grad))))
    gsize <- mean(abs(grad))
    hess <- z * wts2
    # solve for the Newton step via a (regularized) SVD of the Hessian factor
    svdh <- svd(hess)
    if (min(svdh$d) < max(svdh$d) * svdtol)
      svdh$d <- svdh$d + max(svdh$d) * svdtol
    nstep <- svdh$v %*% (t(svdh$u)/svdh$d)
    nstep <- as.vector(nstep %*% matrix(wts1/wts2, n, 1))
    gstep <- -grad
    if (sum(nstep^2) < sum(gstep^2))
      gstep <- gstep * (sum(nstep^2)^0.5/sum(gstep^2)^0.5)
    ologelr <- -sum(llog(arg, 1/(n)))
    ninner <- 0
    # backtracking search over mixtures of the Newton and gradient steps
    for (i in 1:length(nwts)) {
      nlogelr <- logelr.ee(z, lam + nwts[i] * nstep + gwts[i] * gstep)
      if (nlogelr < ologelr) {
        lam <- lam + nwts[i] * nstep + gwts[i] * gstep
        ninner <- i
        break
      }
    }
    nits <- nits + 1
    if (ninner == 0)
      nits <- maxit
    if (itertrace)
      print(c(lam, nlogelr, gsize, ninner))
  }
  list(`-2LLR` = -2 * nlogelr, Pval = 1 - pchisq(-2 * nlogelr, df = p),
       lambda = lam/scale, grad = grad * scale,
       hess = t(hess) %*% hess * scale^2, wts = wts1, nits = nits)
}

#########################################################################################
# R Function for Survey Weighted Estimating Functions Using Quantile Regression Modelling
#########################################################################################
m1 = function(pars, XY, ds, tau) {
  X = XY[, -1]
  Y = XY[, 1]
  pars = as.numeric(pars)
  # row i is ds_i * (I(y_i < x_i' pars) - tau) * x_i
  mc = c(ds * ((Y < X %*% pars) - tau)) * X
  return(mc)
}

#########################################################################################
## R Function for the Empirical Likelihood Function
#########################################################################################
el_fun <- function(pars, mom, XY, ds, tau) {
  e <- mom(pars, XY, ds, tau)
  log_likelihood = el.ee(e)$'-2LLR'
  return(-0.5 * log_likelihood)
}

#########################################################################################
## R Function for the Prior Distribution
#########################################################################################
prior_reg <- function(pars) {
  d_par = length(pars)
  prior_d = rep(0, d_par)
  # independent N(0, 100^2) priors on the regression coefficients
  for (i in 1:d_par) {
    prior_d[i] = dnorm(pars[i], 0, 100, log = TRUE)
  }
  return(sum(prior_d))
}

#########################################################################################
## R Function for the Bayesian Empirical Likelihood Posterior Distribution
#########################################################################################
posterior_el <- function(pars, XY, ds, tau) {
  e <- m1(pars, XY, ds, tau)
  log_likelihood = el.ee(e)$'-2LLR'
  prior <- prior_reg(pars)
  return(-0.5 * log_likelihood + prior)
}

#########################################################################################
# Compute Bayesian Model Selection Results Based on 500 Simulation Runs
#########################################################################################
#---------------------------------------------------
## Finite Population Generation
#---------------------------------------------------
nsim = 500
N1 = 4000
N2 = 6000
N3 = 10000
N = N1 + N2 + N3
n1 = 15
n2 = 25
n3 = 60
n = n1 + n2 + n3
tau = 0.25
beta = c(0.5, 1, 0, 0)
p = length(beta)
rho = 0.7
K = 7
mf = matrix(0, nsim, K)   # model-selection indicators, one row per simulation run
X1 = runif(N)
X2 = rnorm(N)
X3 = 0.5 + X1 + X2 + rnorm(N)
X = cbind(X1, X2, X3)
X = cbind(rep(1, N), X)
XX = rchisq(N, 1) / 2     # extra covariate unrelated to Y, used by candidate model 7
err_n = rnorm(N)
# alternative error distributions (only err_n is used below)
err_ca = rcauchy(N)
err_t = rt(N, 3)
err_chi = rchisq(N, 2)
# centring by qnorm(tau) makes the tau-th conditional quantile of Y equal to X %*% beta
Y = X %*% beta + (err_n - qnorm(tau))
# Y = X %*% beta + (1 + X[,2]) * (err_n - qnorm(tau))
XY = cbind(Y, X)
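#########################################################################################
# Illustrative population-level check (commented out): with unit weights ds = 1 the
# estimating functions in m1() should average to roughly zero at the true beta, since
# P(Y < X %*% beta) = tau by the construction of the error term above.
#########################################################################################
# colMeans(m1(beta, XY, rep(1, N), tau))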
fit1 = rq(Y ~ X - 1, tau = tau)
beta_N = fit1$coefficients   # census quantile regression fit, used as starting values
#---------------------------------------------
# Size variable for PPS sampling
#---------------------------------------------
Z = X[, 2] + Y
Z = Z - min(Z) + 0.2   # shift so that min(Z) = 0.2
Z = Z / sum(Z)         # normalize so sum(Z) = 1; Z is correlated with Y (informative design)
Z1 = Z[1:N1] / sum(Z[1:N1])
Z2 = Z[(N1 + 1):(N1 + N2)] / sum(Z[(N1 + 1):(N1 + N2)])
Z3 = Z[(N1 + N2 + 1):N] / sum(Z[(N1 + N2 + 1):N])

#---------------------------------------------------
## Repeated Simulations Start from Here
#---------------------------------------------------
# Candidate models (XYs columns are Y, 1, X1, X2, X3; the true model is Model 1):
#   Model 1: intercept + X1          Model 5: intercept + X2
#   Model 2: intercept + X1 + X2     Model 6: intercept + X3
#   Model 3: intercept + X1 + X3     Model 7: intercept + XX
#   Model 4: intercept + X1 + X2 + X3
for (m in 1:nsim) {
  # stratified PPS sampling: one randomized systematic PPS sample per stratum
  sam1 = syspps(Z1, n1)
  sam2 = syspps(Z2, n2)
  sam3 = syspps(Z3, n3)
  XYs1 = XY[sam1, ]
  XYs2 = XY[N1 + sam2, ]
  XYs3 = XY[N1 + N2 + sam3, ]
  pi11 = n1 * Z1[sam1]
  pi12 = n2 * Z2[sam2]
  pi13 = n3 * Z3[sam3]
  XYs = rbind(XYs1, XYs2, XYs3)
  pis = c(pi11, pi12, pi13)
  ds = 1 / pis   # basic design weights
  XXs1 = XX[sam1]
  XXs2 = XX[N1 + sam2]
  XXs3 = XX[N1 + N2 + sam3]
  XXs = c(XXs1, XXs2, XXs3)
  #---------------------------------------------------
  # Model 1: intercept + X1
  mcmc_gmm1 = run_metropolis_MCMC(startvalue = beta_N[1:2],
                                  prop_sigma = 0.25 * diag(abs(beta_N[1:2])),
                                  iterations = 5000, burn_in = 2500,
                                  XY = XYs[, 1:3], ds, tau)
  ml_1 = marginal.likelihood(trace = mcmc_gmm1$trace, lik.fun = el_fun,
                             prior.fun = prior_reg, num.samples = 1000, log = TRUE,
                             mom = m1, XY = XYs[, 1:3], ds = ds, tau = tau)
  #---------------------------------------------------
  # Model 2: intercept + X1 + X2
  mcmc_gmm2 = run_metropolis_MCMC(startvalue = beta_N[1:3],
                                  prop_sigma = 0.25 * diag(abs(beta_N[1:3])),
                                  iterations = 5000, burn_in = 2500,
                                  XY = XYs[, 1:4], ds, tau)
  ml_2 = marginal.likelihood(trace = mcmc_gmm2$trace, lik.fun = el_fun,
                             prior.fun = prior_reg, num.samples = 1000, log = TRUE,
                             mom = m1, XY = XYs[, 1:4], ds = ds, tau = tau)
  #---------------------------------------------------
  # Model 3: intercept + X1 + X3
  mcmc_gmm3 = run_metropolis_MCMC(startvalue = c(beta_N[1:2], beta_N[4]),
                                  prop_sigma = 0.25 * diag(abs(c(beta_N[1:2], beta_N[4]))),
                                  iterations = 5000, burn_in = 2500,
                                  XY = cbind(XYs[, 1:3], XYs[, 5]), ds, tau)
  ml_3 = marginal.likelihood(trace = mcmc_gmm3$trace, lik.fun = el_fun,
                             prior.fun = prior_reg, num.samples = 1000, log = TRUE,
                             mom = m1, XY = cbind(XYs[, 1:3], XYs[, 5]), ds = ds, tau = tau)
  #---------------------------------------------------
  # Model 4: intercept + X1 + X2 + X3 (full model)
  mcmc_gmm4 = run_metropolis_MCMC(startvalue = beta_N,
                                  prop_sigma = 0.25 * diag(abs(beta_N)),
                                  iterations = 5000, burn_in = 2500,
                                  XY = XYs, ds, tau)
  ml_4 = marginal.likelihood(trace = mcmc_gmm4$trace, lik.fun = el_fun,
                             prior.fun = prior_reg, num.samples = 1000, log = TRUE,
                             mom = m1, XY = XYs, ds = ds, tau = tau)
  #---------------------------------------------------
  # Model 5: intercept + X2
  mcmc_gmm5 = run_metropolis_MCMC(startvalue = c(beta_N[1], beta_N[3]),
                                  prop_sigma = 0.25 * diag(abs(c(beta_N[1], beta_N[3]))),
                                  iterations = 5000, burn_in = 2500,
                                  XY = cbind(XYs[, 1:2], XYs[, 4]), ds, tau)
  ml_5 = marginal.likelihood(trace = mcmc_gmm5$trace, lik.fun = el_fun,
                             prior.fun = prior_reg, num.samples = 1000, log = TRUE,
                             mom = m1, XY = cbind(XYs[, 1:2], XYs[, 4]), ds = ds, tau = tau)
  #---------------------------------------------------
  # Model 6: intercept + X3
  mcmc_gmm6 = run_metropolis_MCMC(startvalue = c(beta_N[1], beta_N[4]),
                                  prop_sigma = 0.25 * diag(abs(c(beta_N[1], beta_N[4]))),
                                  iterations = 5000, burn_in = 2500,
                                  XY = cbind(XYs[, 1:2], XYs[, 5]), ds, tau)
  ml_6 = marginal.likelihood(trace = mcmc_gmm6$trace, lik.fun = el_fun,
                             prior.fun = prior_reg, num.samples = 1000, log = TRUE,
                             mom = m1, XY = cbind(XYs[, 1:2], XYs[, 5]), ds = ds, tau = tau)
  #---------------------------------------------------
  # Model 7: intercept + XX (covariate unrelated to Y)
  mcmc_gmm7 = run_metropolis_MCMC(startvalue = beta_N[1:2],
                                  prop_sigma = 0.25 * diag(abs(beta_N[1:2])),
                                  iterations = 5000, burn_in = 2500,
                                  XY = cbind(XYs[, 1:2], XXs), ds, tau)
  ml_7 = marginal.likelihood(trace = mcmc_gmm7$trace, lik.fun = el_fun,
                             prior.fun = prior_reg, num.samples = 1000, log = TRUE,
                             mom = m1, XY = cbind(XYs[, 1:2], XXs), ds = ds, tau = tau)
  #---------------------------------------------------
  # select the model with the largest estimated log marginal likelihood
  lik_mat = c(ml_1$ln.m, ml_2$ln.m, ml_3$ln.m, ml_4$ln.m, ml_5$ln.m, ml_6$ln.m, ml_7$ln.m)
  lik_max = max(lik_mat)
  for (k in 1:K) {
    if (lik_mat[k] == lik_max) {
      mf[m, k] = 1
    } else {
      mf[m, k] = 0
    }
  }
  cat("ML =", lik_mat, "\n")            # log marginal likelihoods for the current run
  cat("MS =", colSums(mf) / m, "\n")    # running model-selection frequencies
  print(m)
}
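#########################################################################################
# Post-simulation summary (a minimal sketch; the labels M1, ..., M7 are hypothetical,
# with M1 the true model containing the intercept and X1 only). After the loop
# finishes, each row of mf holds one indicator vector, so the column means give the
# empirical frequency with which each candidate model attains the largest estimated
# marginal likelihood.
#########################################################################################
# ms_freq <- colSums(mf) / nsim
# names(ms_freq) <- paste0("M", 1:K)
# print(round(ms_freq, 3))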