###############################################################################################
##
## R Simulation Codes for "Bayesian Empirical Likelihood Inference with Complex Survey Data"
##
## A. Obtain Bayesian Point Estimators Under Four Different Priors
## B. Quantile Regression Modelling with Survey Data
## C. PPS Sampling Without Replacement with Large f = n / N
##
## CREATED:     Tuesday, January 03, 2018
## MODIFIED:    Saturday, June 01, 2019
## AUTHORS:     Puying Zhao and Changbao Wu
## AFFILIATION: Department of Statistics and Actuarial Science, University of Waterloo
## EMAIL:       cbwu at uwaterloo dot ca
##
###############################################################################################

set.seed(7654321, kind = NULL)

library(MASS)
library(quantreg)
library(mvtnorm)

#########################################################################################
# R Function for Randomized Systematic PPS Sampling
#########################################################################################

syspps = function(x, n) {
  ##
  ## The population is first randomized!
  ##
  N = length(x)
  U = sample(N, N)
  xx = x[U]
  z = rep(0, N)
  for (i in 1:N)
    z[i] = n * sum(xx[1:i]) / sum(x)
  r = runif(1)
  s = numeric()
  for (i in 1:N) {
    if (z[i] >= r) {
      s = c(s, U[i])
      r = r + 1
    }
  }
  return(s[order(s)])
}
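#########################################################################################
# Usage sketch for syspps() (illustration only; not part of the original simulation).
# Under randomized systematic PPS, unit i is sampled with inclusion probability
# n * x_i / sum(x), provided n * x_i / sum(x) <= 1 for all i. The toy size variable
# x_toy below is hypothetical. Wrapped in if (FALSE) so that sourcing this file does
# not execute it or disturb the random number stream of the simulation.
#########################################################################################

if (FALSE) {
  x_toy  = runif(20, 1, 10)                 # toy size measure for 20 population units
  s_toy  = syspps(x_toy, 5)                 # labels of the 5 sampled units
  pi_toy = 5 * x_toy[s_toy] / sum(x_toy)    # first-order inclusion probabilities
}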
#########################################################################################
# Metropolis-Hastings MCMC
#########################################################################################
# Runs a Metropolis-Hastings MCMC chain for a given log-likelihood function.
# Proposal steps are sampled from a Gaussian distribution, either in a single
# step or sequentially over the parameter space.
#
# Input:
#   pars       : starting value of the chain
#   lik.fun    : log-likelihood function
#   prior.fun  : log prior density function
#   V          : variance-covariance matrix for the proposal
#   par_names  : optional parameter names
#   maxit      : length of the chain
#   thin       : number of proposals generated per stored iteration
#   burn_in    : number of initial iterations discarded from the returned chain
#   sequential : TRUE or FALSE, whether proposals are chosen for all parameters
#                in a single step or sequentially
#   pb         : TRUE or FALSE, whether to display a progress bar
#   pars.info, x, ds, tau : arguments passed on to lik.fun and prior.fun
#
# Output:
#   list()
#     trace            : matrix with the posterior samples (after burn-in)
#     par_names        : parameter names
#     acceptance_ratio : overall acceptance rate of the chain
##############################################################################

metrop <- function(pars, lik.fun, prior.fun, V = diag(pars), par_names = NULL,
                   maxit, thin, burn_in, sequential = FALSE, pb = FALSE,
                   pars.info, x, ds, tau) {
  n = length(ds)
  if (is.null(par_names)) par_names <- letters[1:length(pars)]
  if (!sequential) {
    if (!require(mvtnorm)) stop("You need to install the mvtnorm library.")
  }
  if (pb) {
    .pb <- txtProgressBar(0, maxit, style = 3)
  }
  ndim <- length(pars)
  chain <- matrix(NA, nrow = maxit, ncol = ndim)
  lik <- vector(length = maxit)
  last <- pars
  chain[1, ] <- pars
  last.lik <- lik.fun(pars, x, ds, tau)
  lik[1] <- last.lik
  last.prior <- prior.fun(pars, pars.info, n)
  it <- 1
  naccept <- 0
  if (is.finite(last.lik)) {
    for (it in seq(2, maxit)) {
      if (pb) setTxtProgressBar(.pb, it)
      if (sequential) {
        # One-at-a-time Gaussian random walk updates
        for (j in 1:thin) {
          for (i in 1:length(pars)) {
            if (V[i, i] > 0) {
              accept <- FALSE
              proposal <- last
              proposal[i] <- rnorm(1, mean = last[i], sd = sqrt(V[i, i]))
              proposal.prior <- prior.fun(proposal, pars.info, n)
              if (is.finite(proposal.prior)) {
                proposal.lik <- lik.fun(proposal, x, ds, tau)
                alpha <- exp(proposal.lik + proposal.prior - last.lik - last.prior)
                if (alpha > runif(1)) accept <- TRUE
              }
              if (accept) {
                last <- proposal
                last.lik <- proposal.lik
                last.prior <- proposal.prior
                naccept <- naccept + 1 / sum(diag(V) > 0)
              }
            }
          }
        }
        chain[it, ] <- last
        lik[it] <- last.lik
      } else {
        # Joint Gaussian random walk update of all parameters
        for (j in 1:thin) {
          accept <- FALSE
          if (length(pars) > 1)
            proposal <- rmvnorm(1, mean = last, sigma = V)
          if (length(pars) == 1)
            proposal <- rnorm(1, mean = last, sd = sqrt(V))  # V is a variance, so sd = sqrt(V)
          proposal.prior <- prior.fun(proposal, pars.info, n)
          if (is.finite(proposal.prior)) {
            proposal.lik <- lik.fun(proposal, x, ds, tau)
            alpha <- exp(proposal.lik + proposal.prior - last.lik - last.prior)
            if (alpha > runif(1)) accept <- TRUE
          }
          if (accept) {
            last <- proposal
            last.lik <- proposal.lik
            last.prior <- proposal.prior
            naccept <- naccept + 1
          }
        }
        chain[it, ] <- last
        lik[it] <- last.lik
        # if (!pb) {
        #   message(sprintf("Acceptance ratio = %.4f", naccept / (it * thin)))
        # }
      }
    }
  }
  if (pb) close(.pb)
  acceptance_ratio = naccept / (maxit * thin)
  chain <- chain[burn_in:maxit, ]
  val <- list("trace" = chain, "par_names" = par_names,
              "acceptance_ratio" = acceptance_ratio)
  return(val)
}
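#########################################################################################
# A minimal sketch of how metrop() is called (illustration only). The toy target below
# is a standard bivariate normal: toy.lik and toy.prior are hypothetical stand-ins that
# ignore x, ds and tau, with a flat prior on the log scale. Wrapped in if (FALSE) so
# that sourcing this file does not run it or disturb the simulation's RNG stream.
#########################################################################################

if (FALSE) {
  toy.lik   <- function(pars, x, ds, tau) sum(dnorm(pars, 0, 1, log = TRUE))
  toy.prior <- function(pars, pars.info, n) 0
  toy.mcmc  <- metrop(pars = c(0, 0), lik.fun = toy.lik, prior.fun = toy.prior,
                      V = 0.5 * diag(2), par_names = c("a", "b"),
                      maxit = 2000, thin = 1, burn_in = 1000,
                      pars.info = c(0, 0), x = NULL, ds = rep(1, 50), tau = 0.5)
  toy.mcmc$acceptance_ratio
  colMeans(toy.mcmc$trace)   # should be near (0, 0)
}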
#########################################################################################
# The Following Procedures Calculate the Empirical Likelihood Ratio for Estimating
# Equations
#########################################################################################
#
# The main function el.ee() calculates the empirical likelihood
# ratio for estimating equations.
#
# INPUTS:
#   mx        : The survey weighted estimating functions of the form m1(pars, x, ds, tau),
#               which returns an n x r matrix with typical element
#               m1_j(pars, x_i, ds_i, tau) for j = 1, ..., r and i = 1, ..., n
#   lam       : Optional starting value for the Lagrange multiplier
#   maxit     : Optional maximum number of iterations
#   gradtol   : Optional tolerance for the convergence test
#   svdtol    : Optional tolerance for detecting singularity
#               while solving the equations
#   itertrace : Optional flag to print results during iteration
#
# OUTPUTS (a list):
#   -2LLR  : minus twice the log empirical likelihood ratio
#   Pval   : p-value from the chi-squared calibration with df = r
#   lambda : Lagrange multiplier
#   grad   : Gradient of the log likelihood
#   hess   : Hessian of the log likelihood
#   wts    : Relative observation weights at the solution
#   nits   : Number of iterations used

logelr.ee <- function(mx, lam) {
  mx <- as.matrix(mx)
  n <- nrow(mx)
  p <- ncol(mx)
  if (n <= p)
    stop("Need more observations than variables in logelr.")
  z <- mx
  arg <- 1 + z %*% lam
  return(-sum(llog(arg, 1/n)))
}

## llog() and its first two derivatives: the log function with a quadratic
## extension below eps, as used in Owen's empirical likelihood computations.
llog <- function(z, eps) {
  ans <- z
  lo <- (z < eps)
  ans[lo] <- log(eps) - 1.5 + 2 * z[lo]/eps - 0.5 * (z[lo]/eps)^2
  ans[!lo] <- log(z[!lo])
  ans
}

llogp <- function(z, eps) {
  ans <- z
  lo <- (z < eps)
  ans[lo] <- 2/eps - z[lo]/eps^2
  ans[!lo] <- 1/z[!lo]
  ans
}

llogpp <- function(z, eps) {
  ans <- z
  lo <- (z < eps)
  ans[lo] <- -1/eps^2
  ans[!lo] <- -1/z[!lo]^2
  ans
}

## EL from Owen, for estimating equations
## mx: m1(pars, x, ds, tau)
el.ee <- function(mx, lam, maxit = 25, gradtol = 1e-7, svdtol = 1e-9,
                  itertrace = F, TINY = sqrt(.Machine$double.xmin)) {
  mx <- as.matrix(mx)
  n <- nrow(mx)
  p <- ncol(mx)
  if (n <= p)
    stop("Need more observations than estimating functions in el.ee().")
  z <- mx
  scale <- mean(abs(z)) + TINY
  z <- z/scale
  if (!missing(lam)) {
    lam <- as.vector(lam)
    lam <- lam * scale
    if (logelr.ee(z, lam) > 0)
      lam <- rep(0, p)
  }
  if (missing(lam))
    lam <- rep(0, p)
  if (svdtol < TINY) svdtol <- TINY
  if (gradtol < TINY) gradtol <- TINY
  nwts <- c(3^-c(0:3), rep(0, 12))
  gwts <- 2^(-c(0:(length(nwts) - 1)))
  gwts <- (gwts^2 - nwts^2)^0.5
  gwts[12:16] <- gwts[12:16] * 10^-c(1:5)
  nits <- 0
  gsize <- gradtol + 1
  while (nits < maxit && gsize > gradtol) {
    arg <- 1 + z %*% lam
    wts1 <- as.vector(llogp(arg, 1/(n)))
    wts2 <- as.vector(-llogpp(arg, 1/(n)))^0.5
    grad <- as.matrix(-z * wts1)
    grad <- as.vector(rowsum(grad, rep(1, nrow(grad))))
    gsize <- mean(abs(grad))
    hess <- z * wts2
    svdh <- svd(hess)
    if (min(svdh$d) < max(svdh$d) * svdtol)
      svdh$d <- svdh$d + max(svdh$d) * svdtol
    nstep <- svdh$v %*% (t(svdh$u)/svdh$d)
    nstep <- as.vector(nstep %*% matrix(wts1/wts2, n, 1))
    gstep <- -grad
    if (sum(nstep^2) < sum(gstep^2))
      gstep <- gstep * (sum(nstep^2)^0.5/sum(gstep^2)^0.5)
    ologelr <- -sum(llog(arg, 1/(n)))
    ninner <- 0
    for (i in 1:length(nwts)) {
      nlogelr <- logelr.ee(z, lam + nwts[i] * nstep + gwts[i] * gstep)
      if (nlogelr < ologelr) {
        lam <- lam + nwts[i] * nstep + gwts[i] * gstep
        ninner <- i
        break
      }
    }
    nits <- nits + 1
    if (ninner == 0)
      nits <- maxit
    if (itertrace)
      print(c(lam, nlogelr, gsize, ninner))
  }
  list(`-2LLR` = -2 * nlogelr, Pval = 1 - pchisq(-2 * nlogelr, df = p),
       lambda = lam/scale, grad = grad * scale,
       hess = t(hess) %*% hess * scale^2, wts = wts1, nits = nits)
}

#########################################################################################
# The Following Procedure Calculates the Maximum Sample Empirical Likelihood Estimator (SEL)
#########################################################################################

elhood.optim1 = function(m, x, ds, tau, init.theta) {
  n = nrow(x)
  q = ncol(x)
  p = length(init.theta)
  mx = m(init.theta, x, ds, tau)
  r = ncol(mx)
  # initial value for lambda
  init.lambda = rep(0, r)
  elhood = function(theta) {
    mx = m(theta, x, ds, tau)
    lambda = el.ee(mx, init.lambda)$lambda
    z = mx
    arg = 1 + z %*% lambda
    nlogelr = sum(llog(arg, 1 / n))
    vf = nlogelr
    return(vf)
  }
  counter = 1
  repeat {
    # browser()
    # zz = nlm(elhood, init.theta, print.level = 0, iterlim = 30,
    #          check.analyticals = F)
    # dff = zz$estimate - init.theta
    # init.theta = zz$estimate
    zz = optim(par = init.theta, elhood, method = "Nelder-Mead")
    dff = zz$par - init.theta
    init.theta = zz$par
    mx = m(init.theta, x, ds, tau)
    lambda = el.ee(mx, init.lambda)$lambda
    init.lambda = lambda
    if (max(abs(dff)) < 1e-4) break
    counter = counter + 1
    if (counter >= 5) {
      warning("iteration fails to converge")
      break
    }
  }
  # final estimate and the corresponding EL criterion value
  est = init.theta
  z = m(est, x, ds, tau)
  arg = 1 + z %*% lambda
  val = 2 * sum(llog(arg, 1 / n))
  list(estimate = est, val = val)
}
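#########################################################################################
# A minimal sketch of el.ee() on the simplest estimating equation (illustration only):
# g(mu, y_i) = y_i - mu for a population mean. Under H0: mu = 0 with N(0, 1) data, the
# -2LLR statistic is approximately chi-squared with 1 degree of freedom. All _toy names
# are hypothetical. Wrapped in if (FALSE) so it is not executed when the file is sourced.
#########################################################################################

if (FALSE) {
  y_toy  <- rnorm(50)
  mx_toy <- as.matrix(y_toy - 0)   # estimating function evaluated at mu = 0
  el_toy <- el.ee(mx_toy)
  el_toy$`-2LLR`                   # EL ratio statistic
  el_toy$Pval                      # p-value from the chi-squared(1) calibration
}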
#########################################################################################
## R Function for the Type I Prior Distribution: diffuse N(0, 100^2) on each component
#########################################################################################

prior_reg_1 <- function(pars, pars.info, n) {
  p = length(pars)
  prior_pars = matrix(0, p, 1)
  for (m in 1:p) {
    a = pars[m]
    b = pars.info[m]
    prior_pars[m] = dnorm(a, 0, 100, log = TRUE)
  }
  psum = sum(prior_pars)
  return(psum)
}

#########################################################################################
## R Function for the Type II Prior Distribution: N(b, 1/n), centred at the finite
## population parameter b
#########################################################################################

prior_reg_2 <- function(pars, pars.info, n) {
  p = length(pars)
  prior_pars = matrix(0, p, 1)
  for (m in 1:p) {
    a = pars[m]
    b = pars.info[m]
    prior_pars[m] = dnorm(a, b, sqrt(1/n), log = TRUE)
  }
  psum = sum(prior_pars)
  return(psum)
}

#########################################################################################
## R Function for the Type III Prior Distribution: Uniform(b - n^(-1/4), b + n^(-1/4))
#########################################################################################

prior_reg_3 <- function(pars, pars.info, n) {
  p = length(pars)
  prior_pars = matrix(0, p, 1)
  for (m in 1:p) {
    a = pars[m]
    b = pars.info[m]
    prior_pars[m] = dunif(a, b - (1/n)^(1/4), b + (1/n)^(1/4), log = TRUE)
  }
  psum = sum(prior_pars)
  return(psum)
}

#########################################################################################
## R Function for the Type IV Prior Distribution: N(b + 2, 1/n), a misspecified centre
#########################################################################################

prior_reg_4 <- function(pars, pars.info, n) {
  p = length(pars)
  prior_pars = matrix(0, p, 1)
  for (m in 1:p) {
    a = pars[m]
    b = pars.info[m]
    prior_pars[m] = dnorm(a, b + 2, sqrt(1/n), log = TRUE)
  }
  psum = sum(prior_pars)
  return(psum)
}

## Posterior summaries from a metrop() object: credible interval endpoints (bci)
## and posterior means for each parameter.
Bstat <- function(mcmc_object, interval = c(0.025, 0.975)) {
  num_params <- length(mcmc_object$par_names)
  if (num_params > 1) {
    bci <- array(dim = c(num_params, length(interval)))
    post_mean <- numeric(num_params)
    for (i in 1:num_params) {
      bci[i, ] <- quantile(mcmc_object$trace[, i], probs = interval)
      post_mean[i] <- mean(mcmc_object$trace[, i])
    }
    rownames(bci) <- mcmc_object$par_names
    colnames(bci) <- as.character(interval)
  }
  if (num_params == 1) {
    bci <- quantile(mcmc_object$trace, probs = interval)
    post_mean <- mean(mcmc_object$trace)
  }
  # return(cbind(bci, post_mean))
  list(bci = bci, post_mean = post_mean)
}

#########################################################################################
# R Function for Survey Weighted Estimating Functions Using Quantile Regression Modelling
#########################################################################################

m1 = function(pars, XY, ds, tau) {
  X = XY[, -1]
  Y = XY[, 1]
  pars = as.numeric(pars)
  # browser()
  c(ds * ((Y < X %*% pars) - tau)) * X
}
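#########################################################################################
# A minimal sketch of m1() (illustration only): m1() stacks the survey weighted quantile
# regression score ds_i * (I(y_i < x_i' beta) - tau) * x_i by row, so its column sums
# are approximately zero at the (weighted) quantile regression fit. All _toy names are
# hypothetical. Wrapped in if (FALSE) so it is not executed when the file is sourced.
#########################################################################################

if (FALSE) {
  n_toy  <- 200
  X_toy  <- cbind(1, rnorm(n_toy))
  Y_toy  <- X_toy %*% c(0.5, 1) + rnorm(n_toy)
  XY_toy <- cbind(Y_toy, X_toy)
  d_toy  <- rep(1, n_toy)                  # equal design weights for the sketch
  b_toy  <- rq(Y_toy ~ X_toy - 1, tau = 0.25)$coefficients
  colSums(m1(b_toy, XY_toy, d_toy, 0.25))  # approximately (0, 0)
}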
#########################################################################################
## R Function for the Log Empirical Likelihood Function
#########################################################################################

lik_el <- function(pars, XY, ds, tau) {
  e <- m1(pars, XY, ds, tau)
  log_likelihood = el.ee(e)$`-2LLR`
  return(-0.5 * log_likelihood)
}

#########################################################################################
# Compute Bayesian Point Estimators Using 1000 Simulation Runs:
#########################################################################################

#---------------------------------------------------
## Finite Population Generation
#---------------------------------------------------
N = 1000
tau = 0.25
p = 2
beta = c(0.5, 1)
X = rnorm(N)
X = cbind(rep(1, N), X)
err_n = rnorm(N)
# alternative error distributions (not used in this run)
err_ca = rcauchy(N)
err_t = rt(N, 3)
err_chi = rchisq(N, 2)
Y = X %*% beta + (err_n - qnorm(tau))
# Y = X %*% beta + (1 + X[, 2]) * (err_n - qnorm(tau))
XY = cbind(Y, X)
fit1 = rq(Y ~ X - 1, tau = tau)
beta_N = fit1$coefficients  # census parameter: finite population quantile regression fit

#---------------------------------------------
# Size variable for PPS sampling
#---------------------------------------------
Z = X[, 2] + 0.1 * Y
Z = Z - min(Z) + 0.4  # Z >= 0.4 and max(Z)/min(Z) = 40
Z = Z / sum(Z)        # sum(Z) = 1; cor(Z, Y) = 0.557

#---------------------------------------------------
nsim = 1000
n = 100
BIAS1 = c(0, 0)
COV_PR1 = c(0, 0)
LEN1 = c(0, 0)
beta.bayes.est.matrix1 = matrix(0, nsim, p)
beta.bayes.est.matrix2 = matrix(0, nsim, p)
beta.bayes.est.matrix3 = matrix(0, nsim, p)
beta.bayes.est.matrix4 = matrix(0, nsim, p)
beta.sel.est.matrix = matrix(0, nsim, p)

#---------------------------------------------------
## Repeated Simulations Start From Here:
#---------------------------------------------------
for (m in 1:nsim) {
  sam = syspps(Z, n)  # Take a PPS sample
  pis = n * Z[sam]    # Inclusion probabilities
  ds = 1 / pis        # Design weights
  XYs = XY[sam, ]
  Xs = XYs[, -1]
  Ys = XYs[, 1]
  qrfit = rq(Ys ~ Xs - 1, tau = tau, weights = ds)
  beta.wee.est = qrfit$coefficients
  # Obtain the maximum sample empirical likelihood estimator (SEL)
  el.opt = elhood.optim1(m1, XYs, ds, tau, beta.wee.est)
  beta.el.est = el.opt$estimate
  #---------------------------------------------------
  ## Design-Based Bayesian Empirical Likelihood
  ## Estimation Under Different Priors
  #---------------------------------------------------
  mcmc_r1 = metrop(pars = c(0.45, 0.95), lik.fun = lik_el, prior.fun = prior_reg_1,
                   V = 0.015 * diag(c(0.5, 1.0)), par_names = c("a", "b"),
                   maxit = 5000, thin = 1, burn_in = 2500, sequential = FALSE,
                   pb = FALSE, pars.info = beta_N, x = XYs, ds = ds, tau = tau)
  mcmc_r2 = metrop(pars = c(0.45, 0.95), lik.fun = lik_el, prior.fun = prior_reg_2,
                   V = 0.015 * diag(c(0.5, 1.0)), par_names = c("a", "b"),
                   maxit = 5000, thin = 1, burn_in = 2500, sequential = FALSE,
                   pb = FALSE, pars.info = beta_N, x = XYs, ds = ds, tau = tau)
  mcmc_r3 = metrop(pars = c(0.45, 0.95), lik.fun = lik_el, prior.fun = prior_reg_3,
                   V = 0.015 * diag(c(0.5, 1.0)), par_names = c("a", "b"),
                   maxit = 5000, thin = 1, burn_in = 2500, sequential = FALSE,
                   pb = FALSE, pars.info = beta_N, x = XYs, ds = ds, tau = tau)
  mcmc_r4 = metrop(pars = c(0.45, 0.95), lik.fun = lik_el, prior.fun = prior_reg_4,
                   V = 0.015 * diag(c(0.5, 1.0)), par_names = c("a", "b"),
                   maxit = 5000, thin = 1, burn_in = 2500, sequential = FALSE,
                   pb = FALSE, pars.info = beta_N, x = XYs, ds = ds, tau = tau)
  BST1 = Bstat(mcmc_r1)
  BST2 = Bstat(mcmc_r2)
  BST3 = Bstat(mcmc_r3)
  BST4 = Bstat(mcmc_r4)
  beta.bayes.est.matrix1[m, ] = BST1$post_mean
  beta.bayes.est.matrix2[m, ] = BST2$post_mean
  beta.bayes.est.matrix3[m, ] = BST3$post_mean
  beta.bayes.est.matrix4[m, ] = BST4$post_mean
  beta.sel.est.matrix[m, ] = beta.el.est
  print(m)
}
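#########################################################################################
# Optional diagnostics for a single run (illustration only; not part of the original
# code). After the loop, mcmc_r1 holds the chain from the last replicate, so its
# acceptance rate and trace can be inspected; rates of roughly 0.2-0.5 are conventional
# for random walk Metropolis. Wrapped in if (FALSE) so it is not executed automatically.
#########################################################################################

if (FALSE) {
  mcmc_r1$acceptance_ratio
  plot(mcmc_r1$trace[, 1], type = "l",
       xlab = "iteration (post burn-in)", ylab = "a")  # trace of the first parameter
}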
#########################################################################################
## Simulation Results:
#########################################################################################

BIAS1 = colMeans(beta.bayes.est.matrix1) - beta_N
BIAS1 = as.numeric(BIAS1)
SD1 = apply(beta.bayes.est.matrix1, 2, sd)
SD1 = as.numeric(SD1)
RMSE1 = sqrt(colMeans((beta.bayes.est.matrix1 - t(array(beta_N, c(2, nsim))))^2))
RMSE1 = as.numeric(RMSE1)

BIAS2 = colMeans(beta.bayes.est.matrix2) - beta_N
BIAS2 = as.numeric(BIAS2)
SD2 = apply(beta.bayes.est.matrix2, 2, sd)
SD2 = as.numeric(SD2)
RMSE2 = sqrt(colMeans((beta.bayes.est.matrix2 - t(array(beta_N, c(2, nsim))))^2))
RMSE2 = as.numeric(RMSE2)

BIAS3 = colMeans(beta.bayes.est.matrix3) - beta_N
BIAS3 = as.numeric(BIAS3)
SD3 = apply(beta.bayes.est.matrix3, 2, sd)
SD3 = as.numeric(SD3)
RMSE3 = sqrt(colMeans((beta.bayes.est.matrix3 - t(array(beta_N, c(2, nsim))))^2))
RMSE3 = as.numeric(RMSE3)

BIAS4 = colMeans(beta.bayes.est.matrix4) - beta_N
BIAS4 = as.numeric(BIAS4)
SD4 = apply(beta.bayes.est.matrix4, 2, sd)
SD4 = as.numeric(SD4)
RMSE4 = sqrt(colMeans((beta.bayes.est.matrix4 - t(array(beta_N, c(2, nsim))))^2))
RMSE4 = as.numeric(RMSE4)

BIAS5 = colMeans(beta.sel.est.matrix) - beta_N
BIAS5 = as.numeric(BIAS5)
SD5 = apply(beta.sel.est.matrix, 2, sd)
SD5 = as.numeric(SD5)
RMSE5 = sqrt(colMeans((beta.sel.est.matrix - t(array(beta_N, c(2, nsim))))^2))
RMSE5 = as.numeric(RMSE5)

# Truncate (not round) all summaries to three decimal places
BIAS1 = floor(BIAS1 * 1000) / 1000
RMSE1 = floor(RMSE1 * 1000) / 1000
SD1 = floor(SD1 * 1000) / 1000
BIAS2 = floor(BIAS2 * 1000) / 1000
RMSE2 = floor(RMSE2 * 1000) / 1000
SD2 = floor(SD2 * 1000) / 1000
BIAS3 = floor(BIAS3 * 1000) / 1000
RMSE3 = floor(RMSE3 * 1000) / 1000
SD3 = floor(SD3 * 1000) / 1000
BIAS4 = floor(BIAS4 * 1000) / 1000
RMSE4 = floor(RMSE4 * 1000) / 1000
SD4 = floor(SD4 * 1000) / 1000
BIAS5 = floor(BIAS5 * 1000) / 1000
RMSE5 = floor(RMSE5 * 1000) / 1000
SD5 = floor(SD5 * 1000) / 1000

BIAS1
RMSE1
SD1
BIAS2
RMSE2
SD2
BIAS3
RMSE3
SD3
BIAS4
RMSE4
SD4
BIAS5
RMSE5
SD5
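#########################################################################################
# Optional: collect the summaries above into a single table for reporting (a
# presentation sketch only; the layout and labels are ours, not from the paper).
#########################################################################################

if (FALSE) {
  results <- rbind(
    "Prior I"   = c(BIAS1, RMSE1, SD1),
    "Prior II"  = c(BIAS2, RMSE2, SD2),
    "Prior III" = c(BIAS3, RMSE3, SD3),
    "Prior IV"  = c(BIAS4, RMSE4, SD4),
    "SEL"       = c(BIAS5, RMSE5, SD5)
  )
  colnames(results) <- c("BIAS.a", "BIAS.b", "RMSE.a", "RMSE.b", "SD.a", "SD.b")
  print(results)
}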