\documentclass{slides}
\pagestyle{plain}
%\setlength{\textheight}{5.3in}
%\setlength{\textwidth}{5.35in}
\newcommand{\F}{{\cal F}}
\newcommand{\Kprod}{\otimes}
% Brace the \rm switches so the roman font does not leak into the rest of the formula.
\newcommand{\Diag}{{\rm Diag\,}}
\newcommand{\Sn}{{\cal S}_n}
\newcommand{\tran}{^T}
\newcommand{\diag}{{\rm diag\,}}
\newcommand{\tr}{{\rm trace\,}}
\newcommand{\trace}{{\rm trace\,}}
\newcommand{\svec}{{\rm svec\,}}
\newcommand{\sMat}{{\rm sMat\,}}
\newcommand{\hMat}{{\rm hMat\,}}
\newcommand{\dsvec}{{\rm dsvec\,}}
\newcommand{\sdiag}{{\rm sdiag\,}}
\newcommand{\vsMat}{{\rm vsMat\,}}
\newcommand{\kvec}{{\rm vec\,}}
\newcommand{\kmat}{{\rm Mat\,}}
\newcommand{\beq}{\begin{equation}}
\newcommand{\eeq}{\end{equation}}
\newcommand{\nc}{\newcommand}
\nc{\Hcal}{{\cal H}}
\def\QED{~\rule[-1pt]{8pt}{8pt}\par\medskip ~~}
\begin{document}
%=======================
%=======================
\begin{slide}{}
\begin{center}
{\bf Is Lagrangian (SDP) Relaxation Best for Quadratic Optimization? }
\end{center}
~~\\ ~~\\ ~~\\ ~~\\ ~~\\ ~~\\
Henry Wolkowicz \\
University of Waterloo\\
~\\
~\\
\end{slide}
\begin{slide}{}
%\large{
\begin{center}
{\bf OUTLINE}
\end{center}
%}
\begin{enumerate}
\item[$\bullet$] Quadratically constrained quadratic program, QQP.
\item[$\bullet$] Special case of Max-Cut, MC, problem:\\
equivalent bounds to Lagrangian (SDP) relaxation\\
redundant constraints
\item[$\bullet$] SDP equivalent to Lagrangian relaxation for QQPs.
\item[$\bullet$] Theme (or conjecture): {\em Lagrangian relaxation is best} \item[$\bullet$] zero duality gap for nonconvex problems \end{enumerate} \end{slide} \begin{slide}{} \begin{center} {\bf QQP} \end{center} \[ q_i(x) : = x^TQ_ix+2g_i^Tx +\alpha_i, \quad Q_i=Q_i^T \] \[ (\mbox{QQP}_x) \begin{array}{rcl} q^*:= & \min & q_0(x)\\ &{\rm subject~to} & q_k(x) \leq 0\\ && k \in {\cal I} :=\{1, \ldots , m\}\\ && x \in \Re^n \end{array} \] Lagrangian of QQP$_x$ \[ L(x,\lambda):= q_0(x) + \sum_{k \in {\cal I} } \lambda_k q_k(x), \] $\lambda=(\lambda_k) \geq 0$ are nonnegative Lagrange multipliers. \end{slide} \begin{slide}{} {\bf Lagrange multipliers - two uses.} 1. Classical: if a CQ holds at optimum $\bar{x}$, then the KKT necessary conditions for optimality hold \begin{eqnarray*} \bar{\lambda} \geq 0, \nabla L(\bar{x},\bar{\lambda})=0\\ \bar{x} \mbox{ feasible }\\ \bar{\lambda}_k q_k(\bar{x})=0, \forall k \in {\cal I} \end{eqnarray*} If the Lagrangian at $\bar{\lambda}$ is also convex in $x$, then KKT sufficient for optimality. 2. Lagrangian Relaxation (duality): \[ q^* \geq d^*:= \max\limits_{\lambda \geq 0} \min_x q_0(x) + \sum_{k \in {\cal I} } \lambda_k q_k(x). \] \end{slide} \begin{slide}{} Zero duality gap holds if $q^*=d^*.$ Strong duality holds if $q^*=d^*$ and also $d^*$ is attained. Moreover, $d^*$ can be efficiently evaluated using SDP. Questions:\\ $\bullet$Can one do better than the Lagrangian relaxation?\\ $\bullet$When does a zero duality gap hold?\\ $\bullet$What redundant constraints can be added to close the duality gap? \end{slide} \begin{slide}{} \begin{center} {\bf Tractable Relaxations of Max-Cut} \end{center} \[ (MCQ)~~~ \begin{array}{c} \mu^*:= \max\limits_{x \in \F} q_0(x) \quad ( : = x^TQx -2 c^Tx). \end{array} \] where $\F=\left\{\pm 1\right\}^n$\\ -------------------------------- perturbing diagonal of $Q$ on $\F:$ \[ \begin{array}{rcl} q_u(x)& : = &x^T(Q+\Diag(u))x -2 c^Tx - u^Te\\ &~=&q_0(x),~~\forall x \in \F. 
\end{array} \] \end{slide} \begin{slide}{} {\bf Bound 0:} the trivial bound from allowing the diagonal perturbations: \[ \mu^* \leq f_0 (u) := \max_x q_u(x). \] The function $f_0$ can take on the value $+\infty.$ Let \[S:=\left\{u:u^Te=0, Q+\Diag(u)\preceq 0\right\}. \] Then: \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_0 := \min_u f_0(u) \\ \left(= \min\limits_{u^Te=0} f_0(u), \mbox{ if } S \neq \emptyset \right).\\ \hline \end{array} \] Using the hidden semidefinite constraint: \[ \mu^* \leq B_0 = \min_{Q+\Diag(u) \preceq 0} f_0(u). \] \end{slide} \begin{slide}{} {\bf Bound 1:} relax the feasible set to the sphere of radius $\sqrt{n}$ (tractable trust region subproblem, TRS): \[ \mu^* \leq f_1(u):= \max_{|| x ||^2 =n} ~ q_u(x) \] and \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_1 :=\min_u f_1(u).\\ \hline \end{array} \] The inner maximization problem is called a trust region subproblem and is tractable. This bound provides the central tool in the proofs of equivalence. \end{slide} \begin{slide}{} {\bf Bound 2:} box constraint: \[ \mu^* \leq f_2(u):= \max_{| x_i | \leq 1} ~ q_u(x). \] add the semidefinite constraint to make bound tractable. \[ \mu^* \leq \min_{u} f_2(u) \] and \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_2 :=\min_{Q+\Diag(u) \preceq 0} f_2(u).\\ ~~\\ \hline \end{array} \] \end{slide} \begin{slide}{} {\bf Bound $B_1^c$:} lift to eigenvalue bound: \[ Q^c := \left[ \begin{array}{cc} 0 & - c^T \\ - c &Q \end{array} \right] \] \[ q^c_u(y) := y^T( Q^c+\diag(u))y-u^Te \] \[ \mu^* \leq f_1^c(u) := \max_{||y||^2=n+1} q^c_u(y) \] where \[ \max_{||y||^2=n+1} q^c_u(y) = (n+1) \lambda_{\max} (Q^c + \diag (u) ) - u^Te \] \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_1^c := \min_{u} f_1^c(u).\\ ~~\\ \hline \end{array} \] Similarly, we get equivalent bounds $B_0^c$ and homogenized bounds for the other models. 
\end{slide} \begin{slide}{} {\bf Bound $B_3$:} SDP bound: After homogenization ( $c=0$), use \[ x^TQx = \tr x^TQx = \tr Qxx^T\] and, for $x \in \F,$ $y_{ij} = x_ix_j$ defines a symmetric, rank one, positive semidefinite matrix $Y$ with diagonal elements 1. Relax the rank one condition. \[ \begin{array}{|ccc|} \hline &&\\ B_3 :=&\max &\tr QY \\ & \mbox{~subject to~} &\diag(Y) = e \\ && Y \succeq 0.\\ \hline \end{array} \] \end{slide} \begin{slide}{} Summary: ref. PW: (without restrictions e.g. $u^Te=0$) \[ \begin{array}{|rcl|} \hline &&~\\ B_0 &=&\min\limits_u \max\limits_x q_u(x)\\ B_1 &=&\min\limits_u \max\limits_{x^Tx=n} q_u(x)\\ B_2 &=&\min\limits_u \max\limits_{-1 \leq x_i \leq 1} q_u(x)\\ B_3 &=&\max \{\tr Q^cY : \diag(Y) = e,~ Y \succeq 0. \}\\ B_1^c &=&\min\limits_u \max\limits_{y^Ty=n+1} q^c_u(y)\\ &&~\\ \hline \end{array} \] \end{slide} \begin{slide}{} Now replace $\pm 1$ constraints with $x_i^2=1, \forall i.$ \[ \begin{array}{ccc} (P_E)&\max & q_0(x)=x^TQx-2c^Tx \\ & \mbox{~subject to~} & x_i^2 = 1,~~i=1, \cdots, n. \\ \end{array} \] $B_L$ denotes Lagrangian relaxation bound. Following our theme: \begin{center} \fbox{ {\bf Theorem } $B_L$ equals all above bounds. } \end{center} ref PRW.\\ (The proofs come from exploiting strong Lagrangian duality of TRS) \end{slide} \begin{slide}{} {\bf A Strengthened SDP Bound for MC} (second lifting - motivated by the strong duality results to follow) lifting procedure \[ X=xx^T\] implies \[X^2=xx^Txx^T=nX.\] equivalent quadratic matrix model for CQ \end{slide} \begin{slide}{} \begin{eqnarray*} \mu^*:=&\max& \tr QX\\ &\mbox{s.t.}&\diag(X)=e\\ &&X^2-nX=0\\ &&X \succeq 0. \end{eqnarray*} Note: $X^2=nX,~\trace X = n$ implies $X$ is rank one. But, we cannot solve this nonconvex problem in general, i.e. we have to look at the Lagrangian relaxation. 
\end{slide}
\begin{slide}{}
notation:\\
~$\bullet$ $S \in \Sn,$\\
~$\bullet$ $s=\svec(S) \in \Re^{t(n)}$ vector formed (columnwise) from $S$ ignoring strictly lower triang.\\
~$\bullet$ $S=\sMat(s)$ inverse of $\svec$\\
~$\bullet$ $\hMat (v)$ is adjoint of $\svec$, off-diagonal terms are multiplied by a half\\
~$\bullet$ $\dsvec (S)$ is adjoint of $\sMat$, operator like $\svec$ but off diagonal elements are multiplied by 2\\
~$\bullet$ $\sdiag(s):= \diag( \sMat(s))$\\
~$\bullet$ $\vsMat(s)=\kvec(\sMat(s))$
\[\vsMat^*(v)=\dsvec \left( \left(\kmat(v)+\kmat(v)^T\right)/2 \right). \]
\end{slide}
\begin{slide}{}
As above --- use the equivalent program
\[ {\rm MC_O} \begin{array}{ccc}
\mu^*= &\max & \frac 12 \tr QX \\
&\mbox{s.t.}& \diag(X) = e\\
&&X^2-nX=0\\
&&X \succeq 0,
\end{array} \]
replace linear constraint with $ ||\diag(X) - e||^2=0$ and homogenize the problem. use $X=\sMat(x)$
\[ {\rm MC_O} \begin{array}{cll}
\mu^*= &\max & \frac 12 \tr \left(Q\, \sMat(x)\right)y_0 \\
&\mbox{s.t.}& \sdiag(x)^T \sdiag(x) \\
&& ~~~- 2 e^T \sdiag(x)y_0 + n=0\\
&&\sMat(x)^2-n\, \sMat(x)y_0=0\\
&&\sMat(x)y_0 \succeq 0\\
&&1-y_0^2=0.
\end{array} \]
\end{slide}
\begin{slide}{}
Lagrangian dual
\[ \begin{array}{lcc}
\mu^*\leq \mu^*_3:= \min\limits_{T \succeq 0,w,S} \max\limits_{y_0^2=1} \frac 12 \tr \left(Q\sMat(x)\right)y_0 \\
~~~~~~~~~~~~~~+w\left( \sdiag(x)^T \sdiag(x)\right. \\
~~~~~~~~~~~~~~~~~~~~~ \left. -2 e^T \sdiag(x)y_0 + n\right)\\
~~~~~~~~~~~~~~~~ +\tr S\left((\sMat(x))^2-n\, \sMat(x)y_0\right)\\
~~~~~~~~~~~~~~~~~~~~-\tr T(\sMat(x)y_0).
\end{array} \]
Note: $\sMat(x) \succeq 0$ necessarily; $\sdiag(x)=e$ necessarily holds. Therefore, relaxation is strengthened. Move $y_0$ into Lagrangian without increasing the duality gap, since it is a TRS, i.e., add $ t(1-y_0^2)$ into Lagrangian. Then apply the hidden semidefinite constraint theme.
\end{slide}
\begin{slide}{}
The constant part (no Lagrange multipliers) of the Hessian is:
\beq
\label{eq:hessian1}
H_c:=\pmatrix{ 0 & \frac 12 \dsvec(Q)^T\cr
\frac 12 \dsvec(Q) & 0\cr }.
\eeq
The nonconstant part of the Hessian is made up of a linear combination of matrices, i.e. it is a linear operator on the Lagrange multipliers. Note that
\begin{eqnarray*}
\tr S (\sMat(x))^2 &=& \tr S\, \sMat(x)\, I\, \sMat(x)^T\\
&=& \left(\kvec( \sMat(x))\right)^T\left( I \Kprod S \right) \left(\kvec( \sMat(x))\right)\\
&=& x^T \vsMat^* \left( I \Kprod S \right) \vsMat(x).
\end{eqnarray*}
For notational convenience, we use the {\em negative} of the Hessian and split it up into four linear operators:
\beq
\label{eq:hessian2}
\begin{array}{ccc}
\Hcal(w,S,T,t):= \Hcal_1(w) + \Hcal_2(S) + \Hcal_3(T) + \Hcal_4(t) \\
:=w\pmatrix{ 0 & 2\, \svec(I)^T\cr
2\, \svec(I) & -2\Diag(\svec(I)) } \\
+\pmatrix{ 0 & 2n\, \dsvec(S)^T\cr
2n\, \dsvec(S) & -2\vsMat^*(I \Kprod S) \vsMat }\\
+\pmatrix{ 0 & 2\, \dsvec(T)^T\cr
2\, \dsvec(T) & 0 }\\
+t\pmatrix{ 2 & 0\cr
0 & 0 }.\\
\end{array}
\eeq
We then get the (equivalent to the Lagrangian dual) semidefinite program
\beq
\label{eq:MCDSDP}
{\rm MCDSDP} \begin{array}{ccc}
\mu^*_{MCSDP3} = &\min & nw +\tr 0S + \tr 0T +t\\
&\mbox{s.t.}& \Hcal(w,S,T,t) \succeq H_c\\
&& T \succeq 0, S=S^T\\
\end{array}
\eeq
The dual of this SDP is the strengthened SDP relaxation of MC:
\beq
\label{eq:MCPSDP}
{\rm MCPSDP} \begin{array}{ccc}
\mu^*_{MCSDP3} = &\max & \trace H_cY\\
&\mbox{s.t.}& \Hcal_1^*(Y) = n\\
& & \Hcal_2^*(Y) = 0\\
& & \Hcal_3^*(Y) \preceq 0\\
& & \Hcal_4^*(Y) = 1\\
&& Y \succeq 0.
\end{array}
\eeq
\end{slide}
\begin{slide}{}
{\bf Convex Quadratic Program}
\begin{eqnarray*}
\mu^*:=&\min& q_0(x)\\
&\mbox{s.t.}& q_k(x) \leq 0,\quad k=1, \ldots, m,
\end{eqnarray*}
where all $q_i(x)$ are convex quadratic functions. The dual is
\[ \mbox{DCQP}\qquad \nu^*:= \max\limits_{\lambda \geq 0}\ \min\limits_x\ q_0(x) + \sum_{k=1}^m \lambda_k q_k(x).
\]
$\bullet$ (KKT) conditions are sufficient for global optimality
$\bullet$ if the primal value of CQP is bounded then it is attained and there is no duality gap, (e.g. Peterson and Ecker or Terlaky).
$\bullet$ the dual may not be attained
\end{slide}
\begin{slide}{}
{\bf Nonconvex Cases}
{\bf 1. Rayleigh Quotient}
$A=A\tran \in \Sn$
\[ \lambda_{\min} = \min \{x\tran Ax : x\tran x = 1\}. \]
tractable (nonconvex) problem
no (Lagrangian) duality gap for this nonconvex problem
\[ \begin{array}{rcl}
\mu^*&:=& \max\limits_{\lambda}\ \min\limits_x\ x\tran Ax -\lambda (x\tran x - 1)\\
&=& \max\limits_{A - \lambda I \succeq 0}\ \min\limits_x\ x\tran (A -\lambda I)x + \lambda\\
&=& \max\limits_{A - \lambda I \succeq 0}\ \lambda\\
&=& \lambda_{\min}
\end{array} \]
\end{slide}
\begin{slide}{}
{\bf 2. Trust Region Subproblem}
\begin{eqnarray*}
&\mu^*:=\min& q_0(x)\\
&\mbox{s.t.}& x\tran x - \delta^2\leq 0 \mbox{ (or } =0). \\
\end{eqnarray*}
for ``$\le$,'' the Lagrangian dual is:
\[ \mbox{DTRS}\qquad \nu^*:=\max\limits_{\lambda \geq 0}\ \min\limits_x\ q_0(x) + \lambda (x\tran x - \delta^2). \]
strong duality holds and equivalent to (ref Stern--Wolk.) the (concave) nonlinear semidefinite program
% Sign fixed: \min_x L(x,\lambda) = -g_0\tran (Q+\lambda I)^{\dagger} g_0 - \lambda\delta^2
% (with q_0(x)=x\tran Qx + 2g_0\tran x as defined earlier).
\begin{eqnarray*}
\mbox{DTRS}\qquad &\nu^*:=\max& -g_0\tran (Q+\lambda I)^{\dagger} g_0 - \lambda \delta^2\\
&\mbox{s.t.}& Q+\lambda I \succeq 0\\
&&\lambda \geq 0.
\end{eqnarray*}
theme: tractable problem (ref Mor\'e--Sorensen) but strong duality holds.
\end{slide}
\begin{slide}{}
{\bf 3. Two Trust Region Subproblem}
TTRS consists in minimizing a (possibly nonconvex) quadratic function subject to a norm and a least squares constraint. ref in SQP methods by CDT.
TTRS can have a nonzero duality gap, ref PY and Y.
If the objective is not convex, then the primal may not be attained, ref LZ.
TRS can have at most one local and nonglobal optimum, ref M.
It is still an open problem whether TTRS is an NP-hard or a polynomial time problem.
\end{slide}
\begin{slide}{}
Note: a quadratic constraint can be written as
\[ y\tran Py \leq \delta \]
after the homogenization. This is lifted to
\[ \trace PY \leq \delta. \]
Second lifting:
\[ YPY \preceq \delta Y. \]
This strictly strengthens the SDP relaxation.
\end{slide}
\begin{slide}{}
{\bf Orthogonally Constrained Programs with Zero Duality Gaps}
\[ \{X: X X\tran =I\} \]
Stiefel manifold ref EAS
$A$ and $B$ $n\times n$ symmetric matrices
\begin{equation}
\begin{array}{rcl}
{\rm QQP_O}\qquad \mu^O:=&\min& \tr AXBX\tran\\
&{\rm s.t.}& XX\tran=I.
\end{array}
\end{equation}
Tractable problem using classical Hoffman--Wielandt inequality. But, Lagrangian dual has a duality gap, ref ZKRW.
\end{slide}
\begin{slide}{}
The constraints $XX\tran=I$ and $X\tran X=I$ are equivalent. Add redundant constraints.
\begin{eqnarray*}
{\rm QQP_{OO}}\qquad \mu^O:=&\min& \tr AXBX\tran\\
&{\rm s.t.}& XX\tran=I,\ X\tran X=I.
\end{eqnarray*}
dual problem is
\begin{eqnarray*}
{\rm DQQP_{OO}}\\
\mu^O \geq \mu^D:=&\max& \tr S+\tr T\\
&\mbox{s.t.}& (I\Kprod S)+(T\Kprod I)\preceq (B\Kprod A)\\
&& S=S\tran,\ T=T\tran.
\end{eqnarray*}
{\bf Theorem} Strong duality holds for $\rm QQP_{OO}$ and $\rm DQQP_{OO},$ i.e. $\mu^D=\mu^O$ and both primal and dual are attained. \QED
Theme again holds.
\end{slide}
\begin{slide}{}
Other applications: Weighted Sums of Eigenvalues; Graph Partitioning Problem; TRS-like constraints $\{X: XX\tran \preceq I\}$ (extension of Hoffman--Wielandt inequality)
\end{slide}
\begin{slide}{}
Concluding Remarks:
In each case of a tractable bound for a nonconvex problem, the structure allows for redundant constraints to be added to close the Lagrangian duality gap.
What is the correct question about ``best''?
Can we close or reduce the duality gap in general? (ref KT)
\end{slide}
\begin{slide}{}
\end{slide}
\begin{slide}{}
\end{slide}
\end{document}