\documentclass{slides}
\pagestyle{plain}
%\setlength{\textheight}{5.3in}
%\setlength{\textwidth}{5.35in}
\newcommand{\F}{{\cal F}}
\newcommand{\Kprod}{\otimes}
% Brace the \rm switches so the roman font does not leak into the rest of the formula.
\newcommand{\Diag}{{\rm Diag\,}}
\newcommand{\Sn}{{\cal S}_n}
\newcommand{\tran}{^T}
\newcommand{\diag}{{\rm diag\,}}
\newcommand{\tr}{{\rm trace\,}}
\newcommand{\trace}{{\rm trace\,}}
\newcommand{\svec}{{\rm svec\,}}
\newcommand{\sMat}{{\rm sMat\,}}
\newcommand{\hMat}{{\rm hMat\,}}
\newcommand{\dsvec}{{\rm dsvec\,}}
\newcommand{\sdiag}{{\rm sdiag\,}}
\newcommand{\vsMat}{{\rm vsMat\,}}
\newcommand{\kvec}{{\rm vec\,}}
\newcommand{\kmat}{{\rm Mat\,}}
\newcommand{\beq}{\begin{equation}}
\newcommand{\eeq}{\end{equation}}
\newcommand{\nc}{\newcommand}
\nc{\Hcal}{{\cal H}}
\def\QED{~\rule[-1pt]{8pt}{8pt}\par\medskip ~~}
\begin{document}
%=======================
%=======================
\begin{slide}{}
\begin{center}
{\bf Is Lagrangian (SDP) Relaxation Best for Quadratic Optimization? }
\end{center}
~~\\ ~~\\ ~~\\ ~~\\ ~~\\ ~~\\
Henry Wolkowicz \\
University of Waterloo\\
~\\
~\\
\end{slide}
\begin{slide}{}
%\large{
\begin{center}
{\bf OUTLINE}
\end{center}
%}
\begin{enumerate}
\item[$\bullet$] Quadratically constrained quadratic program, QQP.
\item[$\bullet$] Special case of Max-Cut, MC, problem:\\
equivalent bounds to Lagrangian (SDP) relaxation\\
redundant constraints
\item[$\bullet$] SDP equivalent to Lagrangian relaxation for QQPs.
\item[$\bullet$] Theme (or conjecture): {\em Lagrangian relaxation is best} \item[$\bullet$] zero duality gap for nonconvex problems \end{enumerate} \end{slide} \begin{slide}{} \begin{center} {\bf QQP} \end{center} \[ q_i(x) : = x^TQ_ix+2g_i^Tx +\alpha_i, \quad Q_i=Q_i^T \] \[ (\mbox{QQP}_x) \begin{array}{rcl} q^*:= & \min & q_0(x)\\ &{\rm subject~to} & q_k(x) \leq 0\\ && k \in {\cal I} :=\{1, \ldots , m\}\\ && x \in \Re^n \end{array} \] Lagrangian of QQP$_x$ \[ L(x,\lambda):= q_0(x) + \sum_{k \in {\cal I} } \lambda_k q_k(x), \] $\lambda=(\lambda_k) \geq 0$ are nonnegative Lagrange multipliers. \end{slide} \begin{slide}{} {\bf Lagrange multipliers - two uses.} 1. Classical: if a CQ holds at optimum $\bar{x}$, then the KKT necessary conditions for optimality hold \begin{eqnarray*} \bar{\lambda} \geq 0, \nabla L(\bar{x},\bar{\lambda})=0\\ \bar{x} \mbox{ feasible }\\ \bar{\lambda}_k q_k(\bar{x})=0, \forall k \in {\cal I} \end{eqnarray*} If the Lagrangian at $\bar{\lambda}$ is also convex in $x$, then KKT sufficient for optimality. 2. Lagrangian Relaxation (duality): \[ q^* \geq d^*:= \max\limits_{\lambda \geq 0} \min_x q_0(x) + \sum_{k \in {\cal I} } \lambda_k q_k(x). \] \end{slide} \begin{slide}{} Zero duality gap holds if $q^*=d^*.$ Strong duality holds if $q^*=d^*$ and also $d^*$ is attained. Moreover, $d^*$ can be efficiently evaluated using SDP. Questions:\\ $\bullet$Can one do better than the Lagrangian relaxation?\\ $\bullet$When does a zero duality gap hold?\\ $\bullet$What redundant constraints can be added to close the duality gap? \end{slide} \begin{slide}{} \begin{center} {\bf Tractable Relaxations of Max-Cut} \end{center} \[ (MCQ)~~~ \begin{array}{c} \mu^*:= \max\limits_{x \in \F} q_0(x) \quad ( : = x^TQx -2 c^Tx). \end{array} \] where $\F=\left\{\pm 1\right\}^n$\\ -------------------------------- perturbing diagonal of $Q$ on $\F:$ \[ \begin{array}{rcl} q_u(x)& : = &x^T(Q+\Diag(u))x -2 c^Tx - u^Te\\ &~=&q_0(x),~~\forall x \in \F. 
\end{array} \] \end{slide} \begin{slide}{} {\bf Bound 0:} the trivial bound from allowing the diagonal perturbations: \[ \mu^* \leq f_0 (u) := \max_x q_u(x). \] The function $f_0$ can take on the value $+\infty.$ Let \[S:=\left\{u:u^Te=0, Q+\Diag(u)\preceq 0\right\}. \] Then: \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_0 := \min_u f_0(u) \\ \left(= \min\limits_{u^Te=0} f_0(u), \mbox{ if } S \neq \emptyset \right).\\ \hline \end{array} \] Using the hidden semidefinite constraint: \[ \mu^* \leq B_0 = \min_{Q+\Diag(u) \preceq 0} f_0(u). \] \end{slide} \begin{slide}{} {\bf Bound 1:} relax the feasible set to the sphere of radius $\sqrt{n}$ (tractable trust region subproblem, TRS): \[ \mu^* \leq f_1(u):= \max_{|| x ||^2 =n} ~ q_u(x) \] and \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_1 :=\min_u f_1(u).\\ \hline \end{array} \] The inner maximization problem is called a trust region subproblem and is tractable. This bound provides the central tool in the proofs of equivalence. \end{slide} \begin{slide}{} {\bf Bound 2:} box constraint: \[ \mu^* \leq f_2(u):= \max_{| x_i | \leq 1} ~ q_u(x). \] add the semidefinite constraint to make bound tractable. \[ \mu^* \leq \min_{u} f_2(u) \] and \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_2 :=\min_{Q+\Diag(u) \preceq 0} f_2(u).\\ ~~\\ \hline \end{array} \] \end{slide} \begin{slide}{} {\bf Bound $B_1^c$:} lift to eigenvalue bound: \[ Q^c := \left[ \begin{array}{cc} 0 & - c^T \\ - c &Q \end{array} \right] \] \[ q^c_u(y) := y^T( Q^c+\diag(u))y-u^Te \] \[ \mu^* \leq f_1^c(u) := \max_{||y||^2=n+1} q^c_u(y) \] where \[ \max_{||y||^2=n+1} q^c_u(y) = (n+1) \lambda_{\max} (Q^c + \diag (u) ) - u^Te \] \[ \begin{array}{|c|} \hline\\ \mu^* \leq B_1^c := \min_{u} f_1^c(u).\\ ~~\\ \hline \end{array} \] Similarly, we get equivalent bounds $B_0^c$ and homogenized bounds for the other models. 
\end{slide} \begin{slide}{} {\bf Bound $B_3$:} SDP bound: After homogenization ( $c=0$), use \[ x^TQx = \tr x^TQx = \tr Qxx^T\] and, for $x \in \F,$ $y_{ij} = x_ix_j$ defines a symmetric, rank one, positive semidefinite matrix $Y$ with diagonal elements 1. Relax the rank one condition. \[ \begin{array}{|ccc|} \hline &&\\ B_3 :=&\max &\tr QY \\ & \mbox{~subject to~} &\diag(Y) = e \\ && Y \succeq 0.\\ \hline \end{array} \] \end{slide} \begin{slide}{} Summary: ref. PW: (without restrictions e.g. $u^Te=0$) \[ \begin{array}{|rcl|} \hline &&~\\ B_0 &=&\min\limits_u \max\limits_x q_u(x)\\ B_1 &=&\min\limits_u \max\limits_{x^Tx=n} q_u(x)\\ B_2 &=&\min\limits_u \max\limits_{-1 \leq x_i \leq 1} q_u(x)\\ B_3 &=&\max \{\tr Q^cY : \diag(Y) = e,~ Y \succeq 0. \}\\ B_1^c &=&\min\limits_u \max\limits_{y^Ty=n+1} q^c_u(y)\\ &&~\\ \hline \end{array} \] \end{slide} \begin{slide}{} Now replace $\pm 1$ constraints with $x_i^2=1, \forall i.$ \[ \begin{array}{ccc} (P_E)&\max & q_0(x)=x^TQx-2c^Tx \\ & \mbox{~subject to~} & x_i^2 = 1,~~i=1, \cdots, n. \\ \end{array} \] $B_L$ denotes Lagrangian relaxation bound. Following our theme: \begin{center} \fbox{ {\bf Theorem } $B_L$ equals all above bounds. } \end{center} ref PRW.\\ (The proofs come from exploiting strong Lagrangian duality of TRS) \end{slide} \begin{slide}{} {\bf A Strengthened SDP Bound for MC} (second lifting - motivated by the strong duality results to follow) lifting procedure \[ X=xx^T\] implies \[X^2=xx^Txx^T=nX.\] equivalent quadratic matrix model for CQ \end{slide} \begin{slide}{} \begin{eqnarray*} \mu^*:=&\max& \tr QX\\ &\mbox{s.t.}&\diag(X)=e\\ &&X^2-nX=0\\ &&X \succeq 0. \end{eqnarray*} Note: $X^2=nX,~\trace X = n$ implies $X$ is rank one. But, we cannot solve this nonconvex problem in general, i.e. we have to look at the Lagrangian relaxation. 
\end{slide}
\begin{slide}{}
notation:\\
~$\bullet$ $S \in \Sn,$\\
~$\bullet$ $s=\svec(S) \in \Re^{t(n)}$ vector formed (columnwise) from $S$ ignoring strictly lower triang.\\
~$\bullet$ $S=\sMat(s)$ inverse of $\svec$\\
~$\bullet$ $\hMat (v)$ is adjoint of $\svec$, off-diagonal terms are multiplied by a half\\
~$\bullet$ $\dsvec (S)$ is adjoint of $\sMat$, operator like $\svec$ but off diagonal elements are multiplied by 2\\
~$\bullet$ $\sdiag(s):= \diag( \sMat(s))$\\
~$\bullet$ $\vsMat(s)=\kvec(\sMat(s))$
\[\vsMat^*(v)=\dsvec \left( \left(\kmat(v)+\kmat(v)^T\right)/2 \right). \]
\end{slide}
\begin{slide}{}
As above --- use the equivalent program
\[ {\rm MC_O} \begin{array}{ccc}
\mu^*= &\max & \frac 12 \tr QX \\
&\mbox{s.t.}& \diag(X) = e\\
&&X^2-nX=0\\
&&X \succeq 0,
\end{array} \]
replace linear constraint with $ ||\diag(X) - e||^2=0$ and homogenize the problem. use $X=\sMat(x)$
\[ {\rm MC_O} \begin{array}{cll}
\mu^*= &\max & \frac 12 \tr \left(Q\, \sMat(x)\right)y_0 \\
&\mbox{s.t.}& \sdiag(x)^T \sdiag(x) \\
&& ~~~- 2 e^T \sdiag(x)y_0 + n=0\\
&&\sMat(x)^2-n\, \sMat(x)y_0=0\\
&&\sMat(x)y_0 \succeq 0\\
&&1-y_0^2=0.
\end{array} \]
\end{slide}
\begin{slide}{}
Lagrangian dual
\[ \begin{array}{lcc}
\mu^*\leq \mu^*_3:= \min\limits_{T \succeq 0,w,S} \max\limits_{y_0^2=1} \frac 12 \tr \left(Q\sMat(x)\right)y_0 \\
~~~~~~~~~~~~~~+w\left( \sdiag(x)^T \sdiag(x)\right. \\
~~~~~~~~~~~~~~~~~~~~~ \left. -2 e^T \sdiag(x)y_0 + n\right)\\
~~~~~~~~~~~~~~~~ +\tr S\left((\sMat(x))^2-n\, \sMat(x)y_0\right)\\
~~~~~~~~~~~~~~~~~~~~-\tr T(\sMat(x)y_0).
\end{array} \]
Note: $\sMat(x) \succeq 0$ necessarily; $\sdiag(x)=e$ necessarily holds. Therefore, relaxation is strengthened. Move $y_0$ into Lagrangian without increasing the duality gap, since it is a TRS, i.e., add $ t(1-y_0^2)$ into Lagrangian. Then apply the hidden semidefinite constraint theme.
\end{slide}
\begin{slide}{}
The constant part (no Lagrange multipliers) of the Hessian is:
\beq
\label{eq:hessian1}
H_c:=\pmatrix{ 0 & \frac 12 \dsvec(Q)^T\cr
\frac 12 \dsvec(Q) & 0\cr }.
\eeq
The nonconstant part of the Hessian is made up of a linear combination of matrices, i.e. it is a linear operator on the Lagrange multipliers. Note that
\begin{eqnarray*}
\tr S (\sMat(x))^2 &=& \tr S\, \sMat(x)\, I\, \sMat(x)^T\\
&=& \left(\kvec( \sMat(x))\right)^T\left( I \Kprod S \right) \left(\kvec( \sMat(x))\right)\\
&=& x^T \vsMat^* \left( I \Kprod S \right) \vsMat(x).
\end{eqnarray*}
For notational convenience, we use the {\em negative} of the Hessian and split it up into four linear operators:
\beq
\label{eq:hessian2}
\begin{array}{ccc}
\Hcal(w,S,T,t):= \Hcal_1(w) + \Hcal_2(S) + \Hcal_3(T) + \Hcal_4(t) \\
:=w\pmatrix{ 0 & 2\, \svec(I)^T\cr
2\, \svec(I) & -2\Diag(\svec(I)) } \\
+\pmatrix{ 0 & 2n\, \dsvec(S)^T\cr
2n\, \dsvec(S) & -2\vsMat^*(I \Kprod S) \vsMat }\\
+\pmatrix{ 0 & 2\, \dsvec(T)^T\cr
2\, \dsvec(T) & 0 }\\
+t\pmatrix{ 2 & 0\cr
0 & 0 }.\\
\end{array}
\eeq
We then get the (equivalent to the Lagrangian dual) semidefinite program
\beq
\label{eq:MCDSDP}
{\rm MCDSDP} \begin{array}{ccc}
\mu^*_{MCSDP3} = &\min & nw +\tr 0S + \tr 0T +t\\
&\mbox{s.t.}& \Hcal(w,S,T,t) \succeq H_c\\
&& T \succeq 0, S=S^T\\
\end{array}
\eeq
The dual of this SDP is the strengthened SDP relaxation of MC:
\beq
\label{eq:MCPSDP}
{\rm MCPSDP} \begin{array}{ccc}
\mu^*_{MCSDP3} = &\max & \trace H_cY\\
&\mbox{s.t.}& \Hcal_1^*(Y) = n\\
& & \Hcal_2^*(Y) = 0\\
& & \Hcal_3^*(Y) \preceq 0\\
& & \Hcal_4^*(Y) = 1\\
&& Y \succeq 0.
\end{array}
\eeq
\end{slide}
\begin{slide}{}
{\bf Convex Quadratic Program}
\begin{eqnarray*}
\mu^*:=&\min& q_0(x)\\
&\mbox{s.t.}& q_k(x) \leq 0,\quad k=1, \ldots, m,
\end{eqnarray*}
where all $q_i(x)$ are convex quadratic functions. The dual is
\[ \mbox{DCQP}\qquad \nu^*:= \max\limits_{\lambda \geq 0}\ \min\limits_x\ q_0(x) + \sum_{k=1}^m \lambda_k q_k(x).
\]
$\bullet$ (KKT) conditions are sufficient for global optimality
$\bullet$ if the primal value of CQP is bounded then it is attained and there is no duality gap, (e.g. Peterson and Ecker or Terlaky).
$\bullet$ the dual may not be attained
\end{slide}
\begin{slide}{}
{\bf Nonconvex Cases}
{\bf 1. Rayleigh Quotient}
$A=A\tran \in \Sn$
\[ \lambda_{\min} = \min \{x\tran Ax : x\tran x = 1\}. \]
tractable (nonconvex) problem
no (Lagrangian) duality gap for this nonconvex problem
\[ \begin{array}{rcl}
\mu^*&:=& \max\limits_{\lambda}\ \min\limits_x\ x\tran Ax -\lambda (x\tran x - 1)\\
&=& \max\limits_{A - \lambda I \succeq 0}\ \min\limits_x\ x\tran (A -\lambda I)x + \lambda\\
&=& \max\limits_{A - \lambda I \succeq 0}\ \lambda\\
&=& \lambda_{\min}
\end{array} \]
\end{slide}
\begin{slide}{}
{\bf 2. Trust Region Subproblem}
\begin{eqnarray*}
&\mu^*:=\min& q_0(x)\\
&\mbox{s.t.}& x\tran x - \delta^2\leq 0 \mbox{ (or } =0). \\
\end{eqnarray*}
for ``$\le$,'' the Lagrangian dual is:
\[ \mbox{DTRS}\qquad \nu^*:=\max\limits_{\lambda \geq 0}\ \min\limits_x\ q_0(x) + \lambda (x\tran x - \delta^2). \]
strong duality holds and equivalent to (ref Stern--Wolk.) the (concave) nonlinear semidefinite program
% Sign fixed: \min_x L(x,\lambda) = -g_0\tran (Q+\lambda I)^{\dagger} g_0 - \lambda\delta^2
% (with q_0(x)=x\tran Qx + 2g_0\tran x as defined earlier).
\begin{eqnarray*}
\mbox{DTRS}\qquad &\nu^*:=\max& -g_0\tran (Q+\lambda I)^{\dagger} g_0 - \lambda \delta^2\\
&\mbox{s.t.}& Q+\lambda I \succeq 0\\
&&\lambda \geq 0.
\end{eqnarray*}
theme: tractable problem (ref Mor\'e--Sorensen) but strong duality holds.
\end{slide}
\begin{slide}{}
{\bf 3. Two Trust Region Subproblem}
TTRS consists in minimizing a (possibly nonconvex) quadratic function subject to a norm and a least squares constraint. ref in SQP methods by CDT.
TTRS can have a nonzero duality gap, ref PY and Y.
If the objective is not convex, then the primal may not be attained, ref LZ.
TRS can have at most one local and nonglobal optimum, ref M.
It is still an open problem whether TTRS is an NP-hard or a polynomial time problem.
\end{slide}
\begin{slide}{}
Note: a quadratic constraint can be written as
\[ y\tran Py \leq \delta \]
after the homogenization. This is lifted to
\[ \trace PY \leq \delta. \]
Second lifting:
\[ YPY \preceq \delta Y. \]
This strictly strengthens the SDP relaxation.
\end{slide}
\begin{slide}{}
{\bf Orthogonally Constrained Programs with Zero Duality Gaps}
\[ \{X: X X\tran =I\} \]
Stiefel manifold ref EAS
$A$ and $B$ $n\times n$ symmetric matrices
\begin{equation}
\begin{array}{rcl}
{\rm QQP_O}\qquad \mu^O:=&\min& \tr AXBX\tran\\
&{\rm s.t.}& XX\tran=I.
\end{array}
\end{equation}
Tractable problem using classical Hoffman--Wielandt inequality. But, Lagrangian dual has a duality gap, ref ZKRW.
\end{slide}
\begin{slide}{}
The constraints $XX\tran=I$ and $X\tran X=I$ are equivalent. Add redundant constraints.
\begin{eqnarray*}
{\rm QQP_{OO}}\qquad \mu^O:=&\min& \tr AXBX\tran\\
&{\rm s.t.}& XX\tran=I,\ X\tran X=I.
\end{eqnarray*}
dual problem is
\begin{eqnarray*}
{\rm DQQP_{OO}}\\
\mu^O \geq \mu^D:=&\max& \tr S+\tr T\\
&\mbox{s.t.}& (I\Kprod S)+(T\Kprod I)\preceq (B\Kprod A)\\
&& S=S\tran,\ T=T\tran.
\end{eqnarray*}
{\bf Theorem} Strong duality holds for $\rm QQP_{OO}$ and $\rm DQQP_{OO},$ i.e. $\mu^D=\mu^O$ and both primal and dual are attained. \QED
Theme again holds.
\end{slide}
\begin{slide}{}
Other applications: Weighted Sums of Eigenvalues; Graph Partitioning Problem; TRS-like constraints $\{X: XX\tran \preceq I\}$ (extension of Hoffman--Wielandt inequality)
\end{slide}
\begin{slide}{}
Concluding Remarks:
In each case of a tractable bound for a nonconvex problem, the structure allows for redundant constraints to be added to close the Lagrangian duality gap.
What is the correct question about ``best''?
Can we close or reduce the duality gap in general? (ref KT)
\end{slide}
\begin{slide}{}
\end{slide}
\begin{slide}{}
\end{slide}
\end{document}