%======================= main.tex =========================================== \documentstyle{slides} %\input psfig \newtheorem{exam}{Example} \newtheorem{prop}{Proposition} \newtheorem{lem}{Lemma} \newtheorem{thm}{Theorem} \newtheorem{cor}{Corollary} \newcommand{\Diag}{{\rm Diag\,}} \newcommand{\diag}{{\rm diag\,}} \newcommand{\tr}{{\rm trace\,}} \newcommand{\trace}{{\rm trace\,}} \pagestyle{plain} \begin{document} \blackandwhite{example} \begin{slide}{} %This is slide 1 \begin{center} {\bf A General Framework\\ for\\ Trust Region Subproblems\\ with \\ Applications to Large Scale Minimization} \end{center} and\\ Semidefinite Programming\\ (Linear Programming for the 90's and 00's) ~~\\ ~~\\ ~~\\ ~~\\ ~~\\ ~~\\ Henry Wolkowicz\\ University of Waterloo (work with Franz Rendl, Univ. of Graz) \end{slide} \begin{slide}{} %%This is slide 2. {\bf The Trust Region Subproblem:} Let \[ q(x):= x^tAx - 2a^tx, \] \begin{eqnarray*} (TRS)~~~~~ \mu^* := &\min& q(x)\\ &\mbox{s.t.}& x^tx = s^2~~(\leq s^2). \end{eqnarray*} where $A=A^t$, not necessarily semidefinite\\ $a \in \Re^n$, $s>0.$ ~~\\ (application: quadratic model for unconstrained minimization) ~~\\ ~~\\ Main Result: an algorithm that solves TRS using only matrix-vector multiplication; therefore exploits sparsity \end{slide} \begin{slide}{} %%This is slide 3 {\bf Semidefinite Programming Framework} (linear programming for the 90's and 00's) \[ (SDP) \begin{array}{cc} \max & \trace CZ\\ \mbox{s.t.} & L(Z) = B\\ & Z \succeq 0 \end{array} \] $Z \succeq 0$ denotes positive semidefinite\\ $L$ is a linear operator \end{slide} \begin{slide}{} %%This is slide 4 Applications: e.g. 
combinatorial optimization \[ \begin{array}{cc} \max & x^tQx=\trace Qxx^t\\ \mbox{s.t.} & x_i = \pm 1,~ x^2_i=1~\forall i \end{array} \] equivalently (with $X \equiv xx^t$) \[ (SDP) \begin{array}{cc} \max & \trace QX\\ \mbox{s.t.} & \diag (X) = e\\ & X \succeq 0 \end{array} \] rank($X$)=1 ~~(missing constraint - relaxation) ~~\\ ~~\\ ~~\\ Other Applications:\\ Control theory (Lyapunov stability, Riccati equations), structural computational complexity theory, min-max eigenvalue problems, graph theory \end{slide} \begin{slide}{} %%This is slide 5 Back to TRS: \begin{eqnarray*} (TRS)~~~~~ \mu^* := &\min& q(x)\\ &\mbox{s.t.}& x^tx = s^2~~(\leq s^2). \end{eqnarray*} nonlinear least squares, Levenberg 1944, Marquardt 1963 Applications to general minimization, Goldfeld, Quandt and Trotter 1966 Early theoretical results, Forsythe and Golub 1965 Hebden 1973, Reinsch 1967 efficient numerics, Gay 1981 state of the art algorithm, Mor\'e-Sorensen 1983 \end{slide} %\begin{slide}{} %% start figure 1 % \begin{figure}[htb] % \centering % \centerline{\ %\psfig{figure=trpic.ps,height=4in,width=5.9in}} % \label{fig1} % \ecfigure{Geometric Graph with Bisection} %% end figure 1 %\end{slide} \begin{slide}{} %%This is slide 5 Currently: strong Lagrangian Duality, Stern and Wolkowicz 1993 polynomial time, Ye 1992 at most one local-nonglobal, Martinez 1993 two trust regions, CDT 1984 multiple trust regions: Shor, Ramana, Wolkowicz Lanczos-eigenvalue approach: Sorensen 1994,\\ (with SDP) Rendl and Wolkowicz 1994 \end{slide} \begin{slide}{} %%This is slide 7 \[ L(x,\lambda) = q(x) - \lambda(x^tx-s^2) \] {\bf Theorem}(Stern and Wolkowicz, 1993)\\ (i) Strong duality holds for TRS, i.e. \[ \mu^* = \min_x \max_\lambda L(x,\lambda) = \max_\lambda \min_x L(x,\lambda). 
\] Moreover, attainment holds for $x$ and uniquely for $\lambda$.\\ (ii) A dual problem for TRS, without a duality gap, is \[ (D) \begin{array}{c} \max_{A-\lambda I \succeq 0} h(\lambda), \end{array} \] where \[h(\lambda) = \lambda s^2 - a^t(A-\lambda I)^\dagger a, ~(\mbox{concave}) \] and $\cdot^\dagger$ denotes the Moore-Penrose generalized inverse. \end{slide} \begin{slide}{} %%This is slide 8 {\bf Proof} \begin{eqnarray*} \mu^* & = & \min\limits_{||x||=s} q(x) \nonumber\\ & \geq & \max_\lambda \min_{x} L(x,\lambda) \nonumber\\ & =& \max\limits_{A-\lambda I \succeq 0} \min_{x} L(x,\lambda), \end{eqnarray*} hidden constraints - Hessian $\succeq 0$ and stationarity global minimum of the Lagrangian is \[ x_\lambda := (A-\lambda I)^\dagger a, \] Therefore, the right hand side of the above is equal to \begin{eqnarray*} & =& \max\limits_{A-\lambda I \succeq 0} \lambda s^2 -a^t(A-\lambda I)^\dagger a\\ & =& \max\limits_{A-\lambda I \succeq 0} h(\lambda), \end{eqnarray*} This functional is strictly concave and coercive.\\ $h^\prime = 0$ feasibility \end{slide} \begin{slide}{} %%This is slide 9 {\bf Example} (hard case) Let \[ A = \left[ \begin{array}{cc} 1 & 0 \\ 0 & -1 \end{array} \right],~a=(1~0)^t,~s=1. \] $A-\lambda I \succeq 0,$ implies $\lambda \leq -1$ $\lambda^* = -1$, $\mu^* = -1.5 = h(\lambda^*)$ $x_\lambda = (A-\lambda I)^{\dagger} a = (.5 ~ 0)^t$ $q(x_\lambda) = -.75 > \mu^*$ $||x_\lambda|| < 1$ complementary slackness fails zero duality gap but no primal attainment \end{slide} \begin{slide}{} %%This is slide 10 {\bf Nonlinear Primal-Dual Pair} The dual problem is \[ \begin{array}{c} \max_{A-\lambda I \succeq 0} h(\lambda) \end{array} ~~~(D) \] where \[h(\lambda) = \lambda s^2 - a^t(A-\lambda I)^\dagger a \] dual of the dual: \[ \begin{array}{cc} \min & h(\lambda)+ \tr Z(A-\lambda I) \\ \mbox{s.t.} & s^2-||(A-\lambda I)^\dagger a ||^2 -\trace Z = 0 \\ & Z \succeq 0. 
\end{array}~~~ (DD) \] For feasible pair $Z,\lambda$, the duality gap is: \[\tr Z(A-\lambda I)~ \] complementary slackness \end{slide} \begin{slide}{} %%%This is slide 11 {\bf Gay and Mor\'e-Sorensen Algorithm\\ explained by SDP Framework} maintain 2nd order cond. $(A-\lambda I) \succ 0$\\ maximize the dual function $h(\lambda)$\\ ~~ (Newton's method) solve $0=h^\prime (\lambda)=s^2-||x_\lambda||^2$\\ better solve $0=\frac 1s - \frac 1{||x_\lambda||}$\\ (almost linear, convex) Cholesky factoriz: $R^tR=A-\lambda I$ \\ ~~~~~~~~~~~(safeguard positive def.)\\ Solve~~~~~~~~~~~~~~: $R^tRp=a$\\ Solve~~~~~~~~~~~~~~: $R^tq=p$\\ iterate~~~~~~~~~~~~: $\lambda \leftarrow \lambda + \left( \frac {||p||}{||q||}\right)^2 \left( \frac {||p||-s}{s}\right) $ \end{slide} \begin{slide}{} %%This is slide 12 {\bf Hard Case} indicated by $0 < s^2-||x_\lambda||^2$ ~~inside TR\\ DD feasible \[s^2-||(A-\lambda I)^\dagger a ||^2 -\trace Z = 0 \] so take a primal step to boundary and try to minimize the duality gap with \[ Z=zz^t,~\trace Z = ||z||^2\] \[\min \trace Z(A-\lambda I) = z^t(A-\lambda I)z\] (eigenvalue problem) \end{slide} \begin{slide}{} %%This is slide 13 Another approach:\\ {\bf Homogenization of TRS} \begin{eqnarray*} \mu^* &=& \min\limits_{||x||=s,~y_0^2=1} x^tAx - 2y_0a^tx \\ &=& \max\limits_t \min\limits_{||x||=s} x^tAx - 2y_0a^tx +ty_0^2-t \\ &=& \max\limits_t \min\limits_{||x||=s,~y_0^2=1} x^tAx - 2y_0a^tx +ty_0^2-t \\ &=& \max\limits_t \min\limits_{||x||^2+y_0^2=s^2+1} x^tAx - 2y_0a^tx +ty_0^2-t \end{eqnarray*} \[ = \max\limits_t (s^2+1)\lambda_1(D(t)) -t\] $$D(t) = \left( \begin{array}{cc} t & -a^t \\ -a & A \end{array} \right). 
$$ \end{slide} \begin{slide}{} %%This is slide 14 {\bf unconstrained dual problem to TRS} $$D(t) = \left( \begin{array}{cc} t & -a^t \\ -a & A \end{array} \right) $$ $y=\left( \begin{array}{c} y_0\\ x \end{array} \right)$ normalized eigenvector for $\lambda_{\min} D(t)$ \[ k(t) = (s^2+1)\lambda_{\min}(D(t)) -t, \] \[ \mbox{*** }~~~ \max_t k(t) \] Note \[ k^\prime(t) = (s^2+1)y_0^2 -1=0 \] is feasibility again for $x$ \end{slide} \begin{slide}{} %%This is slide 15 {\bf SDP Primal-Dual Pair} \[ \max_t k(t) = (s^2+1)\lambda_{\min}(D(t)) -t, \] add the variable $\lambda$ \[ \begin{array}{cc} \max & (s^2+1)\lambda - t \\ \mbox{s.t.} & D(t) \succeq \lambda I \end{array} (DSDP) \] Lagrangian dual of this dual is: \[ \begin{array}{cc} \min & \tr D(0)X \\ \mbox{s.t.} & \tr X = s^2+1 \\ & X_{11} = 1\\ & X \succeq 0 \end{array}(PSDP) \] \end{slide} \begin{slide}{} %%This is slide 16 primal-dual interior point method: approx. solve perturbed optimality conditions using Newton's method: \[ \begin{array}{c} \tr X = s^2+1 \\ X_{11} = 1\\ D(t) - \lambda I - Z = 0\\ \mu Z^{-1} - X = 0 \\ X \succ 0, Z \succ 0 \end{array} \] \end{slide} \begin{slide}{} %%This is slide 17 Dual Simplex Method basic feasible dual solution at given $t$: \[ D(t) - \lambda_{\min}(D(t)) I \succeq 0 ~ \mbox{and singular}\] eigenvector $ y=\left( \begin{array}{c} y_0\\x \end{array} \right) $ normalize $y_0=1$ test primal feasibility of $X=yy^t$ to get better value for $t$ (interpolation problem) Alternatively, normalize $||y||=1,$ and exploit almost linear structure of \[ \frac 1{y_0(t)} = \sqrt{s^2+1} \] \end{slide} \begin{slide}{} %%This is slide 18 \[ Z = D(t) -\lambda I \] complementary slackness \[ \trace ZX = 0 \] \[ 0=ZX= \left[ \begin{array}{cc} t-\lambda & -a^t \\ -a & A-\lambda I \end{array} \right] \left[ \begin{array}{cc} X_{11} & y^t\\ y & \bar{X} \end{array} \right]. 
\] \[ t=a^ty+\lambda; \] \[ \bar{X}a=(t-\lambda)y; \] \[ (A-\lambda I)y=a; \] \[ (A-\lambda I)\bar{X} = ay^t; \] \[ A-\lambda I \succeq 0. \] \end{slide} \begin{slide}{} %%This is slide 19 \[ \lambda_i (A) = \alpha_i \] $J := \lbrace i: a_i \not= 0 \rbrace$ $ \det (D(t) - \lambda I)=$ \begin{eqnarray*} &=&(t- \lambda ) \prod_{k=1}^n (\alpha_k - \lambda ) - \sum_{k=1}^n a_k^2 \prod_{j \not= k}^n (\alpha_j - \lambda )\\ &=&\lbrack t- \lambda - \sum_{j \in J} \frac{a_j^2}{\alpha_j - \lambda} \rbrack \prod_{k=1}^n (\alpha_k - \lambda )\\ &=&d(\lambda) \prod_{k=1}^n (\alpha_k - \lambda ) \end{eqnarray*} \end{slide} \begin{slide}{} %%This is slide 20 Easy case: \begin{enumerate} \item[] $$\lambda_1 (D(t)) \mbox{~is simple and~} \lambda_1 (D(t)) < \alpha_1$$ \item[] if $\lambda < \alpha_1$ given. Then \[ D(d(\lambda)) - \lambda I \succeq 0 ~ \mbox{and singular}. \] \item[] Let $y(t)$ be a normalized eigenvector corresponding to $\lambda_1(D(t))$ and denote its first component by $y_0(t)$. Then $y_0(t) \not= 0$. (wlog $>0$) \item[] $y_0(t) : \Re \mapsto (0,1)$ is strictly monotonically decreasing. \end{enumerate} \end{slide} \begin{slide}{} %%This is slide 21 {\bf Theorem}\\ $y(t)=\left(\begin{array}{c} y_0(t)\\ z(t) \end{array} \right)$ normal eigenvector for \[\lambda_1(D(t))\] Then\\ $y_0(t) \not= 0$ and $v := \frac{1}{y_0(t)}z(t)$ is unique opt of \[ \min \lbrace v^tAv -2a^tv: v^tv = \frac{1 - y_0(t)^2}{y_0(t)^2} \rbrace. 
\] Conversely, if $v \in \Re^n ~and~ \lambda \in \Re$ satisfy\\ \[(A - \lambda I)v = a; ~~A - \lambda I \succeq 0;~ v^tv = \frac{1-y_0^2}{y_0^2}\] thereby defining $ y_{0} > 0,$\\ then\\ $y := y_0\left(\begin{array}{c}1\\ v \end{array} \right)$\\ is eigenvector for $D(t)$ for $t := a^tv + \lambda$\\ and \[\lambda_1(D(t)) = \lambda\] \end{slide} \begin{slide}{} %%This is slide 22 \begin{center} {\bf Dual-Simplex Method} \end{center} \begin{tabbing} 123\=456\=789\=\kill {\bf Initialization:}\\ Find bounds for intervals of uncertainty.\\ $ t_0^l \leq t^* \leq t^u_0$\\ $\mu^l \leq \mu^* \leq \mu^u$\\ ~~\\ Begin the main iterations:\\ {\bf while} (feasibility or duality gap tols too large) ~~\\ \> Part 1: Find a new estimate $t_+$.\\ \> $t_+ = \frac{t_c^l+t_c^u}{2}$ (default value) \\ \> \> Find intersection point of 2 tangent lines\\ \> \> ~~~~~to the graph of $k(t)$\\ \> \> update the upper bound $\mu^u$\\ \> {\bf if} (last iterate on infeasible side, good side)\\ \> \> use inverse interpolation\\ \> {\bf elseif} (on feasible side, bad side)\\ \> \> use inverse interpolation\\ \> {\bf endif}\\ \> Part 2: Update info for new estimate $t_+$.\\ \> {\bf if} $\lambda > 0$ (wrong sign for Lagrange multiplier)\\ \> \> {\bf if} $t_+ \leq t^*$ then {\bf STOP}\\ \> {\bf else} (correct sign for Lagrange multiplier)\\ \> \> Update lower bound using dual function\\ \> {\bf endif}\\ \> {\bf if} $t_+$ in the hard case\\ \> \> Perform a PSDP (negative curvature) \\ \> \> ~~~~step, update bounds \\ \> {\bf else}\\ \> \> steepest descent step - update bounds\\ \> {\bf endif}\\ \> Update the tolerances\\ {\bf endwhile} \end{tabbing} \end{slide} \begin{slide}{} Tests on SUN SPARC station 1 using MATLAB \begin{enumerate} \item dimensions 1540 to 1565 \\ 30 problems for each dimension \\ density of nonzeros .01 \\ average iterations 4.4453 \\ average cpu time 54.1876 sec \\ average work 1.73 times one Lanczos step\\ \item dimensions 1540 to 1558 \\ 30 problems for each dimension \\ density of nonzeros .013 \\ 
average iterations 4.1148 \\ average cpu time 56.1322 sec \\ average work 1.75 times one Lanczos step\\ comment: a multiple of the identity was added to get a positive definite Hessian \newpage \item dimensions 1500 to 1565 \\ 2 problems for each dimension \\ density of nonzeros .01 \\ average iterations 4.2951 \\ average cpu time 57.0429 sec \\ average work 4.8589 times one Lanczos step\\ comment: hard case; multiplicity of smallest eigenvalue varied from 1 to 6 \item dimensions 1800 to 1865 \\ 12 problems for each dimension \\ density of nonzeros .005 \\ average iterations 4.2453 \\ average cpu time 28.1692 sec \\ average work 4.2589 times one Lanczos step\\ comment: hard case; multiplicity of smallest eigenvalue varied from 1 to 6 \end{enumerate} \end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide13} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide131} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide131} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide132} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide14} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide141} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide15} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide151} %\end{slide} \end{document}