%======================= main.tex =========================================== \documentstyle{slides} %\input psfig \newtheorem{exam}{Example} \newtheorem{prop}{Proposition} \newtheorem{lem}{Lemma} \newtheorem{thm}{Theorem} \newtheorem{cor}{Corollary} \newcommand{\Diag}{{\rm Diag\,}} \newcommand{\diag}{{\rm diag\,}} \newcommand{\tr}{{\rm trace\,}} \newcommand{\trace}{{\rm trace\,}} \pagestyle{plain} \begin{document} \blackandwhite{example} \begin{slide}{} %This is slide 1 \begin{center} {\bf A General Framework\\ for\\ Trust Region Subproblems\\ with \\ Applications to Large Scale Minimization} \end{center} and\\ Semidefinite Programming\\ (Linear Programming for the 90's and 00's) ~~\\ ~~\\ ~~\\ ~~\\ ~~\\ ~~\\ Henry Wolkowicz\\ University of Waterloo (work with Franz Rendl, Univ. of Graz) \end{slide} \begin{slide}{} %%This is slide 2. {\bf The Trust Region Subproblem:} Let \[ q(x):= x^tAx - 2a^tx, \] \begin{eqnarray*} (TRS)~~~~~ \mu^* := &\min& q(x)\\ &\mbox{s.t.}& x^tx = s^2~~(\leq s^2). \end{eqnarray*} where $A=A^t$, not necessarily semidefinite\\ $a \in \Re^n$, $s>0.$ ~~\\ (application: quadratic model for unconstrained minimization) ~~\\ ~~\\ Main Result: an algorithm that solves TRS using only matrix-vector multiplication; therefore exploits sparsity \end{slide} \begin{slide}{} %%This is slide 3 {\bf Semidefinite Programming Framework} (linear programming for the 90's and 00's) \[ (SDP) \begin{array}{cc} \max & \trace CZ\\ \mbox{s.t.} & L(Z) = B\\ & Z \succeq 0 \end{array} \] $Z \succeq 0$ denotes positive semidefinite\\ $L$ is a linear operator \end{slide} \begin{slide}{} %%This is slide 4 Applications: e.g. 
combinatorial optimization \[ \begin{array}{cc} \max & x^tQx=\trace Qxx^t\\ \mbox{s.t.} & x_i = \pm 1,~ x^2_i=1~\forall i \end{array} \] equivalently (with $X \equiv xx^t$) \[ (SDP) \begin{array}{cc} \max & \trace QX\\ \mbox{s.t.} & \diag (X) = e\\ & X \succeq 0 \end{array} \] rank($X$)=1 ~~(missing constraint - relaxation) ~~\\ ~~\\ ~~\\ Other Applications:\\ Control theory (Lyapunov stability, Riccati equations), structural computational complexity theory, min-max eigenvalue problems, graph theory \end{slide} \begin{slide}{} %%This is slide 5 Back to TRS: \begin{eqnarray*} (TRS)~~~~~ \mu^* := &\min& q(x)\\ &\mbox{s.t.}& x^tx = s^2~~(\leq s^2). \end{eqnarray*} nonlinear least squares, Levenberg 1944, Marquardt 1963 Applications to general minimization, Goldfeld, Quandt and Trotter 1966 Early theoretical results, Forsythe and Golub 1965 Hebden 1973, Reinsch 1967 efficient numerics, Gay 1981 state of the art algorithm, Mor\'e-Sorensen 1983 \end{slide} %\begin{slide}{} %% start figure 1 % \begin{figure}[htb] % \centering % \centerline{\ %\psfig{figure=trpic.ps,height=4in,width=5.9in}} % \label{fig1} % \ecfigure{Geometric Graph with Bisection} %% end figure 1 %\end{slide} \begin{slide}{} %%This is slide 5 Currently: strong Lagrangian Duality, Stern and Wolkowicz 1993 polynomial time, Ye 1992 at most one local-nonglobal, Martinez 1993 two trust regions, CDT 1984 multiple trust regions: Shor, Ramana, Wolkowicz Lanczos-eigenvalue approach: Sorensen 1994,\\ (with SDP) Rendl and Wolkowicz 1994 \end{slide} \begin{slide}{} %%This is slide 7 \[ L(x,\lambda) = q(x) - \lambda(x^tx-s^2) \] {\bf Theorem}(Stern and Wolkowicz, 1993)\\ (i) Strong duality holds for TRS, i.e. \[ \mu^* = \min_x \max_\lambda L(x,\lambda) = \max_\lambda \min_x L(x,\lambda). 
\] Moreover, attainment holds for $x$ and uniquely for $\lambda$.\\ (ii) A dual problem for TRS, without a duality gap, is \[ (D) \begin{array}{c} \max_{A-\lambda I \succeq 0} h(\lambda), \end{array} \] where \[h(\lambda) = \lambda s^2 - a^t(A-\lambda I)^\dagger a, ~(\mbox{concave}) \] and $\cdot^\dagger$ denotes the Moore-Penrose generalized inverse. \end{slide} \begin{slide}{} %%This is slide 8 {\bf Proof} \begin{eqnarray*} \mu^* & = & \min\limits_{||x||=s} q(x) \nonumber\\ & \geq & \max_\lambda \min_{x} L(x,\lambda) \nonumber\\ & =& \max\limits_{A-\lambda I \succeq 0} \min_{x} L(x,\lambda), \end{eqnarray*} hidden constraints - Hessian $\succeq 0$ and stationarity global minimum of the Lagrangian is \[ x_\lambda := (A-\lambda I)^\dagger a, \] Therefore, the right hand side of the above is equal to \begin{eqnarray*} & =& \max\limits_{A-\lambda I \succeq 0} \lambda s^2 -a^t(A-\lambda I)^\dagger a\\ & =& \max\limits_{A-\lambda I \succeq 0} h(\lambda), \end{eqnarray*} This functional is strictly concave and coercive.\\ $h^\prime = 0$ feasibility \end{slide} \begin{slide}{} %%This is slide 9 {\bf Example} (hard case) Let \[ A = \left[ \begin{array}{cc} 1 & 0 \\ 0 & -1 \end{array} \right],~a=(1~0)^t,~s=1. \] $A-\lambda I \succeq 0,$ implies $\lambda \leq -1$ $\lambda^* = -1$, $\mu^* = -1.5 = h(\lambda^*)$ $x_\lambda = (A-\lambda I)^{\dagger} a = (.5 ~ 0)^t$ $q(x_\lambda) = -.75 > \mu^*$ $||x_\lambda|| < 1$ complementary slackness fails zero duality gap but no primal attainment \end{slide} \begin{slide}{} %%This is slide 10 {\bf Nonlinear Primal-Dual Pair} The dual problem is \[ \begin{array}{c} \max_{A-\lambda I \succeq 0} h(\lambda) \end{array} ~~~(D) \] where \[h(\lambda) = \lambda s^2 - a^t(A-\lambda I)^\dagger a \] dual of the dual: \[ \begin{array}{cc} \min & h(\lambda)+ \tr Z(A-\lambda I) \\ \mbox{s.t.} & s^2-||(A-\lambda I)^\dagger a ||^2 -\trace Z = 0 \\ & Z \succeq 0. 
\end{array}~~~ (DD) \] For feasible pair $Z,\lambda$, the duality gap is: \[\tr Z(A-\lambda I)~ \] complementary slackness \end{slide} \begin{slide}{} %%%This is slide 11 {\bf Gay and Mor\'e-Sorensen Algorithm\\ explained by SDP Framework} maintain 2nd order cond. $(A-\lambda I) \succ 0$\\ maximize the dual function $h(\lambda)$\\ ~~ (Newton's method) solve $0=h^\prime (\lambda)=s^2-||x_\lambda||^2$\\ better solve $0=\frac 1s - \frac 1{||x_\lambda||}$\\ (almost linear, convex) Cholesky factoriz: $R^tR=A-\lambda I$ \\ ~~~~~~~~~~~(safeguard positive def.)\\ Solve~~~~~~~~~~~~~~: $R^tRp=a$\\ Solve~~~~~~~~~~~~~~: $R^tq=p$\\ iterate~~~~~~~~~~~~: $\lambda \leftarrow \lambda + \left( \frac {||p||}{||q||}\right)^2 \left( \frac {||p||-s}{s}\right) $ \end{slide} \begin{slide}{} %%This is slide 12 {\bf Hard Case} indicated by $0 < s^2-||x_\lambda||^2$ ~~inside TR\\ DD feasible \[s^2-||(A-\lambda I)^\dagger a ||^2 -\trace Z = 0 \] so take a primal step to boundary and try to minimize the duality gap with \[ Z=zz^t,~\trace Z = ||z||^2\] \[\min \trace Z(A-\lambda I) = z^t(A-\lambda I)z\] (eigenvalue problem) \end{slide} \begin{slide}{} %%This is slide 13 Another approach:\\ {\bf Homogenization of TRS} \begin{eqnarray*} \mu^* &=& \min\limits_{||x||=s,~y_0^2=1} x^tAx - 2y_0a^tx \\ &=& \max\limits_t \min\limits_{||x||=s} x^tAx - 2y_0a^tx +ty_0^2-t \\ &=& \max\limits_t \min\limits_{||x||=s,~y_0^2=1} x^tAx - 2y_0a^tx +ty_0^2-t \\ &=& \max\limits_t \min\limits_{||x||^2+y_0^2=s^2+1} x^tAx - 2y_0a^tx +ty_0^2-t \end{eqnarray*} \[ = \max\limits_t (s^2+1)\lambda_1(D(t)) -t\] $$D(t) = \left( \begin{array}{cc} t & -a^t \\ -a & A \end{array} \right). 
$$ \end{slide} \begin{slide}{} %%This is slide 14 {\bf unconstrained dual problem to TRS} $$D(t) = \left( \begin{array}{cc} t & -a^t \\ -a & A \end{array} \right) $$ $y=\left( \begin{array}{c} y_0\\ x \end{array} \right)$ normalized eigenvector for $\lambda_{\min} D(t)$ \[ k(t) = (s^2+1)\lambda_{\min}(D(t)) -t, \] \[ \mbox{*** }~~~ \max_t k(t) \] Note \[ k^\prime(t) = (s^2+1)y_0^2 -1=0 \] is feasibility again for $x$ \end{slide} \begin{slide}{} %%This is slide 15 {\bf SDP Primal-Dual Pair} \[ \max_t k(t) = (s^2+1)\lambda_{\min}(D(t)) -t, \] add the variable $\lambda$ \[ \begin{array}{cc} \max & (s^2+1)\lambda - t \\ \mbox{s.t.} & D(t) \succeq \lambda I \end{array} (DSDP) \] Lagrangian dual of this dual is: \[ \begin{array}{cc} \min & \tr D(0)X \\ \mbox{s.t.} & \tr X = s^2+1 \\ & X_{11} = 1\\ & X \succeq 0 \end{array}(PSDP) \] \end{slide} \begin{slide}{} %%This is slide 16 primal-dual interior point method: approx. solve perturbed optimality conditions using Newton's method: \[ \begin{array}{c} \tr X = s^2+1 \\ X_{11} = 1\\ D(t) - \lambda I - Z = 0\\ \mu Z^{-1} - X = 0 \\ X \succ 0, Z \succ 0 \end{array} \] \end{slide} \begin{slide}{} %%This is slide 17 Dual Simplex Method basic feasible dual solution at given $t$: \[ D(t) - \lambda_{\min}(D(t)) I \succeq 0 ~ \mbox{and singular}\] eigenvector $ y=\left( \begin{array}{c} y_0\\x \end{array} \right) $ normalize $y_0=1$ test primal feasibility of $X=yy^t$ to get better value for $t$ (interpolation problem) Alternatively, normalize $||y||=1,$ and exploit almost linear structure of \[ \frac 1{y_0(t)} = \sqrt{s^2+1} \] \end{slide} \begin{slide}{} %%This is slide 18 \[ Z = D(t) -\lambda I \] complementary slackness \[ \trace ZX = 0 \] \[ 0=ZX= \left[ \begin{array}{cc} t-\lambda & -a^t \\ -a & A-\lambda I \end{array} \right] \left[ \begin{array}{cc} X_{11} & y^t\\ y & \bar{X} \end{array} \right]. 
\] \[ t=a^ty+\lambda; \] \[ \bar{X}a=(t-\lambda)y; \] \[ (A-\lambda I)y=a; \] \[ (A-\lambda I)\bar{X} = ay^t; \] \[ A-\lambda I \succeq 0. \] \end{slide} \begin{slide}{} %%This is slide 19 \[ \lambda_i (A) = \alpha_i \] $J := \lbrace i: a_i \not= 0 \rbrace$ $ \det (D(t) - \lambda I)=$ \begin{eqnarray*} &=&(t- \lambda ) \prod_{k=1}^n (\alpha_k - \lambda ) - \sum_{k=1}^n a_k^2 \prod_{j \not= k}^n (\alpha_j - \lambda )\\ &=&\lbrack t- \lambda - \sum_{j \in J} \frac{a_j^2}{\alpha_j - \lambda} \rbrack \prod_{k=1}^n (\alpha_k - \lambda )\\ &=&d(\lambda) \prod_{k=1}^n (\alpha_k - \lambda ) \end{eqnarray*} \end{slide} \begin{slide}{} %%This is slide 20 Easy case: \begin{enumerate} \item[] $$\lambda_1 (D(t)) \mbox{~is simple and~} \lambda_1 (D(t)) < \alpha_1$$ \item[] if $\lambda < \alpha_1$ given. Then \[ D(d(\lambda)) - \lambda I \succeq 0 ~ \mbox{and singular}. \] \item[] Let $y(t)$ be a normalized eigenvector corresponding to $\lambda_1(D(t))$ and denote its first component by $y_0(t)$. Then $y_0(t) \not= 0$. (wlog $>0$) \item[] $y_0(t) : \Re \mapsto (0,1)$ is strictly monotonically decreasing. \end{enumerate} \end{slide} \begin{slide}{} %%This is slide 21 {\bf Theorem}\\ $y(t)=\left(\begin{array}{c} y_0(t)\\ z(t) \end{array} \right)$ normal eigenvector for \[\lambda_1(D(t))\] Then\\ $y_0(t) \not= 0$ and $v := \frac{1}{y_0(t)}z(t)$ is unique opt of \[ \min \lbrace v^tAv -2a^tv: v^tv = \frac{1 - y_0(t)^2}{y_0(t)^2} \rbrace. 
\] Conversely, if $v \in \Re^n ~and~ \lambda \in \Re$ satisfy\\ \[(A - \lambda I)v = a; ~~A - \lambda I \succeq 0;~ v^tv = \frac{1-y_0^2}{y_0^2}\] thereby defining $ y_{0} > 0,$\\ then\\ $y := y_0\left(\begin{array}{c}1\\ v \end{array} \right)$\\ is eigenvector for $D(t)$ for $t := a^tv + \lambda$\\ and \[\lambda_1(D(t)) = \lambda\] \end{slide} \begin{slide}{} %%This is slide 22 \begin{center} {\bf Dual-Simplex Method} \end{center} \begin{tabbing} 123\=456\=789\=\kill {\bf Initialization:}\\ Find bounds for intervals of uncertainty.\\ $ t_0^l \leq t^* \leq t^u_0$\\ $\mu^l \leq \mu^* \leq \mu^u$\\ ~~\\ Begin the main iterations:\\ {\bf while} (feasibility or duality gap tols too large) ~~\\ \> Part 1: Find a new estimate $t_+$.\\ \> $t_+ = \frac{t_c^l+t_c^u}{2}$ (default value) \\ \> \> Find intersection point of 2 tangent lines\\ \> \> ~~~~~to the graph of $k(t)$\\ \> \> update the upper bound $\mu^u$\\ \> {\bf if} (last iterate on infeasible side, good side)\\ \> \> use inverse interpolation\\ \> {\bf elseif} (on feasible side, bad side)\\ \> \> use inverse interpolation\\ \> {\bf endif}\\ \> Part 2: Update info for new estimate $t_+$.\\ \> {\bf if} $\lambda > 0$ (wrong sign for Lagrange multiplier)\\ \> \> {\bf if} $t_+ \leq t^*$ then {\bf STOP}\\ \> {\bf else} (correct sign for Lagrange multiplier)\\ \> \> Update lower bound using dual function\\ \> {\bf endif}\\ \> {\bf if} $t_+$ in the hard case\\ \> \> Perform a PSDP (negative curvature) \\ \> \> ~~~~step, update bounds \\ \> {\bf else}\\ \> \> steepest descent step - update bounds\\ \> {\bf endif}\\ \> Update the tolerances\\ {\bf endwhile} \end{tabbing} \end{slide} \begin{slide}{} Tests on SUN SPARC station 1 using MATLAB \begin{enumerate} \item dimensions 1540 to 1565 \\ 30 problems for each dimension \\ density of nonzeros .01 \\ average iterations 4.4453 \\ average cpu time 54.1876 sec \\ average work 1.73 times one Lanczos step\\ \item dimensions 1540 to 1558 \\ 30 problems for each dimension \\ density of nonzeros .013 \\ 
average iterations 4.1148 \\ average cpu time 56.1322 sec \\ average work 1.75 times one Lanczos step\\ comment: a multiple of the identity was added to get a positive definite Hessian \newpage \item dimensions 1500 to 1565 \\ 2 problems for each dimension \\ density of nonzeros .01 \\ average iterations 4.2951 \\ average cpu time 57.0429 sec \\ average work 4.8589 times one Lanczos step\\ comment: hard case; multiplicity of smallest eigenvalue varied from 1 to 6 \item dimensions 1800 to 1865 \\ 12 problems for each dimension \\ density of nonzeros .005 \\ average iterations 4.2453 \\ average cpu time 28.1692 sec \\ average work 4.2589 times one Lanczos step\\ comment: hard case; multiplicity of smallest eigenvalue varied from 1 to 6 \end{enumerate} \end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide13} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide131} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide131} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide132} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide14} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide141} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide15} %\end{slide} %\begin{slide}{} %%This is slide one of two. %\input{slide151} %\end{slide} \end{document}