\begin{filecontents}{seceqn.sty}
% seceqn.sty
% From TeXhax 89.37
%
% Substyle file for use with "article" to cause equations to be numbered
% within sections.
%
% 13 Apr 89	Jerry Leichter
\typeout{Document Option `seceqn':  13 Apr 89}
\@addtoreset{equation}{section}		%Make equation=0 when section steps
\def\theequation{\thesection.\arabic{equation}}
					%How an equation number looks

% From NUMINSEC.STY (with SIAM stuff).
\@addtoreset{figure}{section}
\def\thefigure{\thesection.\@arabic\c@figure}

\@addtoreset{table}{section}
\def\thetable{\thesection.\@arabic\c@table}
\end{filecontents}

\documentclass[a4,landscape]{seminar}
\usepackage{semcolor}
\input{seminar.bug}
%\usepackage{theorem,exscale}
%\usepackage{fullpage}        % was not available on PC?
\usepackage{float}
\usepackage{psfrag}
%\usepackage{algorithmic}     % was not available on PC?
\usepackage{hyperref}
\usepackage{seceqn}
\usepackage{amssymb,epsfig}
\usepackage{amsmath,amsthm,amsfonts,latexsym,amssymb,epsfig}
\newenvironment{pf}{\parindent=0pt{\textbf{Proof: }}}{\hfill
  $\blacksquare$ \\} 
\newcounter{ctr}
\newtheorem{thm}{Theorem}[section]
\newtheorem{lemma}{Lemma}[section]
\newtheorem{cor}{Corollary}[section]
\newtheorem{con}{Conjecture}[section]
\newtheorem{prop}{Proposition}[section]
\newtheorem{defi}{Definition}[section]
\newtheorem{example}{Example}[section]
\newtheorem{rem}{Remark}[section]
\newtheorem{alg}{Algorithm}[section]
\newtheorem{ex}{Exercise}[section]
\newcommand{\adj}{{\rm adj\,}}
\newcommand{\trace}{{\rm trace\,}}
\newcommand{\spanl}{{\rm span\,}}
\newcommand{\tr}{{\rm trace\,}}
\newcommand{\Rn}{\mathbb{R}^{n}}
\newcommand{\Sn}{{\mathcal S^n\,}}
\newcommand{\beq}{\begin{equation}}
\newcommand{\eeq}{\end{equation}}
\newcommand{\beqr}{\begin{eqnarray}}
\newcommand\C{\mathbb C}
\newcommand{\NN}{\mathcal N}
\newcommand{\RR}{\mathbb R}
\newcommand{\benum}{\begin{enumerate}}
\newcommand{\eenum}{\end{enumerate}}
\newcommand{\st}{\textnormal{s.t.}}
\newcommand{\TRSe}{TRS$_=\;$}
\newcommand{\disp}{\displaystyle}
\newcommand{\QED}{\hfill ~\rule[-1pt] {8pt}{8pt}\par\medskip ~~}
%\newcommand{\bpr}{\textsc{proof: }}
\newcommand{\epr}{\QED}


% Nick's definitions.
\def\Rnbyn{\mathbb{R}^{n\times n}}
\def\R{\mathbb{R}}
\def\normF#1{\|#1\|_F}
\def\normt#1{\|#1\|_2}
\def\norm#1{\|#1\|}
\def\cm{correlation matrix}
\def\cms{correlation matrices}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All refs in roman.
\def\eqref#1{{\normalfont(\ref{#1})}}

\begin{document}
\psfrag{t*}{$\scriptstyle t^*$}
\psfrag{kt}{$\scriptstyle k(t)$}
\psfrag{tvalues}{$\scriptstyle t-\text{values}$}
\psfrag{kvalues}{$\scriptstyle k-\text{values}$}
\psfrag{psit}{$\scriptstyle \psi (t)$}
\psfrag{psivalues}{$\scriptstyle \psi -\text{values}$}
\psfrag{t0}{$\scriptstyle t_0$}
\psfrag{tt_0}{$\scriptstyle t = t_0$}
\psfrag{(t0,phit0)}{$\scriptstyle (t_0,\phi (t_0))$}
\psfrag{(t*,0)}{$\scriptstyle (t^*,0)$}
\psfrag{(t*,k(t*))}{$\scriptstyle (t^*,k(t^*))$}
\psfrag{(t0,k(t0))}{$\scriptstyle (t_0,k(t_0))$}
\psfrag{sqrtss1111111}{$\scriptstyle \psi = \sqrt{s^2+1}-1$}
\psfrag{x}{$\scriptstyle f(x)$}
\psfrag{interpolationoft*}{$\scriptstyle \text{interpolation of}\; t^*$}
\psfrag{interpolationpoints}{$\scriptstyle \text{interpolation points}$}
\psfrag{tpsi}{$\scriptstyle t(\psi )$}
\psfrag{tt0t*}{$\scriptstyle t = t_0 = t^*$}
\psfrag{dlambda}{$\scriptstyle d(\lambda)$}
\psfrag{l=l1A}{$\scriptstyle \lambda = \lambda_1(A)$}
\psfrag{lambdavalues}{$\scriptstyle \lambda -\text{values}$}
\psfrag{dvalues}{$\scriptstyle d-\text{values}$}
\psfrag{l1At0}{$\scriptstyle (\lambda_1(A),t_0)$}
\psfrag{l=12A}{$\scriptstyle \lambda = \lambda_2(A)$}
\psfrag{interpolationpoints}{$\scriptstyle \text{interpolation points}$}
\psfrag{tnewmuup}{$\scriptstyle (t_{new},\mu_{up})$}
\psfrag{\tiny Pointsfromtheeasyandhardside}{$\scriptstyle \text{points from
 the easy and hard side}$}
\psfrag{newupperboundont*}{$\scriptstyle \text{new upper bound on }t^*$}
\psfrag{y0=0}{$\scriptstyle y_0 = 0$}
\psfrag{y0=1}{$\scriptstyle y_0 = 1$}
\psfrag{y0t}{$\scriptstyle y_0(t)$}
\psfrag{y0values}{$\scriptstyle y_0-\text{values}$}
\psfrag{kpvalues}{$\scriptstyle k'-\text{values}$}
\psfrag{kp1}{$\scriptstyle k' = -1$}
\psfrag{kprimes2}{$\scriptstyle k' = s^2$}
\psfrag{kpt}{$\scriptstyle k'(t)$}
\psfrag{t0omega0}{$\scriptstyle (t_0,\omega_0)$}









\begin{slide}


\begin{center}
{\bf 
A Survey of the Trust Region Subproblem\\ Within a\\ Semidefinite 
Programming Framework}
\end{center}

\vspace{5mm}

     Charles Fortin and Henry Wolkowicz \\

\vspace{5mm}

Department of Combinatorics \& Optimization \\
University of Waterloo

\vspace{10mm}

\mbox{
\begin{figure}
\psfig{file=UWlogori.ps,height=20mm}
\end{figure}
}






\end{slide}
\begin{slide}{}

\begin{center}
{\bf OUTLINE}
\end{center}
\begin{description}
\item $\bullet$
Background, Duality
\item $\bullet$
MS (1983) and GLTR (1999) algorithms within SDP Framework
\item $\bullet$
RW (1997) Algorithm (Revisited)
\item $\bullet$
Robustness, Exploiting Sparsity
\item $\bullet$
Numerics
\end{description}




\end{slide}
\begin{slide}{}



\section{Introduction:\\ Trust Region Subproblem}
\label{sect:intro}
\[
\mbox{(TRS)}\qquad
\begin{array}{lll}
q^* = & \min & q(x):= x^TAx - 2a^Tx \\
& \mbox{s.t.} & \Vert x \Vert \leq s .
\end{array}
\] 

~~\\
~~\\
~~\\
$A$ is: $n \times n$ symmetric (possibly indefinite) matrix\\
 $a$ is: $n$-vector\\
 $s$ is:  positive scalar (TR radius)\\
 $x$ is: $n$-vector of unknowns





\end{slide}
\begin{slide}{}


{\bf Many Applications}, e.g.:
~~\\
~~\\
$\bullet$ forming subproblems for constrained optimization\\
$\bullet$ regularization of ill-posed problems\\
$\bullet$ theoretical applications\\
$\bullet$ etc...\\
$\bullet$ {\bf AND trust region (TR) methods}
~~\\
~~\\
{\bf Many Advantages for TR}, e.g.:\\
second order optimality conditions~~\\
q-quadratic convergence
~~\\
{\bf BUT:} popularity?\\
sparsity?\\
hard case?




\end{slide}
\begin{slide}{}




\section{Optimality/Duality}
\label{sect:optcond}
$x^*$ optimal for TRS if and only if 
\begin{equation}
\label{eq:optTRS}
\begin{array}{ccccc}
\left.
\begin{array}{ccccc}
(A-\lambda^* I)x^* = a, \\
A-\lambda^* I \succeq 0, \lambda^* \leq 0
\end{array} \right\}  & \mbox{dual feasibility}\\
\Vert x^* \Vert^2\leq s^2  & \mbox{primal feasibility}\\
\lambda^* (s^2-\Vert x^* \Vert^2) = 0 & \mbox{complementary slackness}\\
\end{array}
\end{equation}
{\bf Surprising:}\\
$\bullet$ characterization of optimality of a possibly nonconvex problem\\
$\bullet$ 2nd order psd
necessary conditions hold on all of $\RR^n$




\end{slide}
\begin{slide}{}


\subsection{(Dual) Algorithmic Theme}



If $A-\lambda I \succ 0$, define
\begin{equation}
\label{eq:xlambda}
x(\lambda)=(A-\lambda I)^{-1}a \quad (\mbox{stationarity})
\end{equation}

If optimum is on the boundary and $A-\lambda^* I \succ 0$, then solve
\[
\psi(\lambda):=\Vert x(\lambda) \Vert - s=0 \quad (\mbox{feasibility})
\]

(maintain $A-\lambda I \succ 0, ~\lambda \leq 0$)



\end{slide}
\begin{slide}{}


\subsection{The Hard Case}
\label{subsect:hardcase}
$x(\lambda)=(A-\lambda I)^{\dagger}a$;
$\NN(\cdot)$ null space; ${\mathcal R}(\cdot)$ range space
\begin{enumerate}
\item
{\bf Easy Case}:
$a \notin {\mathcal R}(A-\lambda_1(A)I)$, \quad 
               \mbox{implies } $\lambda^* < \lambda_1(A)$
\item
{\bf Hard Case}:
$a \in {\mathcal R}(A-\lambda_1(A)I)$
\begin{enumerate}
\item
{\bf Hard Case (case 1)}:
$\lambda^* < \lambda_1(A)$
\item
{\bf Hard Case (case 2)}:
$\lambda^* = \lambda_1(A)$ (singularity)
\begin{enumerate}
\item
$\|(A-\lambda^* I)^{\dagger} a \| = s$ or $\lambda^* = 0$
(then the pair
$x^*=x(\lambda^*),\lambda^*$ satisfy the optimality conditions)
\item
$\|x(\lambda^*)\|=\|(A-\lambda^* I)^{\dagger} a \| < s, \lambda^* < 0$
(choose an eigenvector $z \in {\mathcal N}(A-\lambda^* I)$ with
$s=\|x(\lambda^*) + \tau z \|$; nonunique optimum)
\end{enumerate}
\end{enumerate}
\end{enumerate}



\end{slide}
\begin{slide}{}



{\tiny
\begin{table}[ht]
\begin{center}
\begin{tabular}{|l|l|l|}
\hline
1. Easy case & 2.(a) Hard case (case 1) & 2.(b) Hard case (case 2) \\
\hline
%\hspace{1mm} & & \\
 $a \notin \mathcal{R}(A-\lambda_1(A)I)$ & 
  $a\hspace{1mm} \bot\hspace{1mm} \mathcal{N}(A-\lambda_1(A)I)$
&
 $a\hspace{1mm} \bot\hspace{1mm} \mathcal{N}(A-\lambda_1(A)I)$ \\
& and &and  \\
(implies $\lambda^* < \lambda_1(A)$) & $\lambda^* < \lambda_1(A)$ & 
$\lambda^* = \lambda_1(A)$ \\
&& (i) $\|(A-\lambda^* I)^{\dagger} a \| = s$ or $\lambda^* = 0$\\
&& (ii) $\|(A-\lambda^* I)^{\dagger} a \| < s, \lambda^* < 0$\\
%\hspace{1mm} &  & %\\
\hline
\end{tabular}
\caption{\label{tabcases}The three different cases for the trust
region subproblem. We include two subcases (i) and (ii) for
the hard case (case 2).}
\label{table:cases}
\end{center}
\end{table}
}

\end{slide}
\begin{slide}{}

\begin{lemma}
\label{lem:hardcs2}
({\bf Handling the Hard Case.})
spectral decomposition of $A$:
$A=\sum_{i=1}^n \lambda_i(A) v_iv_i^T=P\Lambda P^T$,
$v_i$ orthonormal eigenvectors;\\
$\bar{a}=P^Ta$. \\
$A_j := \sum_{i \in S_j} \lambda_i(A) v_iv_i^T, ~ j=1,2,3$\\
$S_1 = \{i: \bar{a}_i \neq 0\},~
S_2 = \{i:\bar{a}_i = 0,~\lambda_i(A)=\lambda_1(A)\}$\\
$S_3 = \{i: \bar{a}_i = 0, \lambda_i(A)>\lambda_1(A)\}$.\\
Define projections
$P_j := \sum_{i \in S_j}  v_iv_i^T, ~ j=1,2,3$. Then \\
$x^*$ solves TRS {\bf if and only if}
\[
P_3x^*=0, \mbox{ and } x^* \mbox{ solves TRS when } A \mbox{ is
replaced by } A_1.
\]
Moreover, $x^*$ solves TRS implies: 
$P_2x^*=0$ unless the hard case (case 2(ii)) holds.
\end{lemma}

\end{slide}
\begin{slide}{}



\subsubsection{Hard Case (case 2); Ill-Posed?}
Note: $A-\lambda^* I$ is singular.\\
Is near hard case ill-conditioned?

\begin{lemma}
\label{lem:shift}
Suppose that the hard case (case 2 (ii)) holds for TRS. Then $\lambda_1(A)
< 0$ and TRS is equivalent to the following stable convex program
\[
\mbox{(TRS$_s$)}\quad
\begin{array}{lll}
q_s^* := & \min & q_s(x):= x^T(A-\lambda_1(A)I)x - 2a^Tx +s^2 \lambda_1(A)\\
& \mbox{s.t.} & \Vert x \Vert \leq s.
\end{array}
\]
The equivalence is in the sense that
the optimal values satisfy $q^*=q^*_s$; and
$x^*$ solves TRS if and only if 
$x^*=x^*_s+z$ with $x^*_s$ optimal for TRS$_s$ and $z \in {\mathcal N}
(A-\lambda_1(A)I)$.
\QED
\end{lemma}




\end{slide}
\begin{slide}{}




\section{The Mor\'{e}-Sorensen (MS) Algorithm\\ 1983}
\label{sect:moreandsorensen}

\begin{quote}
\noindent
{\bf Outline}:\\
(i) Use safeguarding/updating: reduce interval of uncertainty 
$[\lambda_L,\lambda_U]$ for $\lambda^*$ and improve upper bound $\lambda_S$ 
for $\lambda_1(A)$\\
(ii) Take a Newton step to
implicitly solve $\Vert (A-\lambda I)^{-1}a \Vert = s$ for $\lambda$.\\
(iii) If the possibility of the hard case (case 2) is detected 
($\|x(\lambda)\| < s$), then take a  {\em primal step}
to the boundary while simultaneously reducing the objective
function.\\
(iv)   Check if  the (unique) unconstrained minimizer
exists and is in the strict interior of the trust region.
\end{quote}




\end{slide}
\begin{slide}{}



Recall $\psi(\lambda)=\Vert x(\lambda) \Vert - s$.
Use orthogonal diagonalization of $A$:
\[
\psi(\lambda)=\Vert Q(\Lambda-\lambda I)^{-1} Q^Ta\Vert - s
  \approx \frac {c_1}{\lambda_1(A)-\lambda} + d,
\]
for some constants $c_1>0,d$. This function is highly nonlinear for
values of $\lambda$ near $\lambda_1(A)$, causing slow
convergence for Newton's method.\\
MS use so-called secular equation
\begin{equation}
\label{phi}
\phi(\lambda):= \frac{1}{s} - \frac{1}{\Vert x(\lambda) \Vert} =0.
\end{equation}
(Reinsch and Hebden) since
rational structure of $\Vert x(\lambda)\Vert^2$, 
shows that this function is less nonlinear
\[
\phi(\lambda)\approx \frac{1}{s} - 
  \frac {\lambda_1(A)-\lambda}{c_2},
\]
for some $c_2>0$.
Also $\phi(\lambda)$ is a convex function strictly increasing 
on $(-\infty, \lambda_1(A))$.




\end{slide}
\begin{slide}{}



compute the Newton direction 

\begin{alg}
\label{MoSoeasy}
Assume $\lambda_k \leq 0$ and $A - \lambda_k I \succ 0$
(i.e.\ $\lambda_k < \lambda_1(A)$).
\benum
 \item Factor $A-\lambda_k I = R^TR$ (Cholesky factorization).
 \item Solve, for $x$, $\;R^TRx = a\;$ ( $x=x(\lambda_k)$).
 \item Solve, for $y$, $\;R^Ty = x$.
 \item Let $\lambda_{k+1} = \lambda_k - \left[\frac{\Vert x \Vert}{\Vert 
      y \Vert}\right]^2\left[\frac{(\Vert x \Vert - s)}{s}\right]$
(Newton step).
\eenum
\end{alg}


\end{slide}
\begin{slide}{}


\begin{figure}
\epsfxsize=250pt
\centerline{\epsfbox{secular.ps}}
%%\includegraphics[totalheight=3in]{meritplot.pdf}
%%\includegraphics[width=6in]{meritplot.pdf}
\caption{Newton's method with the secular function, $\phi(\lambda)$.}
\label{fig:secular}
\end{figure}



\end{slide}
\begin{slide}{}


\subsection{Handling the Hard Case}
\begin{lemma}[Primal step to the boundary]
\label{primstep}
Let $0 < \sigma < 1$ be given and suppose that
\begin{equation}
\label{eq:dualfeaslem}
A - \lambda I = R^TR,\quad (A-\lambda I)x = a, \quad \lambda \leq 0.
\end{equation}
Let $z \in \RR^n$ satisfy
\begin{equation}
\label{eq:primfeaslem}
\Vert x+z \Vert^2 = s^2
\end{equation}
and
\begin{equation}
\label{eq:Restimate}
\Vert Rz \Vert^2 \leq \sigma (\Vert Rx
\Vert^2-\lambda s^2).
\end{equation}
Then
\begin{equation}
\label{approx}
\vert q(x + z) - q(x^*) \vert \leq \sigma\vert q(x^*) \vert.
\end{equation}
where $x^*$ is optimal for TRS.
\epr
\end{lemma}



\end{slide}
\begin{slide}{}




\section{Generalized Lanczos Trust Region 1999 (GLTR) Algorithm}
\label{sect:lanczos}
GLTR (or GLRT)
method of Gould, Lucidi, Roma and Toint\\
focus on exploiting sparsity\\
Lanczos tridiagonalization of the matrix $A$\\
solve a {\em sequence} of restricted problems
\begin{equation}
\label{compromise}
\begin{array}{ll}
\min & q(x) \\
\st & \Vert x \Vert \leq s \\
     & x \in S, \quad \mbox{Krylov subspace}
\end{array}
\end{equation}



\end{slide}
\begin{slide}{}


\begin{quote}
\noindent
{\bf Outline}:\\
(i) Apply the conjugate-gradient method to
$q(\cdot)$ until the boundary is reached; or until a
global minimizer is found; or
a direction of nonpositive curvature is found to move to the boundary.\\
(ii)
Solve the TRS subproblem
\begin{equation}
\label{compromiseequal}
(TRS_{\mbox{sub}}) \quad
\begin{array}{ll}
\min & q(x) \\
\st & \Vert x \Vert \leq s \\
     & x \in S\equiv \mathcal{K}_k,
\end{array}
\end{equation}
where $\mathcal{K}_k := \textnormal{span}\{
a,Aa,A^2a,A^3a,\ldots ,A^ka\}$\\
(iii)
Increase $k$, the size of the Krylov subspace using the conjugate
gradient method.
\end{quote}



\end{slide}
\begin{slide}{}

Efficiency based on efficiently finding
approximate solutions of TRS$_{\mbox{sub}}$ \eqref{compromiseequal},
or equivalently the following tridiagonal TRS
\begin{equation}
\label{tridequivalent2}
\begin{array}{ll}
\min & h^T T_kh - 2 \gamma_0 e_1^Th \\
\st & \Vert h \Vert \leq s, \\
\end{array}
\end{equation}
where
\[
T_k = \left[
\begin{array}{ccccc}
\delta_0 & \gamma_1 \\
\gamma_1 & \delta_1 & . \\
& . & . & . \\
&& . & \delta_{k-1} & \gamma_k \\
&&   & \gamma_{k}   & \delta_k
\end{array}
\right],
\] 
$\delta_k = q_k^TAq_k$, $\gamma_0 = \Vert a \Vert$, $q_{-1} = 0$, $q_0 
=a/ \Vert a \Vert$, $\gamma_k =
\Vert (A-q_{k-1}^TAq_{k-1} I)q_{k-1} - \gamma_{k-1}q_{k-2} \Vert$, and
$q_k =  ((A-q_{k-1}^TAq_{k-1} I)q_{k-1} - \gamma_{k-1}q_{k-2})/\gamma_k$, 
for $k \geq 1$. 


\end{slide}
\begin{slide}{}

\section{Duality; an SDP Framework for TRS}
\label{sect:framework}
For simplicity, use equality TRS:
\begin{equation}
\label{TRS=}
\mbox{(TRS$_=$)}\quad
\begin{array}{lll}
q^* = & \min & q(x) \\
      & \st & \Vert x \Vert = s, \\
\end{array}
\end{equation}
strong Lagrangian duality holds for \TRSe, i.e.
\begin{equation}
\label{LD}
q^* = \nu^*:=\disp{\max_{\lambda} \; \min_{x} \; L(x,\lambda)},
\end{equation}
where the Lagrangian is
$L(x,\lambda) := x^T(A-\lambda I)x -2a^Tx + \lambda s^2$.

Define
\[
h(\lambda) := \lambda s^2 -a^T(A-\lambda I)^{\dagger}a. 
\]



\end{slide}
\begin{slide}{}


Wolfe dual implies
\begin{thm}
\label{LagrDual}
The Lagrangian dual for \TRSe is
\begin{equation}
\label{hdual}
(D) \qquad 
   \disp{q^* = \; \sup_{ A-\lambda I \succ 0} \; h(\lambda)}.
\end{equation}
In the easy case and hard case (case 1), the $\sup$ can be replaced by
a $\max$. 
\end{thm}



\end{slide}
\begin{slide}{}


\subsection{Unconstrained and Linear Duals}
\label{subsect:unconstrdual}
Homogenizing \TRSe
\begin{equation}
\label{homogen}
\begin{array}{lll}
q^*= & \min & x^TAx-2y_0 a^Tx \\
     & \st  & \Vert x \Vert ^2 = s^2, \\
     &      & y_0^2 = 1.
\end{array}
\end{equation}



\end{slide}
\begin{slide}{}

Therefore
\[
\begin{array}{rcl}
& & q^* \mbox{ is }\\
&=&
\max\limits_{t} \; \min\limits_{\Vert x \Vert ^2 = s^2,y_0^2=1} \; x^TAx
  -2y_0a^Tx +t(y_0^2-1)\\
&\geq & \max\limits_{t} \; \min\limits_{\Vert x \Vert ^2 + y_0^2 = s^2+1} \;
  x^TAx -2y_0a^Tx +t(y_0^2-1) \\
&\geq &\; \sup\limits_{t,\lambda} \; \min\limits_{x,y_0} \; x^TAx -2y_0a^Tx +t(y_0^2-1)
  +\lambda(\Vert x \Vert ^2 + y_0^2 - s^2 - 1) \\
 &=&  \sup\limits_{ r,\lambda} \; \min\limits_{x,y_0} \; x^TAx -2y_0a^Tx +r(y_0^2-1)
  +\lambda(\Vert x \Vert ^2 - s^2) \\
& =& \sup\limits_{\lambda} \left(  \sup\limits_{r} \; \min\limits_{x,y_0} \; x^TAx
    -2y_0a^Tx +r(y_0^2-1) +\lambda(\Vert x \Vert ^2 - s^2) \right)
\end{array}
\]
where $r= t + \lambda$.

\end{slide}
\begin{slide}{}

By Strong Duality
\[
\begin{array}{rcl}
q^*&=&\disp{\sup_{\lambda} \left( \min_{x,y_0} \; \sup_{r} \; x^TAx
    -2y_0a^Tx +r(y_0^2-1) +\lambda(\Vert x \Vert ^2 - s^2) \right) } \\
 &=&  \disp{\sup_{\lambda} \; \min_{x,y_0^2=1} \; x^TAx -2y_0a^Tx
  +\lambda(\Vert x \Vert ^2 - s^2)}  \\
&=&\disp{\min_{x,y_0^2=1} \; \sup_{\lambda} \; x^TAx -2y_0a^Tx
  +\lambda(\Vert x \Vert ^2 - s^2)}\\
&= & \min\limits_{\Vert x \Vert ^2 = s^2,y_0^2 = 1}  x^TAx-2y_0 a^Tx  \\
&= & q^*.
\end{array} 
\]



\end{slide}
\begin{slide}{}


All of the above are equal, and
\[\disp{q^* =  \max_{t} \; \min_{\Vert x \Vert ^2 + y_0^2 = s^2+1} \;
  x^TAx -2y_0a^Tx +t(y_0^2-1)}\]
\[\disp{=  \max_{t} \min_{\Vert z \Vert ^2 = s^2+1} \; z^TD(t)z -t} =
 \max_{t} \; (s^2+1)\lambda_1(D(t)) -t\]
where $z = \left( \begin{array}{c} y_0 \\ x \end{array} \right)$ and 
$D(t) = \left( \begin{array}{cc} t & -a^T \\ -a & A \end{array} \right).$

Define 
\begin{equation}
\label{eq:ktfunction}
k(t):= (s^2+1)\lambda_1(D(t)) -t,
\end{equation}
An unconstrained dual problem for TRS:
\begin{equation}
\label{UD}
\disp{\max_{t} \; k(t)},  \quad \mbox{(concave, coercive)}
\end{equation}



\end{slide}
\begin{slide}{}


rewrite into a linear semidefinite program:
\begin{equation}
\label{UDsdp}
\disp{\max_{ D(t) \succeq \lambda I} \; (s^2+1)\lambda - t}.
\end{equation}
Equivalently,
\begin{equation}
\label{DDequiv}
\begin{array}{lll}
q^* = & \max &  (s^2+1)\lambda - t \\
      & \st  & \lambda I - t E_{00} \preceq D(0),
\end{array}
\end{equation}
and its dual
\begin{equation}
\label{DDequivdual}
\begin{array}{lll}
q^* = & \min &  \trace D(0) Y\\
      & \st  & \trace Y=s^2+1 \\
      &      & -Y_{00}=-1 \\
       &&  Y \succeq 0.
\end{array}
\end{equation}
(Slater's holds for both - stable problems!)


\end{slide}
\begin{slide}{}


\section{The Rendl-Wolkowicz 1997 (RW) Algorithm, Revisited}
\label{sect:RWalgorithm}
\subsection{Three Useful Functions}
\subsubsection{$k(t) = (s^2+1)\lambda_1(D(t))-t \longrightarrow \max$}

\[\lim_{t\rightarrow\infty} \lambda_1(D(t))=\lambda_1(A) \ \textnormal{and}
\ \lim_{t \rightarrow -\infty} (\lambda_1(D(t))-t)=0,\]
asymptotic behavior of $k(t)$ is
\[
\begin{array}{lll} 
k(t) \sim (s^2+1)\lambda_1(A) - t, & \textnormal{as} \
  t\rightarrow\infty 
& \mbox{(linear with slope $-1$)}\\
k(t)\sim s^2t, & \textnormal{as} \ t\rightarrow -\infty 
& \mbox{(linear with slope $s^2$)}
\end{array}
\]




\end{slide}
\begin{slide}{}

\begin{figure}[!h]
\centering
\epsfig{file=kteasy.ps,width=10cm,height=6cm}
\caption{\label{kt_easycase}$k(t)$ in the easy case}
\end{figure}



\end{slide}
\begin{slide}{}



\begin{figure}[!h]
\centering
\epsfig{file=kthard1.ps,width=9cm,height=6cm}
\caption{\label{kt_hardcase1}$k(t)$ in the hard case (case 1)}
\end{figure}


\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=kthard2.ps,width=9cm,height=6cm}
\caption{\label{kt_hardcase2}$k(t)$ in the hard case (case 2)}
\end{figure}

\end{slide}
\begin{slide}{}

\subsubsection{$k^\prime(t) = (s^2+1)y_0(t)^2-1$}
$y(t)$ normalized eigenvector for $\lambda_1(D(t))$\\
$y(t)=\left(
\begin{array}{c}  y_0(t) \\ x(t) \end{array} \right)$\\
\[
\frac 1{y_0(t)} \|x(t)\|=s \mbox{ if and only if } k^\prime (t)=0
\]



\end{slide}
\begin{slide}{}


\begin{thm}
\label{eigenvectors}
Let $y(t)$ be a normalized eigenvector for $\lambda_1(D(t))$ 
and let $y_0(t)$ be
its first component. Then:
\begin{enumerate}
\item
{\bf In the easy case}: for $t \in \RR$, $y_0(t) \not= 0$; 
\item
{\bf In the hard case}: 
\begin{enumerate}
\item
{\bf for $t<t_0$}: $y_0(t) \not=0$; 
\item
{\bf for $t>t_0$}: 
$y_0(t) = 0$; 
\item
{\bf for $t=t_0$}: there exists a basis of eigenvectors for
the eigenspace of $\lambda_1(D(t_0))$, such that
one eigenvector of this basis,
$\omega$, has a non-zero first component ($\omega_0\not=0$) and the other
eigenvectors in the basis have a zero first component ($y_0(t) =0$).
\end{enumerate}
\end{enumerate}
\end{thm}



\end{slide}
\begin{slide}{}


In the easy case,
\begin{quote}
$y_0(t)$ is strictly monotonically decreasing,\\
$y_0(t) \rightarrow 1$ as $t \rightarrow -\infty$\\
 and $y_0(t) \rightarrow 0$ as $t \rightarrow \infty$.  
\end{quote}
In particular, $y_0(t) \not= 0$ for all $t \in \mathbb{R}$.



\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=y0teasy.ps,width=9cm,height=6cm}
\caption{\label{y0t_easycase}$y_0(t)$ in the easy case}
\end{figure}



\end{slide}
\begin{slide}{}



\begin{figure}[!h]
\centering
\epsfig{file=y0thard.ps,width=9cm,height=6cm}
\caption{\label{y0t_hardcase}$y_0(t)$ in the hard case}
\end{figure}


\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=kprimeeasy.ps,width=9cm,height=6cm}
\caption{\label{kprime_easycase}$k^\prime(t)$ in the easy case}
\end{figure}



\end{slide}
\begin{slide}{}



\begin{figure}[!h]
\centering
\epsfig{file=kprimehard1.ps,width=9cm,height=6cm}
\caption{\label{kprime_hardcase}$k^\prime(t)$ in the hard case
  (\textit{case 1})}
\end{figure}

\end{slide}
\begin{slide}{}


\subsubsection{$\psi (t) = \sqrt{s^2+1}-\frac{1}{y_0(t)}$}
Solving $\psi (t) = 0$ or $k^\prime(t) = 0$ is equivalent.
\begin{figure}[!h]
\centering
\epsfig{file=psieasy.ps,width=8cm,height=5cm}
\caption{\label{psit_easycase}$\psi (t)$ in the easy case}
\end{figure}


\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=psihard1.ps,width=9cm,height=6cm}
\caption{\label{psit_hardcase1}$\psi (t)$ in the hard case
  (\textit{case 1})}
\end{figure}


\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=psihard2.ps,width=9cm,height=6cm}
\caption{\label{psit_hardcase2}$\psi (t)$ in the hard case
(\textit{case 2})}
\end{figure}



\end{slide}
\begin{slide}{}


assume that $y_0 > 0$
\begin{equation}
\mbox{\bf Easy Case Indicators:} \quad \left\{
\begin{array}{rcl}
\phi(\lambda) &>& 0\\
k^{\prime}(t) &<& 0 \quad ((s^2+1)y_0^2-1 < 0 )\\
y_0 &<& \frac 1{\sqrt{s^2+1}}\\
t &>& t^*\\
\lambda &>& \lambda^*\\
\|x(\lambda)\| &> & s \\
h^\prime (\lambda) &< & 0.
\end{array}\right.
\end{equation}



\end{slide}
\begin{slide}{}


\section{Flowchart (RW Revisited)}
\label{subsect:flowchart}

{\bf INITIALIZATION:}
  \begin{enumerate}
    \item Compute $\lambda_1(A)$
and corresponding eigenvector $v_1$.
\begin{quote}
If $a^Tv_1$ is small (near hard case), then deflate, i.e. add  the
vector $y_1= \left( \begin{array}{c}  0\\v_1  \end{array}  \right)$ to the set
$\mathcal Y$.
\end{quote}
    \item Obtain bounds on $q^*,\lambda^*$ and $t^*$. 
\begin{quote}
   If $\lambda_1 > 0$, test if the
            optimum is an unconstrained minimizer. EXIT here if so.
\end{quote}
    \item Initialize parameters and the stopping criteria based on the
optimality conditions, duality gap, and intervals of uncertainty.
  \end{enumerate}



\end{slide}
\begin{slide}{}


{\bf ITERATION LOOP:} (until convergence to the desired tolerance or
  until we find the solution is the unconstrained minimizer.)
  \begin{enumerate}
    \item {\bf FIND a NEW VALUE of $t$.}
    \begin{enumerate}
       \item Set $t$ using Newton's method on $k(t)-M_t$
if possible; otherwise set it to the midpoint (default) of the
interval of uncertainty for $t$.

       \item If points from the hard and easy side are available:
       \begin{enumerate}
          \item Do TRIANGLE INTERPOLATION (Update
upper bound on $q^*$ and set $t$, if possible.)
          \item Do VERTICAL CUT (Update
lower or upper bound for interval of uncertainty for $t$.)
       \end{enumerate}
       \item
                    Do INVERSE INTERPOLATION (Set $t$, if possible)
    \end{enumerate}
        \item
\newpage
            {\bf UPDATE}
    \begin{enumerate}
        \item
    
                    With new $t$, compute 
(with restarts)
$\lambda=\lambda_{1}(D(t))$, corresponding
                    eigenvector $y$, $y_0\geq 0$. ($y$ orthogonal to
${\mathcal Y}$)
        \item
                    $\lambda>0$ and $y_0 > 1/(s^2+1)$ implies solution is
                    unconstrained minimizer. Use CG,
                    EXIT.
        \item
                    Update bounds on interval of uncertainty of $q^*$.
        \item
         \begin{enumerate}
        \item
                    If $y_0$ is small, then deflate,
i.e. add $y$ to $\mathcal Y$. 
        \item
                    elseif $t$ is on easy side, update parameters.
Take primal step to boundary if hard side point exists.
        \item
                    elseif $t$ is on hard side, update parameters.
Take primal step to boundary from this hard side point.
    \end{enumerate}
        \item
                    Save new bounds and update stopping criteria.
    \end{enumerate}
  \end{enumerate}
   {\bf END ITERATION LOOP}\\


\end{slide}
\begin{slide}{}

\subsection{Techniques used in the algorithm}
\label{techniques}


\subsubsection{Newton's Method on $k(t)-M_t=0$}
\[
t_+ = t_c- \frac{k(t_c)-M_t}{k^\prime (t_c)}
=\frac{ (s^2+1)\left(t_cy_0^2(t_c)-\lambda_1(D(t_c))\right)+M_t}{(s^2+1)y_0(t_c)^2-1}.
\]



\end{slide}
\begin{slide}{}


\subsubsection{Triangle Interpolation using $k(t)$}
Reduce interval of uncertainty of $t$; improve upper bound on $q^*$.
\begin{figure}[!h]
\centering
\epsfig{file=triangleinter.ps,width=8.8cm,height=6cm}
\end{figure}

\end{slide}
\begin{slide}{}


\subsubsection{Vertical Cut}
\begin{figure}[!h]
\centering
\epsfig{file=verticalcut.ps,width=9cm,height=6cm}
\caption{\label{verticalcut}Vertical cut}
\end{figure}



\end{slide}
\begin{slide}{}


\subsubsection{Inverse Interpolation}
\[
\left[\begin{array}{ccc}
  \psi_1^2 & \psi_1 & 1\\
  \psi_2^2 & \psi_2 & 1\\
  \psi_3^2 & \psi_3 & 1
\end{array} \right]
\left[\begin{array}{ccc}
a \\
b \\
t_{\mbox{new}} 
\end{array} \right]
=
\left[\begin{array}{ccc}
t_1 \\
t_2 \\
t_3 
\end{array} \right]
\]


\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=inverseintereasy.ps,width=9cm,height=6cm}
\caption{\label{tpsi_easycase}Inverse interpolation; easy case \& 
hard case (\textit{case 1})}
\end{figure}

\end{slide}
\begin{slide}{}

\begin{figure}[!h]
\centering
\epsfig{file=inverseinterhard2.ps,width=9cm,height=6cm}
\caption{\label{tpsi_hardcase2}Inverse interpolation in the hard case
  (\textit{case 2})}
\end{figure}


\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=cputime.ps,width=9cm,height=6cm}
\caption{\label{cputime}cputime in the hard case
  (\textit{case 2})}
\end{figure}


\end{slide}
\begin{slide}{}


\begin{figure}[!h]
\centering
\epsfig{file=logcputime.ps,width=9cm,height=6cm}
\caption{\label{logcputime}log of cputime in the hard case
  (\textit{case 2})}
\end{figure}


\end{slide}

\end{document}
