htswanalysis/MACS/lib/gsl/gsl-1.11/eigen/francis.c

   1 /* eigen/francis.c
   2  *
   3  * Copyright (C) 2006, 2007 Patrick Alken
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 3 of the License, or (at
   8  * your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with this program; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  */
  19
  20 #include <config.h>
  21 #include <stdlib.h>
  22 #include <math.h>
  23 #include <gsl/gsl_eigen.h>
  24 #include <gsl/gsl_linalg.h>
  25 #include <gsl/gsl_math.h>
  26 #include <gsl/gsl_blas.h>
  27 #include <gsl/gsl_vector.h>
  28 #include <gsl/gsl_vector_complex.h>
  29 #include <gsl/gsl_matrix.h>
  30 #include <gsl/gsl_cblas.h>
  31 #include <gsl/gsl_complex.h>
  32 #include <gsl/gsl_complex_math.h>
  33
  34 /*
  35  * This module computes the eigenvalues of a real upper hessenberg
  36  * matrix, using the classical double shift Francis QR algorithm.
  37  * It will also optionally compute the full Schur form and matrix of
  38  * Schur vectors.
  39  *
  40  * See Golub & Van Loan, "Matrix Computations" (3rd ed),
  41  * algorithm 7.5.2
  42  */
  43
  44 /* exceptional shift coefficients - these values are from LAPACK DLAHQR */
  45 #define GSL_FRANCIS_COEFF1        (0.75)
  46 #define GSL_FRANCIS_COEFF2        (-0.4375)
  47
  48 static inline void francis_schur_decomp(gsl_matrix * H,
  49                                         gsl_vector_complex * eval,
  50                                         gsl_eigen_francis_workspace * w);
  51 static inline size_t francis_search_subdiag_small_elements(gsl_matrix * A);
  52 static inline int francis_qrstep(gsl_matrix * H,
  53                                  gsl_eigen_francis_workspace * w);
  54 static inline void francis_schur_standardize(gsl_matrix *A,
  55                                              gsl_complex *eval1,
  56                                              gsl_complex *eval2,
  57                                              gsl_eigen_francis_workspace *w);
  58 static inline size_t francis_get_submatrix(gsl_matrix *A, gsl_matrix *B);
  59 static void francis_standard_form(gsl_matrix *A, double *cs, double *sn);
  60
  61 /*
  62 gsl_eigen_francis_alloc()
  63
  64 Allocate a workspace for solving the nonsymmetric eigenvalue problem.
  65 The size of this workspace is O(1)
  66
  67 Inputs: none
  68
  69 Return: pointer to workspace
  70 */
  71
  72 gsl_eigen_francis_workspace *
  73 gsl_eigen_francis_alloc(void)
  74 {
  75   gsl_eigen_francis_workspace *w;
  76
  77   w = (gsl_eigen_francis_workspace *)
  78       calloc (1, sizeof (gsl_eigen_francis_workspace));
  79
  80   if (w == 0)
  81     {
  82       GSL_ERROR_NULL ("failed to allocate space for workspace", GSL_ENOMEM);
  83     }
  84
  85   /* these are filled in later */
  86
  87   w->size = 0;
  88   w->max_iterations = 0;
  89   w->n_iter = 0;
  90   w->n_evals = 0;
  91
  92   w->compute_t = 0;
  93   w->Z = NULL;
  94   w->H = NULL;
  95
  96   return (w);
  97 } /* gsl_eigen_francis_alloc() */
  98
  99 /*
 100 gsl_eigen_francis_free()
 101   Free francis workspace w
 102 */
 103
 104 void
 105 gsl_eigen_francis_free (gsl_eigen_francis_workspace *w)
 106 {
 107   free(w);
 108 } /* gsl_eigen_francis_free() */
 109
 110 /*
 111 gsl_eigen_francis_T()
 112   Called when we want to compute the Schur form T, or no longer
 113 compute the Schur form T
 114
 115 Inputs: compute_t - 1 to compute T, 0 to not compute T
 116         w         - francis workspace
 117 */
 118
 119 void
 120 gsl_eigen_francis_T (const int compute_t, gsl_eigen_francis_workspace *w)
 121 {
 122   w->compute_t = compute_t;
 123 }
 124
 125 /*
 126 gsl_eigen_francis()
 127
 128 Solve the nonsymmetric eigenvalue problem
 129
 130 H x = \lambda x
 131
 132 for the eigenvalues \lambda using algorithm 7.5.2 of
 133 Golub & Van Loan, "Matrix Computations" (3rd ed)
 134
 135 Inputs: H    - upper hessenberg matrix
 136         eval - where to store eigenvalues
 137         w    - workspace
 138
 139 Return: success or error - if error code is returned,
 140         then the QR procedure did not converge in the
 141         allowed number of iterations. In the event of non-
 142         convergence, the number of eigenvalues found will
 143         still be stored in the beginning of eval,
 144
 145 Notes: On output, the diagonal of H contains 1-by-1 or 2-by-2
 146        blocks containing the eigenvalues. If T is desired,
 147        H will contain the full Schur form on output.
 148 */
 149
 150 int
 151 gsl_eigen_francis (gsl_matrix * H, gsl_vector_complex * eval,
 152                    gsl_eigen_francis_workspace * w)
 153 {
 154   /* check matrix and vector sizes */
 155
 156   if (H->size1 != H->size2)
 157     {
 158       GSL_ERROR ("matrix must be square to compute eigenvalues", GSL_ENOTSQR);
 159     }
 160   else if (eval->size != H->size1)
 161     {
 162       GSL_ERROR ("eigenvalue vector must match matrix size", GSL_EBADLEN);
 163     }
 164   else
 165     {
 166       const size_t N = H->size1;
 167       int j;
 168
 169       /*
 170        * Set internal parameters which depend on matrix size.
 171        * The Francis solver can be called with any size matrix
 172        * since the workspace does not depend on N.
 173        * Furthermore, multishift solvers which call the Francis
 174        * solver may need to call it with different sized matrices
 175        */
 176       w->size = N;
 177       w->max_iterations = 30 * N;
 178
 179       /*
 180        * save a pointer to original matrix since francis_schur_decomp
 181        * is recursive
 182        */
 183       w->H = H;
 184
 185       w->n_iter = 0;
 186       w->n_evals = 0;
 187
 188       /*
 189        * zero out the first two subdiagonals (below the main subdiagonal)
 190        * needed as scratch space by the QR sweep routine
 191        */
 192       for (j = 0; j < (int) N - 3; ++j)
 193         {
 194           gsl_matrix_set(H, (size_t) j + 2, (size_t) j, 0.0);
 195           gsl_matrix_set(H, (size_t) j + 3, (size_t) j, 0.0);
 196         }
 197
 198       if (N > 2)
 199         gsl_matrix_set(H, N - 1, N - 3, 0.0);
 200
 201       /*
 202        * compute Schur decomposition of H and store eigenvalues
 203        * into eval
 204        */
 205       francis_schur_decomp(H, eval, w);
 206
 207       if (w->n_evals != N)
 208         return GSL_EMAXITER;
 209
 210       return GSL_SUCCESS;
 211     }
 212 } /* gsl_eigen_francis() */
 213
 214 /*
 215 gsl_eigen_francis_Z()
 216
 217 Solve the nonsymmetric eigenvalue problem for a Hessenberg
 218 matrix
 219
 220 H x = \lambda x
 221
 222 for the eigenvalues \lambda using the Francis double-shift
 223 method.
 224
 225 Here we compute the real Schur form
 226
 227 T = Q^t H Q
 228
 229 with the diagonal blocks of T giving us the eigenvalues.
 230 Q is the matrix of Schur vectors.
 231
 232 Originally, H was obtained from a general nonsymmetric matrix
 233 A via a transformation
 234
 235 H = U^t A U
 236
 237 so that
 238
 239 T = (UQ)^t A (UQ) = Z^t A Z
 240
 241 Z is the matrix of Schur vectors computed by this algorithm
 242
 243 Inputs: H    - upper hessenberg matrix
 244         eval - where to store eigenvalues
 245         Z    - where to store Schur vectors
 246         w    - workspace
 247
 248 Notes: 1) If T is computed, it is stored in H on output. Otherwise,
 249           the diagonal of H will contain 1-by-1 and 2-by-2 blocks
 250           containing the eigenvalues.
 251
 252        2) The matrix Z must be initialized to the Hessenberg
 253           similarity matrix U. Or if you want the eigenvalues
 254           of H, initialize Z to the identity matrix.
 255 */
 256
 257 int
 258 gsl_eigen_francis_Z (gsl_matrix * H, gsl_vector_complex * eval,
 259                      gsl_matrix * Z, gsl_eigen_francis_workspace * w)
 260 {
 261   int s;
 262
 263   /* set internal Z pointer so we know to accumulate transformations */
 264   w->Z = Z;
 265
 266   s = gsl_eigen_francis(H, eval, w);
 267
 268   w->Z = NULL;
 269
 270   return s;
 271 } /* gsl_eigen_francis_Z() */
 272
 273 /********************************************
 274  *           INTERNAL ROUTINES              *
 275  ********************************************/
 276
 277 /*
 278 francis_schur_decomp()
 279   Compute the Schur decomposition of the matrix H
 280
 281 Inputs: H     - hessenberg matrix
 282         eval  - where to store eigenvalues
 283         w     - workspace
 284
 285 Return: none
 286 */
 287
 288 static inline void
 289 francis_schur_decomp(gsl_matrix * H, gsl_vector_complex * eval,
 290                      gsl_eigen_francis_workspace * w)
 291 {
 292   gsl_matrix_view m;   /* active matrix we are working on */
 293   size_t N;            /* size of matrix */
 294   size_t q;            /* index of small subdiagonal element */
 295   gsl_complex lambda1, /* eigenvalues */
 296               lambda2;
 297
 298   N = H->size1;
 299   m = gsl_matrix_submatrix(H, 0, 0, N, N);
 300
 301   while ((N > 2) && ((w->n_iter)++ < w->max_iterations))
 302     {
 303       q = francis_search_subdiag_small_elements(&m.matrix);
 304
 305       if (q == 0)
 306         {
 307           /*
 308            * no small subdiagonal element found - perform a QR
 309            * sweep on the active reduced hessenberg matrix
 310            */
 311           francis_qrstep(&m.matrix, w);
 312           continue;
 313         }
 314
 315       /*
 316        * a small subdiagonal element was found - one or two eigenvalues
 317        * have converged or the matrix has split into two smaller matrices
 318        */
 319
 320       if (q == (N - 1))
 321         {
 322           /*
 323            * the last subdiagonal element of the matrix is 0 -
 324            * m_{NN} is a real eigenvalue
 325            */
 326           GSL_SET_COMPLEX(&lambda1,
 327                           gsl_matrix_get(&m.matrix, q, q), 0.0);
 328           gsl_vector_complex_set(eval, w->n_evals, lambda1);
 329           w->n_evals += 1;
 330           w->n_iter = 0;
 331
 332           --N;
 333           m = gsl_matrix_submatrix(&m.matrix, 0, 0, N, N);
 334         }
 335       else if (q == (N - 2))
 336         {
 337           gsl_matrix_view v;
 338
 339           /*
 340            * The bottom right 2-by-2 block of m is an eigenvalue
 341            * system
 342            */
 343
 344           v = gsl_matrix_submatrix(&m.matrix, q, q, 2, 2);
 345           francis_schur_standardize(&v.matrix, &lambda1, &lambda2, w);
 346
 347           gsl_vector_complex_set(eval, w->n_evals, lambda1);
 348           gsl_vector_complex_set(eval, w->n_evals + 1, lambda2);
 349           w->n_evals += 2;
 350           w->n_iter = 0;
 351
 352           N -= 2;
 353           m = gsl_matrix_submatrix(&m.matrix, 0, 0, N, N);
 354         }
 355       else if (q == 1)
 356         {
 357           /* the first matrix element is an eigenvalue */
 358           GSL_SET_COMPLEX(&lambda1,
 359                           gsl_matrix_get(&m.matrix, 0, 0), 0.0);
 360           gsl_vector_complex_set(eval, w->n_evals, lambda1);
 361           w->n_evals += 1;
 362           w->n_iter = 0;
 363
 364           --N;
 365           m = gsl_matrix_submatrix(&m.matrix, 1, 1, N, N);
 366         }
 367       else if (q == 2)
 368         {
 369           gsl_matrix_view v;
 370
 371           /* the upper left 2-by-2 block is an eigenvalue system */
 372
 373           v = gsl_matrix_submatrix(&m.matrix, 0, 0, 2, 2);
 374           francis_schur_standardize(&v.matrix, &lambda1, &lambda2, w);
 375
 376           gsl_vector_complex_set(eval, w->n_evals, lambda1);
 377           gsl_vector_complex_set(eval, w->n_evals + 1, lambda2);
 378           w->n_evals += 2;
 379           w->n_iter = 0;
 380
 381           N -= 2;
 382           m = gsl_matrix_submatrix(&m.matrix, 2, 2, N, N);
 383         }
 384       else
 385         {
 386           gsl_matrix_view v;
 387
 388           /*
 389            * There is a zero element on the subdiagonal somewhere
 390            * in the middle of the matrix - we can now operate
 391            * separately on the two submatrices split by this
 392            * element. q is the row index of the zero element.
 393            */
 394
 395           /* operate on lower right (N - q)-by-(N - q) block first */
 396           v = gsl_matrix_submatrix(&m.matrix, q, q, N - q, N - q);
 397           francis_schur_decomp(&v.matrix, eval, w);
 398
 399           /* operate on upper left q-by-q block */
 400           v = gsl_matrix_submatrix(&m.matrix, 0, 0, q, q);
 401           francis_schur_decomp(&v.matrix, eval, w);
 402
 403           N = 0;
 404         }
 405     }
 406
 407   /* handle special cases of N = 1 or 2 */
 408
 409   if (N == 1)
 410     {
 411       GSL_SET_COMPLEX(&lambda1, gsl_matrix_get(&m.matrix, 0, 0), 0.0);
 412       gsl_vector_complex_set(eval, w->n_evals, lambda1);
 413       w->n_evals += 1;
 414       w->n_iter = 0;
 415     }
 416   else if (N == 2)
 417     {
 418       francis_schur_standardize(&m.matrix, &lambda1, &lambda2, w);
 419       gsl_vector_complex_set(eval, w->n_evals, lambda1);
 420       gsl_vector_complex_set(eval, w->n_evals + 1, lambda2);
 421       w->n_evals += 2;
 422       w->n_iter = 0;
 423     }
 424 } /* francis_schur_decomp() */
 425
 426 /*
 427 francis_qrstep()
 428   Perform a Francis QR step.
 429
 430 See Golub & Van Loan, "Matrix Computations" (3rd ed),
 431 algorithm 7.5.1
 432
 433 Inputs: H - upper Hessenberg matrix
 434         w - workspace
 435
 436 Notes: The matrix H must be "reduced", ie: have no tiny subdiagonal
 437        elements. When computing the first householder reflection,
 438        we divide by H_{21} so it is necessary that this element
 439        is not zero. When a subdiagonal element becomes negligible,
 440        the calling function should call this routine with the
 441        submatrices split by that element, so that we don't divide
 442        by zeros.
 443 */
 444
 445 static inline int
 446 francis_qrstep(gsl_matrix * H, gsl_eigen_francis_workspace * w)
 447 {
 448   const size_t N = H->size1;
 449   size_t i;        /* looping */
 450   gsl_matrix_view m;
 451   double tau_i;    /* householder coefficient */
 452   double dat[3];   /* householder vector */
 453   double scale;    /* scale factor to avoid overflow */
 454   gsl_vector_view v2, v3;
 455   size_t q, r;
 456   size_t top = 0;  /* location of H in original matrix */
 457   double s,
 458          disc;
 459   double h_nn,     /* H(n,n) */
 460          h_nm1nm1, /* H(n-1,n-1) */
 461          h_cross,  /* H(n,n-1) * H(n-1,n) */
 462          h_tmp1,
 463          h_tmp2;
 464
 465   v2 = gsl_vector_view_array(dat, 2);
 466   v3 = gsl_vector_view_array(dat, 3);
 467
 468   if ((w->n_iter % 10) == 0)
 469     {
 470       /*
 471        * exceptional shifts: we have gone 10 iterations
 472        * without finding a new eigenvalue, try a new choice of shifts.
 473        * See LAPACK routine DLAHQR
 474        */
 475       s = fabs(gsl_matrix_get(H, N - 1, N - 2)) +
 476           fabs(gsl_matrix_get(H, N - 2, N - 3));
 477       h_nn = gsl_matrix_get(H, N - 1, N - 1) + GSL_FRANCIS_COEFF1 * s;
 478       h_nm1nm1 = h_nn;
 479       h_cross = GSL_FRANCIS_COEFF2 * s * s;
 480     }
 481   else
 482     {
 483       /*
 484        * normal shifts - compute Rayleigh quotient and use
 485        * Wilkinson shift if possible
 486        */
 487
 488       h_nn = gsl_matrix_get(H, N - 1, N - 1);
 489       h_nm1nm1 = gsl_matrix_get(H, N - 2, N - 2);
 490       h_cross = gsl_matrix_get(H, N - 1, N - 2) *
 491                 gsl_matrix_get(H, N - 2, N - 1);
 492
 493       disc = 0.5 * (h_nm1nm1 - h_nn);
 494       disc = disc * disc + h_cross;
 495       if (disc > 0.0)
 496         {
 497           double ave;
 498
 499           /* real roots - use Wilkinson's shift twice */
 500           disc = sqrt(disc);
 501           ave = 0.5 * (h_nm1nm1 + h_nn);
 502           if (fabs(h_nm1nm1) - fabs(h_nn) > 0.0)
 503             {
 504               h_nm1nm1 = h_nm1nm1 * h_nn - h_cross;
 505               h_nn = h_nm1nm1 / (disc * GSL_SIGN(ave) + ave);
 506             }
 507           else
 508             {
 509               h_nn = disc * GSL_SIGN(ave) + ave;
 510             }
 511
 512           h_nm1nm1 = h_nn;
 513           h_cross = 0.0;
 514         }
 515     }
 516
 517   h_tmp1 = h_nm1nm1 - gsl_matrix_get(H, 0, 0);
 518   h_tmp2 = h_nn - gsl_matrix_get(H, 0, 0);
 519
 520   /*
 521    * These formulas are equivalent to those in Golub & Van Loan
 522    * for the normal shift case - the terms have been rearranged
 523    * to reduce possible roundoff error when subdiagonal elements
 524    * are small
 525    */
 526
 527   dat[0] = (h_tmp1*h_tmp2 - h_cross) / gsl_matrix_get(H, 1, 0) +
 528            gsl_matrix_get(H, 0, 1);
 529   dat[1] = gsl_matrix_get(H, 1, 1) - gsl_matrix_get(H, 0, 0) - h_tmp1 -
 530            h_tmp2;
 531   dat[2] = gsl_matrix_get(H, 2, 1);
 532
 533   scale = fabs(dat[0]) + fabs(dat[1]) + fabs(dat[2]);
 534   if (scale != 0.0)
 535     {
 536       /* scale to prevent overflow or underflow */
 537       dat[0] /= scale;
 538       dat[1] /= scale;
 539       dat[2] /= scale;
 540     }
 541
 542   if (w->Z || w->compute_t)
 543     {
 544       /*
 545        * get absolute indices of this (sub)matrix relative to the
 546        * original Hessenberg matrix
 547        */
 548       top = francis_get_submatrix(w->H, H);
 549     }
 550
 551   for (i = 0; i < N - 2; ++i)
 552     {
 553       tau_i = gsl_linalg_householder_transform(&v3.vector);
 554
 555       if (tau_i != 0.0)
 556         {
 557           /* q = max(1, i - 1) */
 558           q = (1 > ((int)i - 1)) ? 0 : (i - 1);
 559
 560           /* r = min(i + 3, N - 1) */
 561           r = ((i + 3) < (N - 1)) ? (i + 3) : (N - 1);
 562
 563           if (w->compute_t)
 564             {
 565               /*
 566                * We are computing the Schur form T, so we
 567                * need to transform the whole matrix H
 568                *
 569                * H -> P_k^t H P_k
 570                *
 571                * where P_k is the current Householder matrix
 572                */
 573
 574               /* apply left householder matrix (I - tau_i v v') to H */
 575               m = gsl_matrix_submatrix(w->H,
 576                                        top + i,
 577                                        top + q,
 578                                        3,
 579                                        w->size - top - q);
 580               gsl_linalg_householder_hm(tau_i, &v3.vector, &m.matrix);
 581
 582               /* apply right householder matrix (I - tau_i v v') to H */
 583               m = gsl_matrix_submatrix(w->H,
 584                                        0,
 585                                        top + i,
 586                                        top + r + 1,
 587                                        3);
 588               gsl_linalg_householder_mh(tau_i, &v3.vector, &m.matrix);
 589             }
 590           else
 591             {
 592               /*
 593                * We are not computing the Schur form T, so we
 594                * only need to transform the active block
 595                */
 596
 597               /* apply left householder matrix (I - tau_i v v') to H */
 598               m = gsl_matrix_submatrix(H, i, q, 3, N - q);
 599               gsl_linalg_householder_hm(tau_i, &v3.vector, &m.matrix);
 600
 601               /* apply right householder matrix (I - tau_i v v') to H */
 602               m = gsl_matrix_submatrix(H, 0, i, r + 1, 3);
 603               gsl_linalg_householder_mh(tau_i, &v3.vector, &m.matrix);
 604             }
 605
 606           if (w->Z)
 607             {
 608               /* accumulate the similarity transformation into Z */
 609               m = gsl_matrix_submatrix(w->Z, 0, top + i, w->size, 3);
 610               gsl_linalg_householder_mh(tau_i, &v3.vector, &m.matrix);
 611             }
 612         } /* if (tau_i != 0.0) */
 613
 614       dat[0] = gsl_matrix_get(H, i + 1, i);
 615       dat[1] = gsl_matrix_get(H, i + 2, i);
 616       if (i < (N - 3))
 617         {
 618           dat[2] = gsl_matrix_get(H, i + 3, i);
 619         }
 620
 621       scale = fabs(dat[0]) + fabs(dat[1]) + fabs(dat[2]);
 622       if (scale != 0.0)
 623         {
 624           /* scale to prevent overflow or underflow */
 625           dat[0] /= scale;
 626           dat[1] /= scale;
 627           dat[2] /= scale;
 628         }
 629     } /* for (i = 0; i < N - 2; ++i) */
 630
 631   scale = fabs(dat[0]) + fabs(dat[1]);
 632   if (scale != 0.0)
 633     {
 634       /* scale to prevent overflow or underflow */
 635       dat[0] /= scale;
 636       dat[1] /= scale;
 637     }
 638
 639   tau_i = gsl_linalg_householder_transform(&v2.vector);
 640
 641   if (w->compute_t)
 642     {
 643       m = gsl_matrix_submatrix(w->H,
 644                                top + N - 2,
 645                                top + N - 3,
 646                                2,
 647                                w->size - top - N + 3);
 648       gsl_linalg_householder_hm(tau_i, &v2.vector, &m.matrix);
 649
 650       m = gsl_matrix_submatrix(w->H,
 651                                0,
 652                                top + N - 2,
 653                                top + N,
 654                                2);
 655       gsl_linalg_householder_mh(tau_i, &v2.vector, &m.matrix);
 656     }
 657   else
 658     {
 659       m = gsl_matrix_submatrix(H, N - 2, N - 3, 2, 3);
 660       gsl_linalg_householder_hm(tau_i, &v2.vector, &m.matrix);
 661
 662       m = gsl_matrix_submatrix(H, 0, N - 2, N, 2);
 663       gsl_linalg_householder_mh(tau_i, &v2.vector, &m.matrix);
 664     }
 665
 666   if (w->Z)
 667     {
 668       /* accumulate transformation into Z */
 669       m = gsl_matrix_submatrix(w->Z, 0, top + N - 2, w->size, 2);
 670       gsl_linalg_householder_mh(tau_i, &v2.vector, &m.matrix);
 671     }
 672
 673   return GSL_SUCCESS;
 674 } /* francis_qrstep() */
 675
 676 /*
 677 francis_search_subdiag_small_elements()
 678   Search for a small subdiagonal element starting from the bottom
 679 of a matrix A. A small element is one that satisfies:
 680
 681 |A_{i,i-1}| <= eps * (|A_{i,i}| + |A_{i-1,i-1}|)
 682
 683 Inputs: A - matrix (must be at least 3-by-3)
 684
 685 Return: row index of small subdiagonal element or 0 if not found
 686
 687 Notes: the first small element that is found (starting from bottom)
 688        is set to zero
 689 */
 690
 691 static inline size_t
 692 francis_search_subdiag_small_elements(gsl_matrix * A)
 693 {
 694   const size_t N = A->size1;
 695   size_t i;
 696   double dpel = gsl_matrix_get(A, N - 2, N - 2);
 697
 698   for (i = N - 1; i > 0; --i)
 699     {
 700       double sel = gsl_matrix_get(A, i, i - 1);
 701       double del = gsl_matrix_get(A, i, i);
 702
 703       if ((sel == 0.0) ||
 704           (fabs(sel) < GSL_DBL_EPSILON * (fabs(del) + fabs(dpel))))
 705         {
 706           gsl_matrix_set(A, i, i - 1, 0.0);
 707           return (i);
 708         }
 709
 710       dpel = del;
 711     }
 712
 713   return (0);
 714 } /* francis_search_subdiag_small_elements() */
 715
 716 /*
 717 francis_schur_standardize()
 718   Convert a 2-by-2 diagonal block in the Schur form to standard form
 719 and update the rest of T and Z matrices if required.
 720
 721 Inputs: A     - 2-by-2 matrix
 722         eval1 - where to store eigenvalue 1
 723         eval2 - where to store eigenvalue 2
 724         w     - francis workspace
 725 */
 726
 727 static inline void
 728 francis_schur_standardize(gsl_matrix *A, gsl_complex *eval1,
 729                           gsl_complex *eval2,
 730                           gsl_eigen_francis_workspace *w)
 731 {
 732   const size_t N = w->size;
 733   double cs, sn;
 734   size_t top;
 735
 736   /*
 737    * figure out where the submatrix A resides in the
 738    * original matrix H
 739    */
 740   top = francis_get_submatrix(w->H, A);
 741
 742   /* convert 2-by-2 block to standard form */
 743   francis_standard_form(A, &cs, &sn);
 744
 745   /* set eigenvalues */
 746
 747   GSL_SET_REAL(eval1, gsl_matrix_get(A, 0, 0));
 748   GSL_SET_REAL(eval2, gsl_matrix_get(A, 1, 1));
 749   if (gsl_matrix_get(A, 1, 0) == 0.0)
 750     {
 751       GSL_SET_IMAG(eval1, 0.0);
 752       GSL_SET_IMAG(eval2, 0.0);
 753     }
 754   else
 755     {
 756       double tmp = sqrt(fabs(gsl_matrix_get(A, 0, 1)) *
 757                         fabs(gsl_matrix_get(A, 1, 0)));
 758       GSL_SET_IMAG(eval1, tmp);
 759       GSL_SET_IMAG(eval2, -tmp);
 760     }
 761
 762   if (w->compute_t)
 763     {
 764       gsl_vector_view xv, yv;
 765
 766       /*
 767        * The above call to francis_standard_form transformed a 2-by-2 block
 768        * of T into upper triangular form via the transformation
 769        *
 770        * U = [ CS -SN ]
 771        *     [ SN  CS ]
 772        *
 773        * The original matrix T was
 774        *
 775        * T = [ T_{11} | T_{12} | T_{13} ]
 776        *     [   0*   |   A    | T_{23} ]
 777        *     [   0    |   0*   | T_{33} ]
 778        *
 779        * where 0* indicates all zeros except for possibly
 780        * one subdiagonal element next to A.
 781        *
 782        * After francis_standard_form, T looks like this:
 783        *
 784        * T = [ T_{11} | T_{12}  | T_{13} ]
 785        *     [   0*   | U^t A U | T_{23} ]
 786        *     [   0    |    0*   | T_{33} ]
 787        *
 788        * since only the 2-by-2 block of A was changed. However,
 789        * in order to be able to back transform T at the end,
 790        * we need to apply the U transformation to the rest
 791        * of the matrix T since there is no way to apply a
 792        * similarity transformation to T and change only the
 793        * middle 2-by-2 block. In other words, let
 794        *
 795        * M = [ I 0 0 ]
 796        *     [ 0 U 0 ]
 797        *     [ 0 0 I ]
 798        *
 799        * and compute
 800        *
 801        * M^t T M = [ T_{11} | T_{12} U |   T_{13}   ]
 802        *           [ U^t 0* | U^t A U  | U^t T_{23} ]
 803        *           [   0    |   0* U   |   T_{33}   ]
 804        *
 805        * So basically we need to apply the transformation U
 806        * to the i x 2 matrix T_{12} and the 2 x (n - i + 2)
 807        * matrix T_{23}, where i is the index of the top of A
 808        * in T.
 809        *
 810        * The BLAS routine drot() is suited for this.
 811        */
 812
 813       if (top < (N - 2))
 814         {
 815           /* transform the 2 rows of T_{23} */
 816
 817           xv = gsl_matrix_subrow(w->H, top, top + 2, N - top - 2);
 818           yv = gsl_matrix_subrow(w->H, top + 1, top + 2, N - top - 2);
 819           gsl_blas_drot(&xv.vector, &yv.vector, cs, sn);
 820         }
 821
 822       if (top > 0)
 823         {
 824           /* transform the 2 columns of T_{12} */
 825
 826           xv = gsl_matrix_subcolumn(w->H, top, 0, top);
 827           yv = gsl_matrix_subcolumn(w->H, top + 1, 0, top);
 828           gsl_blas_drot(&xv.vector, &yv.vector, cs, sn);
 829         }
 830     } /* if (w->compute_t) */
 831
 832   if (w->Z)
 833     {
 834       gsl_vector_view xv, yv;
 835
 836       /*
 837        * Accumulate the transformation in Z. Here, Z -> Z * M
 838        *
 839        * So:
 840        *
 841        * Z -> [ Z_{11} | Z_{12} U | Z_{13} ]
 842        *      [ Z_{21} | Z_{22} U | Z_{23} ]
 843        *      [ Z_{31} | Z_{32} U | Z_{33} ]
 844        *
 845        * So we just need to apply drot() to the 2 columns
 846        * starting at index 'top'
 847        */
 848
 849       xv = gsl_matrix_column(w->Z, top);
 850       yv = gsl_matrix_column(w->Z, top + 1);
 851
 852       gsl_blas_drot(&xv.vector, &yv.vector, cs, sn);
 853     } /* if (w->Z) */
 854 } /* francis_schur_standardize() */
 855
 856 /*
 857 francis_get_submatrix()
 858   B is a submatrix of A. The goal of this function is to
 859 compute the indices in A of where the matrix B resides
 860 */
 861
 862 static inline size_t
 863 francis_get_submatrix(gsl_matrix *A, gsl_matrix *B)
 864 {
 865   size_t diff;
 866   double ratio;
 867   size_t top;
 868
 869   diff = (size_t) (B->data - A->data);
 870
 871   ratio = (double)diff / ((double) (A->tda + 1));
 872
 873   top = (size_t) floor(ratio);
 874
 875   return top;
 876 } /* francis_get_submatrix() */
 877
 878 /*
 879 francis_standard_form()
 880   Compute the Schur factorization of a real 2-by-2 matrix in
 881 standard form:
 882
 883 [ A B ] = [ CS -SN ] [ T11 T12 ] [ CS SN ]
 884 [ C D ]   [ SN  CS ] [ T21 T22 ] [-SN CS ]
 885
 886 where either:
 887 1) T21 = 0 so that T11 and T22 are real eigenvalues of the matrix, or
 888 2) T11 = T22 and T21*T12 < 0, so that T11 +/- sqrt(|T21*T12|) are
 889    complex conjugate eigenvalues
 890
 891 Inputs: A  - 2-by-2 matrix
 892         cs - where to store cosine parameter of rotation matrix
 893         sn - where to store sine parameter of rotation matrix
 894
 895 Notes: 1) based on LAPACK routine DLANV2
 896        2) On output, A is modified to contain the matrix in standard form
 897 */
 898
 899 static void
 900 francis_standard_form(gsl_matrix *A, double *cs, double *sn)
 901 {
 902   double a, b, c, d; /* input matrix values */
 903   double tmp;
 904   double p, z;
 905   double bcmax, bcmis, scale;
 906   double tau, sigma;
 907   double cs1, sn1;
 908   double aa, bb, cc, dd;
 909   double sab, sac;
 910
 911   a = gsl_matrix_get(A, 0, 0);
 912   b = gsl_matrix_get(A, 0, 1);
 913   c = gsl_matrix_get(A, 1, 0);
 914   d = gsl_matrix_get(A, 1, 1);
 915
 916   if (c == 0.0)
 917     {
 918       /*
 919        * matrix is already upper triangular - set rotation matrix
 920        * to the identity
 921        */
 922       *cs = 1.0;
 923       *sn = 0.0;
 924     }
 925   else if (b == 0.0)
 926     {
 927       /* swap rows and columns to make it upper triangular */
 928
 929       *cs = 0.0;
 930       *sn = 1.0;
 931
 932       tmp = d;
 933       d = a;
 934       a = tmp;
 935       b = -c;
 936       c = 0.0;
 937     }
 938   else if (((a - d) == 0.0) && (GSL_SIGN(b) != GSL_SIGN(c)))
 939     {
 940       /* the matrix has complex eigenvalues with a == d */
 941       *cs = 1.0;
 942       *sn = 0.0;
 943     }
 944   else
 945     {
 946       tmp = a - d;
 947       p = 0.5 * tmp;
 948       bcmax = GSL_MAX(fabs(b), fabs(c));
 949       bcmis = GSL_MIN(fabs(b), fabs(c)) * GSL_SIGN(b) * GSL_SIGN(c);
 950       scale = GSL_MAX(fabs(p), bcmax);
 951       z = (p / scale) * p + (bcmax / scale) * bcmis;
 952
 953       if (z >= 4.0 * GSL_DBL_EPSILON)
 954         {
 955           /* real eigenvalues, compute a and d */
 956
 957           z = p + GSL_SIGN(p) * fabs(sqrt(scale) * sqrt(z));
 958           a = d + z;
 959           d -= (bcmax / z) * bcmis;
 960
 961           /* compute b and the rotation matrix */
 962
 963           tau = gsl_hypot(c, z);
 964           *cs = z / tau;
 965           *sn = c / tau;
 966           b -= c;
 967           c = 0.0;
 968         }
 969       else
 970         {
 971           /*
 972            * complex eigenvalues, or real (almost) equal eigenvalues -
 973            * make diagonal elements equal
 974            */
 975
 976           sigma = b + c;
 977           tau = gsl_hypot(sigma, tmp);
 978           *cs = sqrt(0.5 * (1.0 + fabs(sigma) / tau));
 979           *sn = -(p / (tau * (*cs))) * GSL_SIGN(sigma);
 980
 981           /*
 982            * Compute [ AA BB ] = [ A B ] [ CS -SN ]
 983            *         [ CC DD ]   [ C D ] [ SN  CS ]
 984            */
 985           aa = a * (*cs) + b * (*sn);
 986           bb = -a * (*sn) + b * (*cs);
 987           cc = c * (*cs) + d * (*sn);
 988           dd = -c * (*sn) + d * (*cs);
 989
 990           /*
 991            * Compute [ A B ] = [ CS SN ] [ AA BB ]
 992            *         [ C D ]   [-SN CS ] [ CC DD ]
 993            */
 994           a = aa * (*cs) + cc * (*sn);
 995           b = bb * (*cs) + dd * (*sn);
 996           c = -aa * (*sn) + cc * (*cs);
 997           d = -bb * (*sn) + dd * (*cs);
 998
 999           tmp = 0.5 * (a + d);
1000           a = d = tmp;
1001
1002           if (c != 0.0)
1003             {
1004               if (b != 0.0)
1005                 {
1006                   if (GSL_SIGN(b) == GSL_SIGN(c))
1007                     {
1008                       /*
1009                        * real eigenvalues: reduce to upper triangular
1010                        * form
1011                        */
1012                       sab = sqrt(fabs(b));
1013                       sac = sqrt(fabs(c));
1014                       p = GSL_SIGN(c) * fabs(sab * sac);
1015                       tau = 1.0 / sqrt(fabs(b + c));
1016                       a = tmp + p;
1017                       d = tmp - p;
1018                       b -= c;
1019                       c = 0.0;
1020
1021                       cs1 = sab * tau;
1022                       sn1 = sac * tau;
1023                       tmp = (*cs) * cs1 - (*sn) * sn1;
1024                       *sn = (*cs) * sn1 + (*sn) * cs1;
1025                       *cs = tmp;
1026                     }
1027                 }
1028               else
1029                 {
1030                   b = -c;
1031                   c = 0.0;
1032                   tmp = *cs;
1033                   *cs = -(*sn);
1034                   *sn = tmp;
1035                 }
1036             }
1037         }
1038     }
1039
1040   /* set new matrix elements */
1041
1042   gsl_matrix_set(A, 0, 0, a);
1043   gsl_matrix_set(A, 0, 1, b);
1044   gsl_matrix_set(A, 1, 0, c);
1045   gsl_matrix_set(A, 1, 1, d);
1046 } /* francis_standard_form() */