1 /* blas/source_tpsv_c.h
3 * Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or (at
8 * your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 const int conj = (TransA == CblasConjTrans) ? -1 : 1;
22 const int Trans = (TransA != CblasConjTrans) ? TransA : CblasTrans;
23 const int nonunit = (Diag == CblasNonUnit);
29 /* form x := inv( A )*x */
31 if ((order == CblasRowMajor && Trans == CblasNoTrans && Uplo == CblasUpper)
32 || (order == CblasColMajor && Trans == CblasTrans && Uplo == CblasLower)) {
34 INDEX ix = OFFSET(N, incX) + incX * (N - 1);
37 const BASE a_real = CONST_REAL(Ap, TPUP(N, (N - 1), (N - 1)));
38 const BASE a_imag = conj * CONST_IMAG(Ap, TPUP(N, (N - 1), (N - 1)));
39 const BASE x_real = REAL(X, ix);
40 const BASE x_imag = IMAG(X, ix);
41 const BASE s = xhypot(a_real, a_imag);
42 const BASE b_real = a_real / s;
43 const BASE b_imag = a_imag / s;
44 REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
45 IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
50 for (i = N - 1; i > 0 && i--;) {
51 BASE tmp_real = REAL(X, ix);
52 BASE tmp_imag = IMAG(X, ix);
54 for (j = i + 1; j < N; j++) {
55 const BASE Aij_real = CONST_REAL(Ap, TPUP(N, i, j));
56 const BASE Aij_imag = conj * CONST_IMAG(Ap, TPUP(N, i, j));
57 const BASE x_real = REAL(X, jx);
58 const BASE x_imag = IMAG(X, jx);
59 tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
60 tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
65 const BASE a_real = CONST_REAL(Ap, TPUP(N, i, i));
66 const BASE a_imag = conj * CONST_IMAG(Ap, TPUP(N, i, i));
67 const BASE s = xhypot(a_real, a_imag);
68 const BASE b_real = a_real / s;
69 const BASE b_imag = a_imag / s;
70 REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
71 IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
73 REAL(X, ix) = tmp_real;
74 IMAG(X, ix) = tmp_imag;
79 } else if ((order == CblasRowMajor && Trans == CblasNoTrans && Uplo == CblasLower)
80 || (order == CblasColMajor && Trans == CblasTrans && Uplo == CblasUpper)) {
81 /* forward substitution */
83 INDEX ix = OFFSET(N, incX);
86 const BASE a_real = CONST_REAL(Ap, TPLO(N, 0, 0));
87 const BASE a_imag = conj * CONST_IMAG(Ap, TPLO(N, 0, 0));
88 const BASE x_real = REAL(X, ix);
89 const BASE x_imag = IMAG(X, ix);
90 const BASE s = xhypot(a_real, a_imag);
91 const BASE b_real = a_real / s;
92 const BASE b_imag = a_imag / s;
93 REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
94 IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
99 for (i = 1; i < N; i++) {
100 BASE tmp_real = REAL(X, ix);
101 BASE tmp_imag = IMAG(X, ix);
102 INDEX jx = OFFSET(N, incX);
103 for (j = 0; j < i; j++) {
104 const BASE Aij_real = CONST_REAL(Ap, TPLO(N, i, j));
105 const BASE Aij_imag = conj * CONST_IMAG(Ap, TPLO(N, i, j));
106 const BASE x_real = REAL(X, jx);
107 const BASE x_imag = IMAG(X, jx);
108 tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
109 tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
113 const BASE a_real = CONST_REAL(Ap, TPLO(N, i, i));
114 const BASE a_imag = conj * CONST_IMAG(Ap, TPLO(N, i, i));
115 const BASE s = xhypot(a_real, a_imag);
116 const BASE b_real = a_real / s;
117 const BASE b_imag = a_imag / s;
118 REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
119 IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
121 REAL(X, ix) = tmp_real;
122 IMAG(X, ix) = tmp_imag;
126 } else if ((order == CblasRowMajor && Trans == CblasTrans && Uplo == CblasUpper)
127 || (order == CblasColMajor && Trans == CblasNoTrans && Uplo == CblasLower)) {
128 /* form x := inv( A' )*x */
130 /* forward substitution */
132 INDEX ix = OFFSET(N, incX);
135 const BASE a_real = CONST_REAL(Ap, TPUP(N, 0, 0));
136 const BASE a_imag = conj * CONST_IMAG(Ap, TPUP(N, 0, 0));
137 const BASE x_real = REAL(X, ix);
138 const BASE x_imag = IMAG(X, ix);
139 const BASE s = xhypot(a_real, a_imag);
140 const BASE b_real = a_real / s;
141 const BASE b_imag = a_imag / s;
142 REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
143 IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
148 for (i = 1; i < N; i++) {
149 BASE tmp_real = REAL(X, ix);
150 BASE tmp_imag = IMAG(X, ix);
151 INDEX jx = OFFSET(N, incX);
152 for (j = 0; j < i; j++) {
153 const BASE Aij_real = CONST_REAL(Ap, TPUP(N, j, i));
154 const BASE Aij_imag = conj * CONST_IMAG(Ap, TPUP(N, j, i));
155 const BASE x_real = REAL(X, jx);
156 const BASE x_imag = IMAG(X, jx);
157 tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
158 tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
162 const BASE a_real = CONST_REAL(Ap, TPUP(N, i, i));
163 const BASE a_imag = conj * CONST_IMAG(Ap, TPUP(N, i, i));
164 const BASE s = xhypot(a_real, a_imag);
165 const BASE b_real = a_real / s;
166 const BASE b_imag = a_imag / s;
167 REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
168 IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
170 REAL(X, ix) = tmp_real;
171 IMAG(X, ix) = tmp_imag;
175 } else if ((order == CblasRowMajor && Trans == CblasTrans && Uplo == CblasLower)
176 || (order == CblasColMajor && Trans == CblasNoTrans && Uplo == CblasUpper)) {
178 /* backsubstitution */
180 INDEX ix = OFFSET(N, incX) + incX * (N - 1);
183 const BASE a_real = CONST_REAL(Ap, TPLO(N, (N - 1), (N - 1)));
184 const BASE a_imag = conj * CONST_IMAG(Ap, TPLO(N, (N - 1), (N - 1)));
185 const BASE x_real = REAL(X, ix);
186 const BASE x_imag = IMAG(X, ix);
187 const BASE s = xhypot(a_real, a_imag);
188 const BASE b_real = a_real / s;
189 const BASE b_imag = a_imag / s;
190 REAL(X, ix) = (x_real * b_real + x_imag * b_imag) / s;
191 IMAG(X, ix) = (x_imag * b_real - b_imag * x_real) / s;
196 for (i = N - 1; i > 0 && i--;) {
197 BASE tmp_real = REAL(X, ix);
198 BASE tmp_imag = IMAG(X, ix);
199 INDEX jx = ix + incX;
200 for (j = i + 1; j < N; j++) {
201 const BASE Aij_real = CONST_REAL(Ap, TPLO(N, j, i));
202 const BASE Aij_imag = conj * CONST_IMAG(Ap, TPLO(N, j, i));
203 const BASE x_real = REAL(X, jx);
204 const BASE x_imag = IMAG(X, jx);
205 tmp_real -= Aij_real * x_real - Aij_imag * x_imag;
206 tmp_imag -= Aij_real * x_imag + Aij_imag * x_real;
211 const BASE a_real = CONST_REAL(Ap, TPLO(N, i, i));
212 const BASE a_imag = conj * CONST_IMAG(Ap, TPLO(N, i, i));
213 const BASE s = xhypot(a_real, a_imag);
214 const BASE b_real = a_real / s;
215 const BASE b_imag = a_imag / s;
216 REAL(X, ix) = (tmp_real * b_real + tmp_imag * b_imag) / s;
217 IMAG(X, ix) = (tmp_imag * b_real - tmp_real * b_imag) / s;
219 REAL(X, ix) = tmp_real;
220 IMAG(X, ix) = tmp_imag;
225 BLAS_ERROR("unrecognized operation");