1 /* blas/source_trsm_c.h
3 * Copyright (C) 2001, 2007 Brian Gough
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or (at
8 * your option) any later version.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 const int nonunit = (Diag == CblasNonUnit);
24 const int conj = (TransA == CblasConjTrans) ? -1 : 1;
25 int side, uplo, trans;
27 const BASE alpha_real = CONST_REAL0(alpha);
28 const BASE alpha_imag = CONST_IMAG0(alpha);
30 if (Order == CblasRowMajor) {
36 trans = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans;
40 side = (Side == CblasLeft) ? CblasRight : CblasLeft; /* exchanged */
41 uplo = (Uplo == CblasUpper) ? CblasLower : CblasUpper; /* exchanged */
42 trans = (TransA == CblasNoTrans) ? CblasNoTrans : CblasTrans; /* same */
45 if (side == CblasLeft && uplo == CblasUpper && trans == CblasNoTrans) {
47 /* form B := alpha * inv(TriU(A)) *B */
49 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
50 for (i = 0; i < n1; i++) {
51 for (j = 0; j < n2; j++) {
52 const BASE Bij_real = REAL(B, ldb * i + j);
53 const BASE Bij_imag = IMAG(B, ldb * i + j);
54 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
55 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
60 for (i = n1; i > 0 && i--;) {
62 const BASE Aii_real = CONST_REAL(A, lda * i + i);
63 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
64 const BASE s = xhypot(Aii_real, Aii_imag);
65 const BASE a_real = Aii_real / s;
66 const BASE a_imag = Aii_imag / s;
68 for (j = 0; j < n2; j++) {
69 const BASE Bij_real = REAL(B, ldb * i + j);
70 const BASE Bij_imag = IMAG(B, ldb * i + j);
71 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
72 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
76 for (k = 0; k < i; k++) {
77 const BASE Aki_real = CONST_REAL(A, k * lda + i);
78 const BASE Aki_imag = conj * CONST_IMAG(A, k * lda + i);
79 for (j = 0; j < n2; j++) {
80 const BASE Bij_real = REAL(B, ldb * i + j);
81 const BASE Bij_imag = IMAG(B, ldb * i + j);
82 REAL(B, ldb * k + j) -= Aki_real * Bij_real - Aki_imag * Bij_imag;
83 IMAG(B, ldb * k + j) -= Aki_real * Bij_imag + Aki_imag * Bij_real;
88 } else if (side == CblasLeft && uplo == CblasUpper && trans == CblasTrans) {
90 /* form B := alpha * inv(TriU(A))' *B */
92 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
93 for (i = 0; i < n1; i++) {
94 for (j = 0; j < n2; j++) {
95 const BASE Bij_real = REAL(B, ldb * i + j);
96 const BASE Bij_imag = IMAG(B, ldb * i + j);
97 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
98 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
103 for (i = 0; i < n1; i++) {
106 const BASE Aii_real = CONST_REAL(A, lda * i + i);
107 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
108 const BASE s = xhypot(Aii_real, Aii_imag);
109 const BASE a_real = Aii_real / s;
110 const BASE a_imag = Aii_imag / s;
112 for (j = 0; j < n2; j++) {
113 const BASE Bij_real = REAL(B, ldb * i + j);
114 const BASE Bij_imag = IMAG(B, ldb * i + j);
115 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
116 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
120 for (k = i + 1; k < n1; k++) {
121 const BASE Aik_real = CONST_REAL(A, i * lda + k);
122 const BASE Aik_imag = conj * CONST_IMAG(A, i * lda + k);
123 for (j = 0; j < n2; j++) {
124 const BASE Bij_real = REAL(B, ldb * i + j);
125 const BASE Bij_imag = IMAG(B, ldb * i + j);
126 REAL(B, ldb * k + j) -= Aik_real * Bij_real - Aik_imag * Bij_imag;
127 IMAG(B, ldb * k + j) -= Aik_real * Bij_imag + Aik_imag * Bij_real;
132 } else if (side == CblasLeft && uplo == CblasLower && trans == CblasNoTrans) {
134 /* form B := alpha * inv(TriL(A))*B */
136 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
137 for (i = 0; i < n1; i++) {
138 for (j = 0; j < n2; j++) {
139 const BASE Bij_real = REAL(B, ldb * i + j);
140 const BASE Bij_imag = IMAG(B, ldb * i + j);
141 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
142 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
147 for (i = 0; i < n1; i++) {
150 const BASE Aii_real = CONST_REAL(A, lda * i + i);
151 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
152 const BASE s = xhypot(Aii_real, Aii_imag);
153 const BASE a_real = Aii_real / s;
154 const BASE a_imag = Aii_imag / s;
156 for (j = 0; j < n2; j++) {
157 const BASE Bij_real = REAL(B, ldb * i + j);
158 const BASE Bij_imag = IMAG(B, ldb * i + j);
159 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
160 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
164 for (k = i + 1; k < n1; k++) {
165 const BASE Aki_real = CONST_REAL(A, k * lda + i);
166 const BASE Aki_imag = conj * CONST_IMAG(A, k * lda + i);
167 for (j = 0; j < n2; j++) {
168 const BASE Bij_real = REAL(B, ldb * i + j);
169 const BASE Bij_imag = IMAG(B, ldb * i + j);
170 REAL(B, ldb * k + j) -= Aki_real * Bij_real - Aki_imag * Bij_imag;
171 IMAG(B, ldb * k + j) -= Aki_real * Bij_imag + Aki_imag * Bij_real;
177 } else if (side == CblasLeft && uplo == CblasLower && trans == CblasTrans) {
179 /* form B := alpha * TriL(A)' *B */
181 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
182 for (i = 0; i < n1; i++) {
183 for (j = 0; j < n2; j++) {
184 const BASE Bij_real = REAL(B, ldb * i + j);
185 const BASE Bij_imag = IMAG(B, ldb * i + j);
186 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
187 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
192 for (i = n1; i > 0 && i--;) {
194 const BASE Aii_real = CONST_REAL(A, lda * i + i);
195 const BASE Aii_imag = conj * CONST_IMAG(A, lda * i + i);
196 const BASE s = xhypot(Aii_real, Aii_imag);
197 const BASE a_real = Aii_real / s;
198 const BASE a_imag = Aii_imag / s;
200 for (j = 0; j < n2; j++) {
201 const BASE Bij_real = REAL(B, ldb * i + j);
202 const BASE Bij_imag = IMAG(B, ldb * i + j);
203 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
204 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
208 for (k = 0; k < i; k++) {
209 const BASE Aik_real = CONST_REAL(A, i * lda + k);
210 const BASE Aik_imag = conj * CONST_IMAG(A, i * lda + k);
211 for (j = 0; j < n2; j++) {
212 const BASE Bij_real = REAL(B, ldb * i + j);
213 const BASE Bij_imag = IMAG(B, ldb * i + j);
214 REAL(B, ldb * k + j) -= Aik_real * Bij_real - Aik_imag * Bij_imag;
215 IMAG(B, ldb * k + j) -= Aik_real * Bij_imag + Aik_imag * Bij_real;
220 } else if (side == CblasRight && uplo == CblasUpper && trans == CblasNoTrans) {
222 /* form B := alpha * B * inv(TriU(A)) */
224 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
225 for (i = 0; i < n1; i++) {
226 for (j = 0; j < n2; j++) {
227 const BASE Bij_real = REAL(B, ldb * i + j);
228 const BASE Bij_imag = IMAG(B, ldb * i + j);
229 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
230 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
235 for (i = 0; i < n1; i++) {
236 for (j = 0; j < n2; j++) {
238 const BASE Ajj_real = CONST_REAL(A, lda * j + j);
239 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
240 const BASE s = xhypot(Ajj_real, Ajj_imag);
241 const BASE a_real = Ajj_real / s;
242 const BASE a_imag = Ajj_imag / s;
243 const BASE Bij_real = REAL(B, ldb * i + j);
244 const BASE Bij_imag = IMAG(B, ldb * i + j);
245 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
246 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
250 const BASE Bij_real = REAL(B, ldb * i + j);
251 const BASE Bij_imag = IMAG(B, ldb * i + j);
252 for (k = j + 1; k < n2; k++) {
253 const BASE Ajk_real = CONST_REAL(A, j * lda + k);
254 const BASE Ajk_imag = conj * CONST_IMAG(A, j * lda + k);
255 REAL(B, ldb * i + k) -= Ajk_real * Bij_real - Ajk_imag * Bij_imag;
256 IMAG(B, ldb * i + k) -= Ajk_real * Bij_imag + Ajk_imag * Bij_real;
262 } else if (side == CblasRight && uplo == CblasUpper && trans == CblasTrans) {
264 /* form B := alpha * B * inv(TriU(A))' */
266 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
267 for (i = 0; i < n1; i++) {
268 for (j = 0; j < n2; j++) {
269 const BASE Bij_real = REAL(B, ldb * i + j);
270 const BASE Bij_imag = IMAG(B, ldb * i + j);
271 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
272 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
277 for (i = 0; i < n1; i++) {
278 for (j = n2; j > 0 && j--;) {
281 const BASE Ajj_real = CONST_REAL(A, lda * j + j);
282 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
283 const BASE s = xhypot(Ajj_real, Ajj_imag);
284 const BASE a_real = Ajj_real / s;
285 const BASE a_imag = Ajj_imag / s;
286 const BASE Bij_real = REAL(B, ldb * i + j);
287 const BASE Bij_imag = IMAG(B, ldb * i + j);
288 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
289 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
293 const BASE Bij_real = REAL(B, ldb * i + j);
294 const BASE Bij_imag = IMAG(B, ldb * i + j);
295 for (k = 0; k < j; k++) {
296 const BASE Akj_real = CONST_REAL(A, k * lda + j);
297 const BASE Akj_imag = conj * CONST_IMAG(A, k * lda + j);
298 REAL(B, ldb * i + k) -= Akj_real * Bij_real - Akj_imag * Bij_imag;
299 IMAG(B, ldb * i + k) -= Akj_real * Bij_imag + Akj_imag * Bij_real;
306 } else if (side == CblasRight && uplo == CblasLower && trans == CblasNoTrans) {
308 /* form B := alpha * B * inv(TriL(A)) */
310 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
311 for (i = 0; i < n1; i++) {
312 for (j = 0; j < n2; j++) {
313 const BASE Bij_real = REAL(B, ldb * i + j);
314 const BASE Bij_imag = IMAG(B, ldb * i + j);
315 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
316 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
321 for (i = 0; i < n1; i++) {
322 for (j = n2; j > 0 && j--;) {
325 const BASE Ajj_real = CONST_REAL(A, lda * j + j);
326 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
327 const BASE s = xhypot(Ajj_real, Ajj_imag);
328 const BASE a_real = Ajj_real / s;
329 const BASE a_imag = Ajj_imag / s;
330 const BASE Bij_real = REAL(B, ldb * i + j);
331 const BASE Bij_imag = IMAG(B, ldb * i + j);
332 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
333 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
337 const BASE Bij_real = REAL(B, ldb * i + j);
338 const BASE Bij_imag = IMAG(B, ldb * i + j);
339 for (k = 0; k < j; k++) {
340 const BASE Ajk_real = CONST_REAL(A, j * lda + k);
341 const BASE Ajk_imag = conj * CONST_IMAG(A, j * lda + k);
342 REAL(B, ldb * i + k) -= Ajk_real * Bij_real - Ajk_imag * Bij_imag;
343 IMAG(B, ldb * i + k) -= Ajk_real * Bij_imag + Ajk_imag * Bij_real;
349 } else if (side == CblasRight && uplo == CblasLower && trans == CblasTrans) {
351 /* form B := alpha * B * inv(TriL(A))' */
354 if (!(alpha_real == 1.0 && alpha_imag == 0.0)) {
355 for (i = 0; i < n1; i++) {
356 for (j = 0; j < n2; j++) {
357 const BASE Bij_real = REAL(B, ldb * i + j);
358 const BASE Bij_imag = IMAG(B, ldb * i + j);
359 REAL(B, ldb * i + j) = alpha_real * Bij_real - alpha_imag * Bij_imag;
360 IMAG(B, ldb * i + j) = alpha_real * Bij_imag + alpha_imag * Bij_real;
365 for (i = 0; i < n1; i++) {
366 for (j = 0; j < n2; j++) {
368 const BASE Ajj_real = CONST_REAL(A, lda * j + j);
369 const BASE Ajj_imag = conj * CONST_IMAG(A, lda * j + j);
370 const BASE s = xhypot(Ajj_real, Ajj_imag);
371 const BASE a_real = Ajj_real / s;
372 const BASE a_imag = Ajj_imag / s;
373 const BASE Bij_real = REAL(B, ldb * i + j);
374 const BASE Bij_imag = IMAG(B, ldb * i + j);
375 REAL(B, ldb * i + j) = (Bij_real * a_real + Bij_imag * a_imag) / s;
376 IMAG(B, ldb * i + j) = (Bij_imag * a_real - Bij_real * a_imag) / s;
380 const BASE Bij_real = REAL(B, ldb * i + j);
381 const BASE Bij_imag = IMAG(B, ldb * i + j);
383 for (k = j + 1; k < n2; k++) {
384 const BASE Akj_real = CONST_REAL(A, k * lda + j);
385 const BASE Akj_imag = conj * CONST_IMAG(A, k * lda + j);
386 REAL(B, ldb * i + k) -= Akj_real * Bij_real - Akj_imag * Bij_imag;
387 IMAG(B, ldb * i + k) -= Akj_real * Bij_imag + Akj_imag * Bij_real;
395 BLAS_ERROR("unrecognized operation");