13 #include "common_magma.h"
/* Element-address helpers.
 *
 * Each macro expands to a pointer expression of the form
 *     base + (j)*ld + (i)
 * so consecutive values of i are adjacent and consecutive values of j are
 * ld elements apart.  The macros take only (i, j); every other name in the
 * expansion (a, lda, b, ldb, dw, ldda, lddb, n) must already be in scope
 * wherever the macro is used.
 *
 * NOTE(review): the names suggest a/b are the caller's matrices and dw is a
 * device-side workspace -- confirm against the enclosing routine.
 */

/* Address of element (i, j) relative to base pointer a, column stride lda. */
15 #define A(i, j) (a+(j)*lda + (i))
/* Address of element (i, j) relative to base pointer b, column stride ldb. */
16 #define B(i, j) (b+(j)*ldb + (i))
/* Address of element (i, j) in the region starting at dw, column stride ldda. */
18 #define dA(i, j) (dw+(j)*ldda + (i))
/* Address of element (i, j) in the region starting n*ldda elements past dw,
 * column stride lddb.  The n*ldda offset skips n columns of stride ldda, so
 * this region shares the dw buffer with the one addressed by dA. */
19 #define dB(i, j) (dw+n*ldda+(j)*lddb + (i))
92 char uplo_[2] = {
uplo, 0};
107 if (itype<1 || itype>3){
113 }
else if (lda <
max(1,n)) {
115 }
else if (ldb <
max(1,n)) {
134 static cudaStream_t stream[2];
148 for(k = 0; k<n; k+=nb){
158 dA(k, k), ldda, stream[0] );
164 c_one,
dB(k,k), lddb,
171 c_neg_half,
dA(k,k), ldda,
173 c_one,
dA(k, k+kb), ldda);
177 c_neg_one,
dA(k,k+kb), ldda,
179 d_one,
dA(k+kb,k+kb), ldda);
182 dA(k+kb, k+kb), ldda,
183 A(k+kb, k+kb), lda, stream[1] );
187 c_neg_half,
dA(k,k), ldda,
189 c_one,
dA(k, k+kb), ldda);
193 c_one ,
dB(k+kb,k+kb), lddb,
208 for(k = 0; k<n; k+=nb){
218 dA(k, k), ldda, stream[0] );
224 c_one,
dB(k,k), lddb,
231 c_neg_half,
dA(k,k), ldda,
233 c_one,
dA(k+kb, k), ldda);
237 c_neg_one,
dA(k+kb,k), ldda,
239 d_one,
dA(k+kb,k+kb), ldda);
242 dA(k+kb, k+kb), ldda,
243 A(k+kb, k+kb), lda, stream[1] );
247 c_neg_half,
dA(k,k), ldda,
249 c_one,
dA(k+kb, k), ldda);
253 c_one,
dB(k+kb,k+kb), lddb,
271 for(k = 0; k<n; k+=nb){
276 A(k, k), lda, stream[0] );
283 c_one ,
dB(0,0), lddb,
288 c_half,
dA(k,k), ldda,
290 c_one,
dA(0, k), ldda);
296 c_one,
dA(0,k), ldda,
298 d_one,
dA(0,0), ldda);
302 c_half,
dA(k,k), ldda,
304 c_one,
dA(0, k), ldda);
308 c_one,
dB(k,k), lddb,
319 dA(k, k), ldda, stream[1] );
329 for(k = 0; k<n; k+=nb){
334 A(k, k), lda, stream[0] );
341 c_one ,
dB(0,0), lddb,
346 c_half,
dA(k,k), ldda,
348 c_one,
dA(k, 0), ldda);
354 c_one,
dA(k,0), ldda,
356 d_one,
dA(0,0), ldda);
360 c_half,
dA(k,k), ldda,
362 c_one,
dA(k, 0), ldda);
366 c_one,
dB(k,k), lddb,
376 dA(k, k), ldda, stream[1] );