MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
zhegst_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @author Raffaele Solca
9  @author Azzam Haidar
10
11  @precisions normal z -> s d c
12 */
13
14 #include "common_magma.h"
15
16 #define A(i, j) (w + (j)*lda + (i))
17 #define B(i, j) (w+nb*lda + (j)*ldb + (i))
18
19 #define dA(i, j) (da + (j)*ldda + (i))
20 #define dB(i, j) (db + (j)*lddb + (i))
21
22 extern "C" magma_int_t
25  magmaDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
26 {
27 /* -- MAGMA (version 1.4.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
31  August 2013
32
33  Purpose
34  =======
35  ZHEGST_GPU reduces a complex Hermitian-definite generalized
36  eigenproblem to standard form.
37
38  If ITYPE = 1, the problem is A*x = lambda*B*x,
39  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
40
41  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
42  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
43
44  B must have been previously factorized as U**H*U or L*L**H by ZPOTRF.
45
46  Arguments
47  =========
48  ITYPE (input) INTEGER
49  = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
50  = 2 or 3: compute U*A*U**H or L**H*A*L.
51
52  UPLO (input) CHARACTER*1
53  = 'U': Upper triangle of A is stored and B is factored as
54  U**H*U;
55  = 'L': Lower triangle of A is stored and B is factored as
56  L*L**H.
57
58  N (input) INTEGER
59  The order of the matrices A and B. N >= 0.
60
61  DA (device input/output) COMPLEX_16 array, dimension (LDA,N)
62  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
63  N-by-N upper triangular part of A contains the upper
64  triangular part of the matrix A, and the strictly lower
65  triangular part of A is not referenced. If UPLO = 'L', the
66  leading N-by-N lower triangular part of A contains the lower
67  triangular part of the matrix A, and the strictly upper
68  triangular part of A is not referenced.
69
70  On exit, if INFO = 0, the transformed matrix, stored in the
71  same format as A.
72
73  LDDA (input) INTEGER
74  The leading dimension of the array A. LDA >= max(1,N).
75
76  DB (device input) COMPLEX_16 array, dimension (LDB,N)
77  The triangular factor from the Cholesky factorization of B,
78  as returned by ZPOTRF.
79
80  LDDB (input) INTEGER
81  The leading dimension of the array B. LDB >= max(1,N).
82
83  INFO (output) INTEGER
84  = 0: successful exit
85  < 0: if INFO = -i, the i-th argument had an illegal value
86  =====================================================================*/
87
88  char uplo_[2] = {uplo, 0};
89  magma_int_t nb;
90  magma_int_t k, kb, kb2;
96  magma_int_t lda;
97  magma_int_t ldb;
98  double d_one = 1.0;
99  int upper = lapackf77_lsame(uplo_, "U");
100
101  /* Test the input parameters. */
102  *info = 0;
103  if (itype<1 || itype>3){
104  *info = -1;
105  }else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
106  *info = -2;
107  } else if (n < 0) {
108  *info = -3;
109  } else if (ldda < max(1,n)) {
110  *info = -5;
111  }else if (lddb < max(1,n)) {
112  *info = -7;
113  }
114  if (*info != 0) {
115  magma_xerbla( __func__, -(*info) );
116  return *info;
117  }
118
119  /* Quick return */
120  if ( n == 0 )
121  return *info;
122
123  nb = magma_get_zhegst_nb(n);
124
125  lda = nb;
126  ldb = nb;
127
128  if (MAGMA_SUCCESS != magma_zmalloc_pinned( &w, 2*nb*nb )) {
129  *info = MAGMA_ERR_DEVICE_ALLOC;
130  return *info;
131  }
132
133  magma_queue_t stream[3];
134  magma_queue_create( &stream[0] );
135  magma_queue_create( &stream[1] );
136  magma_queue_create( &stream[2] );
137
138  /* Use hybrid blocked code */
139  if (itype==1) {
140  if (upper) {
141  kb = min(n,nb);
142
143  /* Compute inv(U')*A*inv(U) */
144  magma_zgetmatrix_async( kb, kb,
145  dB(0, 0), lddb,
146  B(0, 0), nb, stream[2] );
147  magma_zgetmatrix_async( kb, kb,
148  dA(0, 0), ldda,
149  A(0, 0), nb, stream[1] );
150
151  for(k = 0; k<n; k+=nb){
152  kb = min(n-k,nb);
153  kb2= min(n-k-nb,nb);
154
155  /* Update the upper triangle of A(k:n,k:n) */
156
157  magma_queue_sync( stream[2] );
158  magma_queue_sync( stream[1] );
159
160  lapackf77_zhegst( &itype, uplo_, &kb, A(0,0), &lda, B(0,0), &ldb, info);
161
162  magma_zsetmatrix_async( kb, kb,
163  A(0, 0), lda,
164  dA(k, k), ldda, stream[0] );
165
166  if(k+kb<n){
167  // Start copying the new B block
168  magma_zgetmatrix_async( kb2, kb2,
169  dB(k+kb, k+kb), lddb,
170  B(0, 0), nb, stream[2] );
171
173  kb, n-k-kb,
174  c_one, dB(k,k), lddb,
175  dA(k,k+kb), ldda);
176
177  magma_queue_sync( stream[0] );
178
180  kb, n-k-kb,
181  c_neg_half, dA(k,k), ldda,
182  dB(k,k+kb), lddb,
183  c_one, dA(k, k+kb), ldda);
184
186  n-k-kb, kb,
187  c_neg_one, dA(k,k+kb), ldda,
188  dB(k,k+kb), lddb,
189  d_one, dA(k+kb,k+kb), ldda);
190
191  magma_zgetmatrix_async( kb2, kb2,
192  dA(k+kb, k+kb), ldda,
193  A(0, 0), lda, stream[1] );
194
196  kb, n-k-kb,
197  c_neg_half, dA(k,k), ldda,
198  dB(k,k+kb), lddb,
199  c_one, dA(k, k+kb), ldda);
200
202  kb, n-k-kb,
203  c_one ,dB(k+kb,k+kb), lddb,
204  dA(k,k+kb), ldda);
205  }
206  }
207
208  magma_queue_sync( stream[0] );
209  }
210  else {
211  kb = min(n,nb);
212
213  /* Compute inv(L)*A*inv(L') */
214  magma_zgetmatrix_async( kb, kb,
215  dB(0, 0), lddb,
216  B(0, 0), nb, stream[2] );
217  magma_zgetmatrix_async( kb, kb,
218  dA(0, 0), ldda,
219  A(0, 0), nb, stream[1] );
220
221  for(k = 0; k<n; k+=nb){
222  kb= min(n-k,nb);
223  kb2= min(n-k-nb,nb);
224
225  /* Update the lower triangle of A(k:n,k:n) */
226
227  magma_queue_sync( stream[2] );
228  magma_queue_sync( stream[1] );
229
230  lapackf77_zhegst( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
231
232  magma_zsetmatrix_async( kb, kb,
233  A(0, 0), lda,
234  dA(k, k), ldda, stream[0] );
235
236  if(k+kb<n){
237  // Start copying the new B block
238  magma_zgetmatrix_async( kb2, kb2,
239  dB(k+kb, k+kb), lddb,
240  B(0, 0), nb, stream[2] );
241
243  n-k-kb, kb,
244  c_one, dB(k,k), lddb,
245  dA(k+kb,k), ldda);
246
247  magma_queue_sync( stream[0] );
248
250  n-k-kb, kb,
251  c_neg_half, dA(k,k), ldda,
252  dB(k+kb,k), lddb,
253  c_one, dA(k+kb, k), ldda);
254
256  n-k-kb, kb,
257  c_neg_one, dA(k+kb,k), ldda,
258  dB(k+kb,k), lddb,
259  d_one, dA(k+kb,k+kb), ldda);
260
261  magma_zgetmatrix_async( kb2, kb2,
262  dA(k+kb, k+kb), ldda,
263  A(0, 0), lda, stream[1] );
264
266  n-k-kb, kb,
267  c_neg_half, dA(k,k), ldda,
268  dB(k+kb,k), lddb,
269  c_one, dA(k+kb, k), ldda);
270
272  n-k-kb, kb,
273  c_one, dB(k+kb,k+kb), lddb,
274  dA(k+kb,k), ldda);
275  }
276  }
277  }
278
279  magma_queue_sync( stream[0] );
280  }
281  else {
282  if (upper) {
283  /* Compute U*A*U' */
284  for(k = 0; k<n; k+=nb){
285  kb= min(n-k,nb);
286
287  magma_zgetmatrix_async( kb, kb,
288  dB(k, k), lddb,
289  B(0, 0), nb, stream[2] );
290
291  /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
292  if(k>0){
294  k, kb,
295  c_one ,dB(0,0), lddb,
296  dA(0,k), ldda);
297
299  k, kb,
300  c_half, dA(k,k), ldda,
301  dB(0,k), lddb,
302  c_one, dA(0, k), ldda);
303
304  magma_queue_sync( stream[1] );
305  }
306
307  magma_zgetmatrix_async( kb, kb,
308  dA(k, k), ldda,
309  A(0, 0), lda, stream[0] );
310
311  if(k>0){
313  k, kb,
314  c_one, dA(0,k), ldda,
315  dB(0,k), lddb,
316  d_one, dA(0,0), ldda);
317
319  k, kb,
320  c_half, dA(k,k), ldda,
321  dB(0,k), lddb,
322  c_one, dA(0, k), ldda);
323
325  k, kb,
326  c_one, dB(k,k), lddb,
327  dA(0,k), ldda);
328  }
329
330  magma_queue_sync( stream[2] );
331  magma_queue_sync( stream[0] );
332
333  lapackf77_zhegst( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
334
335  magma_zsetmatrix_async( kb, kb,
336  A(0, 0), lda,
337  dA(k, k), ldda, stream[1] );
338  }
339
340  magma_queue_sync( stream[1] );
341  }
342  else {
343  /* Compute L'*A*L */
344  for(k = 0; k<n; k+=nb){
345  kb= min(n-k,nb);
346
347  magma_zgetmatrix_async( kb, kb,
348  dB(k, k), lddb,
349  B(0, 0), nb, stream[2] );
350
351  /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
352  if(k>0){
354  kb, k,
355  c_one ,dB(0,0), lddb,
356  dA(k,0), ldda);
357
359  kb, k,
360  c_half, dA(k,k), ldda,
361  dB(k,0), lddb,
362  c_one, dA(k, 0), ldda);
363
364  magma_queue_sync( stream[1] );
365  }
366
367  magma_zgetmatrix_async( kb, kb,
368  dA(k, k), ldda,
369  A(0, 0), lda, stream[0] );
370
371  if(k>0){
373  k, kb,
374  c_one, dA(k,0), ldda,
375  dB(k,0), lddb,
376  d_one, dA(0,0), ldda);
377
379  kb, k,
380  c_half, dA(k,k), ldda,
381  dB(k,0), lddb,
382  c_one, dA(k, 0), ldda);
383
385  kb, k,
386  c_one, dB(k,k), lddb,
387  dA(k,0), ldda);
388  }
389
390  magma_queue_sync( stream[2] );
391  magma_queue_sync( stream[0] );
392
393  lapackf77_zhegst( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
394
395  magma_zsetmatrix_async( kb, kb,
396  A(0, 0), lda,
397  dA(k, k), ldda, stream[1] );
398  }
399
400  magma_queue_sync( stream[1] );
401  }
402  }
403  magma_queue_destroy( stream[0] );
404  magma_queue_destroy( stream[1] );
405  magma_queue_destroy( stream[2] );
406
407  magma_free_pinned( w );
408
409  return *info;
410 } /* magma_zhegst_gpu */
411 #undef A
412 #undef B
413 #undef dA
414 #undef dB
415
#define MAGMA_Z_HALF
Definition: magma.h:133
void magma_ztrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define A(i, j)
Definition: zhegst_gpu.cpp:16
#define min(a, b)
Definition: common_magma.h:86
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
#define MagmaUpper
Definition: magma.h:61
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free_pinned(ptr)
Definition: magma.h:60
#define MAGMA_Z_NEG_HALF
Definition: magma.h:135
magma_int_t magma_zhegst_gpu(magma_int_t itype, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
void magma_zher2k(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
void magma_ztrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define MagmaLower
Definition: magma.h:62
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define B(i, j)
Definition: zhegst_gpu.cpp:17
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
magma_int_t magma_get_zhegst_nb(magma_int_t m)
Definition: get_nb.cpp:552
#define dB(i, j)
Definition: zhegst_gpu.cpp:20
magma_int_t ldda
static magma_err_t magma_zmalloc_pinned(magmaDoubleComplex **ptrPtr, size_t n)
Definition: magma.h:92
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define dA(i, j)
Definition: zhegst_gpu.cpp:19
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
#define MagmaRight
Definition: magma.h:69
#define MAGMA_Z_ONE
Definition: magma.h:132
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
void lapackf77_zhegst(const magma_int_t *itype, const char *uplo, const magma_int_t *n, magmaDoubleComplex *A, const magma_int_t *lda, magmaDoubleComplex *B, const magma_int_t *ldb, magma_int_t *info)
#define magma_queue_sync(queue)
Definition: magma.h:119