MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zhegst_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 
8  @author Raffaele Solca
9  @author Azzam Haidar
10 
11  @precisions normal z -> s d c
12 */
13 
14 #include "common_magma.h"
15 
16 #define A(i, j) (w + (j)*lda + (i))
17 #define B(i, j) (w+nb*lda + (j)*ldb + (i))
18 
19 #define dA(i, j) (da + (j)*ldda + (i))
20 #define dB(i, j) (db + (j)*lddb + (i))
21 
22 extern "C" magma_int_t
24  magmaDoubleComplex *da, magma_int_t ldda,
25  magmaDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
26 {
27 /* -- MAGMA (version 1.4.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  August 2013
32 
33  Purpose
34  =======
35  ZHEGST_GPU reduces a complex Hermitian-definite generalized
36  eigenproblem to standard form.
37 
38  If ITYPE = 1, the problem is A*x = lambda*B*x,
39  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
40 
41  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
42  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
43 
44  B must have been previously factorized as U**H*U or L*L**H by ZPOTRF.
45 
46  Arguments
47  =========
48  ITYPE (input) INTEGER
49  = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
50  = 2 or 3: compute U*A*U**H or L**H*A*L.
51 
52  UPLO (input) CHARACTER*1
53  = 'U': Upper triangle of A is stored and B is factored as
54  U**H*U;
55  = 'L': Lower triangle of A is stored and B is factored as
56  L*L**H.
57 
58  N (input) INTEGER
59  The order of the matrices A and B. N >= 0.
60 
61  DA (device input/output) COMPLEX_16 array, dimension (LDA,N)
62  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
63  N-by-N upper triangular part of A contains the upper
64  triangular part of the matrix A, and the strictly lower
65  triangular part of A is not referenced. If UPLO = 'L', the
66  leading N-by-N lower triangular part of A contains the lower
67  triangular part of the matrix A, and the strictly upper
68  triangular part of A is not referenced.
69 
70  On exit, if INFO = 0, the transformed matrix, stored in the
71  same format as A.
72 
73  LDDA (input) INTEGER
74  The leading dimension of the array A. LDA >= max(1,N).
75 
76  DB (device input) COMPLEX_16 array, dimension (LDB,N)
77  The triangular factor from the Cholesky factorization of B,
78  as returned by ZPOTRF.
79 
80  LDDB (input) INTEGER
81  The leading dimension of the array B. LDB >= max(1,N).
82 
83  INFO (output) INTEGER
84  = 0: successful exit
85  < 0: if INFO = -i, the i-th argument had an illegal value
86  =====================================================================*/
87 
88  char uplo_[2] = {uplo, 0};
89  magma_int_t nb;
90  magma_int_t k, kb, kb2;
91  magmaDoubleComplex c_one = MAGMA_Z_ONE;
92  magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
93  magmaDoubleComplex c_half = MAGMA_Z_HALF;
94  magmaDoubleComplex c_neg_half = MAGMA_Z_NEG_HALF;
95  magmaDoubleComplex *w;
96  magma_int_t lda;
97  magma_int_t ldb;
98  double d_one = 1.0;
99  int upper = lapackf77_lsame(uplo_, "U");
100 
101  /* Test the input parameters. */
102  *info = 0;
103  if (itype<1 || itype>3){
104  *info = -1;
105  }else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
106  *info = -2;
107  } else if (n < 0) {
108  *info = -3;
109  } else if (ldda < max(1,n)) {
110  *info = -5;
111  }else if (lddb < max(1,n)) {
112  *info = -7;
113  }
114  if (*info != 0) {
115  magma_xerbla( __func__, -(*info) );
116  return *info;
117  }
118 
119  /* Quick return */
120  if ( n == 0 )
121  return *info;
122 
123  nb = magma_get_zhegst_nb(n);
124 
125  lda = nb;
126  ldb = nb;
127 
128  if (MAGMA_SUCCESS != magma_zmalloc_pinned( &w, 2*nb*nb )) {
129  *info = MAGMA_ERR_DEVICE_ALLOC;
130  return *info;
131  }
132 
133  magma_queue_t stream[3];
134  magma_queue_create( &stream[0] );
135  magma_queue_create( &stream[1] );
136  magma_queue_create( &stream[2] );
137 
138  /* Use hybrid blocked code */
139  if (itype==1) {
140  if (upper) {
141  kb = min(n,nb);
142 
143  /* Compute inv(U')*A*inv(U) */
144  magma_zgetmatrix_async( kb, kb,
145  dB(0, 0), lddb,
146  B(0, 0), nb, stream[2] );
147  magma_zgetmatrix_async( kb, kb,
148  dA(0, 0), ldda,
149  A(0, 0), nb, stream[1] );
150 
151  for(k = 0; k<n; k+=nb){
152  kb = min(n-k,nb);
153  kb2= min(n-k-nb,nb);
154 
155  /* Update the upper triangle of A(k:n,k:n) */
156 
157  magma_queue_sync( stream[2] );
158  magma_queue_sync( stream[1] );
159 
160  lapackf77_zhegst( &itype, uplo_, &kb, A(0,0), &lda, B(0,0), &ldb, info);
161 
162  magma_zsetmatrix_async( kb, kb,
163  A(0, 0), lda,
164  dA(k, k), ldda, stream[0] );
165 
166  if(k+kb<n){
167  // Start copying the new B block
168  magma_zgetmatrix_async( kb2, kb2,
169  dB(k+kb, k+kb), lddb,
170  B(0, 0), nb, stream[2] );
171 
173  kb, n-k-kb,
174  c_one, dB(k,k), lddb,
175  dA(k,k+kb), ldda);
176 
177  magma_queue_sync( stream[0] );
178 
180  kb, n-k-kb,
181  c_neg_half, dA(k,k), ldda,
182  dB(k,k+kb), lddb,
183  c_one, dA(k, k+kb), ldda);
184 
186  n-k-kb, kb,
187  c_neg_one, dA(k,k+kb), ldda,
188  dB(k,k+kb), lddb,
189  d_one, dA(k+kb,k+kb), ldda);
190 
191  magma_zgetmatrix_async( kb2, kb2,
192  dA(k+kb, k+kb), ldda,
193  A(0, 0), lda, stream[1] );
194 
196  kb, n-k-kb,
197  c_neg_half, dA(k,k), ldda,
198  dB(k,k+kb), lddb,
199  c_one, dA(k, k+kb), ldda);
200 
202  kb, n-k-kb,
203  c_one ,dB(k+kb,k+kb), lddb,
204  dA(k,k+kb), ldda);
205  }
206  }
207 
208  magma_queue_sync( stream[0] );
209  }
210  else {
211  kb = min(n,nb);
212 
213  /* Compute inv(L)*A*inv(L') */
214  magma_zgetmatrix_async( kb, kb,
215  dB(0, 0), lddb,
216  B(0, 0), nb, stream[2] );
217  magma_zgetmatrix_async( kb, kb,
218  dA(0, 0), ldda,
219  A(0, 0), nb, stream[1] );
220 
221  for(k = 0; k<n; k+=nb){
222  kb= min(n-k,nb);
223  kb2= min(n-k-nb,nb);
224 
225  /* Update the lower triangle of A(k:n,k:n) */
226 
227  magma_queue_sync( stream[2] );
228  magma_queue_sync( stream[1] );
229 
230  lapackf77_zhegst( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
231 
232  magma_zsetmatrix_async( kb, kb,
233  A(0, 0), lda,
234  dA(k, k), ldda, stream[0] );
235 
236  if(k+kb<n){
237  // Start copying the new B block
238  magma_zgetmatrix_async( kb2, kb2,
239  dB(k+kb, k+kb), lddb,
240  B(0, 0), nb, stream[2] );
241 
243  n-k-kb, kb,
244  c_one, dB(k,k), lddb,
245  dA(k+kb,k), ldda);
246 
247  magma_queue_sync( stream[0] );
248 
250  n-k-kb, kb,
251  c_neg_half, dA(k,k), ldda,
252  dB(k+kb,k), lddb,
253  c_one, dA(k+kb, k), ldda);
254 
256  n-k-kb, kb,
257  c_neg_one, dA(k+kb,k), ldda,
258  dB(k+kb,k), lddb,
259  d_one, dA(k+kb,k+kb), ldda);
260 
261  magma_zgetmatrix_async( kb2, kb2,
262  dA(k+kb, k+kb), ldda,
263  A(0, 0), lda, stream[1] );
264 
266  n-k-kb, kb,
267  c_neg_half, dA(k,k), ldda,
268  dB(k+kb,k), lddb,
269  c_one, dA(k+kb, k), ldda);
270 
272  n-k-kb, kb,
273  c_one, dB(k+kb,k+kb), lddb,
274  dA(k+kb,k), ldda);
275  }
276  }
277  }
278 
279  magma_queue_sync( stream[0] );
280  }
281  else {
282  if (upper) {
283  /* Compute U*A*U' */
284  for(k = 0; k<n; k+=nb){
285  kb= min(n-k,nb);
286 
287  magma_zgetmatrix_async( kb, kb,
288  dB(k, k), lddb,
289  B(0, 0), nb, stream[2] );
290 
291  /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
292  if(k>0){
294  k, kb,
295  c_one ,dB(0,0), lddb,
296  dA(0,k), ldda);
297 
299  k, kb,
300  c_half, dA(k,k), ldda,
301  dB(0,k), lddb,
302  c_one, dA(0, k), ldda);
303 
304  magma_queue_sync( stream[1] );
305  }
306 
307  magma_zgetmatrix_async( kb, kb,
308  dA(k, k), ldda,
309  A(0, 0), lda, stream[0] );
310 
311  if(k>0){
313  k, kb,
314  c_one, dA(0,k), ldda,
315  dB(0,k), lddb,
316  d_one, dA(0,0), ldda);
317 
319  k, kb,
320  c_half, dA(k,k), ldda,
321  dB(0,k), lddb,
322  c_one, dA(0, k), ldda);
323 
325  k, kb,
326  c_one, dB(k,k), lddb,
327  dA(0,k), ldda);
328  }
329 
330  magma_queue_sync( stream[2] );
331  magma_queue_sync( stream[0] );
332 
333  lapackf77_zhegst( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
334 
335  magma_zsetmatrix_async( kb, kb,
336  A(0, 0), lda,
337  dA(k, k), ldda, stream[1] );
338  }
339 
340  magma_queue_sync( stream[1] );
341  }
342  else {
343  /* Compute L'*A*L */
344  for(k = 0; k<n; k+=nb){
345  kb= min(n-k,nb);
346 
347  magma_zgetmatrix_async( kb, kb,
348  dB(k, k), lddb,
349  B(0, 0), nb, stream[2] );
350 
351  /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
352  if(k>0){
354  kb, k,
355  c_one ,dB(0,0), lddb,
356  dA(k,0), ldda);
357 
359  kb, k,
360  c_half, dA(k,k), ldda,
361  dB(k,0), lddb,
362  c_one, dA(k, 0), ldda);
363 
364  magma_queue_sync( stream[1] );
365  }
366 
367  magma_zgetmatrix_async( kb, kb,
368  dA(k, k), ldda,
369  A(0, 0), lda, stream[0] );
370 
371  if(k>0){
373  k, kb,
374  c_one, dA(k,0), ldda,
375  dB(k,0), lddb,
376  d_one, dA(0,0), ldda);
377 
379  kb, k,
380  c_half, dA(k,k), ldda,
381  dB(k,0), lddb,
382  c_one, dA(k, 0), ldda);
383 
385  kb, k,
386  c_one, dB(k,k), lddb,
387  dA(k,0), ldda);
388  }
389 
390  magma_queue_sync( stream[2] );
391  magma_queue_sync( stream[0] );
392 
393  lapackf77_zhegst( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
394 
395  magma_zsetmatrix_async( kb, kb,
396  A(0, 0), lda,
397  dA(k, k), ldda, stream[1] );
398  }
399 
400  magma_queue_sync( stream[1] );
401  }
402  }
403  magma_queue_destroy( stream[0] );
404  magma_queue_destroy( stream[1] );
405  magma_queue_destroy( stream[2] );
406 
407  magma_free_pinned( w );
408 
409  return *info;
410 } /* magma_zhegst_gpu */
411 #undef A
412 #undef B
413 #undef dA
414 #undef dB
415 
#define MAGMA_Z_HALF
Definition: magma.h:133
void magma_ztrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define A(i, j)
Definition: zhegst_gpu.cpp:16
#define min(a, b)
Definition: common_magma.h:86
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
#define MagmaUpper
Definition: magma.h:61
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free_pinned(ptr)
Definition: magma.h:60
#define MAGMA_Z_NEG_HALF
Definition: magma.h:135
magma_int_t magma_zhegst_gpu(magma_int_t itype, char uplo, magma_int_t n, cuDoubleComplex *da, magma_int_t ldda, cuDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
void magma_zher2k(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
void magma_ztrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
#define MagmaLower
Definition: magma.h:62
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define B(i, j)
Definition: zhegst_gpu.cpp:17
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
magma_int_t magma_get_zhegst_nb(magma_int_t m)
Definition: get_nb.cpp:552
#define dB(i, j)
Definition: zhegst_gpu.cpp:20
void magma_zhemm(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_const_ptr dB, magma_int_t lddb, magmaDoubleComplex beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
magma_int_t ldda
static magma_err_t magma_zmalloc_pinned(magmaDoubleComplex **ptrPtr, size_t n)
Definition: magma.h:92
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define dA(i, j)
Definition: zhegst_gpu.cpp:19
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
#define MagmaRight
Definition: magma.h:69
#define MAGMA_Z_ONE
Definition: magma.h:132
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
void lapackf77_zhegst(const magma_int_t *itype, const char *uplo, const magma_int_t *n, magmaDoubleComplex *A, const magma_int_t *lda, magmaDoubleComplex *B, const magma_int_t *ldb, magma_int_t *info)
#define magma_queue_sync(queue)
Definition: magma.h:119