MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zhegst_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Raffaele Solca
9 
10  @precisions normal z -> s d c
11 */
12 
13 #include "common_magma.h"
14 
15 #define A(i, j) (w + (j)*lda + (i))
16 #define B(i, j) (w+nb*lda + (j)*ldb + (i))
17 
18 #define dA(i, j) (da + (j)*ldda + (i))
19 #define dB(i, j) (db + (j)*lddb + (i))
20 
21 extern "C" magma_int_t
23  cuDoubleComplex *da, magma_int_t ldda,
24  cuDoubleComplex *db, magma_int_t lddb, magma_int_t *info)
25 {
26 /*
27  -- MAGMA (version 1.2.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  May 2012
32 
33  Purpose
34  =======
35  ZHEGST_GPU reduces a complex Hermitian-definite generalized
36  eigenproblem to standard form.
37 
38  If ITYPE = 1, the problem is A*x = lambda*B*x,
39  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
40 
41  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
42  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
43 
44  B must have been previously factorized as U**H*U or L*L**H by ZPOTRF.
45 
46  Arguments
47  =========
48  ITYPE (input) INTEGER
49  = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
50  = 2 or 3: compute U*A*U**H or L**H*A*L.
51 
52  UPLO (input) CHARACTER*1
53  = 'U': Upper triangle of A is stored and B is factored as
54  U**H*U;
55  = 'L': Lower triangle of A is stored and B is factored as
56  L*L**H.
57 
58  N (input) INTEGER
59  The order of the matrices A and B. N >= 0.
60 
61  DA (device input/output) COMPLEX*16 array, dimension (LDA,N)
62  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
63  N-by-N upper triangular part of A contains the upper
64  triangular part of the matrix A, and the strictly lower
65  triangular part of A is not referenced. If UPLO = 'L', the
66  leading N-by-N lower triangular part of A contains the lower
67  triangular part of the matrix A, and the strictly upper
68  triangular part of A is not referenced.
69 
70  On exit, if INFO = 0, the transformed matrix, stored in the
71  same format as A.
72 
73  LDDA (input) INTEGER
74  The leading dimension of the array A. LDA >= max(1,N).
75 
76  DB (device input) COMPLEX*16 array, dimension (LDB,N)
77  The triangular factor from the Cholesky factorization of B,
78  as returned by ZPOTRF.
79 
80  LDDB (input) INTEGER
81  The leading dimension of the array B. LDB >= max(1,N).
82 
83  INFO (output) INTEGER
84  = 0: successful exit
85  < 0: if INFO = -i, the i-th argument had an illegal value
86  =====================================================================*/
87 
88  char uplo_[2] = {uplo, 0};
89  magma_int_t nb;
90  magma_int_t k, kb, kb2;
91  cuDoubleComplex c_one = MAGMA_Z_ONE;
92  cuDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
93  cuDoubleComplex c_half = MAGMA_Z_HALF;
94  cuDoubleComplex c_neg_half = MAGMA_Z_NEG_HALF;
95  cuDoubleComplex *w;
96  magma_int_t lda;
97  magma_int_t ldb;
98  double d_one = 1.0;
99  long int upper = lapackf77_lsame(uplo_, "U");
100 
101  /* Test the input parameters. */
102  *info = 0;
103  if (itype<1 || itype>3){
104  *info = -1;
105  }else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
106  *info = -2;
107  } else if (n < 0) {
108  *info = -3;
109  } else if (ldda < max(1,n)) {
110  *info = -5;
111  }else if (lddb < max(1,n)) {
112  *info = -7;
113  }
114  if (*info != 0) {
115  magma_xerbla( __func__, -(*info) );
116  return *info;
117  }
118 
119  /* Quick return */
120  if ( n == 0 )
121  return *info;
122 
123  nb = magma_get_zhegst_nb(n);
124 
125  lda = nb;
126  ldb = nb;
127 
128  if (MAGMA_SUCCESS != magma_zmalloc_host( &w, 2*nb*nb )) {
129  *info = MAGMA_ERR_DEVICE_ALLOC;
130  return *info;
131  }
132 
133  static cudaStream_t stream[3];
134  magma_queue_create( &stream[0] );
135  magma_queue_create( &stream[1] );
136  magma_queue_create( &stream[2] );
137 
138  /* Use hybrid blocked code */
139  if (itype==1)
140  {
141  if (upper)
142  {
143  kb = min(n,nb);
144 
145  /* Compute inv(U')*A*inv(U) */
146  magma_zgetmatrix_async( kb, kb,
147  dB(0, 0), lddb,
148  B(0, 0), nb, stream[2] );
149  magma_zgetmatrix_async( kb, kb,
150  dA(0, 0), ldda,
151  A(0, 0), nb, stream[1] );
152 
153  for(k = 0; k<n; k+=nb){
154  kb = min(n-k,nb);
155  kb2= min(n-k-nb,nb);
156 
157  /* Update the upper triangle of A(k:n,k:n) */
158 
159  magma_queue_sync( stream[2] );
160  magma_queue_sync( stream[1] );
161 
162  lapackf77_zhegs2( &itype, uplo_, &kb, A(0,0), &lda, B(0,0), &ldb, info);
163 
164  magma_zsetmatrix_async( kb, kb,
165  A(0, 0), lda,
166  dA(k, k), ldda, stream[0] );
167 
168  if(k+kb<n){
169 
170  // Start copying the new B block
171  magma_zgetmatrix_async( kb2, kb2,
172  dB(k+kb, k+kb), lddb,
173  B(0, 0), nb, stream[2] );
174 
176  kb, n-k-kb,
177  c_one, dB(k,k), lddb,
178  dA(k,k+kb), ldda);
179 
180  magma_queue_sync( stream[0] );
181 
183  kb, n-k-kb,
184  c_neg_half, dA(k,k), ldda,
185  dB(k,k+kb), lddb,
186  c_one, dA(k, k+kb), ldda);
187 
189  n-k-kb, kb,
190  c_neg_one, dA(k,k+kb), ldda,
191  dB(k,k+kb), lddb,
192  d_one, dA(k+kb,k+kb), ldda);
193 
194  magma_zgetmatrix_async( kb2, kb2,
195  dA(k+kb, k+kb), ldda,
196  A(0, 0), lda, stream[1] );
197 
199  kb, n-k-kb,
200  c_neg_half, dA(k,k), ldda,
201  dB(k,k+kb), lddb,
202  c_one, dA(k, k+kb), ldda);
203 
205  kb, n-k-kb,
206  c_one ,dB(k+kb,k+kb), lddb,
207  dA(k,k+kb), ldda);
208 
209  }
210 
211  }
212 
213  magma_queue_sync( stream[0] );
214 
215  } else {
216 
217  kb = min(n,nb);
218 
219  /* Compute inv(L)*A*inv(L') */
220 
221  magma_zgetmatrix_async( kb, kb,
222  dB(0, 0), lddb,
223  B(0, 0), nb, stream[2] );
224  magma_zgetmatrix_async( kb, kb,
225  dA(0, 0), ldda,
226  A(0, 0), nb, stream[1] );
227 
228  for(k = 0; k<n; k+=nb){
229  kb= min(n-k,nb);
230  kb2= min(n-k-nb,nb);
231 
232  /* Update the lower triangle of A(k:n,k:n) */
233 
234  magma_queue_sync( stream[2] );
235  magma_queue_sync( stream[1] );
236 
237  lapackf77_zhegs2( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
238 
239  magma_zsetmatrix_async( kb, kb,
240  A(0, 0), lda,
241  dA(k, k), ldda, stream[0] );
242 
243  if(k+kb<n){
244 
245  // Start copying the new B block
246  magma_zgetmatrix_async( kb2, kb2,
247  dB(k+kb, k+kb), lddb,
248  B(0, 0), nb, stream[2] );
249 
251  n-k-kb, kb,
252  c_one, dB(k,k), lddb,
253  dA(k+kb,k), ldda);
254 
255  magma_queue_sync( stream[0] );
256 
258  n-k-kb, kb,
259  c_neg_half, dA(k,k), ldda,
260  dB(k+kb,k), lddb,
261  c_one, dA(k+kb, k), ldda);
262 
264  n-k-kb, kb,
265  c_neg_one, dA(k+kb,k), ldda,
266  dB(k+kb,k), lddb,
267  d_one, dA(k+kb,k+kb), ldda);
268 
269  magma_zgetmatrix_async( kb2, kb2,
270  dA(k+kb, k+kb), ldda,
271  A(0, 0), lda, stream[1] );
272 
274  n-k-kb, kb,
275  c_neg_half, dA(k,k), ldda,
276  dB(k+kb,k), lddb,
277  c_one, dA(k+kb, k), ldda);
278 
280  n-k-kb, kb,
281  c_one, dB(k+kb,k+kb), lddb,
282  dA(k+kb,k), ldda);
283  }
284 
285  }
286 
287  }
288 
289  magma_queue_sync( stream[0] );
290 
291  } else {
292 
293  if (upper) {
294 
295  /* Compute U*A*U' */
296 
297  for(k = 0; k<n; k+=nb){
298  kb= min(n-k,nb);
299 
300  magma_zgetmatrix_async( kb, kb,
301  dB(k, k), lddb,
302  B(0, 0), nb, stream[2] );
303 
304  /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
305  if(k>0){
306 
308  k, kb,
309  c_one ,dB(0,0), lddb,
310  dA(0,k), ldda);
311 
313  k, kb,
314  c_half, dA(k,k), ldda,
315  dB(0,k), lddb,
316  c_one, dA(0, k), ldda);
317 
318  magma_queue_sync( stream[1] );
319 
320  }
321 
322  magma_zgetmatrix_async( kb, kb,
323  dA(k, k), ldda,
324  A(0, 0), lda, stream[0] );
325 
326  if(k>0){
327 
329  k, kb,
330  c_one, dA(0,k), ldda,
331  dB(0,k), lddb,
332  d_one, dA(0,0), ldda);
333 
335  k, kb,
336  c_half, dA(k,k), ldda,
337  dB(0,k), lddb,
338  c_one, dA(0, k), ldda);
339 
341  k, kb,
342  c_one, dB(k,k), lddb,
343  dA(0,k), ldda);
344 
345  }
346 
347  magma_queue_sync( stream[2] );
348  magma_queue_sync( stream[0] );
349 
350  lapackf77_zhegs2( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
351 
352  magma_zsetmatrix_async( kb, kb,
353  A(0, 0), lda,
354  dA(k, k), ldda, stream[1] );
355 
356  }
357 
358  magma_queue_sync( stream[1] );
359 
360  } else {
361 
362  /* Compute L'*A*L */
363 
364  for(k = 0; k<n; k+=nb){
365  kb= min(n-k,nb);
366 
367  magma_zgetmatrix_async( kb, kb,
368  dB(k, k), lddb,
369  B(0, 0), nb, stream[2] );
370 
371  /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
372  if(k>0){
373 
375  kb, k,
376  c_one ,dB(0,0), lddb,
377  dA(k,0), ldda);
378 
380  kb, k,
381  c_half, dA(k,k), ldda,
382  dB(k,0), lddb,
383  c_one, dA(k, 0), ldda);
384 
385  magma_queue_sync( stream[1] );
386 
387  }
388 
389  magma_zgetmatrix_async( kb, kb,
390  dA(k, k), ldda,
391  A(0, 0), lda, stream[0] );
392 
393  if(k>0){
394 
396  k, kb,
397  c_one, dA(k,0), ldda,
398  dB(k,0), lddb,
399  d_one, dA(0,0), ldda);
400 
402  kb, k,
403  c_half, dA(k,k), ldda,
404  dB(k,0), lddb,
405  c_one, dA(k, 0), ldda);
406 
408  kb, k,
409  c_one, dB(k,k), lddb,
410  dA(k,0), ldda);
411  }
412 
413  magma_queue_sync( stream[2] );
414  magma_queue_sync( stream[0] );
415 
416  lapackf77_zhegs2( &itype, uplo_, &kb, A(0, 0), &lda, B(0, 0), &ldb, info);
417 
418  magma_zsetmatrix_async( kb, kb,
419  A(0, 0), lda,
420  dA(k, k), ldda, stream[1] );
421  }
422 
423  magma_queue_sync( stream[1] );
424 
425  }
426  }
427  magma_queue_destroy( stream[0] );
428  magma_queue_destroy( stream[1] );
429  magma_queue_destroy( stream[2] );
430 
431  magma_free_host( w );
432 
433  return *info;
434 } /* magma_zhegst_gpu */