MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
chegst.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Raffaele Solca
9 
10  @generated c Thu May 10 22:27:02 2012
11 */
12 
13 #include "common_magma.h"
14 
15 #define A(i, j) (a+(j)*lda + (i))
16 #define B(i, j) (b+(j)*ldb + (i))
17 
18 #define dA(i, j) (dw+(j)*ldda + (i))
19 #define dB(i, j) (dw+n*ldda+(j)*lddb + (i))
20 
21 extern "C" magma_int_t
23  cuFloatComplex *a, magma_int_t lda,
24  cuFloatComplex *b, magma_int_t ldb, magma_int_t *info)
25 {
26 /*
27  -- MAGMA (version 1.2.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  May 2012
32 
33 
34  Purpose
35  =======
36 
37  CHEGST reduces a complex Hermitian-definite generalized
38  eigenproblem to standard form.
39 
40  If ITYPE = 1, the problem is A*x = lambda*B*x,
41  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
42 
43  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
44  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
45 
46  B must have been previously factorized as U**H*U or L*L**H by CPOTRF.
47 
48  Arguments
49  =========
50 
51  ITYPE (input) INTEGER
52  = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
53  = 2 or 3: compute U*A*U**H or L**H*A*L.
54 
55  UPLO (input) CHARACTER*1
56  = 'U': Upper triangle of A is stored and B is factored as
57  U**H*U;
58  = 'L': Lower triangle of A is stored and B is factored as
59  L*L**H.
60 
61  N (input) INTEGER
62  The order of the matrices A and B. N >= 0.
63 
64  A (input/output) COMPLEX*16 array, dimension (LDA,N)
65  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
66  N-by-N upper triangular part of A contains the upper
67  triangular part of the matrix A, and the strictly lower
68  triangular part of A is not referenced. If UPLO = 'L', the
69  leading N-by-N lower triangular part of A contains the lower
70  triangular part of the matrix A, and the strictly upper
71  triangular part of A is not referenced.
72 
73  On exit, if INFO = 0, the transformed matrix, stored in the
74  same format as A.
75 
76  LDA (input) INTEGER
77  The leading dimension of the array A. LDA >= max(1,N).
78 
79  B (input) COMPLEX*16 array, dimension (LDB,N)
80  The triangular factor from the Cholesky factorization of B,
81  as returned by CPOTRF.
82 
83  LDB (input) INTEGER
84  The leading dimension of the array B. LDB >= max(1,N).
85 
86  INFO (output) INTEGER
87  = 0: successful exit
88  < 0: if INFO = -i, the i-th argument had an illegal value
89 
90  =====================================================================*/
91 
92  char uplo_[2] = {uplo, 0};
93  magma_int_t nb;
94  magma_int_t k, kb, kb2;
95  cuFloatComplex c_one = MAGMA_C_ONE;
96  cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
97  cuFloatComplex c_half = MAGMA_C_HALF;
98  cuFloatComplex c_neg_half = MAGMA_C_NEG_HALF;
99  cuFloatComplex *dw;
100  magma_int_t ldda = n;
101  magma_int_t lddb = n;
102  float d_one = 1.0;
103  long int upper = lapackf77_lsame(uplo_, "U");
104 
105  /* Test the input parameters. */
106  *info = 0;
107  if (itype<1 || itype>3){
108  *info = -1;
109  }else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
110  *info = -2;
111  } else if (n < 0) {
112  *info = -3;
113  } else if (lda < max(1,n)) {
114  *info = -5;
115  }else if (ldb < max(1,n)) {
116  *info = -7;
117  }
118  if (*info != 0) {
119  magma_xerbla( __func__, -(*info) );
120  return *info;
121  }
122 
123  /* Quick return */
124  if ( n == 0 )
125  return *info;
126 
127  if (MAGMA_SUCCESS != magma_cmalloc( &dw, 2*n*n )) {
128  *info = MAGMA_ERR_DEVICE_ALLOC;
129  return *info;
130  }
131 
132  nb = magma_get_chegst_nb(n);
133 
134  static cudaStream_t stream[2];
135  magma_queue_create( &stream[0] );
136  magma_queue_create( &stream[1] );
137 
138  magma_csetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
139  magma_csetmatrix( n, n, B(0, 0), ldb, dB(0, 0), lddb );
140 
141  /* Use hybrid blocked code */
142 
143  if (itype==1) {
144  if (upper) {
145 
146  /* Compute inv(U')*A*inv(U) */
147 
148  for(k = 0; k<n; k+=nb){
149  kb = min(n-k,nb);
150  kb2= min(n-k-nb,nb);
151 
152  /* Update the upper triangle of A(k:n,k:n) */
153 
154  lapackf77_chegs2( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
155 
156  magma_csetmatrix_async( kb, kb,
157  A(k, k), lda,
158  dA(k, k), ldda, stream[0] );
159 
160  if(k+kb<n){
161 
163  kb, n-k-kb,
164  c_one, dB(k,k), lddb,
165  dA(k,k+kb), ldda);
166 
167  magma_queue_sync( stream[0] );
168 
170  kb, n-k-kb,
171  c_neg_half, dA(k,k), ldda,
172  dB(k,k+kb), lddb,
173  c_one, dA(k, k+kb), ldda);
174 
176  n-k-kb, kb,
177  c_neg_one, dA(k,k+kb), ldda,
178  dB(k,k+kb), lddb,
179  d_one, dA(k+kb,k+kb), ldda);
180 
181  magma_cgetmatrix_async( kb2, kb2,
182  dA(k+kb, k+kb), ldda,
183  A(k+kb, k+kb), lda, stream[1] );
184 
186  kb, n-k-kb,
187  c_neg_half, dA(k,k), ldda,
188  dB(k,k+kb), lddb,
189  c_one, dA(k, k+kb), ldda);
190 
192  kb, n-k-kb,
193  c_one ,dB(k+kb,k+kb), lddb,
194  dA(k,k+kb), ldda);
195 
196  magma_queue_sync( stream[1] );
197 
198  }
199 
200  }
201 
202  magma_queue_sync( stream[0] );
203 
204  } else {
205 
206  /* Compute inv(L)*A*inv(L') */
207 
208  for(k = 0; k<n; k+=nb){
209  kb= min(n-k,nb);
210  kb2= min(n-k-nb,nb);
211 
212  /* Update the lower triangle of A(k:n,k:n) */
213 
214  lapackf77_chegs2( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
215 
216  magma_csetmatrix_async( kb, kb,
217  A(k, k), lda,
218  dA(k, k), ldda, stream[0] );
219 
220  if(k+kb<n){
221 
223  n-k-kb, kb,
224  c_one, dB(k,k), lddb,
225  dA(k+kb,k), ldda);
226 
227  magma_queue_sync( stream[0] );
228 
230  n-k-kb, kb,
231  c_neg_half, dA(k,k), ldda,
232  dB(k+kb,k), lddb,
233  c_one, dA(k+kb, k), ldda);
234 
236  n-k-kb, kb,
237  c_neg_one, dA(k+kb,k), ldda,
238  dB(k+kb,k), lddb,
239  d_one, dA(k+kb,k+kb), ldda);
240 
241  magma_cgetmatrix_async( kb2, kb2,
242  dA(k+kb, k+kb), ldda,
243  A(k+kb, k+kb), lda, stream[1] );
244 
246  n-k-kb, kb,
247  c_neg_half, dA(k,k), ldda,
248  dB(k+kb,k), lddb,
249  c_one, dA(k+kb, k), ldda);
250 
252  n-k-kb, kb,
253  c_one, dB(k+kb,k+kb), lddb,
254  dA(k+kb,k), ldda);
255  }
256 
257  magma_queue_sync( stream[1] );
258 
259  }
260 
261  }
262 
263  magma_queue_sync( stream[0] );
264 
265  } else {
266 
267  if (upper) {
268 
269  /* Compute U*A*U' */
270 
271  for(k = 0; k<n; k+=nb){
272  kb= min(n-k,nb);
273 
274  magma_cgetmatrix_async( kb, kb,
275  dA(k, k), ldda,
276  A(k, k), lda, stream[0] );
277 
278  /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
279  if(k>0){
280 
282  k, kb,
283  c_one ,dB(0,0), lddb,
284  dA(0,k), ldda);
285 
287  k, kb,
288  c_half, dA(k,k), ldda,
289  dB(0,k), lddb,
290  c_one, dA(0, k), ldda);
291 
292  magma_queue_sync( stream[1] );
293 
295  k, kb,
296  c_one, dA(0,k), ldda,
297  dB(0,k), lddb,
298  d_one, dA(0,0), ldda);
299 
301  k, kb,
302  c_half, dA(k,k), ldda,
303  dB(0,k), lddb,
304  c_one, dA(0, k), ldda);
305 
307  k, kb,
308  c_one, dB(k,k), lddb,
309  dA(0,k), ldda);
310 
311  }
312 
313  magma_queue_sync( stream[0] );
314 
315  lapackf77_chegs2( &itype, uplo_, &kb, A(k, k), &lda, B(k, k), &ldb, info);
316 
317  magma_csetmatrix_async( kb, kb,
318  A(k, k), lda,
319  dA(k, k), ldda, stream[1] );
320 
321  }
322 
323  magma_queue_sync( stream[1] );
324 
325  } else {
326 
327  /* Compute L'*A*L */
328 
329  for(k = 0; k<n; k+=nb){
330  kb= min(n-k,nb);
331 
332  magma_cgetmatrix_async( kb, kb,
333  dA(k, k), ldda,
334  A(k, k), lda, stream[0] );
335 
336  /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
337  if(k>0){
338 
340  kb, k,
341  c_one ,dB(0,0), lddb,
342  dA(k,0), ldda);
343 
345  kb, k,
346  c_half, dA(k,k), ldda,
347  dB(k,0), lddb,
348  c_one, dA(k, 0), ldda);
349 
350  magma_queue_sync( stream[1] );
351 
353  k, kb,
354  c_one, dA(k,0), ldda,
355  dB(k,0), lddb,
356  d_one, dA(0,0), ldda);
357 
359  kb, k,
360  c_half, dA(k,k), ldda,
361  dB(k,0), lddb,
362  c_one, dA(k, 0), ldda);
363 
365  kb, k,
366  c_one, dB(k,k), lddb,
367  dA(k,0), ldda);
368  }
369 
370  magma_queue_sync( stream[0] );
371 
372  lapackf77_chegs2( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
373 
374  magma_csetmatrix_async( kb, kb,
375  A(k, k), lda,
376  dA(k, k), ldda, stream[1] );
377  }
378 
379  magma_queue_sync( stream[1] );
380 
381  }
382  }
383 
384  magma_cgetmatrix( n, n, dA(0, 0), ldda, A(0, 0), lda );
385 
386  magma_queue_destroy( stream[0] );
387  magma_queue_destroy( stream[1] );
388 
389  magma_free( dw );
390 
391  return *info;
392 } /* magma_chegst_gpu */