MAGMA  1.2.0 MatrixAlgebraonGPUandMulticoreArchitectures
dsygst.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  May 2012
7
8  @author Raffaele Solca
9
10  @generated d Thu May 10 22:27:02 2012
11 */
12
13 #include "common_magma.h"
14
15 #define A(i, j) (a+(j)*lda + (i))
16 #define B(i, j) (b+(j)*ldb + (i))
17
18 #define dA(i, j) (dw+(j)*ldda + (i))
19 #define dB(i, j) (dw+n*ldda+(j)*lddb + (i))
20
21 extern "C" magma_int_t
23  double *a, magma_int_t lda,
24  double *b, magma_int_t ldb, magma_int_t *info)
25 {
26 /*
27  -- MAGMA (version 1.2.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
31  May 2012
32
33
34  Purpose
35  =======
36
37  DSYGST reduces a real symmetric-definite generalized
38  eigenproblem to standard form.
39
40  If ITYPE = 1, the problem is A*x = lambda*B*x,
41  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
42
43  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
44  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
45
46  B must have been previously factorized as U**T*U or L*L**T by DPOTRF.
47
48  Arguments
49  =========
50
51  ITYPE (input) INTEGER
52  = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);
53  = 2 or 3: compute U*A*U**T or L**T*A*L.
54
55  UPLO (input) CHARACTER*1
56  = 'U': Upper triangle of A is stored and B is factored as
57  U**T*U;
58  = 'L': Lower triangle of A is stored and B is factored as
59  L*L**T.
60
61  N (input) INTEGER
62  The order of the matrices A and B. N >= 0.
63
64  A (input/output) COMPLEX*16 array, dimension (LDA,N)
65  On entry, the symmetric matrix A. If UPLO = 'U', the leading
66  N-by-N upper triangular part of A contains the upper
67  triangular part of the matrix A, and the strictly lower
68  triangular part of A is not referenced. If UPLO = 'L', the
69  leading N-by-N lower triangular part of A contains the lower
70  triangular part of the matrix A, and the strictly upper
71  triangular part of A is not referenced.
72
73  On exit, if INFO = 0, the transformed matrix, stored in the
74  same format as A.
75
76  LDA (input) INTEGER
77  The leading dimension of the array A. LDA >= max(1,N).
78
79  B (input) COMPLEX*16 array, dimension (LDB,N)
80  The triangular factor from the Cholesky factorization of B,
81  as returned by DPOTRF.
82
83  LDB (input) INTEGER
84  The leading dimension of the array B. LDB >= max(1,N).
85
86  INFO (output) INTEGER
87  = 0: successful exit
88  < 0: if INFO = -i, the i-th argument had an illegal value
89
90  =====================================================================*/
91
92  char uplo_[2] = {uplo, 0};
93  magma_int_t nb;
94  magma_int_t k, kb, kb2;
95  double c_one = MAGMA_D_ONE;
96  double c_neg_one = MAGMA_D_NEG_ONE;
97  double c_half = MAGMA_D_HALF;
98  double c_neg_half = MAGMA_D_NEG_HALF;
99  double *dw;
100  magma_int_t ldda = n;
101  magma_int_t lddb = n;
102  double d_one = 1.0;
103  long int upper = lapackf77_lsame(uplo_, "U");
104
105  /* Test the input parameters. */
106  *info = 0;
107  if (itype<1 || itype>3){
108  *info = -1;
109  }else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
110  *info = -2;
111  } else if (n < 0) {
112  *info = -3;
113  } else if (lda < max(1,n)) {
114  *info = -5;
115  }else if (ldb < max(1,n)) {
116  *info = -7;
117  }
118  if (*info != 0) {
119  magma_xerbla( __func__, -(*info) );
120  return *info;
121  }
122
123  /* Quick return */
124  if ( n == 0 )
125  return *info;
126
127  if (MAGMA_SUCCESS != magma_dmalloc( &dw, 2*n*n )) {
128  *info = MAGMA_ERR_DEVICE_ALLOC;
129  return *info;
130  }
131
132  nb = magma_get_dsygst_nb(n);
133
134  static cudaStream_t stream[2];
135  magma_queue_create( &stream[0] );
136  magma_queue_create( &stream[1] );
137
138  magma_dsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
139  magma_dsetmatrix( n, n, B(0, 0), ldb, dB(0, 0), lddb );
140
141  /* Use hybrid blocked code */
142
143  if (itype==1) {
144  if (upper) {
145
146  /* Compute inv(U')*A*inv(U) */
147
148  for(k = 0; k<n; k+=nb){
149  kb = min(n-k,nb);
150  kb2= min(n-k-nb,nb);
151
152  /* Update the upper triangle of A(k:n,k:n) */
153
154  lapackf77_dhegs2( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
155
156  magma_dsetmatrix_async( kb, kb,
157  A(k, k), lda,
158  dA(k, k), ldda, stream[0] );
159
160  if(k+kb<n){
161
163  kb, n-k-kb,
164  c_one, dB(k,k), lddb,
165  dA(k,k+kb), ldda);
166
167  magma_queue_sync( stream[0] );
168
170  kb, n-k-kb,
171  c_neg_half, dA(k,k), ldda,
172  dB(k,k+kb), lddb,
173  c_one, dA(k, k+kb), ldda);
174
176  n-k-kb, kb,
177  c_neg_one, dA(k,k+kb), ldda,
178  dB(k,k+kb), lddb,
179  d_one, dA(k+kb,k+kb), ldda);
180
181  magma_dgetmatrix_async( kb2, kb2,
182  dA(k+kb, k+kb), ldda,
183  A(k+kb, k+kb), lda, stream[1] );
184
186  kb, n-k-kb,
187  c_neg_half, dA(k,k), ldda,
188  dB(k,k+kb), lddb,
189  c_one, dA(k, k+kb), ldda);
190
192  kb, n-k-kb,
193  c_one ,dB(k+kb,k+kb), lddb,
194  dA(k,k+kb), ldda);
195
196  magma_queue_sync( stream[1] );
197
198  }
199
200  }
201
202  magma_queue_sync( stream[0] );
203
204  } else {
205
206  /* Compute inv(L)*A*inv(L') */
207
208  for(k = 0; k<n; k+=nb){
209  kb= min(n-k,nb);
210  kb2= min(n-k-nb,nb);
211
212  /* Update the lower triangle of A(k:n,k:n) */
213
214  lapackf77_dhegs2( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
215
216  magma_dsetmatrix_async( kb, kb,
217  A(k, k), lda,
218  dA(k, k), ldda, stream[0] );
219
220  if(k+kb<n){
221
223  n-k-kb, kb,
224  c_one, dB(k,k), lddb,
225  dA(k+kb,k), ldda);
226
227  magma_queue_sync( stream[0] );
228
230  n-k-kb, kb,
231  c_neg_half, dA(k,k), ldda,
232  dB(k+kb,k), lddb,
233  c_one, dA(k+kb, k), ldda);
234
236  n-k-kb, kb,
237  c_neg_one, dA(k+kb,k), ldda,
238  dB(k+kb,k), lddb,
239  d_one, dA(k+kb,k+kb), ldda);
240
241  magma_dgetmatrix_async( kb2, kb2,
242  dA(k+kb, k+kb), ldda,
243  A(k+kb, k+kb), lda, stream[1] );
244
246  n-k-kb, kb,
247  c_neg_half, dA(k,k), ldda,
248  dB(k+kb,k), lddb,
249  c_one, dA(k+kb, k), ldda);
250
252  n-k-kb, kb,
253  c_one, dB(k+kb,k+kb), lddb,
254  dA(k+kb,k), ldda);
255  }
256
257  magma_queue_sync( stream[1] );
258
259  }
260
261  }
262
263  magma_queue_sync( stream[0] );
264
265  } else {
266
267  if (upper) {
268
269  /* Compute U*A*U' */
270
271  for(k = 0; k<n; k+=nb){
272  kb= min(n-k,nb);
273
274  magma_dgetmatrix_async( kb, kb,
275  dA(k, k), ldda,
276  A(k, k), lda, stream[0] );
277
278  /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
279  if(k>0){
280
282  k, kb,
283  c_one ,dB(0,0), lddb,
284  dA(0,k), ldda);
285
287  k, kb,
288  c_half, dA(k,k), ldda,
289  dB(0,k), lddb,
290  c_one, dA(0, k), ldda);
291
292  magma_queue_sync( stream[1] );
293
295  k, kb,
296  c_one, dA(0,k), ldda,
297  dB(0,k), lddb,
298  d_one, dA(0,0), ldda);
299
301  k, kb,
302  c_half, dA(k,k), ldda,
303  dB(0,k), lddb,
304  c_one, dA(0, k), ldda);
305
307  k, kb,
308  c_one, dB(k,k), lddb,
309  dA(0,k), ldda);
310
311  }
312
313  magma_queue_sync( stream[0] );
314
315  lapackf77_dhegs2( &itype, uplo_, &kb, A(k, k), &lda, B(k, k), &ldb, info);
316
317  magma_dsetmatrix_async( kb, kb,
318  A(k, k), lda,
319  dA(k, k), ldda, stream[1] );
320
321  }
322
323  magma_queue_sync( stream[1] );
324
325  } else {
326
327  /* Compute L'*A*L */
328
329  for(k = 0; k<n; k+=nb){
330  kb= min(n-k,nb);
331
332  magma_dgetmatrix_async( kb, kb,
333  dA(k, k), ldda,
334  A(k, k), lda, stream[0] );
335
336  /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
337  if(k>0){
338
340  kb, k,
341  c_one ,dB(0,0), lddb,
342  dA(k,0), ldda);
343
345  kb, k,
346  c_half, dA(k,k), ldda,
347  dB(k,0), lddb,
348  c_one, dA(k, 0), ldda);
349
350  magma_queue_sync( stream[1] );
351
353  k, kb,
354  c_one, dA(k,0), ldda,
355  dB(k,0), lddb,
356  d_one, dA(0,0), ldda);
357
359  kb, k,
360  c_half, dA(k,k), ldda,
361  dB(k,0), lddb,
362  c_one, dA(k, 0), ldda);
363
365  kb, k,
366  c_one, dB(k,k), lddb,
367  dA(k,0), ldda);
368  }
369
370  magma_queue_sync( stream[0] );
371
372  lapackf77_dhegs2( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
373
374  magma_dsetmatrix_async( kb, kb,
375  A(k, k), lda,
376  dA(k, k), ldda, stream[1] );
377  }
378
379  magma_queue_sync( stream[1] );
380
381  }
382  }
383
384  magma_dgetmatrix( n, n, dA(0, 0), ldda, A(0, 0), lda );
385
386  magma_queue_destroy( stream[0] );
387  magma_queue_destroy( stream[1] );
388
389  magma_free( dw );
390
391  return *info;
392 } /* magma_dsygst_gpu */