MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
chegst.cpp File Reference
#include "common_magma.h"
Include dependency graph for chegst.cpp:

Go to the source code of this file.

Macros

#define A(i, j)   (a+(j)*lda + (i))
 
#define B(i, j)   (b+(j)*ldb + (i))
 
#define dA(i, j)   (dw+(j)*ldda + (i))
 
#define dB(i, j)   (dw+n*ldda+(j)*lddb + (i))
 

Functions

magma_int_t magma_chegst (magma_int_t itype, char uplo, magma_int_t n, magmaFloatComplex *a, magma_int_t lda, magmaFloatComplex *b, magma_int_t ldb, magma_int_t *info)
 

Macro Definition Documentation

#define A (   i,
 
)    (a+(j)*lda + (i))

Definition at line 16 of file chegst.cpp.

#define B (   i,
 
)    (b+(j)*ldb + (i))

Definition at line 17 of file chegst.cpp.

#define dA (   i,
 
)    (dw+(j)*ldda + (i))

Definition at line 19 of file chegst.cpp.

#define dB (   i,
 
)    (dw+n*ldda+(j)*lddb + (i))

Definition at line 20 of file chegst.cpp.

Function Documentation

magma_int_t magma_chegst ( magma_int_t  itype,
char  uplo,
magma_int_t  n,
magmaFloatComplex *  a,
magma_int_t  lda,
magmaFloatComplex *  b,
magma_int_t  ldb,
magma_int_t info 
)

Definition at line 23 of file chegst.cpp.

References __func__, A, B, dA, dB, lapackf77_chegst(), lapackf77_lsame, cgehrd_data::ldda, MAGMA_C_HALF, MAGMA_C_NEG_HALF, MAGMA_C_NEG_ONE, MAGMA_C_ONE, magma_cgetmatrix, magma_cgetmatrix_async, magma_chemm(), magma_cher2k(), magma_cmalloc(), magma_csetmatrix, magma_csetmatrix_async, magma_ctrmm(), magma_ctrsm(), MAGMA_ERR_DEVICE_ALLOC, magma_free, magma_get_chegst_nb(), magma_queue_create, magma_queue_destroy, magma_queue_sync, MAGMA_SUCCESS, magma_xerbla(), MagmaConjTrans, MagmaLeft, MagmaLower, MagmaNonUnit, MagmaNoTrans, MagmaRight, MagmaUpper, max, and min.

26 {
27 /* -- MAGMA (version 1.4.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
30  Univ. of Colorado, Denver
31  August 2013
32 
33  Purpose
34  =======
35  CHEGST reduces a complex Hermitian-definite generalized
36  eigenproblem to standard form.
37 
38  If ITYPE = 1, the problem is A*x = lambda*B*x,
39  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
40 
41  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
42  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
43 
44  B must have been previously factorized as U**H*U or L*L**H by CPOTRF.
45 
46  Arguments
47  =========
48  ITYPE (input) INTEGER
49  = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
50  = 2 or 3: compute U*A*U**H or L**H*A*L.
51 
52  UPLO (input) CHARACTER*1
53  = 'U': Upper triangle of A is stored and B is factored as
54  U**H*U;
55  = 'L': Lower triangle of A is stored and B is factored as
56  L*L**H.
57 
58  N (input) INTEGER
59  The order of the matrices A and B. N >= 0.
60 
61  A (input/output) COMPLEX array, dimension (LDA,N)
62  On entry, the Hermitian matrix A. If UPLO = 'U', the leading
63  N-by-N upper triangular part of A contains the upper
64  triangular part of the matrix A, and the strictly lower
65  triangular part of A is not referenced. If UPLO = 'L', the
66  leading N-by-N lower triangular part of A contains the lower
67  triangular part of the matrix A, and the strictly upper
68  triangular part of A is not referenced.
69 
70  On exit, if INFO = 0, the transformed matrix, stored in the
71  same format as A.
72 
73  LDA (input) INTEGER
74  The leading dimension of the array A. LDA >= max(1,N).
75 
76  B (input) COMPLEX array, dimension (LDB,N)
77  The triangular factor from the Cholesky factorization of B,
78  as returned by CPOTRF.
79 
80  LDB (input) INTEGER
81  The leading dimension of the array B. LDB >= max(1,N).
82 
83  INFO (output) INTEGER
84  = 0: successful exit
85  < 0: if INFO = -i, the i-th argument had an illegal value
86 
87  =====================================================================*/
88 
89  char uplo_[2] = {uplo, 0};
90  magma_int_t nb;
91  magma_int_t k, kb, kb2;
92  magmaFloatComplex c_one = MAGMA_C_ONE;
93  magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
94  magmaFloatComplex c_half = MAGMA_C_HALF;
95  magmaFloatComplex c_neg_half = MAGMA_C_NEG_HALF;
96  magmaFloatComplex *dw;
97  magma_int_t ldda = n;
98  magma_int_t lddb = n;
99  float d_one = 1.0;
100  int upper = lapackf77_lsame(uplo_, "U");
101 
102  /* Test the input parameters. */
103  *info = 0;
104  if (itype<1 || itype>3){
105  *info = -1;
106  }else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
107  *info = -2;
108  } else if (n < 0) {
109  *info = -3;
110  } else if (lda < max(1,n)) {
111  *info = -5;
112  }else if (ldb < max(1,n)) {
113  *info = -7;
114  }
115  if (*info != 0) {
116  magma_xerbla( __func__, -(*info) );
117  return *info;
118  }
119 
120  /* Quick return */
121  if ( n == 0 )
122  return *info;
123 
124  if (MAGMA_SUCCESS != magma_cmalloc( &dw, 2*n*n )) {
125  *info = MAGMA_ERR_DEVICE_ALLOC;
126  return *info;
127  }
128 
129  nb = magma_get_chegst_nb(n);
130 
131  magma_queue_t stream[2];
132  magma_queue_create( &stream[0] );
133  magma_queue_create( &stream[1] );
134 
135  magma_csetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
136  magma_csetmatrix( n, n, B(0, 0), ldb, dB(0, 0), lddb );
137 
138  /* Use hybrid blocked code */
139 
140  if (itype==1) {
141  if (upper) {
142  /* Compute inv(U')*A*inv(U) */
143 
144  for(k = 0; k<n; k+=nb){
145  kb = min(n-k,nb);
146  kb2= min(n-k-nb,nb);
147 
148  /* Update the upper triangle of A(k:n,k:n) */
149 
150  lapackf77_chegst( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
151 
152  magma_csetmatrix_async( kb, kb,
153  A(k, k), lda,
154  dA(k, k), ldda, stream[0] );
155 
156  if(k+kb<n){
158  kb, n-k-kb,
159  c_one, dB(k,k), lddb,
160  dA(k,k+kb), ldda);
161 
162  magma_queue_sync( stream[0] );
163 
165  kb, n-k-kb,
166  c_neg_half, dA(k,k), ldda,
167  dB(k,k+kb), lddb,
168  c_one, dA(k, k+kb), ldda);
169 
171  n-k-kb, kb,
172  c_neg_one, dA(k,k+kb), ldda,
173  dB(k,k+kb), lddb,
174  d_one, dA(k+kb,k+kb), ldda);
175 
176  magma_cgetmatrix_async( kb2, kb2,
177  dA(k+kb, k+kb), ldda,
178  A(k+kb, k+kb), lda, stream[1] );
179 
181  kb, n-k-kb,
182  c_neg_half, dA(k,k), ldda,
183  dB(k,k+kb), lddb,
184  c_one, dA(k, k+kb), ldda);
185 
187  kb, n-k-kb,
188  c_one ,dB(k+kb,k+kb), lddb,
189  dA(k,k+kb), ldda);
190 
191  magma_queue_sync( stream[1] );
192  }
193  }
194 
195  magma_queue_sync( stream[0] );
196  }
197  else {
198  /* Compute inv(L)*A*inv(L') */
199 
200  for(k = 0; k<n; k+=nb){
201  kb= min(n-k,nb);
202  kb2= min(n-k-nb,nb);
203 
204  /* Update the lower triangle of A(k:n,k:n) */
205 
206  lapackf77_chegst( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
207 
208  magma_csetmatrix_async( kb, kb,
209  A(k, k), lda,
210  dA(k, k), ldda, stream[0] );
211 
212  if(k+kb<n){
214  n-k-kb, kb,
215  c_one, dB(k,k), lddb,
216  dA(k+kb,k), ldda);
217 
218  magma_queue_sync( stream[0] );
219 
221  n-k-kb, kb,
222  c_neg_half, dA(k,k), ldda,
223  dB(k+kb,k), lddb,
224  c_one, dA(k+kb, k), ldda);
225 
227  n-k-kb, kb,
228  c_neg_one, dA(k+kb,k), ldda,
229  dB(k+kb,k), lddb,
230  d_one, dA(k+kb,k+kb), ldda);
231 
232  magma_cgetmatrix_async( kb2, kb2,
233  dA(k+kb, k+kb), ldda,
234  A(k+kb, k+kb), lda, stream[1] );
235 
237  n-k-kb, kb,
238  c_neg_half, dA(k,k), ldda,
239  dB(k+kb,k), lddb,
240  c_one, dA(k+kb, k), ldda);
241 
243  n-k-kb, kb,
244  c_one, dB(k+kb,k+kb), lddb,
245  dA(k+kb,k), ldda);
246  }
247 
248  magma_queue_sync( stream[1] );
249  }
250  }
251 
252  magma_queue_sync( stream[0] );
253  }
254  else {
255  if (upper) {
256  /* Compute U*A*U' */
257  for(k = 0; k<n; k+=nb){
258  kb= min(n-k,nb);
259 
260  magma_cgetmatrix_async( kb, kb,
261  dA(k, k), ldda,
262  A(k, k), lda, stream[0] );
263 
264  /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */
265  if(k>0){
267  k, kb,
268  c_one ,dB(0,0), lddb,
269  dA(0,k), ldda);
270 
272  k, kb,
273  c_half, dA(k,k), ldda,
274  dB(0,k), lddb,
275  c_one, dA(0, k), ldda);
276 
277  magma_queue_sync( stream[1] );
278 
280  k, kb,
281  c_one, dA(0,k), ldda,
282  dB(0,k), lddb,
283  d_one, dA(0,0), ldda);
284 
286  k, kb,
287  c_half, dA(k,k), ldda,
288  dB(0,k), lddb,
289  c_one, dA(0, k), ldda);
290 
292  k, kb,
293  c_one, dB(k,k), lddb,
294  dA(0,k), ldda);
295  }
296 
297  magma_queue_sync( stream[0] );
298 
299  lapackf77_chegst( &itype, uplo_, &kb, A(k, k), &lda, B(k, k), &ldb, info);
300 
301  magma_csetmatrix_async( kb, kb,
302  A(k, k), lda,
303  dA(k, k), ldda, stream[1] );
304  }
305 
306  magma_queue_sync( stream[1] );
307  }
308  else {
309  /* Compute L'*A*L */
310  for(k = 0; k<n; k+=nb){
311  kb= min(n-k,nb);
312 
313  magma_cgetmatrix_async( kb, kb,
314  dA(k, k), ldda,
315  A(k, k), lda, stream[0] );
316 
317  /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */
318  if(k>0){
319 
321  kb, k,
322  c_one ,dB(0,0), lddb,
323  dA(k,0), ldda);
324 
326  kb, k,
327  c_half, dA(k,k), ldda,
328  dB(k,0), lddb,
329  c_one, dA(k, 0), ldda);
330 
331  magma_queue_sync( stream[1] );
332 
334  k, kb,
335  c_one, dA(k,0), ldda,
336  dB(k,0), lddb,
337  d_one, dA(0,0), ldda);
338 
340  kb, k,
341  c_half, dA(k,k), ldda,
342  dB(k,0), lddb,
343  c_one, dA(k, 0), ldda);
344 
346  kb, k,
347  c_one, dB(k,k), lddb,
348  dA(k,0), ldda);
349  }
350 
351  magma_queue_sync( stream[0] );
352 
353  lapackf77_chegst( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
354 
355  magma_csetmatrix_async( kb, kb,
356  A(k, k), lda,
357  dA(k, k), ldda, stream[1] );
358  }
359 
360  magma_queue_sync( stream[1] );
361  }
362  }
363 
364  magma_cgetmatrix( n, n, dA(0, 0), ldda, A(0, 0), lda );
365 
366  magma_queue_destroy( stream[0] );
367  magma_queue_destroy( stream[1] );
368 
369  magma_free( dw );
370 
371  return *info;
372 } /* magma_chegst_gpu */
void magma_chemm(magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
#define min(a, b)
Definition: common_magma.h:86
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define MagmaUpper
Definition: magma.h:61
#define magma_cgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_c.h:714
static magma_err_t magma_cmalloc(magmaFloatComplex_ptr *ptrPtr, size_t n)
Definition: magma.h:79
#define B(i, j)
Definition: chegst.cpp:17
#define dB(i, j)
Definition: chegst.cpp:20
#define MAGMA_C_NEG_ONE
Definition: magma.h:156
#define MAGMA_ERR_DEVICE_ALLOC
Definition: magma_types.h:276
#define magma_free(ptr)
Definition: magma.h:57
magma_int_t magma_get_chegst_nb(magma_int_t m)
Definition: get_nb.cpp:537
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
void magma_ctrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
#define A(i, j)
Definition: chegst.cpp:16
#define MAGMA_C_NEG_HALF
Definition: magma.h:157
#define magma_cgetmatrix(m, n, dA_src, ldda, hB_dst, ldb)
Definition: magmablas_c.h:705
#define MagmaLower
Definition: magma.h:62
void lapackf77_chegst(const magma_int_t *itype, const char *uplo, const magma_int_t *n, magmaFloatComplex *A, const magma_int_t *lda, magmaFloatComplex *B, const magma_int_t *ldb, magma_int_t *info)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
#define MAGMA_C_ONE
Definition: magma.h:154
magma_int_t ldda
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define MagmaRight
Definition: magma.h:69
#define dA(i, j)
Definition: chegst.cpp:19
#define MAGMA_C_HALF
Definition: magma.h:155
void magma_ctrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
#define MagmaNoTrans
Definition: magma.h:57
#define magma_csetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_c.h:711
#define max(a, b)
Definition: common_magma.h:82
#define magma_csetmatrix(m, n, hA_src, lda, dB_dst, lddb)
Definition: magmablas_c.h:702
void magma_cher2k(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, float beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
#define magma_queue_sync(queue)
Definition: magma.h:119

Here is the call graph for this function:

Here is the caller graph for this function: