MAGMA  magma-1.4.0 Matrix Algebra on GPU and Multicore Architectures
zpotrf_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
6  August 2013
7
8  @author Stan Tomov
9  @precisions normal z -> s d c
10 */
11 #include "common_magma.h"
12
13 // === Define what BLAS to use ============================================
14 #define PRECISION_z
15
16 #if (defined(PRECISION_s) || defined(PRECISION_d))
17  #define magma_ztrsm magmablas_ztrsm
18 #endif
19 // === End defining what BLAS to use =======================================
20
21 #define dA(i, j) (dA + (j)*ldda + (i))
22
23 extern "C" magma_int_t
25  magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
26 {
27 /* -- MAGMA (version 1.4.0) --
28  Univ. of Tennessee, Knoxville
29  Univ. of California, Berkeley
31  August 2013
32
33  Purpose
34  =======
35  ZPOTRF computes the Cholesky factorization of a complex Hermitian
36  positive definite matrix dA.
37
38  The factorization has the form
39  dA = U**H * U, if UPLO = 'U', or
40  dA = L * L**H, if UPLO = 'L',
41  where U is an upper triangular matrix and L is lower triangular.
42
43  This is the block version of the algorithm, calling Level 3 BLAS.
44  If the current stream is NULL, this version replaces it with user defined
45  stream to overlap computation with communication.
46
47  Arguments
48  =========
49  UPLO (input) CHARACTER*1
50  = 'U': Upper triangle of dA is stored;
51  = 'L': Lower triangle of dA is stored.
52
53  N (input) INTEGER
54  The order of the matrix dA. N >= 0.
55
56  dA (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
57  On entry, the Hermitian matrix dA. If UPLO = 'U', the leading
58  N-by-N upper triangular part of dA contains the upper
59  triangular part of the matrix dA, and the strictly lower
60  triangular part of dA is not referenced. If UPLO = 'L', the
61  leading N-by-N lower triangular part of dA contains the lower
62  triangular part of the matrix dA, and the strictly upper
63  triangular part of dA is not referenced.
64
65  On exit, if INFO = 0, the factor U or L from the Cholesky
66  factorization dA = U**H * U or dA = L * L**H.
67
68  LDDA (input) INTEGER
69  The leading dimension of the array dA. LDDA >= max(1,N).
70  To benefit from coalescent memory accesses LDDA must be
71  dividable by 16.
72
73  INFO (output) INTEGER
74  = 0: successful exit
75  < 0: if INFO = -i, the i-th argument had an illegal value
76  > 0: if INFO = i, the leading minor of order i is not
77  positive definite, and the factorization could not be
78  completed.
79  ===================================================================== */
80
81
82  magma_int_t j, jb, nb;
83  char uplo_[2] = {uplo, 0};
87  double d_one = 1.0;
88  double d_neg_one = -1.0;
89  int upper = lapackf77_lsame(uplo_, "U");
90
91  *info = 0;
92  if ( (! upper) && (! lapackf77_lsame(uplo_, "L")) ) {
93  *info = -1;
94  } else if (n < 0) {
95  *info = -2;
96  } else if (ldda < max(1,n)) {
97  *info = -4;
98  }
99  if (*info != 0) {
100  magma_xerbla( __func__, -(*info) );
101  return *info;
102  }
103
104  nb = magma_get_zpotrf_nb(n);
105
106  if (MAGMA_SUCCESS != magma_zmalloc_pinned( &work, nb*nb )) {
107  *info = MAGMA_ERR_HOST_ALLOC;
108  return *info;
109  }
110
111  /* Define user stream if current stream is NULL */
112  cudaStream_t stream[2], current_stream;
113  magmablasGetKernelStream(&current_stream);
114
115  magma_queue_create( &stream[0] );
116  if (current_stream == NULL) {
117  magma_queue_create( &stream[1] );
118  magmablasSetKernelStream(stream[1]);
119  }
120  else
121  stream[1] = current_stream;
122
123  if ((nb <= 1) || (nb >= n)) {
124  /* Use unblocked code. */
125  magma_zgetmatrix_async( n, n, dA, ldda, work, n, stream[1] );
126  magma_queue_sync( stream[1] );
127  lapackf77_zpotrf(uplo_, &n, work, &n, info);
128  magma_zsetmatrix_async( n, n, work, n, dA, ldda, stream[1] );
129  }
130  else {
131
132  /* Use blocked code. */
133  if (upper) {
134
135  /* Compute the Cholesky factorization A = U'*U. */
136  for (j=0; j<n; j+=nb) {
137
138  /* Update and factorize the current diagonal block and test
139  for non-positive-definiteness. Computing MIN */
140  jb = min(nb, (n-j));
141
143  d_neg_one, dA(0, j), ldda,
144  d_one, dA(j, j), ldda);
145
146  magma_queue_sync( stream[1] );
147  magma_zgetmatrix_async( jb, jb,
148  dA(j, j), ldda,
149  work, jb, stream[0] );
150
151  if ( (j+jb) < n) {
152  /* Compute the current block row. */
154  jb, (n-j-jb), j,
155  c_neg_one, dA(0, j ), ldda,
156  dA(0, j+jb), ldda,
157  c_one, dA(j, j+jb), ldda);
158  }
159
160  magma_queue_sync( stream[0] );
161  lapackf77_zpotrf(MagmaUpperStr, &jb, work, &jb, info);
162  magma_zsetmatrix_async( jb, jb,
163  work, jb,
164  dA(j, j), ldda, stream[1] );
165  if (*info != 0) {
166  *info = *info + j;
167  break;
168  }
169
170  if ( (j+jb) < n) {
172  jb, (n-j-jb),
173  c_one, dA(j, j ), ldda,
174  dA(j, j+jb), ldda);
175  }
176  }
177  }
178  else {
179  //=========================================================
180  // Compute the Cholesky factorization A = L*L'.
181  for (j=0; j<n; j+=nb) {
182
183  // Update and factorize the current diagonal block and test
184  // for non-positive-definiteness. Computing MIN
185  jb = min(nb, (n-j));
186
188  d_neg_one, dA(j, 0), ldda,
189  d_one, dA(j, j), ldda);
190
191  magma_queue_sync( stream[1] );
192  magma_zgetmatrix_async( jb, jb,
193  dA(j, j), ldda,
194  work, jb, stream[0] );
195
196  if ( (j+jb) < n) {
198  (n-j-jb), jb, j,
199  c_neg_one, dA(j+jb, 0), ldda,
200  dA(j, 0), ldda,
201  c_one, dA(j+jb, j), ldda);
202  }
203
204  magma_queue_sync( stream[0] );
205  lapackf77_zpotrf(MagmaLowerStr, &jb, work, &jb, info);
206  magma_zsetmatrix_async( jb, jb,
207  work, jb,
208  dA(j, j), ldda, stream[1] );
209  if (*info != 0) {
210  *info = *info + j;
211  break;
212  }
213
214  if ( (j+jb) < n) {
216  (n-j-jb), jb,
217  c_one, dA(j, j), ldda,
218  dA(j+jb, j), ldda);
219  }
220  }
221  }
222  }
223
224  magma_free_pinned( work );
225
226  magma_queue_destroy( stream[0] );
227  if (current_stream == NULL) {
228  magma_queue_destroy( stream[1] );
230  }
231
232  return *info;
233 } /* magma_zpotrf_gpu */
#define min(a, b)
Definition: common_magma.h:86
#define MagmaLeft
Definition: magma.h:68
#define magma_queue_create(queuePtr)
Definition: magma.h:113
#define __func__
Definition: common_magma.h:65
#define magma_zgetmatrix_async(m, n, dA_src, ldda, hB_dst, ldb, queue)
Definition: magmablas_z.h:714
#define MagmaUpper
Definition: magma.h:61
#define MAGMA_Z_NEG_ONE
Definition: magma.h:134
#define magma_free_pinned(ptr)
Definition: magma.h:60
int magma_int_t
Definition: magmablas.h:12
#define magma_queue_destroy(queue)
Definition: magma.h:116
void magma_ztrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaDoubleComplex alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, magmaDoubleComplex_ptr dB, magma_int_t lddb)
cublasStatus_t magmablasSetKernelStream(magma_queue_t stream)
magma_int_t magma_zpotrf_gpu(char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
#define MagmaLower
Definition: magma.h:62
magma_int_t magma_get_zpotrf_nb(magma_int_t m)
Definition: get_nb.cpp:79
#define dA(i, j)
Definition: zpotrf_gpu.cpp:21
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define lapackf77_lsame
Definition: magma_lapack.h:23
#define MagmaConjTrans
Definition: magma.h:59
#define MagmaLowerStr
Definition: magma.h:85
#define lapackf77_zpotrf
Definition: magma_zlapack.h:89
static magma_err_t magma_zmalloc_pinned(magmaDoubleComplex **ptrPtr, size_t n)
Definition: magma.h:92
#define MagmaNonUnit
Definition: magma.h:65
#define MAGMA_SUCCESS
Definition: magma.h:106
#define magma_zsetmatrix_async(m, n, hA_src, lda, dB_dst, lddb, queue)
Definition: magmablas_z.h:711
#define MagmaRight
Definition: magma.h:69
void magma_zherk(magma_uplo_t uplo, magma_trans_t trans, magma_int_t n, magma_int_t k, double alpha, magmaDoubleComplex_const_ptr dA, magma_int_t ldda, double beta, magmaDoubleComplex_ptr dC, magma_int_t lddc)
#define MAGMA_Z_ONE
Definition: magma.h:132
#define MagmaUpperStr
Definition: magma.h:84
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
cublasStatus_t magmablasGetKernelStream(magma_queue_t *stream)
#define MAGMA_ERR_HOST_ALLOC
Definition: magma_types.h:275
#define magma_queue_sync(queue)
Definition: magma.h:119