MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zpotrf_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @precisions normal z -> s d c
9 
10 */
11 #include "common_magma.h"
12 
13 // === Define what BLAS to use ============================================
14 #define PRECISION_z
15 #if (defined(PRECISION_s) || defined(PRECISION_d))
16  #define magma_zgemm magmablas_zgemm
17  #define magma_ztrsm magmablas_ztrsm
18 #endif
19 
20 #if (GPUSHMEM >= 200)
21  #if (defined(PRECISION_s))
22  #undef magma_sgemm
23  #define magma_sgemm magmablas_sgemm_fermi80
24  #endif
25 #endif
26 // === End defining what BLAS to use =======================================
27 
28 #define dA(i, j) (dA + (j)*ldda + (i))
29 
30 extern "C" magma_int_t
32  cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info)
33 {
34 /* -- MAGMA (version 1.2.0) --
35  Univ. of Tennessee, Knoxville
36  Univ. of California, Berkeley
37  Univ. of Colorado, Denver
38  May 2012
39 
40  Purpose
41  =======
42  ZPOTRF computes the Cholesky factorization of a complex Hermitian
43  positive definite matrix dA.
44 
45  The factorization has the form
46  dA = U**H * U, if UPLO = 'U', or
47  dA = L * L**H, if UPLO = 'L',
48  where U is an upper triangular matrix and L is lower triangular.
49 
50  This is the block version of the algorithm, calling Level 3 BLAS.
51 
52  Arguments
53  =========
54  UPLO (input) CHARACTER*1
55  = 'U': Upper triangle of dA is stored;
56  = 'L': Lower triangle of dA is stored.
57 
58  N (input) INTEGER
59  The order of the matrix dA. N >= 0.
60 
61  dA (input/output) COMPLEX_16 array on the GPU, dimension (LDDA,N)
62  On entry, the Hermitian matrix dA. If UPLO = 'U', the leading
63  N-by-N upper triangular part of dA contains the upper
64  triangular part of the matrix dA, and the strictly lower
65  triangular part of dA is not referenced. If UPLO = 'L', the
66  leading N-by-N lower triangular part of dA contains the lower
67  triangular part of the matrix dA, and the strictly upper
68  triangular part of dA is not referenced.
69 
70  On exit, if INFO = 0, the factor U or L from the Cholesky
71  factorization dA = U**H * U or dA = L * L**H.
72 
73  LDDA (input) INTEGER
74  The leading dimension of the array dA. LDDA >= max(1,N).
75  To benefit from coalescent memory accesses LDDA must be
76  dividable by 16.
77 
78  INFO (output) INTEGER
79  = 0: successful exit
80  < 0: if INFO = -i, the i-th argument had an illegal value
81  > 0: if INFO = i, the leading minor of order i is not
82  positive definite, and the factorization could not be
83  completed.
84  ===================================================================== */
85 
86 
87  magma_int_t j, jb, nb;
88  char uplo_[2] = {uplo, 0};
89  cuDoubleComplex c_one = MAGMA_Z_ONE;
90  cuDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
91  cuDoubleComplex *work;
92  double d_one = 1.0;
93  double d_neg_one = -1.0;
94  long int upper = lapackf77_lsame(uplo_, "U");
95 
96  *info = 0;
97  if ( (! upper) && (! lapackf77_lsame(uplo_, "L")) ) {
98  *info = -1;
99  } else if (n < 0) {
100  *info = -2;
101  } else if (ldda < max(1,n)) {
102  *info = -4;
103  }
104  if (*info != 0) {
105  magma_xerbla( __func__, -(*info) );
106  return *info;
107  }
108 
109  nb = magma_get_zpotrf_nb(n);
110 
111  if (MAGMA_SUCCESS != magma_zmalloc_host( &work, nb*nb )) {
112  *info = MAGMA_ERR_HOST_ALLOC;
113  return *info;
114  }
115 
116  static cudaStream_t stream[2];
117  magma_queue_create( &stream[0] );
118  magma_queue_create( &stream[1] );
119 
120  if ((nb <= 1) || (nb >= n)) {
121  /* Use unblocked code. */
122  magma_zgetmatrix( n, n, dA, ldda, work, n );
123  lapackf77_zpotrf(uplo_, &n, work, &n, info);
124  magma_zsetmatrix( n, n, work, n, dA, ldda );
125  } else {
126 
127  /* Use blocked code. */
128  if (upper) {
129 
130  /* Compute the Cholesky factorization A = U'*U. */
131  for (j=0; j<n; j+=nb) {
132 
133  /* Update and factorize the current diagonal block and test
134  for non-positive-definiteness. Computing MIN */
135  jb = min(nb, (n-j));
136 
138  d_neg_one, dA(0, j), ldda,
139  d_one, dA(j, j), ldda);
140 
141  magma_zgetmatrix_async( jb, jb,
142  dA(j, j), ldda,
143  work, jb, stream[1] );
144 
145  if ( (j+jb) < n) {
146  /* Compute the current block row. */
148  jb, (n-j-jb), j,
149  c_neg_one, dA(0, j ), ldda,
150  dA(0, j+jb), ldda,
151  c_one, dA(j, j+jb), ldda);
152  }
153 
154  magma_queue_sync( stream[1] );
155 
156  lapackf77_zpotrf(MagmaUpperStr, &jb, work, &jb, info);
157  magma_zsetmatrix_async( jb, jb,
158  work, jb,
159  dA(j, j), ldda, stream[0] );
160  if (*info != 0) {
161  *info = *info + j;
162  break;
163  }
164 
165  if ( (j+jb) < n)
167  jb, (n-j-jb),
168  c_one, dA(j, j ), ldda,
169  dA(j, j+jb), ldda);
170  }
171  } else {
172  //=========================================================
173  // Compute the Cholesky factorization A = L*L'.
174  for (j=0; j<n; j+=nb) {
175 
176  // Update and factorize the current diagonal block and test
177  // for non-positive-definiteness. Computing MIN
178  jb = min(nb, (n-j));
179 
181  d_neg_one, dA(j, 0), ldda,
182  d_one, dA(j, j), ldda);
183 
184  magma_zgetmatrix_async( jb, jb,
185  dA(j, j), ldda,
186  work, jb, stream[1] );
187 
188  if ( (j+jb) < n) {
190  (n-j-jb), jb, j,
191  c_neg_one, dA(j+jb, 0), ldda,
192  dA(j, 0), ldda,
193  c_one, dA(j+jb, j), ldda);
194  }
195 
196  magma_queue_sync( stream[1] );
197  lapackf77_zpotrf(MagmaLowerStr, &jb, work, &jb, info);
198  magma_zsetmatrix_async( jb, jb,
199  work, jb,
200  dA(j, j), ldda, stream[0] );
201  if (*info != 0) {
202  *info = *info + j;
203  break;
204  }
205 
206  if ( (j+jb) < n)
208  (n-j-jb), jb,
209  c_one, dA(j, j), ldda,
210  dA(j+jb, j), ldda);
211  }
212 
213  }
214  }
215 
216  magma_queue_destroy( stream[0] );
217  magma_queue_destroy( stream[1] );
218  magma_free_host( work );
219 
220  return *info;
221 } /* magma_zpotrf_gpu */