MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dsyr2k_mgpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for dsyr2k_mgpu.cpp:

Go to the source code of this file.

Macros

#define dA(dev, i, j)   (dA[dev] + (i) + (j)*lda + (aoffset) )
 
#define dB(dev, i, j)   (dB[dev] + (i) + (j)*ldb + (boffset) )
 
#define dC(dev, i, j)   (dC[dev] + (i) + (j)*ldc)
 

Functions

void magmablas_dsyr2k_mgpu2 (char uplo, char trans, magma_int_t n, magma_int_t k, double alpha, double *dA[], magma_int_t lda, magma_int_t aoffset, double *dB[], magma_int_t ldb, magma_int_t boffset, double beta, double *dC[], magma_int_t ldc, magma_int_t coffset, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream)
 

Macro Definition Documentation

#define dA(dev, i, j)    (dA[dev] + (i) + (j)*lda + (aoffset) )
#define dB(dev, i, j)    (dB[dev] + (i) + (j)*ldb + (boffset) )
#define dC(dev, i, j)    (dC[dev] + (i) + (j)*ldc)

Function Documentation

void magmablas_dsyr2k_mgpu2 ( char  uplo,
char  trans,
magma_int_t  n,
magma_int_t  k,
double  alpha,
double *  dA[],
magma_int_t  lda,
magma_int_t  aoffset,
double *  dB[],
magma_int_t  ldb,
magma_int_t  boffset,
double  beta,
double *  dC[],
magma_int_t  ldc,
magma_int_t  coffset,
magma_int_t  ngpu,
magma_int_t  nb,
magma_queue_t  streams[][20],
magma_int_t  nstream 
)

Definition at line 157 of file dsyr2k_mgpu.cpp.

References __func__, dA, dB, dC, MAGMA_D_CNJG, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgemm(), magma_getdevice(), magma_setdevice(), magma_xerbla(), magmablasGetKernelStream(), magmablasSetKernelStream(), MagmaNoTrans, MagmaTrans, max, and min.

163 {
164  #define dA(dev, i, j) (dA[dev] + (i) + (j)*lda + (aoffset) )
165  #define dB(dev, i, j) (dB[dev] + (i) + (j)*ldb + (boffset) )
166  #define dC(dev, i, j) (dC[dev] + (i) + (j)*ldc)
167 
168  /* Check arguments */
169  magma_int_t info = 0;
170  if ( ! (uplo == 'l' || uplo == 'L')) {
171  info = -1; // 'u' not yet handled
172  } else if ( ! (trans == 'n' || trans == 'N')) {
173  info = -2; // 'c' not yet handled
174  } else if ( n < 0 ) {
175  info = -3;
176  } else if ( k < 0 ) {
177  info = -4;
178  } else if ( ((trans == 'n' || trans == 'N') && lda < max(1,n)) ||
179  ((trans == 'c' || trans == 'C') && lda < max(1,k)) ) {
180  info = -7;
181  } else if ( aoffset < 0 || aoffset > lda ) {
182  info = -8;
183  } else if ( ((trans == 'n' || trans == 'N') && ldb < max(1,n)) ||
184  ((trans == 'c' || trans == 'C') && ldb < max(1,k)) ) {
185  info = -10;
186  } else if ( boffset < 0 || boffset > ldb ) {
187  info = -11;
188  } else if ( ldc < max(1,n) ) {
189  info = -13;
190  } else if ( coffset < 0 || coffset > ldc ) {
191  info = -14;
192  } else if ( ngpu <= 0 ) {
193  info = -15;
194  } else if ( nb <= 0 ) {
195  info = -16;
196  } else if ( nstream <= 0 ) {
197  info = -18;
198  }
199  if ( info != 0 ) {
200  magma_xerbla( __func__, -(info) );
201  return;
202  }
203 
204  const double c_one = MAGMA_D_ONE;
205  double cbeta = MAGMA_D_MAKE( beta, 0. );
206 
207  magma_int_t ib, ioff, iblock, idev, di, s;
208 
209  magma_device_t cdev;
210  magma_queue_t cqueue;
211  magma_getdevice( &cdev );
212  magmablasGetKernelStream( &cqueue );
213 
214  // loop over all blocks
215  // Faster to have two loops: first loop does C_hat = alpha*A*B' + beta*C
216  // blockoffset is offset within first block; for subsequent blocks it is 0
217  magma_int_t blockoffset = coffset % nb;
218  for( magma_int_t i = 0; i < n; i += ib ) {
219  ib = min( nb-blockoffset, n-i ); // block size
220  ioff = i + coffset; // global index in parent matrix
221  iblock = (ioff / nb) / ngpu; // local block id
222  idev = (ioff / nb) % ngpu; // device with this block
223  di = iblock*nb + blockoffset; // local index in parent matrix
224 
225  magma_setdevice( idev );
226  s = iblock % nstream;
227  magmablasSetKernelStream( streams[ idev ][ s ] );
228 
229  // C[i:n,i] = alpha * A[i:n,0] * B[i,0]' + beta*C[i:n,i]
230  //printf( "dgemm n=%4d, ib=%4d, k=%4d, i=%4d\n", n-i, ib, k, i );
231  magma_dgemm( MagmaNoTrans, MagmaTrans, n-i, ib, k,
232  alpha, dA(idev,i,0), lda,
233  dB(idev,i,0), ldb,
234  cbeta, dC(idev,ioff,di), ldc );
235  blockoffset = 0;
236  }
237 
238  // second loop does C = (alpha)*B*A' + C_hat
239  alpha = MAGMA_D_CNJG( alpha );
240  blockoffset = coffset % nb;
241  for( magma_int_t i = 0; i < n; i += ib ) {
242  ib = min( nb-blockoffset, n-i ); // block size
243  ioff = i + coffset; // global index in parent matrix
244  iblock = (ioff / nb) / ngpu; // local block id
245  idev = (ioff / nb) % ngpu; // device with this block
246  di = iblock*nb + blockoffset; // local index in parent matrix
247 
248  magma_setdevice( idev );
249  s = iblock % nstream;
250  magmablasSetKernelStream( streams[ idev ][ s ] );
251 
252  // C[i:n,i] += (alpha) * B[i:n,0] * A[i,0]'
253  //printf( "dgemm n=%4d, ib=%4d, k=%4d, i=%4d\n", n-i, ib, k, i );
254  magma_dgemm( MagmaNoTrans, MagmaTrans, n-i, ib, k,
255  alpha, dB(idev,i,0), ldb,
256  dA(idev,i,0), lda,
257  c_one, dC(idev,ioff,di), ldc );
258  blockoffset = 0;
259  }
260 
261  magma_setdevice( cdev );
262  magmablasSetKernelStream( cqueue );
263 }
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
#define MAGMA_D_CNJG(v, t)
Definition: magma.h:164
magma_queue_t streams[MagmaMaxGPUs]
int magma_int_t
Definition: magmablas.h:12
#define dC(dev, i, j)
cublasStatus_t magmablasSetKernelStream(magma_queue_t stream)
void magma_setdevice(magma_device_t dev)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_getdevice(magma_device_t *dev)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define dB(dev, i, j)
#define MagmaTrans
Definition: magma.h:58
void magma_dgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc)
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
cublasStatus_t magmablasGetKernelStream(magma_queue_t *stream)
#define dA(dev, i, j)

Here is the call graph for this function:

Here is the caller graph for this function: