MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dsyr2k_mgpu_spec.cpp File Reference
#include "common_magma.h"
Include dependency graph for dsyr2k_mgpu_spec.cpp:

Go to the source code of this file.

Macros

#define dA(dev, i, j)   (dA[dev] + (i) + (j)*lda + (aoffset) )
 
#define dB(dev, i, j)   (dB[dev] + (i) + (j)*ldb + (boffset) )
 
#define dC(dev, i, j)   (dC[dev] + (i) + (j)*ldc)
 

Functions

void magmablas_dsyr2k_mgpu_spec (char uplo, char trans, magma_int_t n, magma_int_t k, double alpha, double *dA[], magma_int_t lda, magma_int_t aoffset, double *dB[], magma_int_t ldb, magma_int_t boffset, double beta, double *dC[], magma_int_t ldc, magma_int_t coffset, magma_int_t ngpu, magma_int_t nb, magma_queue_t streams[][20], magma_int_t nstream)
 

Macro Definition Documentation

#define dA (   dev,
  i,
  j 
)    (dA[dev] + (i) + (j)*lda + (aoffset) )
#define dB (   dev,
  i,
  j 
)    (dB[dev] + (i) + (j)*ldb + (boffset) )
#define dC (   dev,
  i,
  j 
)    (dC[dev] + (i) + (j)*ldc)

Function Documentation

void magmablas_dsyr2k_mgpu_spec ( char  uplo,
char  trans,
magma_int_t  n,
magma_int_t  k,
double  alpha,
double *  dA[],
magma_int_t  lda,
magma_int_t  aoffset,
double *  dB[],
magma_int_t  ldb,
magma_int_t  boffset,
double  beta,
double *  dC[],
magma_int_t  ldc,
magma_int_t  coffset,
magma_int_t  ngpu,
magma_int_t  nb,
magma_queue_t  streams[][20],
magma_int_t  nstream 
)

Definition at line 160 of file dsyr2k_mgpu_spec.cpp.

References __func__, dA, dB, dC, MAGMA_D_CNJG, MAGMA_D_MAKE, MAGMA_D_ONE, magma_dgemm(), magma_getdevice(), magma_setdevice(), magma_xerbla(), magmablasGetKernelStream(), magmablasSetKernelStream(), MagmaNoTrans, MagmaTrans, max, and min.

166 {
     /* Issues one magma_dgemm per column block of C, in two passes over the
        blocks: the first pass computes alpha*A*B' + beta*C, the second adds
        alpha*B*A' on top of that result. The device and kernel stream that
        were current on entry are saved and restored before returning. */

     /* Element (i,j) of device dev's array, indexed as i + j*ld.
        dA and dB add the caller's aoffset / boffset; dC adds no offset
        (the call sites below pass coffset explicitly as the row index). */
167  #define dA(dev, i, j) (dA[dev] + (i) + (j)*lda + (aoffset) )
168  #define dB(dev, i, j) (dB[dev] + (i) + (j)*ldb + (boffset) )
169  #define dC(dev, i, j) (dC[dev] + (i) + (j)*ldc)
170 
171  /* Check arguments: the first failing test sets a negative info code, which is negated and passed to magma_xerbla below */
172  magma_int_t info = 0;
173  if ( ! (uplo == 'l' || uplo == 'L')) {
174  info = -1; // only 'l'/'L' is accepted; 'u' not yet handled
175  } else if ( ! (trans == 'n' || trans == 'N')) {
176  info = -2; // only 'n'/'N' is accepted; 'c' not yet handled
177  } else if ( n < 0 ) {
178  info = -3;
179  } else if ( k < 0 ) {
180  info = -4;
     // NOTE(review): the 'c'/'C' alternatives in the lda and ldb tests below
     // cannot be true here, because any trans other than 'n'/'N' was already
     // rejected with info = -2.
181  } else if ( ((trans == 'n' || trans == 'N') && lda < max(1,n)) ||
182  ((trans == 'c' || trans == 'C') && lda < max(1,k)) ) {
183  info = -7;
184  } else if ( aoffset < 0 || aoffset > lda ) {
185  info = -8;
186  } else if ( ((trans == 'n' || trans == 'N') && ldb < max(1,n)) ||
187  ((trans == 'c' || trans == 'C') && ldb < max(1,k)) ) {
188  info = -10;
189  } else if ( boffset < 0 || boffset > ldb ) {
190  info = -11;
     // NOTE(review): counting parameters in the documented signature, ldc is
     // the 14th, coffset the 15th, ngpu the 16th, nb the 17th and nstream the
     // 19th, yet the codes below are 13, 14, 15, 16 and 18 -- confirm the
     // intended numbering before relying on the reported position.
191  } else if ( ldc < max(1,n) ) {
192  info = -13;
193  } else if ( coffset < 0 || coffset > ldc ) {
194  info = -14;
195  } else if ( ngpu <= 0 ) {
196  info = -15;
197  } else if ( nb <= 0 ) {
198  info = -16;
199  } else if ( nstream <= 0 ) {
200  info = -18;
201  }
202  if ( info != 0 ) {
203  magma_xerbla( __func__, -(info) );
204  return;
205  }
206 
207  const double c_one = MAGMA_D_ONE;
208  double cbeta = MAGMA_D_MAKE( beta, 0. );
209 
210  magma_int_t ib, ioff, iblock, idev, di, s;
211 
     // remember the caller's device and kernel stream so they can be restored at the end
212  magma_device_t cdev;
213  magma_queue_t cqueue;
214  magma_getdevice( &cdev );
215  magmablasGetKernelStream( &cqueue );
216 
217  // first pass over all column blocks of C
218  // Faster to have two loops: this first loop does C_hat = alpha*A*B' + beta*C
219  // blockoffset is the offset within the first block; it is reset to 0 after the first iteration
220  magma_int_t blockoffset = coffset % nb;
221  for( magma_int_t i = 0; i < n; i += ib ) {
222  ib = min( nb-blockoffset, n-i ); // block size
223  ioff = i + coffset; // global index in parent matrix
224  iblock = (ioff / nb) / ngpu; // local block id
225  idev = (ioff / nb) % ngpu; // device with this block
226  di = iblock*nb + blockoffset; // local index in parent matrix
227 
228  magma_setdevice( idev );
229  s = iblock % nstream; // stream index: local block id modulo nstream
230  magmablasSetKernelStream( streams[ idev ][ s ] );
231 
232  // n-by-ib block of C at (coffset, di) = alpha * A[0:n,:] * B[i:i+ib,:]' + beta * (that same block)
233  //printf( "dgemm n=%4d, ib=%4d, k=%4d, i=%4d\n", n-i, ib, k, i );
234  magma_dgemm( MagmaNoTrans, MagmaTrans, n, ib, k,
235  alpha, dA(idev,0,0), lda,
236  dB(idev,i,0), ldb,
237  cbeta, dC(idev,coffset,di), ldc );
238  blockoffset = 0;
239  }
240 
241  // second loop does C = (alpha)*B*A' + C_hat, walking the same blocks as the first loop
242  alpha = MAGMA_D_CNJG( alpha );
243  blockoffset = coffset % nb;
244  for( magma_int_t i = 0; i < n; i += ib ) {
245  ib = min( nb-blockoffset, n-i ); // block size
246  ioff = i + coffset; // global index in parent matrix
247  iblock = (ioff / nb) / ngpu; // local block id
248  idev = (ioff / nb) % ngpu; // device with this block
249  di = iblock*nb + blockoffset; // local index in parent matrix
250 
251  magma_setdevice( idev );
252  s = iblock % nstream; // stream index: local block id modulo nstream
253  magmablasSetKernelStream( streams[ idev ][ s ] );
254 
255  // n-by-ib block of C at (coffset, di) += (alpha) * B[0:n,:] * A[i:i+ib,:]'
256  //printf( "dgemm n=%4d, ib=%4d, k=%4d, i=%4d\n", n-i, ib, k, i );
257  magma_dgemm( MagmaNoTrans, MagmaTrans, n, ib, k,
258  alpha, dB(idev,0,0), ldb,
259  dA(idev,i,0), lda,
260  c_one, dC(idev,coffset,di), ldc );
261  blockoffset = 0;
262  }
263 
     // restore the device and kernel stream that were current on entry
264  magma_setdevice( cdev );
265  magmablasSetKernelStream( cqueue );
266 }
#define min(a, b)
Definition: common_magma.h:86
#define MAGMA_D_ONE
Definition: magma.h:176
#define __func__
Definition: common_magma.h:65
#define MAGMA_D_CNJG(v, t)
Definition: magma.h:164
magma_queue_t streams[MagmaMaxGPUs]
int magma_int_t
Definition: magmablas.h:12
cublasStatus_t magmablasSetKernelStream(magma_queue_t stream)
void magma_setdevice(magma_device_t dev)
#define MAGMA_D_MAKE(r, i)
Definition: magma.h:167
void magma_getdevice(magma_device_t *dev)
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define dB(dev, i, j)
#define MagmaTrans
Definition: magma.h:58
void magma_dgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, double alpha, magmaDouble_const_ptr dA, magma_int_t ldda, magmaDouble_const_ptr dB, magma_int_t lddb, double beta, magmaDouble_ptr dC, magma_int_t lddc)
#define MagmaNoTrans
Definition: magma.h:57
#define max(a, b)
Definition: common_magma.h:82
cublasStatus_t magmablasGetKernelStream(magma_queue_t *stream)
#define dC(dev, i, j)
#define dA(dev, i, j)

Here is the call graph for this function:

Here is the caller graph for this function: