MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
cssssm_gpu.cpp File Reference
#include "common_magma.h"
Include dependency graph for cssssm_gpu.cpp:

Go to the source code of this file.

Macros

#define A1T(i, j)   (dA1T + (i)*ldda1 + (j))
 
#define A2T(i, j)   (dA2T + (i)*ldda2 + (j))
 
#define L1(i)   (dL1 + (i)*lddl1 )
 
#define L2(i, j)   (dL2 + (i)*lddl2i + (j)*lddl2j)
 

Functions

magma_int_t magma_cssssm_gpu (char storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaFloatComplex *dA1, magma_int_t ldda1, magmaFloatComplex *dA2, magma_int_t ldda2, magmaFloatComplex *dL1, magma_int_t lddl1, magmaFloatComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info)
 

Macro Definition Documentation

#define A1T(i, j)   (dA1T + (i)*ldda1 + (j))
#define A2T(i, j)   (dA2T + (i)*ldda2 + (j))
#define L1(i)   (dL1 + (i)*lddl1 )
#define L2(i, j)   (dL2 + (i)*lddl2i + (j)*lddl2j)

Function Documentation

magma_int_t magma_cssssm_gpu ( char  storev,
magma_int_t  m1,
magma_int_t  n1,
magma_int_t  m2,
magma_int_t  n2,
magma_int_t  k,
magma_int_t  ib,
magmaFloatComplex *  dA1,
magma_int_t  ldda1,
magmaFloatComplex *  dA2,
magma_int_t  ldda2,
magmaFloatComplex *  dL1,
magma_int_t  lddl1,
magmaFloatComplex *  dL2,
magma_int_t  lddl2,
magma_int_t *  IPIV,
magma_int_t *  info
)

Definition at line 17 of file cssssm_gpu.cpp.

References __func__, A1T, A2T, L1, L2, MAGMA_C_NEG_ONE, MAGMA_C_ONE, magma_cgemm(), magma_ctrmm(), magma_ctrsm(), magma_xerbla(), magmablas_cgetmo_in, magmablas_cgetmo_out, magmablas_cswap(), magmablas_cswapblk(), MagmaLower, MagmaNoTrans, MagmaRight, MagmaTrans, MagmaUnit, max, and min.

24 {
25 /* -- MAGMA (version 1.4.0) --
26  Univ. of Tennessee, Knoxville
27  Univ. of California, Berkeley
28  Univ. of Colorado, Denver
29  August 2013
30 
31  Purpose
32  =======
33  CSSSSM applies the LU factorization update from a complex
34  matrix formed by a lower triangular IB-by-K tile L1 on top of a
35  M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1
36  tile A1 on top of a M2-by-N2 tile A2 (N1 == N2).
37 
38  This is the right-looking Level 2.5 BLAS version of the algorithm.
39 
40  Arguments
41  =========
42  M1 (input) INTEGER
43  The number of rows of the matrix A1. M1 >= 0.
44 
45  N1 (input) INTEGER
46  The number of columns of the matrix A1. N1 >= 0.
47 
48  M2 (input) INTEGER
49  The number of rows of the matrix A2. M2 >= 0.
50 
51  N2 (input) INTEGER
52  The number of columns of the matrix A2. N2 >= 0.
53 
54  K (input) INTEGER
55  The number of columns of the matrix L1 and L2. K >= 0.
56 
57  IB (input) INTEGER
58  The inner-blocking size. IB >= 0.
59 
60  dA1 (input,output) COMPLEX array, dimension(LDDA1, N1), on gpu.
61  On entry, the M1-by-N1 tile dA1.
62  On exit, dA1 is updated by the application of dL (dL1 dL2).
63 
64  LDDA1 (input) INTEGER
65  The leading dimension of the array dA1. LDDA1 >= max(1,M1).
66 
67  dA2 (input,output) COMPLEX array, dimension(LDDA2, N2), on gpu.
68  On entry, the M2-by-N2 tile dA2.
69  On exit, dA2 is updated by the application of dL (dL1 dL2).
70 
71  LDDA2 (input) INTEGER
72  The leading dimension of the array dA2. LDDA2 >= max(1,M2).
73 
74  dL1 (input) COMPLEX array, dimension(LDDL1, K), on gpu.
75  The inverse of the IB-by-K lower triangular tile as returned by
76  CTSTRF.
77 
78  LDDL1 (input) INTEGER
79  The leading dimension of the array L1. LDDL1 >= max(1,2*IB).
80 
81  dL2 (input) COMPLEX array, dimension(LDDL2, K)
82  The M2-by-K tile as returned by CTSTRF.
83 
84  LDDL2 (input) INTEGER
85  The leading dimension of the array L2. LDDL2 >= max(1,M2).
86 
87  IPIV (input) INTEGER array on the cpu.
88  The pivot indices array of size K as returned by CTSTRF.
89 
90  ===================================================================== */
91 
92 #define A1T(i,j) (dA1T + (i)*ldda1 + (j))
93 #define A2T(i,j) (dA2T + (i)*ldda2 + (j))
94 #define L1(i) (dL1 + (i)*lddl1 )
95 #define L2(i,j) (dL2 + (i)*lddl2i + (j)*lddl2j)
96 
97  magmaFloatComplex c_one = MAGMA_C_ONE;
98  magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
99 
100  int ip, ii, sb;
101  magmaFloatComplex *dA1T, *dA2T;
102  char transL;
103  int lddl2i, lddl2j;
104 
105  /* Check input arguments */
106  *info = 0;
107  if (m1 < 0) {
108  *info = -1;
109  }
110  else if (n1 < 0) {
111  *info = -2;
112  }
113  else if (m2 < 0) {
114  *info = -3;
115  }
116  else if (n2 < 0) {
117  *info = -4;
118  }
119  else if (k < 0) {
120  *info = -5;
121  }
122  else if (ib < 0) {
123  *info = -6;
124  }
125  else if (ldda1 < max(1,m1)) {
126  *info = -8;
127  }
128  else if (ldda2 < max(1,m2)) {
129  *info = -10;
130  }
131  else if (lddl1 < max(1,ib)) {
132  *info = -12;
133  }
134  else if (lddl2 < max(1,m2)) {
135  *info = -14;
136  }
137 
138  if (*info != 0) {
139  magma_xerbla( __func__, -(*info) );
140  return *info;
141  }
142 
143  /* Quick return */
144  if ((m1 == 0) || (n1 == 0) || (m2 == 0) || (n2 == 0) || (k == 0) || (ib == 0))
145  return *info;
146 
147  if ( (storev == 'C') || (storev == 'c') ) {
148  magmablas_cgetmo_in( dA1, dA1T, ldda1, m1, n1 );
149  magmablas_cgetmo_in( dA2, dA2T, ldda2, m2, n2 );
150  transL = MagmaTrans;
151  lddl2i = 1; lddl2j = lddl2;
152  } else {
153  dA1T = dA1;
154  dA2T = dA2;
155  transL = MagmaNoTrans;
156  lddl2i = lddl2; lddl2j = 1;
157  }
158 
159  ip = 0;
160  for( ii=0; ii<k; ii+=ib )
161  {
162  sb = min( k-ii, ib);
163 
164 #ifndef NOSWAPBLK
165  magmablas_cswapblk( 'R', n1,
166  A1T(0, 0), ldda1,
167  A2T(0, 0), ldda2,
168  ii+1, ii+ib, IPIV, 1, m1 );
169 #else
170  {
171  int im;
172  for(i=0; i<ib; i++) {
173  im = IPIV[ip]-1;
174 
175  if (im != (ii+i)) {
176  im = im - m1;
177 
178  assert( (im>=0) && (im<m1) && (im<m2) );
179  magmablas_cswap( n1, A1T(ii+i, 0), 1, A2T(im, 0), 1 );
180  }
181  ip++;
182  }
183  }
184 #endif
185 
186 #ifndef WITHOUTTRTRI
187  /* Lower, Trans, because L1 is not transposed */
188  magma_ctrmm( MagmaRight, MagmaLower, transL, MagmaUnit,
189  n1, sb,
190  c_one, L1( ii), lddl1,
191  A1T(ii, 0), ldda1);
192 #else
193  /* Lower, Trans, because L1 is not transposed */
194  magma_ctrsm( MagmaRight, MagmaLower, transL, MagmaUnit,
195  n1, sb,
196  c_one, L1( ii), lddl1,
197  A1T(ii, 0), ldda1);
198 #endif
199 
200  /* Second parameter is trans because L2 is not transposed */
201  magma_cgemm( MagmaNoTrans, transL,
202  n2, m2, sb,
203  c_neg_one, A1T(ii, 0), ldda1,
204  L2( 0, ii), lddl2,
205  c_one, A2T(0, 0 ), ldda2 );
206  }
207 
208  if ( (storev == 'C') || (storev == 'c') ) {
209  magmablas_cgetmo_out( dA1, dA1T, ldda1, m1, n1 );
210  magmablas_cgetmo_out( dA2, dA2T, ldda2, m2, n2 );
211  }
212  return *info;
213 }
#define min(a, b)
Definition: common_magma.h:86
#define __func__
Definition: common_magma.h:65
#define A2T(i, j)
#define MAGMA_C_NEG_ONE
Definition: magma.h:156
#define L2(i, j)
void magma_cgemm(magma_trans_t transA, magma_trans_t transB, magma_int_t m, magma_int_t n, magma_int_t k, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_const_ptr dB, magma_int_t lddb, magmaFloatComplex beta, magmaFloatComplex_ptr dC, magma_int_t lddc)
void magma_ctrsm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
#define magmablas_cgetmo_in(dA, dAT, ldda, m, n)
Definition: transpose.h:55
void magmablas_cswapblk(magma_storev_t storev, magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb, magma_int_t i1, magma_int_t i2, const magma_int_t *ipiv, magma_int_t inci, magma_int_t offset)
#define MagmaLower
Definition: magma.h:62
void magma_xerbla(const char *srname, magma_int_t info)
Definition: xerbla.cpp:8
#define MagmaTrans
Definition: magma.h:58
#define MAGMA_C_ONE
Definition: magma.h:154
#define magmablas_cgetmo_out(dA, dAT, ldda, m, n)
Definition: transpose.h:66
#define MagmaRight
Definition: magma.h:69
#define MagmaUnit
Definition: magma.h:66
void magma_ctrmm(magma_side_t side, magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_int_t m, magma_int_t n, magmaFloatComplex alpha, magmaFloatComplex_const_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
#define MagmaNoTrans
Definition: magma.h:57
#define A1T(i, j)
#define max(a, b)
Definition: common_magma.h:82
void magmablas_cswap(magma_int_t n, magmaFloatComplex_ptr dA, magma_int_t ldda, magmaFloatComplex_ptr dB, magma_int_t lddb)
#define L1(i)

Here is the call graph for this function:

Here is the caller graph for this function: