MAGMA  1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
clarfb_gpu.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.2.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  May 2012
7 
8  @author Mark Gates
9  @generated c Thu May 10 22:26:53 2012
10 */
11 #include "common_magma.h"
12 
13 // === Define what BLAS to use ============================================
14 #define PRECISION_c
15 #if (defined(PRECISION_s) || defined(PRECISION_d))
16 // #define magma_cgemm magmablas_cgemm
17 #endif
18 // === End defining what BLAS to use =======================================
19 
20 extern "C" magma_int_t
21 magma_clarfb_gpu( char side, char trans, char direct, char storev,
23  cuFloatComplex *dV, magma_int_t ldv,
24  cuFloatComplex *dT, magma_int_t ldt,
25  cuFloatComplex *dC, magma_int_t ldc,
26  cuFloatComplex *dwork, magma_int_t ldwork)
27 {
28 /* -- MAGMA (version 1.2.0) --
29  Univ. of Tennessee, Univ. of California Berkeley
30  May 2012
31 
32  Purpose
33  =======
34  CLARFB applies a complex block reflector H or its transpose H^H to a
35  COMPLEX m by n matrix C, from the left.
36 
37  Arguments
38  =========
39  SIDE (input) CHARACTER
40  = 'L': apply H or H^H from the Left
41  = 'R': apply H or H^H from the Right
42 
43  TRANS (input) CHARACTER
44  = 'N': apply H (No transpose)
45  = 'C': apply H^H (Conjugate transpose)
46 
47  DIRECT (input) CHARACTER
48  Indicates how H is formed from a product of elementary
49  reflectors
50  = 'F': H = H(1) H(2) . . . H(k) (Forward)
51  = 'B': H = H(k) . . . H(2) H(1) (Backward)
52 
53  STOREV (input) CHARACTER
54  Indicates how the vectors which define the elementary
55  reflectors are stored:
56  = 'C': Columnwise
57  = 'R': Rowwise
58 
59  M (input) INTEGER
60  The number of rows of the matrix C.
61 
62  N (input) INTEGER
63  The number of columns of the matrix C.
64 
65  K (input) INTEGER
66  The order of the matrix T (= the number of elementary
67  reflectors whose product defines the block reflector).
68 
69  DV (input) COMPLEX array on the GPU, dimension
70  (LDV,K) if STOREV = 'C'
71  (LDV,M) if STOREV = 'R' and SIDE = 'L'
72  (LDV,N) if STOREV = 'R' and SIDE = 'R'
73  The matrix V. See further details.
74 
75  LDV (input) INTEGER
76  The leading dimension of the array V.
77  If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
78  if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
79  if STOREV = 'R', LDV >= K.
80 
81  DT (input) COMPLEX array on the GPU, dimension (LDT,K)
82  The triangular k by k matrix T in the representation of the
83  block reflector.
84 
85  LDT (input) INTEGER
86  The leading dimension of the array T. LDT >= K.
87 
88  DC (input/output) COMPLEX array on the GPU, dimension (LDC,N)
89  On entry, the m by n matrix C.
90  On exit, C is overwritten by H*C, or H^H*C, or C*H, or C*H^H.
91 
92  LDC (input) INTEGER
93  The leading dimension of the array C. LDA >= max(1,M).
94 
95  WORK (workspace) COMPLEX array, dimension (LDWORK,K)
96 
97  LDWORK (input) INTEGER
98  The leading dimension of the array WORK.
99  If SIDE == 'L', LDWORK >= max(1,N);
100  if SIDE == 'R', LDWORK >= max(1,M);
101 
102  Further Details
103  ===============
104 
105  The shape of the matrix V and the storage of the vectors which define
106  the H(i) is best illustrated by the following example with n = 5 and
107  k = 3.
108  All elements including 0's and 1's are stored, unlike LAPACK.
109 
110  DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
111 
112  V = ( 1 0 0 ) V = ( 1 v1 v1 v1 v1 )
113  ( v1 1 0 ) ( 0 1 v2 v2 v2 )
114  ( v1 v2 1 ) ( 0 0 1 v3 v3 )
115  ( v1 v2 v3 )
116  ( v1 v2 v3 )
117 
118  DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
119 
120  V = ( v1 v2 v3 ) V = ( v1 v1 1 0 0 )
121  ( v1 v2 v3 ) ( v2 v2 v2 1 0 )
122  ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
123  ( 0 1 v3 )
124  ( 0 0 1 )
125 
126  =================================================================== */
127 
128  cuFloatComplex c_zero = MAGMA_C_ZERO;
129  cuFloatComplex c_one = MAGMA_C_ONE;
130  cuFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
131 
132  /* Function Body */
133  if (m <= 0 || n <= 0) {
134  return MAGMA_SUCCESS;
135  }
136 
137  // opposite of trans
138  char transt;
139  if (trans == 'N' || trans == 'n')
140  transt = MagmaConjTrans;
141  else
142  transt = MagmaNoTrans;
143 
144  // whether T is upper or lower triangular
145  char uplo;
146  if (direct == 'F' || direct == 'f')
147  uplo = MagmaUpper;
148  else
149  uplo = MagmaLower;
150 
151  // whether V is stored transposed or not
152  char notransV, transV;
153  if (storev == 'C' || storev == 'c') {
154  notransV = MagmaNoTrans;
155  transV = MagmaConjTrans;
156  }
157  else {
158  notransV = MagmaConjTrans;
159  transV = MagmaNoTrans;
160  }
161 
162  if ( side == 'l' || side == 'L' ) {
163  // Form H C or H^H C
164  // Comments assume H C. When forming H^H C, T gets transposed via transt.
165 
166  // W = C^H V
167  magma_cgemm( MagmaConjTrans, notransV,
168  n, k, m,
169  c_one, dC, ldc,
170  dV, ldv,
171  c_zero, dwork, ldwork);
172 
173  // W = W T^H = C^H V T^H
174  magma_ctrmm( MagmaRight, uplo, transt, MagmaNonUnit,
175  n, k,
176  c_one, dT, ldt,
177  dwork, ldwork);
178 
179  // C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
180  magma_cgemm( notransV, MagmaConjTrans,
181  m, n, k,
182  c_neg_one, dV, ldv,
183  dwork, ldwork,
184  c_one, dC, ldc);
185  }
186  else {
187  // Form C H or C H^H
188  // Comments assume C H. When forming C H^H, T gets transposed via trans.
189 
190  // W = C V
191  magma_cgemm( MagmaNoTrans, notransV,
192  m, k, n,
193  c_one, dC, ldc,
194  dV, ldv,
195  c_zero, dwork, ldwork);
196 
197  // W = W T = C V T
198  magma_ctrmm( MagmaRight, uplo, trans, MagmaNonUnit,
199  m, k,
200  c_one, dT, ldt,
201  dwork, ldwork);
202 
203  // C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H
204  magma_cgemm( MagmaNoTrans, transV,
205  m, n, k,
206  c_neg_one, dwork, ldwork,
207  dV, ldv,
208  c_one, dC, ldc);
209  }
210 
211  return MAGMA_SUCCESS;
212 } /* magma_clarfb */