MAGMA
1.2.0
MatrixAlgebraonGPUandMulticoreArchitectures
Main Page
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
clarfb_gpu.cpp
Go to the documentation of this file.
1
/*
2
-- MAGMA (version 1.2.0) --
3
Univ. of Tennessee, Knoxville
4
Univ. of California, Berkeley
5
Univ. of Colorado, Denver
6
May 2012
7
8
@author Mark Gates
9
@generated c Thu May 10 22:26:53 2012
10
*/
11
#include "common_magma.h"
12
13
// === Define what BLAS to use ============================================
14
#define PRECISION_c
15
#if (defined(PRECISION_s) || defined(PRECISION_d))
16
// #define magma_cgemm magmablas_cgemm
17
#endif
18
// === End defining what BLAS to use =======================================
19
20
extern
"C"
magma_int_t
21
magma_clarfb_gpu
(
char
side
,
char
trans
,
char
direct,
char
storev,
22
magma_int_t
m,
magma_int_t
n,
magma_int_t
k,
23
cuFloatComplex *
dV
,
magma_int_t
ldv,
24
cuFloatComplex *
dT
,
magma_int_t
ldt,
25
cuFloatComplex *dC,
magma_int_t
ldc,
26
cuFloatComplex *
dwork
,
magma_int_t
ldwork)
27
{
28
/* -- MAGMA (version 1.2.0) --
29
Univ. of Tennessee, Univ. of California Berkeley
30
May 2012
31
32
Purpose
33
=======
34
CLARFB applies a complex block reflector H or its transpose H^H to a
35
COMPLEX m by n matrix C, from the left.
36
37
Arguments
38
=========
39
SIDE (input) CHARACTER
40
= 'L': apply H or H^H from the Left
41
= 'R': apply H or H^H from the Right
42
43
TRANS (input) CHARACTER
44
= 'N': apply H (No transpose)
45
= 'C': apply H^H (Conjugate transpose)
46
47
DIRECT (input) CHARACTER
48
Indicates how H is formed from a product of elementary
49
reflectors
50
= 'F': H = H(1) H(2) . . . H(k) (Forward)
51
= 'B': H = H(k) . . . H(2) H(1) (Backward)
52
53
STOREV (input) CHARACTER
54
Indicates how the vectors which define the elementary
55
reflectors are stored:
56
= 'C': Columnwise
57
= 'R': Rowwise
58
59
M (input) INTEGER
60
The number of rows of the matrix C.
61
62
N (input) INTEGER
63
The number of columns of the matrix C.
64
65
K (input) INTEGER
66
The order of the matrix T (= the number of elementary
67
reflectors whose product defines the block reflector).
68
69
DV (input) COMPLEX array on the GPU, dimension
70
(LDV,K) if STOREV = 'C'
71
(LDV,M) if STOREV = 'R' and SIDE = 'L'
72
(LDV,N) if STOREV = 'R' and SIDE = 'R'
73
The matrix V. See further details.
74
75
LDV (input) INTEGER
76
The leading dimension of the array V.
77
If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
78
if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
79
if STOREV = 'R', LDV >= K.
80
81
DT (input) COMPLEX array on the GPU, dimension (LDT,K)
82
The triangular k by k matrix T in the representation of the
83
block reflector.
84
85
LDT (input) INTEGER
86
The leading dimension of the array T. LDT >= K.
87
88
DC (input/output) COMPLEX array on the GPU, dimension (LDC,N)
89
On entry, the m by n matrix C.
90
On exit, C is overwritten by H*C, or H^H*C, or C*H, or C*H^H.
91
92
LDC (input) INTEGER
93
The leading dimension of the array C. LDA >= max(1,M).
94
95
WORK (workspace) COMPLEX array, dimension (LDWORK,K)
96
97
LDWORK (input) INTEGER
98
The leading dimension of the array WORK.
99
If SIDE == 'L', LDWORK >= max(1,N);
100
if SIDE == 'R', LDWORK >= max(1,M);
101
102
Further Details
103
===============
104
105
The shape of the matrix V and the storage of the vectors which define
106
the H(i) is best illustrated by the following example with n = 5 and
107
k = 3.
108
All elements including 0's and 1's are stored, unlike LAPACK.
109
110
DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
111
112
V = ( 1 0 0 ) V = ( 1 v1 v1 v1 v1 )
113
( v1 1 0 ) ( 0 1 v2 v2 v2 )
114
( v1 v2 1 ) ( 0 0 1 v3 v3 )
115
( v1 v2 v3 )
116
( v1 v2 v3 )
117
118
DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
119
120
V = ( v1 v2 v3 ) V = ( v1 v1 1 0 0 )
121
( v1 v2 v3 ) ( v2 v2 v2 1 0 )
122
( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
123
( 0 1 v3 )
124
( 0 0 1 )
125
126
=================================================================== */
127
128
cuFloatComplex c_zero =
MAGMA_C_ZERO
;
129
cuFloatComplex c_one =
MAGMA_C_ONE
;
130
cuFloatComplex c_neg_one =
MAGMA_C_NEG_ONE
;
131
132
/* Function Body */
133
if
(m <= 0 || n <= 0) {
134
return
MAGMA_SUCCESS
;
135
}
136
137
// opposite of trans
138
char
transt;
139
if
(trans ==
'N'
|| trans ==
'n'
)
140
transt =
MagmaConjTrans
;
141
else
142
transt =
MagmaNoTrans
;
143
144
// whether T is upper or lower triangular
145
char
uplo
;
146
if
(direct ==
'F'
|| direct ==
'f'
)
147
uplo =
MagmaUpper
;
148
else
149
uplo =
MagmaLower
;
150
151
// whether V is stored transposed or not
152
char
notransV, transV;
153
if
(storev ==
'C'
|| storev ==
'c'
) {
154
notransV =
MagmaNoTrans
;
155
transV =
MagmaConjTrans
;
156
}
157
else
{
158
notransV =
MagmaConjTrans
;
159
transV =
MagmaNoTrans
;
160
}
161
162
if
( side ==
'l'
|| side ==
'L'
) {
163
// Form H C or H^H C
164
// Comments assume H C. When forming H^H C, T gets transposed via transt.
165
166
// W = C^H V
167
magma_cgemm
(
MagmaConjTrans
, notransV,
168
n, k, m,
169
c_one, dC, ldc,
170
dV, ldv,
171
c_zero, dwork, ldwork);
172
173
// W = W T^H = C^H V T^H
174
magma_ctrmm
(
MagmaRight
, uplo, transt,
MagmaNonUnit
,
175
n, k,
176
c_one, dT, ldt,
177
dwork, ldwork);
178
179
// C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
180
magma_cgemm
( notransV,
MagmaConjTrans
,
181
m, n, k,
182
c_neg_one, dV, ldv,
183
dwork, ldwork,
184
c_one, dC, ldc);
185
}
186
else
{
187
// Form C H or C H^H
188
// Comments assume C H. When forming C H^H, T gets transposed via trans.
189
190
// W = C V
191
magma_cgemm
(
MagmaNoTrans
, notransV,
192
m, k, n,
193
c_one, dC, ldc,
194
dV, ldv,
195
c_zero, dwork, ldwork);
196
197
// W = W T = C V T
198
magma_ctrmm
(
MagmaRight
, uplo, trans,
MagmaNonUnit
,
199
m, k,
200
c_one, dT, ldt,
201
dwork, ldwork);
202
203
// C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H
204
magma_cgemm
(
MagmaNoTrans
, transV,
205
m, n, k,
206
c_neg_one, dwork, ldwork,
207
dV, ldv,
208
c_one, dC, ldc);
209
}
210
211
return
MAGMA_SUCCESS
;
212
}
/* magma_clarfb */
magma-1.2.0
src
clarfb_gpu.cpp
Generated on Mon May 21 2012 16:42:22 for MAGMA by
1.8.1