MAGMA
1.2.0
Matrix Algebra on GPU and Multicore Architectures
Main Page
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
sgeqrs3_gpu.cpp
Go to the documentation of this file.
1
/*
2
-- MAGMA (version 1.2.0) --
3
Univ. of Tennessee, Knoxville
4
Univ. of California, Berkeley
5
Univ. of Colorado, Denver
6
May 2012
7
8
@generated s Thu May 10 22:26:52 2012
9
10
*/
11
#include "common_magma.h"
12
13
extern "C" magma_int_t
magma_sgeqrs3_gpu( magma_int_t m, magma_int_t n, magma_int_t nrhs,
                   float *dA,    magma_int_t ldda,
                   float *tau,   float *dT,
                   float *dB,    magma_int_t lddb,
                   float *hwork, magma_int_t lwork,
                   magma_int_t *info)
{
/*  -- MAGMA (version 1.2.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       May 2012

    Purpose
    =======
    Solves the least squares problem
           min || A*X - C ||
    using the QR factorization A = Q*R computed by SGEQRF3_GPU.

    The routine first applies Q' to the right-hand sides (magma_sormqr_gpu),
    then solves the upper triangular system R*X = B(1:n,:). For the
    triangular solve the diagonal blocks kept in dT are swapped into dA,
    and swapped back again once the solve has finished.

    Arguments
    =========
    M       (input) INTEGER
            The number of rows of the matrix A. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A. M >= N >= 0.

    NRHS    (input) INTEGER
            The number of columns of the matrix C. NRHS >= 0.

    DA      (input) REAL array on the GPU, dimension (LDDA,N)
            The i-th column must contain the vector which defines the
            elementary reflector H(i), for i = 1,2,...,n, as returned by
            SGEQRF3_GPU in the first n columns of its array argument A.
            The array is modified temporarily during the triangular solve
            and restored before a successful return.

    LDDA    (input) INTEGER
            The leading dimension of the array DA, LDDA >= max(1,M).

    TAU     (input) REAL array, dimension (N)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by MAGMA_SGEQRF_GPU.

    DT      (input) REAL array that is the output (the 6th argument)
            of magma_sgeqrf_gpu of size
            2*MIN(M, N)*NB + ((N+31)/32*32 )* MAX(NB, NRHS).
            The array starts with a block of size MIN(M,N)*NB that stores
            the triangular T matrices used in the QR factorization,
            followed by MIN(M,N)*NB block storing the diagonal block
            matrices for the R matrix, followed by work space of size
            ((N+31)/32*32 )* MAX(NB, NRHS).

            NOTE(review): the TAU and DT descriptions name SGEQRF_GPU while
            the Purpose section names SGEQRF3_GPU; confirm which
            factorization routine is expected to have produced them.

    DB      (input/output) REAL array on the GPU, dimension (LDDB,NRHS)
            On entry, the M-by-NRHS matrix C.
            On exit, the N-by-NRHS solution matrix X.

    LDDB    (input) INTEGER
            The leading dimension of the array DB. LDDB >= max(1,M).

    HWORK   (workspace/output) REAL array, dimension (LWORK)
            HWORK(1) is set to the optimal LWORK on entry to the routine,
            before the arguments are checked, so it must always point to at
            least one writable element.

    LWORK   (input) INTEGER
            The dimension of the array HWORK,
            LWORK >= (M-N+NB)*(NRHS + 2*NB), where NB is the blocksize
            given by magma_get_sgeqrf_nb( M ). Smaller values are rejected
            with INFO = -11.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the HWORK array, returns
            this value as the first entry of the HWORK array.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
                  (-1: M, -2: N, -3: NRHS, -5: LDDA, -9: LDDB, -11: LWORK),
                  or the value reported by magma_sormqr_gpu if that call
                  fails.

    The value stored in INFO is also the function's return value.
    =====================================================================    */

    /* Element (a_1, a_2) of the column-major matrix dA. */
    #define a_ref(a_1,a_2) (dA+(a_2)*(ldda) + (a_1))
    /* Offset into dT past the first lddwork*nb elements. */
    #define d_ref(a_1)     (dT+(lddwork+(a_1))*nb)

    float c_one = MAGMA_S_ONE;
    magma_int_t k, lddwork;

    magma_int_t nb     = magma_get_sgeqrf_nb(m);
    magma_int_t lwkopt = (m-n+nb)*(nrhs+2*nb);
    long int lquery = (lwork == -1);

    /* Report the optimal workspace size, also for workspace queries. */
    hwork[0] = MAGMA_S_MAKE( (float)lwkopt, 0. );

    /* Argument checks: -i identifies the i-th argument of this routine.
       lddb is argument 9 and lwork is argument 11. */
    *info = 0;
    if (m < 0)
        *info = -1;
    else if (n < 0 || m < n)
        *info = -2;
    else if (nrhs < 0)
        *info = -3;
    else if (ldda < max(1,m))
        *info = -5;
    else if (lddb < max(1,m))
        *info = -9;
    else if (lwork < lwkopt && ! lquery)
        *info = -11;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery)
        return *info;

    /* Quick return for an empty problem. */
    k = min(m,n);
    if (k == 0) {
        hwork[0] = c_one;
        return *info;
    }
    lddwork= k;

    /* B := Q' * B */
    magma_sormqr_gpu( MagmaLeft, MagmaTrans,
                      m, nrhs, n,
                      a_ref(0,0), ldda, tau,
                      dB, lddb, hwork, lwork, dT, nb, info );
    if ( *info != 0 ) {
        return *info;
    }

    /* Solve R*X = B(1:n,:)
       1. Move the block diagonal submatrices from d_ref to R
       2. Solve
       3. Restore the data format moving data from R back to d_ref
    */
    magmablas_sswapdblk(k, nb, a_ref(0,0), ldda, 1, d_ref(0), nb, 0);
    if ( nrhs == 1 ) {
        /* Single right-hand side: triangular solve on a vector. */
        magma_strsv(MagmaUpper, MagmaNoTrans, MagmaNonUnit,
                    n, a_ref(0,0), ldda, dB, 1);
    } else {
        magma_strsm(MagmaLeft, MagmaUpper, MagmaNoTrans, MagmaNonUnit,
                    n, nrhs, c_one, a_ref(0,0), ldda, dB, lddb);
    }
    magmablas_sswapdblk(k, nb, d_ref(0), nb, 0, a_ref(0,0), ldda, 1);

    return *info;
}
155
156
#undef a_ref
157
#undef d_ref
magma-1.2.0
src
sgeqrs3_gpu.cpp
Generated on Mon May 21 2012 16:42:24 for MAGMA by
1.8.1