MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
flops.h
Go to the documentation of this file.
1 
/*
 * This file provides the flop-count formulas for some Level 2 BLAS, all
 * Level 3 BLAS and some LAPACK routines. Each macro takes the same size
 * parameters as the associated routine and provides one formula for
 * additions and one for multiplications. Example of how to use these macros:
 *
 *     FLOPS_ZGEMM( m, n, k )
 *
 * All the formulas are reported in LAPACK Working Note (LAWN) 41:
 *     http://www.netlib.org/lapack/lawns/lawn41.ps
 */
23 #ifndef MAGMA_FLOPS_H
24 #define MAGMA_FLOPS_H
25 
/************************************************************************
 * Generic formulas taken from LAWN 41
 ***********************************************************************/
29 
30 /*
31  * Level 2 BLAS
32  */
/*
 * xGEMV operation counts for size arguments (m_, n_):
 *   multiplications = m_*n_ + 2*m_
 *   additions       = m_*n_
 */
#define FMULS_GEMV(m_, n_)  ( (m_) * (n_) + 2.0 * (m_) )
#define FADDS_GEMV(m_, n_)  ( (m_) * (n_) )

/* xSYMV: the GEMV counts evaluated with both size arguments equal to n_. */
#define FMULS_SYMV(n_)      FMULS_GEMV((n_), (n_))
#define FADDS_SYMV(n_)      FADDS_GEMV((n_), (n_))

/* xHEMV reuses the SYMV counts unchanged. */
#define FMULS_HEMV          FMULS_SYMV
#define FADDS_HEMV          FADDS_SYMV
40 
41 /*
42  * Level 3 BLAS
43  */
/*
 * xGEMM operation counts: multiplications and additions are both the
 * product of the three size arguments.
 */
#define FMULS_GEMM(m_, n_, k_)  ( (m_) * (n_) * (k_) )
#define FADDS_GEMM(m_, n_, k_)  ( (m_) * (n_) * (k_) )

/*
 * xSYMM operation counts, expressed through the GEMM formulas.
 * When side_ equals MagmaLeft the result is GEMM(m_, m_, n_); for any
 * other side_ value it is GEMM(m_, n_, n_).
 */
#define FMULS_SYMM(side_, m_, n_)                   \
    ( ((side_) == MagmaLeft)                        \
        ? FMULS_GEMM((m_), (m_), (n_))              \
        : FMULS_GEMM((m_), (n_), (n_)) )
#define FADDS_SYMM(side_, m_, n_)                   \
    ( ((side_) == MagmaLeft)                        \
        ? FADDS_GEMM((m_), (m_), (n_))              \
        : FADDS_GEMM((m_), (n_), (n_)) )

/* xHEMM reuses the SYMM counts unchanged. */
#define FMULS_HEMM  FMULS_SYMM
#define FADDS_HEMM  FADDS_SYMM
51 
/*
 * xSYRK operation counts for size arguments (k_, n_):
 * multiplications and additions are both k_*n_*(n_+1)/2.
 */
#define FMULS_SYRK(k_, n_)   ( 0.5 * (k_) * (n_) * ((n_) + 1) )
#define FADDS_SYRK(k_, n_)   ( 0.5 * (k_) * (n_) * ((n_) + 1) )

/* xHERK reuses the SYRK counts unchanged. */
#define FMULS_HERK           FMULS_SYRK
#define FADDS_HERK           FADDS_SYRK

/*
 * xSYR2K operation counts for size arguments (k_, n_):
 *   multiplications = k_*n_*n_
 *   additions       = k_*n_*n_ + n_
 */
#define FMULS_SYR2K(k_, n_)  ( (k_) * (n_) * (n_) )
#define FADDS_SYR2K(k_, n_)  ( (k_) * (n_) * (n_) + (n_) )

/* xHER2K reuses the SYR2K counts unchanged. */
#define FMULS_HER2K          FMULS_SYR2K
#define FADDS_HER2K          FADDS_SYR2K
61 
/*
 * Core xTRMM operation counts, where m_ is the size that enters the
 * formula quadratically and n_ the size that enters linearly:
 *   multiplications = n_*m_*(m_+1)/2
 *   additions       = n_*m_*(m_-1)/2
 */
#define FMULS_TRMM_2(m_, n_) (0.5 * (n_) * (m_) * ((m_) + 1))
#define FADDS_TRMM_2(m_, n_) (0.5 * (n_) * (m_) * ((m_) - 1))

/*
 * xTRMM operation counts. When side_ equals MagmaLeft the core formula is
 * evaluated as (m_, n_); for any other side_ value the two sizes are
 * swapped.
 */
#define FMULS_TRMM(side_, m_, n_)                                   \
    ( ((side_) == MagmaLeft) ? FMULS_TRMM_2((m_), (n_))             \
                             : FMULS_TRMM_2((n_), (m_)) )
#define FADDS_TRMM(side_, m_, n_)                                   \
    ( ((side_) == MagmaLeft) ? FADDS_TRMM_2((m_), (n_))             \
                             : FADDS_TRMM_2((n_), (m_)) )

/*
 * xTRSM uses the same counts as xTRMM. The addition count must alias
 * FADDS_TRMM; aliasing it to FMULS_TRMM would report the multiplication
 * count for additions.
 */
#define FMULS_TRSM FMULS_TRMM
#define FADDS_TRSM FADDS_TRMM
71 
72 /*
73  * Lapack
74  */
/*
 * xGETRF operation counts for size arguments (m_, n_).
 * The two branches are the same formula with m_ and n_ exchanged; the
 * first branch is taken when m_ < n_.
 */
#define FMULS_GETRF(m_, n_)                                                   \
    ( ((m_) < (n_))                                                           \
        ? ( 0.5 * (m_) * ( (m_) * ((n_) - (1.0 / 3.0) * (m_) - 1.0) + (n_) )  \
            + (2.0 / 3.0) * (m_) )                                            \
        : ( 0.5 * (n_) * ( (n_) * ((m_) - (1.0 / 3.0) * (n_) - 1.0) + (m_) )  \
            + (2.0 / 3.0) * (n_) ) )
#define FADDS_GETRF(m_, n_)                                                   \
    ( ((m_) < (n_))                                                           \
        ? ( 0.5 * (m_) * ( (m_) * ((n_) - (1.0 / 3.0) * (m_)) - (n_) )        \
            + (1.0 / 6.0) * (m_) )                                            \
        : ( 0.5 * (n_) * ( (n_) * ((m_) - (1.0 / 3.0) * (n_)) - (m_) )        \
            + (1.0 / 6.0) * (n_) ) )

/* xGETRI operation counts for size argument n_ (cubic polynomial in n_). */
#define FMULS_GETRI(n_)                                                       \
    ( (n_) * ( (5.0 / 6.0) + (n_) * ((2.0 / 3.0) * (n_) + 0.5) ) )
#define FADDS_GETRI(n_)                                                       \
    ( (n_) * ( (5.0 / 6.0) + (n_) * ((2.0 / 3.0) * (n_) - 1.5) ) )

/*
 * xGETRS operation counts for size arguments (n_, nrhs_):
 *   multiplications = nrhs_*n_*n_
 *   additions       = nrhs_*n_*(n_-1)
 */
#define FMULS_GETRS(n_, nrhs_)  ( (nrhs_) * (n_) * (n_) )
#define FADDS_GETRS(n_, nrhs_)  ( (nrhs_) * (n_) * ((n_) - 1) )
87 
/* xPOTRF operation counts for size argument n_ (cubic polynomial in n_). */
#define FMULS_POTRF(n_)                                                 \
    ( (n_) * ( ((1.0 / 6.0) * (n_) + 0.5) * (n_) + (1.0 / 3.0) ) )
#define FADDS_POTRF(n_)                                                 \
    ( (n_) * ( ((1.0 / 6.0) * (n_)) * (n_) - (1.0 / 6.0) ) )

/* xPOTRI operation counts for size argument n_ (cubic polynomial in n_). */
#define FMULS_POTRI(n_)                                                 \
    ( (n_) * ( (2.0 / 3.0) + (n_) * ((1.0 / 3.0) * (n_) + 1.0) ) )
#define FADDS_POTRI(n_)                                                 \
    ( (n_) * ( (1.0 / 6.0) + (n_) * ((1.0 / 3.0) * (n_) - 0.5) ) )

/*
 * xPOTRS operation counts for size arguments (n_, nrhs_):
 *   multiplications = nrhs_*n_*(n_+1)
 *   additions       = nrhs_*n_*(n_-1)
 */
#define FMULS_POTRS(n_, nrhs_)  ( (nrhs_) * (n_) * ((n_) + 1) )
#define FADDS_POTRS(n_, nrhs_)  ( (nrhs_) * (n_) * ((n_) - 1) )
96 
97 //SPBTRF
98 //SPBTRS
99 //SSYTRF
100 //SSYTRI
101 //SSYTRS
102 
/*
 * xGEQRF operation counts for size arguments (m_, n_).
 * The first branch is taken when m_ > n_, the second otherwise; the two
 * branches are distinct polynomials, not a simple swap of m_ and n_.
 */
#define FMULS_GEQRF(m_, n_)                                                   \
    ( ((m_) > (n_))                                                           \
        ? ( (n_) * ( (n_) * (0.5 - (1.0 / 3.0) * (n_) + (m_))                 \
                     + (m_) + 23.0 / 6.0 ) )                                  \
        : ( (m_) * ( (m_) * (-0.5 - (1.0 / 3.0) * (m_) + (n_))                \
                     + 2.0 * (n_) + 23.0 / 6.0 ) ) )
#define FADDS_GEQRF(m_, n_)                                                   \
    ( ((m_) > (n_))                                                           \
        ? ( (n_) * ( (n_) * (0.5 - (1.0 / 3.0) * (n_) + (m_))                 \
                     + 5.0 / 6.0 ) )                                          \
        : ( (m_) * ( (m_) * (-0.5 - (1.0 / 3.0) * (m_) + (n_))                \
                     + (n_) + 5.0 / 6.0 ) ) )

/* xGEQRT operation counts: multiplications and additions are both m_*n_/2. */
#define FMULS_GEQRT(m_, n_)  ( 0.5 * (m_) * (n_) )
#define FADDS_GEQRT(m_, n_)  ( 0.5 * (m_) * (n_) )

/* xGEQLF forwards to the GEQRF counts with the same arguments. */
#define FMULS_GEQLF(m_, n_)  FMULS_GEQRF(m_, n_)
#define FADDS_GEQLF(m_, n_)  FADDS_GEQRF(m_, n_)
115 
/*
 * xGERQF operation counts for size arguments (m_, n_).
 * The first branch is taken when m_ > n_, the second otherwise.
 */
#define FMULS_GERQF(m_, n_)                                                   \
    ( ((m_) > (n_))                                                           \
        ? ( (n_) * ( (n_) * (0.5 - (1.0 / 3.0) * (n_) + (m_))                 \
                     + (m_) + 29.0 / 6.0 ) )                                  \
        : ( (m_) * ( (m_) * (-0.5 - (1.0 / 3.0) * (m_) + (n_))                \
                     + 2.0 * (n_) + 29.0 / 6.0 ) ) )
#define FADDS_GERQF(m_, n_)                                                   \
    ( ((m_) > (n_))                                                           \
        ? ( (n_) * ( (n_) * (-0.5 - (1.0 / 3.0) * (n_) + (m_))                \
                     + (m_) + 5.0 / 6.0 ) )                                   \
        : ( (m_) * ( (m_) * (0.5 - (1.0 / 3.0) * (m_) + (n_))                 \
                     + 5.0 / 6.0 ) ) )

/* xGELQF forwards to the GERQF counts with the same arguments. */
#define FMULS_GELQF(m_, n_)  FMULS_GERQF(m_, n_)
#define FADDS_GELQF(m_, n_)  FADDS_GERQF(m_, n_)
125 
/* xUNGQR operation counts for size arguments (m_, n_, k_). */
#define FMULS_UNGQR(m_, n_, k_)                                               \
    ( (k_) * ( 2.0 * (m_) * (n_) + 2.0 * (n_) - 5.0 / 3.0                     \
               + (k_) * ( (2.0 / 3.0) * (k_) - ((m_) + (n_)) - 1.0 ) ) )
#define FADDS_UNGQR(m_, n_, k_)                                               \
    ( (k_) * ( 2.0 * (m_) * (n_) + (n_) - (m_) + 1.0 / 3.0                    \
               + (k_) * ( (2.0 / 3.0) * (k_) - ((m_) + (n_)) ) ) )

/* xUNGQL, xORGQR and xORGQL reuse the UNGQR counts unchanged. */
#define FMULS_UNGQL  FMULS_UNGQR
#define FMULS_ORGQR  FMULS_UNGQR
#define FMULS_ORGQL  FMULS_UNGQR
#define FADDS_UNGQL  FADDS_UNGQR
#define FADDS_ORGQR  FADDS_UNGQR
#define FADDS_ORGQL  FADDS_UNGQR

/* xUNGRQ operation counts for size arguments (m_, n_, k_). */
#define FMULS_UNGRQ(m_, n_, k_)                                               \
    ( (k_) * ( 2.0 * (m_) * (n_) + (m_) + (n_) - 2.0 / 3.0                    \
               + (k_) * ( (2.0 / 3.0) * (k_) - ((m_) + (n_)) - 1.0 ) ) )
#define FADDS_UNGRQ(m_, n_, k_)                                               \
    ( (k_) * ( 2.0 * (m_) * (n_) + (m_) - (n_) + 1.0 / 3.0                    \
               + (k_) * ( (2.0 / 3.0) * (k_) - ((m_) + (n_)) ) ) )

/* xUNGLQ, xORGRQ and xORGLQ reuse the UNGRQ counts unchanged. */
#define FMULS_UNGLQ  FMULS_UNGRQ
#define FMULS_ORGRQ  FMULS_UNGRQ
#define FMULS_ORGLQ  FMULS_UNGRQ
#define FADDS_UNGLQ  FADDS_UNGRQ
#define FADDS_ORGRQ  FADDS_UNGRQ
#define FADDS_ORGLQ  FADDS_UNGRQ
143 
/*
 * xGEQRS operation counts for size arguments (m_, n_, nrhs_):
 *   multiplications = nrhs_ * n_ * (2*m_ - n_/2 + 2.5)
 *   additions       = nrhs_ * n_ * (2*m_ - n_/2 + 0.5)
 */
#define FMULS_GEQRS(m_, n_, nrhs_)                                      \
    ( (nrhs_) * ( (n_) * (2.0 * (m_) - 0.5 * (n_) + 2.5) ) )
#define FADDS_GEQRS(m_, n_, nrhs_)                                      \
    ( (nrhs_) * ( (n_) * (2.0 * (m_) - 0.5 * (n_) + 0.5) ) )
146 
/*
 * xUNMQR operation counts for size arguments (m_, n_, k_).
 * The first branch is used when side_ equals MagmaLeft; every other side_
 * value selects the second branch.
 */
#define FMULS_UNMQR(m_, n_, k_, side_)                                        \
    ( ((side_) == MagmaLeft)                                                  \
        ? ( 2.0 * (n_) * (m_) * (k_) - (n_) * (k_) * (k_)                     \
            + 2.0 * (n_) * (k_) )                                             \
        : ( 2.0 * (n_) * (m_) * (k_) - (m_) * (k_) * (k_)                     \
            + (m_) * (k_) + (n_) * (k_) - 0.5 * (k_) * (k_) + 0.5 * (k_) ) )
#define FADDS_UNMQR(m_, n_, k_, side_)                                        \
    ( ((side_) == MagmaLeft)                                                  \
        ? ( 2.0 * (n_) * (m_) * (k_) - (n_) * (k_) * (k_) + (n_) * (k_) )     \
        : ( 2.0 * (n_) * (m_) * (k_) - (m_) * (k_) * (k_) + (m_) * (k_) ) )
153 
/* xTRTRI operation counts for size argument n_ (cubic polynomial in n_). */
#define FMULS_TRTRI(n_)                                                 \
    ( (n_) * ( (n_) * ((1.0 / 6.0) * (n_) + 0.5) + 1.0 / 3.0 ) )
#define FADDS_TRTRI(n_)                                                 \
    ( (n_) * ( (n_) * ((1.0 / 6.0) * (n_) - 0.5) + 1.0 / 3.0 ) )

/* xGEHRD operation counts for size argument n_. */
#define FMULS_GEHRD(n_)                                                 \
    ( (n_) * ( (n_) * ((5.0 / 3.0) * (n_) + 0.5) - 7.0 / 6.0 ) - 13.0 )
#define FADDS_GEHRD(n_)                                                 \
    ( (n_) * ( (n_) * ((5.0 / 3.0) * (n_) - 1.0) - 2.0 / 3.0 ) - 8.0 )

/* xSYTRD operation counts for size argument n_. */
#define FMULS_SYTRD(n_)                                                 \
    ( (n_) * ( (n_) * ((2.0 / 3.0) * (n_) + 2.5) - 1.0 / 6.0 ) - 15.0 )
#define FADDS_SYTRD(n_)                                                 \
    ( (n_) * ( (n_) * ((2.0 / 3.0) * (n_) + 1.0) - 8.0 / 3.0 ) - 4.0 )

/* xHETRD reuses the SYTRD counts unchanged. */
#define FMULS_HETRD  FMULS_SYTRD
#define FADDS_HETRD  FADDS_SYTRD
164 
/*
 * xGEBRD operation counts for size arguments (m_, n_).
 * The two branches are the same formula with m_ and n_ exchanged; the
 * first branch is taken when m_ >= n_.
 */
#define FMULS_GEBRD(m_, n_)                                                   \
    ( ((m_) >= (n_))                                                          \
        ? ( (n_) * ( (n_) * (2.0 * (m_) - (2.0 / 3.0) * (n_) + 2.0)           \
                     + 20.0 / 3.0 ) )                                         \
        : ( (m_) * ( (m_) * (2.0 * (n_) - (2.0 / 3.0) * (m_) + 2.0)           \
                     + 20.0 / 3.0 ) ) )
#define FADDS_GEBRD(m_, n_)                                                   \
    ( ((m_) >= (n_))                                                          \
        ? ( (n_) * ( (n_) * (2.0 * (m_) - (2.0 / 3.0) * (n_) + 1.0)           \
                     - (m_) + 5.0 / 3.0 ) )                                   \
        : ( (m_) * ( (m_) * (2.0 * (n_) - (2.0 / 3.0) * (m_) + 1.0)           \
                     - (n_) + 5.0 / 3.0 ) ) )
171 
/*
 * xLARFG operation counts for size argument n_:
 *   multiplications = 2*n_
 *   additions       = n_
 * The argument is parenthesized so that a compound expression such as
 * FMULS_LARFG(a + b) expands to 2*(a + b) rather than 2*a + b.
 */
#define FMULS_LARFG(n_) (2 * (n_))
#define FADDS_LARFG(n_) (    (n_))
174 
175 
/*******************************************************************************
 * User-level macros
 ******************************************************************************/
179 
180 /*
181  * Level 2 BLAS
182  */
/*
 * Total operation counts for xGEMV. Size arguments are cast to double.
 * The Z and C forms weight each multiplication by 6 and each addition by 2;
 * the D and S forms add the two counts unweighted.
 */
#define FLOPS_ZGEMV(m_, n_)                                             \
    ( 6.0 * FMULS_GEMV((double)(m_), (double)(n_))                      \
    + 2.0 * FADDS_GEMV((double)(m_), (double)(n_)) )
#define FLOPS_CGEMV(m_, n_)  FLOPS_ZGEMV((m_), (n_))
#define FLOPS_DGEMV(m_, n_)                                             \
    ( FMULS_GEMV((double)(m_), (double)(n_))                            \
    + FADDS_GEMV((double)(m_), (double)(n_)) )
#define FLOPS_SGEMV(m_, n_)  FLOPS_DGEMV((m_), (n_))

/* Total operation counts for xHEMV (Z and C forms only). */
#define FLOPS_ZHEMV(n_)                                                 \
    ( 6.0 * FMULS_HEMV((double)(n_))                                    \
    + 2.0 * FADDS_HEMV((double)(n_)) )
#define FLOPS_CHEMV(n_)      FLOPS_ZHEMV((n_))

/* Total operation counts for xSYMV. */
#define FLOPS_ZSYMV(n_)                                                 \
    ( 6.0 * FMULS_SYMV((double)(n_))                                    \
    + 2.0 * FADDS_SYMV((double)(n_)) )
#define FLOPS_CSYMV(n_)      FLOPS_ZSYMV((n_))
#define FLOPS_DSYMV(n_)                                                 \
    ( FMULS_SYMV((double)(n_))                                          \
    + FADDS_SYMV((double)(n_)) )
#define FLOPS_SSYMV(n_)      FLOPS_DSYMV((n_))
195 
196 /*
197  * Level 3 BLAS
198  */
/*
 * Total operation counts for xGEMM. Size arguments are cast to double.
 * The Z and C forms weight each multiplication by 6 and each addition by 2;
 * the D and S forms add the two counts unweighted.
 */
#define FLOPS_ZGEMM(m_, n_, k_)                                         \
    ( 6.0 * FMULS_GEMM((double)(m_), (double)(n_), (double)(k_))        \
    + 2.0 * FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CGEMM(m_, n_, k_)  FLOPS_ZGEMM((m_), (n_), (k_))
#define FLOPS_DGEMM(m_, n_, k_)                                         \
    ( FMULS_GEMM((double)(m_), (double)(n_), (double)(k_))              \
    + FADDS_GEMM((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SGEMM(m_, n_, k_)  FLOPS_DGEMM((m_), (n_), (k_))

/*
 * Total operation counts for xHEMM (Z and C forms only). side_ is passed
 * through to the generic formula without a cast.
 */
#define FLOPS_ZHEMM(side_, m_, n_)                                      \
    ( 6.0 * FMULS_HEMM(side_, (double)(m_), (double)(n_))               \
    + 2.0 * FADDS_HEMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CHEMM(side_, m_, n_)  FLOPS_ZHEMM((side_), (m_), (n_))

/* Total operation counts for xSYMM; side_ is passed through uncast. */
#define FLOPS_ZSYMM(side_, m_, n_)                                      \
    ( 6.0 * FMULS_SYMM(side_, (double)(m_), (double)(n_))               \
    + 2.0 * FADDS_SYMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CSYMM(side_, m_, n_)  FLOPS_ZSYMM((side_), (m_), (n_))
#define FLOPS_DSYMM(side_, m_, n_)                                      \
    ( FMULS_SYMM(side_, (double)(m_), (double)(n_))                     \
    + FADDS_SYMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_SSYMM(side_, m_, n_)  FLOPS_DSYMM((side_), (m_), (n_))
211 
/*
 * Total operation counts for the rank-k and rank-2k update routines.
 * Size arguments are cast to double. The Z and C forms weight each
 * multiplication by 6 and each addition by 2; the D and S forms add the
 * two counts unweighted.
 */

/* xHERK (Z and C forms only). */
#define FLOPS_ZHERK(k_, n_)                                             \
    ( 6.0 * FMULS_HERK((double)(k_), (double)(n_))                      \
    + 2.0 * FADDS_HERK((double)(k_), (double)(n_)) )
#define FLOPS_CHERK(k_, n_)   FLOPS_ZHERK((k_), (n_))

/* xSYRK. */
#define FLOPS_ZSYRK(k_, n_)                                             \
    ( 6.0 * FMULS_SYRK((double)(k_), (double)(n_))                      \
    + 2.0 * FADDS_SYRK((double)(k_), (double)(n_)) )
#define FLOPS_CSYRK(k_, n_)   FLOPS_ZSYRK((k_), (n_))
#define FLOPS_DSYRK(k_, n_)                                             \
    ( FMULS_SYRK((double)(k_), (double)(n_))                            \
    + FADDS_SYRK((double)(k_), (double)(n_)) )
#define FLOPS_SSYRK(k_, n_)   FLOPS_DSYRK((k_), (n_))

/* xHER2K (Z and C forms only). */
#define FLOPS_ZHER2K(k_, n_)                                            \
    ( 6.0 * FMULS_HER2K((double)(k_), (double)(n_))                     \
    + 2.0 * FADDS_HER2K((double)(k_), (double)(n_)) )
#define FLOPS_CHER2K(k_, n_)  FLOPS_ZHER2K((k_), (n_))

/* xSYR2K. */
#define FLOPS_ZSYR2K(k_, n_)                                            \
    ( 6.0 * FMULS_SYR2K((double)(k_), (double)(n_))                     \
    + 2.0 * FADDS_SYR2K((double)(k_), (double)(n_)) )
#define FLOPS_CSYR2K(k_, n_)  FLOPS_ZSYR2K((k_), (n_))
#define FLOPS_DSYR2K(k_, n_)                                            \
    ( FMULS_SYR2K((double)(k_), (double)(n_))                           \
    + FADDS_SYR2K((double)(k_), (double)(n_)) )
#define FLOPS_SSYR2K(k_, n_)  FLOPS_DSYR2K((k_), (n_))
227 
/*
 * Total operation counts for xTRMM. Size arguments are cast to double and
 * side_ is passed through uncast. The Z and C forms weight each
 * multiplication by 6 and each addition by 2; the D and S forms add the
 * two counts unweighted.
 */
#define FLOPS_ZTRMM(side_, m_, n_)                                      \
    ( 6.0 * FMULS_TRMM(side_, (double)(m_), (double)(n_))               \
    + 2.0 * FADDS_TRMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CTRMM(side_, m_, n_)  FLOPS_ZTRMM((side_), (m_), (n_))
#define FLOPS_DTRMM(side_, m_, n_)                                      \
    ( FMULS_TRMM(side_, (double)(m_), (double)(n_))                     \
    + FADDS_TRMM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_STRMM(side_, m_, n_)  FLOPS_DTRMM((side_), (m_), (n_))

/* Total operation counts for xTRSM, built the same way from the TRSM formulas. */
#define FLOPS_ZTRSM(side_, m_, n_)                                      \
    ( 6.0 * FMULS_TRSM(side_, (double)(m_), (double)(n_))               \
    + 2.0 * FADDS_TRSM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_CTRSM(side_, m_, n_)  FLOPS_ZTRSM((side_), (m_), (n_))
#define FLOPS_DTRSM(side_, m_, n_)                                      \
    ( FMULS_TRSM(side_, (double)(m_), (double)(n_))                     \
    + FADDS_TRSM(side_, (double)(m_), (double)(n_)) )
#define FLOPS_STRSM(side_, m_, n_)  FLOPS_DTRSM((side_), (m_), (n_))
237 
238 /*
239  * Lapack
240  */
/*
 * Total operation counts for xGETRF, xGETRI and xGETRS. Size arguments are
 * cast to double. The Z and C forms weight each multiplication by 6 and
 * each addition by 2; the D and S forms add the two counts unweighted.
 */

/* xGETRF. */
#define FLOPS_ZGETRF(m_, n_)                                            \
    ( 6.0 * FMULS_GETRF((double)(m_), (double)(n_))                     \
    + 2.0 * FADDS_GETRF((double)(m_), (double)(n_)) )
#define FLOPS_CGETRF(m_, n_)     FLOPS_ZGETRF((m_), (n_))
#define FLOPS_DGETRF(m_, n_)                                            \
    ( FMULS_GETRF((double)(m_), (double)(n_))                           \
    + FADDS_GETRF((double)(m_), (double)(n_)) )
#define FLOPS_SGETRF(m_, n_)     FLOPS_DGETRF((m_), (n_))

/* xGETRI. */
#define FLOPS_ZGETRI(n_)                                                \
    ( 6.0 * FMULS_GETRI((double)(n_))                                   \
    + 2.0 * FADDS_GETRI((double)(n_)) )
#define FLOPS_CGETRI(n_)         FLOPS_ZGETRI((n_))
#define FLOPS_DGETRI(n_)                                                \
    ( FMULS_GETRI((double)(n_))                                         \
    + FADDS_GETRI((double)(n_)) )
#define FLOPS_SGETRI(n_)         FLOPS_DGETRI((n_))

/* xGETRS. */
#define FLOPS_ZGETRS(n_, nrhs_)                                         \
    ( 6.0 * FMULS_GETRS((double)(n_), (double)(nrhs_))                  \
    + 2.0 * FADDS_GETRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_CGETRS(n_, nrhs_)  FLOPS_ZGETRS((n_), (nrhs_))
#define FLOPS_DGETRS(n_, nrhs_)                                         \
    ( FMULS_GETRS((double)(n_), (double)(nrhs_))                        \
    + FADDS_GETRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_SGETRS(n_, nrhs_)  FLOPS_DGETRS((n_), (nrhs_))
255 
/*
 * Total operation counts for xPOTRF, xPOTRI and xPOTRS. Size arguments are
 * cast to double. The Z and C forms weight each multiplication by 6 and
 * each addition by 2; the D and S forms add the two counts unweighted.
 */

/* xPOTRF. */
#define FLOPS_ZPOTRF(n_)                                                \
    ( 6.0 * FMULS_POTRF((double)(n_))                                   \
    + 2.0 * FADDS_POTRF((double)(n_)) )
#define FLOPS_CPOTRF(n_)         FLOPS_ZPOTRF((n_))
#define FLOPS_DPOTRF(n_)                                                \
    ( FMULS_POTRF((double)(n_))                                         \
    + FADDS_POTRF((double)(n_)) )
#define FLOPS_SPOTRF(n_)         FLOPS_DPOTRF((n_))

/* xPOTRI. */
#define FLOPS_ZPOTRI(n_)                                                \
    ( 6.0 * FMULS_POTRI((double)(n_))                                   \
    + 2.0 * FADDS_POTRI((double)(n_)) )
#define FLOPS_CPOTRI(n_)         FLOPS_ZPOTRI((n_))
#define FLOPS_DPOTRI(n_)                                                \
    ( FMULS_POTRI((double)(n_))                                         \
    + FADDS_POTRI((double)(n_)) )
#define FLOPS_SPOTRI(n_)         FLOPS_DPOTRI((n_))

/* xPOTRS. */
#define FLOPS_ZPOTRS(n_, nrhs_)                                         \
    ( 6.0 * FMULS_POTRS((double)(n_), (double)(nrhs_))                  \
    + 2.0 * FADDS_POTRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_CPOTRS(n_, nrhs_)  FLOPS_ZPOTRS((n_), (nrhs_))
#define FLOPS_DPOTRS(n_, nrhs_)                                         \
    ( FMULS_POTRS((double)(n_), (double)(nrhs_))                        \
    + FADDS_POTRS((double)(n_), (double)(nrhs_)) )
#define FLOPS_SPOTRS(n_, nrhs_)  FLOPS_DPOTRS((n_), (nrhs_))
270 
/*
 * Total operation counts for the xGEQRF, xGEQRT, xGEQLF, xGERQF and xGELQF
 * routines. Size arguments are cast to double. The Z and C forms weight
 * each multiplication by 6 and each addition by 2; the D and S forms add
 * the two counts unweighted.
 */

/* xGEQRF. */
#define FLOPS_ZGEQRF(m_, n_)                                            \
    ( 6.0 * FMULS_GEQRF((double)(m_), (double)(n_))                     \
    + 2.0 * FADDS_GEQRF((double)(m_), (double)(n_)) )
#define FLOPS_CGEQRF(m_, n_)  FLOPS_ZGEQRF((m_), (n_))
#define FLOPS_DGEQRF(m_, n_)                                            \
    ( FMULS_GEQRF((double)(m_), (double)(n_))                           \
    + FADDS_GEQRF((double)(m_), (double)(n_)) )
#define FLOPS_SGEQRF(m_, n_)  FLOPS_DGEQRF((m_), (n_))

/* xGEQRT. */
#define FLOPS_ZGEQRT(m_, n_)                                            \
    ( 6.0 * FMULS_GEQRT((double)(m_), (double)(n_))                     \
    + 2.0 * FADDS_GEQRT((double)(m_), (double)(n_)) )
#define FLOPS_CGEQRT(m_, n_)  FLOPS_ZGEQRT((m_), (n_))
#define FLOPS_DGEQRT(m_, n_)                                            \
    ( FMULS_GEQRT((double)(m_), (double)(n_))                           \
    + FADDS_GEQRT((double)(m_), (double)(n_)) )
#define FLOPS_SGEQRT(m_, n_)  FLOPS_DGEQRT((m_), (n_))

/* xGEQLF. */
#define FLOPS_ZGEQLF(m_, n_)                                            \
    ( 6.0 * FMULS_GEQLF((double)(m_), (double)(n_))                     \
    + 2.0 * FADDS_GEQLF((double)(m_), (double)(n_)) )
#define FLOPS_CGEQLF(m_, n_)  FLOPS_ZGEQLF((m_), (n_))
#define FLOPS_DGEQLF(m_, n_)                                            \
    ( FMULS_GEQLF((double)(m_), (double)(n_))                           \
    + FADDS_GEQLF((double)(m_), (double)(n_)) )
#define FLOPS_SGEQLF(m_, n_)  FLOPS_DGEQLF((m_), (n_))

/* xGERQF. */
#define FLOPS_ZGERQF(m_, n_)                                            \
    ( 6.0 * FMULS_GERQF((double)(m_), (double)(n_))                     \
    + 2.0 * FADDS_GERQF((double)(m_), (double)(n_)) )
#define FLOPS_CGERQF(m_, n_)  FLOPS_ZGERQF((m_), (n_))
#define FLOPS_DGERQF(m_, n_)                                            \
    ( FMULS_GERQF((double)(m_), (double)(n_))                           \
    + FADDS_GERQF((double)(m_), (double)(n_)) )
#define FLOPS_SGERQF(m_, n_)  FLOPS_DGERQF((m_), (n_))

/* xGELQF. */
#define FLOPS_ZGELQF(m_, n_)                                            \
    ( 6.0 * FMULS_GELQF((double)(m_), (double)(n_))                     \
    + 2.0 * FADDS_GELQF((double)(m_), (double)(n_)) )
#define FLOPS_CGELQF(m_, n_)  FLOPS_ZGELQF((m_), (n_))
#define FLOPS_DGELQF(m_, n_)                                            \
    ( FMULS_GELQF((double)(m_), (double)(n_))                           \
    + FADDS_GELQF((double)(m_), (double)(n_)) )
#define FLOPS_SGELQF(m_, n_)  FLOPS_DGELQF((m_), (n_))
295 
/*
 * Total operation counts for the xUNGQR/xORGQR, xUNGQL/xORGQL,
 * xUNGRQ/xORGRQ and xUNGLQ/xORGLQ routines. Size arguments are cast to
 * double. The Z and C forms weight each multiplication by 6 and each
 * addition by 2; the D and S forms add the two counts unweighted and are
 * built from the same UNGxx generic formulas.
 */

/* xUNGQR / xORGQR. */
#define FLOPS_ZUNGQR(m_, n_, k_)                                        \
    ( 6.0 * FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_))       \
    + 2.0 * FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGQR(m_, n_, k_)  FLOPS_ZUNGQR((m_), (n_), (k_))
#define FLOPS_DORGQR(m_, n_, k_)                                        \
    ( FMULS_UNGQR((double)(m_), (double)(n_), (double)(k_))             \
    + FADDS_UNGQR((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGQR(m_, n_, k_)  FLOPS_DORGQR((m_), (n_), (k_))

/* xUNGQL / xORGQL. */
#define FLOPS_ZUNGQL(m_, n_, k_)                                        \
    ( 6.0 * FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_))       \
    + 2.0 * FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGQL(m_, n_, k_)  FLOPS_ZUNGQL((m_), (n_), (k_))
#define FLOPS_DORGQL(m_, n_, k_)                                        \
    ( FMULS_UNGQL((double)(m_), (double)(n_), (double)(k_))             \
    + FADDS_UNGQL((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGQL(m_, n_, k_)  FLOPS_DORGQL((m_), (n_), (k_))

/* xUNGRQ / xORGRQ. */
#define FLOPS_ZUNGRQ(m_, n_, k_)                                        \
    ( 6.0 * FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_))       \
    + 2.0 * FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGRQ(m_, n_, k_)  FLOPS_ZUNGRQ((m_), (n_), (k_))
#define FLOPS_DORGRQ(m_, n_, k_)                                        \
    ( FMULS_UNGRQ((double)(m_), (double)(n_), (double)(k_))             \
    + FADDS_UNGRQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGRQ(m_, n_, k_)  FLOPS_DORGRQ((m_), (n_), (k_))

/* xUNGLQ / xORGLQ. */
#define FLOPS_ZUNGLQ(m_, n_, k_)                                        \
    ( 6.0 * FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_))       \
    + 2.0 * FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_CUNGLQ(m_, n_, k_)  FLOPS_ZUNGLQ((m_), (n_), (k_))
#define FLOPS_DORGLQ(m_, n_, k_)                                        \
    ( FMULS_UNGLQ((double)(m_), (double)(n_), (double)(k_))             \
    + FADDS_UNGLQ((double)(m_), (double)(n_), (double)(k_)) )
#define FLOPS_SORGLQ(m_, n_, k_)  FLOPS_DORGLQ((m_), (n_), (k_))
315 
/*
 * Total operation counts for xUNMQR/xORMQR. Size arguments are cast to
 * double; side_ is forwarded uncast as the last argument of the generic
 * formula. The Z and C forms weight each multiplication by 6 and each
 * addition by 2; the D and S forms add the two counts unweighted.
 */
#define FLOPS_ZUNMQR(m_, n_, k_, side_)                                         \
    ( 6.0 * FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_))      \
    + 2.0 * FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_CUNMQR(m_, n_, k_, side_)  FLOPS_ZUNMQR((m_), (n_), (k_), (side_))
#define FLOPS_DORMQR(m_, n_, k_, side_)                                         \
    ( FMULS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_))            \
    + FADDS_UNMQR((double)(m_), (double)(n_), (double)(k_), (side_)) )
#define FLOPS_SORMQR(m_, n_, k_, side_)  FLOPS_DORMQR((m_), (n_), (k_), (side_))

/* Total operation counts for xGEQRS, built the same way. */
#define FLOPS_ZGEQRS(m_, n_, nrhs_)                                             \
    ( 6.0 * FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_))            \
    + 2.0 * FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) )
#define FLOPS_CGEQRS(m_, n_, nrhs_)  FLOPS_ZGEQRS((m_), (n_), (nrhs_))
#define FLOPS_DGEQRS(m_, n_, nrhs_)                                             \
    ( FMULS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_))                  \
    + FADDS_GEQRS((double)(m_), (double)(n_), (double)(nrhs_)) )
#define FLOPS_SGEQRS(m_, n_, nrhs_)  FLOPS_DGEQRS((m_), (n_), (nrhs_))
325 
/*
 * Total operation counts for xTRTRI, xGEHRD, xHETRD, xSYTRD and xGEBRD.
 * Size arguments are cast to double. The Z and C forms weight each
 * multiplication by 6 and each addition by 2; the D and S forms add the
 * two counts unweighted.
 */

/* xTRTRI. */
#define FLOPS_ZTRTRI(n_)                                                \
    ( 6.0 * FMULS_TRTRI((double)(n_))                                   \
    + 2.0 * FADDS_TRTRI((double)(n_)) )
#define FLOPS_CTRTRI(n_)      FLOPS_ZTRTRI((n_))
#define FLOPS_DTRTRI(n_)                                                \
    ( FMULS_TRTRI((double)(n_))                                         \
    + FADDS_TRTRI((double)(n_)) )
#define FLOPS_STRTRI(n_)      FLOPS_DTRTRI((n_))

/* xGEHRD. */
#define FLOPS_ZGEHRD(n_)                                                \
    ( 6.0 * FMULS_GEHRD((double)(n_))                                   \
    + 2.0 * FADDS_GEHRD((double)(n_)) )
#define FLOPS_CGEHRD(n_)      FLOPS_ZGEHRD((n_))
#define FLOPS_DGEHRD(n_)                                                \
    ( FMULS_GEHRD((double)(n_))                                         \
    + FADDS_GEHRD((double)(n_)) )
#define FLOPS_SGEHRD(n_)      FLOPS_DGEHRD((n_))

/* xHETRD (Z and C forms only). */
#define FLOPS_ZHETRD(n_)                                                \
    ( 6.0 * FMULS_HETRD((double)(n_))                                   \
    + 2.0 * FADDS_HETRD((double)(n_)) )
#define FLOPS_CHETRD(n_)      FLOPS_ZHETRD((n_))

/* xSYTRD. */
#define FLOPS_ZSYTRD(n_)                                                \
    ( 6.0 * FMULS_SYTRD((double)(n_))                                   \
    + 2.0 * FADDS_SYTRD((double)(n_)) )
#define FLOPS_CSYTRD(n_)      FLOPS_ZSYTRD((n_))
#define FLOPS_DSYTRD(n_)                                                \
    ( FMULS_SYTRD((double)(n_))                                         \
    + FADDS_SYTRD((double)(n_)) )
#define FLOPS_SSYTRD(n_)      FLOPS_DSYTRD((n_))

/* xGEBRD. */
#define FLOPS_ZGEBRD(m_, n_)                                            \
    ( 6.0 * FMULS_GEBRD((double)(m_), (double)(n_))                     \
    + 2.0 * FADDS_GEBRD((double)(m_), (double)(n_)) )
#define FLOPS_CGEBRD(m_, n_)  FLOPS_ZGEBRD((m_), (n_))
#define FLOPS_DGEBRD(m_, n_)                                            \
    ( FMULS_GEBRD((double)(m_), (double)(n_))                           \
    + FADDS_GEBRD((double)(m_), (double)(n_)) )
#define FLOPS_SGEBRD(m_, n_)  FLOPS_DGEBRD((m_), (n_))
348 
/*
 * Total operation counts for xLARFG. The Z and C forms weight each
 * multiplication by 6 and each addition by 2; the D and S forms add the
 * two counts unweighted.
 *
 * The size argument is parenthesized before the cast: written as
 * (double)n_, a compound argument such as a + b would have only its first
 * operand cast, and the unparenthesized sum would then be spliced into the
 * generic formula.
 */
#define FLOPS_ZLARFG(n_) (6. * FMULS_LARFG((double)(n_)) + 2. * FADDS_LARFG((double)(n_)) )
#define FLOPS_CLARFG(n_) (6. * FMULS_LARFG((double)(n_)) + 2. * FADDS_LARFG((double)(n_)) )
#define FLOPS_DLARFG(n_) (     FMULS_LARFG((double)(n_)) +      FADDS_LARFG((double)(n_)) )
#define FLOPS_SLARFG(n_) (     FMULS_LARFG((double)(n_)) +      FADDS_LARFG((double)(n_)) )
353 
354 #endif /* MAGMA_FLOPS_H */