MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
flops.h
Go to the documentation of this file.
1 
12 /*
13  * This file provide the flops formula for all Level 3 BLAS and some
14  * Lapack routines. Each macro uses the same size parameters as the
15  * function associated and provide one formula for additions and one
16  * for multiplications. Example to use these macros:
17  *
18  * FLOPS_ZGEMM( m, n, k )
19  *
20  * All the formula are reported in the LAPACK Lawn 41:
21  * http://www.netlib.org/lapack/lawns/lawn41.ps
22  */
23 #ifndef _FLOPS_H_
24 #define _FLOPS_H_
25 
26 /************************************************************************
27  * Generic formula coming from LAWN 41
28  ***********************************************************************/
29 
30 /*
31  * Level 2 BLAS
32  */
33 #define FMULS_GEMV(__m, __n) ((__m) * (__n) + 2. * (__m))
34 #define FADDS_GEMV(__m, __n) ((__m) * (__n) )
35 
36 #define FMULS_SYMV(__n) FMULS_GEMV( (__n), (__n) )
37 #define FADDS_SYMV(__n) FADDS_GEMV( (__n), (__n) )
38 #define FMULS_HEMV FMULS_SYMV
39 #define FADDS_HEMV FADDS_SYMV
40 
41 /*
42  * Level 3 BLAS
43  */
44 #define FMULS_GEMM(__m, __n, __k) ((__m) * (__n) * (__k))
45 #define FADDS_GEMM(__m, __n, __k) ((__m) * (__n) * (__k))
46 
47 #define FMULS_SYMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FMULS_GEMM((__m), (__m), (__n)) : FMULS_GEMM((__m), (__n), (__n)) )
48 #define FADDS_SYMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FADDS_GEMM((__m), (__m), (__n)) : FADDS_GEMM((__m), (__n), (__n)) )
49 #define FMULS_HEMM FMULS_SYMM
50 #define FADDS_HEMM FADDS_SYMM
51 
52 #define FMULS_SYRK(__k, __n) (0.5 * (__k) * (__n) * ((__n)+1))
53 #define FADDS_SYRK(__k, __n) (0.5 * (__k) * (__n) * ((__n)+1))
54 #define FMULS_HERK FMULS_SYRK
55 #define FADDS_HERK FADDS_SYRK
56 
57 #define FMULS_SYR2K(__k, __n) ((__k) * (__n) * (__n) )
58 #define FADDS_SYR2K(__k, __n) ((__k) * (__n) * (__n) + (__n))
59 #define FMULS_HER2K FMULS_SYR2K
60 #define FADDS_HER2K FADDS_SYR2K
61 
62 #define FMULS_TRMM_2(__m, __n) (0.5 * (__n) * (__m) * ((__m)+1))
63 #define FADDS_TRMM_2(__m, __n) (0.5 * (__n) * (__m) * ((__m)-1))
64 
65 
66 #define FMULS_TRMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FMULS_TRMM_2((__m), (__n)) : FMULS_TRMM_2((__n), (__m)) )
67 #define FADDS_TRMM(__side, __m, __n) ( ( (__side) == PlasmaLeft ) ? FADDS_TRMM_2((__m), (__n)) : FADDS_TRMM_2((__n), (__m)) )
68 
69 #define FMULS_TRSM FMULS_TRMM
70 #define FADDS_TRSM FMULS_TRMM
71 
72 /*
73  * Lapack
74  */
75 #define FMULS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (__m) * ((__m) * ((__n) - (1./3.) * (__m) - 1. ) + (__n)) + (2. / 3.) * (__m)) \
76  : (0.5 * (__n) * ((__n) * ((__m) - (1./3.) * (__n) - 1. ) + (__m)) + (2. / 3.) * (__n)) )
77 #define FADDS_GETRF(__m, __n) ( ((__m) < (__n)) ? (0.5 * (__m) * ((__m) * ((__n) - (1./3.) * (__m) ) - (__n)) + (1. / 6.) * (__m)) \
78  : (0.5 * (__n) * ((__n) * ((__m) - (1./3.) * (__n) ) - (__m)) + (1. / 6.) * (__n)) )
79 
80 #define FMULS_GETRI(__n) ( (__n) * ((5. / 6.) + (__n) * ((2. / 3.) * (__n) + 0.5)) )
81 #define FADDS_GETRI(__n) ( (__n) * ((5. / 6.) + (__n) * ((2. / 3.) * (__n) - 1.5)) )
82 
83 #define FMULS_GETRS(__n, __nrhs) ((__nrhs) * (__n) * (__n) )
84 #define FADDS_GETRS(__n, __nrhs) ((__nrhs) * (__n) * ((__n) - 1 ))
85 
86 #define FMULS_POTRF(__n) ((__n) * (((1. / 6.) * (__n) + 0.5) * (__n) + (1. / 3.)))
87 #define FADDS_POTRF(__n) ((__n) * (((1. / 6.) * (__n) ) * (__n) - (1. / 6.)))
88 
89 #define FMULS_POTRI(__n) ( (__n) * ((2. / 3.) + (__n) * ((1. / 3.) * (__n) + 1. )) )
90 #define FADDS_POTRI(__n) ( (__n) * ((1. / 6.) + (__n) * ((1. / 3.) * (__n) - 0.5)) )
91 
92 #define FMULS_POTRS(__n, __nrhs) ((__nrhs) * (__n) * ((__n) + 1 ))
93 #define FADDS_POTRS(__n, __nrhs) ((__nrhs) * (__n) * ((__n) - 1 ))
94 
95 //SPBTRF
96 //SPBTRS
97 //SSYTRF
98 //SSYTRI
99 //SSYTRS
100 
101 #define FMULS_GEQRF(__m, __n) (((__m) > (__n)) ? ((__n) * ((__n) * ( 0.5-(1./3.) * (__n) + (__m)) + (__m) + 23. / 6.)) \
102  : ((__m) * ((__m) * ( -0.5-(1./3.) * (__m) + (__n)) + 2.*(__n) + 23. / 6.)) )
103 #define FADDS_GEQRF(__m, __n) (((__m) > (__n)) ? ((__n) * ((__n) * ( 0.5-(1./3.) * (__n) + (__m)) + 5. / 6.)) \
104  : ((__m) * ((__m) * ( -0.5-(1./3.) * (__m) + (__n)) + (__n) + 5. / 6.)) )
105 
106 #define FMULS_GEQLF(__m, __n) FMULS_GEQRF(__m, __n)
107 #define FADDS_GEQLF(__m, __n) FADDS_GEQRF(__m, __n)
108 
109 #define FMULS_GERQF(__m, __n) (((__m) > (__n)) ? ((__n) * ((__n) * ( 0.5-(1./3.) * (__n) + (__m)) + (__m) + 29. / 6.)) \
110  : ((__m) * ((__m) * ( -0.5-(1./3.) * (__m) + (__n)) + 2.*(__n) + 29. / 6.)) )
111 #define FADDS_GERQF(__m, __n) (((__m) > (__n)) ? ((__n) * ((__n) * ( -0.5-(1./3.) * (__n) + (__m)) + (__m) + 5. / 6.)) \
112  : ((__m) * ((__m) * ( 0.5-(1./3.) * (__m) + (__n)) + + 5. / 6.)) )
113 
114 #define FMULS_GELQF(__m, __n) FMULS_GERQF(__m, __n)
115 #define FADDS_GELQF(__m, __n) FADDS_GERQF(__m, __n)
116 
117 #define FMULS_UNGQR(__m, __n, __k) ((__k) * (2.* (__m) * (__n) + 2. * (__n) - 5./3. + (__k) * ( 2./3. * (__k) - ((__m) + (__n)) - 1.)))
118 #define FADDS_UNGQR(__m, __n, __k) ((__k) * (2.* (__m) * (__n) + (__n) - (__m) + 1./3. + (__k) * ( 2./3. * (__k) - ((__m) + (__n)) )))
119 #define FMULS_UNGQL FMULS_UNGQR
120 #define FMULS_ORGQR FMULS_UNGQR
121 #define FMULS_ORGQL FMULS_UNGQR
122 #define FADDS_UNGQL FADDS_UNGQR
123 #define FADDS_ORGQR FADDS_UNGQR
124 #define FADDS_ORGQL FADDS_UNGQR
125 
126 #define FMULS_UNGRQ(__m, __n, __k) ((__k) * (2.* (__m) * (__n) + (__m) + (__n) - 2./3. + (__k) * ( 2./3. * (__k) - ((__m) + (__n)) - 1.)))
127 #define FADDS_UNGRQ(__m, __n, __k) ((__k) * (2.* (__m) * (__n) + (__m) - (__n) + 1./3. + (__k) * ( 2./3. * (__k) - ((__m) + (__n)) )))
128 #define FMULS_UNGLQ FMULS_UNGRQ
129 #define FMULS_ORGRQ FMULS_UNGRQ
130 #define FMULS_ORGLQ FMULS_UNGRQ
131 #define FADDS_UNGLQ FADDS_UNGRQ
132 #define FADDS_ORGRQ FADDS_UNGRQ
133 #define FADDS_ORGLQ FADDS_UNGRQ
134 
135 #define FMULS_GEQRS(__m, __n, __nrhs) ((__nrhs) * ((__n) * ( 2.* (__m) - 0.5 * (__n) + 2.5)))
136 #define FADDS_GEQRS(__m, __n, __nrhs) ((__nrhs) * ((__n) * ( 2.* (__m) - 0.5 * (__n) + 0.5)))
137 
138 //UNMQR, UNMLQ, UNMQL, UNMRQ (Left)
139 //UNMQR, UNMLQ, UNMQL, UNMRQ (Right)
140 
141 #define FMULS_TRTRI(__n) ((__n) * ((__n) * ( 1./6. * (__n) + 0.5 ) + 1./3.))
142 #define FADDS_TRTRI(__n) ((__n) * ((__n) * ( 1./6. * (__n) - 0.5 ) + 1./3.))
143 
144 #define FMULS_GEHRD(__n) ( (__n) * ((__n) * (5./3. *(__n) + 0.5) - 7./6.) - 13. )
145 #define FADDS_GEHRD(__n) ( (__n) * ((__n) * (5./3. *(__n) - 1. ) - 2./3.) - 8. )
146 
147 #define FMULS_SYTRD(__n) ( (__n) * ( (__n) * ( 2./3. * (__n) + 2.5 ) - 1./6. ) - 15.)
148 #define FADDS_SYTRD(__n) ( (__n) * ( (__n) * ( 2./3. * (__n) + 1. ) - 8./3. ) - 4.)
149 #define FMULS_HETRD FMULS_SYTRD
150 #define FADDS_HETRD FADDS_SYTRD
151 
152 #define FMULS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((__n) * ((__n) * (2. * (__m) - 2./3. * (__n) + 2. ) + 20./3.)) \
153  : ((__m) * ((__m) * (2. * (__n) - 2./3. * (__m) + 2. ) + 20./3.)) )
154 #define FADDS_GEBRD(__m, __n) ( ((__m) >= (__n)) ? ((__n) * ((__n) * (2. * (__m) - 2./3. * (__n) + 1. ) - (__m) + 5./3.)) \
155  : ((__m) * ((__m) * (2. * (__n) - 2./3. * (__m) + 1. ) - (__n) + 5./3.)) )
156 
157 
158 /*******************************************************************************
159  * Users functions
160  ******************************************************************************/
161 
162 /*
163  * Level 2 BLAS
164  */
165 #define FLOPS_ZGEMV(__m, __n) (6. * FMULS_GEMV((double)(__m), (double)(__n)) + 2.0 * FADDS_GEMV((double)(__m), (double)(__n)) )
166 #define FLOPS_CGEMV(__m, __n) (6. * FMULS_GEMV((double)(__m), (double)(__n)) + 2.0 * FADDS_GEMV((double)(__m), (double)(__n)) )
167 #define FLOPS_DGEMV(__m, __n) ( FMULS_GEMV((double)(__m), (double)(__n)) + FADDS_GEMV((double)(__m), (double)(__n)) )
168 #define FLOPS_SGEMV(__m, __n) ( FMULS_GEMV((double)(__m), (double)(__n)) + FADDS_GEMV((double)(__m), (double)(__n)) )
169 
170 #define FLOPS_ZHEMV(__n) (6. * FMULS_HEMV((double)(__n)) + 2.0 * FADDS_HEMV((double)(__n)) )
171 #define FLOPS_CHEMV(__n) (6. * FMULS_HEMV((double)(__n)) + 2.0 * FADDS_HEMV((double)(__n)) )
172 
173 #define FLOPS_ZSYMV(__n) (6. * FMULS_SYMV((double)(__n)) + 2.0 * FADDS_SYMV((double)(__n)) )
174 #define FLOPS_CSYMV(__n) (6. * FMULS_SYMV((double)(__n)) + 2.0 * FADDS_SYMV((double)(__n)) )
175 #define FLOPS_DSYMV(__n) ( FMULS_SYMV((double)(__n)) + FADDS_SYMV((double)(__n)) )
176 #define FLOPS_SSYMV(__n) ( FMULS_SYMV((double)(__n)) + FADDS_SYMV((double)(__n)) )
177 
178 /*
179  * Level 3 BLAS
180  */
181 #define FLOPS_ZGEMM(__m, __n, __k) (6. * FMULS_GEMM((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_GEMM((double)(__m), (double)(__n), (double)(__k)) )
182 #define FLOPS_CGEMM(__m, __n, __k) (6. * FMULS_GEMM((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_GEMM((double)(__m), (double)(__n), (double)(__k)) )
183 #define FLOPS_DGEMM(__m, __n, __k) ( FMULS_GEMM((double)(__m), (double)(__n), (double)(__k)) + FADDS_GEMM((double)(__m), (double)(__n), (double)(__k)) )
184 #define FLOPS_SGEMM(__m, __n, __k) ( FMULS_GEMM((double)(__m), (double)(__n), (double)(__k)) + FADDS_GEMM((double)(__m), (double)(__n), (double)(__k)) )
185 
186 #define FLOPS_ZHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_HEMM(__side, (double)(__m), (double)(__n)) )
187 #define FLOPS_CHEMM(__side, __m, __n) (6. * FMULS_HEMM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_HEMM(__side, (double)(__m), (double)(__n)) )
188 
189 #define FLOPS_ZSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_SYMM(__side, (double)(__m), (double)(__n)) )
190 #define FLOPS_CSYMM(__side, __m, __n) (6. * FMULS_SYMM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_SYMM(__side, (double)(__m), (double)(__n)) )
191 #define FLOPS_DSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (double)(__m), (double)(__n)) + FADDS_SYMM(__side, (double)(__m), (double)(__n)) )
192 #define FLOPS_SSYMM(__side, __m, __n) ( FMULS_SYMM(__side, (double)(__m), (double)(__n)) + FADDS_SYMM(__side, (double)(__m), (double)(__n)) )
193 
194 #define FLOPS_ZHERK(__k, __n) (6. * FMULS_HERK((double)(__k), (double)(__n)) + 2.0 * FADDS_HERK((double)(__k), (double)(__n)) )
195 #define FLOPS_CHERK(__k, __n) (6. * FMULS_HERK((double)(__k), (double)(__n)) + 2.0 * FADDS_HERK((double)(__k), (double)(__n)) )
196 
197 #define FLOPS_ZSYRK(__k, __n) (6. * FMULS_SYRK((double)(__k), (double)(__n)) + 2.0 * FADDS_SYRK((double)(__k), (double)(__n)) )
198 #define FLOPS_CSYRK(__k, __n) (6. * FMULS_SYRK((double)(__k), (double)(__n)) + 2.0 * FADDS_SYRK((double)(__k), (double)(__n)) )
199 #define FLOPS_DSYRK(__k, __n) ( FMULS_SYRK((double)(__k), (double)(__n)) + FADDS_SYRK((double)(__k), (double)(__n)) )
200 #define FLOPS_SSYRK(__k, __n) ( FMULS_SYRK((double)(__k), (double)(__n)) + FADDS_SYRK((double)(__k), (double)(__n)) )
201 
202 #define FLOPS_ZHER2K(__k, __n) (6. * FMULS_HER2K((double)(__k), (double)(__n)) + 2.0 * FADDS_HER2K((double)(__k), (double)(__n)) )
203 #define FLOPS_CHER2K(__k, __n) (6. * FMULS_HER2K((double)(__k), (double)(__n)) + 2.0 * FADDS_HER2K((double)(__k), (double)(__n)) )
204 
205 #define FLOPS_ZSYR2K(__k, __n) (6. * FMULS_SYR2K((double)(__k), (double)(__n)) + 2.0 * FADDS_SYR2K((double)(__k), (double)(__n)) )
206 #define FLOPS_CSYR2K(__k, __n) (6. * FMULS_SYR2K((double)(__k), (double)(__n)) + 2.0 * FADDS_SYR2K((double)(__k), (double)(__n)) )
207 #define FLOPS_DSYR2K(__k, __n) ( FMULS_SYR2K((double)(__k), (double)(__n)) + FADDS_SYR2K((double)(__k), (double)(__n)) )
208 #define FLOPS_SSYR2K(__k, __n) ( FMULS_SYR2K((double)(__k), (double)(__n)) + FADDS_SYR2K((double)(__k), (double)(__n)) )
209 
210 #define FLOPS_ZTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_TRMM(__side, (double)(__m), (double)(__n)) )
211 #define FLOPS_CTRMM(__side, __m, __n) (6. * FMULS_TRMM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_TRMM(__side, (double)(__m), (double)(__n)) )
212 #define FLOPS_DTRMM(__side, __m, __n) ( FMULS_TRMM(__side, (double)(__m), (double)(__n)) + FADDS_TRMM(__side, (double)(__m), (double)(__n)) )
213 #define FLOPS_STRMM(__side, __m, __n) ( FMULS_TRMM(__side, (double)(__m), (double)(__n)) + FADDS_TRMM(__side, (double)(__m), (double)(__n)) )
214 
215 #define FLOPS_ZTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_TRSM(__side, (double)(__m), (double)(__n)) )
216 #define FLOPS_CTRSM(__side, __m, __n) (6. * FMULS_TRSM(__side, (double)(__m), (double)(__n)) + 2.0 * FADDS_TRSM(__side, (double)(__m), (double)(__n)) )
217 #define FLOPS_DTRSM(__side, __m, __n) ( FMULS_TRSM(__side, (double)(__m), (double)(__n)) + FADDS_TRSM(__side, (double)(__m), (double)(__n)) )
218 #define FLOPS_STRSM(__side, __m, __n) ( FMULS_TRSM(__side, (double)(__m), (double)(__n)) + FADDS_TRSM(__side, (double)(__m), (double)(__n)) )
219 
220 /*
221  * Lapack
222  */
223 #define FLOPS_ZGETRF(__m, __n) (6. * FMULS_GETRF((double)(__m), (double)(__n)) + 2.0 * FADDS_GETRF((double)(__m), (double)(__n)) )
224 #define FLOPS_CGETRF(__m, __n) (6. * FMULS_GETRF((double)(__m), (double)(__n)) + 2.0 * FADDS_GETRF((double)(__m), (double)(__n)) )
225 #define FLOPS_DGETRF(__m, __n) ( FMULS_GETRF((double)(__m), (double)(__n)) + FADDS_GETRF((double)(__m), (double)(__n)) )
226 #define FLOPS_SGETRF(__m, __n) ( FMULS_GETRF((double)(__m), (double)(__n)) + FADDS_GETRF((double)(__m), (double)(__n)) )
227 
228 #define FLOPS_ZGETRI(__n) (6. * FMULS_GETRI((double)(__n)) + 2.0 * FADDS_GETRI((double)(__n)) )
229 #define FLOPS_CGETRI(__n) (6. * FMULS_GETRI((double)(__n)) + 2.0 * FADDS_GETRI((double)(__n)) )
230 #define FLOPS_DGETRI(__n) ( FMULS_GETRI((double)(__n)) + FADDS_GETRI((double)(__n)) )
231 #define FLOPS_SGETRI(__n) ( FMULS_GETRI((double)(__n)) + FADDS_GETRI((double)(__n)) )
232 
233 #define FLOPS_ZGETRS(__n, __nrhs) (6. * FMULS_GETRS((double)(__n), (double)(__nrhs)) + 2.0 * FADDS_GETRS((double)(__n), (double)(__nrhs)) )
234 #define FLOPS_CGETRS(__n, __nrhs) (6. * FMULS_GETRS((double)(__n), (double)(__nrhs)) + 2.0 * FADDS_GETRS((double)(__n), (double)(__nrhs)) )
235 #define FLOPS_DGETRS(__n, __nrhs) ( FMULS_GETRS((double)(__n), (double)(__nrhs)) + FADDS_GETRS((double)(__n), (double)(__nrhs)) )
236 #define FLOPS_SGETRS(__n, __nrhs) ( FMULS_GETRS((double)(__n), (double)(__nrhs)) + FADDS_GETRS((double)(__n), (double)(__nrhs)) )
237 
238 #define FLOPS_ZPOTRF(__n) (6. * FMULS_POTRF((double)(__n)) + 2.0 * FADDS_POTRF((double)(__n)) )
239 #define FLOPS_CPOTRF(__n) (6. * FMULS_POTRF((double)(__n)) + 2.0 * FADDS_POTRF((double)(__n)) )
240 #define FLOPS_DPOTRF(__n) ( FMULS_POTRF((double)(__n)) + FADDS_POTRF((double)(__n)) )
241 #define FLOPS_SPOTRF(__n) ( FMULS_POTRF((double)(__n)) + FADDS_POTRF((double)(__n)) )
242 
243 #define FLOPS_ZPOTRI(__n) (6. * FMULS_POTRI((double)(__n)) + 2.0 * FADDS_POTRI((double)(__n)) )
244 #define FLOPS_CPOTRI(__n) (6. * FMULS_POTRI((double)(__n)) + 2.0 * FADDS_POTRI((double)(__n)) )
245 #define FLOPS_DPOTRI(__n) ( FMULS_POTRI((double)(__n)) + FADDS_POTRI((double)(__n)) )
246 #define FLOPS_SPOTRI(__n) ( FMULS_POTRI((double)(__n)) + FADDS_POTRI((double)(__n)) )
247 
248 #define FLOPS_ZPOTRS(__n, __nrhs) (6. * FMULS_POTRS((double)(__n), (double)(__nrhs)) + 2.0 * FADDS_POTRS((double)(__n), (double)(__nrhs)) )
249 #define FLOPS_CPOTRS(__n, __nrhs) (6. * FMULS_POTRS((double)(__n), (double)(__nrhs)) + 2.0 * FADDS_POTRS((double)(__n), (double)(__nrhs)) )
250 #define FLOPS_DPOTRS(__n, __nrhs) ( FMULS_POTRS((double)(__n), (double)(__nrhs)) + FADDS_POTRS((double)(__n), (double)(__nrhs)) )
251 #define FLOPS_SPOTRS(__n, __nrhs) ( FMULS_POTRS((double)(__n), (double)(__nrhs)) + FADDS_POTRS((double)(__n), (double)(__nrhs)) )
252 
253 #define FLOPS_ZGEQRF(__m, __n) (6. * FMULS_GEQRF((double)(__m), (double)(__n)) + 2.0 * FADDS_GEQRF((double)(__m), (double)(__n)) )
254 #define FLOPS_CGEQRF(__m, __n) (6. * FMULS_GEQRF((double)(__m), (double)(__n)) + 2.0 * FADDS_GEQRF((double)(__m), (double)(__n)) )
255 #define FLOPS_DGEQRF(__m, __n) ( FMULS_GEQRF((double)(__m), (double)(__n)) + FADDS_GEQRF((double)(__m), (double)(__n)) )
256 #define FLOPS_SGEQRF(__m, __n) ( FMULS_GEQRF((double)(__m), (double)(__n)) + FADDS_GEQRF((double)(__m), (double)(__n)) )
257 
258 #define FLOPS_ZGEQLF(__m, __n) (6. * FMULS_GEQLF((double)(__m), (double)(__n)) + 2.0 * FADDS_GEQLF((double)(__m), (double)(__n)) )
259 #define FLOPS_CGEQLF(__m, __n) (6. * FMULS_GEQLF((double)(__m), (double)(__n)) + 2.0 * FADDS_GEQLF((double)(__m), (double)(__n)) )
260 #define FLOPS_DGEQLF(__m, __n) ( FMULS_GEQLF((double)(__m), (double)(__n)) + FADDS_GEQLF((double)(__m), (double)(__n)) )
261 #define FLOPS_SGEQLF(__m, __n) ( FMULS_GEQLF((double)(__m), (double)(__n)) + FADDS_GEQLF((double)(__m), (double)(__n)) )
262 
263 #define FLOPS_ZGERQF(__m, __n) (6. * FMULS_GERQF((double)(__m), (double)(__n)) + 2.0 * FADDS_GERQF((double)(__m), (double)(__n)) )
264 #define FLOPS_CGERQF(__m, __n) (6. * FMULS_GERQF((double)(__m), (double)(__n)) + 2.0 * FADDS_GERQF((double)(__m), (double)(__n)) )
265 #define FLOPS_DGERQF(__m, __n) ( FMULS_GERQF((double)(__m), (double)(__n)) + FADDS_GERQF((double)(__m), (double)(__n)) )
266 #define FLOPS_SGERQF(__m, __n) ( FMULS_GERQF((double)(__m), (double)(__n)) + FADDS_GERQF((double)(__m), (double)(__n)) )
267 
268 #define FLOPS_ZGELQF(__m, __n) (6. * FMULS_GELQF((double)(__m), (double)(__n)) + 2.0 * FADDS_GELQF((double)(__m), (double)(__n)) )
269 #define FLOPS_CGELQF(__m, __n) (6. * FMULS_GELQF((double)(__m), (double)(__n)) + 2.0 * FADDS_GELQF((double)(__m), (double)(__n)) )
270 #define FLOPS_DGELQF(__m, __n) ( FMULS_GELQF((double)(__m), (double)(__n)) + FADDS_GELQF((double)(__m), (double)(__n)) )
271 #define FLOPS_SGELQF(__m, __n) ( FMULS_GELQF((double)(__m), (double)(__n)) + FADDS_GELQF((double)(__m), (double)(__n)) )
272 
273 #define FLOPS_ZUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGQR((double)(__m), (double)(__n), (double)(__k)) )
274 #define FLOPS_CUNGQR(__m, __n, __k) (6. * FMULS_UNGQR((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGQR((double)(__m), (double)(__n), (double)(__k)) )
275 #define FLOPS_DUNGQR(__m, __n, __k) ( FMULS_UNGQR((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGQR((double)(__m), (double)(__n), (double)(__k)) )
276 #define FLOPS_SUNGQR(__m, __n, __k) ( FMULS_UNGQR((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGQR((double)(__m), (double)(__n), (double)(__k)) )
277 
278 #define FLOPS_ZUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGQL((double)(__m), (double)(__n), (double)(__k)) )
279 #define FLOPS_CUNGQL(__m, __n, __k) (6. * FMULS_UNGQL((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGQL((double)(__m), (double)(__n), (double)(__k)) )
280 #define FLOPS_DUNGQL(__m, __n, __k) ( FMULS_UNGQL((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGQL((double)(__m), (double)(__n), (double)(__k)) )
281 #define FLOPS_SUNGQL(__m, __n, __k) ( FMULS_UNGQL((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGQL((double)(__m), (double)(__n), (double)(__k)) )
282 
283 #define FLOPS_ZORGQR(__m, __n, __k) (6. * FMULS_ORGQR((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGQR((double)(__m), (double)(__n), (double)(__k)) )
284 #define FLOPS_CORGQR(__m, __n, __k) (6. * FMULS_ORGQR((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGQR((double)(__m), (double)(__n), (double)(__k)) )
285 #define FLOPS_DORGQR(__m, __n, __k) ( FMULS_ORGQR((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGQR((double)(__m), (double)(__n), (double)(__k)) )
286 #define FLOPS_SORGQR(__m, __n, __k) ( FMULS_ORGQR((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGQR((double)(__m), (double)(__n), (double)(__k)) )
287 
288 #define FLOPS_ZORGQL(__m, __n, __k) (6. * FMULS_ORGQL((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGQL((double)(__m), (double)(__n), (double)(__k)) )
289 #define FLOPS_CORGQL(__m, __n, __k) (6. * FMULS_ORGQL((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGQL((double)(__m), (double)(__n), (double)(__k)) )
290 #define FLOPS_DORGQL(__m, __n, __k) ( FMULS_ORGQL((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGQL((double)(__m), (double)(__n), (double)(__k)) )
291 #define FLOPS_SORGQL(__m, __n, __k) ( FMULS_ORGQL((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGQL((double)(__m), (double)(__n), (double)(__k)) )
292 
293 #define FLOPS_ZUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) )
294 #define FLOPS_CUNGRQ(__m, __n, __k) (6. * FMULS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) )
295 #define FLOPS_DUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) )
296 #define FLOPS_SUNGRQ(__m, __n, __k) ( FMULS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGRQ((double)(__m), (double)(__n), (double)(__k)) )
297 
298 #define FLOPS_ZUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) )
299 #define FLOPS_CUNGLQ(__m, __n, __k) (6. * FMULS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) )
300 #define FLOPS_DUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) )
301 #define FLOPS_SUNGLQ(__m, __n, __k) ( FMULS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_UNGLQ((double)(__m), (double)(__n), (double)(__k)) )
302 
303 #define FLOPS_ZORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) )
304 #define FLOPS_CORGRQ(__m, __n, __k) (6. * FMULS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) )
305 #define FLOPS_DORGRQ(__m, __n, __k) ( FMULS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) )
306 #define FLOPS_SORGRQ(__m, __n, __k) ( FMULS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGRQ((double)(__m), (double)(__n), (double)(__k)) )
307 
308 #define FLOPS_ZORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) )
309 #define FLOPS_CORGLQ(__m, __n, __k) (6. * FMULS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) + 2.0 * FADDS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) )
310 #define FLOPS_DORGLQ(__m, __n, __k) ( FMULS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) )
311 #define FLOPS_SORGLQ(__m, __n, __k) ( FMULS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) + FADDS_ORGLQ((double)(__m), (double)(__n), (double)(__k)) )
312 
313 #define FLOPS_ZGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) + 2.0 * FADDS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) )
314 #define FLOPS_CGEQRS(__m, __n, __nrhs) (6. * FMULS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) + 2.0 * FADDS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) )
315 #define FLOPS_DGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) + FADDS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) )
316 #define FLOPS_SGEQRS(__m, __n, __nrhs) ( FMULS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) + FADDS_GEQRS((double)(__m), (double)(__n), (double)(__nrhs)) )
317 
318 #define FLOPS_ZTRTRI(__n) (6. * FMULS_TRTRI((double)(__n)) + 2.0 * FADDS_TRTRI((double)(__n)) )
319 #define FLOPS_CTRTRI(__n) (6. * FMULS_TRTRI((double)(__n)) + 2.0 * FADDS_TRTRI((double)(__n)) )
320 #define FLOPS_DTRTRI(__n) ( FMULS_TRTRI((double)(__n)) + FADDS_TRTRI((double)(__n)) )
321 #define FLOPS_STRTRI(__n) ( FMULS_TRTRI((double)(__n)) + FADDS_TRTRI((double)(__n)) )
322 
323 #define FLOPS_ZGEHRD(__n) (6. * FMULS_GEHRD((double)(__n)) + 2.0 * FADDS_GEHRD((double)(__n)) )
324 #define FLOPS_CGEHRD(__n) (6. * FMULS_GEHRD((double)(__n)) + 2.0 * FADDS_GEHRD((double)(__n)) )
325 #define FLOPS_DGEHRD(__n) ( FMULS_GEHRD((double)(__n)) + FADDS_GEHRD((double)(__n)) )
326 #define FLOPS_SGEHRD(__n) ( FMULS_GEHRD((double)(__n)) + FADDS_GEHRD((double)(__n)) )
327 
328 #define FLOPS_ZHETRD(__n) (6. * FMULS_HETRD((double)(__n)) + 2.0 * FADDS_HETRD((double)(__n)) )
329 #define FLOPS_CHETRD(__n) (6. * FMULS_HETRD((double)(__n)) + 2.0 * FADDS_HETRD((double)(__n)) )
330 
331 #define FLOPS_ZSYTRD(__n) (6. * FMULS_SYTRD((double)(__n)) + 2.0 * FADDS_SYTRD((double)(__n)) )
332 #define FLOPS_CSYTRD(__n) (6. * FMULS_SYTRD((double)(__n)) + 2.0 * FADDS_SYTRD((double)(__n)) )
333 #define FLOPS_DSYTRD(__n) ( FMULS_SYTRD((double)(__n)) + FADDS_SYTRD((double)(__n)) )
334 #define FLOPS_SSYTRD(__n) ( FMULS_SYTRD((double)(__n)) + FADDS_SYTRD((double)(__n)) )
335 
336 #define FLOPS_ZGEBRD(__m, __n) (6. * FMULS_GEBRD((double)(__m), (double)(__n)) + 2.0 * FADDS_GEBRD((double)(__m), (double)(__n)) )
337 #define FLOPS_CGEBRD(__m, __n) (6. * FMULS_GEBRD((double)(__m), (double)(__n)) + 2.0 * FADDS_GEBRD((double)(__m), (double)(__n)) )
338 #define FLOPS_DGEBRD(__m, __n) ( FMULS_GEBRD((double)(__m), (double)(__n)) + FADDS_GEBRD((double)(__m), (double)(__n)) )
339 #define FLOPS_SGEBRD(__m, __n) ( FMULS_GEBRD((double)(__m), (double)(__n)) + FADDS_GEBRD((double)(__m), (double)(__n)) )
340 
341 #endif /* _FLOPS_H_ */