/*
 * Tile-access shorthands. Each macro forwards the like-named object
 * (A, B or T), the element type PLASMA_Complex64_t and a (row, column)
 * index pair to BLKADDR. BLKADDR is defined outside this excerpt;
 * presumably it yields the address of the indexed tile -- TODO confirm
 * against its definition.
 */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, (m), (n))
20 #define B(m,n) BLKADDR(B, PLASMA_Complex64_t, (m), (n))
21 #define T(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n))
/*
 * Same object as T(m,n), but the column index is shifted by A.nt.
 * NOTE(review): this looks like it addresses a second group of columns
 * stored after the first A.nt columns of T -- verify against the caller
 * that allocates T.
 */
22 #define T2(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n)+A.nt)
36 int ldaM, ldam, ldan, ldaMRD;
37 int ldbM, ldbm, ldbMRD;
38 int tempMm, tempkn, tempnn, tempmm, tempMRDm, tempkmin;
54 for (k = 0; k < K; k++) {
55 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
56 for (M = k; M < A.
mt; M += BS) {
57 tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
58 tempkmin =
min(tempMm, tempkn);
61 for (n = 0; n < B.
nt; n++) {
62 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
64 plasma->
quark, &task_flags,
72 for (m = M+1; m <
min(M+BS, A.
mt); m++) {
73 tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
76 for (n = 0; n < B.
nt; n++) {
77 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
79 plasma->
quark, &task_flags,
81 A.
nb, tempnn, tempmm, tempnn,
90 for (RD = BS; RD < A.
mt-k; RD *= 2) {
91 for (M = k; M+RD < A.
mt; M += 2*RD) {
92 tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
96 for (n = 0; n < B.
nt; n++) {
97 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
99 plasma->
quark, &task_flags,
101 A.
nb, tempnn, tempMRDm, tempnn,
115 for (k = K-1; k >= 0; k--) {
116 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
118 for (RD = BS; RD < A.
mt-k; RD *= 2)
120 for (RD = lastRD; RD >= BS; RD /= 2) {
121 for (M = k; M+RD < A.
mt; M += 2*RD) {
122 tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
126 for (n = 0; n < B.
nt; n++) {
127 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
129 plasma->
quark, &task_flags,
131 A.
nb, tempnn, tempMRDm, tempnn,
140 for (M = k; M < A.
mt; M += BS) {
141 tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
142 tempkmin =
min(tempMm, tempkn);
145 for (m =
min(M+BS, A.
mt)-1; m > M; m--) {
146 tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
149 for (n = 0; n < B.
nt; n++) {
150 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
152 plasma->
quark, &task_flags,
154 A.
nb, tempnn, tempmm, tempnn,
162 for (n = 0; n < B.
nt; n++) {
163 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
165 plasma->
quark, &task_flags,
181 for (k = K-1; k >= 0; k--) {
182 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
184 for (RD = BS; RD < A.
mt-k; RD *= 2)
186 for (RD = lastRD; RD >= BS; RD /= 2) {
187 for (M = k; M+RD < A.
mt; M += 2*RD) {
188 tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
190 for (m = 0; m < B.
mt; m++) {
192 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
194 plasma->
quark, &task_flags,
196 tempmm, B.
nb, tempmm, tempMRDm,
205 for (M = k; M < A.
mt; M += BS) {
206 tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
207 tempkmin =
min(tempMm, tempkn);
210 for (n =
min(M+BS, A.
mt)-1; n > M; n--) {
212 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
213 for (m = 0; m < B.
mt; m++) {
215 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
217 plasma->
quark, &task_flags,
219 tempmm, tempMm, tempmm, tempnn,
227 for (m = 0; m < B.
mt; m++) {
229 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
231 plasma->
quark, &task_flags,
245 for (k = 0; k < K; k++) {
246 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
247 for (M = k; M < A.
mt; M += BS) {
248 tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
249 tempkmin =
min(tempMm, tempkn);
251 for (m = 0; m < B.
mt; m++) {
253 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
255 plasma->
quark, &task_flags,
263 for (n = M+1; n <
min(M+BS, A.
mt); n++) {
264 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
266 for (m = 0; m < B.
mt; m++) {
267 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
270 plasma->
quark, &task_flags,
272 tempmm, tempMm, tempmm, tempnn,
281 for (RD = BS; RD < A.
mt-k; RD *= 2) {
282 for (M = k; M+RD < A.
mt; M += 2*RD) {
283 tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
285 for (m = 0; m < B.
mt; m++) {
286 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
289 plasma->
quark, &task_flags,
291 tempmm, B.
nb, tempmm, tempMRDm,