PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dgessm.c
Go to the documentation of this file.
1 
17 #include <cblas.h>
18 #include <lapacke.h>
19 #include "common.h"
20 
21 /***************************************************************************/
64 #if defined(PLASMA_HAVE_WEAK)
65 #pragma weak CORE_dgessm = PCORE_dgessm
66 #define CORE_dgessm PCORE_dgessm
67 #endif
68 int CORE_dgessm(int M, int N, int K, int IB,
69  int *IPIV,
70  double *L, int LDL,
71  double *A, int LDA)
72 {
73  static double zone = 1.0;
74  static double mzone = -1.0;
75  static int ione = 1;
76 
77  int i, sb;
78  int tmp, tmp2;
79 
80  /* Check input arguments */
81  if (M < 0) {
82  coreblas_error(1, "Illegal value of M");
83  return -1;
84  }
85  if (N < 0) {
86  coreblas_error(2, "Illegal value of N");
87  return -2;
88  }
89  if (K < 0) {
90  coreblas_error(3, "Illegal value of K");
91  return -3;
92  }
93  if (IB < 0) {
94  coreblas_error(4, "Illegal value of IB");
95  return -4;
96  }
97  if ((LDL < max(1,M)) && (M > 0)) {
98  coreblas_error(7, "Illegal value of LDL");
99  return -7;
100  }
101  if ((LDA < max(1,M)) && (M > 0)) {
102  coreblas_error(9, "Illegal value of LDA");
103  return -9;
104  }
105 
106  /* Quick return */
107  if ((M == 0) || (N == 0) || (K == 0) || (IB == 0))
108  return PLASMA_SUCCESS;
109 
110  for(i = 0; i < K; i += IB) {
111  sb = min(IB, K-i);
112  /*
113  * Apply interchanges to columns I*IB+1:IB*( I+1 )+1.
114  */
115  tmp = i+1;
116  tmp2 = i+sb;
117  LAPACKE_dlaswp_work(LAPACK_COL_MAJOR, N, A, LDA, tmp, tmp2, IPIV, ione);
118  /*
119  * Compute block row of U.
120  */
121  cblas_dtrsm(
123  sb, N, (zone),
124  &L[LDL*i+i], LDL,
125  &A[i], LDA );
126 
127  if (i+sb < M) {
128  /*
129  * Update trailing submatrix.
130  */
131  cblas_dgemm(
133  M-(i+sb), N, sb,
134  (mzone), &L[LDL*i+(i+sb)], LDL,
135  &A[i], LDA,
136  (zone), &A[i+sb], LDA );
137  }
138  }
139  return PLASMA_SUCCESS;
140 }
141 
142 /***************************************************************************/
145 void QUARK_CORE_dgessm(Quark *quark, Quark_Task_Flags *task_flags,
146  int m, int n, int k, int ib, int nb,
147  int *IPIV,
148  double *L, int ldl,
149  double *A, int lda)
150 {
152  QUARK_Insert_Task(quark, CORE_dgessm_quark, task_flags,
153  sizeof(int), &m, VALUE,
154  sizeof(int), &n, VALUE,
155  sizeof(int), &k, VALUE,
156  sizeof(int), &ib, VALUE,
157  sizeof(int)*nb, IPIV, INPUT,
158  sizeof(double)*nb*nb, L, INPUT | QUARK_REGION_L,
159  sizeof(int), &ldl, VALUE,
160  sizeof(double)*nb*nb, A, INOUT,
161  sizeof(int), &lda, VALUE,
162  0);
163 }
164 
165 /***************************************************************************/
168 #if defined(PLASMA_HAVE_WEAK)
169 #pragma weak CORE_dgessm_quark = PCORE_dgessm_quark
170 #define CORE_dgessm_quark PCORE_dgessm_quark
171 #endif
173 {
174  int m;
175  int n;
176  int k;
177  int ib;
178  int *IPIV;
179  double *L;
180  int ldl;
181  double *A;
182  int lda;
183 
184  quark_unpack_args_9(quark, m, n, k, ib, IPIV, L, ldl, A, lda);
185  CORE_dgessm(m, n, k, ib, IPIV, L, ldl, A, lda);
186 }