PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_chbrce.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 
18 /***************************************************************************/
68 /***************************************************************************/
/*
 * Element-access helpers used by CORE_chbrce. All three take 1-based indices
 * and convert them to 0-based ones.
 *
 * These are function-like macros that deliberately share their names with the
 * identifiers they wrap, so they expand only when followed by '('. They pick
 * up A, V, TAU and eltsize from the scope in which they are used.
 *
 *   A(m, n) - pointer to element (m, n) of the matrix described by A,
 *             obtained from plasma_geteltaddr().
 *   V(m)    - pointer to the m-th entry of V.
 *   TAU(m)  - pointer to the m-th entry of TAU.
 *
 * Each expansion is wrapped in parentheses so it behaves as a single operand
 * in any surrounding expression.
 */
#define A(_m, _n) ((PLASMA_Complex32_t *)plasma_geteltaddr(A, ((_m)-1), ((_n)-1), eltsize))
#define V(_m)     (&(V[(_m)-1]))
#define TAU(_m)   (&(TAU[(_m)-1]))
75 int
76 CORE_chbrce(int uplo, int N,
77  PLASMA_desc *A,
80  int st,
81  int ed,
82  int eltsize)
83 {
84  int NB, J1, J2, J3, KDM2, len, pt;
85  int len1, len2, t1ed, t2st;
86  int i;
87  static PLASMA_Complex32_t zzero = 0.0;
88  PLASMA_desc vA=*A;
89 
90 
91  /* Check input arguments */
92  if (N < 0) {
93  coreblas_error(2, "Illegal value of N");
94  return -2;
95  }
96  if (ed <= st) {
97  coreblas_error(6, "Illegal value of st and ed (internal)");
98  return -6;
99  }
100 
101  /* Quick return */
102  if (N == 0)
103  return PLASMA_SUCCESS;
104 
105  NB = A->mb;
106  KDM2 = A->mb-2;
107  if( uplo == PlasmaLower ) {
108  /* ========================
109  * LOWER CASE
110  * ========================*/
111  for (i = ed; i >= st+1 ; i--){
112  /* apply Householder from the right. and create newnnz outside the band if J3 < N */
113  J1 = ed+1;
114  J2 = min((i+1+KDM2), N);
115  J3 = min((J2+1), N);
116  len = J3-J1+1;
117  if(J3>J2)*A(J3,(i-1))=zzero;/* could be removed because A is supposed to be band.*/
118 
119  t1ed = (J3/NB)*NB;
120  t2st = max(t1ed+1,J1);
121  len1 = t1ed-J1+1; /* can be negative*/
122  len2 = J3-t2st+1;
123  if(len1>0)CORE_clarfx2(PlasmaRight, len1, *V(i), *TAU(i), A(J1, i-1), ELTLDD(vA, J1), A(J1 , i), ELTLDD(vA, J1) );
124  if(len2>0)CORE_clarfx2(PlasmaRight, len2, *V(i), *TAU(i), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st));
125  /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it.*/
126  len = J3-J2; // soit 1 soit 0
127  if(len>0){
128  /* generate Householder to annihilate a(j+kd,j-1) within the band */
129  *V(J3) = *A(J3,i-1);
130  *A(J3,i-1) = 0.0;
131  LAPACKE_clarfg_work( 2, A(J2,i-1), V(J3), 1, TAU(J3));
132  }
133  }
134  /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
135  for (i = ed; i >= st+1 ; i--){
136  /* find if there was a nnz created. if yes apply left else nothing to be done.*/
137  J2 = min((i+1+KDM2), N);
138  J3 = min((J2+1), N);
139  len = J3-J2;
140  if(len>0){
141  pt = J2;
142  J1 = i;
143  J2 = min(ed,N);
144  t1ed = (J2/NB)*NB;
145  t2st = max(t1ed+1,J1);
146  len1 = t1ed-J1+1; /* can be negative*/
147  len2 = J2-t2st+1;
148  if(len1>0)CORE_clarfx2(PlasmaLeft, len1 , *V(J3), conjf(*TAU(J3)), A(pt, i ), ELTLDD(vA, pt), A((pt+1), i ), ELTLDD(vA, pt+1) );
149  if(len2>0)CORE_clarfx2(PlasmaLeft, len2 , *V(J3), conjf(*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt), A((pt+1), t2st), ELTLDD(vA, pt+1) );
150  }
151  }
152  } else {
153  /* ========================
154  * UPPER CASE
155  * ========================*/
156  for (i = ed; i >= st+1 ; i--){
157  /* apply Householder from the right. and create newnnz outside the band if J3 < N */
158  J1 = ed+1;
159  J2 = min((i+1+KDM2), N);
160  J3 = min((J2+1), N);
161  len = J3-J1+1;
162  if(J3>J2)*A((i-1), J3)=zzero;/* could be removed because A is supposed to be band.*/
163 
164  t1ed = (J3/NB)*NB;
165  t2st = max(t1ed+1,J1);
166  len1 = t1ed-J1+1; /* can be negative*/
167  len2 = J3-t2st+1;
168  if(len1>0)CORE_clarfx2(PlasmaLeft, len1 , conjf(*V(i)), *TAU(i), A(i-1, J1 ), ELTLDD(vA, (i-1)), A(i, J1 ), ELTLDD(vA, i) );
169  if(len2>0)CORE_clarfx2(PlasmaLeft, len2 , conjf(*V(i)), *TAU(i), A(i-1, t2st), ELTLDD(vA, (i-1)), A(i, t2st), ELTLDD(vA, i) );
170  /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it.*/
171  len = J3-J2; /* either 1 soit 0*/
172  if(len>0){
173  /* generate Householder to annihilate a(j+kd,j-1) within the band*/
174  *V(J3) = *A((i-1), J3);
175  *A((i-1), J3) = 0.0;
176  LAPACKE_clarfg_work( 2, A((i-1), J2), V(J3), 1, TAU(J3));
177  }
178  }
179  /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2*/
180  for (i = ed; i >= st+1 ; i--){
181  /* find if there was a nnz created. if yes apply right else nothing to be done.*/
182  J2 = min((i+1+KDM2), N);
183  J3 = min((J2+1), N);
184  len = J3-J2;
185  if(len>0){
186  pt = J2;
187  J1 = i;
188  J2 = min(ed,N);
189  t1ed = (J2/NB)*NB;
190  t2st = max(t1ed+1,J1);
191  len1 = t1ed-J1+1; /* can be negative*/
192  len2 = J2-t2st+1;
193  if(len1>0)CORE_clarfx2(PlasmaRight, len1 , conjf(*V(J3)), conjf(*TAU(J3)), A(i , pt), ELTLDD(vA, i), A(i, pt+1), ELTLDD(vA, i) );
194  if(len2>0)CORE_clarfx2(PlasmaRight, len2 , conjf(*V(J3)), conjf(*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st), A(t2st, pt+1), ELTLDD(vA, t2st) );
195  }
196  }
197  } /* end of else for the upper case */
198 
199  return PLASMA_SUCCESS;
200 }