PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_shbelr.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 
18 /***************************************************************************/
70 /***************************************************************************/
74 #define A(_m, _n) (float *)plasma_geteltaddr(A, ((_m)-1), ((_n)-1), eltsize)
75 #define V(_m) &(V[(_m)-1])
76 #define TAU(_m) &(TAU[(_m)-1])
77 int
78 CORE_shbelr(int uplo, int N,
79  PLASMA_desc *A,
80  float *V,
81  float *TAU,
82  int st,
83  int ed,
84  int eltsize)
85 {
86  int NB, J1, J2;
87  int len1, len2, t1ed, t2st;
88  int i;
89  static float zzero = 0.0;
90  PLASMA_desc vA=*A;
91 
92 
93  /* Check input arguments */
94  if (N < 0) {
95  coreblas_error(2, "Illegal value of N");
96  return -2;
97  }
98  if (ed <= st) {
99  coreblas_error(23, "Illegal value of st and ed (internal)");
100  return -23;
101  }
102 
103  /* Quick return */
104  if (N == 0)
105  return PLASMA_SUCCESS;
106 
107  NB = A->mb;
108  if( uplo == PlasmaLower ) {
109  /* ========================
110  * LOWER CASE
111  * ========================*/
112  for (i = ed; i >= st+1 ; i--){
113  /* generate Householder to annihilate a(i+k-1,i) within the band */
114  *V(i) = *A(i, (st-1));
115  *A(i, (st-1)) = zzero;
116  LAPACKE_slarfg_work( 2, A((i-1),(st-1)), V(i), 1, TAU(i));
117 
118  /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
119  J1 = st;
120  J2 = i-2;
121  t1ed = (J2/NB)*NB;
122  t2st = max(t1ed+1,J1);
123  len1 = t1ed-J1+1; /* can be negative */
124  len2 = J2-t2st+1;
125  if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, i-1), A(i, J1 ), ELTLDD(vA, i) );
126  if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i) );
127  CORE_slarfx2c(PlasmaLower, *V(i), *TAU(i), A(i-1, i-1), A(i, i-1), A(i, i));
128  }
129  /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 1 */
130  for (i = ed; i >= st+1 ; i--){
131  J1 = i+1;
132  J2 = min(ed,N);
133  t1ed = (J2/NB)*NB;
134  t2st = max(t1ed+1,J1);
135  len1 = t1ed-J1+1; /* can be negative */
136  len2 = J2-t2st+1;
137  if(len1>0)CORE_slarfx2(PlasmaRight, len1, *V(i), *TAU(i), A(J1, i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
138  if(len2>0)CORE_slarfx2(PlasmaRight, len2, *V(i), *TAU(i), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
139  }
140  }else{
141  /* ========================
142  * UPPER CASE
143  * ========================*/
144  for (i = ed; i >= st+1 ; i--){
145  /* generate Householder to annihilate a(i+k-1,i) within the band*/
146  *V(i) = *A((st-1), i);
147  *A((st-1), i) = zzero;
148  LAPACKE_slarfg_work( 2, A(st-1, i-1), V(i), 1, TAU(i));
149 
150  /* apply reflector from the left (horizontal row) and from the right for only the diagonal 2x2.*/
151  J1 = st;
152  J2 = i-2;
153  t1ed = (J2/NB)*NB;
154  t2st = max(t1ed+1,J1);
155  len1 = t1ed-J1+1; /* can be negative */
156  len2 = J2-t2st+1;
157  if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1, i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
158  if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st) );
159  CORE_slarfx2c(PlasmaUpper, *V(i), *TAU(i), A(i-1, i-1), A(i-1, i), A(i,i));
160  }
161  /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 1 */
162  for (i = ed; i >= st+1 ; i--){
163  J1 = i+1;
164  J2 = min(ed,N);
165  t1ed = (J2/NB)*NB;
166  t2st = max(t1ed+1,J1);
167  len1 = t1ed-J1+1; /* can be negative */
168  len2 = J2-t2st+1;
169  if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , (*V(i)), *TAU(i), A(i-1, J1 ), ELTLDD(vA, i-1), A(i, J1 ), ELTLDD(vA, i) );
170  if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , (*V(i)), *TAU(i), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i) );
171  }
172  } /* end of else for the upper case */
173 
174  return PLASMA_SUCCESS;
175 
176 }