PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
strsmpl.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
/*
 * PLASMA_strsmpl - entry point taking plain pointers with leading dimensions
 * (A/LDA, B/LDB). It validates the scalar arguments, asks plasma_tune() for
 * the block sizes, wraps A, B and L in PLASMA_desc descriptors and forwards
 * the work to PLASMA_strsmpl_Tile_Async().
 *
 * Return values visible in this listing:
 *   -1, -2, -4, -8   illegal N, NRHS, LDA or LDB respectively
 *   PLASMA_SUCCESS   quick return when min(N, NRHS) == 0
 *   plasma_tune()'s status when tuning fails
 *   sequence->status otherwise
 *
 * NOTE(review): this listing is incomplete. The embedded listing numbers skip
 * 74, 76, 82, 121, 126, 137, 139 and 145, so the declarations of `plasma` and
 * `request`, the first argument of plasma_desc_init() and the `if` lines that
 * pair with the two `} else {` branches are not shown. Confirm against the
 * original file before relying on any of the notes below.
 */
67 int PLASMA_strsmpl(int N, int NRHS,
68  float *A, int LDA,
69  float *L, int *IPIV,
70  float *B, int LDB)
71 {
72  int NB, IB, IBNB, NT;
73  int status;
75  PLASMA_sequence *sequence = NULL;
77  PLASMA_desc descA, descB, descL;
78 
79  plasma = plasma_context_self();
80  if (plasma == NULL) {
81  plasma_fatal_error("PLASMA_strsmpl", "PLASMA not initialized");
/* NOTE(review): no return is visible after this fatal error (listing number
 * 82 is absent) - confirm that the original returns here. */
83  }
84  /* Check input arguments */
/* Each failure returns the negated 1-based position of the offending
 * argument in the parameter list (N=1, NRHS=2, LDA=4, LDB=8). */
85  if (N < 0) {
86  plasma_error("PLASMA_strsmpl", "illegal value of N");
87  return -1;
88  }
89  if (NRHS < 0) {
90  plasma_error("PLASMA_strsmpl", "illegal value of NRHS");
91  return -2;
92  }
93  if (LDA < max(1, N)) {
94  plasma_error("PLASMA_strsmpl", "illegal value of LDA");
95  return -4;
96  }
97  if (LDB < max(1, N)) {
98  plasma_error("PLASMA_strsmpl", "illegal value of LDB");
99  return -8;
100  }
101  /* Quick return */
/* Nothing to do when either N or NRHS is zero. */
102  if (min(N, NRHS) == 0)
103  return PLASMA_SUCCESS;
104 
105  /* Tune NB & IB depending on N & NRHS; Set NBNB */
106  status = plasma_tune(PLASMA_FUNC_SGESV, N, N, NRHS);
107  if (status != PLASMA_SUCCESS) {
108  plasma_error("PLASMA_strsmpl", "plasma_tune() failed");
109  return status;
110  }
111 
112  /* Set NB, IB, IBNB & NT */
113  NB = PLASMA_NB;
114  IB = PLASMA_IB;
115  IBNB = IB*NB;
/* NT = ceil(N / NB): number of NB-sized blocks needed to cover N. */
116  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
117 
/* The result of plasma_sequence_create() is not checked here. */
118  plasma_sequence_create(plasma, &sequence);
119 
/* Describe L with IB-by-NB blocks covering NT*IB by NT*NB entries.
 * NOTE(review): the first argument of plasma_desc_init() (listing number 121)
 * is absent from this listing. */
120  descL = plasma_desc_init(
122  IB, NB, IBNB,
123  NT*IB, NT*NB, 0, 0, NT*IB, NT*NB);
/* descL points directly at the caller's L buffer; no copy is made here. */
124  descL.mat = L;
125 
/* NOTE(review): the `if` opening this branch (listing number 126) is absent.
 * The last argument of each plasma_sooplap2tile() use is a statement list that
 * frees descriptor storage - presumably run when the conversion cannot
 * allocate; verify against the macro definition. */
127  plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
128  plasma_sooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
129  } else {
130  plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
131  plasma_siplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
132  }
133 
134  /* Call the tile interface */
135  PLASMA_strsmpl_Tile_Async(&descA, &descL, IPIV, &descB, sequence, &request);
136 
/* NOTE(review): the `if` opening this branch (listing number 137) is absent.
 * In this branch only B is converted back before the storage of descA and
 * descB is freed; the else branch converts both A and B and frees nothing in
 * the visible lines. */
138  plasma_sooptile2lap( descB, B, NB, NB, LDB, NRHS );
140  plasma_desc_mat_free(&descA);
141  plasma_desc_mat_free(&descB);
142  } else {
143  plasma_siptile2lap( descA, A, NB, NB, LDA, N );
144  plasma_siptile2lap( descB, B, NB, NB, LDB, NRHS );
146  }
147 
/* Read the status before the sequence is destroyed. */
148  status = sequence->status;
149  plasma_sequence_destroy(plasma, sequence);
150  return status;
151 }
152 
153 /***************************************************************************/
192 {
/*
 * Body of the routine that its own error message names "PLASMA_strsmpl_Tile".
 * It creates a sequence, forwards A, L, IPIV and B to
 * PLASMA_strsmpl_Tile_Async(), and returns the status recorded in the
 * sequence.
 *
 * NOTE(review): this listing is incomplete. The function signature and the
 * listing numbers 193, 195, 201 and 205 are absent, so the declarations of
 * `plasma` and `request`, any return after the fatal error, and whatever
 * statement sits between the async call and the status read are not shown.
 * Confirm against the original file.
 */
194  PLASMA_sequence *sequence = NULL;
196  int status;
197 
198  plasma = plasma_context_self();
199  if (plasma == NULL) {
200  plasma_fatal_error("PLASMA_strsmpl_Tile", "PLASMA not initialized");
202  }
/* The result of plasma_sequence_create() is not checked here. */
203  plasma_sequence_create(plasma, &sequence);
204  PLASMA_strsmpl_Tile_Async(A, L, IPIV, B, sequence, &request);
/* Read the status before the sequence is destroyed. */
206  status = sequence->status;
207  plasma_sequence_destroy(plasma, sequence);
208  return status;
209 }
210 
211 /***************************************************************************/
241  PLASMA_sequence *sequence, PLASMA_request *request)
242 {
/*
 * Tail of a definition that takes a sequence and a request. The parameter
 * list matches the PLASMA_strsmpl_Tile_Async() calls elsewhere in this file,
 * so this is presumably that function - confirm, since the head of the
 * signature is absent from this listing.
 *
 * Visible behaviour: validates the context, sequence, request and the three
 * descriptors, requires square blocks for A and B, then passes descA, descB,
 * descL, IPIV, sequence and request to a call whose head is not shown, and
 * returns PLASMA_SUCCESS.
 *
 * NOTE(review): listing numbers 246, 251 and 290 are absent, so the
 * declaration of `plasma`, any return after the first fatal error and the
 * name of the call that receives the argument list at the end are not shown.
 * NOTE(review): every message here says "PLASMA_strsmpl_Tile" rather than an
 * Async name - check whether that is intentional.
 */
/* Work on local copies of the caller's descriptors. */
243  PLASMA_desc descA = *A;
244  PLASMA_desc descL = *L;
245  PLASMA_desc descB = *B;
247 
248  plasma = plasma_context_self();
249  if (plasma == NULL) {
250  plasma_fatal_error("PLASMA_strsmpl_Tile", "PLASMA not initialized");
252  }
253  if (sequence == NULL) {
254  plasma_fatal_error("PLASMA_strsmpl_Tile", "NULL sequence");
255  return PLASMA_ERR_UNALLOCATED;
256  }
257  if (request == NULL) {
258  plasma_fatal_error("PLASMA_strsmpl_Tile", "NULL request");
259  return PLASMA_ERR_UNALLOCATED;
260  }
261  /* Check sequence status */
/* If the sequence already carries a non-success status, the request is
 * failed with PLASMA_ERR_SEQUENCE_FLUSHED and no further checks are made. */
262  if (sequence->status == PLASMA_SUCCESS)
263  request->status = PLASMA_SUCCESS;
264  else
265  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
266 
267  /* Check descriptors for correctness */
268  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
269  plasma_error("PLASMA_strsmpl_Tile", "invalid first descriptor");
270  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
271  }
272  if (plasma_desc_check(&descL) != PLASMA_SUCCESS) {
273  plasma_error("PLASMA_strsmpl_Tile", "invalid second descriptor");
274  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
275  }
276  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
277  plasma_error("PLASMA_strsmpl_Tile", "invalid third descriptor");
278  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
279  }
280  /* Check input arguments */
/* Only descA and descB are required to have nb == mb; descL is not tested. */
281  if (descA.nb != descA.mb || descB.nb != descB.mb) {
282  plasma_error("PLASMA_strsmpl_Tile", "only square tiles supported");
283  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
284  }
285  /* Quick return */
/* NOTE(review): the quick return below is disabled; it refers to N and NRHS,
 * which are not declared in the visible part of this function. */
286 /*
287  if (min(N, NRHS) == 0)
288  return PLASMA_SUCCESS;
289 */
/* NOTE(review): the lines below are (type, value) argument pairs; the call
 * they belong to (listing number 290) is absent from this listing. */
291  PLASMA_desc, descA,
292  PLASMA_desc, descB,
293  PLASMA_desc, descL,
294  int*, IPIV,
295  PLASMA_sequence*, sequence,
296  PLASMA_request*, request);
297 
298  return PLASMA_SUCCESS;
299 }