PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zhegv.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 
18 /***************************************************************************/
129  PLASMA_Complex64_t *A, int LDA,
130  PLASMA_Complex64_t *B, int LDB,
131  double *W,
132  PLASMA_desc *descT,
133  PLASMA_Complex64_t *Q, int LDQ)
134 {
135  int NB, IB, NT;
136  int status;
138  PLASMA_sequence *sequence = NULL;
140  PLASMA_desc descA, descB, descQ;
141 
142  plasma = plasma_context_self();
143  if (plasma == NULL) {
144  plasma_error("PLASMA_zhegv", "PLASMA not initialized");
146  }
147 
148  /* Tune NB & IB depending on N; Set NBNB */
149  status = plasma_tune(PLASMA_FUNC_ZHEGV, N, N, 0);
150  if (status != PLASMA_SUCCESS) {
151  plasma_error("PLASMA_zhegv", "plasma_tune() failed");
152  return status;
153  }
154 
155  /* Set NT */
156  NB = PLASMA_NB;
157  IB = PLASMA_IB;
158  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
159 
160  /* Check input arguments */
161  if (itype != 1 && itype != 2 && itype != 3) {
162  plasma_error("PLASMA_zhegv", "Illegal value of itype");
163  return -1;
164  }
165  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
166  plasma_error("PLASMA_zhegv", "illegal value of jobz");
167  return -2;
168  }
169  if (uplo != PlasmaLower && uplo!= PlasmaUpper) {
170  plasma_error("PLASMA_zhegv", "only PlasmaLower supported");
171  return -3;
172  }
173  if (N < 0) {
174  plasma_error("PLASMA_zhegv", "illegal value of N");
175  return -4;
176  }
177  if (LDA < max(1, N)) {
178  plasma_error("PLASMA_zhegv", "illegal value of LDA");
179  return -6;
180  }
181  if (LDB < max(1, N)) {
182  plasma_error("PLASMA_zhegv", "illegal value of LDB");
183  return -8;
184  }
185  if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
186  ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
187  plasma_error("PLASMA_zhegv", "invalid T descriptor");
188  return -10;
189  }
190  if (LDQ < max(1, N)) {
191  plasma_error("PLASMA_zhegv", "illegal value of LDQ");
192  return -12;
193  }
194 
195  /* Quick return */
196  if (N == 0)
197  return PLASMA_SUCCESS;
198 
199  if (jobz == PlasmaVec) {
200  plasma_error("PLASMA_zhegv", "computing the eigenvectors is not supported in this version");
201  return -1;
202  }
203 
204  plasma_sequence_create(plasma, &sequence);
205 
207  plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N,
208  plasma_desc_mat_free(&(descA)) );
209  plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N,
210  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)) );
211  if (jobz == PlasmaVec) {
212  /* No need for conversion, it's just output */
213  plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
214  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descQ)) );
215  }
216  } else {
217  plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
218  plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, N, N );
219  if (jobz == PlasmaVec) {
220  /* No need for conversion, it's just output */
221  descQ = plasma_desc_init(
222  PlasmaComplexDouble, NB, NB, NB*NB,
223  LDQ, N, 0, 0, N, N);
224  descQ.mat = Q;
225  }
226  }
227 
228  /* Call the tile interface */
229  PLASMA_zhegv_Tile_Async(itype, PlasmaNoVec, uplo,
230  &descA, &descB, W,
231  descT, &descQ,
232  sequence, &request);
233 
235  plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
236  plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
237  if (jobz == PlasmaVec) {
238  plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
239  }
241  plasma_desc_mat_free(&descA);
242  plasma_desc_mat_free(&descB);
243  if (jobz == PlasmaVec)
244  plasma_desc_mat_free(&descQ);
245  } else {
246  plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
247  plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
248  if (jobz == PlasmaVec)
249  plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
251  }
252 
253  status = sequence->status;
254  plasma_sequence_destroy(plasma, sequence);
255  return status;
256 }
257 /***************************************************************************/
370  PLASMA_desc *A, PLASMA_desc *B, double *W,
372 {
374  PLASMA_sequence *sequence = NULL;
376  int status;
377 
378  plasma = plasma_context_self();
379  if (plasma == NULL) {
380  plasma_fatal_error("PLASMA_zhegv_Tile", "PLASMA not initialized");
382  }
383  plasma_sequence_create(plasma, &sequence);
384  PLASMA_zhegv_Tile_Async(itype, jobz, uplo, A, B, W, T, Q, sequence, &request);
386  status = sequence->status;
387  plasma_sequence_destroy(plasma, sequence);
388  return status;
389 }
390 
391 /***************************************************************************/
425  PLASMA_desc *A,
426  PLASMA_desc *B,
427  double *W,
428  PLASMA_desc *T,
429  PLASMA_desc *Q,
430  PLASMA_sequence *sequence, PLASMA_request *request)
431 {
432  int status;
433  PLASMA_desc descA = *A;
434  PLASMA_desc descB = *B;
435  PLASMA_desc descT = *T;
436  double *E;
438 
439  plasma = plasma_context_self();
440  if (plasma == NULL) {
441  plasma_fatal_error("PLASMA_zhegv_Tile_Async", "PLASMA not initialized");
443  }
444  if (sequence == NULL) {
445  plasma_fatal_error("PLASMA_zhegv_Tile_Async", "NULL sequence");
446  return PLASMA_ERR_UNALLOCATED;
447  }
448  if (request == NULL) {
449  plasma_fatal_error("PLASMA_zhegv_Tile_Async", "NULL request");
450  return PLASMA_ERR_UNALLOCATED;
451  }
452  /* Check sequence status */
453  if (sequence->status == PLASMA_SUCCESS)
454  request->status = PLASMA_SUCCESS;
455  else
456  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
457 
458  /* Check descriptors for correctness */
459  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
460  plasma_error("PLASMA_zhegv_Tile_Async", "invalid first descriptor");
461  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
462  }
463  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
464  plasma_error("PLASMA_zhegv_Tile_Async", "invalid second descriptor");
465  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
466  }
467  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
468  plasma_error("PLASMA_zhegv_Tile_Async", "invalid descriptor");
469  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
470  }
471  if (jobz == PlasmaVec){
472  if (plasma_desc_check(Q) != PLASMA_SUCCESS) {
473  plasma_error("PLASMA_zhegv_Tile_Async", "invalid descriptor");
474  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
475  }
476  }
477  /* Check input arguments */
478  if (itype != 1 && itype != 2 && itype != 3) {
479  plasma_error("PLASMA_zhegv_Tile_Async", "Illegal value of itype");
480  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
481  }
482  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
483  plasma_error("PLASMA_zhegv_Tile_Async", "illegal value of jobz");
484  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
485  }
486  if (uplo != PlasmaLower && uplo != PlasmaUpper) {
487  plasma_error("PLASMA_zheev_Tile_Async", "illegal value of uplo");
488  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
489  }
490  if (descA.nb != descA.mb) {
491  plasma_error("PLASMA_zhegv_Tile_Async", "only square tiles supported");
492  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
493  }
494  if (jobz == PlasmaVec) {
495  plasma_error("PLASMA_zhegv_Tile_Async", "computing the eigenvectors is not supported in this version");
496  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
497  }
498  if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) {
499  plasma_error("PLASMA_zhegv_Tile_Async", "only square tiles supported");
500  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
501  }
502 
503  E = (double *)plasma_shared_alloc(plasma, descA.n-1, PlasmaRealDouble);
504 
505  /* Currently NOT equivalent to LAPACK's
506  */
508  PLASMA_enum, uplo,
509  PLASMA_desc, descB,
510  PLASMA_sequence*, sequence,
511  PLASMA_request*, request);
512 
513  status = sequence->status;
514  if (status != 0){
515  status = descA.ln + status;
516  return status;
517  }
518 
519  /*
520  * Transform problem to standard eigenvalue problem and solve
521  */
522  plasma_dynamic_call_6(plasma_pzhegst,
523  PLASMA_enum, itype,
524  PLASMA_enum, uplo,
525  PLASMA_desc, descA,
526  PLASMA_desc, descB,
527  PLASMA_sequence*, sequence,
528  PLASMA_request*, request);
529 
530  /* Reduction to tridiagonal form
531  * with a two-stage approach.
532  */
533 
534  /*
535  *Reduction to BAND tridiagonal form
536  */
537  plasma_dynamic_call_5(plasma_pzherbt,
538  PLASMA_enum, uplo,
539  PLASMA_desc, descA,
540  PLASMA_desc, descT,
541  PLASMA_sequence*, sequence,
542  PLASMA_request*, request);
543 
544  /*
545  * Build the Q of the first stage
546  */
547  /* if (jobz == PlasmaVec){ */
548  /* /\* Initialize Q to Identity *\/ */
549  /* plasma_dynamic_call_6(plasma_pzlaset, */
550  /* PLASMA_enum, PlasmaUpperLower, */
551  /* PLASMA_Complex64_t, 0.0, */
552  /* PLASMA_Complex64_t, 1.0, */
553  /* PLASMA_desc, descQ, */
554  /* PLASMA_sequence*, sequence, */
555  /* PLASMA_request*, request); */
556 
557  /* /\* Accumulate the transformations from the first stage *\/ */
558  /* plasma_dynamic_call_6(plasma_pzungtr, */
559  /* PLASMA_enum, uplo, */
560  /* PLASMA_desc, descA, */
561  /* PLASMA_desc, descQ, */
562  /* PLASMA_desc, descT, */
563  /* PLASMA_sequence*, sequence, */
564  /* PLASMA_request*, request); */
565  /* } */
566 
567  /* Set the V's to zero before the 2nd stage (bulge chasing) */
569  plasma_pzlaset2,
570  PLASMA_enum, uplo,
571  PLASMA_Complex64_t, 0.0,
572  PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) :
573  plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
574  PLASMA_sequence*, sequence,
575  PLASMA_request*, request);
576  /*
577  * Reduction from BAND tridiagonal to the final condensed form
578  */
579  plasma_dynamic_call_7(plasma_pzhbrdt,
580  PLASMA_enum, uplo,
581  PLASMA_desc, descA,
582  double*, W,
583  double*, E,
584  PLASMA_desc, descT,
585  PLASMA_sequence*, sequence,
586  PLASMA_request*, request);
587 
588  /* For eigenvalues only, call DSTERF.
589  * For eigenvectors, first call ZUNGTR to generate the unitary matrix,
590  * then call ZSTEQR.
591  */
592 
593  if (jobz == PlasmaNoVec)
594  status = LAPACKE_dsterf(descA.n, W, E);
595  else {
596  status = LAPACKE_dsterf(descA.n, W, E);
597  /* Accumulate the transformations from the second stage */
598  /*
599  plasma_dynamic_call_6(plasma_pzungtr,
600  PLASMA_enum, uplo,
601  PLASMA_desc, descA,
602  PLASMA_desc, descQ,
603  PLASMA_desc, descT,
604  PLASMA_sequence*, sequence,
605  PLASMA_request*, request);
606  LAPACKE_zsteqr(jobz, descA.n, W, E, descQ.mat, descQ.lm);
607  */
608  }
609 
610  /* If matrix was scaled, then rescale eigenvalues appropriately.
611  */
612 
613  plasma_shared_free(plasma, E);
614 
615  return PLASMA_SUCCESS;
616 }