kflops.c

Go to the documentation of this file.
00001 
00008 /* $Id: kflops.c,v 1.7 2005/10/10 17:08:08 seymour Exp $ */
00009 /* $UTK_Copyright: $ */
00010 
00011 /* 
00012    Translated to C by Bonnie Toy 5/88 (modified on 2/25/94 to fix a problem
00013    with daxpy for unequal increments or equal increments not equal to 1. Jack 
00014    Dongarra)
00015 
00016    To compile single precision version for Sun-4:
00017 
00018    cc -DSP -O4 -fsingle -fsingle2 clinpack.c -lm
00019 
00020    To compile double precision version for Sun-4:
00021 
00022 
00023    cc -DDP -O4 clinpack.c -lm
00024 
00025    To obtain rolled source BLAS, add -DROLL to the command lines. To obtain
00026    unrolled source BLAS, add -DUNROLL to the command lines.
00027 
00028    You must specify one of -DSP or -DDP to compile correctly.
00029 
00030    You must specify one of -DROLL or -DUNROLL to compile correctly. */
00031 
00032 #include "kflops.h"
00033 
00034 #ifdef NORUSAGE
00035 #include <sys/times.h>
00036 #include <unistd.h>
00037 static REAL ClockTick = 0.0;
00038 struct tms ru;
00039 #else
00040 struct rusage ru;
00041 #endif
00042 
00043 #include <stdio.h>
00044 #include <math.h>
00045 #include <sys/time.h>
00046 #include <sys/resource.h>
00047 
00048 static REAL my_time[9][9];
00049 static int idamax();
00050 static REAL ddot();
00051 
00052 /*----------------------*/
00053 
00054 #ifdef __STDC__
00055 static void
00056 daxpy(int n, REAL da, REAL dx[], int incx, REAL dy[], int incy)
00057 #else
00058 static void
00059 daxpy(n, da, dx, incx, dy, incy)
00060 /* 
00061    constant times a vector plus a vector. jack dongarra, linpack, 3/11/78. */
00062 REAL dx[], dy[], da;
00063 int incx, incy, n;
00064 #endif
00065 {
00066   int i, ix, iy;
00067 
00068   if(n <= 0)
00069     return;
00070   if(da == ZERO)
00071     return;
00072 
00073   if(incx != 1 || incy != 1) {
00074 
00075     /* code for unequal increments or equal increments not equal to 1 */
00076 
00077     ix = 0;
00078     iy = 0;
00079     if(incx < 0)
00080       ix = (-n + 1) * incx;
00081     if(incy < 0)
00082       iy = (-n + 1) * incy;
00083     for(i = 0; i < n; i++) {
00084       dy[iy] = dy[iy] + da * dx[ix];
00085       ix = ix + incx;
00086       iy = iy + incy;
00087     }
00088     return;
00089   }
00090 
00091   /* code for both increments equal to 1 */
00092 
00093 #ifdef ROLL
00094   for(i = 0; i < n; i++) {
00095     dy[i] = dy[i] + da * dx[i];
00096   }
00097 #endif
00098 #ifdef UNROLL
00099 
00100   m = n % 4;
00101   if(m != 0) {
00102     for(i = 0; i < m; i++)
00103       dy[i] = dy[i] + da * dx[i];
00104     if(n < 4)
00105       return;
00106   }
00107   for(i = m; i < n; i = i + 4) {
00108     dy[i] = dy[i] + da * dx[i];
00109     dy[i + 1] = dy[i + 1] + da * dx[i + 1];
00110     dy[i + 2] = dy[i + 2] + da * dx[i + 2];
00111     dy[i + 3] = dy[i + 3] + da * dx[i + 3];
00112   }
00113 #endif
00114 }
00115 
00116 /*----------------------*/
00117 #ifdef __STDC__
00118 static void
00119 matgen(REAL a[], int lda, int n, REAL b[], REAL * norma)
00120 #else
00121 static void
00122 matgen(a, lda, n, b, norma)
00123 REAL a[], b[], *norma;
00124 int lda, n;
00125 #endif
00126 /* We would like to declare a[][lda], but c does not allow it.  In this
00127    function, references to a[i][j] are written a[lda*i+j].  */
00128 
00129 {
00130   int init, i, j;
00131 
00132   init = 1325;
00133   *norma = 0.0;
00134   for(j = 0; j < n; j++) {
00135     for(i = 0; i < n; i++) {
00136       init = 3125 * init % 65536;
00137       a[lda * j + i] = (init - 32768.0) / 16384.0;
00138       *norma = (a[lda * j + i] > *norma) ? a[lda * j + i] : *norma;
00139     }
00140   }
00141   for(i = 0; i < n; i++) {
00142     b[i] = 0.0;
00143   }
00144   for(j = 0; j < n; j++) {
00145     for(i = 0; i < n; i++) {
00146       b[i] = b[i] + a[lda * j + i];
00147     }
00148   }
00149 }
00150 
00151 
00152 /*----------------------*/
00153 
00154 #ifdef __STDC__
00155 static void
00156 dgesl(REAL a[], int lda, int n, int ipvt[], REAL b[], int job)
00157 #else
00158 static void
00159 dgesl(a, lda, n, ipvt, b, job)
00160 int lda, n, ipvt[], job;
00161 REAL a[], b[];
00162 #endif
00163 /* We would like to declare a[][lda], but c does not allow it.  In this
00164    function, references to a[i][j] are written a[lda*i+j].  */
00165 
00166 /* 
00167    dgesl solves the double precision system a * x = b or trans(a) * x = b
00168    using the factors computed by dgeco or dgefa.
00169 
00170    on entry
00171 
00172    a double precision[n][lda] the output from dgeco or dgefa.
00173 
00174    lda integer the leading dimension of the array a .
00175 
00176    n integer the order of the matrix a .
00177 
00178    ipvt integer[n] the pivot vector from dgeco or dgefa.
00179 
00180    b double precision[n] the right hand side vector.
00181 
00182    job integer = 0 to solve a*x = b , = nonzero to solve trans(a)*x = b where
00183    trans(a) is the transpose.
00184 
00185    on return
00186 
00187    b the solution vector x .
00188 
00189    error condition
00190 
00191    a division by zero will occur if the input factor contains a zero on the
00192    diagonal.  technically this indicates singularity but it is often caused by 
00193    improper arguments or improper setting of lda .  it will not occur if the
00194    subroutines are called correctly and if dgeco has set rcond .gt. 0.0 or
00195    dgefa has set info .eq. 0 .
00196 
00197    to compute inverse(a) * c where c is a matrix with p columns
00198    dgeco(a,lda,n,ipvt,rcond,z) if (!rcond is too small){ for (j=0,j<p,j++)
00199    dgesl(a,lda,n,ipvt,c[j][0],0); }
00200 
00201    linpack. this version dated 08/14/78 . cleve moler, university of new
00202    mexico, argonne national lab.
00203 
00204    functions
00205 
00206    blas daxpy,ddot */
00207 {
00208   /* internal variables */
00209 
00210   REAL t;
00211   int k, kb, l, nm1;
00212 
00213   nm1 = n - 1;
00214   if(job == 0) {
00215 
00216     /* job = 0 , solve a * x = b first solve l*y = b */
00217 
00218     if(nm1 >= 1) {
00219       for(k = 0; k < nm1; k++) {
00220         l = ipvt[k];
00221         t = b[l];
00222         if(l != k) {
00223           b[l] = b[k];
00224           b[k] = t;
00225         }
00226         daxpy(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
00227       }
00228     }
00229 
00230     /* now solve u*x = y */
00231 
00232     for(kb = 0; kb < n; kb++) {
00233       k = n - (kb + 1);
00234       b[k] = b[k] / a[lda * k + k];
00235       t = -b[k];
00236       daxpy(k, t, &a[lda * k + 0], 1, &b[0], 1);
00237     }
00238   }
00239   else {
00240 
00241     /* job = nonzero, solve trans(a) * x = b first solve trans(u)*y = b */
00242 
00243     for(k = 0; k < n; k++) {
00244       t = ddot(k, &a[lda * k + 0], 1, &b[0], 1);
00245       b[k] = (b[k] - t) / a[lda * k + k];
00246     }
00247 
00248     /* now solve trans(l)*x = y */
00249 
00250     if(nm1 >= 1) {
00251       for(kb = 1; kb < nm1; kb++) {
00252         k = n - (kb + 1);
00253         b[k] = b[k] + ddot(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
00254         l = ipvt[k];
00255         if(l != k) {
00256           t = b[l];
00257           b[l] = b[k];
00258           b[k] = t;
00259         }
00260       }
00261     }
00262   }
00263 }
00264 
00265 
00266 /*----------------------*/
00267 
00268 #ifdef __STDC__
00269 static REAL
00270 ddot(int n, REAL dx[], int incx, REAL dy[], int incy)
00271 #else
00272 static REAL
00273 ddot(n, dx, incx, dy, incy)
00274 /* 
00275    forms the dot product of two vectors. jack dongarra, linpack, 3/11/78. */
00276 REAL dx[], dy[];
00277 int incx, incy, n;
00278 #endif
00279 {
00280   REAL dtemp;
00281   int i, ix, iy;
00282 
00283   dtemp = ZERO;
00284 
00285   if(n <= 0)
00286     return (ZERO);
00287 
00288   if(incx != 1 || incy != 1) {
00289 
00290     /* code for unequal increments or equal increments not equal to 1 */
00291 
00292     ix = 0;
00293     iy = 0;
00294     if(incx < 0)
00295       ix = (-n + 1) * incx;
00296     if(incy < 0)
00297       iy = (-n + 1) * incy;
00298     for(i = 0; i < n; i++) {
00299       dtemp = dtemp + dx[ix] * dy[iy];
00300       ix = ix + incx;
00301       iy = iy + incy;
00302     }
00303     return (dtemp);
00304   }
00305 
00306   /* code for both increments equal to 1 */
00307 
00308 #ifdef ROLL
00309   for(i = 0; i < n; i++)
00310     dtemp = dtemp + dx[i] * dy[i];
00311   return (dtemp);
00312 #endif
00313 #ifdef UNROLL
00314 
00315   m = n % 5;
00316   if(m != 0) {
00317     for(i = 0; i < m; i++)
00318       dtemp = dtemp + dx[i] * dy[i];
00319     if(n < 5)
00320       return (dtemp);
00321   }
00322   for(i = m; i < n; i = i + 5) {
00323     dtemp = dtemp + dx[i] * dy[i] +
00324         dx[i + 1] * dy[i + 1] + dx[i + 2] * dy[i + 2] +
00325         dx[i + 3] * dy[i + 3] + dx[i + 4] * dy[i + 4];
00326   }
00327   return (dtemp);
00328 #endif
00329 }
00330 
00331 /*----------------------*/
00332 #ifdef __STDC__
00333 static void
00334 dscal(int n, REAL da, REAL dx[], int incx)
00335 #else
00336 static void
00337 dscal(n, da, dx, incx)
00338 
00339 /* scales a vector by a constant. jack dongarra, linpack, 3/11/78. */
00340 REAL da, dx[];
00341 int n, incx;
00342 #endif
00343 {
00344   int i, nincx;
00345 
00346   if(n <= 0)
00347     return;
00348   if(incx != 1) {
00349 
00350     /* code for increment not equal to 1 */
00351 
00352     nincx = n * incx;
00353     for(i = 0; i < nincx; i = i + incx)
00354       dx[i] = da * dx[i];
00355     return;
00356   }
00357 
00358   /* code for increment equal to 1 */
00359 
00360 #ifdef ROLL
00361   for(i = 0; i < n; i++)
00362     dx[i] = da * dx[i];
00363 #endif
00364 #ifdef UNROLL
00365 
00366   m = n % 5;
00367   if(m != 0) {
00368     for(i = 0; i < m; i++)
00369       dx[i] = da * dx[i];
00370     if(n < 5)
00371       return;
00372   }
00373   for(i = m; i < n; i = i + 5) {
00374     dx[i] = da * dx[i];
00375     dx[i + 1] = da * dx[i + 1];
00376     dx[i + 2] = da * dx[i + 2];
00377     dx[i + 3] = da * dx[i + 3];
00378     dx[i + 4] = da * dx[i + 4];
00379   }
00380 #endif
00381 
00382 }
00383 
00384 /*----------------------*/
00385 #ifdef __STDC__
00386 static void
00387 dgefa(REAL a[], int lda, int n, int ipvt[], int *info)
00388 #else
00389 static void
00390 dgefa(a, lda, n, ipvt, info)
00391 REAL a[];
00392 int lda, n, ipvt[], *info;
00393 #endif
00394 /* We would like to declare a[][lda], but c does not allow it.  In this
00395    function, references to a[i][j] are written a[lda*i+j].  */
00396 /* 
00397    dgefa factors a double precision matrix by gaussian elimination.
00398 
00399    dgefa is usually called by dgeco, but it can be called directly with a
00400    saving in time if rcond is not needed. (time for dgeco) = (1 + 9/n)*(time
00401    for dgefa) .
00402 
00403    on entry
00404 
00405    a REAL precision[n][lda] the matrix to be factored.
00406 
00407    lda integer the leading dimension of the array a .
00408 
00409    n integer the order of the matrix a .
00410 
00411    on return
00412 
00413    a an upper triangular matrix and the multipliers which were used to obtain 
00414    it. the factorization can be written a = l*u where l is a product of
00415    permutation and unit lower triangular matrices and u is upper triangular.
00416 
00417    ipvt integer[n] an integer vector of pivot indices.
00418 
00419    info integer = 0 normal value. = k if u[k][k] .eq. 0.0 .  this is not an
00420    error condition for this subroutine, but it does indicate that dgesl or
00421    dgedi will divide by zero if called.  use rcond in dgeco for a reliable
00422    indication of singularity.
00423 
00424    linpack. this version dated 08/14/78 . cleve moler, university of new
00425    mexico, argonne national lab.
00426 
00427    functions
00428 
00429    blas daxpy,dscal,idamax */
00430 
00431 {
00432   /* internal variables */
00433 
00434   REAL t;
00435   int j, k, kp1, l, nm1;
00436 
00437   /* gaussian elimination with partial pivoting */
00438 
00439   *info = 0;
00440   nm1 = n - 1;
00441   if(nm1 >= 0) {
00442     for(k = 0; k < nm1; k++) {
00443       kp1 = k + 1;
00444 
00445       /* find l = pivot index */
00446 
00447       l = idamax(n - k, &a[lda * k + k], 1) + k;
00448       ipvt[k] = l;
00449 
00450       /* zero pivot implies this column already triangularized */
00451 
00452       if(a[lda * k + l] != ZERO) {
00453 
00454         /* interchange if necessary */
00455 
00456         if(l != k) {
00457           t = a[lda * k + l];
00458           a[lda * k + l] = a[lda * k + k];
00459           a[lda * k + k] = t;
00460         }
00461 
00462         /* compute multipliers */
00463 
00464         t = -ONE / a[lda * k + k];
00465         dscal(n - (k + 1), t, &a[lda * k + k + 1], 1);
00466 
00467         /* row elimination with column indexing */
00468 
00469         for(j = kp1; j < n; j++) {
00470           t = a[lda * j + l];
00471           if(l != k) {
00472             a[lda * j + l] = a[lda * j + k];
00473             a[lda * j + k] = t;
00474           }
00475           daxpy(n - (k + 1), t, &a[lda * k + k + 1], 1,
00476                 &a[lda * j + k + 1], 1);
00477         }
00478       }
00479       else {
00480         *info = k;
00481       }
00482     }
00483   }
00484   ipvt[n - 1] = n - 1;
00485   if(a[lda * (n - 1) + (n - 1)] == ZERO)
00486     *info = n - 1;
00487 }
00488 
00489 /*----------------------*/
00490 #ifdef __STDC__
00491 static int
00492 idamax(int n, REAL dx[], int incx)
00493 #else
00494 static int
00495 idamax(n, dx, incx)
00496 /* 
00497    finds the index of element having max. absolute value. jack dongarra,
00498    linpack, 3/11/78. */
00499 REAL dx[];
00500 int incx, n;
00501 #endif
00502 {
00503   REAL dmax;
00504   int i, ix, itemp;
00505 
00506   if(n < 1)
00507     return (-1);
00508   if(n == 1)
00509     return (0);
00510   if(incx != 1) {
00511 
00512     /* code for increment not equal to 1 */
00513 
00514     ix = 1;
00515     itemp = 0;
00516     dmax = fabs((double) dx[0]);
00517     ix = ix + incx;
00518     for(i = 1; i < n; i++) {
00519       if(fabs((double) dx[ix]) > dmax) {
00520         itemp = i;
00521         dmax = fabs((double) dx[ix]);
00522       }
00523       ix = ix + incx;
00524     }
00525   }
00526   else {
00527 
00528     /* code for increment equal to 1 */
00529 
00530     itemp = 0;
00531     dmax = fabs((double) dx[0]);
00532     for(i = 1; i < n; i++) {
00533       if(fabs((double) dx[i]) > dmax) {
00534         itemp = i;
00535         dmax = fabs((double) dx[i]);
00536       }
00537     }
00538   }
00539   return (itemp);
00540 }
00541 
00542 
00543 /*----------------------*/
00544 #ifdef __STDC__
00545 static void
00546 dmxpy(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
00547 #else
00548 static void
00549 dmxpy(n1, y, n2, ldm, x, m)
00550 REAL y[], x[], m[];
00551 int n1, n2, ldm;
00552 #endif
00553 /* We would like to declare m[][ldm], but c does not allow it.  In this
00554    function, references to m[i][j] are written m[ldm*i+j].  */
00555 
00556 /* 
00557    purpose: multiply matrix m times vector x and add the result to vector y.
00558 
00559    parameters:
00560 
00561    n1 integer, number of elements in vector y, and number of rows in matrix m
00562 
00563    y double [n1], vector of length n1 to which is added the product m*x
00564 
00565    n2 integer, number of elements in vector x, and number of columns in
00566    matrix m
00567 
00568    ldm integer, leading dimension of array m
00569 
00570    x double [n2], vector of length n2
00571 
00572    m double [ldm][n2], matrix of n1 rows and n2 columns
00573 
00574    ---------------------------------------------------------------------- */
00575 {
00576   int j, i, jmin;
00577   /* cleanup odd vector */
00578 
00579   j = n2 % 2;
00580   if(j >= 1) {
00581     j = j - 1;
00582     for(i = 0; i < n1; i++)
00583       y[i] = (y[i]) + x[j] * m[ldm * j + i];
00584   }
00585 
00586   /* cleanup odd group of two vectors */
00587 
00588   j = n2 % 4;
00589   if(j >= 2) {
00590     j = j - 1;
00591     for(i = 0; i < n1; i++)
00592       y[i] = ((y[i])
00593               + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
00594   }
00595 
00596   /* cleanup odd group of four vectors */
00597 
00598   j = n2 % 8;
00599   if(j >= 4) {
00600     j = j - 1;
00601     for(i = 0; i < n1; i++)
00602       y[i] = ((((y[i])
00603                 + x[j - 3] * m[ldm * (j - 3) + i])
00604                + x[j - 2] * m[ldm * (j - 2) + i])
00605               + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
00606   }
00607 
00608   /* cleanup odd group of eight vectors */
00609 
00610   j = n2 % 16;
00611   if(j >= 8) {
00612     j = j - 1;
00613     for(i = 0; i < n1; i++)
00614       y[i] = ((((((((y[i])
00615                     + x[j - 7] * m[ldm * (j - 7) + i]) + x[j -
00616                                                            6] * m[ldm * (j -
00617                                                                          6) +
00618                                                                   i])
00619                   + x[j - 5] * m[ldm * (j - 5) + i]) + x[j - 4] * m[ldm * (j -
00620                                                                            4)
00621                                                                     + i])
00622                 + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j -
00623                                                                          2) +
00624                                                                   i])
00625               + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
00626   }
00627 
00628   /* main loop - groups of sixteen vectors */
00629 
00630   jmin = (n2 % 16) + 16;
00631   for(j = jmin - 1; j < n2; j = j + 16) {
00632     for(i = 0; i < n1; i++)
00633       y[i] = ((((((((((((((((y[i])
00634                             + x[j - 15] * m[ldm * (j - 15) + i])
00635                            + x[j - 14] * m[ldm * (j - 14) + i])
00636                           + x[j - 13] * m[ldm * (j - 13) + i])
00637                          + x[j - 12] * m[ldm * (j - 12) + i])
00638                         + x[j - 11] * m[ldm * (j - 11) + i])
00639                        + x[j - 10] * m[ldm * (j - 10) + i])
00640                       + x[j - 9] * m[ldm * (j - 9) + i])
00641                      + x[j - 8] * m[ldm * (j - 8) + i])
00642                     + x[j - 7] * m[ldm * (j - 7) + i])
00643                    + x[j - 6] * m[ldm * (j - 6) + i])
00644                   + x[j - 5] * m[ldm * (j - 5) + i])
00645                  + x[j - 4] * m[ldm * (j - 4) + i])
00646                 + x[j - 3] * m[ldm * (j - 3) + i])
00647                + x[j - 2] * m[ldm * (j - 2) + i])
00648               + x[j - 1] * m[ldm * (j - 1) + i])
00649           + x[j] * m[ldm * j + i];
00650   }
00651 }
00652 
00653 /*----------------------*/
00654 #ifdef __STDC__
00655 static REAL
00656 second()
00657 #else
00658 static REAL
00659 second()
00660 #endif
00661 {
00662   REAL t;
00663 
00664 #ifndef MPP                     /* Workstations */
00665 
00666 
00667 #ifndef NORUSAGE
00668   getrusage(RUSAGE_SELF, &ru);
00669 #else                           /* HPs */
00670   if(ClockTick == 0.0)
00671     ClockTick = (REAL) sysconf(_SC_CLK_TCK);
00672   if(times(&ru) == -1)
00673     fprintf(stderr, "second() : Oups\n");
00674 #endif
00675 
00676 
00677 #ifndef NORUSAGE
00678   t = (REAL) (ru.ru_utime.tv_sec) + ((REAL) (ru.ru_utime.tv_usec)) / 1.0e6;
00679 #else
00680   t = (REAL) ((REAL) ru.tms_utime / ClockTick);
00681 #endif
00682 #else                           /* MPPs !! */
00683 
00684 #endif                          /* MPP */
00685 
00686   return t;
00687 }
00688 
00689 
00690 /*----------------------*/
00691 int
00692 kflops()
00693 {
00694   static REAL aa[SIZE][SIZE], a[SIZE][SIZE + 1], b[SIZE], x[SIZE];
00695   REAL Newcray, ops, total, norma, normx;
00696   REAL resid, eps, t1, tm, tm2;
00697   REAL kflops_epslon(), second(), kf;
00698   static int ipvt[SIZE], n, i, ntimes, info, lda, ldaa, kflops;
00699 
00700   lda = SIZE + 1;
00701   ldaa = SIZE;
00702   Newcray = .056;
00703   n = SIZE/2;
00704 
00705   ops = (2.0e0 * (n * n * n)) / 3.0 + 2.0 * (n * n);
00706 
00707   matgen((REAL *) a, lda, n, b, &norma);
00708   t1 = second();
00709   dgefa((REAL *) a, lda, n, ipvt, &info);
00710   my_time[0][0] = second() - t1;
00711   t1 = second();
00712   dgesl((REAL *) a, lda, n, ipvt, b, 0);
00713   my_time[1][0] = second() - t1;
00714   total = my_time[0][0] + my_time[1][0];
00715 
00716   /* compute a residual to verify results.  */
00717 
00718   for(i = 0; i < n; i++) {
00719     x[i] = b[i];
00720   }
00721   matgen((REAL *) a, lda, n, b, &norma);
00722   for(i = 0; i < n; i++) {
00723     b[i] = -b[i];
00724   }
00725   dmxpy(n, b, n, lda, x, (REAL *) a);
00726   resid = 0.0;
00727   normx = 0.0;
00728   for(i = 0; i < n; i++) {
00729     resid = (resid > fabs((double) b[i]))
00730         ? resid : fabs((double) b[i]);
00731     normx = (normx > fabs((double) x[i]))
00732         ? normx : fabs((double) x[i]);
00733   }
00734   eps = kflops_epslon((REAL) ONE);
00735   /* residn = resid/( n*norma*normx*eps ); */
00736 
00737   my_time[2][0] = total;
00738   my_time[3][0] = ops / (1.0e3 * total);
00739   my_time[4][0] = 2.0e3 / my_time[3][0];
00740   my_time[5][0] = total / Newcray;
00741 
00742   matgen((REAL *) a, lda, n, b, &norma);
00743   t1 = second();
00744   dgefa((REAL *) a, lda, n, ipvt, &info);
00745   my_time[0][1] = second() - t1;
00746   t1 = second();
00747   dgesl((REAL *) a, lda, n, ipvt, b, 0);
00748   my_time[1][1] = second() - t1;
00749   total = my_time[0][1] + my_time[1][1];
00750   my_time[2][1] = total;
00751   my_time[3][1] = ops / (1.0e3 * total);
00752   my_time[4][1] = 2.0e3 / my_time[3][1];
00753   my_time[5][1] = total / Newcray;
00754 
00755   matgen((REAL *) a, lda, n, b, &norma);
00756   t1 = second();
00757   dgefa((REAL *) a, lda, n, ipvt, &info);
00758   my_time[0][2] = second() - t1;
00759   t1 = second();
00760   dgesl((REAL *) a, lda, n, ipvt, b, 0);
00761   my_time[1][2] = second() - t1;
00762   total = my_time[0][2] + my_time[1][2];
00763   my_time[2][2] = total;
00764   my_time[3][2] = ops / (1.0e3 * total);
00765   my_time[4][2] = 2.0e3 / my_time[3][2];
00766   my_time[5][2] = total / Newcray;
00767 
00768   ntimes = NTIMES;
00769   tm2 = 0.0;
00770   t1 = second();
00771 
00772   for(i = 0; i < ntimes; i++) {
00773     tm = second();
00774     matgen((REAL *) a, lda, n, b, &norma);
00775     tm2 = tm2 + second() - tm;
00776     dgefa((REAL *) a, lda, n, ipvt, &info);
00777   }
00778 
00779   my_time[0][3] = (second() - t1 - tm2) / ntimes;
00780   t1 = second();
00781 
00782   for(i = 0; i < ntimes; i++) {
00783     dgesl((REAL *) a, lda, n, ipvt, b, 0);
00784   }
00785 
00786   my_time[1][3] = (second() - t1) / ntimes;
00787   total = my_time[0][3] + my_time[1][3];
00788   my_time[2][3] = total;
00789   my_time[3][3] = ops / (1.0e3 * total);
00790   my_time[4][3] = 2.0e3 / my_time[3][3];
00791   my_time[5][3] = total / Newcray;
00792 
00793   matgen((REAL *) aa, ldaa, n, b, &norma);
00794   t1 = second();
00795   dgefa((REAL *) aa, ldaa, n, ipvt, &info);
00796   my_time[0][4] = second() - t1;
00797   t1 = second();
00798   dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
00799   my_time[1][4] = second() - t1;
00800   total = my_time[0][4] + my_time[1][4];
00801   my_time[2][4] = total;
00802   my_time[3][4] = ops / (1.0e3 * total);
00803   my_time[4][4] = 2.0e3 / my_time[3][4];
00804   my_time[5][4] = total / Newcray;
00805 
00806   matgen((REAL *) aa, ldaa, n, b, &norma);
00807   t1 = second();
00808   dgefa((REAL *) aa, ldaa, n, ipvt, &info);
00809   my_time[0][5] = second() - t1;
00810   t1 = second();
00811   dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
00812   my_time[1][5] = second() - t1;
00813   total = my_time[0][5] + my_time[1][5];
00814   my_time[2][5] = total;
00815   my_time[3][5] = ops / (1.0e3 * total);
00816   my_time[4][5] = 2.0e3 / my_time[3][5];
00817   my_time[5][5] = total / Newcray;
00818 
00819   matgen((REAL *) aa, ldaa, n, b, &norma);
00820   t1 = second();
00821   dgefa((REAL *) aa, ldaa, n, ipvt, &info);
00822   my_time[0][6] = second() - t1;
00823   t1 = second();
00824   dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
00825   my_time[1][6] = second() - t1;
00826   total = my_time[0][6] + my_time[1][6];
00827   my_time[2][6] = total;
00828   my_time[3][6] = ops / (1.0e3 * total);
00829   my_time[4][6] = 2.0e3 / my_time[3][6];
00830   my_time[5][6] = total / Newcray;
00831 
00832   ntimes = NTIMES;
00833   tm2 = 0;
00834   t1 = second();
00835   for(i = 0; i < ntimes; i++) {
00836     tm = second();
00837     matgen((REAL *) aa, ldaa, n, b, &norma);
00838     tm2 = tm2 + second() - tm;
00839     dgefa((REAL *) aa, ldaa, n, ipvt, &info);
00840   }
00841   my_time[0][7] = (second() - t1 - tm2) / ntimes;
00842   t1 = second();
00843   for(i = 0; i < ntimes; i++) {
00844     dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
00845   }
00846   my_time[1][7] = (second() - t1) / ntimes;
00847   total = my_time[0][7] + my_time[1][7];
00848 
00849   my_time[2][7] = total;
00850   my_time[3][7] = ops / (1.0e3 * total);
00851   my_time[4][7] = 2.0e3 / my_time[3][7];
00852   my_time[5][7] = total / Newcray;
00853 
00854   /* the following code sequence implements the semantics of the Fortran
00855      intrinsics "nint(min(my_time[3][3],my_time[3][7]))" */
00856 
00857   kf = (my_time[3][3] < my_time[3][7]) ? my_time[3][3] : my_time[3][7];
00858   kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
00859   if(fabs((double) kf) < ONE)
00860     kflops = 0;
00861   else {
00862     kflops = (int) fabs((double) kf);
00863     if(kf < ZERO)
00864       kflops = -kflops;
00865   }
00866 
00867   return kflops;
00868 }