Functions | Variables

kflops.c File Reference

#include "kflops.h"
#include <stdio.h>
#include <math.h>
#include <sys/time.h>
#include <sys/resource.h>
Include dependency graph for kflops.c:

Go to the source code of this file.

Functions

static int idamax ()
static REAL ddot ()
static void daxpy (int n, da, dx, int incx, dy, int incy)
static void matgen (a, int lda, int n, b,*norma)
static void dgesl (a, int lda, int n, ipvt, b, int job)
static REAL ddot (int n, dx, int incx, dy, int incy)
static void dscal (int n, REAL da, dx, int incx)
static void dgefa (a, int lda, int n, ipvt, int *info)
static int idamax (int n, dx, int incx)
static void dmxpy (int n1, y, int n2, int ldm, x, m)
static REAL second ()
int kflops ()

Variables

struct rusage ru
static REAL my_time [9][9]

Detailed Description

This file contains a kflops estimator inherited from earlier ICL projects

Definition in file kflops.c.


Function Documentation

static void daxpy ( int  n,
  da,
dx  ,
int  incx,
dy  ,
int  incy 
) [static]

Definition at line 59 of file kflops.c.

{
  int i, ix, iy;

  if(n <= 0)
    return;
  if(da == ZERO)
    return;

  if(incx != 1 || incy != 1) {

    /* code for unequal increments or equal increments not equal to 1 */

    ix = 0;
    iy = 0;
    if(incx < 0)
      ix = (-n + 1) * incx;
    if(incy < 0)
      iy = (-n + 1) * incy;
    for(i = 0; i < n; i++) {
      dy[iy] = dy[iy] + da * dx[ix];
      ix = ix + incx;
      iy = iy + incy;
    }
    return;
  }

  /* code for both increments equal to 1 */

#ifdef ROLL
  for(i = 0; i < n; i++) {
    dy[i] = dy[i] + da * dx[i];
  }
#endif
#ifdef UNROLL

  m = n % 4;
  if(m != 0) {
    for(i = 0; i < m; i++)
      dy[i] = dy[i] + da * dx[i];
    if(n < 4)
      return;
  }
  for(i = m; i < n; i = i + 4) {
    dy[i] = dy[i] + da * dx[i];
    dy[i + 1] = dy[i + 1] + da * dx[i + 1];
    dy[i + 2] = dy[i + 2] + da * dx[i + 2];
    dy[i + 3] = dy[i + 3] + da * dx[i + 3];
  }
#endif
}

Here is the caller graph for this function:

static REAL ddot (  )  [static]

Here is the caller graph for this function:

static REAL ddot ( int  n,
dx  ,
int  incx,
dy  ,
int  incy 
) [static]

Definition at line 273 of file kflops.c.

{
  REAL dtemp;
  int i, ix, iy;

  dtemp = ZERO;

  if(n <= 0)
    return (ZERO);

  if(incx != 1 || incy != 1) {

    /* code for unequal increments or equal increments not equal to 1 */

    ix = 0;
    iy = 0;
    if(incx < 0)
      ix = (-n + 1) * incx;
    if(incy < 0)
      iy = (-n + 1) * incy;
    for(i = 0; i < n; i++) {
      dtemp = dtemp + dx[ix] * dy[iy];
      ix = ix + incx;
      iy = iy + incy;
    }
    return (dtemp);
  }

  /* code for both increments equal to 1 */

#ifdef ROLL
  for(i = 0; i < n; i++)
    dtemp = dtemp + dx[i] * dy[i];
  return (dtemp);
#endif
#ifdef UNROLL

  m = n % 5;
  if(m != 0) {
    for(i = 0; i < m; i++)
      dtemp = dtemp + dx[i] * dy[i];
    if(n < 5)
      return (dtemp);
  }
  for(i = m; i < n; i = i + 5) {
    dtemp = dtemp + dx[i] * dy[i] +
        dx[i + 1] * dy[i + 1] + dx[i + 2] * dy[i + 2] +
        dx[i + 3] * dy[i + 3] + dx[i + 4] * dy[i + 4];
  }
  return (dtemp);
#endif
}

static void dgefa ( ,
int  lda,
int  n,
ipvt  ,
int *  info 
) [static]

Definition at line 390 of file kflops.c.

{
  /* internal variables */

  REAL t;
  int j, k, kp1, l, nm1;

  /* gaussian elimination with partial pivoting */

  *info = 0;
  nm1 = n - 1;
  if(nm1 >= 0) {
    for(k = 0; k < nm1; k++) {
      kp1 = k + 1;

      /* find l = pivot index */

      l = idamax(n - k, &a[lda * k + k], 1) + k;
      ipvt[k] = l;

      /* zero pivot implies this column already triangularized */

      if(a[lda * k + l] != ZERO) {

        /* interchange if necessary */

        if(l != k) {
          t = a[lda * k + l];
          a[lda * k + l] = a[lda * k + k];
          a[lda * k + k] = t;
        }

        /* compute multipliers */

        t = -ONE / a[lda * k + k];
        dscal(n - (k + 1), t, &a[lda * k + k + 1], 1);

        /* row elimination with column indexing */

        for(j = kp1; j < n; j++) {
          t = a[lda * j + l];
          if(l != k) {
            a[lda * j + l] = a[lda * j + k];
            a[lda * j + k] = t;
          }
          daxpy(n - (k + 1), t, &a[lda * k + k + 1], 1,
                &a[lda * j + k + 1], 1);
        }
      }
      else {
        *info = k;
      }
    }
  }
  ipvt[n - 1] = n - 1;
  if(a[lda * (n - 1) + (n - 1)] == ZERO)
    *info = n - 1;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void dgesl ( ,
int  lda,
int  n,
ipvt  ,
,
int  job 
) [static]

Definition at line 159 of file kflops.c.

                                                       { for (j=0,j<p,j++)
   dgesl(a,lda,n,ipvt,c[j][0],0); }

   linpack. this version dated 08/14/78 . cleve moler, university of new
   mexico, argonne national lab.

   functions

   blas daxpy,ddot */
{
  /* internal variables */

  REAL t;
  int k, kb, l, nm1;

  nm1 = n - 1;
  if(job == 0) {

    /* job = 0 , solve a * x = b first solve l*y = b */

    if(nm1 >= 1) {
      for(k = 0; k < nm1; k++) {
        l = ipvt[k];
        t = b[l];
        if(l != k) {
          b[l] = b[k];
          b[k] = t;
        }
        daxpy(n - (k + 1), t, &a[lda * k + k + 1], 1, &b[k + 1], 1);
      }
    }

    /* now solve u*x = y */

    for(kb = 0; kb < n; kb++) {
      k = n - (kb + 1);
      b[k] = b[k] / a[lda * k + k];
      t = -b[k];
      daxpy(k, t, &a[lda * k + 0], 1, &b[0], 1);
    }
  }
  else {

    /* job = nonzero, solve trans(a) * x = b first solve trans(u)*y = b */

    for(k = 0; k < n; k++) {
      t = ddot(k, &a[lda * k + 0], 1, &b[0], 1);
      b[k] = (b[k] - t) / a[lda * k + k];
    }

    /* now solve trans(l)*x = y */

    if(nm1 >= 1) {
      for(kb = 1; kb < nm1; kb++) {
        k = n - (kb + 1);
        b[k] = b[k] + ddot(n - (k + 1), &a[lda * k + k + 1], 1, &b[k + 1], 1);
        l = ipvt[k];
        if(l != k) {
          t = b[l];
          b[l] = b[k];
          b[k] = t;
        }
      }
    }
  }
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void dmxpy ( int  n1,
,
int  n2,
int  ldm,
,
m   
) [static]

Definition at line 549 of file kflops.c.

          : multiply matrix m times vector x and add the result to vector y.

   parameters:

   n1 integer, number of elements in vector y, and number of rows in matrix m

   y double [n1], vector of length n1 to which is added the product m*x

   n2 integer, number of elements in vector x, and number of columns in
   matrix m

   ldm integer, leading dimension of array m

   x double [n2], vector of length n2

   m double [ldm][n2], matrix of n1 rows and n2 columns

   ---------------------------------------------------------------------- */
{
  int j, i, jmin;
  /* cleanup odd vector */

  j = n2 % 2;
  if(j >= 1) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = (y[i]) + x[j] * m[ldm * j + i];
  }

  /* cleanup odd group of two vectors */

  j = n2 % 4;
  if(j >= 2) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = ((y[i])
              + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
  }

  /* cleanup odd group of four vectors */

  j = n2 % 8;
  if(j >= 4) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = ((((y[i])
                + x[j - 3] * m[ldm * (j - 3) + i])
               + x[j - 2] * m[ldm * (j - 2) + i])
              + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
  }

  /* cleanup odd group of eight vectors */

  j = n2 % 16;
  if(j >= 8) {
    j = j - 1;
    for(i = 0; i < n1; i++)
      y[i] = ((((((((y[i])
                    + x[j - 7] * m[ldm * (j - 7) + i]) + x[j -
                                                           6] * m[ldm * (j -
                                                                         6) +
                                                                  i])
                  + x[j - 5] * m[ldm * (j - 5) + i]) + x[j - 4] * m[ldm * (j -
                                                                           4)
                                                                    + i])
                + x[j - 3] * m[ldm * (j - 3) + i]) + x[j - 2] * m[ldm * (j -
                                                                         2) +
                                                                  i])
              + x[j - 1] * m[ldm * (j - 1) + i]) + x[j] * m[ldm * j + i];
  }

  /* main loop - groups of sixteen vectors */

  jmin = (n2 % 16) + 16;
  for(j = jmin - 1; j < n2; j = j + 16) {
    for(i = 0; i < n1; i++)
      y[i] = ((((((((((((((((y[i])
                            + x[j - 15] * m[ldm * (j - 15) + i])
                           + x[j - 14] * m[ldm * (j - 14) + i])
                          + x[j - 13] * m[ldm * (j - 13) + i])
                         + x[j - 12] * m[ldm * (j - 12) + i])
                        + x[j - 11] * m[ldm * (j - 11) + i])
                       + x[j - 10] * m[ldm * (j - 10) + i])
                      + x[j - 9] * m[ldm * (j - 9) + i])
                     + x[j - 8] * m[ldm * (j - 8) + i])
                    + x[j - 7] * m[ldm * (j - 7) + i])
                   + x[j - 6] * m[ldm * (j - 6) + i])
                  + x[j - 5] * m[ldm * (j - 5) + i])
                 + x[j - 4] * m[ldm * (j - 4) + i])
                + x[j - 3] * m[ldm * (j - 3) + i])
               + x[j - 2] * m[ldm * (j - 2) + i])
              + x[j - 1] * m[ldm * (j - 1) + i])
          + x[j] * m[ldm * j + i];
  }
}

Here is the caller graph for this function:

static void dscal ( int  n,
REAL  da,
dx  ,
int  incx 
) [static]

Definition at line 337 of file kflops.c.

{
  int i, nincx;

  if(n <= 0)
    return;
  if(incx != 1) {

    /* code for increment not equal to 1 */

    nincx = n * incx;
    for(i = 0; i < nincx; i = i + incx)
      dx[i] = da * dx[i];
    return;
  }

  /* code for increment equal to 1 */

#ifdef ROLL
  for(i = 0; i < n; i++)
    dx[i] = da * dx[i];
#endif
#ifdef UNROLL

  m = n % 5;
  if(m != 0) {
    for(i = 0; i < m; i++)
      dx[i] = da * dx[i];
    if(n < 5)
      return;
  }
  for(i = m; i < n; i = i + 5) {
    dx[i] = da * dx[i];
    dx[i + 1] = da * dx[i + 1];
    dx[i + 2] = da * dx[i + 2];
    dx[i + 3] = da * dx[i + 3];
    dx[i + 4] = da * dx[i + 4];
  }
#endif

}

Here is the caller graph for this function:

static int idamax (  )  [static]

Here is the caller graph for this function:

static int idamax ( int  n,
dx  ,
int  incx 
) [static]

Definition at line 495 of file kflops.c.

{
  REAL dmax;
  int i, ix, itemp;

  if(n < 1)
    return (-1);
  if(n == 1)
    return (0);
  if(incx != 1) {

    /* code for increment not equal to 1 */

    ix = 1;
    itemp = 0;
    dmax = fabs((double) dx[0]);
    ix = ix + incx;
    for(i = 1; i < n; i++) {
      if(fabs((double) dx[ix]) > dmax) {
        itemp = i;
        dmax = fabs((double) dx[ix]);
      }
      ix = ix + incx;
    }
  }
  else {

    /* code for increment equal to 1 */

    itemp = 0;
    dmax = fabs((double) dx[0]);
    for(i = 1; i < n; i++) {
      if(fabs((double) dx[i]) > dmax) {
        itemp = i;
        dmax = fabs((double) dx[i]);
      }
    }
  }
  return (itemp);
}

int kflops (  ) 

Definition at line 692 of file kflops.c.

{
  static REAL aa[SIZE][SIZE], a[SIZE][SIZE + 1], b[SIZE], x[SIZE];
  REAL Newcray, ops, total, norma, normx;
  REAL resid, eps, t1, tm, tm2;
  REAL kflops_epslon(), second(), kf;
  static int ipvt[SIZE], n, i, ntimes, info, lda, ldaa, kflops;

  lda = SIZE + 1;
  ldaa = SIZE;
  Newcray = .056;
  n = SIZE/2;

  ops = (2.0e0 * (n * n * n)) / 3.0 + 2.0 * (n * n);

  matgen((REAL *) a, lda, n, b, &norma);
  t1 = second();
  dgefa((REAL *) a, lda, n, ipvt, &info);
  my_time[0][0] = second() - t1;
  t1 = second();
  dgesl((REAL *) a, lda, n, ipvt, b, 0);
  my_time[1][0] = second() - t1;
  total = my_time[0][0] + my_time[1][0];

  /* compute a residual to verify results.  */

  for(i = 0; i < n; i++) {
    x[i] = b[i];
  }
  matgen((REAL *) a, lda, n, b, &norma);
  for(i = 0; i < n; i++) {
    b[i] = -b[i];
  }
  dmxpy(n, b, n, lda, x, (REAL *) a);
  resid = 0.0;
  normx = 0.0;
  for(i = 0; i < n; i++) {
    resid = (resid > fabs((double) b[i]))
        ? resid : fabs((double) b[i]);
    normx = (normx > fabs((double) x[i]))
        ? normx : fabs((double) x[i]);
  }
  eps = kflops_epslon((REAL) ONE);
  /* residn = resid/( n*norma*normx*eps ); */

  my_time[2][0] = total;
  my_time[3][0] = ops / (1.0e3 * total);
  my_time[4][0] = 2.0e3 / my_time[3][0];
  my_time[5][0] = total / Newcray;

  matgen((REAL *) a, lda, n, b, &norma);
  t1 = second();
  dgefa((REAL *) a, lda, n, ipvt, &info);
  my_time[0][1] = second() - t1;
  t1 = second();
  dgesl((REAL *) a, lda, n, ipvt, b, 0);
  my_time[1][1] = second() - t1;
  total = my_time[0][1] + my_time[1][1];
  my_time[2][1] = total;
  my_time[3][1] = ops / (1.0e3 * total);
  my_time[4][1] = 2.0e3 / my_time[3][1];
  my_time[5][1] = total / Newcray;

  matgen((REAL *) a, lda, n, b, &norma);
  t1 = second();
  dgefa((REAL *) a, lda, n, ipvt, &info);
  my_time[0][2] = second() - t1;
  t1 = second();
  dgesl((REAL *) a, lda, n, ipvt, b, 0);
  my_time[1][2] = second() - t1;
  total = my_time[0][2] + my_time[1][2];
  my_time[2][2] = total;
  my_time[3][2] = ops / (1.0e3 * total);
  my_time[4][2] = 2.0e3 / my_time[3][2];
  my_time[5][2] = total / Newcray;

  ntimes = NTIMES;
  tm2 = 0.0;
  t1 = second();

  for(i = 0; i < ntimes; i++) {
    tm = second();
    matgen((REAL *) a, lda, n, b, &norma);
    tm2 = tm2 + second() - tm;
    dgefa((REAL *) a, lda, n, ipvt, &info);
  }

  my_time[0][3] = (second() - t1 - tm2) / ntimes;
  t1 = second();

  for(i = 0; i < ntimes; i++) {
    dgesl((REAL *) a, lda, n, ipvt, b, 0);
  }

  my_time[1][3] = (second() - t1) / ntimes;
  total = my_time[0][3] + my_time[1][3];
  my_time[2][3] = total;
  my_time[3][3] = ops / (1.0e3 * total);
  my_time[4][3] = 2.0e3 / my_time[3][3];
  my_time[5][3] = total / Newcray;

  matgen((REAL *) aa, ldaa, n, b, &norma);
  t1 = second();
  dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  my_time[0][4] = second() - t1;
  t1 = second();
  dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  my_time[1][4] = second() - t1;
  total = my_time[0][4] + my_time[1][4];
  my_time[2][4] = total;
  my_time[3][4] = ops / (1.0e3 * total);
  my_time[4][4] = 2.0e3 / my_time[3][4];
  my_time[5][4] = total / Newcray;

  matgen((REAL *) aa, ldaa, n, b, &norma);
  t1 = second();
  dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  my_time[0][5] = second() - t1;
  t1 = second();
  dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  my_time[1][5] = second() - t1;
  total = my_time[0][5] + my_time[1][5];
  my_time[2][5] = total;
  my_time[3][5] = ops / (1.0e3 * total);
  my_time[4][5] = 2.0e3 / my_time[3][5];
  my_time[5][5] = total / Newcray;

  matgen((REAL *) aa, ldaa, n, b, &norma);
  t1 = second();
  dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  my_time[0][6] = second() - t1;
  t1 = second();
  dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  my_time[1][6] = second() - t1;
  total = my_time[0][6] + my_time[1][6];
  my_time[2][6] = total;
  my_time[3][6] = ops / (1.0e3 * total);
  my_time[4][6] = 2.0e3 / my_time[3][6];
  my_time[5][6] = total / Newcray;

  ntimes = NTIMES;
  tm2 = 0;
  t1 = second();
  for(i = 0; i < ntimes; i++) {
    tm = second();
    matgen((REAL *) aa, ldaa, n, b, &norma);
    tm2 = tm2 + second() - tm;
    dgefa((REAL *) aa, ldaa, n, ipvt, &info);
  }
  my_time[0][7] = (second() - t1 - tm2) / ntimes;
  t1 = second();
  for(i = 0; i < ntimes; i++) {
    dgesl((REAL *) aa, ldaa, n, ipvt, b, 0);
  }
  my_time[1][7] = (second() - t1) / ntimes;
  total = my_time[0][7] + my_time[1][7];

  my_time[2][7] = total;
  my_time[3][7] = ops / (1.0e3 * total);
  my_time[4][7] = 2.0e3 / my_time[3][7];
  my_time[5][7] = total / Newcray;

  /* the following code sequence implements the semantics of the Fortran
     intrinsics "nint(min(my_time[3][3],my_time[3][7]))" */

  kf = (my_time[3][3] < my_time[3][7]) ? my_time[3][3] : my_time[3][7];
  kf = (kf > ZERO) ? (kf + .5) : (kf - .5);
  if(fabs((double) kf) < ONE)
    kflops = 0;
  else {
    kflops = (int) fabs((double) kf);
    if(kf < ZERO)
      kflops = -kflops;
  }

  return kflops;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void matgen ( ,
int  lda,
int  n,
,
norma 
) [static]

Definition at line 122 of file kflops.c.

{
  int init, i, j;

  init = 1325;
  *norma = 0.0;
  for(j = 0; j < n; j++) {
    for(i = 0; i < n; i++) {
      init = 3125 * init % 65536;
      a[lda * j + i] = (init - 32768.0) / 16384.0;
      *norma = (a[lda * j + i] > *norma) ? a[lda * j + i] : *norma;
    }
  }
  for(i = 0; i < n; i++) {
    b[i] = 0.0;
  }
  for(j = 0; j < n; j++) {
    for(i = 0; i < n; i++) {
      b[i] = b[i] + a[lda * j + i];
    }
  }
}

Here is the caller graph for this function:

static REAL second (  )  [static]

Definition at line 659 of file kflops.c.

{
  REAL t;

#ifndef MPP                     /* Workstations */


#ifndef NORUSAGE
  getrusage(RUSAGE_SELF, &ru);
#else                           /* HPs */
  if(ClockTick == 0.0)
    ClockTick = (REAL) sysconf(_SC_CLK_TCK);
  if(times(&ru) == -1)
    fprintf(stderr, "second() : Oups\n");
#endif


#ifndef NORUSAGE
  t = (REAL) (ru.ru_utime.tv_sec) + ((REAL) (ru.ru_utime.tv_usec)) / 1.0e6;
#else
  t = (REAL) ((REAL) ru.tms_utime / ClockTick);
#endif
#else                           /* MPPs !! */

#endif                          /* MPP */

  return t;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

REAL my_time[9][9] [static]

Definition at line 48 of file kflops.c.

struct rusage ru

Definition at line 40 of file kflops.c.