MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
auxiliary.h File Reference
#include "magma_types.h"
Include dependency graph for auxiliary.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  magma_timestr_s
 

Typedefs

typedef struct magma_timestr_s magma_timestr_t
 

Functions

magma_timestr_t get_current_time (void)
 
double GetTimerValue (magma_timestr_t time_1, magma_timestr_t time_2)
 
real_Double_t magma_wtime (void)
 
real_Double_t magma_sync_wtime (magma_queue_t queue)
 
size_t magma_strlcpy (char *dst, const char *src, size_t siz)
 
magma_int_t magma_num_gpus (void)
 
double magma_cabs (magmaDoubleComplex x)
 
float magma_cabsf (magmaFloatComplex x)
 
magma_int_t magma_is_devptr (const void *A)
 
magma_int_t magma_buildconnection_mgpu (magma_int_t gnode[MagmaMaxGPUs+2][MagmaMaxGPUs+2], magma_int_t *nbcmplx, magma_int_t ngpu)
 
void magma_indices_1D_bcyclic (magma_int_t nb, magma_int_t ngpu, magma_int_t dev, magma_int_t j0, magma_int_t j1, magma_int_t *dj0, magma_int_t *dj1)
 
void magma_print_devices ()
 
void swp2pswp (magma_trans_t trans, magma_int_t n, magma_int_t *ipiv, magma_int_t *newipiv)
 

Typedef Documentation

Function Documentation

magma_timestr_t get_current_time ( void  )

Definition at line 76 of file timer.cpp.

77 {
78  struct timeval time_val;
79  magma_timestr_t time;
80 
81  cudaDeviceSynchronize();
82  gettimeofday(&time_val, NULL);
83 
84  time.sec = time_val.tv_sec;
85  time.usec = time_val.tv_usec;
86  return (time);
87 }
double GetTimerValue ( magma_timestr_t  time_1,
magma_timestr_t  time_2 
)

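Definition at line 94 of file timer.cpp. Note: the body listed below uses the identifiers start and end, not the time_1 and time_2 names shown in the prototype above.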

References magma_timestr_s::sec, and magma_timestr_s::usec.

95 {
96  int sec, usec;
97 
98  sec = end.sec - start.sec;
99  usec = end.usec - start.usec;
100 
101  return (1000.*(double)(sec) + (double)(usec) * 0.001);
102 }
magma_int_t magma_buildconnection_mgpu ( magma_int_t  gnode[MagmaMaxGPUs+2][MagmaMaxGPUs+2],
magma_int_t *  nbcmplx,
magma_int_t  ngpu 
)

Definition at line 16 of file connection_mgpu.cpp.

References magma_setdevice(), and MagmaMaxGPUs.

17 {
18  magma_int_t *deviceid = (magma_int_t *) malloc(ngpu*sizeof(magma_int_t));
19  memset(deviceid,0,ngpu*sizeof(magma_int_t));
20 
21  nbcmplx[0] =0;
22 
23 
24  //printf(" Initializing....\n\n");
25  //printf(" This machine has %d GPU\n",ngpu);
26 
27  //printf(" cudaSuccess %d, cudaErrorInvalidDevice %d, cudaErrorPeerAccessAlreadyEnabled %d, cudaErrorInvalidValue %d \n", cudaSuccess, cudaErrorInvalidDevice,cudaErrorPeerAccessAlreadyEnabled, cudaErrorInvalidValue );
28 
29  int samecomplex=-1;
30  cudaError_t err,scerr;
31  cudaDeviceProp prop;
32 
33  magma_int_t cmplxnb = 0;
34  magma_int_t cmplxid = 0;
35  magma_int_t lcgpunb = 0;
36  for( magma_int_t d = 0; d < ngpu; ++d ) {
37  // check for unified memory & enable peer memory access between all GPUs.
38  magma_setdevice( d );
39  cudaGetDeviceProperties( &prop, d );
40  if ( ! prop.unifiedAddressing ) {
41  printf( "device %d doesn't support unified addressing\n", (int) d );
42  return -1;
43  }
44  // add this device to the list if not added yet.
45  // not added yet meaning belong to a new complex
46  if(deviceid[d]==0){
47  cmplxnb = cmplxnb+1;
48  cmplxid = cmplxnb-1;
49  gnode[cmplxid][MagmaMaxGPUs] = 1;
50  lcgpunb = gnode[cmplxid][MagmaMaxGPUs]-1;
51  gnode[cmplxid][lcgpunb] = d;
52  deviceid[d]=-1;
53  }
54  //printf("DEVICE %d : \n",d);
55 
56  for( magma_int_t d2 = d+1; d2 < ngpu; ++d2 ) {
57  // check for unified memory & enable peer memory access between all GPUs.
58  magma_setdevice( d2 );
59  cudaGetDeviceProperties( &prop, d2 );
60  if ( ! prop.unifiedAddressing ) {
61  printf( "device %d doesn't support unified addressing\n", (int) d2 );
62  return -1;
63  }
64 
65  scerr = cudaDeviceCanAccessPeer(&samecomplex,d,d2);
66 
67  //printf(" device %d and device %d have samecomplex= %d\n",d,d2,samecomplex);
68  if(samecomplex==1){
69  // d and d2 are on the same complex so add them, note that d is already added
70  // so just enable the peer Access for d and enable+add d2.
71  // FOR d:
72  magma_setdevice( d );
73  err = cudaDeviceEnablePeerAccess( d2, 0 );
74  //printf("enabling devide %d ==> %d error %d\n",d,d2,err);
75  if ( err != cudaSuccess && err != cudaErrorPeerAccessAlreadyEnabled ) {
76  printf( "device %d cudaDeviceEnablePeerAccess error %d\n", (int) d2, (int) err );
77  return -2;
78  }
79 
80  // FOR d2:
81  magma_setdevice( d2 );
82  err = cudaDeviceEnablePeerAccess( d, 0 );
83  //printf("enabling devide %d ==> %d error %d\n",d2,d,err);
84  if((err==cudaSuccess)||(err==cudaErrorPeerAccessAlreadyEnabled)){
85  if(deviceid[d2]==0){
86  //printf("adding device %d\n",d2);
87  gnode[cmplxid][MagmaMaxGPUs] = gnode[cmplxid][MagmaMaxGPUs]+1;
88  lcgpunb = gnode[cmplxid][MagmaMaxGPUs]-1;
89  gnode[cmplxid][lcgpunb] = d2;
90  deviceid[d2]=-1;
91  }
92  }else{
93  printf( "device %d cudaDeviceEnablePeerAccess error %d\n", (int) d, (int) err );
94  return -2;
95  }
96  }
97  }
98  }
99 
100  nbcmplx[0] = cmplxnb;
101  return cmplxnb;
102 }
int magma_int_t
Definition: magmablas.h:12
void magma_setdevice(magma_device_t dev)
#define MagmaMaxGPUs
Definition: magma_types.h:255

Here is the call graph for this function:

Here is the caller graph for this function:

double magma_cabs ( magmaDoubleComplex  x)
float magma_cabsf ( magmaFloatComplex  x)
void magma_indices_1D_bcyclic ( magma_int_t  nb,
magma_int_t  ngpu,
magma_int_t  dev,
magma_int_t  j0,
magma_int_t  j1,
magma_int_t *  dj0,
magma_int_t *  dj1 
)

Definition at line 220 of file auxiliary.cpp.

223 {
224  // on GPU jdev, which contains j0, dj0 maps to j0.
225  // on other GPUs, dj0 is start of the block on that GPU after j0's block.
226  magma_int_t jblock = (j0 / nb) / ngpu;
227  magma_int_t jdev = (j0 / nb) % ngpu;
228  if ( dev < jdev ) {
229  jblock += 1;
230  }
231  *dj0 = jblock*nb;
232  if ( dev == jdev ) {
233  *dj0 += (j0 % nb);
234  }
235 
236  // on GPU jdev, which contains j1-1, dj1 maps to j1.
237  // on other GPUs, dj1 is end of the block on that GPU before j1's block.
238  // j1 points to element after end (e.g., n), so subtract 1 to get last
239  // element, compute index, then add 1 to get just after that index again.
240  j1 -= 1;
241  jblock = (j1 / nb) / ngpu;
242  jdev = (j1 / nb) % ngpu;
243  if ( dev > jdev ) {
244  jblock -= 1;
245  }
246  if ( dev == jdev ) {
247  *dj1 = jblock*nb + (j1 % nb) + 1;
248  }
249  else {
250  *dj1 = jblock*nb + nb;
251  }
252 }
int magma_int_t
Definition: magmablas.h:12

Here is the caller graph for this function:

magma_int_t magma_is_devptr ( const void *  A)

Definition at line 47 of file auxiliary.cpp.

48 {
49  cudaError_t err;
50  cudaDeviceProp prop;
51  cudaPointerAttributes attr;
52  int dev;
53  err = cudaGetDevice( &dev );
54  if ( ! err ) {
55  err = cudaGetDeviceProperties( &prop, dev );
56  if ( ! err && prop.unifiedAddressing ) {
57  // I think the cudaPointerGetAttributes prototype is wrong, missing const (mgates)
58  err = cudaPointerGetAttributes( &attr, const_cast<void*>( A ));
59  if ( ! err ) {
60  // definitely know type
61  return (attr.memoryType == cudaMemoryTypeDevice);
62  }
63  else if ( err == cudaErrorInvalidValue ) {
64  // clear error; see http://icl.cs.utk.edu/magma/forum/viewtopic.php?f=2&t=529
65  cudaGetLastError();
66  // infer as host pointer
67  return 0;
68  }
69  }
70  }
71  // clear error
72  cudaGetLastError();
73  // unknown, e.g., device doesn't support unified addressing
74  return -1;
75 }
#define A(i, j)
Definition: cprint.cpp:16

Here is the caller graph for this function:

magma_int_t magma_num_gpus ( void  )

Definition at line 83 of file auxiliary.cpp.

References MagmaMaxGPUs, and min.

84 {
85  const char *ngpu_str = getenv("MAGMA_NUM_GPUS");
86  int ngpu = 1;
87  if ( ngpu_str != NULL ) {
88  char* endptr;
89  ngpu = strtol( ngpu_str, &endptr, 10 );
90  int ndevices;
91  cudaGetDeviceCount( &ndevices );
92  // if *endptr == '\0' then entire string was valid number (or empty)
93  if ( ngpu < 1 || *endptr != '\0' ) {
94  ngpu = 1;
95  fprintf( stderr, "$MAGMA_NUM_GPUS=%s is an invalid number; using %d GPU.\n",
96  ngpu_str, ngpu );
97  }
98  else if ( ngpu > MagmaMaxGPUs || ngpu > ndevices ) {
99  ngpu = min( ndevices, MagmaMaxGPUs );
100  fprintf( stderr, "$MAGMA_NUM_GPUS=%s exceeds MagmaMaxGPUs=%d or available GPUs=%d; using %d GPUs.\n",
101  ngpu_str, MagmaMaxGPUs, ndevices, ngpu );
102  }
103  assert( 1 <= ngpu && ngpu <= ndevices );
104  }
105  return ngpu;
106 }
#define min(a, b)
Definition: common_magma.h:86
#define MagmaMaxGPUs
Definition: magma_types.h:255

Here is the caller graph for this function:

void magma_print_devices ( )
size_t magma_strlcpy ( char *  dst,
const char *  src,
size_t  siz 
)

Definition at line 47 of file strlcpy.cpp.

48 {
49  char *d = dst;
50  const char *s = src;
51  size_t n = siz;
52 
53  /* Copy as many bytes as will fit */
54  if (n != 0) {
55  while (--n != 0) {
56  if ((*d++ = *s++) == '\0')
57  break;
58  }
59  }
60 
61  /* Not enough room in dst, add NUL and traverse rest of src */
62  if (n == 0) {
63  if (siz != 0)
64  *d = '\0'; /* NUL-terminate dst */
65  while (*s++)
66  ;
67  }
68 
69  return (s - src - 1); /* count does not include NUL */
70 }
real_Double_t magma_sync_wtime ( magma_queue_t  queue)

Definition at line 119 of file timer.cpp.

References magma_queue_sync, and magma_wtime().

120 {
121  magma_queue_sync( queue );
122  return magma_wtime();
123 }
double magma_wtime(void)
Definition: timer.cpp:110
#define magma_queue_sync(queue)
Definition: magma.h:119

Here is the call graph for this function:

Here is the caller graph for this function:

real_Double_t magma_wtime ( void  )

Definition at line 110 of file timer.cpp.

111 {
112  struct timeval t;
113  gettimeofday( &t, NULL );
114  return t.tv_sec + t.tv_usec*1e-6;
115 }

Here is the caller graph for this function:

void swp2pswp ( magma_trans_t  trans,
magma_int_t  n,
magma_int_t *  ipiv,
magma_int_t *  newipiv 
)

Definition at line 117 of file auxiliary.cpp.

References lapackf77_lsame.

118 {
119  magma_int_t i, newind, ind;
120  magma_int_t notran = (trans == 'N' || trans == 'n');
121 
122  for(i=0; i<n; i++)
123  newipiv[i] = -1;
124 
125  if (notran){
126  for(i=0; i<n; i++){
127  newind = ipiv[i] - 1;
128  if (newipiv[newind] == -1) {
129  if (newipiv[i]==-1){
130  newipiv[i] = newind;
131  if (newind>i)
132  newipiv[newind]= i;
133  }
134  else
135  {
136  ind = newipiv[i];
137  newipiv[i] = newind;
138  if (newind>i)
139  newipiv[newind]= ind;
140  }
141  }
142  else {
143  if (newipiv[i]==-1){
144  if (newind>i){
145  ind = newipiv[newind];
146  newipiv[newind] = i;
147  newipiv[i] = ind;
148  }
149  else
150  newipiv[i] = newipiv[newind];
151  }
152  else{
153  ind = newipiv[i];
154  newipiv[i] = newipiv[newind];
155  if (newind > i)
156  newipiv[newind] = ind;
157  }
158  }
159  }
160  } else {
161  for(i=n-1; i>=0; i--){
162  newind = ipiv[i] - 1;
163  if (newipiv[newind] == -1) {
164  if (newipiv[i]==-1){
165  newipiv[i] = newind;
166  if (newind>i)
167  newipiv[newind]= i;
168  }
169  else
170  {
171  ind = newipiv[i];
172  newipiv[i] = newind;
173  if (newind>i)
174  newipiv[newind]= ind;
175  }
176  }
177  else {
178  if (newipiv[i]==-1){
179  if (newind>i){
180  ind = newipiv[newind];
181  newipiv[newind] = i;
182  newipiv[i] = ind;
183  }
184  else
185  newipiv[i] = newipiv[newind];
186  }
187  else{
188  ind = newipiv[i];
189  newipiv[i] = newipiv[newind];
190  if (newind > i)
191  newipiv[newind] = ind;
192  }
193  }
194  }
195  }
196 }
int magma_int_t
Definition: magmablas.h:12

Here is the caller graph for this function: