MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
auxiliary.cpp
/*
    -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013
*/

#include "common_magma.h"
#include <assert.h>


#if defined( _WIN32 ) || defined( _WIN64 )

// -------------------------
// Return log base 2 of x, per C99 standard. Not provided by Microsoft.
double log2( double x )
{
    const double log_2 = 0.6931471805599453;
    return log( x ) / log_2;
}

#endif
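
// Illustrative sanity check (editor's sketch, not part of the original file):
// log_2 holds ln(2), so log(x)/log_2 is the base-2 logarithm.
//
//     assert( fabs( log2( 8.0 ) - 3.0 ) < 1e-12 );  // ln(8)/ln(2) == 3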


// -------------------------
// Returns version of MAGMA, as defined by
// MAGMA_VERSION_MAJOR, MAGMA_VERSION_MINOR, MAGMA_VERSION_MICRO constants.
void magma_version( int* major, int* minor, int* micro )
{
    if ( major != NULL && minor != NULL && micro != NULL ) {
        *major = MAGMA_VERSION_MAJOR;
        *minor = MAGMA_VERSION_MINOR;
        *micro = MAGMA_VERSION_MICRO;
    }
}
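
// Usage sketch (editor's illustration, not part of the original file):
// query the linked library's version at runtime.
//
//     int major, minor, micro;
//     magma_version( &major, &minor, &micro );
//     printf( "using MAGMA %d.%d.%d\n", major, minor, micro );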


// -------------------------
// Returns:
//  1 if A is a device pointer (definitely),
//  0 if A is a host pointer (definitely or inferred from error),
// -1 if unknown.
// On 2.0 cards with unified addressing, CUDA can tell if this is a device pointer.
// For malloc'd host pointers, cudaPointerGetAttributes returns error.
// @author Mark Gates
extern "C" magma_int_t magma_is_devptr( const void* A )
{
    cudaError_t err;
    cudaDeviceProp prop;
    cudaPointerAttributes attr;
    int dev;
    err = cudaGetDevice( &dev );
    if ( ! err ) {
        err = cudaGetDeviceProperties( &prop, dev );
        if ( ! err && prop.unifiedAddressing ) {
            // I think the cudaPointerGetAttributes prototype is wrong, missing const (mgates)
            err = cudaPointerGetAttributes( &attr, const_cast<void*>( A ));
            if ( ! err ) {
                // definitely know type
                return (attr.memoryType == cudaMemoryTypeDevice);
            }
            else if ( err == cudaErrorInvalidValue ) {
                // clear error; see http://icl.cs.utk.edu/magma/forum/viewtopic.php?f=2&t=529
                cudaGetLastError();
                // infer as host pointer
                return 0;
            }
        }
    }
    // clear error
    cudaGetLastError();
    // unknown, e.g., device doesn't support unified addressing
    return -1;
}
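
// Usage sketch (editor's illustration, not part of the original file):
// branch on where a previously allocated pointer x lives; -1 means the
// runtime could not classify it (e.g., the device lacks unified addressing).
//
//     magma_int_t loc = magma_is_devptr( x );
//     if      ( loc ==  1 ) { /* x is in GPU memory  */ }
//     else if ( loc ==  0 ) { /* x is in host memory */ }
//     else                  { /* unknown */ }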


/* ////////////////////////////////////////////////////////////////////////////
   -- Get number of GPUs to use from $MAGMA_NUM_GPUS environment variable.
   @author Mark Gates
*/
extern "C"
magma_int_t magma_num_gpus( void )
{
    const char *ngpu_str = getenv("MAGMA_NUM_GPUS");
    int ngpu = 1;
    if ( ngpu_str != NULL ) {
        char* endptr;
        ngpu = strtol( ngpu_str, &endptr, 10 );
        int ndevices;
        cudaGetDeviceCount( &ndevices );
        // if *endptr == '\0' then the entire string was a valid number (or empty)
        if ( ngpu < 1 || *endptr != '\0' ) {
            ngpu = 1;
            fprintf( stderr, "$MAGMA_NUM_GPUS=%s is an invalid number; using %d GPU.\n",
                     ngpu_str, ngpu );
        }
        else if ( ngpu > MagmaMaxGPUs || ngpu > ndevices ) {
            ngpu = min( ndevices, MagmaMaxGPUs );
            fprintf( stderr, "$MAGMA_NUM_GPUS=%s exceeds MagmaMaxGPUs=%d or available GPUs=%d; using %d GPUs.\n",
                     ngpu_str, MagmaMaxGPUs, ndevices, ngpu );
        }
        assert( 1 <= ngpu && ngpu <= ndevices );
    }
    return ngpu;
}
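
// Usage sketch (editor's illustration, not part of the original file):
// with MAGMA_NUM_GPUS=2 in the environment (e.g., `export MAGMA_NUM_GPUS=2`),
// this returns 2, clamped to MagmaMaxGPUs and the detected device count.
//
//     magma_int_t ngpu = magma_num_gpus();
//     printf( "running on %d GPU(s)\n", (int) ngpu );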


/* ////////////////////////////////////////////////////////////////////////////
   -- Auxiliary function: ipiv(i) indicates that row i has been swapped with
      ipiv(i), applied from top to bottom. This function rearranges ipiv into
      newipiv, where row i has to be moved to newipiv(i). The new pivoting
      allows the row interchanges to be applied in parallel, whereas the
      original form assumes a specific ordering and has to be applied
      sequentially.
*/
extern "C"
void swp2pswp( magma_trans_t trans, magma_int_t n, magma_int_t *ipiv, magma_int_t *newipiv)
{
    magma_int_t i, newind, ind;
    magma_int_t notran = (trans == 'N' || trans == 'n');

    for( i=0; i < n; i++ )
        newipiv[i] = -1;

    if ( notran ) {
        for( i=0; i < n; i++ ) {
            newind = ipiv[i] - 1;
            if ( newipiv[newind] == -1 ) {
                if ( newipiv[i] == -1 ) {
                    newipiv[i] = newind;
                    if ( newind > i )
                        newipiv[newind] = i;
                }
                else {
                    ind = newipiv[i];
                    newipiv[i] = newind;
                    if ( newind > i )
                        newipiv[newind] = ind;
                }
            }
            else {
                if ( newipiv[i] == -1 ) {
                    if ( newind > i ) {
                        ind = newipiv[newind];
                        newipiv[newind] = i;
                        newipiv[i] = ind;
                    }
                    else
                        newipiv[i] = newipiv[newind];
                }
                else {
                    ind = newipiv[i];
                    newipiv[i] = newipiv[newind];
                    if ( newind > i )
                        newipiv[newind] = ind;
                }
            }
        }
    }
    else {
        for( i=n-1; i >= 0; i-- ) {
            newind = ipiv[i] - 1;
            if ( newipiv[newind] == -1 ) {
                if ( newipiv[i] == -1 ) {
                    newipiv[i] = newind;
                    if ( newind > i )
                        newipiv[newind] = i;
                }
                else {
                    ind = newipiv[i];
                    newipiv[i] = newind;
                    if ( newind > i )
                        newipiv[newind] = ind;
                }
            }
            else {
                if ( newipiv[i] == -1 ) {
                    if ( newind > i ) {
                        ind = newipiv[newind];
                        newipiv[newind] = i;
                        newipiv[i] = ind;
                    }
                    else
                        newipiv[i] = newipiv[newind];
                }
                else {
                    ind = newipiv[i];
                    newipiv[i] = newipiv[newind];
                    if ( newind > i )
                        newipiv[newind] = ind;
                }
            }
        }
    }
}
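
// Worked example (editor's illustration, not part of the original file):
// for n = 3 and LAPACK-style ipiv = {2, 3, 3} (1-based), applying the swaps
// sequentially -- 0<->1, then 1<->2, then 2<->2 -- reorders rows [0,1,2]
// into [1,2,0]. swp2pswp( 'N', 3, ipiv, newipiv ) encodes exactly this final
// permutation as newipiv = {1, 2, 0} (0-based), so all three row moves can
// be issued at once instead of one swap at a time.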

// --------------------
// Convert global indices [j0, j1) to local indices [dj0, dj1) on GPU dev,
// according to 1D block cyclic distribution.
// Note j0 and dj0 are inclusive, while j1 and dj1 are exclusive.
// This is consistent with the C++ container notion of first and last.
//
// Example with n = 75, nb = 10, ngpu = 3
// local dj:                0- 9,  10-19,  20-29
// ----------------------------------------------
// gpu 0: 3  blocks, cols:  0- 9,  30-39,  60-69
// gpu 1: 2+ blocks, cols: 10-19,  40-49,  70-74 (partial)
// gpu 2: 2  blocks, cols: 20-29,  50-59
//
// j0 = 15, j0dev = 1
// j1 = 70, j1dev = 0
// gpu 0: dj0 = 10, dj1 = 30
// gpu 1: dj0 =  5, dj1 = 20
// gpu 2: dj0 =  0, dj1 = 20
//
// @author Mark Gates

extern "C"
void magma_indices_1D_bcyclic( magma_int_t nb, magma_int_t ngpu, magma_int_t dev,
                               magma_int_t j0, magma_int_t j1,
                               magma_int_t* dj0, magma_int_t* dj1 )
{
    // on GPU jdev, which contains j0, dj0 maps to j0.
    // on other GPUs, dj0 is start of the block on that GPU after j0's block.
    magma_int_t jblock = (j0 / nb) / ngpu;
    magma_int_t jdev   = (j0 / nb) % ngpu;
    if ( dev < jdev ) {
        jblock += 1;
    }
    *dj0 = jblock*nb;
    if ( dev == jdev ) {
        *dj0 += (j0 % nb);
    }

    // on GPU jdev, which contains j1-1, dj1 maps to j1.
    // on other GPUs, dj1 is end of the block on that GPU before j1's block.
    // j1 points to element after end (e.g., n), so subtract 1 to get last
    // element, compute index, then add 1 to get just after that index again.
    j1 -= 1;
    jblock = (j1 / nb) / ngpu;
    jdev   = (j1 / nb) % ngpu;
    if ( dev > jdev ) {
        jblock -= 1;
    }
    if ( dev == jdev ) {
        *dj1 = jblock*nb + (j1 % nb) + 1;
    }
    else {
        *dj1 = jblock*nb + nb;
    }
}
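
// Usage sketch (editor's illustration, not part of the original file),
// reproducing the example above (nb = 10, ngpu = 3, j0 = 15, j1 = 70):
//
//     magma_int_t dj0, dj1;
//     for( magma_int_t dev = 0; dev < 3; ++dev ) {
//         magma_indices_1D_bcyclic( 10, 3, dev, 15, 70, &dj0, &dj1 );
//         // dev 0: dj0 = 10, dj1 = 30
//         // dev 1: dj0 =  5, dj1 = 20
//         // dev 2: dj0 =  0, dj1 = 20
//     }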