MAGMA  magma-1.4.0
Matrix Algebra on GPU and Multicore Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
auxiliary.cpp
Go to the documentation of this file.
1 /*
2  -- MAGMA (version 1.4.0) --
3  Univ. of Tennessee, Knoxville
4  Univ. of California, Berkeley
5  Univ. of Colorado, Denver
6  August 2013
7 */
8 
9 #include "common_magma.h"
10 
11 #if defined( _WIN32 ) || defined( _WIN64 )
12 
13 # include <time.h>
14 # include <sys/timeb.h>
15 # if defined(_MSC_VER) || defined(_MSC_EXTENSIONS)
16 # define DELTA_EPOCH_IN_MICROSECS 11644473600000000Ui64
17 # else
18 # define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
19 # endif
20 
21 #else
22 
23 # include <sys/time.h>
24 
25 #endif
26 
27 #if defined(ADD_)
28 # define magma_gettime_f magma_gettime_f_
29 # define magma_gettimervalue_f magma_gettimervalue_f_
30 #elif defined(NOCHANGE)
31 #endif
32 
33 /* ////////////////////////////////////////////////////////////////////////////
34  -- Get current time
35 */
36 #if defined( _WIN32 ) || defined( _WIN64 )
37 struct timezone
38 {
39  int tz_minuteswest; /* minutes W of Greenwich */
40  int tz_dsttime; /* type of dst correction */
41 };
42 
43 extern "C"
44 int gettimeofday(struct timeval* tv, struct timezone* tz)
45 {
46  FILETIME ft;
47  unsigned __int64 tmpres = 0;
48  static int tzflag;
49 
50  if (NULL != tv)
51  {
52  GetSystemTimeAsFileTime(&ft);
53  tmpres |= ft.dwHighDateTime;
54  tmpres <<= 32;
55  tmpres |= ft.dwLowDateTime;
56 
57  /*converting file time to unix epoch*/
58  tmpres /= 10; /*convert into microseconds*/
59  tmpres -= DELTA_EPOCH_IN_MICROSECS;
60 
61  tv->tv_sec = (long)(tmpres / 1000000UL);
62  tv->tv_usec = (long)(tmpres % 1000000UL);
63  }
64  if (NULL != tz)
65  {
66  if (!tzflag)
67  {
68  _tzset();
69  tzflag++;
70  }
71  tz->tz_minuteswest = _timezone / 60;
72  tz->tz_dsttime = _daylight;
73  }
74  return 0;
75 }
76 #endif
77 
78 extern "C"
80 {
81  static struct timeval time_val;
82 
83  magma_timestr_t time;
84 
85  cudaDeviceSynchronize();
86  gettimeofday(&time_val, NULL);
87 
88  time.sec = time_val.tv_sec;
89  time.usec = time_val.tv_usec;
90  return (time);
91 }
92 
93 extern "C"
94 void magma_gettime_f(unsigned int *time) {
96  time[0] = tmp.sec;
97  time[1] = tmp.usec;
98 }
99 
100 /* ////////////////////////////////////////////////////////////////////////////
101  -- End elapsed time
102 */
103 extern "C"
105 {
106  int sec, usec;
107 
108  sec = time_2.sec - time_1.sec;
109  usec = time_2.usec - time_1.usec;
110 
111  return (1000.*(double)(sec) + (double)(usec) * 0.001);
112 }
113 
114 extern "C"
115 void magma_gettimervalue_f(unsigned int *start, unsigned int *end, double *result) {
116  magma_timestr_t time1, time2;
117  time1.sec = start[0];
118  time1.usec = start[1];
119  time2.sec = end[0];
120  time2.usec = end[1];
121  *result = GetTimerValue(time1, time2);
122 }
123 
124 /* ////////////////////////////////////////////////////////////////////////////
125  -- Print the available GPU devices
126 */
127 extern "C"
129 {
130  int ndevices;
131  cuDeviceGetCount( &ndevices );
132  for( int idevice = 0; idevice < ndevices; idevice++ )
133  {
134  char name[200];
135 #if CUDA_VERSION > 3010
136  size_t totalMem;
137 #else
138  unsigned int totalMem;
139 #endif
140 
141  int clock;
142  CUdevice dev;
143 
144  cuDeviceGet( &dev, idevice );
145  cuDeviceGetName( name, sizeof(name), dev );
146  cuDeviceTotalMem( &totalMem, dev );
147  cuDeviceGetAttribute( &clock,
148  CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev );
149  printf( "device %d: %s, %.1f MHz clock, %.1f MB memory\n",
150  idevice, name, clock/1000.f, totalMem/1024.f/1024.f );
151  }
152 }
153 
154 
155 /* ////////////////////////////////////////////////////////////////////////////
156  -- Put 0s in the upper triangular part of a panel (and 1s on the diagonal)
157  if uplo is 'U'/'u', or 0s in the lower triangular part of a panel (and
158  1s on the diagonal) if uplo is 'L'/'l'.
159  This is auxiliary function used in geqrf and geqlf.
160 */
161 extern "C"
162 void spanel_to_q(char uplo, int ib, float *a, int lda, float *work){
163  int i, j, k = 0;
164  float *col;
165 
166  if (uplo == 'U' || uplo == 'u'){
167  for(i=0; i<ib; i++){
168  col = a + i*lda;
169  for(j=0; j<i; j++){
170  work[k++] = col[j];
171  col[j] = 0.;
172  }
173  work[k++] = col[i];
174  col[j] = 1.;
175  }
176  }
177  else {
178  for(i=0; i<ib; i++){
179  col = a + i*lda;
180  work[k++] = col[i];
181  col[i] = 1.;
182  for(j=i+1; j<ib; j++){
183  work[k++] = col[j];
184  col[j] = 0.;
185  }
186  }
187  }
188 }
189 
190 /* ////////////////////////////////////////////////////////////////////////////
191  -- Restores a panel (after call to "panel_to_q").
192  This isauxiliary function usedin geqrf and geqlf.
193 */
194 extern "C"
195 void sq_to_panel(char uplo, int ib, float *a, int lda, float *work){
196  int i, j, k = 0;
197  float *col;
198 
199  if (uplo == 'U' || uplo == 'u'){
200  for(i=0; i<ib; i++){
201  col = a + i*lda;
202  for(j=0; j<=i; j++)
203  col[j] = work[k++];
204  }
205  }
206  else {
207  for(i=0; i<ib; i++){
208  col = a + i*lda;
209  for(j=i; j<ib; j++)
210  col[j] = work[k++];
211  }
212  }
213 }
214 
215 /* ////////////////////////////////////////////////////////////////////////////
216  -- Put 0s in the upper triangular part of a panel (and 1s on the diagonal)
217 */
218 extern "C"
219 void cpanel_to_q(char uplo, int ib, float2 *a, int lda, float2 *work){
220  int i, j, k = 0;
221  float2 *col;
222 
223  if (uplo == 'U' || uplo == 'u'){
224  for(i=0; i<ib; i++){
225  col = a + i*lda;
226  for(j=0; j<i; j++){
227  work[k ].x = col[j].x;
228  work[k++].y = col[j].y;
229  col[j].x = col[j].y = 0.;
230  }
231  work[k ].x = col[i].x;
232  work[k++].y = col[i].y;
233  col[j].x = 1.;
234  col[j].y = 0.;
235  }
236  }
237  else {
238  for(i=0; i<ib; i++){
239  col = a + i*lda;
240  work[k ].x = col[i].x;
241  work[k++].y = col[i].y;
242  col[i].x = 1.;
243  col[i].y = 0.;
244  for(j=i+1; j<ib; j++){
245  work[k ].x = col[j].x;
246  work[k++].y = col[j].y;
247  col[j].x = col[j].y = 0.;
248  }
249  }
250  }
251 }
252 
253 /* ////////////////////////////////////////////////////////////////////////////
254  -- Restores a panel (after call to "panel_to_q")
255 */
256 extern "C"
257 void cq_to_panel(char uplo, int ib, float2 *a, int lda, float2 *work){
258  int i, j, k = 0;
259  float2 *col;
260 
261  if (uplo == 'U' || uplo == 'u'){
262  for(i=0; i<ib; i++){
263  col = a + i*lda;
264  for(j=0; j<=i; j++){
265  col[j].x = work[k ].x;
266  col[j].y = work[k++].y;
267  }
268  }
269  }
270  else {
271  for(i=0; i<ib; i++){
272  col = a + i*lda;
273  for(j=i; j<ib; j++){
274  col[j].x = work[k ].x;
275  col[j].y = work[k++].y;
276  }
277  }
278  }
279 }
280 
281 /* ////////////////////////////////////////////////////////////////////////////
282  -- Put 0s in the upper triangular part of a panel (and 1s on the diagonal)
283 */
284 extern "C"
285 void dpanel_to_q(char uplo, int ib, double *a, int lda, double *work){
286  int i, j, k = 0;
287  double *col;
288 
289  if (uplo == 'U' || uplo == 'u'){
290  for(i=0; i<ib; i++){
291  col = a + i*lda;
292  for(j=0; j<i; j++){
293  work[k++] = col[j];
294  col[j] = 0.;
295  }
296  work[k++] = col[i];
297  col[j] = 1.;
298  }
299  }
300  else {
301  for(i=0; i<ib; i++){
302  col = a + i*lda;
303  work[k++] = col[i];
304  col[i] = 1.;
305  for(j=i+1; j<ib; j++){
306  work[k++] = col[j];
307  col[j] = 0.;
308  }
309  }
310  }
311 }
312 
313 /* ////////////////////////////////////////////////////////////////////////////
314  -- Restores a panel (after call to "panel_to_q")
315 */
316 extern "C"
317 void dq_to_panel(char uplo, int ib, double *a, int lda, double *work){
318  int i, j, k = 0;
319  double *col;
320 
321  if (uplo == 'U' || uplo == 'u'){
322  for(i=0; i<ib; i++){
323  col = a + i*lda;
324  for(j=0; j<=i; j++)
325  col[j] = work[k++];
326  }
327  }
328  else {
329  for(i=0; i<ib; i++){
330  col = a + i*lda;
331  for(j=i; j<ib; j++)
332  col[j] = work[k++];
333  }
334  }
335 }
336 
337 /* ////////////////////////////////////////////////////////////////////////////
338  -- Put 0s in the upper triangular part of a panel (and 1s on the diagonal)
339 */
340 extern "C"
341 void zpanel_to_q(char uplo, int ib, cuDoubleComplex *a, int lda, cuDoubleComplex *work){
342  int i, j, k = 0;
343  cuDoubleComplex *col;
344 
345  if (uplo == 'U' || uplo == 'u'){
346  for(i=0; i<ib; i++){
347  col = a + i*lda;
348  for(j=0; j<i; j++){
349  work[k ].x = col[j].x;
350  work[k++].y = col[j].y;
351  col[j].x = col[j].y = 0.;
352  }
353  work[k ].x = col[i].x;
354  work[k++].y = col[i].y;
355  col[j].x = 1.;
356  col[j].y = 0.;
357  }
358  }
359  else {
360  for(i=0; i<ib; i++){
361  col = a + i*lda;
362  work[k ].x = col[i].x;
363  work[k++].y = col[i].y;
364  col[i].x = 1.;
365  col[i].y = 0.;
366  for(j=i+1; j<ib; j++){
367  work[k ].x = col[j].x;
368  work[k++].y = col[j].y;
369  col[j].x = col[j].y = 0.;
370  }
371  }
372  }
373 }
374 
375 /* ////////////////////////////////////////////////////////////////////////////
376  -- Restores a panel (after call to "panel_to_q")
377 */
378 extern "C"
379 void zq_to_panel(char uplo, int ib, cuDoubleComplex *a, int lda, cuDoubleComplex *work){
380  int i, j, k = 0;
381  cuDoubleComplex *col;
382 
383  if (uplo == 'U' || uplo == 'u'){
384  for(i=0; i<ib; i++){
385  col = a + i*lda;
386  for(j=0; j<=i; j++){
387  col[j].x = work[k ].x;
388  col[j].y = work[k++].y;
389  }
390  }
391  }
392  else {
393  for(i=0; i<ib; i++){
394  col = a + i*lda;
395  for(j=i; j<ib; j++){
396  col[j].x = work[k ].x;
397  col[j].y = work[k++].y;
398  }
399  }
400  }
401 }
402 
403 /* ////////////////////////////////////////////////////////////////////////////
404  -- Auxiliary function: ipiv(i) indicates that row i has been swapped with
405  ipiv(i) from top to bottom. This function rearranges ipiv into newipiv
406  where row i has to be moved to newipiv(i). The new pivoting allows for
407  parallel processing vs the original one assumes a specific ordering and
408  has to be done sequentially.
409 */
410 extern "C"
411 void swp2pswp(char trans, int n, int *ipiv, int *newipiv){
412  int i, newind, ind;
413  char trans_[2] = {trans, 0};
414  long int notran = lapackf77_lsame(trans_, "N");
415 
416  for(i=0; i<n; i++)
417  newipiv[i] = -1;
418 
419  if (notran){
420  for(i=0; i<n; i++){
421  newind = ipiv[i] - 1;
422  if (newipiv[newind] == -1) {
423  if (newipiv[i]==-1){
424  newipiv[i] = newind;
425  if (newind>i)
426  newipiv[newind]= i;
427  }
428  else
429  {
430  ind = newipiv[i];
431  newipiv[i] = newind;
432  if (newind>i)
433  newipiv[newind]= ind;
434  }
435  }
436  else {
437  if (newipiv[i]==-1){
438  if (newind>i){
439  ind = newipiv[newind];
440  newipiv[newind] = i;
441  newipiv[i] = ind;
442  }
443  else
444  newipiv[i] = newipiv[newind];
445  }
446  else{
447  ind = newipiv[i];
448  newipiv[i] = newipiv[newind];
449  if (newind > i)
450  newipiv[newind] = ind;
451  }
452  }
453  }
454  } else {
455  for(i=n-1; i>=0; i--){
456  newind = ipiv[i] - 1;
457  if (newipiv[newind] == -1) {
458  if (newipiv[i]==-1){
459  newipiv[i] = newind;
460  if (newind>i)
461  newipiv[newind]= i;
462  }
463  else
464  {
465  ind = newipiv[i];
466  newipiv[i] = newind;
467  if (newind>i)
468  newipiv[newind]= ind;
469  }
470  }
471  else {
472  if (newipiv[i]==-1){
473  if (newind>i){
474  ind = newipiv[newind];
475  newipiv[newind] = i;
476  newipiv[i] = ind;
477  }
478  else
479  newipiv[i] = newipiv[newind];
480  }
481  else{
482  ind = newipiv[i];
483  newipiv[i] = newipiv[newind];
484  if (newind > i)
485  newipiv[newind] = ind;
486  }
487  }
488  }
489  }
490 }
491 
492 /* ////////////////////////////////////////////////////////////////////////////
493  -- Auxiliary function: used for debugging. Given a pointer to floating
494  point number on the GPU memory, the function returns the value
495  at that location.
496 */
497 extern "C"
498 float getv(float *da){
499  float res[1];
500  cublasGetVector(1, sizeof(float), da, 1, res, 1);
501  return res[0];
502 }
503 
504 /* ////////////////////////////////////////////////////////////////////////////
505  -- Auxiliary function sp_cat
506 */
507 extern "C"
508 int sp_cat(char *lp, char *rpp[], magma_int_t *rnp, magma_int_t*np, magma_int_t ll)
509 {
510  magma_int_t i, n, nc;
511  char *f__rp;
512 
513  n = (int)*np;
514  for(i = 0 ; i < n ; ++i)
515  {
516  nc = ll;
517  if(rnp[i] < nc)
518  nc = rnp[i];
519  ll -= nc;
520  f__rp = rpp[i];
521  while(--nc >= 0)
522  *lp++ = *f__rp++;
523  }
524  while(--ll >= 0)
525  *lp++ = ' ';
526 
527  return 0;
528 }
void zq_to_panel(char uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
Definition: zpanel_to_q.cpp:57
void magma_gettime_f(unsigned int *time)
Definition: auxiliary.cpp:94
void sq_to_panel(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:57
int magma_int_t
Definition: magmablas.h:12
unsigned int usec
Definition: auxiliary.h:15
void cq_to_panel(char uplo, magma_int_t ib, magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *work)
Definition: cpanel_to_q.cpp:57
void cpanel_to_q(char uplo, magma_int_t ib, magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *work)
Definition: cpanel_to_q.cpp:17
void swp2pswp(magma_trans_t trans, magma_int_t n, magma_int_t *ipiv, magma_int_t *newipiv)
Definition: auxiliary.cpp:117
unsigned int sec
Definition: auxiliary.h:14
void magma_gettimervalue_f(unsigned int *start, unsigned int *end, double *result)
Definition: auxiliary.cpp:115
int sp_cat(char *lp, char *rpp[], magma_int_t *rnp, magma_int_t *np, magma_int_t ll)
Definition: auxiliary.cpp:508
#define lapackf77_lsame
Definition: magma_lapack.h:23
void zpanel_to_q(char uplo, magma_int_t ib, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *work)
Definition: zpanel_to_q.cpp:17
void spanel_to_q(char uplo, magma_int_t ib, float *A, magma_int_t lda, float *work)
Definition: spanel_to_q.cpp:17
void dpanel_to_q(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:17
void dq_to_panel(char uplo, magma_int_t ib, double *A, magma_int_t lda, double *work)
Definition: dpanel_to_q.cpp:57
float getv(float *da)
Definition: auxiliary.cpp:498
double GetTimerValue(magma_timestr_t start, magma_timestr_t end)
Definition: timer.cpp:94
magma_timestr_t get_current_time(void)
Definition: timer.cpp:76
void printout_devices()
Definition: auxiliary.cpp:128