PAPI  5.3.2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-nvml.c
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
20 #include <dlfcn.h>
21 
22 #include <stdio.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <inttypes.h>
26 #include <string.h>
27 /* Headers required by PAPI */
28 #include "papi.h"
29 #include "papi_internal.h"
30 #include "papi_vector.h"
31 #include "papi_memory.h"
32 
33 #include "linux-nvml.h"
34 
35 #include "nvml.h"
36 #include "cuda.h"
37 #include "cuda_runtime_api.h"
38 
40 
41 /***** CHANGE PROTOTYPES TO DECLARE CUDA AND NVML LIBRARY SYMBOLS AS WEAK *****
42  * This is done so that a version of PAPI built with the nvml component can *
43  * be installed on a system which does not have the cuda libraries installed. *
44  * *
45  * If this is done without these prototypes, then all papi services on the *
46  * system without the cuda libraries installed will fail. The PAPI libraries *
47  * contain references to the cuda libraries which are not installed. The *
48  * load of PAPI commands fails because the cuda library references can not be *
49  * resolved. *
50  * *
51  * This also defines pointers to the cuda library functions that we call. *
52  * These function pointers will be resolved with dlopen/dlsym calls at *
53  * component initialization time. The component then calls the cuda library *
54  * functions through these function pointers. *
55  ********************************************************************************/
56 #undef CUDAAPI
57 #define CUDAAPI __attribute__((weak))
58 CUresult CUDAAPI cuInit(unsigned int);
59 
60 CUresult (*cuInitPtr)(unsigned int);
61 
62 #undef CUDARTAPI
63 #define CUDARTAPI __attribute__((weak))
64 cudaError_t CUDARTAPI cudaGetDevice(int *);
65 cudaError_t CUDARTAPI cudaGetDeviceCount(int *);
66 cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int);
67 
68 cudaError_t (*cudaGetDevicePtr)(int *);
69 cudaError_t (*cudaGetDeviceCountPtr)(int *);
70 cudaError_t (*cudaDeviceGetPCIBusIdPtr)(char *, int, int);
71 
72 #undef DECLDIR
73 #define DECLDIR __attribute__((weak))
74 nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo (nvmlDevice_t, nvmlClockType_t, unsigned int *);
75 const char* DECLDIR nvmlErrorString (nvmlReturn_t);
76 nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
77 nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed (nvmlDevice_t, unsigned int *);
78 nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo (nvmlDevice_t, nvmlMemory_t *);
79 nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState (nvmlDevice_t, nvmlPstates_t *);
80 nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage (nvmlDevice_t, unsigned int *);
81 nvmlReturn_t DECLDIR nvmlDeviceGetTemperature (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
82 nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
83 nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates (nvmlDevice_t, nvmlUtilization_t *);
84 nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex (unsigned int, nvmlDevice_t *);
85 nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo (nvmlDevice_t, nvmlPciInfo_t *);
86 nvmlReturn_t DECLDIR nvmlDeviceGetName (nvmlDevice_t, char *, unsigned int);
87 nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
88 nvmlReturn_t DECLDIR nvmlDeviceGetEccMode (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
89 nvmlReturn_t DECLDIR nvmlInit (void);
90 nvmlReturn_t DECLDIR nvmlDeviceGetCount (unsigned int *);
91 nvmlReturn_t DECLDIR nvmlShutdown (void);
92 
93 nvmlReturn_t (*nvmlDeviceGetClockInfoPtr) (nvmlDevice_t, nvmlClockType_t, unsigned int *);
94 char* (*nvmlErrorStringPtr) (nvmlReturn_t);
95 nvmlReturn_t (*nvmlDeviceGetDetailedEccErrorsPtr) (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
96 nvmlReturn_t (*nvmlDeviceGetFanSpeedPtr) (nvmlDevice_t, unsigned int *);
97 nvmlReturn_t (*nvmlDeviceGetMemoryInfoPtr) (nvmlDevice_t, nvmlMemory_t *);
98 nvmlReturn_t (*nvmlDeviceGetPerformanceStatePtr) (nvmlDevice_t, nvmlPstates_t *);
99 nvmlReturn_t (*nvmlDeviceGetPowerUsagePtr) (nvmlDevice_t, unsigned int *);
100 nvmlReturn_t (*nvmlDeviceGetTemperaturePtr) (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
101 nvmlReturn_t (*nvmlDeviceGetTotalEccErrorsPtr) (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
102 nvmlReturn_t (*nvmlDeviceGetUtilizationRatesPtr) (nvmlDevice_t, nvmlUtilization_t *);
103 nvmlReturn_t (*nvmlDeviceGetHandleByIndexPtr) (unsigned int, nvmlDevice_t *);
104 nvmlReturn_t (*nvmlDeviceGetPciInfoPtr) (nvmlDevice_t, nvmlPciInfo_t *);
105 nvmlReturn_t (*nvmlDeviceGetNamePtr) (nvmlDevice_t, char *, unsigned int);
106 nvmlReturn_t (*nvmlDeviceGetInforomVersionPtr) (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
107 nvmlReturn_t (*nvmlDeviceGetEccModePtr) (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
108 nvmlReturn_t (*nvmlInitPtr) (void);
109 nvmlReturn_t (*nvmlDeviceGetCountPtr) (unsigned int *);
110 nvmlReturn_t (*nvmlShutdownPtr) (void);
111 
112 
/* Handles returned by dlopen() for the libraries this component loads at runtime. */
static void* dl1 = NULL;	/* libcuda.so       (CUDA driver API)  */
static void* dl2 = NULL;	/* libcudart.so     (CUDA runtime API) */
static void* dl3 = NULL;	/* libnvidia-ml.so  (NVML)             */

/*
 * Opens the three libraries above and resolves the function pointers used by
 * this component.  Declared with (void) so the compiler checks that no
 * arguments are passed; an empty parameter list "()" is not a prototype.
 */
static int linkCudaLibraries (void);
119 
120 
121 /* Declare our vector in advance */
123 
124 /* Up to 25 events per card; how many cards per system should we allow for? */
125 #define NVML_MAX_COUNTERS 100
126 
130 typedef struct nvml_control_state
131 {
132  int num_events;
133  int which_counter[NVML_MAX_COUNTERS];
134  long long counter[NVML_MAX_COUNTERS];
136 
138 typedef struct nvml_context
139 {
142 
145 
147 static int device_count = 0;
148 
150 static int num_events = 0;
151 
152 static nvmlDevice_t* devices=NULL;
153 static int* features=NULL;
154 
155 unsigned long long
156 getClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one )
157 {
158  unsigned int ret = 0;
159  nvmlReturn_t bad;
160  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
161 
162  if ( NVML_SUCCESS != bad ) {
163  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
164  }
165 
166  return (unsigned long long)ret;
167 }
168 
169  unsigned long long
170 getEccLocalErrors( nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
171 {
172  nvmlEccErrorCounts_t counts;
173 
174  nvmlReturn_t bad;
175  bad = (*nvmlDeviceGetDetailedEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
176 
177  if ( NVML_SUCCESS != bad ) {
178  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
179  }
180 
181 
182  switch ( which_one ) {
183  case LOCAL_ECC_REGFILE:
184  return counts.registerFile;
185  case LOCAL_ECC_L1:
186  return counts.l1Cache;
187  case LOCAL_ECC_L2:
188  return counts.l2Cache;
189  case LOCAL_ECC_MEM:
190  return counts.deviceMemory;
191  default:
192  ;
193  }
194  return (unsigned long long)-1;
195 }
196 
197  unsigned long long
198 getFanSpeed( nvmlDevice_t dev )
199 {
200  unsigned int ret = 0;
201  nvmlReturn_t bad;
202  bad = (*nvmlDeviceGetFanSpeedPtr)( dev, &ret );
203 
204  if ( NVML_SUCCESS != bad ) {
205  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
206  }
207 
208 
209  return (unsigned long long)ret;
210 }
211 
212  unsigned long long
213 getMaxClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one)
214 {
215  unsigned int ret = 0;
216  nvmlReturn_t bad;
217  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
218 
219  if ( NVML_SUCCESS != bad ) {
220  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
221  }
222 
223 
224  return (unsigned long long) ret;
225 }
226 
/*
 * Query the memory information of a device through NVML and return one
 * field of it.
 *
 * dev        NVML handle of the device to query.
 * which_one  selects the field: MEMINFO_UNALLOCED returns meminfo.free and
 *            MEMINFO_ALLOCED returns meminfo.used.
 *
 * Returns (unsigned long long)-1 when which_one matches no case label.
 *
 * NOTE(review): a failed NVML call is only logged; the switch below still
 * reads meminfo, which is not initialized before the call.
 */
227  unsigned long long
228 getMemoryInfo( nvmlDevice_t dev, int which_one )
229 {
230  nvmlMemory_t meminfo;
231  nvmlReturn_t bad;
232  bad = (*nvmlDeviceGetMemoryInfoPtr)( dev, &meminfo );
233 
234  if ( NVML_SUCCESS != bad ) {
235  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
236  }
237 
238  switch (which_one) {
/* NOTE(review): in this listing no case label precedes the next statement,
 * so as written it is unreachable; a label for the "total" selector appears
 * to be missing here - confirm against the original file. */
240  return meminfo.total;
241  case MEMINFO_UNALLOCED:
242  return meminfo.free;
243  case MEMINFO_ALLOCED:
244  return meminfo.used;
245  default:
246  ;
247  }
248  return (unsigned long long)-1;
249 }
250 
251  unsigned long long
252 getPState( nvmlDevice_t dev )
253 {
254  unsigned int ret = 0;
255  nvmlPstates_t state = NVML_PSTATE_15;
256  nvmlReturn_t bad;
257  bad = (*nvmlDeviceGetPerformanceStatePtr)( dev, &state );
258 
259  if ( NVML_SUCCESS != bad ) {
260  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
261  }
262 
263 
264  switch ( state ) {
265  case NVML_PSTATE_15:
266  ret++;
267  case NVML_PSTATE_14:
268  ret++;
269  case NVML_PSTATE_13:
270  ret++;
271  case NVML_PSTATE_12:
272  ret++;
273  case NVML_PSTATE_11:
274  ret++;
275  case NVML_PSTATE_10:
276  ret++;
277  case NVML_PSTATE_9:
278  ret++;
279  case NVML_PSTATE_8:
280  ret++;
281  case NVML_PSTATE_7:
282  ret++;
283  case NVML_PSTATE_6:
284  ret++;
285  case NVML_PSTATE_5:
286  ret++;
287  case NVML_PSTATE_4:
288  ret++;
289  case NVML_PSTATE_3:
290  ret++;
291  case NVML_PSTATE_2:
292  ret++;
293  case NVML_PSTATE_1:
294  ret++;
295  case NVML_PSTATE_0:
296  break;
297  case NVML_PSTATE_UNKNOWN:
298  default:
299  /* This should never happen?
300  * The API docs just state Unknown performance state... */
301  return (unsigned long long) -1;
302  }
303 
304  return (unsigned long long)ret;
305 }
306 
307  unsigned long long
308 getPowerUsage( nvmlDevice_t dev )
309 {
310  unsigned int power;
311  nvmlReturn_t bad;
312  bad = (*nvmlDeviceGetPowerUsagePtr)( dev, &power );
313 
314  if ( NVML_SUCCESS != bad ) {
315  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
316  }
317 
318 
319  return (unsigned long long) power;
320 }
321 
322  unsigned long long
323 getTemperature( nvmlDevice_t dev )
324 {
325  unsigned int ret = 0;
326  nvmlReturn_t bad;
327  bad = (*nvmlDeviceGetTemperaturePtr)( dev, NVML_TEMPERATURE_GPU, &ret );
328 
329  if ( NVML_SUCCESS != bad ) {
330  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
331  }
332 
333 
334  return (unsigned long long)ret;
335 }
336 
337  unsigned long long
338 getTotalEccErrors( nvmlDevice_t dev, nvmlEccBitType_t bits)
339 {
340  unsigned long long counts = 0;
341  nvmlReturn_t bad;
342  bad = (*nvmlDeviceGetTotalEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
343 
344  if ( NVML_SUCCESS != bad ) {
345  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
346  }
347 
348 
349  return counts;
350 }
351 
352 /* 0 => gpu util
353  1 => memory util
354  */
355  unsigned long long
356 getUtilization( nvmlDevice_t dev, int which_one )
357 {
358  nvmlUtilization_t util;
359  nvmlReturn_t bad;
360  bad = (*nvmlDeviceGetUtilizationRatesPtr)( dev, &util );
361 
362  if ( NVML_SUCCESS != bad ) {
363  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
364  }
365 
366 
367  switch (which_one) {
368  case GPU_UTILIZATION:
369  return (unsigned long long) util.gpu;
370  case MEMORY_UTILIZATION:
371  return (unsigned long long) util.memory;
372  default:
373  ;
374  }
375 
376  return (unsigned long long) -1;
377 }
378 
379  static void
381 {
382  /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while
383  * possible to implement a reset on the ECC counters, we pass */
384  /*
385  int i;
386  for ( i=0; i < device_count; i++ )
387  nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC );
388  */
389 }
390 
392 /* You might replace this with code that accesses */
393 /* hardware or reads values from the operatings system. */
394  static int
395 nvml_hardware_read( long long *value, int which_one)
396  //, nvml_context_t *ctx)
397 {
399  nvmlDevice_t handle;
400  int cudaIdx = -1;
401 
402  entry = &nvml_native_table[which_one];
403  *value = (long long) -1;
404  /* replace entry->resources with the current cuda_device->nvml device */
405  (*cudaGetDevicePtr)( &cudaIdx );
406 
407  if ( cudaIdx < 0 || cudaIdx > device_count )
408  return PAPI_EINVAL;
409 
410  /* Make sure the device we are running on has the requested event */
411  if ( !HAS_FEATURE( features[cudaIdx] , entry->type) )
412  return PAPI_EINVAL;
413 
414  handle = devices[cudaIdx];
415 
416  switch (entry->type) {
417  case FEATURE_CLOCK_INFO:
418  *value = getClockSpeed( handle,
419  (nvmlClockType_t)entry->options.clock );
420  break;
422  *value = getEccLocalErrors( handle,
423  (nvmlEccBitType_t)entry->options.ecc_opts.bits,
424  (int)entry->options.ecc_opts.which_one);
425  break;
426  case FEATURE_FAN_SPEED:
427  *value = getFanSpeed( handle );
428  break;
429  case FEATURE_MAX_CLOCK:
430  *value = getMaxClockSpeed( handle,
431  (nvmlClockType_t)entry->options.clock );
432  break;
433  case FEATURE_MEMORY_INFO:
434  *value = getMemoryInfo( handle,
435  (int)entry->options.which_one );
436  break;
437  case FEATURE_PERF_STATES:
438  *value = getPState( handle );
439  break;
440  case FEATURE_POWER:
441  *value = getPowerUsage( handle );
442  break;
443  case FEATURE_TEMP:
444  *value = getTemperature( handle );
445  break;
447  *value = getTotalEccErrors( handle,
448  (nvmlEccBitType_t)entry->options.ecc_opts.bits );
449  break;
450  case FEATURE_UTILIZATION:
451  *value = getUtilization( handle,
452  (int)entry->options.which_one );
453  break;
454  default:
455  return PAPI_EINVAL;
456  }
457 
458  return PAPI_OK;
459 
460 
461 }
462 
463 /********************************************************************/
464 /* Below are the functions required by the PAPI component interface */
465 /********************************************************************/
466 
468  int
470 {
471  (void) ctx;
472 
473  SUBDBG( "Enter: ctx: %p\n", ctx );
474 
475  return PAPI_OK;
476 }
477 
478  static int
480 {
481  nvmlReturn_t ret;
482  nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
483  nvmlDevice_t handle;
484  nvmlPciInfo_t info;
485 
486  cudaError_t cuerr;
487 
488  char busId[16];
489  char name[64];
490  char inforomECC[16];
491  char inforomPower[16];
492  char names[device_count][64];
493  char nvml_busIds[device_count][16];
494 
495  float ecc_version = 0.0, power_version = 0.0;
496 
497  int i = 0,
498  j = 0;
499  int isTesla = 0;
500  int isFermi = 0;
501  int isUnique = 1;
502 
503  unsigned int temp = 0;
504 
505 
506  /* list of nvml pci_busids */
507  for (i=0; i < device_count; i++) {
508  ret = (*nvmlDeviceGetHandleByIndexPtr)( i, &handle );
509  if ( NVML_SUCCESS != ret ) {
510  SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
511  return PAPI_ESYS;
512  }
513 
514  ret = (*nvmlDeviceGetPciInfoPtr)( handle, &info );
515  if ( NVML_SUCCESS != ret ) {
516  SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", (*nvmlErrorStringPtr)(ret) );
517  return PAPI_ESYS;
518  }
519  strncpy(nvml_busIds[i], info.busId, 16);
520  }
521 
522  /* We want to key our list of nvmlDevice_ts by each device's cuda index */
523  for (i=0; i < device_count; i++) {
524  cuerr = (*cudaDeviceGetPCIBusIdPtr)( busId, 16, i );
525  if ( CUDA_SUCCESS != cuerr ) {
526  SUBDBG("cudaDeviceGetPCIBusId failed.\n");
527  return PAPI_ESYS;
528  }
529  for (j=0; j < device_count; j++ ) {
530  if ( !strncmp( busId, nvml_busIds[j], 16) ) {
531  ret = (*nvmlDeviceGetHandleByIndexPtr)(j, &devices[i] );
532  if ( NVML_SUCCESS != ret ) {
533  SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
534  return PAPI_ESYS;
535  }
536  break;
537  }
538  }
539  }
540 
541  memset(names, 0x0, device_count*64);
542  /* So for each card, check whats querable */
543  for (i=0; i < device_count; i++ ) {
544  isTesla=0;
545  isFermi=1;
546  isUnique = 1;
547  features[i] = 0;
548 
549  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
550  if ( NVML_SUCCESS != ret) {
551  SUBDBG("nvmlDeviceGetName failed \n");
552  return PAPI_ESYS;
553  }
554 
555  for (j=0; j < i; j++ )
556  if ( 0 == strncmp( name, names[j], 64 ) ) {
557  /* if we have a match, and IF everything is sane,
558  * devices with the same name eg Tesla C2075 share features */
559  isUnique = 0;
560  features[i] = features[j];
561 
562  }
563 
564  if ( isUnique ) {
565  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
566  if ( NVML_SUCCESS != ret ) {
567  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
568  isFermi = 0;
569  }
570  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
571  if ( NVML_SUCCESS != ret ) {
572  /* This implies the card is older then Fermi */
573  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
574  SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
575  isFermi = 0;
576  }
577 
578  ecc_version = strtof(inforomECC, NULL );
579  power_version = strtof( inforomPower, NULL);
580 
581  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
582  isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;
583 
584  /* For Tesla and Quadro products from Fermi and Kepler families. */
585  if ( isFermi ) {
586  features[i] |= FEATURE_CLOCK_INFO;
587  num_events += 3;
588  }
589 
590  /* For Tesla and Quadro products from Fermi and Kepler families.
591  requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
592  requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
593  requires ECC mode to be enabled. */
594  ret = (*nvmlDeviceGetEccModePtr)( devices[i], &mode, NULL );
595  if ( NVML_SUCCESS == ret ) {
596  if ( NVML_FEATURE_ENABLED == mode) {
597  if ( ecc_version >= 2.0 ) {
598  features[i] |= FEATURE_ECC_LOCAL_ERRORS;
599  num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
600  }
601  if ( ecc_version >= 1.0 ) {
602  features[i] |= FEATURE_ECC_TOTAL_ERRORS;
603  num_events += 2; /* single bit errors, double bit errors */
604  }
605  }
606  } else {
607  SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml\
608 return code %d)\n", ret);
609  }
610 
611  /* For all discrete products with dedicated fans */
612  features[i] |= FEATURE_FAN_SPEED;
613  num_events++;
614 
615  /* For Tesla and Quadro products from Fermi and Kepler families. */
616  if ( isFermi ) {
617  features[i] |= FEATURE_MAX_CLOCK;
618  num_events += 3;
619  }
620 
621  /* For all products */
622  features[i] |= FEATURE_MEMORY_INFO;
623  num_events += 3; /* total, free, used */
624 
625  /* For Tesla and Quadro products from the Fermi and Kepler families. */
626  if ( isFermi ) {
627  features[i] |= FEATURE_PERF_STATES;
628  num_events++;
629  }
630 
631  /* For "GF11x" Tesla and Quadro products from the Fermi family
632  requires NVML_INFOROM_POWER 3.0 or higher
633  For Tesla and Quadro products from the Kepler family
634  does not require NVML_INFOROM_POWER */
635  /* Just try reading power, if it works, enable it*/
636  ret = (*nvmlDeviceGetPowerUsagePtr)( devices[i], &temp);
637  if ( NVML_SUCCESS == ret ) {
638  features[i] |= FEATURE_POWER;
639  num_events++;
640  } else {
641  SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on\
642 this card. (nvml return code %d)\n", ret );
643  }
644 
645  /* For all discrete and S-class products. */
646  features[i] |= FEATURE_TEMP;
647  num_events++;
648 
649  /* For Tesla and Quadro products from the Fermi and Kepler families */
650  if (isFermi) {
651  features[i] |= FEATURE_UTILIZATION;
652  num_events += 2;
653  }
654 
655  strncpy( names[i], name, 64);
656 
657  }
658  }
659  return PAPI_OK;
660 }
661 
662  static void
664 {
665  char name[64];
666  char sanitized_name[PAPI_MAX_STR_LEN];
667  char names[device_count][64];
668 
669  int i, nameLen = 0, j;
670  int isUnique = 1;
671 
673  nvmlReturn_t ret;
674 
675  nvml_native_table = (nvml_native_event_entry_t*) papi_malloc(
677  memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events );
678  entry = &nvml_native_table[0];
679 
680  for (i=0; i < device_count; i++ ) {
681  memset( names[i], 0x0, 64 );
682  isUnique = 1;
683  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
684 
685  for (j=0; j < i; j++ )
686  {
687  if ( 0 == strncmp( name, names[j], 64 ) )
688  isUnique = 0;
689  }
690 
691  if ( isUnique ) {
692  nameLen = strlen(name);
693  strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
694  for (j=0; j < nameLen; j++)
695  if ( ' ' == sanitized_name[j] )
696  sanitized_name[j] = '_';
697 
698 
699 
700  if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
701  sprintf( entry->name, "%s:graphics_clock", sanitized_name );
702  strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
703  entry->options.clock = NVML_CLOCK_GRAPHICS;
704  entry->type = FEATURE_CLOCK_INFO;
705  entry++;
706 
707  sprintf( entry->name, "%s:sm_clock", sanitized_name);
708  strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
709  entry->options.clock = NVML_CLOCK_SM;
710  entry->type = FEATURE_CLOCK_INFO;
711  entry++;
712 
713  sprintf( entry->name, "%s:memory_clock", sanitized_name);
714  strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
715  entry->options.clock = NVML_CLOCK_MEM;
716  entry->type = FEATURE_CLOCK_INFO;
717  entry++;
718  }
719 
720  if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) {
721  sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
722  strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
723  entry->options.ecc_opts = (struct local_ecc){
724  .bits = NVML_SINGLE_BIT_ECC,
725  .which_one = LOCAL_ECC_L1,
726  };
728  entry++;
729 
730  sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
731  strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
732  entry->options.ecc_opts = (struct local_ecc){
733  .bits = NVML_SINGLE_BIT_ECC,
734  .which_one = LOCAL_ECC_L2,
735  };
737  entry++;
738 
739  sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
740  strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
741  entry->options.ecc_opts = (struct local_ecc){
742  .bits = NVML_SINGLE_BIT_ECC,
743  .which_one = LOCAL_ECC_MEM,
744  };
746  entry++;
747 
748  sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
749  strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
750  entry->options.ecc_opts = (struct local_ecc){
751  .bits = NVML_SINGLE_BIT_ECC,
752  .which_one = LOCAL_ECC_REGFILE,
753  };
755  entry++;
756 
757  sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
758  strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
759  entry->options.ecc_opts = (struct local_ecc){
760  .bits = NVML_DOUBLE_BIT_ECC,
761  .which_one = LOCAL_ECC_L1,
762  };
764  entry++;
765 
766  sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
767  strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
768  entry->options.ecc_opts = (struct local_ecc){
769  .bits = NVML_DOUBLE_BIT_ECC,
770  .which_one = LOCAL_ECC_L2,
771  };
773  entry++;
774 
775  sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
776  strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
777  entry->options.ecc_opts = (struct local_ecc){
778  .bits = NVML_DOUBLE_BIT_ECC,
779  .which_one = LOCAL_ECC_MEM,
780  };
782  entry++;
783 
784  sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
785  strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
786  entry->options.ecc_opts = (struct local_ecc){
787  .bits = NVML_DOUBLE_BIT_ECC,
788  .which_one = LOCAL_ECC_REGFILE,
789  };
791  entry++;
792  }
793 
794  if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
795  sprintf( entry->name, "%s:fan_speed", sanitized_name);
796  strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
797  entry->type = FEATURE_FAN_SPEED;
798  entry++;
799  }
800 
801  if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
802  sprintf( entry->name, "%s:graphics_max_clock", sanitized_name);
803  strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
804  entry->options.clock = NVML_CLOCK_GRAPHICS;
805  entry->type = FEATURE_MAX_CLOCK;
806  entry++;
807 
808  sprintf( entry->name, "%s:sm_max_clock", sanitized_name);
809  strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
810  entry->options.clock = NVML_CLOCK_SM;
811  entry->type = FEATURE_MAX_CLOCK;
812  entry++;
813 
814  sprintf( entry->name, "%s:memory_max_clock", sanitized_name);
815  strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
816  entry->options.clock = NVML_CLOCK_MEM;
817  entry->type = FEATURE_MAX_CLOCK;
818  entry++;
819  }
820 
821  if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) {
822  sprintf( entry->name, "%s:total_memory", sanitized_name);
823  strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
825  entry->type = FEATURE_MEMORY_INFO;
826  entry++;
827 
828  sprintf( entry->name, "%s:unallocated_memory", sanitized_name);
829  strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
831  entry->type = FEATURE_MEMORY_INFO;
832  entry++;
833 
834  sprintf( entry->name, "%s:allocated_memory", sanitized_name);
835  strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
837  entry->type = FEATURE_MEMORY_INFO;
838  entry++;
839  }
840 
841  if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) {
842  sprintf( entry->name, "%s:pstate", sanitized_name);
843  strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
844  entry->type = FEATURE_PERF_STATES;
845  entry++;
846  }
847 
848  if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
849  sprintf( entry->name, "%s:power", sanitized_name);
850  // set the power event units value to "mW" for miliwatts
851  strncpy( entry->units, "mW",PAPI_MIN_STR_LEN);
852  strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw (+/-5 watts) for the entire board: GPU, memory, etc.", PAPI_MAX_STR_LEN);
853  entry->type = FEATURE_POWER;
854  entry++;
855  }
856 
857  if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
858  sprintf( entry->name, "%s:temperature", sanitized_name);
859  strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
860  entry->type = FEATURE_TEMP;
861  entry++;
862  }
863 
864  if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) {
865  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
866  strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
867  entry->options.ecc_opts = (struct local_ecc){
868  .bits = NVML_SINGLE_BIT_ECC,
869  };
871  entry++;
872 
873  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
874  strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
875  entry->options.ecc_opts = (struct local_ecc){
876  .bits = NVML_DOUBLE_BIT_ECC,
877  };
879  entry++;
880  }
881 
882  if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) {
883  sprintf( entry->name, "%s:gpu_utilization", sanitized_name);
884  strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
886  entry->type = FEATURE_UTILIZATION;
887  entry++;
888 
889  sprintf( entry->name, "%s:memory_utilization", sanitized_name);
890  strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
892  entry->type = FEATURE_UTILIZATION;
893  entry++;
894  }
895  strncpy( names[i], name, 64);
896  }
897  }
898 }
899 
904  int
906 {
907  SUBDBG ("Entry: cidx: %d\n", cidx);
908  nvmlReturn_t ret;
909  cudaError_t cuerr;
910  int papi_errorcode;
911 
912  int cuda_count = 0;
913  unsigned int nvml_count = 0;
914 
915  /* link in the cuda and nvml libraries and resolve the symbols we need to use */
916  if (linkCudaLibraries() != PAPI_OK) {
917  SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
918  SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
919  return (PAPI_ENOSUPP);
920  }
921 
922  ret = (*nvmlInitPtr)();
923  if ( NVML_SUCCESS != ret ) {
924  strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
925  return PAPI_ENOSUPP;
926  }
927 
928  cuerr = (*cuInitPtr)( 0 );
929  if ( CUDA_SUCCESS != cuerr ) {
930  strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
931  return PAPI_ENOSUPP;
932  }
933 
934  /* Figure out the number of CUDA devices in the system */
935  ret = (*nvmlDeviceGetCountPtr)( &nvml_count );
936  if ( NVML_SUCCESS != ret ) {
937  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
938  return PAPI_ENOSUPP;
939  }
940 
941  cuerr = (*cudaGetDeviceCountPtr)( &cuda_count );
942  if ( CUDA_SUCCESS != cuerr ) {
943  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
944  return PAPI_ENOSUPP;
945  }
946 
947  /* We can probably recover from this, when we're clever */
948  if ( (cuda_count > 0) && (nvml_count != (unsigned int)cuda_count ) ) {
949  strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
950  return PAPI_ENOSUPP;
951  }
952 
953  device_count = cuda_count;
954 
955  /* A per device representation of what events are present */
956  features = (int*)papi_malloc(sizeof(int) * device_count );
957 
958  /* Handles to each device */
959  devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
960 
961  /* Figure out what events are supported on each card. */
962  if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
963  papi_free(features);
964  papi_free(devices);
965  sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
966  return PAPI_ENOSUPP;
967  }
968 
969  /* The assumption is that if everything went swimmingly in detectDevices,
970  all nvml calls here should be fine. */
972 
973  /* Export the total number of events available */
974  _nvml_vector.cmp_info.num_native_events = num_events;
975 
976  /* Export the component id */
977  _nvml_vector.cmp_info.CmpIdx = cidx;
978 
979  /* Export the number of 'counters' */
980  _nvml_vector.cmp_info.num_cntrs = num_events;
981  _nvml_vector.cmp_info.num_mpx_cntrs = num_events;
982 
983  return PAPI_OK;
984 }
985 
986 
987 /*
988  * Link the necessary CUDA libraries to use the cuda component. If any of them can not be found, then
989  * the CUDA component will just be disabled. This is done at runtime so that a version of PAPI built
990  * with the CUDA component can be installed and used on systems which have the CUDA libraries installed
991  * and on systems where these libraries are not installed.
992  */
993 static int
995 {
996  /* Attempt to guess if we were statically linked to libc, if so bail */
997  if ( _dl_non_dynamic_init != NULL ) {
998  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
999  return PAPI_ENOSUPP;
1000  }
1001 
1002  /* Need to link in the cuda libraries, if not found disable the component */
1003  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
1004  if (!dl1)
1005  {
1006  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
1007  return ( PAPI_ENOSUPP );
1008  }
1009  cuInitPtr = dlsym(dl1, "cuInit");
1010  if (dlerror() != NULL)
1011  {
1012  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
1013  return ( PAPI_ENOSUPP );
1014  }
1015 
1016  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
1017  if (!dl2)
1018  {
1019  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
1020  return ( PAPI_ENOSUPP );
1021  }
1022  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
1023  if (dlerror() != NULL)
1024  {
1025  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
1026  return ( PAPI_ENOSUPP );
1027  }
1028  cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
1029  if (dlerror() != NULL)
1030  {
1031  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.",PAPI_MAX_STR_LEN);
1032  return ( PAPI_ENOSUPP );
1033  }
1034  cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
1035  if (dlerror() != NULL)
1036  {
1037  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.",PAPI_MAX_STR_LEN);
1038  return ( PAPI_ENOSUPP );
1039  }
1040 
1041  dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
1042  if (!dl3)
1043  {
1044  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.",PAPI_MAX_STR_LEN);
1045  return ( PAPI_ENOSUPP );
1046  }
1047  nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
1048  if (dlerror() != NULL)
1049  {
1050  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.",PAPI_MAX_STR_LEN);
1051  return ( PAPI_ENOSUPP );
1052  }
1053  nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
1054  if (dlerror() != NULL)
1055  {
1056  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.",PAPI_MAX_STR_LEN);
1057  return ( PAPI_ENOSUPP );
1058  }
1059  nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
1060  if (dlerror() != NULL)
1061  {
1062  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.",PAPI_MAX_STR_LEN);
1063  return ( PAPI_ENOSUPP );
1064  }
1065  nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
1066  if (dlerror() != NULL)
1067  {
1068  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.",PAPI_MAX_STR_LEN);
1069  return ( PAPI_ENOSUPP );
1070  }
1071  nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
1072  if (dlerror() != NULL)
1073  {
1074  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.",PAPI_MAX_STR_LEN);
1075  return ( PAPI_ENOSUPP );
1076  }
1077  nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
1078  if (dlerror() != NULL)
1079  {
1080  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.",PAPI_MAX_STR_LEN);
1081  return ( PAPI_ENOSUPP );
1082  }
1083  nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
1084  if (dlerror() != NULL)
1085  {
1086  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.",PAPI_MAX_STR_LEN);
1087  return ( PAPI_ENOSUPP );
1088  }
1089  nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
1090  if (dlerror() != NULL)
1091  {
1092  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.",PAPI_MAX_STR_LEN);
1093  return ( PAPI_ENOSUPP );
1094  }
1095  nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
1096  if (dlerror() != NULL)
1097  {
1098  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.",PAPI_MAX_STR_LEN);
1099  return ( PAPI_ENOSUPP );
1100  }
1101  nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
1102  if (dlerror() != NULL)
1103  {
1104  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.",PAPI_MAX_STR_LEN);
1105  return ( PAPI_ENOSUPP );
1106  }
1107  nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
1108  if (dlerror() != NULL)
1109  {
1110  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.",PAPI_MAX_STR_LEN);
1111  return ( PAPI_ENOSUPP );
1112  }
1113  nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
1114  if (dlerror() != NULL)
1115  {
1116  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.",PAPI_MAX_STR_LEN);
1117  return ( PAPI_ENOSUPP );
1118  }
1119  nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
1120  if (dlerror() != NULL)
1121  {
1122  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.",PAPI_MAX_STR_LEN);
1123  return ( PAPI_ENOSUPP );
1124  }
1125  nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
1126  if (dlerror() != NULL)
1127  {
1128  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.",PAPI_MAX_STR_LEN);
1129  return ( PAPI_ENOSUPP );
1130  }
1131  nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
1132  if (dlerror() != NULL)
1133  {
1134  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.",PAPI_MAX_STR_LEN);
1135  return ( PAPI_ENOSUPP );
1136  }
1137  nvmlInitPtr = dlsym(dl3, "nvmlInit");
1138  if (dlerror() != NULL)
1139  {
1140  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.",PAPI_MAX_STR_LEN);
1141  return ( PAPI_ENOSUPP );
1142  }
1143  nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
1144  if (dlerror() != NULL)
1145  {
1146  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.",PAPI_MAX_STR_LEN);
1147  return ( PAPI_ENOSUPP );
1148  }
1149  nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
1150  if (dlerror() != NULL)
1151  {
1152  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.",PAPI_MAX_STR_LEN);
1153  return ( PAPI_ENOSUPP );
1154  }
1155 
1156  return ( PAPI_OK );
1157 }
1158 
1159 
1165  int
1167 {
1168  SUBDBG( "nvml_init_control_state... %p\n", ctl );
1169  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1170  memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) );
1171 
1172  return PAPI_OK;
1173 }
1174 
1175 
1177  int
1180  int count,
1181  hwd_context_t *ctx )
1182 {
1183  SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx );
1184  int i, index;
1185 
1186  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1187  (void) ctx;
1188 
1189 
1190  /* if no events, return */
1191  if (count==0) return PAPI_OK;
1192 
1193  for( i = 0; i < count; i++ ) {
1194  index = native[i].ni_event;
1195  nvml_ctl->which_counter[i]=index;
1196  /* We have no constraints on event position, so any event */
1197  /* can be in any slot. */
1198  native[i].ni_position = i;
1199  }
1200  nvml_ctl->num_events=count;
1201  return PAPI_OK;
1202 }
1204  int
1206 {
1207  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1208 
1209  (void) ctx;
1210  (void) ctl;
1211 
1212  /* anything that would need to be set at counter start time */
1213 
1214  /* reset */
1215  /* start the counting */
1216 
1217  return PAPI_OK;
1218 }
1219 
1220 
1222  int
1224 {
1225  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1226 
1227  int i;
1228  (void) ctx;
1229  (void) ctl;
1230  int ret;
1231 
1232  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1233 
1234  for (i=0;i<nvml_ctl->num_events;i++) {
1235  if ( PAPI_OK !=
1236  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1237  nvml_ctl->which_counter[i]) ))
1238  return ret;
1239 
1240  }
1241 
1242  return PAPI_OK;
1243 }
1244 
1245 
1247  int
1249  long long **events, int flags )
1250 {
1251  SUBDBG( "Enter: ctx: %p, flags: %d\n", ctx, flags );
1252 
1253  (void) ctx;
1254  (void) flags;
1255  int i;
1256  int ret;
1257  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1258 
1259 
1260  for (i=0;i<nvml_ctl->num_events;i++) {
1261  if ( PAPI_OK !=
1262  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1263  nvml_ctl->which_counter[i]) ))
1264  return ret;
1265 
1266  }
1267  /* return pointer to the values we read */
1268  *events = nvml_ctl->counter;
1269  return PAPI_OK;
1270 }
1271 
1273 /* otherwise, the updated state is written to ESI->hw_start */
1274  int
1276  long long *events )
1277 {
1278  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1279 
1280  (void) ctx;
1281  (void) ctl;
1282  (void) events;
1283 
1284 
1285  /* You can change ECC mode and compute exclusivity modes on the cards */
1286  /* But I don't see this as a function of a PAPI component at this time */
1287  /* All implementation issues aside. */
1288  return PAPI_OK;
1289 }
1290 
1291 
/* Reset hook for an event set. */
/* As shown here it only logs its arguments, ignores them and returns PAPI_OK. */
1293 /* If the eventset is not currently running, then the saved value in the */
1294 /* EventSet is set to zero without calling this routine. */
1295  int
1297 {
1298  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1299 
1300  (void) ctx;
1301  (void) ctl;
1302 
1303  /* Reset the hardware */
/* NOTE(review): the embedded numbering skips line 1304 right here, so a */
/* statement (presumably the actual hardware reset call) may be missing  */
/* from this listing -- confirm against the real source file.            */
1305 
1306  return PAPI_OK;
1307 }
1308 
1310  int
1312 {
1313  SUBDBG( "Enter:\n" );
1314 
1315  if (nvml_native_table != NULL)
1316  papi_free(nvml_native_table);
1317  if (devices != NULL)
1318  papi_free(devices);
1319  if (features != NULL)
1320  papi_free(features);
1321 
1322  (*nvmlShutdownPtr)();
1323 
1324  device_count = 0;
1325  num_events = 0;
1326 
1327  // close the dynamic libraries needed by this component (opened in the init component call)
1328  dlclose(dl1);
1329  dlclose(dl2);
1330  dlclose(dl3);
1331 
1332  return PAPI_OK;
1333 }
1334 
1336  int
1338 {
1339  SUBDBG( "Enter: ctx: %p\n", ctx );
1340 
1341  (void) ctx;
1342 
1343  /* Last chance to clean up thread */
1344 
1345  return PAPI_OK;
1346 }
1347 
1348 
1349 
1353  int
1355 {
1356  SUBDBG( "Enter: ctx: %p, code: %d\n", ctx, code );
1357 
1358  (void) ctx;
1359  (void) code;
1360  (void) option;
1361 
1362 
1363  /* FIXME. This should maybe set up more state, such as which counters are active and */
1364  /* counter mappings. */
1365 
1366  return PAPI_OK;
1367 }
1368 
1378  int
1380 {
1381  SUBDBG( "Enter: cntrl: %p, domain: %d\n", cntrl, domain );
1382 
1383  (void) cntrl;
1384 
1385  int found = 0;
1386 
1387  if ( PAPI_DOM_USER & domain ) {
1388  SUBDBG( " PAPI_DOM_USER \n" );
1389  found = 1;
1390  }
1391  if ( PAPI_DOM_KERNEL & domain ) {
1392  SUBDBG( " PAPI_DOM_KERNEL \n" );
1393  found = 1;
1394  }
1395  if ( PAPI_DOM_OTHER & domain ) {
1396  SUBDBG( " PAPI_DOM_OTHER \n" );
1397  found = 1;
1398  }
1399  if ( PAPI_DOM_ALL & domain ) {
1400  SUBDBG( " PAPI_DOM_ALL \n" );
1401  found = 1;
1402  }
1403  if ( !found )
1404  return ( PAPI_EINVAL );
1405 
1406  return PAPI_OK;
1407 }
1408 
1409 
1410 /**************************************************************/
1411 /* Naming functions, used to translate event numbers to names */
1412 /**************************************************************/
1413 
1414 
1421  int
1422 _papi_nvml_ntv_enum_events( unsigned int *EventCode, int modifier )
1423 {
1424  int index;
1425 
1426  switch ( modifier ) {
1427 
1428  /* return EventCode of first event */
1429  case PAPI_ENUM_FIRST:
1430  /* return the first event that we support */
1431 
1432  *EventCode = 0;
1433  return PAPI_OK;
1434 
1435  /* return EventCode of next available event */
1436  case PAPI_ENUM_EVENTS:
1437  index = *EventCode;
1438 
1439  /* Make sure we are in range */
1440  if ( index < num_events - 1 ) {
1441 
1442  /* This assumes a non-sparse mapping of the events */
1443  *EventCode = *EventCode + 1;
1444  return PAPI_OK;
1445  } else {
1446  return PAPI_ENOEVNT;
1447  }
1448  break;
1449 
1450  default:
1451  return PAPI_EINVAL;
1452  }
1453 
1454  return PAPI_EINVAL;
1455 }
1456 
1462  int
1463 _papi_nvml_ntv_code_to_name( unsigned int EventCode, char *name, int len )
1464 {
1465  SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
1466  int index;
1467 
1468  index = EventCode;
1469 
1470  /* Make sure we are in range */
1471  if (index >= num_events) return PAPI_ENOEVNT;
1472 
1473  strncpy( name, nvml_native_table[index].name, len );
1474 
1475  return PAPI_OK;
1476 }
1477 
1483  int
1484 _papi_nvml_ntv_code_to_descr( unsigned int EventCode, char *descr, int len )
1485 {
1486  int index;
1487  index = EventCode;
1488 
1489  if (index >= num_events) return PAPI_ENOEVNT;
1490 
1491  strncpy( descr, nvml_native_table[index].description, len );
1492 
1493  return PAPI_OK;
1494 }
1495 
1500 int
1501 _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
1502 {
1503 
1504  int index = EventCode;
1505 
1506  if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT;
1507 
1508  strncpy( info->symbol, nvml_native_table[index].name,
1509  sizeof(info->symbol));
1510 
1511  strncpy( info->units, nvml_native_table[index].units,
1512  sizeof(info->units));
1513 
1514  strncpy( info->long_descr, nvml_native_table[index].description,
1515  sizeof(info->symbol));
1516 
1517 // info->data_type = nvml_native_table[index].return_type;
1518 
1519  return PAPI_OK;
1520 }
1521 
/* Component descriptor for the nvml component: static component */
/* information, the sizes of the component-private structures, and the */
/* table of entry points defined in this file. */
/* NOTE(review): the embedded numbering skips lines 1571, 1575 and 1583 */
/* inside the function-pointer table, so entries may be missing from this */
/* listing. _papi_nvml_start, _papi_nvml_write and _papi_nvml_set_domain */
/* are defined in this file but are not referenced below -- confirm */
/* against the real source file. */
1523 papi_vector_t _nvml_vector = {
1524  .cmp_info = {
1525  /* default component information */
1526  /* (unspecified values are initialized to 0) */
1527 
1528  .name = "nvml",
1529  .short_name="nvml",
1530  .version = "1.0",
1531  .description = "NVML provides the API for monitoring NVIDIA hardware (power usage, temperature, fan speed, etc)",
1532  .support_version = "n/a",
1533  .kernel_version = "n/a",
1534 
1535  .num_preset_events = 0,
1536  .num_native_events = 0, /* set by init_component */
1537  .default_domain = PAPI_DOM_USER,
1538  .available_domains = PAPI_DOM_USER,
1539  .default_granularity = PAPI_GRN_THR,
1540  .available_granularities = PAPI_GRN_THR,
1541  .hardware_intr_sig = PAPI_INT_SIGNAL,
1542 
1543 
1544  /* component specific cmp_info initializations */
1545  .hardware_intr = 0,
1546  .precise_intr = 0,
1547  .posix1b_timers = 0,
1548  .kernel_profile = 0,
1549  .kernel_multiplex = 0,
1550  .fast_counter_read = 0,
1551  .fast_real_timer = 0,
1552  .fast_virtual_timer = 0,
1553  .attach = 0,
1554  .attach_must_ptrace = 0,
1555  .cntr_umasks = 0,
1556  .cpu = 0,
1557  .inherit = 0,
1558  },
1559 
1560  /* sizes of framework-opaque component-private structures */
1561  .size = {
1562  .context = sizeof ( nvml_context_t ),
1563  .control_state = sizeof ( nvml_control_state_t ),
1564  .reg_value = sizeof ( nvml_register_t ),
1565  // .reg_alloc = sizeof ( nvml_reg_alloc_t ),
1566  },
1567 
1568  /* function pointers */
1569 
1570  /* Used for general PAPI interactions */
/* (line 1571 is absent from this listing) */
1572  .stop = _papi_nvml_stop,
1573  .read = _papi_nvml_read,
1574  .reset = _papi_nvml_reset,
/* (line 1575 is absent from this listing) */
1576  .init_component = _papi_nvml_init_component,
1577  .init_thread = _papi_nvml_init_thread,
1578  .init_control_state = _papi_nvml_init_control_state,
1579  .update_control_state = _papi_nvml_update_control_state,
1580  .ctl = _papi_nvml_ctl,
1581  .shutdown_thread = _papi_nvml_shutdown_thread,
1582  .shutdown_component = _papi_nvml_shutdown_component,
/* (line 1583 is absent from this listing) */
1584  .cleanup_eventset = NULL,
1585  /* called in add_native_events() */
1586  .allocate_registers = NULL,
1587 
1588  /* Used for overflow/profiling */
/* All overflow/profiling hooks are left NULL in this table. */
1589  .dispatch_timer = NULL,
1590  .get_overflow_address = NULL,
1591  .stop_profiling = NULL,
1592  .set_overflow = NULL,
1593  .set_profile = NULL,
1594 
1595  /* Name Mapping Functions */
/* No name-to-code routine is supplied by this component. */
1596  .ntv_enum_events = _papi_nvml_ntv_enum_events,
1597  .ntv_name_to_code = NULL,
1598  .ntv_code_to_name = _papi_nvml_ntv_code_to_name,
1599  .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr,
1600  .ntv_code_to_info = _papi_nvml_ntv_code_to_info,
1601 
1602 };
1603 
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:625
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
ssize_t read(int fd, void *buf, size_t count)
Definition: appio.c:225
memset(eventId, 0, size)
unsigned long long getPState(nvmlDevice_t dev)
Definition: linux-nvml.c:252
int _papi_nvml_ntv_code_to_name(unsigned int EventCode, char *name, int len)
Definition: linux-nvml.c:1463
long long flags
Definition: iozone.c:12330
static int linkCudaLibraries()
Definition: linux-nvml.c:994
#define papi_free(a)
Definition: papi_memory.h:35
unsigned long long getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
Definition: linux-nvml.c:338
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
start
Definition: iozone.c:22736
#define PAPI_DOM_ALL
Definition: fpapi.h:25
int type
Definition: linux-nvml.h:51
unsigned long long getPowerUsage(nvmlDevice_t dev)
Definition: linux-nvml.c:308
#define papi_malloc(a)
Definition: papi_memory.h:34
#define PAPI_MIN_STR_LEN
Definition: fpapi.h:41
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
void * get_overflow_address(void *context)
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
#define PAPI_ENOEVNT
Definition: fpapi.h:112
#define DECLDIR
static int num_events
struct local_ecc ecc_opts
Definition: linux-nvml.h:41
int _papi_nvml_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
Definition: linux-nvml.c:1248
int _papi_nvml_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1223
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:964
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
int _papi_nvml_write(hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
Definition: linux-nvml.c:1275
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:961
unsigned long long getMemoryInfo(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:228
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
struct cache_ent * entry
Definition: libasync.c:1170
nvmlEccBitType_t bits
Definition: linux-nvml.h:35
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
#define PAPI_DOM_OTHER
Definition: fpapi.h:23
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
#define NVML_MAX_COUNTERS
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:48
int _papi_nvml_shutdown_component()
Definition: linux-nvml.c:1311
void double value
Definition: iozone.c:18781
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
Return codes and api definitions.
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1523
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
int _papi_nvml_init_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:469
nvml_control_state_t
Definition: linux-nvml.c:135
int _papi_nvml_shutdown_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:1337
int _papi_nvml_ntv_enum_events(unsigned int *EventCode, int modifier)
Definition: linux-nvml.c:1422
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
nvml_resource_options_t options
Definition: linux-nvml.h:47
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:632
int nvml_register_t
Definition: linux-nvml.h:32
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:41
int i
Definition: fileop.c:140
#define PAPI_ENOSUPP
Definition: fpapi.h:123
Definition: linux-nvml.h:45
ssize_t write(int fd, const void *buf, size_t count)
Definition: appio.c:298
static int device_count
Definition: linux-nvml.c:147
static int set_domain(hwd_control_state_t *cntrl, unsigned int domain)
unsigned long long getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
Definition: linux-nvml.c:170
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
long long found
Definition: libasync.c:735
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
static int cidx
Definition: event_info.c:40
int _papi_nvml_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1205
static int native
Definition: event_info.c:39
nvmlClockType_t clock
Definition: linux-nvml.h:40
#define FEATURE_TEMP
Definition: linux-nvml.h:13
__attribute__((constructor))
Definition: init_fini.c:12
stop
Definition: iozone.c:22741
int _papi_nvml_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
Definition: linux-nvml.c:1484
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
long long
Definition: iozone.c:19827
#define CUDAAPI
char events[MAX_EVENTS][BUFSIZ]
static int * features
Definition: linux-nvml.c:153
int _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
Definition: linux-nvml.c:1501
#define PAPI_INT_SIGNAL
Definition: papi_internal.h:53
nvmlDevice_t handle
Definition: linux-nvml.c:399
int _papi_nvml_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
Definition: linux-nvml.c:1178
#define FEATURE_POWER
Definition: linux-nvml.h:12
#define PAPI_ESYS
Definition: fpapi.h:108
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:49
strcpy(filename, default_filename)
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
static void nvml_hardware_reset()
Definition: linux-nvml.c:380
unsigned long long getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:156
int cudaIdx
Definition: linux-nvml.c:400
char * name
Definition: iozone.c:23648
unsigned long long getTemperature(nvmlDevice_t dev)
Definition: linux-nvml.c:323
int
Definition: iozone.c:18528
int temp
Definition: iozone.c:22158
static int detectDevices()
Definition: linux-nvml.c:479
child_idents[x-1] state
Definition: iozone.c:21341
unsigned long long getFanSpeed(nvmlDevice_t dev)
Definition: linux-nvml.c:198
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define HAS_FEATURE(features, query)
Definition: linux-nvml.h:17
int _papi_nvml_set_domain(hwd_control_state_t *cntrl, int domain)
Definition: linux-nvml.c:1379
#define CUDARTAPI
#define GPU_UTILIZATION
Definition: linux-nvml.h:28
int which_one
Definition: linux-nvml.h:36
#define PAPI_DOM_USER
Definition: fpapi.h:21
unsigned long long getUtilization(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:356
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
int _papi_nvml_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
Definition: linux-nvml.c:1354
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25
static void createNativeEvents()
Definition: linux-nvml.c:663
const char * names[NUM_EVENTS]
char units[PAPI_MIN_STR_LEN]
Definition: papi.h:970
#define PAPI_GRN_THR
Definition: fpapi.h:67
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20
int _papi_nvml_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1296
int _papi_nvml_init_control_state(hwd_control_state_t *ctl)
Definition: linux-nvml.c:1166
unsigned long long getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:213
int _papi_nvml_init_component(int cidx)
Definition: linux-nvml.c:905
nvml_control_state_t state
Definition: linux-nvml.c:140