PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-nvml.c
Go to the documentation of this file.
1 /****************************/
2 /* THIS IS OPEN SOURCE CODE */
3 /****************************/
4 
20 #include <dlfcn.h>
21 
22 #include <stdio.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <inttypes.h>
26 #include <string.h>
27 /* Headers required by PAPI */
28 #include "papi.h"
29 #include "papi_internal.h"
30 #include "papi_vector.h"
31 #include "papi_memory.h"
32 
33 #include "linux-nvml.h"
34 
35 #include "nvml.h"
36 #include "cuda.h"
37 #include "cuda_runtime_api.h"
38 
40 
41 /***** CHANGE PROTOTYPES TO DECLARE CUDA AND NVML LIBRARY SYMBOLS AS WEAK *****
42  * This is done so that a version of PAPI built with the nvml component can *
43  * be installed on a system which does not have the cuda libraries installed. *
44  * *
45  * If this is done without these prototypes, then all papi services on the *
46  * system without the cuda libraries installed will fail. The PAPI libraries *
47  * contain references to the cuda libraries which are not installed. The *
48  * load of PAPI commands fails because the cuda library references can not be *
49  * resolved. *
50  * *
51  * This also defines pointers to the cuda library functions that we call. *
52  * These function pointers will be resolved with dlopen/dlsym calls at *
53  * component initialization time. The component then calls the cuda library *
54  * functions through these function pointers. *
55  ********************************************************************************/
56 #undef CUDAAPI
57 #define CUDAAPI __attribute__((weak))
58 CUresult CUDAAPI cuInit(unsigned int);
59 
60 CUresult (*cuInitPtr)(unsigned int);
61 
62 #undef CUDARTAPI
63 #define CUDARTAPI __attribute__((weak))
64 cudaError_t CUDARTAPI cudaGetDevice(int *);
65 cudaError_t CUDARTAPI cudaGetDeviceCount(int *);
66 cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int);
67 
68 cudaError_t (*cudaGetDevicePtr)(int *);
69 cudaError_t (*cudaGetDeviceCountPtr)(int *);
70 cudaError_t (*cudaDeviceGetPCIBusIdPtr)(char *, int, int);
71 
72 #undef DECLDIR
73 #define DECLDIR __attribute__((weak))
74 nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo (nvmlDevice_t, nvmlClockType_t, unsigned int *);
75 const char* DECLDIR nvmlErrorString (nvmlReturn_t);
76 nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
77 nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed (nvmlDevice_t, unsigned int *);
78 nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo (nvmlDevice_t, nvmlMemory_t *);
79 nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState (nvmlDevice_t, nvmlPstates_t *);
80 nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage (nvmlDevice_t, unsigned int *);
81 nvmlReturn_t DECLDIR nvmlDeviceGetTemperature (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
82 nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
83 nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates (nvmlDevice_t, nvmlUtilization_t *);
84 nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex (unsigned int, nvmlDevice_t *);
85 nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo (nvmlDevice_t, nvmlPciInfo_t *);
86 nvmlReturn_t DECLDIR nvmlDeviceGetName (nvmlDevice_t, char *, unsigned int);
87 nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
88 nvmlReturn_t DECLDIR nvmlDeviceGetEccMode (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
89 nvmlReturn_t DECLDIR nvmlInit (void);
90 nvmlReturn_t DECLDIR nvmlDeviceGetCount (unsigned int *);
91 nvmlReturn_t DECLDIR nvmlShutdown (void);
92 
93 nvmlReturn_t (*nvmlDeviceGetClockInfoPtr) (nvmlDevice_t, nvmlClockType_t, unsigned int *);
94 char* (*nvmlErrorStringPtr) (nvmlReturn_t);
95 nvmlReturn_t (*nvmlDeviceGetDetailedEccErrorsPtr) (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
96 nvmlReturn_t (*nvmlDeviceGetFanSpeedPtr) (nvmlDevice_t, unsigned int *);
97 nvmlReturn_t (*nvmlDeviceGetMemoryInfoPtr) (nvmlDevice_t, nvmlMemory_t *);
98 nvmlReturn_t (*nvmlDeviceGetPerformanceStatePtr) (nvmlDevice_t, nvmlPstates_t *);
99 nvmlReturn_t (*nvmlDeviceGetPowerUsagePtr) (nvmlDevice_t, unsigned int *);
100 nvmlReturn_t (*nvmlDeviceGetTemperaturePtr) (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
101 nvmlReturn_t (*nvmlDeviceGetTotalEccErrorsPtr) (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
102 nvmlReturn_t (*nvmlDeviceGetUtilizationRatesPtr) (nvmlDevice_t, nvmlUtilization_t *);
103 nvmlReturn_t (*nvmlDeviceGetHandleByIndexPtr) (unsigned int, nvmlDevice_t *);
104 nvmlReturn_t (*nvmlDeviceGetPciInfoPtr) (nvmlDevice_t, nvmlPciInfo_t *);
105 nvmlReturn_t (*nvmlDeviceGetNamePtr) (nvmlDevice_t, char *, unsigned int);
106 nvmlReturn_t (*nvmlDeviceGetInforomVersionPtr) (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
107 nvmlReturn_t (*nvmlDeviceGetEccModePtr) (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
108 nvmlReturn_t (*nvmlInitPtr) (void);
109 nvmlReturn_t (*nvmlDeviceGetCountPtr) (unsigned int *);
110 nvmlReturn_t (*nvmlShutdownPtr) (void);
111 
112 
113 // dlopen() handles for the shared libraries this component loads at run time:
// dl1 -> libcuda.so, dl2 -> libcudart.so, dl3 -> libnvidia-ml.so
114 static void* dl1 = NULL;
115 static void* dl2 = NULL;
116 static void* dl3 = NULL;
117 
// Forward declaration; called from the component initialization code below.
118 static int linkCudaLibraries ();
119 
120 
121 /* Declare our vector in advance */
123 
124 /* Up to 25 events per card; how many cards per system should we allow for?! */
125 #define NVML_MAX_COUNTERS 100
126 
/* Control-state record: an event count plus two arrays of NVML_MAX_COUNTERS
 * entries (presumably the selected counter indices and their values — confirm
 * against the code that fills them).
 * NOTE(review): the embedded listing numbers jump from 134 to 136, so the line
 * that closes this typedef is not visible here. */
130 typedef struct nvml_control_state
131 {
132  int num_events;
133  int which_counter[NVML_MAX_COUNTERS];
134  long long counter[NVML_MAX_COUNTERS];
136 
/* NOTE(review): the members and closing line of this typedef (listing numbers
 * 140-141) are not visible in this view. */
138 typedef struct nvml_context
139 {
142 
145 
/* Number of GPUs handled by the component; assigned from the CUDA device
 * count during component initialization. */
147 static int device_count = 0;
148 
/* Running total of native events; incremented during device detection and
 * later exported as the component's native-event and counter count. */
150 static int num_events = 0;
151 
/* devices[i]  : NVML handle stored for CUDA device index i.
 * features[i] : bitwise OR of the FEATURE_* flags detected for that device.
 * Both arrays are allocated with papi_malloc() at component initialization. */
152 static nvmlDevice_t* devices=NULL;
153 static int* features=NULL;
154 
155 unsigned long long
156 getClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one )
157 {
158  unsigned int ret = 0;
159  nvmlReturn_t bad;
160  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
161 
162  if ( NVML_SUCCESS != bad ) {
163  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
164  }
165 
166  return (unsigned long long)ret;
167 }
168 
169  unsigned long long
170 getEccLocalErrors( nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
171 {
172  nvmlEccErrorCounts_t counts;
173 
174  nvmlReturn_t bad;
175  bad = (*nvmlDeviceGetDetailedEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
176 
177  if ( NVML_SUCCESS != bad ) {
178  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
179  }
180 
181 
182  switch ( which_one ) {
183  case LOCAL_ECC_REGFILE:
184  return counts.registerFile;
185  case LOCAL_ECC_L1:
186  return counts.l1Cache;
187  case LOCAL_ECC_L2:
188  return counts.l2Cache;
189  case LOCAL_ECC_MEM:
190  return counts.deviceMemory;
191  default:
192  ;
193  }
194  return (unsigned long long)-1;
195 }
196 
197  unsigned long long
198 getFanSpeed( nvmlDevice_t dev )
199 {
200  unsigned int ret = 0;
201  nvmlReturn_t bad;
202  bad = (*nvmlDeviceGetFanSpeedPtr)( dev, &ret );
203 
204  if ( NVML_SUCCESS != bad ) {
205  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
206  }
207 
208 
209  return (unsigned long long)ret;
210 }
211 
212  unsigned long long
213 getMaxClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one)
214 {
215  unsigned int ret = 0;
216  nvmlReturn_t bad;
217  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
218 
219  if ( NVML_SUCCESS != bad ) {
220  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
221  }
222 
223 
224  return (unsigned long long) ret;
225 }
226 
/*
 * Read one field of a device's memory information.
 *
 * dev       - NVML handle of the device to query.
 * which_one - selects the field: MEMINFO_UNALLOCED returns the free amount,
 *             MEMINFO_ALLOCED the used amount; any unhandled value returns
 *             (unsigned long long)-1.
 *
 * NOTE(review): listing number 239 (the case label that guards the
 * "return meminfo.total" line) is absent from this view — restore it from the
 * original file.
 * NOTE(review): meminfo is not initialized, and a failed query is only logged,
 * so the switch may return indeterminate data after an NVML error.
 */
227  unsigned long long
228 getMemoryInfo( nvmlDevice_t dev, int which_one )
229 {
230  nvmlMemory_t meminfo;
231  nvmlReturn_t bad;
232  bad = (*nvmlDeviceGetMemoryInfoPtr)( dev, &meminfo );
233 
234  if ( NVML_SUCCESS != bad ) {
235  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
236  }
237 
238  switch (which_one) {
240  return meminfo.total;
241  case MEMINFO_UNALLOCED:
242  return meminfo.free;
243  case MEMINFO_ALLOCED:
244  return meminfo.used;
245  default:
246  ;
247  }
248  return (unsigned long long)-1;
249 }
250 
251  unsigned long long
252 getPState( nvmlDevice_t dev )
253 {
254  unsigned int ret = 0;
255  nvmlPstates_t state = NVML_PSTATE_15;
256  nvmlReturn_t bad;
257  bad = (*nvmlDeviceGetPerformanceStatePtr)( dev, &state );
258 
259  if ( NVML_SUCCESS != bad ) {
260  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
261  }
262 
263 
264  switch ( state ) {
265  case NVML_PSTATE_15:
266  ret++;
267  case NVML_PSTATE_14:
268  ret++;
269  case NVML_PSTATE_13:
270  ret++;
271  case NVML_PSTATE_12:
272  ret++;
273  case NVML_PSTATE_11:
274  ret++;
275  case NVML_PSTATE_10:
276  ret++;
277  case NVML_PSTATE_9:
278  ret++;
279  case NVML_PSTATE_8:
280  ret++;
281  case NVML_PSTATE_7:
282  ret++;
283  case NVML_PSTATE_6:
284  ret++;
285  case NVML_PSTATE_5:
286  ret++;
287  case NVML_PSTATE_4:
288  ret++;
289  case NVML_PSTATE_3:
290  ret++;
291  case NVML_PSTATE_2:
292  ret++;
293  case NVML_PSTATE_1:
294  ret++;
295  case NVML_PSTATE_0:
296  break;
297  case NVML_PSTATE_UNKNOWN:
298  default:
299  /* This should never happen?
300  * The API docs just state Unknown performance state... */
301  return (unsigned long long) -1;
302  }
303 
304  return (unsigned long long)ret;
305 }
306 
307  unsigned long long
308 getPowerUsage( nvmlDevice_t dev )
309 {
310  unsigned int power;
311  nvmlReturn_t bad;
312  bad = (*nvmlDeviceGetPowerUsagePtr)( dev, &power );
313 
314  if ( NVML_SUCCESS != bad ) {
315  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
316  }
317 
318 
319  return (unsigned long long) power;
320 }
321 
322  unsigned long long
323 getTemperature( nvmlDevice_t dev )
324 {
325  unsigned int ret = 0;
326  nvmlReturn_t bad;
327  bad = (*nvmlDeviceGetTemperaturePtr)( dev, NVML_TEMPERATURE_GPU, &ret );
328 
329  if ( NVML_SUCCESS != bad ) {
330  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
331  }
332 
333 
334  return (unsigned long long)ret;
335 }
336 
337  unsigned long long
338 getTotalEccErrors( nvmlDevice_t dev, nvmlEccBitType_t bits)
339 {
340  unsigned long long counts = 0;
341  nvmlReturn_t bad;
342  bad = (*nvmlDeviceGetTotalEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
343 
344  if ( NVML_SUCCESS != bad ) {
345  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
346  }
347 
348 
349  return counts;
350 }
351 
352 /* 0 => gpu util
353  1 => memory util
354  */
355  unsigned long long
356 getUtilization( nvmlDevice_t dev, int which_one )
357 {
358  nvmlUtilization_t util;
359  nvmlReturn_t bad;
360  bad = (*nvmlDeviceGetUtilizationRatesPtr)( dev, &util );
361 
362  if ( NVML_SUCCESS != bad ) {
363  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
364  }
365 
366 
367  switch (which_one) {
368  case GPU_UTILIZATION:
369  return (unsigned long long) util.gpu;
370  case MEMORY_UTILIZATION:
371  return (unsigned long long) util.memory;
372  default:
373  ;
374  }
375 
376  return (unsigned long long) -1;
377 }
378 
/* Counter-reset hook. The body contains only comments, so calling it does
 * nothing; the comment below explains why no reset is attempted.
 * NOTE(review): listing number 380, which should carry the function name and
 * parameter list, is absent from this view. */
379  static void
381 {
382  /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while
383  * possible to implement a reset on the ECC counters, we pass */
384  /*
385  int i;
386  for ( i=0; i < device_count; i++ )
387  nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC );
388  */
389 }
390 
392 /* You might replace this with code that accesses */
393 /* hardware or reads values from the operating system. */
394  static int
395 nvml_hardware_read( long long *value, int which_one)
396  //, nvml_context_t *ctx)
397 {
399  nvmlDevice_t handle;
400  int cudaIdx = -1;
401 
402  entry = &nvml_native_table[which_one];
403  *value = (long long) -1;
404  /* replace entry->resources with the current cuda_device->nvml device */
405  (*cudaGetDevicePtr)( &cudaIdx );
406 
407  if ( cudaIdx < 0 || cudaIdx > device_count )
408  return PAPI_EINVAL;
409 
410  /* Make sure the device we are running on has the requested event */
411  if ( !HAS_FEATURE( features[cudaIdx] , entry->type) )
412  return PAPI_EINVAL;
413 
414  handle = devices[cudaIdx];
415 
416  switch (entry->type) {
417  case FEATURE_CLOCK_INFO:
418  *value = getClockSpeed( handle,
419  (nvmlClockType_t)entry->options.clock );
420  break;
422  *value = getEccLocalErrors( handle,
423  (nvmlEccBitType_t)entry->options.ecc_opts.bits,
424  (int)entry->options.ecc_opts.which_one);
425  break;
426  case FEATURE_FAN_SPEED:
427  *value = getFanSpeed( handle );
428  break;
429  case FEATURE_MAX_CLOCK:
430  *value = getMaxClockSpeed( handle,
431  (nvmlClockType_t)entry->options.clock );
432  break;
433  case FEATURE_MEMORY_INFO:
434  *value = getMemoryInfo( handle,
435  (int)entry->options.which_one );
436  break;
437  case FEATURE_PERF_STATES:
438  *value = getPState( handle );
439  break;
440  case FEATURE_POWER:
441  *value = getPowerUsage( handle );
442  break;
443  case FEATURE_TEMP:
444  *value = getTemperature( handle );
445  break;
447  *value = getTotalEccErrors( handle,
448  (nvmlEccBitType_t)entry->options.ecc_opts.bits );
449  break;
450  case FEATURE_UTILIZATION:
451  *value = getUtilization( handle,
452  (int)entry->options.which_one );
453  break;
454  default:
455  return PAPI_EINVAL;
456  }
457 
458  return PAPI_OK;
459 
460 
461 }
462 
463 /********************************************************************/
464 /* Below are the functions required by the PAPI component interface */
465 /********************************************************************/
466 
/* Per-thread initialization hook: logs the context pointer through SUBDBG and
 * returns PAPI_OK without touching any state.
 * NOTE(review): listing number 469, which should carry the function name and
 * the declaration of ctx, is absent from this view. */
468  int
470 {
/* Keeps ctx referenced when SUBDBG expands to nothing (presumably to silence
 * an unused-parameter warning). */
471  (void) ctx;
472 
473  SUBDBG( "Enter: ctx: %p\n", ctx );
474 
475  return PAPI_OK;
476 }
477 
478  static int
480 {
481  nvmlReturn_t ret;
482  nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
483  nvmlDevice_t handle;
484  nvmlPciInfo_t info;
485 
486  cudaError_t cuerr;
487 
488  char busId[16];
489  char name[64];
490  char inforomECC[16];
491  char inforomPower[16];
492  char names[device_count][64];
493  char nvml_busIds[device_count][16];
494 
495  float ecc_version = 0.0, power_version = 0.0;
496 
497  int i = 0,
498  j = 0;
499  int isTesla = 0;
500  int isFermi = 0;
501  int isUnique = 1;
502 
503  unsigned int temp = 0;
504 
505 
506  /* list of nvml pci_busids */
507  for (i=0; i < device_count; i++) {
508  ret = (*nvmlDeviceGetHandleByIndexPtr)( i, &handle );
509  if ( NVML_SUCCESS != ret ) {
510  SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
511  return PAPI_ESYS;
512  }
513 
514  ret = (*nvmlDeviceGetPciInfoPtr)( handle, &info );
515  if ( NVML_SUCCESS != ret ) {
516  SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", (*nvmlErrorStringPtr)(ret) );
517  return PAPI_ESYS;
518  }
519  strncpy(nvml_busIds[i], info.busId, sizeof(nvml_busIds[i])-1);
520  nvml_busIds[i][sizeof(nvml_busIds[i])-1] = '\0';
521  }
522 
523  /* We want to key our list of nvmlDevice_ts by each device's cuda index */
524  for (i=0; i < device_count; i++) {
525  cuerr = (*cudaDeviceGetPCIBusIdPtr)( busId, 16, i );
526  if ( CUDA_SUCCESS != cuerr ) {
527  SUBDBG("cudaDeviceGetPCIBusId failed.\n");
528  return PAPI_ESYS;
529  }
530  for (j=0; j < device_count; j++ ) {
531  if ( !strncmp( busId, nvml_busIds[j], 16) ) {
532  ret = (*nvmlDeviceGetHandleByIndexPtr)(j, &devices[i] );
533  if ( NVML_SUCCESS != ret ) {
534  SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
535  return PAPI_ESYS;
536  }
537  break;
538  }
539  }
540  }
541 
542  memset(names, 0x0, device_count*64);
543  /* So for each card, check whats querable */
544  for (i=0; i < device_count; i++ ) {
545  isTesla=0;
546  isFermi=1;
547  isUnique = 1;
548  features[i] = 0;
549 
550  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, sizeof(name)-1 );
551  if ( NVML_SUCCESS != ret) {
552  SUBDBG("nvmlDeviceGetName failed \n");
553  return PAPI_ESYS;
554  }
555 
556  name[sizeof(name)-1] = '\0'; // to safely use strstr operation below, the variable 'name' must be null terminated
557 
558  for (j=0; j < i; j++ )
559  if ( 0 == strncmp( name, names[j], 64 ) ) {
560  /* if we have a match, and IF everything is sane,
561  * devices with the same name eg Tesla C2075 share features */
562  isUnique = 0;
563  features[i] = features[j];
564 
565  }
566 
567  if ( isUnique ) {
568  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
569  if ( NVML_SUCCESS != ret ) {
570  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
571  isFermi = 0;
572  }
573  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
574  if ( NVML_SUCCESS != ret ) {
575  /* This implies the card is older then Fermi */
576  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
577  SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
578  isFermi = 0;
579  }
580 
581  ecc_version = strtof(inforomECC, NULL );
582  power_version = strtof( inforomPower, NULL);
583 
584  isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;
585 
586  /* For Tesla and Quadro products from Fermi and Kepler families. */
587  if ( isFermi ) {
588  features[i] |= FEATURE_CLOCK_INFO;
589  num_events += 3;
590  }
591 
592  /* For Tesla and Quadro products from Fermi and Kepler families.
593  requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
594  requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
595  requires ECC mode to be enabled. */
596  ret = (*nvmlDeviceGetEccModePtr)( devices[i], &mode, NULL );
597  if ( NVML_SUCCESS == ret ) {
598  if ( NVML_FEATURE_ENABLED == mode) {
599  if ( ecc_version >= 2.0 ) {
600  features[i] |= FEATURE_ECC_LOCAL_ERRORS;
601  num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
602  }
603  if ( ecc_version >= 1.0 ) {
604  features[i] |= FEATURE_ECC_TOTAL_ERRORS;
605  num_events += 2; /* single bit errors, double bit errors */
606  }
607  }
608  } else {
609  SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml\
610 return code %d)\n", ret);
611  }
612 
613  /* For all discrete products with dedicated fans */
614  features[i] |= FEATURE_FAN_SPEED;
615  num_events++;
616 
617  /* For Tesla and Quadro products from Fermi and Kepler families. */
618  if ( isFermi ) {
619  features[i] |= FEATURE_MAX_CLOCK;
620  num_events += 3;
621  }
622 
623  /* For all products */
624  features[i] |= FEATURE_MEMORY_INFO;
625  num_events += 3; /* total, free, used */
626 
627  /* For Tesla and Quadro products from the Fermi and Kepler families. */
628  if ( isFermi ) {
629  features[i] |= FEATURE_PERF_STATES;
630  num_events++;
631  }
632 
633  /* For "GF11x" Tesla and Quadro products from the Fermi family
634  requires NVML_INFOROM_POWER 3.0 or higher
635  For Tesla and Quadro products from the Kepler family
636  does not require NVML_INFOROM_POWER */
637  /* Just try reading power, if it works, enable it*/
638  ret = (*nvmlDeviceGetPowerUsagePtr)( devices[i], &temp);
639  if ( NVML_SUCCESS == ret ) {
640  features[i] |= FEATURE_POWER;
641  num_events++;
642  } else {
643  SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on\
644 this card. (nvml return code %d)\n", ret );
645  }
646 
647  /* For all discrete and S-class products. */
648  features[i] |= FEATURE_TEMP;
649  num_events++;
650 
651  /* For Tesla and Quadro products from the Fermi and Kepler families */
652  if (isFermi) {
653  features[i] |= FEATURE_UTILIZATION;
654  num_events += 2;
655  }
656 
657  strncpy( names[i], name, sizeof(names[0])-1);
658  names[i][sizeof(names[0])-1] = '\0';
659  }
660  }
661  return PAPI_OK;
662 }
663 
/*
 * Build the native event table: for the first device of each distinct name,
 * append one table entry per event of every feature flagged in features[i].
 * Each entry's name is "<sanitized device name>:<event>", where the sanitized
 * name is the NVML device name with spaces replaced by underscores.
 *
 * NOTE(review): several listing numbers are absent from this view (665, 674,
 * 678, and one line in most of the ECC / memory / utilization entry groups),
 * so the function name, the declaration of 'entry', the tail of the
 * papi_malloc() call and some per-entry assignments cannot be seen here.
 */
664  static void
666 {
667  char name[64];
668  char sanitized_name[PAPI_MAX_STR_LEN];
669  char names[device_count][64];
670 
671  int i, nameLen = 0, j;
672  int isUnique = 1;
673 
675  nvmlReturn_t ret;
676 
/* NOTE(review): in the visible lines the allocation result is passed straight
 * to memset() without a NULL check — confirm against the original file. */
677  nvml_native_table = (nvml_native_event_entry_t*) papi_malloc(
679  memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events );
680  entry = &nvml_native_table[0];
681 
682  for (i=0; i < device_count; i++ ) {
683  memset( names[i], 0x0, 64 );
684  isUnique = 1;
/* NOTE(review): 'ret' is assigned here but never examined in the visible code. */
685  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, sizeof(name)-1 );
686  name[sizeof(name)-1] = '\0'; // to safely use strlen operation below, the variable 'name' must be null terminated
687 
/* A device whose name matches an earlier recorded name adds no entries. */
688  for (j=0; j < i; j++ )
689  {
690  if ( 0 == strncmp( name, names[j], 64 ) )
691  isUnique = 0;
692  }
693 
694  if ( isUnique ) {
/* Event names use '_' where the device name has spaces. */
695  nameLen = strlen(name);
696  strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
697  for (j=0; j < nameLen; j++)
698  if ( ' ' == sanitized_name[j] )
699  sanitized_name[j] = '_';
700 
701 
702 
/* Three clock events: graphics, SM and memory domains. */
703  if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
704  sprintf( entry->name, "%s:graphics_clock", sanitized_name );
705  strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
706  entry->options.clock = NVML_CLOCK_GRAPHICS;
707  entry->type = FEATURE_CLOCK_INFO;
708  entry++;
709 
710  sprintf( entry->name, "%s:sm_clock", sanitized_name);
711  strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
712  entry->options.clock = NVML_CLOCK_SM;
713  entry->type = FEATURE_CLOCK_INFO;
714  entry++;
715 
716  sprintf( entry->name, "%s:memory_clock", sanitized_name);
717  strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
718  entry->options.clock = NVML_CLOCK_MEM;
719  entry->type = FEATURE_CLOCK_INFO;
720  entry++;
721  }
722 
/* Eight location-based ECC events: {single, double bit} x {L1, L2, memory,
 * register file}. */
723  if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) {
724  sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
725  strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
726  entry->options.ecc_opts = (struct local_ecc){
727  .bits = NVML_SINGLE_BIT_ECC,
728  .which_one = LOCAL_ECC_L1,
729  };
731  entry++;
732 
733  sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
734  strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
735  entry->options.ecc_opts = (struct local_ecc){
736  .bits = NVML_SINGLE_BIT_ECC,
737  .which_one = LOCAL_ECC_L2,
738  };
740  entry++;
741 
742  sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
743  strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
744  entry->options.ecc_opts = (struct local_ecc){
745  .bits = NVML_SINGLE_BIT_ECC,
746  .which_one = LOCAL_ECC_MEM,
747  };
749  entry++;
750 
751  sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
752  strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
753  entry->options.ecc_opts = (struct local_ecc){
754  .bits = NVML_SINGLE_BIT_ECC,
755  .which_one = LOCAL_ECC_REGFILE,
756  };
758  entry++;
759 
/* NOTE(review): "1l_double_ecc_errors" looks like a typo for
 * "l1_double_ecc_errors" (compare "l1_single_ecc_errors" above); the string is
 * left unchanged because event names are user-visible. */
760  sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
761  strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
762  entry->options.ecc_opts = (struct local_ecc){
763  .bits = NVML_DOUBLE_BIT_ECC,
764  .which_one = LOCAL_ECC_L1,
765  };
767  entry++;
768 
769  sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
770  strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
771  entry->options.ecc_opts = (struct local_ecc){
772  .bits = NVML_DOUBLE_BIT_ECC,
773  .which_one = LOCAL_ECC_L2,
774  };
776  entry++;
777 
778  sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
779  strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
780  entry->options.ecc_opts = (struct local_ecc){
781  .bits = NVML_DOUBLE_BIT_ECC,
782  .which_one = LOCAL_ECC_MEM,
783  };
785  entry++;
786 
787  sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
788  strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
789  entry->options.ecc_opts = (struct local_ecc){
790  .bits = NVML_DOUBLE_BIT_ECC,
791  .which_one = LOCAL_ECC_REGFILE,
792  };
794  entry++;
795  }
796 
797  if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
798  sprintf( entry->name, "%s:fan_speed", sanitized_name);
799  strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
800  entry->type = FEATURE_FAN_SPEED;
801  entry++;
802  }
803 
/* Three maximum-clock events, one per clock domain. */
804  if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
805  sprintf( entry->name, "%s:graphics_max_clock", sanitized_name);
806  strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
807  entry->options.clock = NVML_CLOCK_GRAPHICS;
808  entry->type = FEATURE_MAX_CLOCK;
809  entry++;
810 
811  sprintf( entry->name, "%s:sm_max_clock", sanitized_name);
812  strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
813  entry->options.clock = NVML_CLOCK_SM;
814  entry->type = FEATURE_MAX_CLOCK;
815  entry++;
816 
817  sprintf( entry->name, "%s:memory_max_clock", sanitized_name);
818  strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
819  entry->options.clock = NVML_CLOCK_MEM;
820  entry->type = FEATURE_MAX_CLOCK;
821  entry++;
822  }
823 
/* Three memory events: total, unallocated and allocated. */
824  if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) {
825  sprintf( entry->name, "%s:total_memory", sanitized_name);
826  strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
828  entry->type = FEATURE_MEMORY_INFO;
829  entry++;
830 
831  sprintf( entry->name, "%s:unallocated_memory", sanitized_name);
832  strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
834  entry->type = FEATURE_MEMORY_INFO;
835  entry++;
836 
837  sprintf( entry->name, "%s:allocated_memory", sanitized_name);
838  strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
840  entry->type = FEATURE_MEMORY_INFO;
841  entry++;
842  }
843 
844  if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) {
845  sprintf( entry->name, "%s:pstate", sanitized_name);
846  strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
847  entry->type = FEATURE_PERF_STATES;
848  entry++;
849  }
850 
851  if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
852  sprintf( entry->name, "%s:power", sanitized_name);
853  // set the power event units value to "mW" for milliwatts
854  strncpy( entry->units, "mW",PAPI_MIN_STR_LEN);
855  strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw (+/-5 watts) for the entire board: GPU, memory, etc.", PAPI_MAX_STR_LEN);
856  entry->type = FEATURE_POWER;
857  entry++;
858  }
859 
860  if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
861  sprintf( entry->name, "%s:temperature", sanitized_name);
862  strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
863  entry->type = FEATURE_TEMP;
864  entry++;
865  }
866 
/* NOTE(review): the single-bit and double-bit totals below are both given the
 * name "<device>:total_ecc_errors", so the two events cannot be told apart by
 * name — confirm whether distinct names were intended. */
867  if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) {
868  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
869  strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
870  entry->options.ecc_opts = (struct local_ecc){
871  .bits = NVML_SINGLE_BIT_ECC,
872  };
874  entry++;
875 
876  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
877  strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
878  entry->options.ecc_opts = (struct local_ecc){
879  .bits = NVML_DOUBLE_BIT_ECC,
880  };
882  entry++;
883  }
884 
885  if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) {
886  sprintf( entry->name, "%s:gpu_utilization", sanitized_name);
887  strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
889  entry->type = FEATURE_UTILIZATION;
890  entry++;
891 
892  sprintf( entry->name, "%s:memory_utilization", sanitized_name);
893  strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
895  entry->type = FEATURE_UTILIZATION;
896  entry++;
897  }
/* Record the name so later devices with the same name are skipped. */
898  strncpy( names[i], name, sizeof(names[0])-1);
899  names[i][sizeof(names[0])-1] = '\0';
900  }
901  }
902 }
903 
/*
 * Component initialization: loads the CUDA/NVML libraries, initializes both,
 * checks that they report the same number of devices, allocates the
 * per-device arrays, runs feature detection and publishes the event and
 * counter counts in _nvml_vector.cmp_info.
 *
 * Returns PAPI_OK on success and PAPI_ENOSUPP on any failure; the failure
 * paths after library loading store a reason in
 * _nvml_vector.cmp_info.disabled_reason.
 *
 * NOTE(review): listing number 909 (the function name and the declaration of
 * cidx) and listing number 975 (a statement between feature detection and
 * the exports) are absent from this view.
 */
908  int
910 {
911  SUBDBG ("Entry: cidx: %d\n", cidx);
912  nvmlReturn_t ret;
913  cudaError_t cuerr;
914  int papi_errorcode;
915 
916  int cuda_count = 0;
917  unsigned int nvml_count = 0;
918 
919  /* link in the cuda and nvml libraries and resolve the symbols we need to use */
920  if (linkCudaLibraries() != PAPI_OK) {
921  SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
922  SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
923  return (PAPI_ENOSUPP);
924  }
925 
926  ret = (*nvmlInitPtr)();
927  if ( NVML_SUCCESS != ret ) {
928  strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
929  return PAPI_ENOSUPP;
930  }
931 
/* NOTE(review): none of the error returns below this point call the NVML
 * shutdown pointer in the visible code — confirm whether cleanup happens
 * elsewhere. */
932  cuerr = (*cuInitPtr)( 0 );
933  if ( CUDA_SUCCESS != cuerr ) {
934  strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
935  return PAPI_ENOSUPP;
936  }
937 
938  /* Figure out the number of CUDA devices in the system */
939  ret = (*nvmlDeviceGetCountPtr)( &nvml_count );
940  if ( NVML_SUCCESS != ret ) {
941  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
942  return PAPI_ENOSUPP;
943  }
944 
945  cuerr = (*cudaGetDeviceCountPtr)( &cuda_count );
946  if ( CUDA_SUCCESS != cuerr ) {
947  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
948  return PAPI_ENOSUPP;
949  }
950 
951  /* We can probably recover from this, when we're clever */
/* The counts are only compared when CUDA reports at least one device. */
952  if ( (cuda_count > 0) && (nvml_count != (unsigned int)cuda_count ) ) {
953  strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
954  return PAPI_ENOSUPP;
955  }
956 
957  device_count = cuda_count;
958 
/* NOTE(review): neither allocation below is checked for NULL before
 * detectDevices() writes through the pointers. */
959  /* A per device representation of what events are present */
960  features = (int*)papi_malloc(sizeof(int) * device_count );
961 
962  /* Handles to each device */
963  devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
964 
965  /* Figure out what events are supported on each card. */
966  if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
967  papi_free(features);
968  papi_free(devices);
969  sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
970  return PAPI_ENOSUPP;
971  }
972 
973  /* The assumption is that if everything went swimmingly in detectDevices,
974  all nvml calls here should be fine. */
976 
977  /* Export the total number of events available */
978  _nvml_vector.cmp_info.num_native_events = num_events;
979 
980  /* Export the component id */
981  _nvml_vector.cmp_info.CmpIdx = cidx;
982 
983  /* Export the number of 'counters' */
984  _nvml_vector.cmp_info.num_cntrs = num_events;
985  _nvml_vector.cmp_info.num_mpx_cntrs = num_events;
986 
987  return PAPI_OK;
988 }
989 
990 
991 /*
992  * Link the CUDA and NVML libraries needed by the NVML component. If any of them cannot be found, then
993  * the NVML component will just be disabled. This is done at runtime so that a version of PAPI built
994  * with the NVML component can be installed and used on systems which have these libraries installed
995  * and on systems where these libraries are not installed.
996  */
997 static int
999 {
1000  /* Attempt to guess if we were statically linked to libc, if so bail */
1001  if ( _dl_non_dynamic_init != NULL ) {
1002  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
1003  return PAPI_ENOSUPP;
1004  }
1005 
1006  /* Need to link in the cuda libraries, if not found disable the component */
1007  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
1008  if (!dl1)
1009  {
1010  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
1011  return ( PAPI_ENOSUPP );
1012  }
1013  cuInitPtr = dlsym(dl1, "cuInit");
1014  if (dlerror() != NULL)
1015  {
1016  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
1017  return ( PAPI_ENOSUPP );
1018  }
1019 
1020  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
1021  if (!dl2)
1022  {
1023  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
1024  return ( PAPI_ENOSUPP );
1025  }
1026  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
1027  if (dlerror() != NULL)
1028  {
1029  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
1030  return ( PAPI_ENOSUPP );
1031  }
1032  cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
1033  if (dlerror() != NULL)
1034  {
1035  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.",PAPI_MAX_STR_LEN);
1036  return ( PAPI_ENOSUPP );
1037  }
1038  cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
1039  if (dlerror() != NULL)
1040  {
1041  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.",PAPI_MAX_STR_LEN);
1042  return ( PAPI_ENOSUPP );
1043  }
1044 
1045  dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
1046  if (!dl3)
1047  {
1048  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.",PAPI_MAX_STR_LEN);
1049  return ( PAPI_ENOSUPP );
1050  }
1051  nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
1052  if (dlerror() != NULL)
1053  {
1054  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.",PAPI_MAX_STR_LEN);
1055  return ( PAPI_ENOSUPP );
1056  }
1057  nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
1058  if (dlerror() != NULL)
1059  {
1060  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.",PAPI_MAX_STR_LEN);
1061  return ( PAPI_ENOSUPP );
1062  }
1063  nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
1064  if (dlerror() != NULL)
1065  {
1066  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.",PAPI_MAX_STR_LEN);
1067  return ( PAPI_ENOSUPP );
1068  }
1069  nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
1070  if (dlerror() != NULL)
1071  {
1072  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.",PAPI_MAX_STR_LEN);
1073  return ( PAPI_ENOSUPP );
1074  }
1075  nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
1076  if (dlerror() != NULL)
1077  {
1078  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.",PAPI_MAX_STR_LEN);
1079  return ( PAPI_ENOSUPP );
1080  }
1081  nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
1082  if (dlerror() != NULL)
1083  {
1084  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.",PAPI_MAX_STR_LEN);
1085  return ( PAPI_ENOSUPP );
1086  }
1087  nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
1088  if (dlerror() != NULL)
1089  {
1090  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.",PAPI_MAX_STR_LEN);
1091  return ( PAPI_ENOSUPP );
1092  }
1093  nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
1094  if (dlerror() != NULL)
1095  {
1096  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.",PAPI_MAX_STR_LEN);
1097  return ( PAPI_ENOSUPP );
1098  }
1099  nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
1100  if (dlerror() != NULL)
1101  {
1102  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.",PAPI_MAX_STR_LEN);
1103  return ( PAPI_ENOSUPP );
1104  }
1105  nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
1106  if (dlerror() != NULL)
1107  {
1108  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.",PAPI_MAX_STR_LEN);
1109  return ( PAPI_ENOSUPP );
1110  }
1111  nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
1112  if (dlerror() != NULL)
1113  {
1114  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.",PAPI_MAX_STR_LEN);
1115  return ( PAPI_ENOSUPP );
1116  }
1117  nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
1118  if (dlerror() != NULL)
1119  {
1120  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.",PAPI_MAX_STR_LEN);
1121  return ( PAPI_ENOSUPP );
1122  }
1123  nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
1124  if (dlerror() != NULL)
1125  {
1126  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.",PAPI_MAX_STR_LEN);
1127  return ( PAPI_ENOSUPP );
1128  }
1129  nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
1130  if (dlerror() != NULL)
1131  {
1132  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.",PAPI_MAX_STR_LEN);
1133  return ( PAPI_ENOSUPP );
1134  }
1135  nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
1136  if (dlerror() != NULL)
1137  {
1138  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.",PAPI_MAX_STR_LEN);
1139  return ( PAPI_ENOSUPP );
1140  }
1141  nvmlInitPtr = dlsym(dl3, "nvmlInit");
1142  if (dlerror() != NULL)
1143  {
1144  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.",PAPI_MAX_STR_LEN);
1145  return ( PAPI_ENOSUPP );
1146  }
1147  nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
1148  if (dlerror() != NULL)
1149  {
1150  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.",PAPI_MAX_STR_LEN);
1151  return ( PAPI_ENOSUPP );
1152  }
1153  nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
1154  if (dlerror() != NULL)
1155  {
1156  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.",PAPI_MAX_STR_LEN);
1157  return ( PAPI_ENOSUPP );
1158  }
1159 
1160  return ( PAPI_OK );
1161 }
1162 
1163 
1169  int
1171 {
1172  SUBDBG( "nvml_init_control_state... %p\n", ctl );
1173  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1174  memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) );
1175 
1176  return PAPI_OK;
1177 }
1178 
1179 
1181  int
1184  int count,
1185  hwd_context_t *ctx )
1186 {
1187  SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx );
1188  int i, index;
1189 
1190  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1191  (void) ctx;
1192 
1193 
1194  /* if no events, return */
1195  if (count==0) return PAPI_OK;
1196 
1197  for( i = 0; i < count; i++ ) {
1198  index = native[i].ni_event;
1199  nvml_ctl->which_counter[i]=index;
1200  /* We have no constraints on event position, so any event */
1201  /* can be in any slot. */
1202  native[i].ni_position = i;
1203  }
1204  nvml_ctl->num_events=count;
1205  return PAPI_OK;
1206 }
1208  int
1210 {
1211  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1212 
1213  (void) ctx;
1214  (void) ctl;
1215 
1216  /* anything that would need to be set at counter start time */
1217 
1218  /* reset */
1219  /* start the counting */
1220 
1221  return PAPI_OK;
1222 }
1223 
1224 
1226  int
1228 {
1229  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1230 
1231  int i;
1232  (void) ctx;
1233  (void) ctl;
1234  int ret;
1235 
1236  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1237 
1238  for (i=0;i<nvml_ctl->num_events;i++) {
1239  if ( PAPI_OK !=
1240  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1241  nvml_ctl->which_counter[i]) ))
1242  return ret;
1243 
1244  }
1245 
1246  return PAPI_OK;
1247 }
1248 
1249 
1251  int
1253  long long **events, int flags )
1254 {
1255  SUBDBG( "Enter: ctx: %p, flags: %d\n", ctx, flags );
1256 
1257  (void) ctx;
1258  (void) flags;
1259  int i;
1260  int ret;
1261  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1262 
1263 
1264  for (i=0;i<nvml_ctl->num_events;i++) {
1265  if ( PAPI_OK !=
1266  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1267  nvml_ctl->which_counter[i]) ))
1268  return ret;
1269 
1270  }
1271  /* return pointer to the values we read */
1272  *events = nvml_ctl->counter;
1273  return PAPI_OK;
1274 }
1275 
1277 /* otherwise, the updated state is written to ESI->hw_start */
1278  int
1280  long long *events )
1281 {
1282  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1283 
1284  (void) ctx;
1285  (void) ctl;
1286  (void) events;
1287 
1288 
1289  /* You can change ECC mode and compute exclusivity modes on the cards */
1290  /* But I don't see this as a function of a PAPI component at this time */
1291  /* All implementation issues aside. */
1292  return PAPI_OK;
1293 }
1294 
1295 
1297 /* If the eventset is not currently running, then the saved value in the */
1298 /* EventSet is set to zero without calling this routine. */
1299  int
1301 {
1302  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1303 
1304  (void) ctx;
1305  (void) ctl;
1306 
1307  /* Reset the hardware */
1309 
1310  return PAPI_OK;
1311 }
1312 
1314  int
1316 {
1317  SUBDBG( "Enter:\n" );
1318 
1319  if (nvml_native_table != NULL)
1320  papi_free(nvml_native_table);
1321  if (devices != NULL)
1322  papi_free(devices);
1323  if (features != NULL)
1324  papi_free(features);
1325 
1326  (*nvmlShutdownPtr)();
1327 
1328  device_count = 0;
1329  num_events = 0;
1330 
1331  // close the dynamic libraries needed by this component (opened in the init component call)
1332  dlclose(dl1);
1333  dlclose(dl2);
1334  dlclose(dl3);
1335 
1336  return PAPI_OK;
1337 }
1338 
1340  int
1342 {
1343  SUBDBG( "Enter: ctx: %p\n", ctx );
1344 
1345  (void) ctx;
1346 
1347  /* Last chance to clean up thread */
1348 
1349  return PAPI_OK;
1350 }
1351 
1352 
1353 
1357  int
1359 {
1360  SUBDBG( "Enter: ctx: %p, code: %d\n", ctx, code );
1361 
1362  (void) ctx;
1363  (void) code;
1364  (void) option;
1365 
1366 
1367  /* FIXME. This should maybe set up more state, such as which counters are active and */
1368  /* counter mappings. */
1369 
1370  return PAPI_OK;
1371 }
1372 
1382  int
1384 {
1385  SUBDBG( "Enter: cntrl: %p, domain: %d\n", cntrl, domain );
1386 
1387  (void) cntrl;
1388 
1389  int found = 0;
1390 
1391  if ( PAPI_DOM_USER & domain ) {
1392  SUBDBG( " PAPI_DOM_USER \n" );
1393  found = 1;
1394  }
1395  if ( PAPI_DOM_KERNEL & domain ) {
1396  SUBDBG( " PAPI_DOM_KERNEL \n" );
1397  found = 1;
1398  }
1399  if ( PAPI_DOM_OTHER & domain ) {
1400  SUBDBG( " PAPI_DOM_OTHER \n" );
1401  found = 1;
1402  }
1403  if ( PAPI_DOM_ALL & domain ) {
1404  SUBDBG( " PAPI_DOM_ALL \n" );
1405  found = 1;
1406  }
1407  if ( !found )
1408  return ( PAPI_EINVAL );
1409 
1410  return PAPI_OK;
1411 }
1412 
1413 
1414 /**************************************************************/
1415 /* Naming functions, used to translate event numbers to names */
1416 /**************************************************************/
1417 
1418 
1425  int
1426 _papi_nvml_ntv_enum_events( unsigned int *EventCode, int modifier )
1427 {
1428  int index;
1429 
1430  switch ( modifier ) {
1431 
1432  /* return EventCode of first event */
1433  case PAPI_ENUM_FIRST:
1434  /* return the first event that we support */
1435 
1436  *EventCode = 0;
1437  return PAPI_OK;
1438 
1439  /* return EventCode of next available event */
1440  case PAPI_ENUM_EVENTS:
1441  index = *EventCode;
1442 
1443  /* Make sure we are in range */
1444  if ( index < num_events - 1 ) {
1445 
1446  /* This assumes a non-sparse mapping of the events */
1447  *EventCode = *EventCode + 1;
1448  return PAPI_OK;
1449  } else {
1450  return PAPI_ENOEVNT;
1451  }
1452  break;
1453 
1454  default:
1455  return PAPI_EINVAL;
1456  }
1457 
1458  return PAPI_EINVAL;
1459 }
1460 
1466  int
1467 _papi_nvml_ntv_code_to_name( unsigned int EventCode, char *name, int len )
1468 {
1469  SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
1470  int index;
1471 
1472  index = EventCode;
1473 
1474  /* Make sure we are in range */
1475  if (index >= num_events) return PAPI_ENOEVNT;
1476 
1477  strncpy( name, nvml_native_table[index].name, len );
1478 
1479  return PAPI_OK;
1480 }
1481 
1487  int
1488 _papi_nvml_ntv_code_to_descr( unsigned int EventCode, char *descr, int len )
1489 {
1490  int index;
1491  index = EventCode;
1492 
1493  if (index >= num_events) return PAPI_ENOEVNT;
1494 
1495  strncpy( descr, nvml_native_table[index].description, len );
1496 
1497  return PAPI_OK;
1498 }
1499 
1504 int
1505 _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
1506 {
1507 
1508  int index = EventCode;
1509 
1510  if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT;
1511 
1512  strncpy( info->symbol, nvml_native_table[index].name, sizeof(info->symbol)-1);
1513  info->symbol[sizeof(info->symbol)-1] = '\0';
1514 
1515  strncpy( info->units, nvml_native_table[index].units, sizeof(info->units)-1);
1516  info->units[sizeof(info->units)-1] = '\0';
1517 
1518  strncpy( info->long_descr, nvml_native_table[index].description, sizeof(info->long_descr)-1);
1519  info->long_descr[sizeof(info->long_descr)-1] = '\0';
1520 
1521 // info->data_type = nvml_native_table[index].return_type;
1522 
1523  return PAPI_OK;
1524 }
1525 
1527 papi_vector_t _nvml_vector = {
1528  .cmp_info = {
1529  /* default component information */
1530  /* (unspecified values are initialized to 0) */
1531 
1532  .name = "nvml",
1533  .short_name="nvml",
1534  .version = "1.0",
1535  .description = "NVML provides the API for monitoring NVIDIA hardware (power usage, temperature, fan speed, etc)",
1536  .support_version = "n/a",
1537  .kernel_version = "n/a",
1538 
1539  .num_preset_events = 0,
1540  .num_native_events = 0, /* set by init_component */
1541  .default_domain = PAPI_DOM_USER,
1542  .available_domains = PAPI_DOM_USER,
1543  .default_granularity = PAPI_GRN_THR,
1544  .available_granularities = PAPI_GRN_THR,
1545  .hardware_intr_sig = PAPI_INT_SIGNAL,
1546 
1547 
1548  /* component specific cmp_info initializations */
1549  .hardware_intr = 0,
1550  .precise_intr = 0,
1551  .posix1b_timers = 0,
1552  .kernel_profile = 0,
1553  .kernel_multiplex = 0,
1554  .fast_counter_read = 0,
1555  .fast_real_timer = 0,
1556  .fast_virtual_timer = 0,
1557  .attach = 0,
1558  .attach_must_ptrace = 0,
1559  .cntr_umasks = 0,
1560  .cpu = 0,
1561  .inherit = 0,
1562  },
1563 
1564  /* sizes of framework-opaque component-private structures */
1565  .size = {
1566  .context = sizeof ( nvml_context_t ),
1567  .control_state = sizeof ( nvml_control_state_t ),
1568  .reg_value = sizeof ( nvml_register_t ),
1569  // .reg_alloc = sizeof ( nvml_reg_alloc_t ),
1570  },
1571 
1572  /* function pointers */
1573 
1574  /* Used for general PAPI interactions */
1576  .stop = _papi_nvml_stop,
1577  .read = _papi_nvml_read,
1578  .reset = _papi_nvml_reset,
1580  .init_component = _papi_nvml_init_component,
1581  .init_thread = _papi_nvml_init_thread,
1582  .init_control_state = _papi_nvml_init_control_state,
1583  .update_control_state = _papi_nvml_update_control_state,
1584  .ctl = _papi_nvml_ctl,
1585  .shutdown_thread = _papi_nvml_shutdown_thread,
1586  .shutdown_component = _papi_nvml_shutdown_component,
1588  .cleanup_eventset = NULL,
1589  /* called in add_native_events() */
1590  .allocate_registers = NULL,
1591 
1592  /* Used for overflow/profiling */
1593  .dispatch_timer = NULL,
1594  .get_overflow_address = NULL,
1595  .stop_profiling = NULL,
1596  .set_overflow = NULL,
1597  .set_profile = NULL,
1598 
1599  /* Name Mapping Functions */
1600  .ntv_enum_events = _papi_nvml_ntv_enum_events,
1601  .ntv_name_to_code = NULL,
1602  .ntv_code_to_name = _papi_nvml_ntv_code_to_name,
1603  .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr,
1604  .ntv_code_to_info = _papi_nvml_ntv_code_to_info,
1605 
1606 };
1607 
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:626
#define PAPI_ENOEVNT
Definition: papi.h:258
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
ssize_t read(int fd, void *buf, size_t count)
Definition: appio.c:225
memset(eventId, 0, size)
unsigned long long getPState(nvmlDevice_t dev)
Definition: linux-nvml.c:252
int _papi_nvml_ntv_code_to_name(unsigned int EventCode, char *name, int len)
Definition: linux-nvml.c:1467
long long flags
Definition: iozone.c:12330
static int linkCudaLibraries()
Definition: linux-nvml.c:998
#define papi_free(a)
Definition: papi_memory.h:35
unsigned long long getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
Definition: linux-nvml.c:338
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
start
Definition: iozone.c:22736
int type
Definition: linux-nvml.h:51
unsigned long long getPowerUsage(nvmlDevice_t dev)
Definition: linux-nvml.c:308
#define papi_malloc(a)
Definition: papi_memory.h:34
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
#define PAPI_ENOSUPP
Definition: papi.h:269
void * get_overflow_address(void *context)
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
#define DECLDIR
static int num_events
struct local_ecc ecc_opts
Definition: linux-nvml.h:41
int _papi_nvml_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
Definition: linux-nvml.c:1252
int _papi_nvml_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1227
#define PAPI_DOM_KERNEL
Definition: papi.h:298
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:966
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
int _papi_nvml_write(hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
Definition: linux-nvml.c:1279
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:963
#define PAPI_DOM_ALL
Definition: papi.h:301
unsigned long long getMemoryInfo(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:228
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
struct cache_ent * entry
Definition: libasync.c:1170
nvmlEccBitType_t bits
Definition: linux-nvml.h:35
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
#define PAPI_DOM_USER
Definition: papi.h:296
#define NVML_MAX_COUNTERS
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:48
int _papi_nvml_shutdown_component()
Definition: linux-nvml.c:1315
void double value
Definition: iozone.c:18781
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
Return codes and api definitions.
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1527
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
int _papi_nvml_init_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:469
char events[MAX_EVENTS][BUFSIZ]
nvml_control_state_t
Definition: linux-nvml.c:135
int _papi_nvml_shutdown_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:1341
int _papi_nvml_ntv_enum_events(unsigned int *EventCode, int modifier)
Definition: linux-nvml.c:1426
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
nvml_resource_options_t options
Definition: linux-nvml.h:47
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
int nvml_register_t
Definition: linux-nvml.h:32
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:41
int i
Definition: fileop.c:140
Definition: linux-nvml.h:45
ssize_t write(int fd, const void *buf, size_t count)
Definition: appio.c:298
static int device_count
Definition: linux-nvml.c:147
static int set_domain(hwd_control_state_t *cntrl, unsigned int domain)
unsigned long long getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
Definition: linux-nvml.c:170
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
long long found
Definition: libasync.c:735
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
static int cidx
Definition: event_info.c:40
int _papi_nvml_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1209
#define PAPI_ESYS
Definition: papi.h:253
static int native
Definition: event_info.c:39
nvmlClockType_t clock
Definition: linux-nvml.h:40
#define FEATURE_TEMP
Definition: linux-nvml.h:13
__attribute__((constructor))
Definition: init_fini.c:12
stop
Definition: iozone.c:22741
int _papi_nvml_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
Definition: linux-nvml.c:1488
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
long long
Definition: iozone.c:19827
#define CUDAAPI
static int * features
Definition: linux-nvml.c:153
int _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
Definition: linux-nvml.c:1505
#define PAPI_INT_SIGNAL
Definition: papi_internal.h:53
nvmlDevice_t handle
Definition: linux-nvml.c:399
int _papi_nvml_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
Definition: linux-nvml.c:1182
#define PAPI_GRN_THR
Definition: papi.h:360
#define FEATURE_POWER
Definition: linux-nvml.h:12
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:49
strcpy(filename, default_filename)
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
static void nvml_hardware_reset()
Definition: linux-nvml.c:380
unsigned long long getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:156
int cudaIdx
Definition: linux-nvml.c:400
char * name
Definition: iozone.c:23648
unsigned long long getTemperature(nvmlDevice_t dev)
Definition: linux-nvml.c:323
int
Definition: iozone.c:18528
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
int temp
Definition: iozone.c:22158
static int detectDevices()
Definition: linux-nvml.c:479
child_idents[x-1] state
Definition: iozone.c:21341
unsigned long long getFanSpeed(nvmlDevice_t dev)
Definition: linux-nvml.c:198
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define HAS_FEATURE(features, query)
Definition: linux-nvml.h:17
int _papi_nvml_set_domain(hwd_control_state_t *cntrl, int domain)
Definition: linux-nvml.c:1383
#define CUDARTAPI
#define PAPI_MAX_STR_LEN
Definition: papi.h:463
#define PAPI_DOM_OTHER
Definition: papi.h:299
#define GPU_UTILIZATION
Definition: linux-nvml.h:28
int which_one
Definition: linux-nvml.h:36
unsigned long long getUtilization(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:356
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
int _papi_nvml_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
Definition: linux-nvml.c:1358
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25
static void createNativeEvents()
Definition: linux-nvml.c:665
const char * names[NUM_EVENTS]
char units[PAPI_MIN_STR_LEN]
Definition: papi.h:972
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20
int _papi_nvml_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1300
int _papi_nvml_init_control_state(hwd_control_state_t *ctl)
Definition: linux-nvml.c:1170
unsigned long long getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:213
int _papi_nvml_init_component(int cidx)
Definition: linux-nvml.c:909
nvml_control_state_t state
Definition: linux-nvml.c:140