PAPI  5.6.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-nvml.c
Go to the documentation of this file.
1 /****************************
2 THIS IS OPEN SOURCE CODE
3 
4 Part of the PAPI software library. Copyright (c) 2005 - 2017,
5 Innovative Computing Laboratory, Dept of Electrical Engineering &
6 Computer Science University of Tennessee, Knoxville, TN.
7 
8 The open source software license conforms to the 2-clause BSD License
9 template.
10 
11 ****************************/
12 
29 #include <dlfcn.h>
30 
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include <inttypes.h>
35 #include <string.h>
36 /* Headers required by PAPI */
37 #include "papi.h"
38 #include "papi_internal.h"
39 #include "papi_vector.h"
40 #include "papi_memory.h"
41 
42 #include "linux-nvml.h"
43 
44 #include "nvml.h"
45 #include "cuda.h"
46 #include "cuda_runtime_api.h"
47 
49 
50 /***** CHANGE PROTOTYPES TO DECLARE CUDA AND NVML LIBRARY SYMBOLS AS WEAK *****
51  * This is done so that a version of PAPI built with the nvml component can *
52  * be installed on a system which does not have the cuda libraries installed. *
53  * *
54  * If this is done without these prototypes, then all papi services on the *
55  * system without the cuda libraries installed will fail. The PAPI libraries *
56  * contain references to the cuda libraries which are not installed. The *
57  * load of PAPI commands fails because the cuda library references can not be *
58  * resolved. *
59  * *
60  * This also defines pointers to the cuda library functions that we call. *
61  * These function pointers will be resolved with dlopen/dlsym calls at *
62  * component initialization time. The component then calls the cuda library *
63  * functions through these function pointers. *
64  ********************************************************************************/
65 #undef CUDAAPI
66 #define CUDAAPI __attribute__((weak))
67 CUresult CUDAAPI cuInit(unsigned int);
68 
69 CUresult(*cuInitPtr)(unsigned int);
70 
71 #undef CUDARTAPI
72 #define CUDARTAPI __attribute__((weak))
73 cudaError_t CUDARTAPI cudaGetDevice(int *);
74 cudaError_t CUDARTAPI cudaGetDeviceCount(int *);
75 cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int);
76 
77 cudaError_t (*cudaGetDevicePtr)(int *);
78 cudaError_t (*cudaGetDeviceCountPtr)(int *);
79 cudaError_t (*cudaDeviceGetPCIBusIdPtr)(char *, int, int);
80 
81 #undef DECLDIR
82 #define DECLDIR __attribute__((weak))
83 nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t, nvmlClockType_t, unsigned int *);
84 const char* DECLDIR nvmlErrorString(nvmlReturn_t);
85 nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
86 nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t, unsigned int *);
87 nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t, nvmlMemory_t *);
88 nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState(nvmlDevice_t, nvmlPstates_t *);
89 nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t, unsigned int *);
90 nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
91 nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
92 nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates(nvmlDevice_t, nvmlUtilization_t *);
93 nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int, nvmlDevice_t *);
94 nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo(nvmlDevice_t, nvmlPciInfo_t *);
95 nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t, char *, unsigned int);
96 nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
97 nvmlReturn_t DECLDIR nvmlDeviceGetEccMode(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
98 nvmlReturn_t DECLDIR nvmlInit(void);
99 nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *);
100 nvmlReturn_t DECLDIR nvmlShutdown(void);
101 nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int* limit);
102 nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit);
103 nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, unsigned int* minLimit, unsigned int* maxLimit);
104 
/*
 * Function pointers through which this component calls the NVML library.
 * Each one mirrors the weak prototype of the same name (minus the "Ptr"
 * suffix) declared above; the rest of this file invokes NVML only via
 * these pointers, e.g. (*nvmlDeviceGetFanSpeedPtr)(dev, &ret).
 */
105 nvmlReturn_t (*nvmlDeviceGetClockInfoPtr)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
/* NOTE(review): this pointer returns char*, while the nvmlErrorString
 * prototype above returns const char*. Callers in this file only pass the
 * result to a "%s" format; confirm whether the const should be carried here. */
106 char* (*nvmlErrorStringPtr)(nvmlReturn_t);
107 nvmlReturn_t (*nvmlDeviceGetDetailedEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
108 nvmlReturn_t (*nvmlDeviceGetFanSpeedPtr)(nvmlDevice_t, unsigned int *);
109 nvmlReturn_t (*nvmlDeviceGetMemoryInfoPtr)(nvmlDevice_t, nvmlMemory_t *);
110 nvmlReturn_t (*nvmlDeviceGetPerformanceStatePtr)(nvmlDevice_t, nvmlPstates_t *);
111 nvmlReturn_t (*nvmlDeviceGetPowerUsagePtr)(nvmlDevice_t, unsigned int *);
112 nvmlReturn_t (*nvmlDeviceGetTemperaturePtr)(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
113 nvmlReturn_t (*nvmlDeviceGetTotalEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
114 nvmlReturn_t (*nvmlDeviceGetUtilizationRatesPtr)(nvmlDevice_t, nvmlUtilization_t *);
115 nvmlReturn_t (*nvmlDeviceGetHandleByIndexPtr)(unsigned int, nvmlDevice_t *);
116 nvmlReturn_t (*nvmlDeviceGetPciInfoPtr)(nvmlDevice_t, nvmlPciInfo_t *);
117 nvmlReturn_t (*nvmlDeviceGetNamePtr)(nvmlDevice_t, char *, unsigned int);
118 nvmlReturn_t (*nvmlDeviceGetInforomVersionPtr)(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
119 nvmlReturn_t (*nvmlDeviceGetEccModePtr)(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
120 nvmlReturn_t (*nvmlInitPtr)(void);
121 nvmlReturn_t (*nvmlDeviceGetCountPtr)(unsigned int *);
122 nvmlReturn_t (*nvmlShutdownPtr)(void);
/* The three power-management entry points below are the only ones declared
 * with named parameters; the names match their weak prototypes above. */
123 nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int* limit);
124 nvmlReturn_t (*nvmlDeviceSetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int limit);
125 nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitConstraintsPtr)(nvmlDevice_t device, unsigned int* minLimit, unsigned int* maxLimit);
126 
127 // file handles used to access cuda libraries with dlopen
128 static void* dl1 = NULL;
129 static void* dl2 = NULL;
130 static void* dl3 = NULL;
131 
132 static int linkCudaLibraries();
133 
134 /* Declare our vector in advance */
136 
137 /* Up to 25 events per card; how many cards per system should we allow for?! */
138 #define NVML_MAX_COUNTERS 100
139 
143 typedef struct nvml_control_state {
144  int num_events;
145  int which_counter[NVML_MAX_COUNTERS];
146  long long counter[NVML_MAX_COUNTERS];
148 
150 typedef struct nvml_context {
153 
156 
158 static int device_count = 0;
159 
161 static int num_events = 0;
162 
163 static nvmlDevice_t* devices = NULL;
164 static int* features = NULL;
165 static unsigned int *power_management_initial_limit = NULL;
166 static unsigned int *power_management_limit_constraint_min = NULL;
167 static unsigned int *power_management_limit_constraint_max = NULL;
168 
169 unsigned long long
170 getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
171 {
172  unsigned int ret = 0;
173  nvmlReturn_t bad;
174  bad = (*nvmlDeviceGetClockInfoPtr)(dev, which_one, &ret);
175 
176  if (NVML_SUCCESS != bad) {
177  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
178  }
179 
180  return (unsigned long long)ret;
181 }
182 
183 unsigned long long
184 getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
185 {
186  nvmlEccErrorCounts_t counts;
187 
188  nvmlReturn_t bad;
189  bad = (*nvmlDeviceGetDetailedEccErrorsPtr)(dev, bits, NVML_VOLATILE_ECC , &counts);
190 
191  if (NVML_SUCCESS != bad) {
192  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
193  }
194  switch (which_one) {
195  case LOCAL_ECC_REGFILE:
196  return counts.registerFile;
197  case LOCAL_ECC_L1:
198  return counts.l1Cache;
199  case LOCAL_ECC_L2:
200  return counts.l2Cache;
201  case LOCAL_ECC_MEM:
202  return counts.deviceMemory;
203  default:
204  ;
205  }
206  return (unsigned long long) - 1;
207 }
208 
209 unsigned long long
210 getFanSpeed(nvmlDevice_t dev)
211 {
212  unsigned int ret = 0;
213  nvmlReturn_t bad;
214  bad = (*nvmlDeviceGetFanSpeedPtr)(dev, &ret);
215 
216  if (NVML_SUCCESS != bad) {
217  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
218  }
219  return (unsigned long long)ret;
220 }
221 
222 unsigned long long
223 getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
224 {
225  unsigned int ret = 0;
226  nvmlReturn_t bad;
227  bad = (*nvmlDeviceGetClockInfoPtr)(dev, which_one, &ret);
228 
229  if (NVML_SUCCESS != bad) {
230  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
231  }
232  return (unsigned long long) ret;
233 }
234 
/*
 * Return one field of the memory information NVML reports for a device.
 *
 * dev        NVML handle of the device to query.
 * which_one  Selects the field: MEMINFO_UNALLOCED returns meminfo.free and
 *            MEMINFO_ALLOCED returns meminfo.used.
 *
 * Returns the selected field, or (unsigned long long)-1 when which_one
 * matches no case. A failing NVML status is only logged through SUBDBG.
 */
235 unsigned long long
236 getMemoryInfo(nvmlDevice_t dev, int which_one)
237 {
/* NOTE(review): meminfo is not initialised before the call, and its fields
 * are read below even when the call reports failure - confirm NVML fills
 * the struct in that case, or zero it first. */
238  nvmlMemory_t meminfo;
239  nvmlReturn_t bad;
240  bad = (*nvmlDeviceGetMemoryInfoPtr)(dev, &meminfo);
241 
242  if (NVML_SUCCESS != bad) {
243  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
244  }
245 
246  switch (which_one) {
/* NOTE(review): the return of meminfo.total below has no case label in
 * front of it, so as shown it cannot be reached - presumably the label for
 * the "total" selector is missing here; verify against the header. */
248  return meminfo.total;
249  case MEMINFO_UNALLOCED:
250  return meminfo.free;
251  case MEMINFO_ALLOCED:
252  return meminfo.used;
253  default:
254  ;
255  }
/* Unknown selector */
256  return (unsigned long long) - 1;
257 }
258 
259 unsigned long long
260 getPState(nvmlDevice_t dev)
261 {
262  unsigned int ret = 0;
263  nvmlPstates_t state = NVML_PSTATE_15;
264  nvmlReturn_t bad;
265  bad = (*nvmlDeviceGetPerformanceStatePtr)(dev, &state);
266 
267  if (NVML_SUCCESS != bad) {
268  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
269  }
270  switch (state) {
271  case NVML_PSTATE_15:
272  ret++;
273  case NVML_PSTATE_14:
274  ret++;
275  case NVML_PSTATE_13:
276  ret++;
277  case NVML_PSTATE_12:
278  ret++;
279  case NVML_PSTATE_11:
280  ret++;
281  case NVML_PSTATE_10:
282  ret++;
283  case NVML_PSTATE_9:
284  ret++;
285  case NVML_PSTATE_8:
286  ret++;
287  case NVML_PSTATE_7:
288  ret++;
289  case NVML_PSTATE_6:
290  ret++;
291  case NVML_PSTATE_5:
292  ret++;
293  case NVML_PSTATE_4:
294  ret++;
295  case NVML_PSTATE_3:
296  ret++;
297  case NVML_PSTATE_2:
298  ret++;
299  case NVML_PSTATE_1:
300  ret++;
301  case NVML_PSTATE_0:
302  break;
303  case NVML_PSTATE_UNKNOWN:
304  default:
305  /* This should never happen?
306  * The API docs just state Unknown performance state... */
307  return (unsigned long long) - 1;
308  }
309  return (unsigned long long)ret;
310 }
311 
312 unsigned long long
313 getPowerUsage(nvmlDevice_t dev)
314 {
315  unsigned int power;
316  nvmlReturn_t bad;
317  bad = (*nvmlDeviceGetPowerUsagePtr)(dev, &power);
318 
319  if (NVML_SUCCESS != bad) {
320  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
321  }
322  return (unsigned long long) power;
323 }
324 
325 unsigned long long
326 getTemperature(nvmlDevice_t dev)
327 {
328  unsigned int ret = 0;
329  nvmlReturn_t bad;
330  bad = (*nvmlDeviceGetTemperaturePtr)(dev, NVML_TEMPERATURE_GPU, &ret);
331 
332  if (NVML_SUCCESS != bad) {
333  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
334  }
335  return (unsigned long long)ret;
336 }
337 
338 unsigned long long
339 getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
340 {
341  unsigned long long counts = 0;
342  nvmlReturn_t bad;
343  bad = (*nvmlDeviceGetTotalEccErrorsPtr)(dev, bits, NVML_VOLATILE_ECC , &counts);
344 
345  if (NVML_SUCCESS != bad) {
346  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
347  }
348  return counts;
349 }
350 
351 /* 0 => gpu util
352  1 => memory util
353  */
354 unsigned long long
355 getUtilization(nvmlDevice_t dev, int which_one)
356 {
357  nvmlUtilization_t util;
358  nvmlReturn_t bad;
359  bad = (*nvmlDeviceGetUtilizationRatesPtr)(dev, &util);
360 
361  if (NVML_SUCCESS != bad) {
362  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
363  }
364 
365  switch (which_one) {
366  case GPU_UTILIZATION:
367  return (unsigned long long) util.gpu;
368  case MEMORY_UTILIZATION:
369  return (unsigned long long) util.memory;
370  default:
371  ;
372  }
373 
374  return (unsigned long long) - 1;
375 }
376 
377 unsigned long long getPowerManagementLimit(nvmlDevice_t dev)
378 {
379  unsigned int limit;
380  nvmlReturn_t rv;
381  rv = (*nvmlDeviceGetPowerManagementLimitPtr)(dev, &limit);
382  if (NVML_SUCCESS != rv) {
383  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(rv));
384  return (unsigned long long) 0;
385  }
386  return (unsigned long long) limit;
387 }
388 
389 static void
391 {
392  /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while
393  * possible to implement a reset on the ECC counters, we pass */
394  /*
395  for ( i=0; i < device_count; i++ )
396  nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC );
397  */
398  int i;
399  nvmlReturn_t ret;
400  unsigned int templimit = 0;
401  for (i = 0; i < device_count; i++) {
402  if (HAS_FEATURE(features[i], FEATURE_POWER_MANAGEMENT)) {
403  // if power management is available
404  if (power_management_initial_limit[i] != 0) {
405  ret = (*nvmlDeviceGetPowerManagementLimitPtr)(devices[i], &templimit);
406  if ((ret == NVML_SUCCESS) && (templimit != power_management_initial_limit[i])) {
407  SUBDBG("Reset power_management_limit on device %d to initial value of %d \n", i, power_management_initial_limit[i]);
408  // if power is not at its initial value
409  // reset to initial value
410  ret = (*nvmlDeviceSetPowerManagementLimitPtr)(devices[i], power_management_initial_limit[i]);
411  if (ret != NVML_SUCCESS)
412  SUBDBG("Unable to reset the NVML power management limit on device %i to %ull (return code %d) \n", i, power_management_initial_limit[i] , ret);
413  }
414  }
415  }
416  }
417 }
418 
420 /* You might replace this with code that accesses */
421 /* hardware or reads values from the operating system. */
/*
 * Read the current value of one native event.
 *
 * value      Output; set to -1 up front and overwritten by the getter that
 *            matches the event's type.
 * which_one  Index into nvml_native_table selecting the event.
 *
 * The device is the one cudaGetDevicePtr reports for the caller; its index
 * is used directly to index features[] and devices[].
 *
 * Returns PAPI_OK after a getter ran, PAPI_EINVAL when the device index is
 * rejected, the device lacks the event's feature, or the type is unhandled.
 * Getter failures are not reported here: the getters log them and return a
 * value.
 */
422 static int
423 nvml_hardware_read(long long *value, int which_one)
424 //, nvml_context_t *ctx)
425 {
427  nvmlDevice_t handle;
428  int cudaIdx = -1;
429 
/* NOTE(review): no declaration of 'entry' is visible in this function, and
 * which_one is not range-checked before indexing the table - verify. */
430  entry = &nvml_native_table[which_one];
431  *value = (long long) - 1;
432  /* replace entry->resources with the current cuda_device->nvml device */
/* The return code is ignored; cudaIdx keeps its initial -1 if the call does
 * not write it, which the check below rejects. */
433  (*cudaGetDevicePtr)(&cudaIdx);
434 
/* NOTE(review): '>' lets cudaIdx == device_count through. Loops elsewhere in
 * this file walk features[]/devices[] with i < device_count, so that index
 * looks like one past the end - presumably this should be '>='; confirm
 * against the allocation size. */
435  if (cudaIdx < 0 || cudaIdx > device_count)
436  return PAPI_EINVAL;
437 
438  /* Make sure the device we are running on has the requested event */
439  if (!HAS_FEATURE(features[cudaIdx] , entry->type))
440  return PAPI_EINVAL;
441 
442  handle = devices[cudaIdx];
443 
/* Dispatch on the event type; each branch stores one getter result (or a
 * cached constraint value) into *value. */
444  switch (entry->type) {
445  case FEATURE_CLOCK_INFO:
446  *value = getClockSpeed(handle, (nvmlClockType_t)entry->options.clock);
447  break;
/* NOTE(review): no case label is visible in front of this and several of the
 * following branches - presumably labels are missing here; verify. */
449  *value = getEccLocalErrors(handle,
450  (nvmlEccBitType_t)entry->options.ecc_opts.bits,
451  (int)entry->options.ecc_opts.which_one);
452  break;
453  case FEATURE_FAN_SPEED:
454  *value = getFanSpeed(handle);
455  break;
456  case FEATURE_MAX_CLOCK:
457  *value = getMaxClockSpeed(handle,
458  (nvmlClockType_t)entry->options.clock);
459  break;
460  case FEATURE_MEMORY_INFO:
461  *value = getMemoryInfo(handle,
462  (int)entry->options.which_one);
463  break;
464  case FEATURE_PERF_STATES:
465  *value = getPState(handle);
466  break;
467  case FEATURE_POWER:
468  *value = getPowerUsage(handle);
469  break;
470  case FEATURE_TEMP:
471  *value = getTemperature(handle);
472  break;
474  *value = getTotalEccErrors(handle,
475  (nvmlEccBitType_t)entry->options.ecc_opts.bits);
476  break;
477  case FEATURE_UTILIZATION:
478  *value = getUtilization(handle,
479  (int)entry->options.which_one);
480  break;
482  *value = getPowerManagementLimit(handle);
483  break;
484 
/* The two constraint values are read from the per-device arrays rather than
 * queried from NVML on each read. */
486  *value = power_management_limit_constraint_min[cudaIdx];
487  break;
488 
490  *value = power_management_limit_constraint_max[cudaIdx];
491  break;
492 
493  default:
494  return PAPI_EINVAL;
495  }
496 
497  return PAPI_OK;
498 }
499 
501 /* You might replace this with code that accesses */
502 /* hardware or reads values from the operating system. */
503 static int nvml_hardware_write(long long *value, int which_one)
504 {
506  nvmlDevice_t handle;
507  int cudaIdx = -1;
508  nvmlReturn_t nvret;
509 
510  entry = &nvml_native_table[which_one];
511  /* replace entry->resources with the current cuda_device->nvml device */
512  (*cudaGetDevicePtr)(&cudaIdx);
513 
514  if (cudaIdx < 0 || cudaIdx > device_count)
515  return PAPI_EINVAL;
516 
517  /* Make sure the device we are running on has the requested event */
518  if (!HAS_FEATURE(features[cudaIdx] , entry->type))
519  return PAPI_EINVAL;
520 
521  handle = devices[cudaIdx];
522 
523  switch (entry->type) {
525  unsigned int setToPower = (unsigned int) * value;
526  if (setToPower < power_management_limit_constraint_min[cudaIdx]) {
527  SUBDBG("Error: Desired power %u mW < minimum %u mW on device %d\n", setToPower, power_management_limit_constraint_min[cudaIdx], cudaIdx);
528  return PAPI_EINVAL;
529  }
530  if (setToPower > power_management_limit_constraint_max[cudaIdx]) {
531  SUBDBG("Error: Desired power %u mW > maximum %u mW on device %d\n", setToPower, power_management_limit_constraint_max[cudaIdx], cudaIdx);
532  return PAPI_EINVAL;
533  }
534  if ((nvret = (*nvmlDeviceSetPowerManagementLimitPtr)(handle, setToPower)) != NVML_SUCCESS) {
535  SUBDBG("Error: %s\n", (*nvmlErrorStringPtr)(nvret));
536  return PAPI_EINVAL;
537  }
538  }
539  break;
540 
541  default:
542  return PAPI_EINVAL;
543  }
544 
545  return PAPI_OK;
546 }
547 
548 /********************************************************************/
549 /* Below are the functions required by the PAPI component interface */
550 /********************************************************************/
551 
553 int
555 {
556  (void) ctx;
557 
558  SUBDBG("Enter: ctx: %p\n", ctx);
559 
560  return PAPI_OK;
561 }
562 
563 static int
565 {
566  nvmlReturn_t ret;
567  nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
568 
569  char name[64];
570  char inforomECC[16];
571  char inforomPower[16];
572  char names[device_count][64];
573 
574  float ecc_version = 0.0;
575  float power_version = 0.0;
576 
577  int i = 0;
578  int isTesla = 0;
579  int isFermi = 0;
580 
581  unsigned int temp = 0;
582 
583  memset(names, 0x0, device_count * 64);
584 
585  /* So for each card, check whats querable */
586  for (i = 0; i < device_count; i++) {
587  isTesla = 0;
588  isFermi = 1;
589  features[i] = 0;
590 
591  ret = (*nvmlDeviceGetHandleByIndexPtr)(i, &devices[i]);
592  if (NVML_SUCCESS != ret) {
593  SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", i, i);
594  return PAPI_ESYS;
595  }
596 
597  ret = (*nvmlDeviceGetNamePtr)(devices[i], name, sizeof(name) - 1);
598  if (NVML_SUCCESS != ret) {
599  SUBDBG("nvmlDeviceGetName failed \n");
600  strncpy(name, "deviceNameUnknown", 17);
601  }
602 
603  name[sizeof(name) - 1] = '\0'; // to safely use strstr operation below, the variable 'name' must be null terminated
604 
605  ret = (*nvmlDeviceGetInforomVersionPtr)(devices[i], NVML_INFOROM_ECC, inforomECC, 16);
606  if (NVML_SUCCESS != ret) {
607  SUBDBG("nvmlGetInforomVersion fails %s\n", (*nvmlErrorStringPtr)(ret));
608  isFermi = 0;
609  }
610  ret = (*nvmlDeviceGetInforomVersionPtr)(devices[i], NVML_INFOROM_POWER, inforomPower, 16);
611  if (NVML_SUCCESS != ret) {
612  /* This implies the card is older then Fermi */
613  SUBDBG("nvmlGetInforomVersion fails %s\n", (*nvmlErrorStringPtr)(ret));
614  SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
615  isFermi = 0;
616  }
617 
618  ecc_version = strtof(inforomECC, NULL);
619  power_version = strtof(inforomPower, NULL);
620 
621  isTesla = (NULL == strstr(name, "Tesla")) ? 0 : 1;
622 
623  /* For Tesla and Quadro products from Fermi and Kepler families. */
624  if (isFermi) {
625  features[i] |= FEATURE_CLOCK_INFO;
626  num_events += 3;
627  }
628 
629  /* For Tesla and Quadro products from Fermi and Kepler families.
630  requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
631  requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
632  requires ECC mode to be enabled. */
633  ret = (*nvmlDeviceGetEccModePtr)(devices[i], &mode, NULL);
634  if (NVML_SUCCESS == ret) {
635  if (NVML_FEATURE_ENABLED == mode) {
636  if (ecc_version >= 2.0) {
637  features[i] |= FEATURE_ECC_LOCAL_ERRORS;
638  num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
639  }
640  if (ecc_version >= 1.0) {
641  features[i] |= FEATURE_ECC_TOTAL_ERRORS;
642  num_events += 2; /* single bit errors, double bit errors */
643  }
644  }
645  } else {
646  SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml return code %d)\n", ret);
647  }
648 
649  /* For all discrete products with dedicated fans */
650  features[i] |= FEATURE_FAN_SPEED;
651  num_events++;
652 
653  /* For Tesla and Quadro products from Fermi and Kepler families. */
654  if (isFermi) {
655  features[i] |= FEATURE_MAX_CLOCK;
656  num_events += 3;
657  }
658 
659  /* For all products */
660  features[i] |= FEATURE_MEMORY_INFO;
661  num_events += 3; /* total, free, used */
662 
663  /* For Tesla and Quadro products from the Fermi and Kepler families. */
664  if (isFermi) {
665  features[i] |= FEATURE_PERF_STATES;
666  num_events++;
667  }
668 
669  /* For "GF11x" Tesla and Quadro products from the Fermi family
670  requires NVML_INFOROM_POWER 3.0 or higher
671  For Tesla and Quadro products from the Kepler family
672  does not require NVML_INFOROM_POWER */
673  /* Just try reading power, if it works, enable it*/
674  ret = (*nvmlDeviceGetPowerUsagePtr)(devices[i], &temp);
675  if (NVML_SUCCESS == ret) {
676  features[i] |= FEATURE_POWER;
677  num_events++;
678  } else {
679  SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on this card. (nvml return code %d)\n", ret);
680  }
681 
682  /* For all discrete and S-class products. */
683  features[i] |= FEATURE_TEMP;
684  num_events++;
685 
686  // For power_management_limit
687  {
688  // Just try the call to see if it works
689  unsigned int templimit = 0;
690  ret = (*nvmlDeviceGetPowerManagementLimitPtr)(devices[i], &templimit);
691  if (ret == NVML_SUCCESS && templimit > 0) {
692  power_management_initial_limit[i] = templimit;
693  features[i] |= FEATURE_POWER_MANAGEMENT;
694  num_events += 1;
695  } else {
696  power_management_initial_limit[i] = 0;
697  SUBDBG("nvmlDeviceGetPowerManagementLimit not appear to be supported on this card. (NVML code %d)\n", ret);
698  }
699  }
700 
701  // For power_management_limit_constraints, minimum and maximum
702  {
703  unsigned int minLimit = 0, maxLimit = 0;
704  ret = (*nvmlDeviceGetPowerManagementLimitConstraintsPtr)(devices[i], &minLimit, &maxLimit);
705  if (ret == NVML_SUCCESS) {
706  power_management_limit_constraint_min[i] = minLimit;
708  num_events += 1;
709  power_management_limit_constraint_max[i] = maxLimit;
711  num_events += 1;
712  } else {
713  power_management_limit_constraint_min[i] = 0;
714  power_management_limit_constraint_max[i] = INT_MAX;
715  }
716  SUBDBG("Done nvmlDeviceGetPowerManagementLimitConstraintsPtr\n");
717  }
718 
719  /* For Tesla and Quadro products from the Fermi and Kepler families */
720  if (isFermi) {
721  features[i] |= FEATURE_UTILIZATION;
722  num_events += 2;
723  }
724 
725  int retval = snprintf(names[i], sizeof(name), "%s:device:%d", name, i);
726  if (retval > (int)sizeof(name)) {
727  SUBDBG("Device name is too long %s:device%d", name, i);
728  return (PAPI_EINVAL);
729  }
730  names[i][sizeof(name) - 1] = '\0';
731  }
732  return PAPI_OK;
733 }
734 
735 static void
737 {
738  char name[64];
739  char sanitized_name[PAPI_MAX_STR_LEN];
740  char names[device_count][64];
741 
742  int i, nameLen = 0, j;
743 
745  nvmlReturn_t ret;
746 
747  nvml_native_table = (nvml_native_event_entry_t*) papi_malloc(
749  memset(nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events);
750  entry = &nvml_native_table[0];
751 
752  for (i = 0; i < device_count; i++) {
753  memset(names[i], 0x0, 64);
754  ret = (*nvmlDeviceGetNamePtr)(devices[i], name, sizeof(name) - 1);
755  if (NVML_SUCCESS != ret) {
756  SUBDBG("nvmlDeviceGetName failed \n");
757  strncpy(name, "deviceNameUnknown", 17);
758  }
759  name[sizeof(name) - 1] = '\0'; // to safely use strlen operation below, the variable 'name' must be null terminated
760 
761  nameLen = strlen(name);
762  strncpy(sanitized_name, name, PAPI_MAX_STR_LEN);
763 
764  int retval = snprintf(sanitized_name, sizeof(name), "%s:device_%d", name, i);
765  if (retval > (int)sizeof(name)) {
766  SUBDBG("Device name is too long %s:device%d", name, i);
767  return;
768  }
769  sanitized_name[sizeof(name) - 1] = '\0';
770 
771  for (j = 0; j < nameLen; j++)
772  if (' ' == sanitized_name[j])
773  sanitized_name[j] = '_';
774 
775  if (HAS_FEATURE(features[i], FEATURE_CLOCK_INFO)) {
776  sprintf(entry->name, "%s:graphics_clock", sanitized_name);
777  strncpy(entry->description, "Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
778  entry->options.clock = NVML_CLOCK_GRAPHICS;
779  entry->type = FEATURE_CLOCK_INFO;
780  entry++;
781 
782  sprintf(entry->name, "%s:sm_clock", sanitized_name);
783  strncpy(entry->description, "SM clock domain (MHz).", PAPI_MAX_STR_LEN);
784  entry->options.clock = NVML_CLOCK_SM;
785  entry->type = FEATURE_CLOCK_INFO;
786  entry++;
787 
788  sprintf(entry->name, "%s:memory_clock", sanitized_name);
789  strncpy(entry->description, "Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
790  entry->options.clock = NVML_CLOCK_MEM;
791  entry->type = FEATURE_CLOCK_INFO;
792  entry++;
793  }
794 
795  if (HAS_FEATURE(features[i], FEATURE_ECC_LOCAL_ERRORS)) {
796  sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
797  strncpy(entry->description, "L1 cache single bit ECC", PAPI_MAX_STR_LEN);
798  entry->options.ecc_opts = (struct local_ecc) {
799  .bits = NVML_SINGLE_BIT_ECC,
800  .which_one = LOCAL_ECC_L1,
801  };
803  entry++;
804 
805  sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
806  strncpy(entry->description, "L2 cache single bit ECC", PAPI_MAX_STR_LEN);
807  entry->options.ecc_opts = (struct local_ecc) {
808  .bits = NVML_SINGLE_BIT_ECC,
809  .which_one = LOCAL_ECC_L2,
810  };
812  entry++;
813 
814  sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
815  strncpy(entry->description, "Device memory single bit ECC", PAPI_MAX_STR_LEN);
816  entry->options.ecc_opts = (struct local_ecc) {
817  .bits = NVML_SINGLE_BIT_ECC,
818  .which_one = LOCAL_ECC_MEM,
819  };
821  entry++;
822 
823  sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
824  strncpy(entry->description, "Register file single bit ECC", PAPI_MAX_STR_LEN);
825  entry->options.ecc_opts = (struct local_ecc) {
826  .bits = NVML_SINGLE_BIT_ECC,
827  .which_one = LOCAL_ECC_REGFILE,
828  };
830  entry++;
831 
832  sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
833  strncpy(entry->description, "L1 cache double bit ECC", PAPI_MAX_STR_LEN);
834  entry->options.ecc_opts = (struct local_ecc) {
835  .bits = NVML_DOUBLE_BIT_ECC,
836  .which_one = LOCAL_ECC_L1,
837  };
839  entry++;
840 
841  sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
842  strncpy(entry->description, "L2 cache double bit ECC", PAPI_MAX_STR_LEN);
843  entry->options.ecc_opts = (struct local_ecc) {
844  .bits = NVML_DOUBLE_BIT_ECC,
845  .which_one = LOCAL_ECC_L2,
846  };
848  entry++;
849 
850  sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
851  strncpy(entry->description, "Device memory double bit ECC", PAPI_MAX_STR_LEN);
852  entry->options.ecc_opts = (struct local_ecc) {
853  .bits = NVML_DOUBLE_BIT_ECC,
854  .which_one = LOCAL_ECC_MEM,
855  };
857  entry++;
858 
859  sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
860  strncpy(entry->description, "Register file double bit ECC", PAPI_MAX_STR_LEN);
861  entry->options.ecc_opts = (struct local_ecc) {
862  .bits = NVML_DOUBLE_BIT_ECC,
863  .which_one = LOCAL_ECC_REGFILE,
864  };
866  entry++;
867  }
868 
869  if (HAS_FEATURE(features[i], FEATURE_FAN_SPEED)) {
870  sprintf(entry->name, "%s:fan_speed", sanitized_name);
871  strncpy(entry->description, "The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
872  entry->type = FEATURE_FAN_SPEED;
873  entry++;
874  }
875 
876  if (HAS_FEATURE(features[i], FEATURE_MAX_CLOCK)) {
877  sprintf(entry->name, "%s:graphics_max_clock", sanitized_name);
878  strncpy(entry->description, "Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
879  entry->options.clock = NVML_CLOCK_GRAPHICS;
880  entry->type = FEATURE_MAX_CLOCK;
881  entry++;
882 
883  sprintf(entry->name, "%s:sm_max_clock", sanitized_name);
884  strncpy(entry->description, "Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
885  entry->options.clock = NVML_CLOCK_SM;
886  entry->type = FEATURE_MAX_CLOCK;
887  entry++;
888 
889  sprintf(entry->name, "%s:memory_max_clock", sanitized_name);
890  strncpy(entry->description, "Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
891  entry->options.clock = NVML_CLOCK_MEM;
892  entry->type = FEATURE_MAX_CLOCK;
893  entry++;
894  }
895 
896  if (HAS_FEATURE(features[i], FEATURE_MEMORY_INFO)) {
897  sprintf(entry->name, "%s:total_memory", sanitized_name);
898  strncpy(entry->description, "Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
900  entry->type = FEATURE_MEMORY_INFO;
901  entry++;
902 
903  sprintf(entry->name, "%s:unallocated_memory", sanitized_name);
904  strncpy(entry->description, "Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
906  entry->type = FEATURE_MEMORY_INFO;
907  entry++;
908 
909  sprintf(entry->name, "%s:allocated_memory", sanitized_name);
910  strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
912  entry->type = FEATURE_MEMORY_INFO;
913  entry++;
914  }
915 
916  if (HAS_FEATURE(features[i], FEATURE_PERF_STATES)) {
917  sprintf(entry->name, "%s:pstate", sanitized_name);
918  strncpy(entry->description, "The performance state of the device.", PAPI_MAX_STR_LEN);
919  entry->type = FEATURE_PERF_STATES;
920  entry++;
921  }
922 
923  if (HAS_FEATURE(features[i], FEATURE_POWER)) {
924  sprintf(entry->name, "%s:power", sanitized_name);
925  // set the power event units value to "mW" for miliwatts
926  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
927  strncpy(entry->description, "Power usage reading for the device, in miliwatts. This is the power draw (+/-5 watts) for the entire board: GPU, memory, etc.", PAPI_MAX_STR_LEN);
928  entry->type = FEATURE_POWER;
929  entry++;
930  }
931 
932  if (HAS_FEATURE(features[i], FEATURE_TEMP)) {
933  sprintf(entry->name, "%s:temperature", sanitized_name);
934  strncpy(entry->description, "Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
935  entry->type = FEATURE_TEMP;
936  entry++;
937  }
938 
939  if (HAS_FEATURE(features[i], FEATURE_ECC_TOTAL_ERRORS)) {
940  sprintf(entry->name, "%s:total_ecc_errors", sanitized_name);
941  strncpy(entry->description, "Total single bit errors.", PAPI_MAX_STR_LEN);
942  entry->options.ecc_opts = (struct local_ecc) {
943  .bits = NVML_SINGLE_BIT_ECC,
944  };
946  entry++;
947 
948  sprintf(entry->name, "%s:total_ecc_errors", sanitized_name);
949  strncpy(entry->description, "Total double bit errors.", PAPI_MAX_STR_LEN);
950  entry->options.ecc_opts = (struct local_ecc) {
951  .bits = NVML_DOUBLE_BIT_ECC,
952  };
954  entry++;
955  }
956 
957  if (HAS_FEATURE(features[i], FEATURE_UTILIZATION)) {
958  sprintf(entry->name, "%s:gpu_utilization", sanitized_name);
959  strncpy(entry->description, "Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
961  entry->type = FEATURE_UTILIZATION;
962  entry++;
963 
964  sprintf(entry->name, "%s:memory_utilization", sanitized_name);
965  strncpy(entry->description, "Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
967  entry->type = FEATURE_UTILIZATION;
968  entry++;
969  }
970 
971  if (HAS_FEATURE(features[i], FEATURE_POWER_MANAGEMENT)) {
972  sprintf(entry->name, "%s:power_management_limit", sanitized_name);
973  // set the power event units value to "mW" for milliwatts
974  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
975  strncpy(entry->description, "Power management limit in milliwatts associated with the device. The power limit defines the upper boundary for the cards power draw. If the cards total power draw reaches this limit the power management algorithm kicks in. This should be writable (with appropriate privileges) on supported Kepler or later (unit milliWatts). ", PAPI_MAX_STR_LEN);
977  entry++;
978  }
980  sprintf(entry->name, "%s:power_management_limit_constraint_min", sanitized_name);
981  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
982  strncpy(entry->description, "The minimum power management limit in milliwatts.", PAPI_MAX_STR_LEN);
984  entry++;
985  }
986 
988  sprintf(entry->name, "%s:power_management_limit_constraint_max", sanitized_name);
989  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
990  strncpy(entry->description, "The maximum power management limit in milliwatts.", PAPI_MAX_STR_LEN);
992  entry++;
993  }
994 
995  strncpy(names[i], name, sizeof(names[0]) - 1);
996  names[i][sizeof(names[0]) - 1] = '\0';
997  }
998 }
999 
1004 int
1006 {
1007  SUBDBG("Entry: cidx: %d\n", cidx);
1008  nvmlReturn_t ret;
1009  cudaError_t cuerr;
1010  int papi_errorcode;
1011 
1012  int cuda_count = 0;
1013  unsigned int nvml_count = 0;
1014 
1015  /* link in the cuda and nvml libraries and resolve the symbols we need to use */
1016  if (linkCudaLibraries() != PAPI_OK) {
1017  SUBDBG("Dynamic link of CUDA libraries failed, component will be disabled.\n");
1018  SUBDBG("See disable reason in papi_component_avail output for more details.\n");
1019  return (PAPI_ENOSUPP);
1020  }
1021 
1022  ret = (*nvmlInitPtr)();
1023  if (NVML_SUCCESS != ret) {
1024  strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
1025  return PAPI_ENOSUPP;
1026  }
1027 
1028  cuerr = (*cuInitPtr)(0);
1029  if (cudaSuccess != cuerr) {
1030  strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
1031  return PAPI_ENOSUPP;
1032  }
1033 
1034  /* Figure out the number of CUDA devices in the system */
1035  ret = (*nvmlDeviceGetCountPtr)(&nvml_count);
1036  if (NVML_SUCCESS != ret) {
1037  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
1038  return PAPI_ENOSUPP;
1039  }
1040 
1041  cuerr = (*cudaGetDeviceCountPtr)(&cuda_count);
1042  if (cudaSuccess != cuerr) {
1043  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
1044  return PAPI_ENOSUPP;
1045  }
1046 
1047  /* We can probably recover from this, when we're clever */
1048  if ((cuda_count > 0) && (nvml_count != (unsigned int)cuda_count)) {
1049  strcpy(_nvml_vector.cmp_info.disabled_reason, "CUDA and the NVIDIA managament library have different device counts.");
1050  return PAPI_ENOSUPP;
1051  }
1052 
1053  device_count = cuda_count;
1054  SUBDBG("Need to setup NVML with %d devices\n", device_count);
1055 
1056  /* A per device representation of what events are present */
1057  features = (int*)papi_malloc(sizeof(int) * device_count);
1058 
1059  /* Handles to each device */
1060  devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
1061 
1062  /* For each device, store the intial power value to enable reset if power is altered */
1063  power_management_initial_limit = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count);
1064  power_management_limit_constraint_min = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count);
1065  power_management_limit_constraint_max = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count);
1066 
1067  /* Figure out what events are supported on each card. */
1068  if ((papi_errorcode = detectDevices()) != PAPI_OK) {
1069  papi_free(features);
1070  papi_free(devices);
1071  sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install.");
1072  return PAPI_ENOSUPP;
1073  }
1074 
1075  /* The assumption is that if everything went swimmingly in detectDevices,
1076  all nvml calls here should be fine. */
1078 
1079  /* Export the total number of events available */
1080  _nvml_vector.cmp_info.num_native_events = num_events;
1081 
1082  /* Export the component id */
1083  _nvml_vector.cmp_info.CmpIdx = cidx;
1084 
1085  /* Export the number of 'counters' */
1086  _nvml_vector.cmp_info.num_cntrs = num_events;
1087  _nvml_vector.cmp_info.num_mpx_cntrs = num_events;
1088 
1089  return PAPI_OK;
1090 }
1091 
1092 /*
1093  * Link the necessary CUDA libraries to use the cuda component. If any of them can not be found, then
1094  * the CUDA component will just be disabled. This is done at runtime so that a version of PAPI built
1095  * with the CUDA component can be installed and used on systems which have the CUDA libraries installed
1096  * and on systems where these libraries are not installed.
1097  */
1098 static int
1100 {
1101  /* Attempt to guess if we were statically linked to libc, if so bail */
1102  if (_dl_non_dynamic_init != NULL) {
1103  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
1104  return PAPI_ENOSUPP;
1105  }
1106 
1107  /* Need to link in the cuda libraries, if not found disable the component */
1108  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
1109  if (!dl1) {
1110  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.", PAPI_MAX_STR_LEN);
1111  return (PAPI_ENOSUPP);
1112  }
1113  cuInitPtr = dlsym(dl1, "cuInit");
1114  if (dlerror() != NULL) {
1115  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.", PAPI_MAX_STR_LEN);
1116  return (PAPI_ENOSUPP);
1117  }
1118 
1119  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
1120  if (!dl2) {
1121  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.", PAPI_MAX_STR_LEN);
1122  return (PAPI_ENOSUPP);
1123  }
1124  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
1125  if (dlerror() != NULL) {
1126  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.", PAPI_MAX_STR_LEN);
1127  return (PAPI_ENOSUPP);
1128  }
1129  cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
1130  if (dlerror() != NULL) {
1131  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.", PAPI_MAX_STR_LEN);
1132  return (PAPI_ENOSUPP);
1133  }
1134  cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
1135  if (dlerror() != NULL) {
1136  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.", PAPI_MAX_STR_LEN);
1137  return (PAPI_ENOSUPP);
1138  }
1139 
1140  dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
1141  if (!dl3) {
1142  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.", PAPI_MAX_STR_LEN);
1143  return (PAPI_ENOSUPP);
1144  }
1145  nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
1146  if (dlerror() != NULL) {
1147  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.", PAPI_MAX_STR_LEN);
1148  return (PAPI_ENOSUPP);
1149  }
1150  nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
1151  if (dlerror() != NULL) {
1152  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.", PAPI_MAX_STR_LEN);
1153  return (PAPI_ENOSUPP);
1154  }
1155  nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
1156  if (dlerror() != NULL) {
1157  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.", PAPI_MAX_STR_LEN);
1158  return (PAPI_ENOSUPP);
1159  }
1160  nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
1161  if (dlerror() != NULL) {
1162  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.", PAPI_MAX_STR_LEN);
1163  return (PAPI_ENOSUPP);
1164  }
1165  nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
1166  if (dlerror() != NULL) {
1167  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.", PAPI_MAX_STR_LEN);
1168  return (PAPI_ENOSUPP);
1169  }
1170  nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
1171  if (dlerror() != NULL) {
1172  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.", PAPI_MAX_STR_LEN);
1173  return (PAPI_ENOSUPP);
1174  }
1175  nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
1176  if (dlerror() != NULL) {
1177  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.", PAPI_MAX_STR_LEN);
1178  return (PAPI_ENOSUPP);
1179  }
1180  nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
1181  if (dlerror() != NULL) {
1182  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.", PAPI_MAX_STR_LEN);
1183  return (PAPI_ENOSUPP);
1184  }
1185  nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
1186  if (dlerror() != NULL) {
1187  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.", PAPI_MAX_STR_LEN);
1188  return (PAPI_ENOSUPP);
1189  }
1190  nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
1191  if (dlerror() != NULL) {
1192  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.", PAPI_MAX_STR_LEN);
1193  return (PAPI_ENOSUPP);
1194  }
1195  nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
1196  if (dlerror() != NULL) {
1197  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.", PAPI_MAX_STR_LEN);
1198  return (PAPI_ENOSUPP);
1199  }
1200  nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
1201  if (dlerror() != NULL) {
1202  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.", PAPI_MAX_STR_LEN);
1203  return (PAPI_ENOSUPP);
1204  }
1205  nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
1206  if (dlerror() != NULL) {
1207  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.", PAPI_MAX_STR_LEN);
1208  return (PAPI_ENOSUPP);
1209  }
1210  nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
1211  if (dlerror() != NULL) {
1212  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.", PAPI_MAX_STR_LEN);
1213  return (PAPI_ENOSUPP);
1214  }
1215  nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
1216  if (dlerror() != NULL) {
1217  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.", PAPI_MAX_STR_LEN);
1218  return (PAPI_ENOSUPP);
1219  }
1220  nvmlInitPtr = dlsym(dl3, "nvmlInit");
1221  if (dlerror() != NULL) {
1222  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.", PAPI_MAX_STR_LEN);
1223  return (PAPI_ENOSUPP);
1224  }
1225  nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
1226  if (dlerror() != NULL) {
1227  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.", PAPI_MAX_STR_LEN);
1228  return (PAPI_ENOSUPP);
1229  }
1230  nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
1231  if (dlerror() != NULL) {
1232  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.", PAPI_MAX_STR_LEN);
1233  return (PAPI_ENOSUPP);
1234  }
1235  nvmlDeviceGetPowerManagementLimitPtr = dlsym(dl3, "nvmlDeviceGetPowerManagementLimit");
1236  if (dlerror() != NULL) {
1237  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerManagementLimit not found.", PAPI_MAX_STR_LEN);
1238  return (PAPI_ENOSUPP);
1239  }
1240  nvmlDeviceSetPowerManagementLimitPtr = dlsym(dl3, "nvmlDeviceSetPowerManagementLimit");
1241  if (dlerror() != NULL) {
1242  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceSetPowerManagementLimit not found.", PAPI_MAX_STR_LEN);
1243  return (PAPI_ENOSUPP);
1244  }
1245  nvmlDeviceGetPowerManagementLimitConstraintsPtr = dlsym(dl3, "nvmlDeviceGetPowerManagementLimitConstraints");
1246  if (dlerror() != NULL) {
1247  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerManagementLimitConstraints not found.", PAPI_MAX_STR_LEN);
1248  return (PAPI_ENOSUPP);
1249  }
1250  return (PAPI_OK);
1251 }
1252 
1258 int
1260 {
1261  SUBDBG("nvml_init_control_state... %p\n", ctl);
1262  nvml_control_state_t *nvml_ctl = (nvml_control_state_t *) ctl;
1263  memset(nvml_ctl, 0, sizeof(nvml_control_state_t));
1264 
1265  return PAPI_OK;
1266 }
1267 
1269 int
1272  int count,
1273  hwd_context_t *ctx)
1274 {
1275  SUBDBG("Enter: ctl: %p, ctx: %p\n", ctl, ctx);
1276  int i, index;
1277 
1278  nvml_control_state_t *nvml_ctl = (nvml_control_state_t *) ctl;
1279  (void) ctx;
1280 
1281  /* if no events, return */
1282  if (count == 0) return PAPI_OK;
1283 
1284  for (i = 0; i < count; i++) {
1285  index = native[i].ni_event;
1286  nvml_ctl->which_counter[i] = index;
1287  /* We have no constraints on event position, so any event */
1288  /* can be in any slot. */
1289  native[i].ni_position = i;
1290  }
1291  nvml_ctl->num_events = count;
1292  return PAPI_OK;
1293 }
1295 int
1297 {
1298  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1299 
1300  (void) ctx;
1301  (void) ctl;
1302 
1303  /* anything that would need to be set at counter start time */
1304 
1305  /* reset */
1306  /* start the counting */
1307 
1308  return PAPI_OK;
1309 }
1310 
1312 int
1314 {
1315  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1316 
1317  int i;
1318  (void) ctx;
1319  (void) ctl;
1320  int ret;
1321 
1322  nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl;
1323 
1324  for (i = 0; i < nvml_ctl->num_events; i++) {
1325  if (PAPI_OK !=
1326  (ret = nvml_hardware_read(&nvml_ctl->counter[i],
1327  nvml_ctl->which_counter[i])))
1328  return ret;
1329 
1330  }
1331 
1332  return PAPI_OK;
1333 }
1334 
1336 int
1338  long long **events, int flags)
1339 {
1340  SUBDBG("Enter: ctx: %p, flags: %d\n", ctx, flags);
1341 
1342  (void) ctx;
1343  (void) flags;
1344  int i;
1345  int ret;
1346  nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl;
1347 
1348  for (i = 0; i < nvml_ctl->num_events; i++) {
1349  if (PAPI_OK !=
1350  (ret = nvml_hardware_read(&nvml_ctl->counter[i],
1351  nvml_ctl->which_counter[i])))
1352  return ret;
1353 
1354  }
1355  /* return pointer to the values we read */
1356  *events = nvml_ctl->counter;
1357  return PAPI_OK;
1358 }
1359 
1361 /* otherwise, the updated state is written to ESI->hw_start */
1362 int
1364 {
1365  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1366  (void) ctx;
1367  nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl;
1368  int i;
1369  int ret;
1370 
1371  /* You can change ECC mode and compute exclusivity modes on the cards */
1372  /* But I don't see this as a function of a PAPI component at this time */
1373  /* All implementation issues aside. */
1374 
1375  // Currently POWER_MANAGEMENT can be written
1376  for (i = 0; i < nvml_ctl->num_events; i++) {
1377  if (PAPI_OK != (ret = nvml_hardware_write(&events[i], nvml_ctl->which_counter[i])))
1378  return ret;
1379  }
1380 
1381  /* return pointer to the values we read */
1382  return PAPI_OK;
1383 }
1384 
1386 /* If the eventset is not currently running, then the saved value in the */
1387 /* EventSet is set to zero without calling this routine. */
1388 int
1390 {
1391  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1392 
1393  (void) ctx;
1394  (void) ctl;
1395 
1396  /* Reset the hardware */
1398 
1399  return PAPI_OK;
1400 }
1401 
1403 int
1405 {
1406  SUBDBG("Enter:\n");
1408  if (nvml_native_table != NULL) papi_free(nvml_native_table);
1409  if (devices != NULL) papi_free(devices);
1410  if (features != NULL) papi_free(features);
1411  if (power_management_initial_limit) papi_free(power_management_initial_limit);
1412  if (power_management_limit_constraint_min) papi_free(power_management_limit_constraint_min);
1413  if (power_management_limit_constraint_max) papi_free(power_management_limit_constraint_max);
1414  (*nvmlShutdownPtr)();
1415 
1416  device_count = 0;
1417  num_events = 0;
1418 
1419  // close the dynamic libraries needed by this component (opened in the init component call)
1420  if (dl3) dlclose(dl3); dl3=NULL;
1421  if (dl2) dlclose(dl2); dl2=NULL;
1422  if (dl1) dlclose(dl1); dl1=NULL;
1423 
1424  return PAPI_OK;
1425 }
1426 
1428 int
1430 {
1431  SUBDBG("Enter: ctx: %p\n", ctx);
1432 
1433  (void) ctx;
1434 
1435  /* Last chance to clean up thread */
1436 
1437  return PAPI_OK;
1438 }
1439 
1443 int
1445 {
1446  SUBDBG("Enter: ctx: %p, code: %d\n", ctx, code);
1447 
1448  (void) ctx;
1449  (void) code;
1450  (void) option;
1451 
1452  /* FIXME. This should maybe set up more state, such as which counters are active and */
1453  /* counter mappings. */
1454 
1455  return PAPI_OK;
1456 }
1457 
1467 int
1469 {
1470  SUBDBG("Enter: cntrl: %p, domain: %d\n", cntrl, domain);
1471 
1472  (void) cntrl;
1473 
1474  int found = 0;
1475 
1476  if (PAPI_DOM_USER & domain) {
1477  SUBDBG(" PAPI_DOM_USER \n");
1478  found = 1;
1479  }
1480  if (PAPI_DOM_KERNEL & domain) {
1481  SUBDBG(" PAPI_DOM_KERNEL \n");
1482  found = 1;
1483  }
1484  if (PAPI_DOM_OTHER & domain) {
1485  SUBDBG(" PAPI_DOM_OTHER \n");
1486  found = 1;
1487  }
1488  if (PAPI_DOM_ALL & domain) {
1489  SUBDBG(" PAPI_DOM_ALL \n");
1490  found = 1;
1491  }
1492  if (!found)
1493  return (PAPI_EINVAL);
1494 
1495  return PAPI_OK;
1496 }
1497 
1498 /**************************************************************/
1499 /* Naming functions, used to translate event numbers to names */
1500 /**************************************************************/
1501 
1508 int
1509 _papi_nvml_ntv_enum_events(unsigned int *EventCode, int modifier)
1510 {
1511  int index;
1512 
1513  switch (modifier) {
1514 
1515  /* return EventCode of first event */
1516  case PAPI_ENUM_FIRST:
1517  /* return the first event that we support */
1518 
1519  *EventCode = 0;
1520  return PAPI_OK;
1521 
1522  /* return EventCode of next available event */
1523  case PAPI_ENUM_EVENTS:
1524  index = *EventCode;
1525 
1526  /* Make sure we are in range */
1527  if (index < num_events - 1) {
1528 
1529  /* This assumes a non-sparse mapping of the events */
1530  *EventCode = *EventCode + 1;
1531  return PAPI_OK;
1532  } else {
1533  return PAPI_ENOEVNT;
1534  }
1535  break;
1536 
1537  default:
1538  return PAPI_EINVAL;
1539  }
1540 
1541  return PAPI_EINVAL;
1542 }
1543 
1549 int
1550 _papi_nvml_ntv_code_to_name(unsigned int EventCode, char *name, int len)
1551 {
1552  SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
1553  int index;
1554 
1555  index = EventCode;
1556 
1557  /* Make sure we are in range */
1558  if (index >= num_events) return PAPI_ENOEVNT;
1559 
1560  strncpy(name, nvml_native_table[index].name, len);
1561 
1562  return PAPI_OK;
1563 }
1564 
1570 int
1571 _papi_nvml_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
1572 {
1573  int index;
1574  index = EventCode;
1575 
1576  if (index >= num_events) return PAPI_ENOEVNT;
1577 
1578  strncpy(descr, nvml_native_table[index].description, len);
1579 
1580  return PAPI_OK;
1581 }
1582 
1587 int
1588 _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
1589 {
1590 
1591  int index = EventCode;
1592 
1593  if ((index < 0) || (index >= num_events)) return PAPI_ENOEVNT;
1594 
1595  strncpy(info->symbol, nvml_native_table[index].name, sizeof(info->symbol) - 1);
1596  info->symbol[sizeof(info->symbol) - 1] = '\0';
1597 
1598  strncpy(info->units, nvml_native_table[index].units, sizeof(info->units) - 1);
1599  info->units[sizeof(info->units) - 1] = '\0';
1600 
1601  strncpy(info->long_descr, nvml_native_table[index].description, sizeof(info->long_descr) - 1);
1602  info->long_descr[sizeof(info->long_descr) - 1] = '\0';
1603 
1604 // info->data_type = nvml_native_table[index].return_type;
1605 
1606  return PAPI_OK;
1607 }
1608 
1610 papi_vector_t _nvml_vector = {
1611  .cmp_info = {
1612  /* default component information */
1613  /* (unspecified values are initialized to 0) */
1614 
1615  .name = "nvml",
1616  .short_name = "nvml",
1617  .version = "1.0",
1618  .description = "NVML provides the API for monitoring NVIDIA hardware (power usage, temperature, fan speed, etc)",
1619  .support_version = "n/a",
1620  .kernel_version = "n/a",
1621 
1622  .num_preset_events = 0,
1623  .num_native_events = 0, /* set by init_component */
1624  .default_domain = PAPI_DOM_USER,
1625  .available_domains = PAPI_DOM_USER,
1626  .default_granularity = PAPI_GRN_THR,
1627  .available_granularities = PAPI_GRN_THR,
1628  .hardware_intr_sig = PAPI_INT_SIGNAL,
1629 
1630  /* component specific cmp_info initializations */
1631  .hardware_intr = 0,
1632  .precise_intr = 0,
1633  .posix1b_timers = 0,
1634  .kernel_profile = 0,
1635  .kernel_multiplex = 0,
1636  .fast_counter_read = 0,
1637  .fast_real_timer = 0,
1638  .fast_virtual_timer = 0,
1639  .attach = 0,
1640  .attach_must_ptrace = 0,
1641  .cntr_umasks = 0,
1642  .cpu = 0,
1643  .inherit = 0,
1644  },
1645 
1646  /* sizes of framework-opaque component-private structures */
1647  .size = {
1648  .context = sizeof(nvml_context_t),
1649  .control_state = sizeof(nvml_control_state_t),
1650  .reg_value = sizeof(nvml_register_t),
1651  // .reg_alloc = sizeof ( nvml_reg_alloc_t ),
1652  },
1653 
1654  /* function pointers */
1655 
1656  /* Used for general PAPI interactions */
1658  .stop = _papi_nvml_stop,
1659  .read = _papi_nvml_read,
1660  .reset = _papi_nvml_reset,
1662  .init_component = _papi_nvml_init_component,
1663  .init_thread = _papi_nvml_init_thread,
1664  .init_control_state = _papi_nvml_init_control_state,
1665  .update_control_state = _papi_nvml_update_control_state,
1666  .ctl = _papi_nvml_ctl,
1667  .shutdown_thread = _papi_nvml_shutdown_thread,
1668  .shutdown_component = _papi_nvml_shutdown_component,
1670  .cleanup_eventset = NULL,
1671  /* called in add_native_events() */
1672  .allocate_registers = NULL,
1673 
1674  /* Used for overflow/profiling */
1675  .dispatch_timer = NULL,
1676  .get_overflow_address = NULL,
1677  .stop_profiling = NULL,
1678  .set_overflow = NULL,
1679  .set_profile = NULL,
1680 
1681  /* Name Mapping Functions */
1682  .ntv_enum_events = _papi_nvml_ntv_enum_events,
1683  .ntv_name_to_code = NULL,
1684  .ntv_code_to_name = _papi_nvml_ntv_code_to_name,
1685  .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr,
1686  .ntv_code_to_info = _papi_nvml_ntv_code_to_info,
1687 
1688 };
1689 
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:629
#define PAPI_ENOEVNT
Definition: papi.h:260
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
ssize_t read(int fd, void *buf, size_t count)
Definition: appio.c:225
unsigned long long getPState(nvmlDevice_t dev)
Definition: linux-nvml.c:260
int _papi_nvml_ntv_code_to_name(unsigned int EventCode, char *name, int len)
Definition: linux-nvml.c:1550
#define FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MIN
Definition: linux-nvml.h:17
static unsigned int * power_management_limit_constraint_min
Definition: linux-nvml.c:166
static unsigned int * power_management_initial_limit
Definition: linux-nvml.c:165
long long flags
Definition: iozone.c:12330
static int linkCudaLibraries()
Definition: linux-nvml.c:1099
#define papi_free(a)
Definition: papi_memory.h:35
unsigned long long getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
Definition: linux-nvml.c:339
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
start
Definition: iozone.c:22736
int type
Definition: linux-nvml.h:53
unsigned long long getPowerUsage(nvmlDevice_t dev)
Definition: linux-nvml.c:313
#define papi_malloc(a)
Definition: papi_memory.h:34
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:22
#define PAPI_ENOSUPP
Definition: papi.h:271
void * get_overflow_address(void *context)
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
Definition: linux-nvml.c:161
#define DECLDIR
struct local_ecc ecc_opts
Definition: linux-nvml.h:44
int _papi_nvml_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
Definition: linux-nvml.c:1337
int _papi_nvml_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1313
#define PAPI_DOM_KERNEL
Definition: papi.h:300
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:969
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
int _papi_nvml_write(hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
Definition: linux-nvml.c:1363
static int nvml_hardware_write(long long *value, int which_one)
Definition: linux-nvml.c:503
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:966
#define PAPI_DOM_ALL
Definition: papi.h:303
unsigned long long getMemoryInfo(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:236
return PAPI_OK
Definition: linux-nvml.c:497
int count
Definition: iozone.c:22422
struct cache_ent * entry
Definition: libasync.c:1170
nvmlEccBitType_t bits
Definition: linux-nvml.h:38
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:29
#define PAPI_DOM_USER
Definition: papi.h:298
#define NVML_MAX_COUNTERS
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:436
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
int _papi_nvml_shutdown_component()
Definition: linux-nvml.c:1404
void double value
Definition: iozone.c:18781
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:155
Return codes and api definitions.
static void * dl1
Definition: linux-cuda.c:84
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1610
unsigned long long getPowerManagementLimit(nvmlDevice_t dev)
Definition: linux-nvml.c:377
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:24
int _papi_nvml_init_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:554
char events[MAX_EVENTS][BUFSIZ]
nvml_control_state_t
Definition: linux-nvml.c:147
int _papi_nvml_shutdown_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:1429
int _papi_nvml_ntv_enum_events(unsigned int *EventCode, int modifier)
Definition: linux-nvml.c:1509
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
nvml_resource_options_t options
Definition: linux-nvml.h:49
static int cidx
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:636
static void * dl2
Definition: linux-cuda.c:85
int nvml_register_t
Definition: linux-nvml.h:35
static nvmlDevice_t * devices
Definition: linux-nvml.c:163
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:160
int i
Definition: fileop.c:140
Definition: linux-nvml.h:48
ssize_t write(int fd, const void *buf, size_t count)
Definition: appio.c:298
static int device_count
Definition: linux-nvml.c:158
static int set_domain(hwd_control_state_t *cntrl, unsigned int domain)
unsigned long long getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
Definition: linux-nvml.c:184
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:32
long long found
Definition: libasync.c:735
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
int _papi_nvml_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1296
#define PAPI_ESYS
Definition: papi.h:255
nvmlClockType_t clock
Definition: linux-nvml.h:43
#define FEATURE_TEMP
Definition: linux-nvml.h:13
__attribute__((constructor))
Definition: init_fini.c:12
stop
Definition: iozone.c:22741
#define FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MAX
Definition: linux-nvml.h:18
static int native
static unsigned int * power_management_limit_constraint_max
Definition: linux-nvml.c:167
int _papi_nvml_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
Definition: linux-nvml.c:1571
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
long long
Definition: iozone.c:19827
#define CUDAAPI
static int * features
Definition: linux-nvml.c:164
static void * dl3
Definition: linux-cuda.c:86
int _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
Definition: linux-nvml.c:1588
#define PAPI_INT_SIGNAL
Definition: papi_internal.h:53
nvmlDevice_t handle
Definition: linux-nvml.c:427
int _papi_nvml_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
Definition: linux-nvml.c:1270
#define PAPI_GRN_THR
Definition: papi.h:362
#define FEATURE_POWER
Definition: linux-nvml.h:12
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:51
strcpy(filename, default_filename)
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:52
static void nvml_hardware_reset()
Definition: linux-nvml.c:390
unsigned long long getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:170
char * name
Definition: iozone.c:23648
unsigned long long getTemperature(nvmlDevice_t dev)
Definition: linux-nvml.c:326
int
Definition: iozone.c:18528
#define PAPI_MIN_STR_LEN
Definition: papi.h:464
int temp
Definition: iozone.c:22158
static int detectDevices()
Definition: linux-nvml.c:564
child_idents[x-1] state
Definition: iozone.c:21341
unsigned long long getFanSpeed(nvmlDevice_t dev)
Definition: linux-nvml.c:210
#define LOCAL_ECC_L1
Definition: linux-nvml.h:27
#define HAS_FEATURE(features, query)
Definition: linux-nvml.h:20
int _papi_nvml_set_domain(hwd_control_state_t *cntrl, int domain)
Definition: linux-nvml.c:1468
#define CUDARTAPI
#define PAPI_MAX_STR_LEN
Definition: papi.h:465
#define PAPI_DOM_OTHER
Definition: papi.h:301
#define GPU_UTILIZATION
Definition: linux-nvml.h:31
int which_one
Definition: linux-nvml.h:39
#define FEATURE_POWER_MANAGEMENT
Definition: linux-nvml.h:16
unsigned long long getUtilization(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:355
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:26
int _papi_nvml_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
Definition: linux-nvml.c:1444
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
ssize_t retval
Definition: libasync.c:338
#define LOCAL_ECC_L2
Definition: linux-nvml.h:28
static void createNativeEvents()
Definition: linux-nvml.c:736
const char * names[NUM_EVENTS]
char units[PAPI_MIN_STR_LEN]
Definition: papi.h:975
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:23
int cudaIdx
Definition: linux-nvml.c:428
int _papi_nvml_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1389
int _papi_nvml_init_control_state(hwd_control_state_t *ctl)
Definition: linux-nvml.c:1259
unsigned long long getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:223
int _papi_nvml_init_component(int cidx)
Definition: linux-nvml.c:1005
nvml_control_state_t state
Definition: linux-nvml.c:151