PAPI  5.4.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-nvml.c File Reference

This is an NVML component; it demos the component interface and implements two counters, nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature, from the NVIDIA Management Library. Please refer to the NVML documentation for details about nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature. Power is reported in mW and temperature in Celsius. More...

Include dependency graph for linux-nvml.c:

Go to the source code of this file.

Data Structures

struct  nvml_context_t
 

Macros

#define CUDAAPI   __attribute__((weak))
 
#define CUDARTAPI   __attribute__((weak))
 
#define DECLDIR   __attribute__((weak))
 
#define NVML_MAX_COUNTERS   100
 

Functions

unsigned long long getClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
 
unsigned long long getEccLocalErrors (nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
 
unsigned long long getFanSpeed (nvmlDevice_t dev)
 
unsigned long long getMaxClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
 
unsigned long long getMemoryInfo (nvmlDevice_t dev, int which_one)
 
unsigned long long getPState (nvmlDevice_t dev)
 
unsigned long long getPowerUsage (nvmlDevice_t dev)
 
unsigned long long getTemperature (nvmlDevice_t dev)
 
unsigned long long getTotalEccErrors (nvmlDevice_t dev, nvmlEccBitType_t bits)
 
unsigned long long getUtilization (nvmlDevice_t dev, int which_one)
 
static void nvml_hardware_reset ()
 
 switch (entry->type)
 
int _papi_nvml_init_thread (hwd_context_t *ctx)
 
static int detectDevices ()
 
static void createNativeEvents ()
 
int _papi_nvml_init_component (int cidx)
 
static int linkCudaLibraries ()
 
int _papi_nvml_init_control_state (hwd_control_state_t *ctl)
 
int _papi_nvml_update_control_state (hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
 
int _papi_nvml_start (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_stop (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_read (hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
 
int _papi_nvml_write (hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
 
int _papi_nvml_reset (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_shutdown_component ()
 
int _papi_nvml_shutdown_thread (hwd_context_t *ctx)
 
int _papi_nvml_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
int _papi_nvml_set_domain (hwd_control_state_t *cntrl, int domain)
 
int _papi_nvml_ntv_enum_events (unsigned int *EventCode, int modifier)
 
int _papi_nvml_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
int _papi_nvml_ntv_code_to_descr (unsigned int EventCode, char *descr, int len)
 
int _papi_nvml_ntv_code_to_info (unsigned int EventCode, PAPI_event_info_t *info)
 

Variables

void(* _dl_non_dynamic_init )(void)
 
 nvml_control_state_t
 
static nvml_native_event_entry_t * nvml_native_table = NULL
 
static int device_count = 0
 
static int num_events = 0
 
static nvmlDevice_t * devices =NULL
 
static int * features = NULL
 
static int
 
nvmlDevice_t handle = devices[cudaIdx]
 
int cudaIdx = -1
 
 entry = &nvml_native_table[which_one]
 
value = (long long) -1
 
return PAPI_EINVAL
 
return PAPI_OK
 
papi_vector_t _nvml_vector
 

Detailed Description

Author
Kiran Kumar Kasichayanula kkasi.nosp@m.cha@.nosp@m.utk.e.nosp@m.du
James Ralph ralph.nosp@m.@eec.nosp@m.s.utk.nosp@m..edu

Definition in file linux-nvml.c.

Macro Definition Documentation

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define DECLDIR   __attribute__((weak))
#define NVML_MAX_COUNTERS   100

Function Documentation

int _papi_nvml_ctl ( hwd_context_t *  ctx,
int  code,
_papi_int_option_t *  option 
)

This function sets various options in the component

Parameters
code: valid values are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT

Definition at line 1358 of file linux-nvml.c.

1359 {
1360  SUBDBG( "Enter: ctx: %p, code: %d\n", ctx, code );
1361 
1362  (void) ctx;
1363  (void) code;
1364  (void) option;
1365 
1366 
1367  /* FIXME. This should maybe set up more state, such as which counters are active and */
1368  /* counter mappings. */
1369 
1370  return PAPI_OK;
1371 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_init_component ( int  cidx)

Initialize hardware counters, set up the function vector table, and get hardware information. This routine is called when the PAPI process is initialized (i.e., by PAPI_library_init).

Definition at line 909 of file linux-nvml.c.

910 {
911  SUBDBG ("Entry: cidx: %d\n", cidx);
912  nvmlReturn_t ret;
913  cudaError_t cuerr;
914  int papi_errorcode;
915 
916  int cuda_count = 0;
917  unsigned int nvml_count = 0;
918 
919  /* link in the cuda and nvml libraries and resolve the symbols we need to use */
920  if (linkCudaLibraries() != PAPI_OK) {
921  SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
922  SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
923  return (PAPI_ENOSUPP);
924  }
925 
926  ret = (*nvmlInitPtr)();
927  if ( NVML_SUCCESS != ret ) {
928  strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
929  return PAPI_ENOSUPP;
930  }
931 
932  cuerr = (*cuInitPtr)( 0 );
933  if ( CUDA_SUCCESS != cuerr ) {
934  strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
935  return PAPI_ENOSUPP;
936  }
937 
938  /* Figure out the number of CUDA devices in the system */
939  ret = (*nvmlDeviceGetCountPtr)( &nvml_count );
940  if ( NVML_SUCCESS != ret ) {
941  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
942  return PAPI_ENOSUPP;
943  }
944 
945  cuerr = (*cudaGetDeviceCountPtr)( &cuda_count );
946  if ( CUDA_SUCCESS != cuerr ) {
947  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
948  return PAPI_ENOSUPP;
949  }
950 
951  /* We can probably recover from this, when we're clever */
952  if ( (cuda_count > 0) && (nvml_count != (unsigned int)cuda_count ) ) {
953  strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
954  return PAPI_ENOSUPP;
955  }
956 
957  device_count = cuda_count;
958 
959  /* A per device representation of what events are present */
960  features = (int*)papi_malloc(sizeof(int) * device_count );
961 
962  /* Handles to each device */
963  devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
964 
965  /* Figure out what events are supported on each card. */
966  if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
969  sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
970  return PAPI_ENOSUPP;
971  }
972 
973  /* The assumption is that if everything went swimmingly in detectDevices,
974  all nvml calls here should be fine. */
976 
977  /* Export the total number of events available */
979 
980  /* Export the component id */
982 
983  /* Export the number of 'counters' */
986 
987  return PAPI_OK;
988 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
static int linkCudaLibraries()
Definition: linux-nvml.c:998
#define papi_free(a)
Definition: papi_memory.h:35
#define papi_malloc(a)
Definition: papi_memory.h:34
#define PAPI_ENOSUPP
Definition: papi.h:269
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1527
long long ret
Definition: iozone.c:1346
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
static int device_count
Definition: linux-nvml.c:147
static int cidx
Definition: event_info.c:40
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
strcpy(filename, default_filename)
static int detectDevices()
Definition: linux-nvml.c:479
static void createNativeEvents()
Definition: linux-nvml.c:665

Here is the call graph for this function:

int _papi_nvml_init_control_state ( hwd_control_state_t *  ctl)

Set up a counter control state. In general, a control state holds the hardware info for an EventSet.

Definition at line 1170 of file linux-nvml.c.

1171 {
1172  SUBDBG( "nvml_init_control_state... %p\n", ctl );
1173  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1174  memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) );
1175 
1176  return PAPI_OK;
1177 }
memset(eventId, 0, size)
return PAPI_OK
Definition: linux-nvml.c:458
nvml_control_state_t
Definition: linux-nvml.c:135
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the call graph for this function:

int _papi_nvml_init_thread ( hwd_context_t *  ctx)

This is called whenever a thread is initialized

Definition at line 469 of file linux-nvml.c.

470 {
471  (void) ctx;
472 
473  SUBDBG( "Enter: ctx: %p\n", ctx );
474 
475  return PAPI_OK;
476 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_ntv_code_to_descr ( unsigned int  EventCode,
char *  descr,
int  len 
)

Takes a native event code and passes back the event description

Parameters
EventCode: the native event code
descr: a pointer to the buffer the description is copied to
len: the size of the descr buffer

Definition at line 1488 of file linux-nvml.c.

1489 {
1490  int index;
1491  index = EventCode;
1492 
1493  if (index >= num_events) return PAPI_ENOEVNT;
1494 
1495  strncpy( descr, nvml_native_table[index].description, len );
1496 
1497  return PAPI_OK;
1498 }
#define PAPI_ENOEVNT
Definition: papi.h:258
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
char description[PAPI_MAX_STR_LEN]
int _papi_nvml_ntv_code_to_info ( unsigned int  EventCode,
PAPI_event_info_t *  info 
)

Takes a native event code and passes back the event info

Parameters
EventCode: the native event code
info: a pointer to the structure the event info is copied to

Definition at line 1505 of file linux-nvml.c.

1506 {
1507 
1508  int index = EventCode;
1509 
1510  if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT;
1511 
1512  strncpy( info->symbol, nvml_native_table[index].name, sizeof(info->symbol)-1);
1513  info->symbol[sizeof(info->symbol)-1] = '\0';
1514 
1515  strncpy( info->units, nvml_native_table[index].units, sizeof(info->units)-1);
1516  info->units[sizeof(info->units)-1] = '\0';
1517 
1518  strncpy( info->long_descr, nvml_native_table[index].description, sizeof(info->long_descr)-1);
1519  info->long_descr[sizeof(info->long_descr)-1] = '\0';
1520 
1521 // info->data_type = nvml_native_table[index].return_type;
1522 
1523  return PAPI_OK;
1524 }
#define PAPI_ENOEVNT
Definition: papi.h:258
static int num_events
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:966
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:963
return PAPI_OK
Definition: linux-nvml.c:458
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:48
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:49
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
char units[PAPI_MIN_STR_LEN]
Definition: papi.h:972
int _papi_nvml_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)

Takes a native event code and passes back the name

Parameters
EventCode: the native event code
name: a pointer to the buffer the name is copied to
len: the size of the name buffer

Definition at line 1467 of file linux-nvml.c.

1468 {
1469  SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
1470  int index;
1471 
1472  index = EventCode;
1473 
1474  /* Make sure we are in range */
1475  if (index >= num_events) return PAPI_ENOEVNT;
1476 
1477  strncpy( name, nvml_native_table[index].name, len );
1478 
1479  return PAPI_OK;
1480 }
#define PAPI_ENOEVNT
Definition: papi.h:258
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char * name
Definition: iozone.c:23648
int _papi_nvml_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
)

Enumerate Native Events

Parameters
EventCode: the event of interest
modifier: one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS. If your component has attribute masks, then these need to be handled here as well.

Definition at line 1426 of file linux-nvml.c.

1427 {
1428  int index;
1429 
1430  switch ( modifier ) {
1431 
1432  /* return EventCode of first event */
1433  case PAPI_ENUM_FIRST:
1434  /* return the first event that we support */
1435 
1436  *EventCode = 0;
1437  return PAPI_OK;
1438 
1439  /* return EventCode of next available event */
1440  case PAPI_ENUM_EVENTS:
1441  index = *EventCode;
1442 
1443  /* Make sure we are in range */
1444  if ( index < num_events - 1 ) {
1445 
1446  /* This assumes a non-sparse mapping of the events */
1447  *EventCode = *EventCode + 1;
1448  return PAPI_OK;
1449  } else {
1450  return PAPI_ENOEVNT;
1451  }
1452  break;
1453 
1454  default:
1455  return PAPI_EINVAL;
1456  }
1457 
1458  return PAPI_EINVAL;
1459 }
#define PAPI_ENOEVNT
Definition: papi.h:258
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
int _papi_nvml_read ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl,
long long **  events,
int  flags 
)

Triggered by PAPI_read()

Definition at line 1252 of file linux-nvml.c.

1254 {
1255  SUBDBG( "Enter: ctx: %p, flags: %d\n", ctx, flags );
1256 
1257  (void) ctx;
1258  (void) flags;
1259  int i;
1260  int ret;
1261  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1262 
1263 
1264  for (i=0;i<nvml_ctl->num_events;i++) {
1265  if ( PAPI_OK !=
1266  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1267  nvml_ctl->which_counter[i]) ))
1268  return ret;
1269 
1270  }
1271  /* return pointer to the values we read */
1272  *events = nvml_ctl->counter;
1273  return PAPI_OK;
1274 }
long long flags
Definition: iozone.c:12330
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
char events[MAX_EVENTS][BUFSIZ]
nvml_control_state_t
Definition: linux-nvml.c:135
long long ret
Definition: iozone.c:1346
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl 
)

Triggered by PAPI_reset() but only if the EventSet is currently running

Definition at line 1300 of file linux-nvml.c.

1301 {
1302  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1303 
1304  (void) ctx;
1305  (void) ctl;
1306 
1307  /* Reset the hardware */
1309 
1310  return PAPI_OK;
1311 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static void nvml_hardware_reset()
Definition: linux-nvml.c:380

Here is the call graph for this function:

int _papi_nvml_set_domain ( hwd_control_state_t *  cntrl,
int  domain 
)

This function has to set the bits needed to count different domains, in particular PAPI_DOM_USER, PAPI_DOM_KERNEL and PAPI_DOM_OTHER. By default, return PAPI_EINVAL if none of those are specified and PAPI_OK on success. PAPI_DOM_USER: only the user context is counted. PAPI_DOM_KERNEL: only the kernel/OS context is counted. PAPI_DOM_OTHER: exception/transient mode (like user TLB misses). PAPI_DOM_ALL: all of the domains.

Definition at line 1383 of file linux-nvml.c.

1384 {
1385  SUBDBG( "Enter: cntrl: %p, domain: %d\n", cntrl, domain );
1386 
1387  (void) cntrl;
1388 
1389  int found = 0;
1390 
1391  if ( PAPI_DOM_USER & domain ) {
1392  SUBDBG( " PAPI_DOM_USER \n" );
1393  found = 1;
1394  }
1395  if ( PAPI_DOM_KERNEL & domain ) {
1396  SUBDBG( " PAPI_DOM_KERNEL \n" );
1397  found = 1;
1398  }
1399  if ( PAPI_DOM_OTHER & domain ) {
1400  SUBDBG( " PAPI_DOM_OTHER \n" );
1401  found = 1;
1402  }
1403  if ( PAPI_DOM_ALL & domain ) {
1404  SUBDBG( " PAPI_DOM_ALL \n" );
1405  found = 1;
1406  }
1407  if ( !found )
1408  return ( PAPI_EINVAL );
1409 
1410  return PAPI_OK;
1411 }
#define PAPI_DOM_KERNEL
Definition: papi.h:298
#define PAPI_DOM_ALL
Definition: papi.h:301
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_DOM_USER
Definition: papi.h:296
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
long long found
Definition: libasync.c:735
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_DOM_OTHER
Definition: papi.h:299
int _papi_nvml_shutdown_component ( )

Triggered by PAPI_shutdown()

Definition at line 1315 of file linux-nvml.c.

1316 {
1317  SUBDBG( "Enter:\n" );
1318 
1319  if (nvml_native_table != NULL)
1321  if (devices != NULL)
1322  papi_free(devices);
1323  if (features != NULL)
1325 
1326  (*nvmlShutdownPtr)();
1327 
1328  device_count = 0;
1329  num_events = 0;
1330 
1331  // close the dynamic libraries needed by this component (opened in the init component call)
1332  dlclose(dl1);
1333  dlclose(dl2);
1334  dlclose(dl3);
1335 
1336  return PAPI_OK;
1337 }
#define papi_free(a)
Definition: papi_memory.h:35
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
static int device_count
Definition: linux-nvml.c:147
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
int _papi_nvml_shutdown_thread ( hwd_context_t *  ctx)

Called at thread shutdown

Definition at line 1341 of file linux-nvml.c.

1342 {
1343  SUBDBG( "Enter: ctx: %p\n", ctx );
1344 
1345  (void) ctx;
1346 
1347  /* Last chance to clean up thread */
1348 
1349  return PAPI_OK;
1350 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_start ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl 
)

Triggered by PAPI_start()

Definition at line 1209 of file linux-nvml.c.

1210 {
1211  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1212 
1213  (void) ctx;
1214  (void) ctl;
1215 
1216  /* anything that would need to be set at counter start time */
1217 
1218  /* reset */
1219  /* start the counting */
1220 
1221  return PAPI_OK;
1222 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_stop ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl 
)

Triggered by PAPI_stop()

Definition at line 1227 of file linux-nvml.c.

1228 {
1229  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1230 
1231  int i;
1232  (void) ctx;
1233  (void) ctl;
1234  int ret;
1235 
1236  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1237 
1238  for (i=0;i<nvml_ctl->num_events;i++) {
1239  if ( PAPI_OK !=
1240  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1241  nvml_ctl->which_counter[i]) ))
1242  return ret;
1243 
1244  }
1245 
1246  return PAPI_OK;
1247 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
long long ret
Definition: iozone.c:1346
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_update_control_state ( hwd_control_state_t *  ctl,
NativeInfo_t *  native,
int  count,
hwd_context_t *  ctx 
)

Triggered by eventset operations like add or remove

Definition at line 1182 of file linux-nvml.c.

1186 {
1187  SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx );
1188  int i, index;
1189 
1190  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1191  (void) ctx;
1192 
1193 
1194  /* if no events, return */
1195  if (count==0) return PAPI_OK;
1196 
1197  for( i = 0; i < count; i++ ) {
1198  index = native[i].ni_event;
1199  nvml_ctl->which_counter[i]=index;
1200  /* We have no constraints on event position, so any event */
1201  /* can be in any slot. */
1202  native[i].ni_position = i;
1203  }
1204  nvml_ctl->num_events=count;
1205  return PAPI_OK;
1206 }
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_write ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl,
long long *  events 
)

Triggered by PAPI_write(), but only if the counters are running

Definition at line 1279 of file linux-nvml.c.

1281 {
1282  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1283 
1284  (void) ctx;
1285  (void) ctl;
1286  (void) events;
1287 
1288 
1289  /* You can change ECC mode and compute exclusivity modes on the cards */
1290  /* But I don't see this as a function of a PAPI component at this time */
1291  /* All implementation issues aside. */
1292  return PAPI_OK;
1293 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
char events[MAX_EVENTS][BUFSIZ]
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static void createNativeEvents ( void  )
static

Definition at line 665 of file linux-nvml.c.

666 {
667  char name[64];
668  char sanitized_name[PAPI_MAX_STR_LEN];
669  char names[device_count][64];
670 
671  int i, nameLen = 0, j;
672  int isUnique = 1;
673 
675  nvmlReturn_t ret;
676 
680  entry = &nvml_native_table[0];
681 
682  for (i=0; i < device_count; i++ ) {
683  memset( names[i], 0x0, 64 );
684  isUnique = 1;
685  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, sizeof(name)-1 );
686  name[sizeof(name)-1] = '\0'; // to safely use strlen operation below, the variable 'name' must be null terminated
687 
688  for (j=0; j < i; j++ )
689  {
690  if ( 0 == strncmp( name, names[j], 64 ) )
691  isUnique = 0;
692  }
693 
694  if ( isUnique ) {
695  nameLen = strlen(name);
696  strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
697  for (j=0; j < nameLen; j++)
698  if ( ' ' == sanitized_name[j] )
699  sanitized_name[j] = '_';
700 
701 
702 
703  if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
704  sprintf( entry->name, "%s:graphics_clock", sanitized_name );
705  strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
706  entry->options.clock = NVML_CLOCK_GRAPHICS;
707  entry->type = FEATURE_CLOCK_INFO;
708  entry++;
709 
710  sprintf( entry->name, "%s:sm_clock", sanitized_name);
711  strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
712  entry->options.clock = NVML_CLOCK_SM;
713  entry->type = FEATURE_CLOCK_INFO;
714  entry++;
715 
716  sprintf( entry->name, "%s:memory_clock", sanitized_name);
717  strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
718  entry->options.clock = NVML_CLOCK_MEM;
719  entry->type = FEATURE_CLOCK_INFO;
720  entry++;
721  }
722 
724  sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
725  strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
726  entry->options.ecc_opts = (struct local_ecc){
727  .bits = NVML_SINGLE_BIT_ECC,
728  .which_one = LOCAL_ECC_L1,
729  };
731  entry++;
732 
733  sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
734  strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
735  entry->options.ecc_opts = (struct local_ecc){
736  .bits = NVML_SINGLE_BIT_ECC,
737  .which_one = LOCAL_ECC_L2,
738  };
740  entry++;
741 
742  sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
743  strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
744  entry->options.ecc_opts = (struct local_ecc){
745  .bits = NVML_SINGLE_BIT_ECC,
746  .which_one = LOCAL_ECC_MEM,
747  };
749  entry++;
750 
751  sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
752  strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
753  entry->options.ecc_opts = (struct local_ecc){
754  .bits = NVML_SINGLE_BIT_ECC,
755  .which_one = LOCAL_ECC_REGFILE,
756  };
758  entry++;
759 
760  sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
761  strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
762  entry->options.ecc_opts = (struct local_ecc){
763  .bits = NVML_DOUBLE_BIT_ECC,
764  .which_one = LOCAL_ECC_L1,
765  };
767  entry++;
768 
769  sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
770  strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
771  entry->options.ecc_opts = (struct local_ecc){
772  .bits = NVML_DOUBLE_BIT_ECC,
773  .which_one = LOCAL_ECC_L2,
774  };
776  entry++;
777 
778  sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
779  strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
780  entry->options.ecc_opts = (struct local_ecc){
781  .bits = NVML_DOUBLE_BIT_ECC,
782  .which_one = LOCAL_ECC_MEM,
783  };
785  entry++;
786 
787  sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
788  strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
789  entry->options.ecc_opts = (struct local_ecc){
790  .bits = NVML_DOUBLE_BIT_ECC,
791  .which_one = LOCAL_ECC_REGFILE,
792  };
794  entry++;
795  }
796 
797  if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
798  sprintf( entry->name, "%s:fan_speed", sanitized_name);
799  strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
800  entry->type = FEATURE_FAN_SPEED;
801  entry++;
802  }
803 
804  if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
805  sprintf( entry->name, "%s:graphics_max_clock", sanitized_name);
806  strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
807  entry->options.clock = NVML_CLOCK_GRAPHICS;
808  entry->type = FEATURE_MAX_CLOCK;
809  entry++;
810 
811  sprintf( entry->name, "%s:sm_max_clock", sanitized_name);
812  strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
813  entry->options.clock = NVML_CLOCK_SM;
814  entry->type = FEATURE_MAX_CLOCK;
815  entry++;
816 
817  sprintf( entry->name, "%s:memory_max_clock", sanitized_name);
818  strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
819  entry->options.clock = NVML_CLOCK_MEM;
820  entry->type = FEATURE_MAX_CLOCK;
821  entry++;
822  }
823 
825  sprintf( entry->name, "%s:total_memory", sanitized_name);
826  strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
828  entry->type = FEATURE_MEMORY_INFO;
829  entry++;
830 
831  sprintf( entry->name, "%s:unallocated_memory", sanitized_name);
832  strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
834  entry->type = FEATURE_MEMORY_INFO;
835  entry++;
836 
837  sprintf( entry->name, "%s:allocated_memory", sanitized_name);
838  strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
840  entry->type = FEATURE_MEMORY_INFO;
841  entry++;
842  }
843 
845  sprintf( entry->name, "%s:pstate", sanitized_name);
846  strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
847  entry->type = FEATURE_PERF_STATES;
848  entry++;
849  }
850 
851  if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
852  sprintf( entry->name, "%s:power", sanitized_name);
853  // set the power event units value to "mW" for miliwatts
854  strncpy( entry->units, "mW",PAPI_MIN_STR_LEN);
855  strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw (+/-5 watts) for the entire board: GPU, memory, etc.", PAPI_MAX_STR_LEN);
856  entry->type = FEATURE_POWER;
857  entry++;
858  }
859 
860  if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
861  sprintf( entry->name, "%s:temperature", sanitized_name);
862  strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
863  entry->type = FEATURE_TEMP;
864  entry++;
865  }
866 
868  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
869  strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
870  entry->options.ecc_opts = (struct local_ecc){
871  .bits = NVML_SINGLE_BIT_ECC,
872  };
874  entry++;
875 
876  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
877  strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
878  entry->options.ecc_opts = (struct local_ecc){
879  .bits = NVML_DOUBLE_BIT_ECC,
880  };
882  entry++;
883  }
884 
886  sprintf( entry->name, "%s:gpu_utilization", sanitized_name);
887  strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
889  entry->type = FEATURE_UTILIZATION;
890  entry++;
891 
892  sprintf( entry->name, "%s:memory_utilization", sanitized_name);
893  strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
895  entry->type = FEATURE_UTILIZATION;
896  entry++;
897  }
898  strncpy( names[i], name, sizeof(names[0])-1);
899  names[i][sizeof(names[0])-1] = '\0';
900  }
901  }
902 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
memset(eventId, 0, size)
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
int type
Definition: linux-nvml.h:51
#define papi_malloc(a)
Definition: papi_memory.h:34
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
struct local_ecc ecc_opts
Definition: linux-nvml.h:41
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
struct cache_ent * entry
Definition: libasync.c:1170
nvmlEccBitType_t bits
Definition: linux-nvml.h:35
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:48
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
nvml_resource_options_t options
Definition: linux-nvml.h:47
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
int i
Definition: fileop.c:140
Definition: linux-nvml.h:45
static int device_count
Definition: linux-nvml.c:147
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
nvmlClockType_t clock
Definition: linux-nvml.h:40
#define FEATURE_TEMP
Definition: linux-nvml.h:13
static int * features
Definition: linux-nvml.c:153
#define FEATURE_POWER
Definition: linux-nvml.h:12
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:49
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
char * name
Definition: iozone.c:23648
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define HAS_FEATURE(features, query)
Definition: linux-nvml.h:17
#define PAPI_MAX_STR_LEN
Definition: papi.h:463
#define GPU_UTILIZATION
Definition: linux-nvml.h:28
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25
const char * names[NUM_EVENTS]
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20

Here is the call graph for this function:

Here is the caller graph for this function:

static int detectDevices ( )
static

Definition at line 479 of file linux-nvml.c.

480 {
481  nvmlReturn_t ret;
482  nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
483  nvmlDevice_t handle;
484  nvmlPciInfo_t info;
485 
486  cudaError_t cuerr;
487 
488  char busId[16];
489  char name[64];
490  char inforomECC[16];
491  char inforomPower[16];
492  char names[device_count][64];
493  char nvml_busIds[device_count][16];
494 
495  float ecc_version = 0.0, power_version = 0.0;
496 
497  int i = 0,
498  j = 0;
499  int isTesla = 0;
500  int isFermi = 0;
501  int isUnique = 1;
502 
503  unsigned int temp = 0;
504 
505 
506  /* list of nvml pci_busids */
507  for (i=0; i < device_count; i++) {
508  ret = (*nvmlDeviceGetHandleByIndexPtr)( i, &handle );
509  if ( NVML_SUCCESS != ret ) {
510  SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
511  return PAPI_ESYS;
512  }
513 
514  ret = (*nvmlDeviceGetPciInfoPtr)( handle, &info );
515  if ( NVML_SUCCESS != ret ) {
516  SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", (*nvmlErrorStringPtr)(ret) );
517  return PAPI_ESYS;
518  }
519  strncpy(nvml_busIds[i], info.busId, sizeof(nvml_busIds[i])-1);
520  nvml_busIds[i][sizeof(nvml_busIds[i])-1] = '\0';
521  }
522 
523  /* We want to key our list of nvmlDevice_ts by each device's cuda index */
524  for (i=0; i < device_count; i++) {
525  cuerr = (*cudaDeviceGetPCIBusIdPtr)( busId, 16, i );
526  if ( CUDA_SUCCESS != cuerr ) {
527  SUBDBG("cudaDeviceGetPCIBusId failed.\n");
528  return PAPI_ESYS;
529  }
530  for (j=0; j < device_count; j++ ) {
531  if ( !strncmp( busId, nvml_busIds[j], 16) ) {
532  ret = (*nvmlDeviceGetHandleByIndexPtr)(j, &devices[i] );
533  if ( NVML_SUCCESS != ret ) {
534  SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
535  return PAPI_ESYS;
536  }
537  break;
538  }
539  }
540  }
541 
542  memset(names, 0x0, device_count*64);
543  /* So for each card, check whats querable */
544  for (i=0; i < device_count; i++ ) {
545  isTesla=0;
546  isFermi=1;
547  isUnique = 1;
548  features[i] = 0;
549 
550  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, sizeof(name)-1 );
551  if ( NVML_SUCCESS != ret) {
552  SUBDBG("nvmlDeviceGetName failed \n");
553  return PAPI_ESYS;
554  }
555 
556  name[sizeof(name)-1] = '\0'; // to safely use strstr operation below, the variable 'name' must be null terminated
557 
558  for (j=0; j < i; j++ )
559  if ( 0 == strncmp( name, names[j], 64 ) ) {
560  /* if we have a match, and IF everything is sane,
561  * devices with the same name eg Tesla C2075 share features */
562  isUnique = 0;
563  features[i] = features[j];
564 
565  }
566 
567  if ( isUnique ) {
568  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
569  if ( NVML_SUCCESS != ret ) {
570  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
571  isFermi = 0;
572  }
573  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
574  if ( NVML_SUCCESS != ret ) {
575  /* This implies the card is older then Fermi */
576  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
577  SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
578  isFermi = 0;
579  }
580 
581  ecc_version = strtof(inforomECC, NULL );
582  power_version = strtof( inforomPower, NULL);
583 
584  isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;
585 
586  /* For Tesla and Quadro products from Fermi and Kepler families. */
587  if ( isFermi ) {
589  num_events += 3;
590  }
591 
592  /* For Tesla and Quadro products from Fermi and Kepler families.
593  requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
594  requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
595  requires ECC mode to be enabled. */
596  ret = (*nvmlDeviceGetEccModePtr)( devices[i], &mode, NULL );
597  if ( NVML_SUCCESS == ret ) {
598  if ( NVML_FEATURE_ENABLED == mode) {
599  if ( ecc_version >= 2.0 ) {
601  num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
602  }
603  if ( ecc_version >= 1.0 ) {
605  num_events += 2; /* single bit errors, double bit errors */
606  }
607  }
608  } else {
609  SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml\
610 return code %d)\n", ret);
611  }
612 
613  /* For all discrete products with dedicated fans */
615  num_events++;
616 
617  /* For Tesla and Quadro products from Fermi and Kepler families. */
618  if ( isFermi ) {
620  num_events += 3;
621  }
622 
623  /* For all products */
625  num_events += 3; /* total, free, used */
626 
627  /* For Tesla and Quadro products from the Fermi and Kepler families. */
628  if ( isFermi ) {
630  num_events++;
631  }
632 
633  /* For "GF11x" Tesla and Quadro products from the Fermi family
634  requires NVML_INFOROM_POWER 3.0 or higher
635  For Tesla and Quadro products from the Kepler family
636  does not require NVML_INFOROM_POWER */
637  /* Just try reading power, if it works, enable it*/
638  ret = (*nvmlDeviceGetPowerUsagePtr)( devices[i], &temp);
639  if ( NVML_SUCCESS == ret ) {
641  num_events++;
642  } else {
643  SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on\
644 this card. (nvml return code %d)\n", ret );
645  }
646 
647  /* For all discrete and S-class products. */
648  features[i] |= FEATURE_TEMP;
649  num_events++;
650 
651  /* For Tesla and Quadro products from the Fermi and Kepler families */
652  if (isFermi) {
654  num_events += 2;
655  }
656 
657  strncpy( names[i], name, sizeof(names[0])-1);
658  names[i][sizeof(names[0])-1] = '\0';
659  }
660  }
661  return PAPI_OK;
662 }
memset(eventId, 0, size)
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
return PAPI_OK
Definition: linux-nvml.c:458
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
int i
Definition: fileop.c:140
static int device_count
Definition: linux-nvml.c:147
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
#define PAPI_ESYS
Definition: papi.h:253
#define FEATURE_TEMP
Definition: linux-nvml.h:13
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
nvmlDevice_t handle
Definition: linux-nvml.c:399
#define FEATURE_POWER
Definition: linux-nvml.h:12
char * name
Definition: iozone.c:23648
int temp
Definition: iozone.c:22158
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
const char * names[NUM_EVENTS]

Here is the call graph for this function:

Here is the caller graph for this function:

/**
 * Query one clock of an NVML device through nvmlDeviceGetClockInfo.
 *
 * Source location: line 156 of linux-nvml.c.
 *
 * @param dev        NVML device handle to query.
 * @param which_one  clock type passed straight to NVML.
 * @return the value NVML stored, widened to unsigned long long; 0 if the
 *         call failed without writing a value (the failure is only logged).
 */
unsigned long long getClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one )
{
    unsigned int clock_value = 0;
    nvmlReturn_t status = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &clock_value );

    if ( status != NVML_SUCCESS ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(status));
    }

    return (unsigned long long) clock_value;
}
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/**
 * Read the volatile ECC error count of one memory location of a device.
 *
 * Source location: line 170 of linux-nvml.c.
 *
 * @param dev        NVML device handle to query.
 * @param bits       ECC bit type passed straight to NVML.
 * @param which_one  location selector: LOCAL_ECC_REGFILE, LOCAL_ECC_L1,
 *                   LOCAL_ECC_L2 or LOCAL_ECC_MEM.
 * @return the selected counter, or (unsigned long long)-1 for an unknown
 *         selector.  A failed NVML call is only logged; the counters then
 *         read as 0 unless NVML wrote to them.
 */
unsigned long long getEccLocalErrors( nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one )
{
    /* zeroed so that a failed query never returns indeterminate bytes */
    nvmlEccErrorCounts_t counts = {0};
    nvmlReturn_t bad;

    bad = (*nvmlDeviceGetDetailedEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC, &counts );

    if ( NVML_SUCCESS != bad ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
    }

    switch ( which_one ) {
    case LOCAL_ECC_REGFILE:
        return counts.registerFile;
    case LOCAL_ECC_L1:
        return counts.l1Cache;
    case LOCAL_ECC_L2:
        return counts.l2Cache;
    case LOCAL_ECC_MEM:
        return counts.deviceMemory;
    default:
        ;
    }
    return (unsigned long long)-1;
}
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25

Here is the caller graph for this function:

/**
 * Query the fan speed of an NVML device through nvmlDeviceGetFanSpeed.
 *
 * Source location: line 198 of linux-nvml.c.
 *
 * @param dev  NVML device handle to query.
 * @return the value NVML stored, widened to unsigned long long; 0 if the
 *         call failed without writing a value (the failure is only logged).
 */
unsigned long long getFanSpeed( nvmlDevice_t dev )
{
    unsigned int fan_speed = 0;
    nvmlReturn_t status = (*nvmlDeviceGetFanSpeedPtr)( dev, &fan_speed );

    if ( status != NVML_SUCCESS ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(status));
    }

    return (unsigned long long) fan_speed;
}
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/**
 * Query a clock of an NVML device for the "max clock" events.
 *
 * Source location: line 213 of linux-nvml.c.
 *
 * NOTE(review): this issues the same nvmlDeviceGetClockInfo query that
 * getClockSpeed() uses; confirm whether a dedicated maximum-clock query
 * was intended here.
 *
 * @param dev        NVML device handle to query.
 * @param which_one  clock type passed straight to NVML.
 * @return the value NVML stored, widened to unsigned long long; 0 if the
 *         call failed without writing a value (the failure is only logged).
 */
unsigned long long getMaxClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one )
{
    unsigned int clock_value = 0;
    nvmlReturn_t status = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &clock_value );

    if ( status != NVML_SUCCESS ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(status));
    }

    return (unsigned long long) clock_value;
}
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/**
 * Read one field of a device's memory information.
 *
 * Source location: line 228 of linux-nvml.c.
 *
 * @param dev        NVML device handle to query.
 * @param which_one  field selector: MEMINFO_TOTAL_MEMORY, MEMINFO_UNALLOCED
 *                   or MEMINFO_ALLOCED.
 * @return the total, free or used value reported by NVML, or
 *         (unsigned long long)-1 for an unknown selector.  A failed NVML
 *         call is only logged; the fields then read as 0 unless NVML wrote
 *         to them.
 */
unsigned long long getMemoryInfo( nvmlDevice_t dev, int which_one )
{
    /* zeroed so that a failed query never returns indeterminate bytes */
    nvmlMemory_t meminfo = {0};
    nvmlReturn_t bad;

    bad = (*nvmlDeviceGetMemoryInfoPtr)( dev, &meminfo );

    if ( NVML_SUCCESS != bad ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
    }

    switch ( which_one ) {
    case MEMINFO_TOTAL_MEMORY:
        return meminfo.total;
    case MEMINFO_UNALLOCED:
        return meminfo.free;
    case MEMINFO_ALLOCED:
        return meminfo.used;
    default:
        ;
    }
    return (unsigned long long)-1;
}
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20

Here is the caller graph for this function:

/**
 * Query the power usage of an NVML device through nvmlDeviceGetPowerUsage.
 *
 * Source location: line 308 of linux-nvml.c.
 *
 * @param dev  NVML device handle to query.
 * @return the value NVML stored, widened to unsigned long long; 0 if the
 *         call failed without writing a value (the failure is only logged).
 */
unsigned long long getPowerUsage( nvmlDevice_t dev )
{
    /* starts at 0, like the sibling getters, so a failed query does not
     * return an indeterminate value */
    unsigned int power = 0;
    nvmlReturn_t bad;

    bad = (*nvmlDeviceGetPowerUsagePtr)( dev, &power );

    if ( NVML_SUCCESS != bad ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
    }

    return (unsigned long long) power;
}
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/**
 * Translate a device's NVML performance state into its numeric index.
 *
 * Source location: line 252 of linux-nvml.c.
 *
 * @param dev  NVML device handle to query.
 * @return 0 for NVML_PSTATE_0 up to 15 for NVML_PSTATE_15, or
 *         (unsigned long long)-1 for NVML_PSTATE_UNKNOWN or any other
 *         value.  A failed NVML call is only logged; the state then keeps
 *         its initial NVML_PSTATE_15 unless NVML changed it.
 */
unsigned long long getPState( nvmlDevice_t dev )
{
    unsigned int level = 0;
    nvmlPstates_t state = NVML_PSTATE_15;
    nvmlReturn_t status = (*nvmlDeviceGetPerformanceStatePtr)( dev, &state );

    if ( status != NVML_SUCCESS ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(status));
    }

    switch ( state ) {
    case NVML_PSTATE_0:  level = 0;  break;
    case NVML_PSTATE_1:  level = 1;  break;
    case NVML_PSTATE_2:  level = 2;  break;
    case NVML_PSTATE_3:  level = 3;  break;
    case NVML_PSTATE_4:  level = 4;  break;
    case NVML_PSTATE_5:  level = 5;  break;
    case NVML_PSTATE_6:  level = 6;  break;
    case NVML_PSTATE_7:  level = 7;  break;
    case NVML_PSTATE_8:  level = 8;  break;
    case NVML_PSTATE_9:  level = 9;  break;
    case NVML_PSTATE_10: level = 10; break;
    case NVML_PSTATE_11: level = 11; break;
    case NVML_PSTATE_12: level = 12; break;
    case NVML_PSTATE_13: level = 13; break;
    case NVML_PSTATE_14: level = 14; break;
    case NVML_PSTATE_15: level = 15; break;
    case NVML_PSTATE_UNKNOWN:
    default:
        /* This should never happen?
         * The API docs just state Unknown performance state... */
        return (unsigned long long) -1;
    }

    return (unsigned long long) level;
}
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
child_idents[x-1] state
Definition: iozone.c:21341

Here is the caller graph for this function:

/**
 * Query the NVML_TEMPERATURE_GPU sensor of an NVML device.
 *
 * Source location: line 323 of linux-nvml.c.
 *
 * @param dev  NVML device handle to query.
 * @return the value NVML stored, widened to unsigned long long; 0 if the
 *         call failed without writing a value (the failure is only logged).
 */
unsigned long long getTemperature( nvmlDevice_t dev )
{
    unsigned int temperature = 0;
    nvmlReturn_t status =
        (*nvmlDeviceGetTemperaturePtr)( dev, NVML_TEMPERATURE_GPU, &temperature );

    if ( status != NVML_SUCCESS ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(status));
    }

    return (unsigned long long) temperature;
}
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/**
 * Read the total volatile ECC error count of a device.
 *
 * Source location: line 338 of linux-nvml.c.
 *
 * @param dev   NVML device handle to query.
 * @param bits  ECC bit type passed straight to NVML.
 * @return the count NVML stored; 0 if the call failed without writing a
 *         value (the failure is only logged).
 */
unsigned long long getTotalEccErrors( nvmlDevice_t dev, nvmlEccBitType_t bits )
{
    unsigned long long total = 0;
    nvmlReturn_t status =
        (*nvmlDeviceGetTotalEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &total);

    if ( status != NVML_SUCCESS ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(status));
    }

    return total;
}
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/**
 * Read one utilization rate of a device.
 *
 * Source location: line 356 of linux-nvml.c.
 *
 * @param dev        NVML device handle to query.
 * @param which_one  rate selector: GPU_UTILIZATION or MEMORY_UTILIZATION.
 * @return the selected rate widened to unsigned long long, or
 *         (unsigned long long)-1 for an unknown selector.  A failed NVML
 *         call is only logged; the rates then read as 0 unless NVML wrote
 *         to them.
 */
unsigned long long getUtilization( nvmlDevice_t dev, int which_one )
{
    /* zeroed so that a failed query never returns indeterminate bytes */
    nvmlUtilization_t util = {0};
    nvmlReturn_t bad;

    bad = (*nvmlDeviceGetUtilizationRatesPtr)( dev, &util );

    if ( NVML_SUCCESS != bad ) {
        SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
    }

    switch ( which_one ) {
    case GPU_UTILIZATION:
        return (unsigned long long) util.gpu;
    case MEMORY_UTILIZATION:
        return (unsigned long long) util.memory;
    default:
        ;
    }

    return (unsigned long long) -1;
}
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define GPU_UTILIZATION
Definition: linux-nvml.h:28

Here is the caller graph for this function:

static int linkCudaLibraries ( )
static

Definition at line 998 of file linux-nvml.c.

999 {
1000  /* Attempt to guess if we were statically linked to libc, if so bail */
1001  if ( _dl_non_dynamic_init != NULL ) {
1002  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
1003  return PAPI_ENOSUPP;
1004  }
1005 
1006  /* Need to link in the cuda libraries, if not found disable the component */
1007  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
1008  if (!dl1)
1009  {
1010  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
1011  return ( PAPI_ENOSUPP );
1012  }
1013  cuInitPtr = dlsym(dl1, "cuInit");
1014  if (dlerror() != NULL)
1015  {
1016  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
1017  return ( PAPI_ENOSUPP );
1018  }
1019 
1020  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
1021  if (!dl2)
1022  {
1023  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
1024  return ( PAPI_ENOSUPP );
1025  }
1026  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
1027  if (dlerror() != NULL)
1028  {
1029  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
1030  return ( PAPI_ENOSUPP );
1031  }
1032  cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
1033  if (dlerror() != NULL)
1034  {
1035  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.",PAPI_MAX_STR_LEN);
1036  return ( PAPI_ENOSUPP );
1037  }
1038  cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
1039  if (dlerror() != NULL)
1040  {
1041  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.",PAPI_MAX_STR_LEN);
1042  return ( PAPI_ENOSUPP );
1043  }
1044 
1045  dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
1046  if (!dl3)
1047  {
1048  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.",PAPI_MAX_STR_LEN);
1049  return ( PAPI_ENOSUPP );
1050  }
1051  nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
1052  if (dlerror() != NULL)
1053  {
1054  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.",PAPI_MAX_STR_LEN);
1055  return ( PAPI_ENOSUPP );
1056  }
1057  nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
1058  if (dlerror() != NULL)
1059  {
1060  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.",PAPI_MAX_STR_LEN);
1061  return ( PAPI_ENOSUPP );
1062  }
1063  nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
1064  if (dlerror() != NULL)
1065  {
1066  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.",PAPI_MAX_STR_LEN);
1067  return ( PAPI_ENOSUPP );
1068  }
1069  nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
1070  if (dlerror() != NULL)
1071  {
1072  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.",PAPI_MAX_STR_LEN);
1073  return ( PAPI_ENOSUPP );
1074  }
1075  nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
1076  if (dlerror() != NULL)
1077  {
1078  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.",PAPI_MAX_STR_LEN);
1079  return ( PAPI_ENOSUPP );
1080  }
1081  nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
1082  if (dlerror() != NULL)
1083  {
1084  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.",PAPI_MAX_STR_LEN);
1085  return ( PAPI_ENOSUPP );
1086  }
1087  nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
1088  if (dlerror() != NULL)
1089  {
1090  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.",PAPI_MAX_STR_LEN);
1091  return ( PAPI_ENOSUPP );
1092  }
1093  nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
1094  if (dlerror() != NULL)
1095  {
1096  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.",PAPI_MAX_STR_LEN);
1097  return ( PAPI_ENOSUPP );
1098  }
1099  nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
1100  if (dlerror() != NULL)
1101  {
1102  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.",PAPI_MAX_STR_LEN);
1103  return ( PAPI_ENOSUPP );
1104  }
1105  nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
1106  if (dlerror() != NULL)
1107  {
1108  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.",PAPI_MAX_STR_LEN);
1109  return ( PAPI_ENOSUPP );
1110  }
1111  nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
1112  if (dlerror() != NULL)
1113  {
1114  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.",PAPI_MAX_STR_LEN);
1115  return ( PAPI_ENOSUPP );
1116  }
1117  nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
1118  if (dlerror() != NULL)
1119  {
1120  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.",PAPI_MAX_STR_LEN);
1121  return ( PAPI_ENOSUPP );
1122  }
1123  nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
1124  if (dlerror() != NULL)
1125  {
1126  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.",PAPI_MAX_STR_LEN);
1127  return ( PAPI_ENOSUPP );
1128  }
1129  nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
1130  if (dlerror() != NULL)
1131  {
1132  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.",PAPI_MAX_STR_LEN);
1133  return ( PAPI_ENOSUPP );
1134  }
1135  nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
1136  if (dlerror() != NULL)
1137  {
1138  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.",PAPI_MAX_STR_LEN);
1139  return ( PAPI_ENOSUPP );
1140  }
1141  nvmlInitPtr = dlsym(dl3, "nvmlInit");
1142  if (dlerror() != NULL)
1143  {
1144  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.",PAPI_MAX_STR_LEN);
1145  return ( PAPI_ENOSUPP );
1146  }
1147  nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
1148  if (dlerror() != NULL)
1149  {
1150  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.",PAPI_MAX_STR_LEN);
1151  return ( PAPI_ENOSUPP );
1152  }
1153  nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
1154  if (dlerror() != NULL)
1155  {
1156  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.",PAPI_MAX_STR_LEN);
1157  return ( PAPI_ENOSUPP );
1158  }
1159 
1160  return ( PAPI_OK );
1161 }
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1527
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:41
#define PAPI_MAX_STR_LEN
Definition: papi.h:463

Here is the caller graph for this function:

/**
 * Hardware reset hook for the component; intentionally does nothing.
 *
 * Source location: line 380 of linux-nvml.c.
 */
static void nvml_hardware_reset( )
{
    /* The nvmlDeviceSet* and nvmlDeviceClear* calls need root/admin access.
     * Resetting the ECC counters would therefore be possible (by calling
     * nvmlDeviceClearEccErrorCounts with NVML_VOLATILE_ECC for each of the
     * device_count devices), but this component deliberately skips it. */
}

Here is the caller graph for this function:

switch ( entry->  type)

Definition at line 416 of file linux-nvml.c.

416  {
417  case FEATURE_CLOCK_INFO:
419  (nvmlClockType_t)entry->options.clock );
420  break;
423  (nvmlEccBitType_t)entry->options.ecc_opts.bits,
424  (int)entry->options.ecc_opts.which_one);
425  break;
426  case FEATURE_FAN_SPEED:
427  *value = getFanSpeed( handle );
428  break;
429  case FEATURE_MAX_CLOCK:
431  (nvmlClockType_t)entry->options.clock );
432  break;
433  case FEATURE_MEMORY_INFO:
435  (int)entry->options.which_one );
436  break;
437  case FEATURE_PERF_STATES:
438  *value = getPState( handle );
439  break;
440  case FEATURE_POWER:
441  *value = getPowerUsage( handle );
442  break;
443  case FEATURE_TEMP:
445  break;
448  (nvmlEccBitType_t)entry->options.ecc_opts.bits );
449  break;
450  case FEATURE_UTILIZATION:
452  (int)entry->options.which_one );
453  break;
454  default:
455  return PAPI_EINVAL;
456  }
unsigned long long getPState(nvmlDevice_t dev)
Definition: linux-nvml.c:252
unsigned long long getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
Definition: linux-nvml.c:338
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
unsigned long long getPowerUsage(nvmlDevice_t dev)
Definition: linux-nvml.c:308
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
unsigned long long getMemoryInfo(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:228
struct cache_ent * entry
Definition: libasync.c:1170
return PAPI_EINVAL
Definition: linux-nvml.c:408
void double value
Definition: iozone.c:18781
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
unsigned long long getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
Definition: linux-nvml.c:170
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
#define FEATURE_TEMP
Definition: linux-nvml.h:13
nvmlDevice_t handle
Definition: linux-nvml.c:399
#define FEATURE_POWER
Definition: linux-nvml.h:12
unsigned long long getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:156
unsigned long long getTemperature(nvmlDevice_t dev)
Definition: linux-nvml.c:323
unsigned long long getFanSpeed(nvmlDevice_t dev)
Definition: linux-nvml.c:198
unsigned long long getUtilization(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:356
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
unsigned long long getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:213

Here is the call graph for this function:

Variable Documentation

void(* _dl_non_dynamic_init)(void)

Holds control flags. Usually there is one of these per event set, and it usually carries out-of-band configuration of the hardware.

Copy of counts; holds results when stopped.

Definition at line 39 of file linux-nvml.c.

131 {
132  int num_events;
133  int which_counter[NVML_MAX_COUNTERS];
134  long long counter[NVML_MAX_COUNTERS];
static int num_events
#define NVML_MAX_COUNTERS
nvml_control_state_t
Definition: linux-nvml.c:135
papi_vector_t _nvml_vector

Vector that points to entry points for our component

Definition at line 1527 of file linux-nvml.c.

(*) cudaGetDevicePtr cudaIdx) = -1

Definition at line 400 of file linux-nvml.c.

int device_count = 0
static

Number of devices detected at component_init time

Definition at line 147 of file linux-nvml.c.

nvmlDevice_t* devices =NULL
static

Definition at line 152 of file linux-nvml.c.

entry = &nvml_native_table[which_one]

Definition at line 402 of file linux-nvml.c.

int* features =NULL
static

Definition at line 153 of file linux-nvml.c.

handle = devices[cudaIdx]

Definition at line 399 of file linux-nvml.c.

int
static
Initial value:

Code that reads event values.

Definition at line 397 of file linux-nvml.c.

int num_events = 0
static

number of events in the table

Definition at line 150 of file linux-nvml.c.

nvml_control_state_t

Definition at line 135 of file linux-nvml.c.

nvml_native_event_entry_t* nvml_native_table =NULL
static

This table contains the native events

Definition at line 144 of file linux-nvml.c.

return PAPI_EINVAL

Definition at line 408 of file linux-nvml.c.

return PAPI_OK

Definition at line 458 of file linux-nvml.c.

* value = (long long) -1

Definition at line 403 of file linux-nvml.c.