PAPI  5.3.2.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-nvml.c File Reference

This is an NVML component. It demonstrates the component interface and implements two counters, nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature, from the NVIDIA Management Library. Please refer to the NVML documentation for details about nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature. Power is reported in mW and temperature in Celsius. More...

Include dependency graph for linux-nvml.c:

Go to the source code of this file.

Data Structures

struct  nvml_context_t
 

Macros

#define CUDAAPI   __attribute__((weak))
 
#define CUDARTAPI   __attribute__((weak))
 
#define DECLDIR   __attribute__((weak))
 
#define NVML_MAX_COUNTERS   100
 

Functions

unsigned long long getClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
 
unsigned long long getEccLocalErrors (nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
 
unsigned long long getFanSpeed (nvmlDevice_t dev)
 
unsigned long long getMaxClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
 
unsigned long long getMemoryInfo (nvmlDevice_t dev, int which_one)
 
unsigned long long getPState (nvmlDevice_t dev)
 
unsigned long long getPowerUsage (nvmlDevice_t dev)
 
unsigned long long getTemperature (nvmlDevice_t dev)
 
unsigned long long getTotalEccErrors (nvmlDevice_t dev, nvmlEccBitType_t bits)
 
unsigned long long getUtilization (nvmlDevice_t dev, int which_one)
 
static void nvml_hardware_reset ()
 
 switch (entry->type)
 
int _papi_nvml_init_thread (hwd_context_t *ctx)
 
static int detectDevices ()
 
static void createNativeEvents ()
 
int _papi_nvml_init_component (int cidx)
 
static int linkCudaLibraries ()
 
int _papi_nvml_init_control_state (hwd_control_state_t *ctl)
 
int _papi_nvml_update_control_state (hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
 
int _papi_nvml_start (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_stop (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_read (hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
 
int _papi_nvml_write (hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
 
int _papi_nvml_reset (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_shutdown_component ()
 
int _papi_nvml_shutdown_thread (hwd_context_t *ctx)
 
int _papi_nvml_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
int _papi_nvml_set_domain (hwd_control_state_t *cntrl, int domain)
 
int _papi_nvml_ntv_enum_events (unsigned int *EventCode, int modifier)
 
int _papi_nvml_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
int _papi_nvml_ntv_code_to_descr (unsigned int EventCode, char *descr, int len)
 
int _papi_nvml_ntv_code_to_info (unsigned int EventCode, PAPI_event_info_t *info)
 

Variables

void(* _dl_non_dynamic_init )(void)
 
 nvml_control_state_t
 
static nvml_native_event_entry_t * nvml_native_table =NULL
 
static int device_count = 0
 
static int num_events = 0
 
static nvmlDevice_t * devices =NULL
 
static int * features =NULL
 
static int
 
nvmlDevice_t handle = devices[cudaIdx]
 
int cudaIdx = -1
 
 entry = &nvml_native_table[which_one]
 
value = (long long) -1
 
return PAPI_EINVAL
 
return PAPI_OK
 
papi_vector_t _nvml_vector
 

Detailed Description

Author
Kiran Kumar Kasichayanula kkasi.nosp@m.cha@.nosp@m.utk.e.nosp@m.du
James Ralph ralph.nosp@m.@eec.nosp@m.s.utk.nosp@m..edu

Definition in file linux-nvml.c.

Macro Definition Documentation

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define DECLDIR   __attribute__((weak))
#define NVML_MAX_COUNTERS   100

Function Documentation

int _papi_nvml_ctl ( hwd_context_t *  ctx,
int  code,
_papi_int_option_t *  option 
)

This function sets various options in the component

Parameters
code: valid values are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT

Definition at line 1354 of file linux-nvml.c.

1355 {
1356  SUBDBG( "Enter: ctx: %p, code: %d\n", ctx, code );
1357 
1358  (void) ctx;
1359  (void) code;
1360  (void) option;
1361 
1362 
1363  /* FIXME. This should maybe set up more state, such as which counters are active and */
1364  /* counter mappings. */
1365 
1366  return PAPI_OK;
1367 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_init_component ( int  cidx)

Initialize hardware counters, set up the function vector table, and get hardware information. This routine is called when the PAPI process is initialized (i.e., by PAPI_library_init).

Definition at line 905 of file linux-nvml.c.

906 {
907  SUBDBG ("Entry: cidx: %d\n", cidx);
908  nvmlReturn_t ret;
909  cudaError_t cuerr;
910  int papi_errorcode;
911 
912  int cuda_count = 0;
913  unsigned int nvml_count = 0;
914 
915  /* link in the cuda and nvml libraries and resolve the symbols we need to use */
916  if (linkCudaLibraries() != PAPI_OK) {
917  SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
918  SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
919  return (PAPI_ENOSUPP);
920  }
921 
922  ret = (*nvmlInitPtr)();
923  if ( NVML_SUCCESS != ret ) {
924  strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
925  return PAPI_ENOSUPP;
926  }
927 
928  cuerr = (*cuInitPtr)( 0 );
929  if ( CUDA_SUCCESS != cuerr ) {
930  strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
931  return PAPI_ENOSUPP;
932  }
933 
934  /* Figure out the number of CUDA devices in the system */
935  ret = (*nvmlDeviceGetCountPtr)( &nvml_count );
936  if ( NVML_SUCCESS != ret ) {
937  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
938  return PAPI_ENOSUPP;
939  }
940 
941  cuerr = (*cudaGetDeviceCountPtr)( &cuda_count );
942  if ( CUDA_SUCCESS != cuerr ) {
943  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
944  return PAPI_ENOSUPP;
945  }
946 
947  /* We can probably recover from this, when we're clever */
948  if ( (cuda_count > 0) && (nvml_count != (unsigned int)cuda_count ) ) {
949  strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
950  return PAPI_ENOSUPP;
951  }
952 
953  device_count = cuda_count;
954 
955  /* A per device representation of what events are present */
956  features = (int*)papi_malloc(sizeof(int) * device_count );
957 
958  /* Handles to each device */
959  devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
960 
961  /* Figure out what events are supported on each card. */
962  if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
965  sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
966  return PAPI_ENOSUPP;
967  }
968 
969  /* The assumption is that if everything went swimmingly in detectDevices,
970  all nvml calls here should be fine. */
972 
973  /* Export the total number of events available */
975 
976  /* Export the component id */
978 
979  /* Export the number of 'counters' */
982 
983  return PAPI_OK;
984 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
static int linkCudaLibraries()
Definition: linux-nvml.c:994
#define papi_free(a)
Definition: papi_memory.h:35
#define papi_malloc(a)
Definition: papi_memory.h:34
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1523
long long ret
Definition: iozone.c:1346
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:632
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define PAPI_ENOSUPP
Definition: fpapi.h:123
static int device_count
Definition: linux-nvml.c:147
static int cidx
Definition: event_info.c:40
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
strcpy(filename, default_filename)
static int detectDevices()
Definition: linux-nvml.c:479
static void createNativeEvents()
Definition: linux-nvml.c:663

Here is the call graph for this function:

int _papi_nvml_init_control_state ( hwd_control_state_t *  ctl)

Setup a counter control state. In general a control state holds the hardware info for an EventSet.

Definition at line 1166 of file linux-nvml.c.

1167 {
1168  SUBDBG( "nvml_init_control_state... %p\n", ctl );
1169  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1170  memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) );
1171 
1172  return PAPI_OK;
1173 }
memset(eventId, 0, size)
return PAPI_OK
Definition: linux-nvml.c:458
nvml_control_state_t
Definition: linux-nvml.c:135
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the call graph for this function:

int _papi_nvml_init_thread ( hwd_context_t *  ctx)

This is called whenever a thread is initialized

Definition at line 469 of file linux-nvml.c.

470 {
471  (void) ctx;
472 
473  SUBDBG( "Enter: ctx: %p\n", ctx );
474 
475  return PAPI_OK;
476 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_ntv_code_to_descr ( unsigned int  EventCode,
char *  descr,
int  len 
)

Takes a native event code and passes back the event description

Parameters
EventCode is the native event code
descr is a pointer for the description to be copied to
len is the size of the descr string

Definition at line 1484 of file linux-nvml.c.

1485 {
1486  int index;
1487  index = EventCode;
1488 
1489  if (index >= num_events) return PAPI_ENOEVNT;
1490 
1491  strncpy( descr, nvml_native_table[index].description, len );
1492 
1493  return PAPI_OK;
1494 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
char description[PAPI_MAX_STR_LEN]
int _papi_nvml_ntv_code_to_info ( unsigned int  EventCode,
PAPI_event_info_t *  info 
)

Takes a native event code and passes back the event info

Parameters
EventCode is the native event code
info is a pointer for the info to be copied to

Definition at line 1501 of file linux-nvml.c.

1502 {
1503 
1504  int index = EventCode;
1505 
1506  if ( ( index < 0) || (index >= num_events )) return PAPI_ENOEVNT;
1507 
1508  strncpy( info->symbol, nvml_native_table[index].name,
1509  sizeof(info->symbol));
1510 
1511  strncpy( info->units, nvml_native_table[index].units,
1512  sizeof(info->units));
1513 
1514  strncpy( info->long_descr, nvml_native_table[index].description,
1515  sizeof(info->symbol));
1516 
1517 // info->data_type = nvml_native_table[index].return_type;
1518 
1519  return PAPI_OK;
1520 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
static int num_events
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:964
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:961
return PAPI_OK
Definition: linux-nvml.c:458
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:48
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:49
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
char units[PAPI_MIN_STR_LEN]
Definition: papi.h:970
int _papi_nvml_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)

Takes a native event code and passes back the name

Parameters
EventCode is the native event code
name is a pointer for the name to be copied to
len is the size of the name string

Definition at line 1463 of file linux-nvml.c.

1464 {
1465  SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
1466  int index;
1467 
1468  index = EventCode;
1469 
1470  /* Make sure we are in range */
1471  if (index >= num_events) return PAPI_ENOEVNT;
1472 
1473  strncpy( name, nvml_native_table[index].name, len );
1474 
1475  return PAPI_OK;
1476 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char * name
Definition: iozone.c:23648
int _papi_nvml_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
)

Enumerate Native Events

Parameters
EventCode is the event of interest
modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS. If your component has attribute masks then these need to be handled here as well.

Definition at line 1422 of file linux-nvml.c.

1423 {
1424  int index;
1425 
1426  switch ( modifier ) {
1427 
1428  /* return EventCode of first event */
1429  case PAPI_ENUM_FIRST:
1430  /* return the first event that we support */
1431 
1432  *EventCode = 0;
1433  return PAPI_OK;
1434 
1435  /* return EventCode of next available event */
1436  case PAPI_ENUM_EVENTS:
1437  index = *EventCode;
1438 
1439  /* Make sure we are in range */
1440  if ( index < num_events - 1 ) {
1441 
1442  /* This assumes a non-sparse mapping of the events */
1443  *EventCode = *EventCode + 1;
1444  return PAPI_OK;
1445  } else {
1446  return PAPI_ENOEVNT;
1447  }
1448  break;
1449 
1450  default:
1451  return PAPI_EINVAL;
1452  }
1453 
1454  return PAPI_EINVAL;
1455 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
int _papi_nvml_read ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl,
long long **  events,
int  flags 
)

Triggered by PAPI_read()

Definition at line 1248 of file linux-nvml.c.

1250 {
1251  SUBDBG( "Enter: ctx: %p, flags: %d\n", ctx, flags );
1252 
1253  (void) ctx;
1254  (void) flags;
1255  int i;
1256  int ret;
1257  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1258 
1259 
1260  for (i=0;i<nvml_ctl->num_events;i++) {
1261  if ( PAPI_OK !=
1262  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1263  nvml_ctl->which_counter[i]) ))
1264  return ret;
1265 
1266  }
1267  /* return pointer to the values we read */
1268  *events = nvml_ctl->counter;
1269  return PAPI_OK;
1270 }
long long flags
Definition: iozone.c:12330
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
long long ret
Definition: iozone.c:1346
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char events[MAX_EVENTS][BUFSIZ]
int _papi_nvml_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl 
)

Triggered by PAPI_reset() but only if the EventSet is currently running

Definition at line 1296 of file linux-nvml.c.

1297 {
1298  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1299 
1300  (void) ctx;
1301  (void) ctl;
1302 
1303  /* Reset the hardware */
1305 
1306  return PAPI_OK;
1307 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static void nvml_hardware_reset()
Definition: linux-nvml.c:380

Here is the call graph for this function:

int _papi_nvml_set_domain ( hwd_control_state_t *  cntrl,
int  domain 
)

This function has to set the bits needed to count different domains, in particular PAPI_DOM_USER, PAPI_DOM_KERNEL and PAPI_DOM_OTHER. By default it returns PAPI_EINVAL if none of those are specified and PAPI_OK on success. PAPI_DOM_USER: only user context is counted. PAPI_DOM_KERNEL: only the kernel/OS context is counted. PAPI_DOM_OTHER: exception/transient mode (like user TLB misses). PAPI_DOM_ALL: all of the domains.

Definition at line 1379 of file linux-nvml.c.

1380 {
1381  SUBDBG( "Enter: cntrl: %p, domain: %d\n", cntrl, domain );
1382 
1383  (void) cntrl;
1384 
1385  int found = 0;
1386 
1387  if ( PAPI_DOM_USER & domain ) {
1388  SUBDBG( " PAPI_DOM_USER \n" );
1389  found = 1;
1390  }
1391  if ( PAPI_DOM_KERNEL & domain ) {
1392  SUBDBG( " PAPI_DOM_KERNEL \n" );
1393  found = 1;
1394  }
1395  if ( PAPI_DOM_OTHER & domain ) {
1396  SUBDBG( " PAPI_DOM_OTHER \n" );
1397  found = 1;
1398  }
1399  if ( PAPI_DOM_ALL & domain ) {
1400  SUBDBG( " PAPI_DOM_ALL \n" );
1401  found = 1;
1402  }
1403  if ( !found )
1404  return ( PAPI_EINVAL );
1405 
1406  return PAPI_OK;
1407 }
#define PAPI_DOM_ALL
Definition: fpapi.h:25
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_DOM_OTHER
Definition: fpapi.h:23
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
long long found
Definition: libasync.c:735
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_DOM_USER
Definition: fpapi.h:21
int _papi_nvml_shutdown_component ( )

Triggered by PAPI_shutdown()

Definition at line 1311 of file linux-nvml.c.

1312 {
1313  SUBDBG( "Enter:\n" );
1314 
1315  if (nvml_native_table != NULL)
1317  if (devices != NULL)
1318  papi_free(devices);
1319  if (features != NULL)
1321 
1322  (*nvmlShutdownPtr)();
1323 
1324  device_count = 0;
1325  num_events = 0;
1326 
1327  // close the dynamic libraries needed by this component (opened in the init component call)
1328  dlclose(dl1);
1329  dlclose(dl2);
1330  dlclose(dl3);
1331 
1332  return PAPI_OK;
1333 }
#define papi_free(a)
Definition: papi_memory.h:35
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
static int device_count
Definition: linux-nvml.c:147
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
int _papi_nvml_shutdown_thread ( hwd_context_t *  ctx)

Called at thread shutdown

Definition at line 1337 of file linux-nvml.c.

1338 {
1339  SUBDBG( "Enter: ctx: %p\n", ctx );
1340 
1341  (void) ctx;
1342 
1343  /* Last chance to clean up thread */
1344 
1345  return PAPI_OK;
1346 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_start ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl 
)

Triggered by PAPI_start()

Definition at line 1205 of file linux-nvml.c.

1206 {
1207  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1208 
1209  (void) ctx;
1210  (void) ctl;
1211 
1212  /* anything that would need to be set at counter start time */
1213 
1214  /* reset */
1215  /* start the counting */
1216 
1217  return PAPI_OK;
1218 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_stop ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl 
)

Triggered by PAPI_stop()

Definition at line 1223 of file linux-nvml.c.

1224 {
1225  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1226 
1227  int i;
1228  (void) ctx;
1229  (void) ctl;
1230  int ret;
1231 
1232  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1233 
1234  for (i=0;i<nvml_ctl->num_events;i++) {
1235  if ( PAPI_OK !=
1236  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1237  nvml_ctl->which_counter[i]) ))
1238  return ret;
1239 
1240  }
1241 
1242  return PAPI_OK;
1243 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
long long ret
Definition: iozone.c:1346
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_update_control_state ( hwd_control_state_t *  ctl,
NativeInfo_t *  native,
int  count,
hwd_context_t *  ctx 
)

Triggered by eventset operations like add or remove

Definition at line 1178 of file linux-nvml.c.

1182 {
1183  SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx );
1184  int i, index;
1185 
1186  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1187  (void) ctx;
1188 
1189 
1190  /* if no events, return */
1191  if (count==0) return PAPI_OK;
1192 
1193  for( i = 0; i < count; i++ ) {
1194  index = native[i].ni_event;
1195  nvml_ctl->which_counter[i]=index;
1196  /* We have no constraints on event position, so any event */
1197  /* can be in any slot. */
1198  native[i].ni_position = i;
1199  }
1200  nvml_ctl->num_events=count;
1201  return PAPI_OK;
1202 }
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_write ( hwd_context_t *  ctx,
hwd_control_state_t *  ctl,
long long *  events 
)

Triggered by PAPI_write(), but only if the counters are running

Definition at line 1275 of file linux-nvml.c.

1277 {
1278  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1279 
1280  (void) ctx;
1281  (void) ctl;
1282  (void) events;
1283 
1284 
1285  /* You can change ECC mode and compute exclusivity modes on the cards */
1286  /* But I don't see this as a function of a PAPI component at this time */
1287  /* All implementation issues aside. */
1288  return PAPI_OK;
1289 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char events[MAX_EVENTS][BUFSIZ]
static void createNativeEvents ( void  )
static

Definition at line 663 of file linux-nvml.c.

664 {
665  char name[64];
666  char sanitized_name[PAPI_MAX_STR_LEN];
667  char names[device_count][64];
668 
669  int i, nameLen = 0, j;
670  int isUnique = 1;
671 
673  nvmlReturn_t ret;
674 
678  entry = &nvml_native_table[0];
679 
680  for (i=0; i < device_count; i++ ) {
681  memset( names[i], 0x0, 64 );
682  isUnique = 1;
683  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
684 
685  for (j=0; j < i; j++ )
686  {
687  if ( 0 == strncmp( name, names[j], 64 ) )
688  isUnique = 0;
689  }
690 
691  if ( isUnique ) {
692  nameLen = strlen(name);
693  strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
694  for (j=0; j < nameLen; j++)
695  if ( ' ' == sanitized_name[j] )
696  sanitized_name[j] = '_';
697 
698 
699 
700  if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
701  sprintf( entry->name, "%s:graphics_clock", sanitized_name );
702  strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
703  entry->options.clock = NVML_CLOCK_GRAPHICS;
704  entry->type = FEATURE_CLOCK_INFO;
705  entry++;
706 
707  sprintf( entry->name, "%s:sm_clock", sanitized_name);
708  strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
709  entry->options.clock = NVML_CLOCK_SM;
710  entry->type = FEATURE_CLOCK_INFO;
711  entry++;
712 
713  sprintf( entry->name, "%s:memory_clock", sanitized_name);
714  strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
715  entry->options.clock = NVML_CLOCK_MEM;
716  entry->type = FEATURE_CLOCK_INFO;
717  entry++;
718  }
719 
721  sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
722  strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
723  entry->options.ecc_opts = (struct local_ecc){
724  .bits = NVML_SINGLE_BIT_ECC,
725  .which_one = LOCAL_ECC_L1,
726  };
728  entry++;
729 
730  sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
731  strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
732  entry->options.ecc_opts = (struct local_ecc){
733  .bits = NVML_SINGLE_BIT_ECC,
734  .which_one = LOCAL_ECC_L2,
735  };
737  entry++;
738 
739  sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
740  strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
741  entry->options.ecc_opts = (struct local_ecc){
742  .bits = NVML_SINGLE_BIT_ECC,
743  .which_one = LOCAL_ECC_MEM,
744  };
746  entry++;
747 
748  sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
749  strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
750  entry->options.ecc_opts = (struct local_ecc){
751  .bits = NVML_SINGLE_BIT_ECC,
752  .which_one = LOCAL_ECC_REGFILE,
753  };
755  entry++;
756 
757  sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
758  strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
759  entry->options.ecc_opts = (struct local_ecc){
760  .bits = NVML_DOUBLE_BIT_ECC,
761  .which_one = LOCAL_ECC_L1,
762  };
764  entry++;
765 
766  sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
767  strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
768  entry->options.ecc_opts = (struct local_ecc){
769  .bits = NVML_DOUBLE_BIT_ECC,
770  .which_one = LOCAL_ECC_L2,
771  };
773  entry++;
774 
775  sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
776  strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
777  entry->options.ecc_opts = (struct local_ecc){
778  .bits = NVML_DOUBLE_BIT_ECC,
779  .which_one = LOCAL_ECC_MEM,
780  };
782  entry++;
783 
784  sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
785  strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
786  entry->options.ecc_opts = (struct local_ecc){
787  .bits = NVML_DOUBLE_BIT_ECC,
788  .which_one = LOCAL_ECC_REGFILE,
789  };
791  entry++;
792  }
793 
794  if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
795  sprintf( entry->name, "%s:fan_speed", sanitized_name);
796  strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
797  entry->type = FEATURE_FAN_SPEED;
798  entry++;
799  }
800 
801  if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
802  sprintf( entry->name, "%s:graphics_max_clock", sanitized_name);
803  strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
804  entry->options.clock = NVML_CLOCK_GRAPHICS;
805  entry->type = FEATURE_MAX_CLOCK;
806  entry++;
807 
808  sprintf( entry->name, "%s:sm_max_clock", sanitized_name);
809  strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
810  entry->options.clock = NVML_CLOCK_SM;
811  entry->type = FEATURE_MAX_CLOCK;
812  entry++;
813 
814  sprintf( entry->name, "%s:memory_max_clock", sanitized_name);
815  strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
816  entry->options.clock = NVML_CLOCK_MEM;
817  entry->type = FEATURE_MAX_CLOCK;
818  entry++;
819  }
820 
822  sprintf( entry->name, "%s:total_memory", sanitized_name);
823  strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
825  entry->type = FEATURE_MEMORY_INFO;
826  entry++;
827 
828  sprintf( entry->name, "%s:unallocated_memory", sanitized_name);
829  strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
831  entry->type = FEATURE_MEMORY_INFO;
832  entry++;
833 
834  sprintf( entry->name, "%s:allocated_memory", sanitized_name);
835  strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
837  entry->type = FEATURE_MEMORY_INFO;
838  entry++;
839  }
840 
842  sprintf( entry->name, "%s:pstate", sanitized_name);
843  strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
844  entry->type = FEATURE_PERF_STATES;
845  entry++;
846  }
847 
848  if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
849  sprintf( entry->name, "%s:power", sanitized_name);
850  // set the power event units value to "mW" for miliwatts
851  strncpy( entry->units, "mW",PAPI_MIN_STR_LEN);
852  strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw (+/-5 watts) for the entire board: GPU, memory, etc.", PAPI_MAX_STR_LEN);
853  entry->type = FEATURE_POWER;
854  entry++;
855  }
856 
857  if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
858  sprintf( entry->name, "%s:temperature", sanitized_name);
859  strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
860  entry->type = FEATURE_TEMP;
861  entry++;
862  }
863 
865  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
866  strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
867  entry->options.ecc_opts = (struct local_ecc){
868  .bits = NVML_SINGLE_BIT_ECC,
869  };
871  entry++;
872 
873  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
874  strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
875  entry->options.ecc_opts = (struct local_ecc){
876  .bits = NVML_DOUBLE_BIT_ECC,
877  };
879  entry++;
880  }
881 
883  sprintf( entry->name, "%s:gpu_utilization", sanitized_name);
884  strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
886  entry->type = FEATURE_UTILIZATION;
887  entry++;
888 
889  sprintf( entry->name, "%s:memory_utilization", sanitized_name);
890  strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
892  entry->type = FEATURE_UTILIZATION;
893  entry++;
894  }
895  strncpy( names[i], name, 64);
896  }
897  }
898 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
memset(eventId, 0, size)
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
int type
Definition: linux-nvml.h:51
#define papi_malloc(a)
Definition: papi_memory.h:34
#define PAPI_MIN_STR_LEN
Definition: fpapi.h:41
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
struct local_ecc ecc_opts
Definition: linux-nvml.h:41
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
struct cache_ent * entry
Definition: libasync.c:1170
nvmlEccBitType_t bits
Definition: linux-nvml.h:35
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:48
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
nvml_resource_options_t options
Definition: linux-nvml.h:47
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
int i
Definition: fileop.c:140
Definition: linux-nvml.h:45
static int device_count
Definition: linux-nvml.c:147
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
nvmlClockType_t clock
Definition: linux-nvml.h:40
#define FEATURE_TEMP
Definition: linux-nvml.h:13
static int * features
Definition: linux-nvml.c:153
#define FEATURE_POWER
Definition: linux-nvml.h:12
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:49
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
char * name
Definition: iozone.c:23648
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define HAS_FEATURE(features, query)
Definition: linux-nvml.h:17
#define GPU_UTILIZATION
Definition: linux-nvml.h:28
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25
const char * names[NUM_EVENTS]
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20

Here is the call graph for this function:

Here is the caller graph for this function:

static int detectDevices ( )
static

Definition at line 479 of file linux-nvml.c.

480 {
481  nvmlReturn_t ret;
482  nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
483  nvmlDevice_t handle;
484  nvmlPciInfo_t info;
485 
486  cudaError_t cuerr;
487 
488  char busId[16];
489  char name[64];
490  char inforomECC[16];
491  char inforomPower[16];
492  char names[device_count][64];
493  char nvml_busIds[device_count][16];
494 
495  float ecc_version = 0.0, power_version = 0.0;
496 
497  int i = 0,
498  j = 0;
499  int isTesla = 0;
500  int isFermi = 0;
501  int isUnique = 1;
502 
503  unsigned int temp = 0;
504 
505 
506  /* list of nvml pci_busids */
507  for (i=0; i < device_count; i++) {
508  ret = (*nvmlDeviceGetHandleByIndexPtr)( i, &handle );
509  if ( NVML_SUCCESS != ret ) {
510  SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
511  return PAPI_ESYS;
512  }
513 
514  ret = (*nvmlDeviceGetPciInfoPtr)( handle, &info );
515  if ( NVML_SUCCESS != ret ) {
516  SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", (*nvmlErrorStringPtr)(ret) );
517  return PAPI_ESYS;
518  }
519  strncpy(nvml_busIds[i], info.busId, 16);
520  }
521 
522  /* We want to key our list of nvmlDevice_ts by each device's cuda index */
523  for (i=0; i < device_count; i++) {
524  cuerr = (*cudaDeviceGetPCIBusIdPtr)( busId, 16, i );
525  if ( CUDA_SUCCESS != cuerr ) {
526  SUBDBG("cudaDeviceGetPCIBusId failed.\n");
527  return PAPI_ESYS;
528  }
529  for (j=0; j < device_count; j++ ) {
530  if ( !strncmp( busId, nvml_busIds[j], 16) ) {
531  ret = (*nvmlDeviceGetHandleByIndexPtr)(j, &devices[i] );
532  if ( NVML_SUCCESS != ret ) {
533  SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
534  return PAPI_ESYS;
535  }
536  break;
537  }
538  }
539  }
540 
541  memset(names, 0x0, device_count*64);
542  /* So for each card, check what's queryable */
543  for (i=0; i < device_count; i++ ) {
544  isTesla=0;
545  isFermi=1;
546  isUnique = 1;
547  features[i] = 0;
548 
549  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
550  if ( NVML_SUCCESS != ret) {
551  SUBDBG("nvmlDeviceGetName failed \n");
552  return PAPI_ESYS;
553  }
554 
555  for (j=0; j < i; j++ )
556  if ( 0 == strncmp( name, names[j], 64 ) ) {
557  /* if we have a match, and IF everything is sane,
558  * devices with the same name (e.g. Tesla C2075) share features */
559  isUnique = 0;
560  features[i] = features[j];
561 
562  }
563 
564  if ( isUnique ) {
565  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
566  if ( NVML_SUCCESS != ret ) {
567  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
568  isFermi = 0;
569  }
570  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
571  if ( NVML_SUCCESS != ret ) {
572  /* This implies the card is older than Fermi */
573  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
574  SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
575  isFermi = 0;
576  }
577 
578  ecc_version = strtof(inforomECC, NULL );
579  power_version = strtof( inforomPower, NULL);
580 
581  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
582  isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;
583 
584  /* For Tesla and Quadro products from Fermi and Kepler families. */
585  if ( isFermi ) {
587  num_events += 3;
588  }
589 
590  /* For Tesla and Quadro products from Fermi and Kepler families.
591  requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
592  requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
593  requires ECC mode to be enabled. */
594  ret = (*nvmlDeviceGetEccModePtr)( devices[i], &mode, NULL );
595  if ( NVML_SUCCESS == ret ) {
596  if ( NVML_FEATURE_ENABLED == mode) {
597  if ( ecc_version >= 2.0 ) {
599  num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
600  }
601  if ( ecc_version >= 1.0 ) {
603  num_events += 2; /* single bit errors, double bit errors */
604  }
605  }
606  } else {
607  SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml\
608 return code %d)\n", ret);
609  }
610 
611  /* For all discrete products with dedicated fans */
613  num_events++;
614 
615  /* For Tesla and Quadro products from Fermi and Kepler families. */
616  if ( isFermi ) {
618  num_events += 3;
619  }
620 
621  /* For all products */
623  num_events += 3; /* total, free, used */
624 
625  /* For Tesla and Quadro products from the Fermi and Kepler families. */
626  if ( isFermi ) {
628  num_events++;
629  }
630 
631  /* For "GF11x" Tesla and Quadro products from the Fermi family
632  requires NVML_INFOROM_POWER 3.0 or higher
633  For Tesla and Quadro products from the Kepler family
634  does not require NVML_INFOROM_POWER */
635  /* Just try reading power, if it works, enable it*/
636  ret = (*nvmlDeviceGetPowerUsagePtr)( devices[i], &temp);
637  if ( NVML_SUCCESS == ret ) {
639  num_events++;
640  } else {
641  SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on\
642 this card. (nvml return code %d)\n", ret );
643  }
644 
645  /* For all discrete and S-class products. */
646  features[i] |= FEATURE_TEMP;
647  num_events++;
648 
649  /* For Tesla and Quadro products from the Fermi and Kepler families */
650  if (isFermi) {
652  num_events += 2;
653  }
654 
655  strncpy( names[i], name, 64);
656 
657  }
658  }
659  return PAPI_OK;
660 }
memset(eventId, 0, size)
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
return PAPI_OK
Definition: linux-nvml.c:458
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
int i
Definition: fileop.c:140
static int device_count
Definition: linux-nvml.c:147
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
#define FEATURE_TEMP
Definition: linux-nvml.h:13
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
nvmlDevice_t handle
Definition: linux-nvml.c:399
#define FEATURE_POWER
Definition: linux-nvml.h:12
#define PAPI_ESYS
Definition: fpapi.h:108
char * name
Definition: iozone.c:23648
int temp
Definition: iozone.c:22158
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
const char * names[NUM_EVENTS]

Here is the call graph for this function:

Here is the caller graph for this function:

unsigned long long getClockSpeed ( nvmlDevice_t  dev,
nvmlClockType_t  which_one 
)

Definition at line 156 of file linux-nvml.c.

157 {
158  unsigned int ret = 0;
159  nvmlReturn_t bad;
160  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
161 
162  if ( NVML_SUCCESS != bad ) {
163  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
164  }
165 
166  return (unsigned long long)ret;
167 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

unsigned long long getEccLocalErrors ( nvmlDevice_t  dev,
nvmlEccBitType_t  bits,
int  which_one 
)

Definition at line 170 of file linux-nvml.c.

171 {
172  nvmlEccErrorCounts_t counts;
173 
174  nvmlReturn_t bad;
175  bad = (*nvmlDeviceGetDetailedEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
176 
177  if ( NVML_SUCCESS != bad ) {
178  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
179  }
180 
181 
182  switch ( which_one ) {
183  case LOCAL_ECC_REGFILE:
184  return counts.registerFile;
185  case LOCAL_ECC_L1:
186  return counts.l1Cache;
187  case LOCAL_ECC_L2:
188  return counts.l2Cache;
189  case LOCAL_ECC_MEM:
190  return counts.deviceMemory;
191  default:
192  ;
193  }
194  return (unsigned long long)-1;
195 }
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25

Here is the caller graph for this function:

unsigned long long getFanSpeed ( nvmlDevice_t  dev)

Definition at line 198 of file linux-nvml.c.

199 {
200  unsigned int ret = 0;
201  nvmlReturn_t bad;
202  bad = (*nvmlDeviceGetFanSpeedPtr)( dev, &ret );
203 
204  if ( NVML_SUCCESS != bad ) {
205  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
206  }
207 
208 
209  return (unsigned long long)ret;
210 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

unsigned long long getMaxClockSpeed ( nvmlDevice_t  dev,
nvmlClockType_t  which_one 
)

Definition at line 213 of file linux-nvml.c.

214 {
215  unsigned int ret = 0;
216  nvmlReturn_t bad;
217  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
218 
219  if ( NVML_SUCCESS != bad ) {
220  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
221  }
222 
223 
224  return (unsigned long long) ret;
225 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

unsigned long long getMemoryInfo ( nvmlDevice_t  dev,
int  which_one 
)

Definition at line 228 of file linux-nvml.c.

229 {
230  nvmlMemory_t meminfo;
231  nvmlReturn_t bad;
232  bad = (*nvmlDeviceGetMemoryInfoPtr)( dev, &meminfo );
233 
234  if ( NVML_SUCCESS != bad ) {
235  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
236  }
237 
238  switch (which_one) {
240  return meminfo.total;
241  case MEMINFO_UNALLOCED:
242  return meminfo.free;
243  case MEMINFO_ALLOCED:
244  return meminfo.used;
245  default:
246  ;
247  }
248  return (unsigned long long)-1;
249 }
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20

Here is the caller graph for this function:

unsigned long long getPowerUsage ( nvmlDevice_t  dev)

Definition at line 308 of file linux-nvml.c.

309 {
310  unsigned int power;
311  nvmlReturn_t bad;
312  bad = (*nvmlDeviceGetPowerUsagePtr)( dev, &power );
313 
314  if ( NVML_SUCCESS != bad ) {
315  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
316  }
317 
318 
319  return (unsigned long long) power;
320 }
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

unsigned long long getPState ( nvmlDevice_t  dev)

Definition at line 252 of file linux-nvml.c.

253 {
254  unsigned int ret = 0;
255  nvmlPstates_t state = NVML_PSTATE_15;
256  nvmlReturn_t bad;
257  bad = (*nvmlDeviceGetPerformanceStatePtr)( dev, &state );
258 
259  if ( NVML_SUCCESS != bad ) {
260  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
261  }
262 
263 
264  switch ( state ) {
265  case NVML_PSTATE_15:
266  ret++;
267  case NVML_PSTATE_14:
268  ret++;
269  case NVML_PSTATE_13:
270  ret++;
271  case NVML_PSTATE_12:
272  ret++;
273  case NVML_PSTATE_11:
274  ret++;
275  case NVML_PSTATE_10:
276  ret++;
277  case NVML_PSTATE_9:
278  ret++;
279  case NVML_PSTATE_8:
280  ret++;
281  case NVML_PSTATE_7:
282  ret++;
283  case NVML_PSTATE_6:
284  ret++;
285  case NVML_PSTATE_5:
286  ret++;
287  case NVML_PSTATE_4:
288  ret++;
289  case NVML_PSTATE_3:
290  ret++;
291  case NVML_PSTATE_2:
292  ret++;
293  case NVML_PSTATE_1:
294  ret++;
295  case NVML_PSTATE_0:
296  break;
297  case NVML_PSTATE_UNKNOWN:
298  default:
299  /* This should never happen?
300  * The API docs just state Unknown performance state... */
301  return (unsigned long long) -1;
302  }
303 
304  return (unsigned long long)ret;
305 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
child_idents[x-1] state
Definition: iozone.c:21341

Here is the caller graph for this function:

unsigned long long getTemperature ( nvmlDevice_t  dev)

Definition at line 323 of file linux-nvml.c.

324 {
325  unsigned int ret = 0;
326  nvmlReturn_t bad;
327  bad = (*nvmlDeviceGetTemperaturePtr)( dev, NVML_TEMPERATURE_GPU, &ret );
328 
329  if ( NVML_SUCCESS != bad ) {
330  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
331  }
332 
333 
334  return (unsigned long long)ret;
335 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

unsigned long long getTotalEccErrors ( nvmlDevice_t  dev,
nvmlEccBitType_t  bits 
)

Definition at line 338 of file linux-nvml.c.

339 {
340  unsigned long long counts = 0;
341  nvmlReturn_t bad;
342  bad = (*nvmlDeviceGetTotalEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
343 
344  if ( NVML_SUCCESS != bad ) {
345  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
346  }
347 
348 
349  return counts;
350 }
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

unsigned long long getUtilization ( nvmlDevice_t  dev,
int  which_one 
)

Definition at line 356 of file linux-nvml.c.

357 {
358  nvmlUtilization_t util;
359  nvmlReturn_t bad;
360  bad = (*nvmlDeviceGetUtilizationRatesPtr)( dev, &util );
361 
362  if ( NVML_SUCCESS != bad ) {
363  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
364  }
365 
366 
367  switch (which_one) {
368  case GPU_UTILIZATION:
369  return (unsigned long long) util.gpu;
370  case MEMORY_UTILIZATION:
371  return (unsigned long long) util.memory;
372  default:
373  ;
374  }
375 
376  return (unsigned long long) -1;
377 }
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define GPU_UTILIZATION
Definition: linux-nvml.h:28

Here is the caller graph for this function:

static int linkCudaLibraries ( )
static

Definition at line 994 of file linux-nvml.c.

995 {
996  /* Attempt to guess if we were statically linked to libc, if so bail */
997  if ( _dl_non_dynamic_init != NULL ) {
998  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
999  return PAPI_ENOSUPP;
1000  }
1001 
1002  /* Need to link in the cuda libraries, if not found disable the component */
1003  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
1004  if (!dl1)
1005  {
1006  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
1007  return ( PAPI_ENOSUPP );
1008  }
1009  cuInitPtr = dlsym(dl1, "cuInit");
1010  if (dlerror() != NULL)
1011  {
1012  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
1013  return ( PAPI_ENOSUPP );
1014  }
1015 
1016  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
1017  if (!dl2)
1018  {
1019  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
1020  return ( PAPI_ENOSUPP );
1021  }
1022  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
1023  if (dlerror() != NULL)
1024  {
1025  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
1026  return ( PAPI_ENOSUPP );
1027  }
1028  cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
1029  if (dlerror() != NULL)
1030  {
1031  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.",PAPI_MAX_STR_LEN);
1032  return ( PAPI_ENOSUPP );
1033  }
1034  cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
1035  if (dlerror() != NULL)
1036  {
1037  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.",PAPI_MAX_STR_LEN);
1038  return ( PAPI_ENOSUPP );
1039  }
1040 
1041  dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
1042  if (!dl3)
1043  {
1044  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.",PAPI_MAX_STR_LEN);
1045  return ( PAPI_ENOSUPP );
1046  }
1047  nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
1048  if (dlerror() != NULL)
1049  {
1050  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.",PAPI_MAX_STR_LEN);
1051  return ( PAPI_ENOSUPP );
1052  }
1053  nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
1054  if (dlerror() != NULL)
1055  {
1056  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.",PAPI_MAX_STR_LEN);
1057  return ( PAPI_ENOSUPP );
1058  }
1059  nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
1060  if (dlerror() != NULL)
1061  {
1062  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.",PAPI_MAX_STR_LEN);
1063  return ( PAPI_ENOSUPP );
1064  }
1065  nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
1066  if (dlerror() != NULL)
1067  {
1068  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.",PAPI_MAX_STR_LEN);
1069  return ( PAPI_ENOSUPP );
1070  }
1071  nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
1072  if (dlerror() != NULL)
1073  {
1074  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.",PAPI_MAX_STR_LEN);
1075  return ( PAPI_ENOSUPP );
1076  }
1077  nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
1078  if (dlerror() != NULL)
1079  {
1080  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.",PAPI_MAX_STR_LEN);
1081  return ( PAPI_ENOSUPP );
1082  }
1083  nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
1084  if (dlerror() != NULL)
1085  {
1086  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.",PAPI_MAX_STR_LEN);
1087  return ( PAPI_ENOSUPP );
1088  }
1089  nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
1090  if (dlerror() != NULL)
1091  {
1092  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.",PAPI_MAX_STR_LEN);
1093  return ( PAPI_ENOSUPP );
1094  }
1095  nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
1096  if (dlerror() != NULL)
1097  {
1098  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.",PAPI_MAX_STR_LEN);
1099  return ( PAPI_ENOSUPP );
1100  }
1101  nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
1102  if (dlerror() != NULL)
1103  {
1104  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.",PAPI_MAX_STR_LEN);
1105  return ( PAPI_ENOSUPP );
1106  }
1107  nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
1108  if (dlerror() != NULL)
1109  {
1110  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.",PAPI_MAX_STR_LEN);
1111  return ( PAPI_ENOSUPP );
1112  }
1113  nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
1114  if (dlerror() != NULL)
1115  {
1116  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.",PAPI_MAX_STR_LEN);
1117  return ( PAPI_ENOSUPP );
1118  }
1119  nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
1120  if (dlerror() != NULL)
1121  {
1122  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.",PAPI_MAX_STR_LEN);
1123  return ( PAPI_ENOSUPP );
1124  }
1125  nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
1126  if (dlerror() != NULL)
1127  {
1128  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.",PAPI_MAX_STR_LEN);
1129  return ( PAPI_ENOSUPP );
1130  }
1131  nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
1132  if (dlerror() != NULL)
1133  {
1134  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.",PAPI_MAX_STR_LEN);
1135  return ( PAPI_ENOSUPP );
1136  }
1137  nvmlInitPtr = dlsym(dl3, "nvmlInit");
1138  if (dlerror() != NULL)
1139  {
1140  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.",PAPI_MAX_STR_LEN);
1141  return ( PAPI_ENOSUPP );
1142  }
1143  nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
1144  if (dlerror() != NULL)
1145  {
1146  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.",PAPI_MAX_STR_LEN);
1147  return ( PAPI_ENOSUPP );
1148  }
1149  nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
1150  if (dlerror() != NULL)
1151  {
1152  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.",PAPI_MAX_STR_LEN);
1153  return ( PAPI_ENOSUPP );
1154  }
1155 
1156  return ( PAPI_OK );
1157 }
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1523
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:632
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:41
#define PAPI_ENOSUPP
Definition: fpapi.h:123

Here is the caller graph for this function:

static void nvml_hardware_reset ( )
static

Definition at line 380 of file linux-nvml.c.

381 {
382  /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while
383  * it is possible to implement a reset of the ECC counters, we do not */
384  /*
385  int i;
386  for ( i=0; i < device_count; i++ )
387  nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC );
388  */
389 }

Here is the caller graph for this function:

switch ( entry->  type)

Definition at line 416 of file linux-nvml.c.

416  {
417  case FEATURE_CLOCK_INFO:
419  (nvmlClockType_t)entry->options.clock );
420  break;
423  (nvmlEccBitType_t)entry->options.ecc_opts.bits,
424  (int)entry->options.ecc_opts.which_one);
425  break;
426  case FEATURE_FAN_SPEED:
427  *value = getFanSpeed( handle );
428  break;
429  case FEATURE_MAX_CLOCK:
431  (nvmlClockType_t)entry->options.clock );
432  break;
433  case FEATURE_MEMORY_INFO:
435  (int)entry->options.which_one );
436  break;
437  case FEATURE_PERF_STATES:
438  *value = getPState( handle );
439  break;
440  case FEATURE_POWER:
441  *value = getPowerUsage( handle );
442  break;
443  case FEATURE_TEMP:
445  break;
448  (nvmlEccBitType_t)entry->options.ecc_opts.bits );
449  break;
450  case FEATURE_UTILIZATION:
452  (int)entry->options.which_one );
453  break;
454  default:
455  return PAPI_EINVAL;
456  }
unsigned long long getPState(nvmlDevice_t dev)
Definition: linux-nvml.c:252
unsigned long long getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
Definition: linux-nvml.c:338
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
unsigned long long getPowerUsage(nvmlDevice_t dev)
Definition: linux-nvml.c:308
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
unsigned long long getMemoryInfo(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:228
struct cache_ent * entry
Definition: libasync.c:1170
return PAPI_EINVAL
Definition: linux-nvml.c:408
void double value
Definition: iozone.c:18781
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
unsigned long long getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
Definition: linux-nvml.c:170
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
#define FEATURE_TEMP
Definition: linux-nvml.h:13
nvmlDevice_t handle
Definition: linux-nvml.c:399
#define FEATURE_POWER
Definition: linux-nvml.h:12
unsigned long long getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:156
unsigned long long getTemperature(nvmlDevice_t dev)
Definition: linux-nvml.c:323
unsigned long long getFanSpeed(nvmlDevice_t dev)
Definition: linux-nvml.c:198
unsigned long long getUtilization(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:356
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
unsigned long long getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:213

Here is the call graph for this function:

Variable Documentation

void(* _dl_non_dynamic_init)(void)

Holds control flags. Usually there is one of these per event set; usually this is out-of-band configuration of the hardware.

Copy of counts; holds results when stopped.

Definition at line 39 of file linux-nvml.c.

131 {
132  int num_events;
133  int which_counter[NVML_MAX_COUNTERS];
134  long long counter[NVML_MAX_COUNTERS];
static int num_events
#define NVML_MAX_COUNTERS
nvml_control_state_t
Definition: linux-nvml.c:135
papi_vector_t _nvml_vector

Vector that points to entry points for our component

Definition at line 1523 of file linux-nvml.c.

(*) cudaGetDevicePtr cudaIdx) = -1

Definition at line 400 of file linux-nvml.c.

int device_count = 0
static

Number of devices detected at component_init time

Definition at line 147 of file linux-nvml.c.

nvmlDevice_t* devices =NULL
static

Definition at line 152 of file linux-nvml.c.

entry = &nvml_native_table[which_one]

Definition at line 402 of file linux-nvml.c.

int* features =NULL
static

Definition at line 153 of file linux-nvml.c.

handle = devices[cudaIdx]

Definition at line 399 of file linux-nvml.c.

int
static
Initial value:

Code that reads event values.

Definition at line 397 of file linux-nvml.c.

int num_events = 0
static

Number of events in the table

Definition at line 150 of file linux-nvml.c.

nvml_control_state_t

Definition at line 135 of file linux-nvml.c.

nvml_native_event_entry_t* nvml_native_table =NULL
static

This table contains the native events

Definition at line 144 of file linux-nvml.c.

return PAPI_EINVAL

Definition at line 408 of file linux-nvml.c.

return PAPI_OK

Definition at line 458 of file linux-nvml.c.

* value = (long long) -1

Definition at line 403 of file linux-nvml.c.