PAPI  5.3.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-nvml.c File Reference

This is an NVML component. It demonstrates the component interface and implements two counters, nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature, from the NVIDIA Management Library. Please refer to the NVML documentation for details about nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature. Power is reported in mW and temperature in degrees Celsius. More...

Include dependency graph for linux-nvml.c:

Go to the source code of this file.

Data Structures

struct  nvml_context_t
 

Macros

#define CUDAAPI   __attribute__((weak))
 
#define CUDARTAPI   __attribute__((weak))
 
#define DECLDIR   __attribute__((weak))
 
#define NVML_MAX_COUNTERS   100
 

Functions

unsigned long long getClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
 
unsigned long long getEccLocalErrors (nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
 
unsigned long long getFanSpeed (nvmlDevice_t dev)
 
unsigned long long getMaxClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
 
unsigned long long getMemoryInfo (nvmlDevice_t dev, int which_one)
 
unsigned long long getPState (nvmlDevice_t dev)
 
unsigned long long getPowerUsage (nvmlDevice_t dev)
 
unsigned long long getTemperature (nvmlDevice_t dev)
 
unsigned long long getTotalEccErrors (nvmlDevice_t dev, nvmlEccBitType_t bits)
 
unsigned long long getUtilization (nvmlDevice_t dev, int which_one)
 
static void nvml_hardware_reset ()
 
 switch (entry->type)
 
int _papi_nvml_init_thread (hwd_context_t *ctx)
 
static int detectDevices ()
 
static void createNativeEvents ()
 
int _papi_nvml_init_component (int cidx)
 
static int linkCudaLibraries ()
 
int _papi_nvml_init_control_state (hwd_control_state_t *ctl)
 
int _papi_nvml_update_control_state (hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
 
int _papi_nvml_start (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_stop (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_read (hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
 
int _papi_nvml_write (hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
 
int _papi_nvml_reset (hwd_context_t *ctx, hwd_control_state_t *ctl)
 
int _papi_nvml_shutdown_component ()
 
int _papi_nvml_shutdown_thread (hwd_context_t *ctx)
 
int _papi_nvml_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
int _papi_nvml_set_domain (hwd_control_state_t *cntrl, int domain)
 
int _papi_nvml_ntv_enum_events (unsigned int *EventCode, int modifier)
 
int _papi_nvml_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
int _papi_nvml_ntv_code_to_descr (unsigned int EventCode, char *descr, int len)
 

Variables

void(* _dl_non_dynamic_init )(void)
 
 nvml_control_state_t
 
static nvml_native_event_entry_t * nvml_native_table = NULL
 
static int device_count = 0
 
static int num_events = 0
 
static nvmlDevice_t * devices =NULL
 
static int * features = NULL
 
static int
 
nvmlDevice_t handle = devices[cudaIdx]
 
int cudaIdx = -1
 
 entry = &nvml_native_table[which_one]
 
value = (long long) -1
 
return PAPI_EINVAL
 
return PAPI_OK
 
papi_vector_t _nvml_vector
 

Detailed Description

Author
Kiran Kumar Kasichayanula kkasi.nosp@m.cha@.nosp@m.utk.e.nosp@m.du
James Ralph ralph.nosp@m.@eec.nosp@m.s.utk.nosp@m..edu

Definition in file linux-nvml.c.

Macro Definition Documentation

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define DECLDIR   __attribute__((weak))
#define NVML_MAX_COUNTERS   100

Function Documentation

int _papi_nvml_ctl ( hwd_context_t *ctx,
int  code,
_papi_int_option_t *option 
)

This function sets various options in the component

Parameters
code: valid values are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT

Definition at line 1347 of file linux-nvml.c.

1348 {
1349  SUBDBG( "Enter: ctx: %p, code: %d\n", ctx, code );
1350 
1351  (void) ctx;
1352  (void) code;
1353  (void) option;
1354 
1355 
1356  /* FIXME. This should maybe set up more state, such as which counters are active and */
1357  /* counter mappings. */
1358 
1359  return PAPI_OK;
1360 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_init_component ( int  cidx)

Initializes the hardware counters, sets up the function vector table, and gets hardware information. This routine is called when the PAPI process is initialized (i.e., by PAPI_library_init).

Definition at line 898 of file linux-nvml.c.

899 {
900  SUBDBG ("Entry: cidx: %d\n", cidx);
901  nvmlReturn_t ret;
902  cudaError_t cuerr;
903  int papi_errorcode;
904 
905  int cuda_count = 0;
906  unsigned int nvml_count = 0;
907 
908  /* link in the cuda and nvml libraries and resolve the symbols we need to use */
909  if (linkCudaLibraries() != PAPI_OK) {
910  SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
911  SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
912  return (PAPI_ENOSUPP);
913  }
914 
915  ret = (*nvmlInitPtr)();
916  if ( NVML_SUCCESS != ret ) {
917  strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
918  return PAPI_ENOSUPP;
919  }
920 
921  cuerr = (*cuInitPtr)( 0 );
922  if ( CUDA_SUCCESS != cuerr ) {
923  strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
924  return PAPI_ENOSUPP;
925  }
926 
927  /* Figure out the number of CUDA devices in the system */
928  ret = (*nvmlDeviceGetCountPtr)( &nvml_count );
929  if ( NVML_SUCCESS != ret ) {
930  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
931  return PAPI_ENOSUPP;
932  }
933 
934  cuerr = (*cudaGetDeviceCountPtr)( &cuda_count );
935  if ( CUDA_SUCCESS != cuerr ) {
936  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
937  return PAPI_ENOSUPP;
938  }
939 
940  /* We can probably recover from this, when we're clever */
941  if ( (cuda_count > 0) && (nvml_count != (unsigned int)cuda_count ) ) {
942  strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
943  return PAPI_ENOSUPP;
944  }
945 
946  device_count = cuda_count;
947 
948  /* A per device representation of what events are present */
949  features = (int*)papi_malloc(sizeof(int) * device_count );
950 
951  /* Handles to each device */
952  devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
953 
954  /* Figure out what events are supported on each card. */
955  if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
958  sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
959  return PAPI_ENOSUPP;
960  }
961 
962  /* The assumption is that if everything went swimmingly in detectDevices,
963  all nvml calls here should be fine. */
965 
966  /* Export the total number of events available */
968 
969  /* Export the component id */
971 
972  /* Export the number of 'counters' */
975 
976  return PAPI_OK;
977 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
static int linkCudaLibraries()
Definition: linux-nvml.c:987
#define papi_free(a)
Definition: papi_memory.h:35
#define papi_malloc(a)
Definition: papi_memory.h:34
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1490
long long ret
Definition: iozone.c:1346
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:632
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define PAPI_ENOSUPP
Definition: fpapi.h:123
static int device_count
Definition: linux-nvml.c:147
static int cidx
Definition: event_info.c:40
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
strcpy(filename, default_filename)
static int detectDevices()
Definition: linux-nvml.c:479
static void createNativeEvents()
Definition: linux-nvml.c:658

Here is the call graph for this function:

int _papi_nvml_init_control_state ( hwd_control_state_t *ctl )

Sets up a counter control state. In general, a control state holds the hardware info for an EventSet.

Definition at line 1159 of file linux-nvml.c.

1160 {
1161  SUBDBG( "nvml_init_control_state... %p\n", ctl );
1162  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1163  memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) );
1164 
1165  return PAPI_OK;
1166 }
memset(eventId, 0, size)
return PAPI_OK
Definition: linux-nvml.c:458
nvml_control_state_t
Definition: linux-nvml.c:135
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the call graph for this function:

int _papi_nvml_init_thread ( hwd_context_t *ctx )

This is called whenever a thread is initialized

Definition at line 469 of file linux-nvml.c.

470 {
471  (void) ctx;
472 
473  SUBDBG( "Enter: ctx: %p\n", ctx );
474 
475  return PAPI_OK;
476 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_ntv_code_to_descr ( unsigned int  EventCode,
char *  descr,
int  len 
)

Takes a native event code and passes back the event description

Parameters
EventCode: the native event code
descr: a pointer for the description to be copied to
len: the size of the descr string

Definition at line 1477 of file linux-nvml.c.

1478 {
1479  int index;
1480  index = EventCode;
1481 
1482  if (index >= num_events) return PAPI_ENOEVNT;
1483 
1484  strncpy( descr, nvml_native_table[index].description, len );
1485 
1486  return PAPI_OK;
1487 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
char description[PAPI_MAX_STR_LEN]
int _papi_nvml_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)

Takes a native event code and passes back the name

Parameters
EventCode: the native event code
name: a pointer for the name to be copied to
len: the size of the name string

Definition at line 1456 of file linux-nvml.c.

1457 {
1458  SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
1459  int index;
1460 
1461  index = EventCode;
1462 
1463  /* Make sure we are in range */
1464  if (index >= num_events) return PAPI_ENOEVNT;
1465 
1466  strncpy( name, nvml_native_table[index].name, len );
1467 
1468  return PAPI_OK;
1469 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char * name
Definition: iozone.c:23648
int _papi_nvml_ntv_enum_events ( unsigned int *EventCode,
int  modifier 
)

Enumerate Native Events

Parameters
EventCode: the event of interest
modifier: one of PAPI_ENUM_FIRST or PAPI_ENUM_EVENTS. If your component has attribute masks, then these need to be handled here as well.

Definition at line 1415 of file linux-nvml.c.

1416 {
1417  int index;
1418 
1419  switch ( modifier ) {
1420 
1421  /* return EventCode of first event */
1422  case PAPI_ENUM_FIRST:
1423  /* return the first event that we support */
1424 
1425  *EventCode = 0;
1426  return PAPI_OK;
1427 
1428  /* return EventCode of next available event */
1429  case PAPI_ENUM_EVENTS:
1430  index = *EventCode;
1431 
1432  /* Make sure we are in range */
1433  if ( index < num_events - 1 ) {
1434 
1435  /* This assumes a non-sparse mapping of the events */
1436  *EventCode = *EventCode + 1;
1437  return PAPI_OK;
1438  } else {
1439  return PAPI_ENOEVNT;
1440  }
1441  break;
1442 
1443  default:
1444  return PAPI_EINVAL;
1445  }
1446 
1447  return PAPI_EINVAL;
1448 }
#define PAPI_ENOEVNT
Definition: fpapi.h:112
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
int _papi_nvml_read ( hwd_context_t *ctx,
hwd_control_state_t *ctl,
long long **  events,
int  flags 
)

Triggered by PAPI_read()

Definition at line 1241 of file linux-nvml.c.

1243 {
1244  SUBDBG( "Enter: ctx: %p, flags: %d\n", ctx, flags );
1245 
1246  (void) ctx;
1247  (void) flags;
1248  int i;
1249  int ret;
1250  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1251 
1252 
1253  for (i=0;i<nvml_ctl->num_events;i++) {
1254  if ( PAPI_OK !=
1255  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1256  nvml_ctl->which_counter[i]) ))
1257  return ret;
1258 
1259  }
1260  /* return pointer to the values we read */
1261  *events = nvml_ctl->counter;
1262  return PAPI_OK;
1263 }
long long flags
Definition: iozone.c:12330
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
long long ret
Definition: iozone.c:1346
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char events[MAX_EVENTS][BUFSIZ]
int _papi_nvml_reset ( hwd_context_t *ctx,
hwd_control_state_t *ctl 
)

Triggered by PAPI_reset() but only if the EventSet is currently running

Definition at line 1289 of file linux-nvml.c.

1290 {
1291  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1292 
1293  (void) ctx;
1294  (void) ctl;
1295 
1296  /* Reset the hardware */
1298 
1299  return PAPI_OK;
1300 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static void nvml_hardware_reset()
Definition: linux-nvml.c:380

Here is the call graph for this function:

int _papi_nvml_set_domain ( hwd_control_state_t *cntrl,
int  domain 
)

This function has to set the bits needed to count different domains, in particular PAPI_DOM_USER, PAPI_DOM_KERNEL, and PAPI_DOM_OTHER. By default it returns PAPI_EINVAL if none of those are specified, and PAPI_OK on success. PAPI_DOM_USER: only the user context is counted. PAPI_DOM_KERNEL: only the kernel/OS context is counted. PAPI_DOM_OTHER: exception/transient mode (like user TLB misses). PAPI_DOM_ALL: all of the domains.

Definition at line 1372 of file linux-nvml.c.

1373 {
1374  SUBDBG( "Enter: cntrl: %p, domain: %d\n", cntrl, domain );
1375 
1376  (void) cntrl;
1377 
1378  int found = 0;
1379 
1380  if ( PAPI_DOM_USER & domain ) {
1381  SUBDBG( " PAPI_DOM_USER \n" );
1382  found = 1;
1383  }
1384  if ( PAPI_DOM_KERNEL & domain ) {
1385  SUBDBG( " PAPI_DOM_KERNEL \n" );
1386  found = 1;
1387  }
1388  if ( PAPI_DOM_OTHER & domain ) {
1389  SUBDBG( " PAPI_DOM_OTHER \n" );
1390  found = 1;
1391  }
1392  if ( PAPI_DOM_ALL & domain ) {
1393  SUBDBG( " PAPI_DOM_ALL \n" );
1394  found = 1;
1395  }
1396  if ( !found )
1397  return ( PAPI_EINVAL );
1398 
1399  return PAPI_OK;
1400 }
#define PAPI_DOM_ALL
Definition: fpapi.h:25
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_DOM_OTHER
Definition: fpapi.h:23
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
long long found
Definition: libasync.c:735
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_DOM_USER
Definition: fpapi.h:21
int _papi_nvml_shutdown_component ( )

Triggered by PAPI_shutdown()

Definition at line 1304 of file linux-nvml.c.

1305 {
1306  SUBDBG( "Enter:\n" );
1307 
1308  if (nvml_native_table != NULL)
1310  if (devices != NULL)
1311  papi_free(devices);
1312  if (features != NULL)
1314 
1315  (*nvmlShutdownPtr)();
1316 
1317  device_count = 0;
1318  num_events = 0;
1319 
1320  // close the dynamic libraries needed by this component (opened in the init component call)
1321  dlclose(dl1);
1322  dlclose(dl2);
1323  dlclose(dl3);
1324 
1325  return PAPI_OK;
1326 }
#define papi_free(a)
Definition: papi_memory.h:35
static int num_events
return PAPI_OK
Definition: linux-nvml.c:458
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
static int device_count
Definition: linux-nvml.c:147
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
int _papi_nvml_shutdown_thread ( hwd_context_t *ctx )

Called at thread shutdown

Definition at line 1330 of file linux-nvml.c.

1331 {
1332  SUBDBG( "Enter: ctx: %p\n", ctx );
1333 
1334  (void) ctx;
1335 
1336  /* Last chance to clean up thread */
1337 
1338  return PAPI_OK;
1339 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_start ( hwd_context_t *ctx,
hwd_control_state_t *ctl 
)

Triggered by PAPI_start()

Definition at line 1198 of file linux-nvml.c.

1199 {
1200  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1201 
1202  (void) ctx;
1203  (void) ctl;
1204 
1205  /* anything that would need to be set at counter start time */
1206 
1207  /* reset */
1208  /* start the counting */
1209 
1210  return PAPI_OK;
1211 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_stop ( hwd_context_t *ctx,
hwd_control_state_t *ctl 
)

Triggered by PAPI_stop()

Definition at line 1216 of file linux-nvml.c.

1217 {
1218  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1219 
1220  int i;
1221  (void) ctx;
1222  (void) ctl;
1223  int ret;
1224 
1225  nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
1226 
1227  for (i=0;i<nvml_ctl->num_events;i++) {
1228  if ( PAPI_OK !=
1229  ( ret = nvml_hardware_read( &nvml_ctl->counter[i],
1230  nvml_ctl->which_counter[i]) ))
1231  return ret;
1232 
1233  }
1234 
1235  return PAPI_OK;
1236 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
long long ret
Definition: iozone.c:1346
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_update_control_state ( hwd_control_state_t *ctl,
NativeInfo_t *native,
int  count,
hwd_context_t *ctx 
)

Triggered by eventset operations like add or remove

Definition at line 1171 of file linux-nvml.c.

1175 {
1176  SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx );
1177  int i, index;
1178 
1179  nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
1180  (void) ctx;
1181 
1182 
1183  /* if no events, return */
1184  if (count==0) return PAPI_OK;
1185 
1186  for( i = 0; i < count; i++ ) {
1187  index = native[i].ni_event;
1188  nvml_ctl->which_counter[i]=index;
1189  /* We have no constraints on event position, so any event */
1190  /* can be in any slot. */
1191  native[i].ni_position = i;
1192  }
1193  nvml_ctl->num_events=count;
1194  return PAPI_OK;
1195 }
return PAPI_OK
Definition: linux-nvml.c:458
int count
Definition: iozone.c:22422
void
Definition: iozone.c:18627
nvml_control_state_t
Definition: linux-nvml.c:135
int i
Definition: fileop.c:140
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int _papi_nvml_write ( hwd_context_t *ctx,
hwd_control_state_t *ctl,
long long *events 
)

Triggered by PAPI_write(), but only if the counters are running

Definition at line 1268 of file linux-nvml.c.

1270 {
1271  SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
1272 
1273  (void) ctx;
1274  (void) ctl;
1275  (void) events;
1276 
1277 
1278  /* You can change ECC mode and compute exclusivity modes on the cards */
1279  /* But I don't see this as a function of a PAPI component at this time */
1280  /* All implementation issues aside. */
1281  return PAPI_OK;
1282 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
char events[MAX_EVENTS][BUFSIZ]
static void createNativeEvents ( void  )
static

Definition at line 658 of file linux-nvml.c.

659 {
660  char name[64];
661  char sanitized_name[PAPI_MAX_STR_LEN];
662  char names[device_count][64];
663 
664  int i, nameLen = 0, j;
665  int isUnique = 1;
666 
668  nvmlReturn_t ret;
669 
673  entry = &nvml_native_table[0];
674 
675  for (i=0; i < device_count; i++ ) {
676  memset( names[i], 0x0, 64 );
677  isUnique = 1;
678  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
679 
680  for (j=0; j < i; j++ )
681  {
682  if ( 0 == strncmp( name, names[j], 64 ) )
683  isUnique = 0;
684  }
685 
686  if ( isUnique ) {
687  nameLen = strlen(name);
688  strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
689  for (j=0; j < nameLen; j++)
690  if ( ' ' == sanitized_name[j] )
691  sanitized_name[j] = '_';
692 
693 
694 
695  if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
696  sprintf( entry->name, "%s:graphics_clock", sanitized_name );
697  strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
698  entry->options.clock = NVML_CLOCK_GRAPHICS;
699  entry->type = FEATURE_CLOCK_INFO;
700  entry++;
701 
702  sprintf( entry->name, "%s:sm_clock", sanitized_name);
703  strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
704  entry->options.clock = NVML_CLOCK_SM;
705  entry->type = FEATURE_CLOCK_INFO;
706  entry++;
707 
708  sprintf( entry->name, "%s:memory_clock", sanitized_name);
709  strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
710  entry->options.clock = NVML_CLOCK_MEM;
711  entry->type = FEATURE_CLOCK_INFO;
712  entry++;
713  }
714 
716  sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
717  strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
718  entry->options.ecc_opts = (struct local_ecc){
719  .bits = NVML_SINGLE_BIT_ECC,
720  .which_one = LOCAL_ECC_L1,
721  };
723  entry++;
724 
725  sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
726  strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
727  entry->options.ecc_opts = (struct local_ecc){
728  .bits = NVML_SINGLE_BIT_ECC,
729  .which_one = LOCAL_ECC_L2,
730  };
732  entry++;
733 
734  sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
735  strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
736  entry->options.ecc_opts = (struct local_ecc){
737  .bits = NVML_SINGLE_BIT_ECC,
738  .which_one = LOCAL_ECC_MEM,
739  };
741  entry++;
742 
743  sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
744  strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
745  entry->options.ecc_opts = (struct local_ecc){
746  .bits = NVML_SINGLE_BIT_ECC,
747  .which_one = LOCAL_ECC_REGFILE,
748  };
750  entry++;
751 
752  sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
753  strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
754  entry->options.ecc_opts = (struct local_ecc){
755  .bits = NVML_DOUBLE_BIT_ECC,
756  .which_one = LOCAL_ECC_L1,
757  };
759  entry++;
760 
761  sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
762  strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
763  entry->options.ecc_opts = (struct local_ecc){
764  .bits = NVML_DOUBLE_BIT_ECC,
765  .which_one = LOCAL_ECC_L2,
766  };
768  entry++;
769 
770  sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
771  strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
772  entry->options.ecc_opts = (struct local_ecc){
773  .bits = NVML_DOUBLE_BIT_ECC,
774  .which_one = LOCAL_ECC_MEM,
775  };
777  entry++;
778 
779  sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
780  strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
781  entry->options.ecc_opts = (struct local_ecc){
782  .bits = NVML_DOUBLE_BIT_ECC,
783  .which_one = LOCAL_ECC_REGFILE,
784  };
786  entry++;
787  }
788 
789  if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
790  sprintf( entry->name, "%s:fan_speed", sanitized_name);
791  strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
792  entry->type = FEATURE_FAN_SPEED;
793  entry++;
794  }
795 
796  if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
797  sprintf( entry->name, "%s:graphics_max_clock", sanitized_name);
798  strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
799  entry->options.clock = NVML_CLOCK_GRAPHICS;
800  entry->type = FEATURE_MAX_CLOCK;
801  entry++;
802 
803  sprintf( entry->name, "%s:sm_max_clock", sanitized_name);
804  strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
805  entry->options.clock = NVML_CLOCK_SM;
806  entry->type = FEATURE_MAX_CLOCK;
807  entry++;
808 
809  sprintf( entry->name, "%s:memory_max_clock", sanitized_name);
810  strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
811  entry->options.clock = NVML_CLOCK_MEM;
812  entry->type = FEATURE_MAX_CLOCK;
813  entry++;
814  }
815 
817  sprintf( entry->name, "%s:total_memory", sanitized_name);
818  strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
820  entry->type = FEATURE_MEMORY_INFO;
821  entry++;
822 
823  sprintf( entry->name, "%s:unallocated_memory", sanitized_name);
824  strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
826  entry->type = FEATURE_MEMORY_INFO;
827  entry++;
828 
829  sprintf( entry->name, "%s:allocated_memory", sanitized_name);
830  strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
832  entry->type = FEATURE_MEMORY_INFO;
833  entry++;
834  }
835 
837  sprintf( entry->name, "%s:pstate", sanitized_name);
838  strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
839  entry->type = FEATURE_PERF_STATES;
840  entry++;
841  }
842 
843  if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
844  sprintf( entry->name, "%s:power", sanitized_name);
845  strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw for the entire board, including GPU, memory, etc.\n The reading is accurate to within a range of +/-5 watts.", PAPI_MAX_STR_LEN);
846  entry->type = FEATURE_POWER;
847  entry++;
848  }
849 
850  if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
851  sprintf( entry->name, "%s:temperature", sanitized_name);
852  strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
853  entry->type = FEATURE_TEMP;
854  entry++;
855  }
856 
858  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
859  strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
860  entry->options.ecc_opts = (struct local_ecc){
861  .bits = NVML_SINGLE_BIT_ECC,
862  };
864  entry++;
865 
866  sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
867  strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
868  entry->options.ecc_opts = (struct local_ecc){
869  .bits = NVML_DOUBLE_BIT_ECC,
870  };
872  entry++;
873  }
874 
876  sprintf( entry->name, "%s:gpu_utilization", sanitized_name);
877  strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
879  entry->type = FEATURE_UTILIZATION;
880  entry++;
881 
882  sprintf( entry->name, "%s:memory_utilization", sanitized_name);
883  strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
885  entry->type = FEATURE_UTILIZATION;
886  entry++;
887  }
888  strncpy( names[i], name, 64);
889  }
890  }
891 }
sprintf(splash[splash_line++],"\tIozone: Performance Test of File I/O\n")
memset(eventId, 0, size)
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
int type
Definition: linux-nvml.h:50
#define papi_malloc(a)
Definition: papi_memory.h:34
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
struct local_ecc ecc_opts
Definition: linux-nvml.h:41
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
struct cache_ent * entry
Definition: libasync.c:1170
nvmlEccBitType_t bits
Definition: linux-nvml.h:35
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:48
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:144
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
nvml_resource_options_t options
Definition: linux-nvml.h:47
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
int i
Definition: fileop.c:140
Definition: linux-nvml.h:45
static int device_count
Definition: linux-nvml.c:147
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
nvmlClockType_t clock
Definition: linux-nvml.h:40
#define FEATURE_TEMP
Definition: linux-nvml.h:13
static int * features
Definition: linux-nvml.c:153
#define FEATURE_POWER
Definition: linux-nvml.h:12
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:49
char * name
Definition: iozone.c:23648
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define HAS_FEATURE(features, query)
Definition: linux-nvml.h:17
#define GPU_UTILIZATION
Definition: linux-nvml.h:28
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25
const char * names[NUM_EVENTS]
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20

Here is the call graph for this function:

Here is the caller graph for this function:

/* detectDevices: fills the file-level devices[] and features[] arrays and
 * grows num_events, for device indices 0 .. device_count-1.
 *
 * Phase 1 records the PCI bus id NVML reports for every NVML index.
 * Phase 2 asks the CUDA runtime for the bus id of each CUDA index and stores
 *         the matching NVML handle in devices[<cuda index>].
 * Phase 3 probes each device and accumulates its feature mask / event count.
 *
 * Returns PAPI_ESYS when a handle, PCI-info, CUDA bus-id or device-name query
 * fails, PAPI_OK otherwise.
 *
 * NOTE(review): this listing skips source lines 586, 598, 602, 609, 614, 619,
 * 624, 635 and 646 -- whatever statements sit on those lines are not shown
 * here; consult linux-nvml.c before relying on this excerpt. */
static int detectDevices ( )
static

Definition at line 479 of file linux-nvml.c.

480 {
481  nvmlReturn_t ret;
482  nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
483  nvmlDevice_t handle;
484  nvmlPciInfo_t info;
485 
486  cudaError_t cuerr;
487 
488  char busId[16];
489  char name[64];
490  char inforomECC[16];
491  char inforomPower[16];
/* NOTE(review): the two arrays below are variable-length, sized by device_count. */
492  char names[device_count][64];
493  char nvml_busIds[device_count][16];
494 
495  float ecc_version = 0.0, power_version = 0.0;
496 
497  int i = 0,
498  j = 0;
499  int isTesla = 0;
500  int isFermi = 0;
501  int isUnique = 1;
502 
503  unsigned int temp = 0;
504 
505 
506  /* Phase 1: list of NVML PCI bus ids, indexed by NVML device index */
507  for (i=0; i < device_count; i++) {
508  ret = (*nvmlDeviceGetHandleByIndexPtr)( i, &handle );
509  if ( NVML_SUCCESS != ret ) {
510  SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
511  return PAPI_ESYS;
512  }
513 
514  ret = (*nvmlDeviceGetPciInfoPtr)( handle, &info );
515  if ( NVML_SUCCESS != ret ) {
516  SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", (*nvmlErrorStringPtr)(ret) );
517  return PAPI_ESYS;
518  }
519  strncpy(nvml_busIds[i], info.busId, 16);
520  }
521 
522  /* Phase 2: we want to key our list of nvmlDevice_ts by each device's cuda index */
523  for (i=0; i < device_count; i++) {
524  cuerr = (*cudaDeviceGetPCIBusIdPtr)( busId, 16, i );
525  if ( CUDA_SUCCESS != cuerr ) {
526  SUBDBG("cudaDeviceGetPCIBusId failed.\n");
527  return PAPI_ESYS;
528  }
/* NOTE(review): if no NVML bus id matches, devices[i] is not assigned by this loop. */
529  for (j=0; j < device_count; j++ ) {
530  if ( !strncmp( busId, nvml_busIds[j], 16) ) {
531  ret = (*nvmlDeviceGetHandleByIndexPtr)(j, &devices[i] );
532  if ( NVML_SUCCESS != ret ) {
533  SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
534  return PAPI_ESYS;
535  }
536  break;
537  }
538  }
539  }
540 
541  memset(names, 0x0, device_count*64);
542  /* Phase 3: for each card, check what is queryable */
543  for (i=0; i < device_count; i++ ) {
544  isTesla=0;
/* isFermi starts at 1 for every device and is cleared when an inforom query fails. */
545  isFermi=1;
546  isUnique = 1;
547  features[i] = 0;
548 
549  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
550  if ( NVML_SUCCESS != ret) {
551  SUBDBG("nvmlDeviceGetName failed \n");
552  return PAPI_ESYS;
553  }
554 
/* Only names recorded by earlier unique devices (names[0..i-1]) are compared. */
555  for (j=0; j < i; j++ )
556  if ( 0 == strncmp( name, names[j], 64 ) ) {
557  /* if we have a match, and IF everything is sane,
558  * devices with the same name eg Tesla C2075 share features */
559  isUnique = 0;
560  features[i] = features[j];
561 
562  }
563 
/* Probing (and every num_events increment shown below) happens only for a
 * device whose name has not been seen before. */
564  if ( isUnique ) {
565  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
566  if ( NVML_SUCCESS != ret ) {
567  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
568  isFermi = 0;
569  }
570  ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
571  if ( NVML_SUCCESS != ret ) {
572  /* This implies the card is older than Fermi */
573  SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
574  SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
575  isFermi = 0;
576  }
577 
/* NOTE(review): the version strings are parsed even when the queries above
 * failed, in which case inforomECC / inforomPower were never initialised here. */
578  ecc_version = strtof(inforomECC, NULL );
579  power_version = strtof( inforomPower, NULL);
580 
/* The return code of this second name query is not checked. power_version and
 * isTesla are not referenced again in the lines shown in this listing. */
581  ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
582  isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;
583 
584  /* For Tesla and Quadro products from Fermi and Kepler families. */
585  if ( isFermi ) {
587  num_events += 3;
588  }
589 
590  /* For Tesla and Quadro products from Fermi and Kepler families.
591  requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
592  requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
593  requires ECC mode to be enabled. */
594  if ( isFermi ) {
/* Only `mode` is inspected afterwards; the return code is not checked. */
595  ret = (*nvmlDeviceGetEccModePtr)( devices[i], &mode, NULL );
596  if ( NVML_FEATURE_ENABLED == mode) {
597  if ( ecc_version >= 2.0 ) {
599  num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
600  }
601  if ( ecc_version >= 1.0 ) {
603  num_events += 2; /* single bit errors, double bit errors */
604  }
605  }
606  }
607 
608  /* For all discrete products with dedicated fans */
610  num_events++;
611 
612  /* For Tesla and Quadro products from Fermi and Kepler families. */
613  if ( isFermi ) {
615  num_events += 3;
616  }
617 
618  /* For all products */
620  num_events += 3; /* total, free, used */
621 
622  /* For Tesla and Quadro products from the Fermi and Kepler families. */
623  if ( isFermi ) {
625  num_events++;
626  }
627 
628  /* For "GF11x" Tesla and Quadro products from the Fermi family
629  requires NVML_INFOROM_POWER 3.0 or higher
630  For Tesla and Quadro products from the Kepler family
631  does not require NVML_INFOROM_POWER */
632  if ( isFermi ) {
/* Power support is detected by attempting an actual reading into `temp`. */
633  ret = (*nvmlDeviceGetPowerUsagePtr)( devices[i], &temp);
634  if ( NVML_SUCCESS == ret ) {
636  num_events++;
637  }
638  }
639 
640  /* For all discrete and S-class products. */
641  features[i] |= FEATURE_TEMP;
642  num_events++;
643 
644  /* For Tesla and Quadro products from the Fermi and Kepler families */
645  if (isFermi) {
647  num_events += 2;
648  }
649 
/* Remember this name so later devices with the same name reuse features[i]. */
650  strncpy( names[i], name, 64);
651 
652  }
653  }
654  return PAPI_OK;
655 }
memset(eventId, 0, size)
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
return PAPI_OK
Definition: linux-nvml.c:458
long long ret
Definition: iozone.c:1346
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
static nvmlDevice_t * devices
Definition: linux-nvml.c:152
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
int i
Definition: fileop.c:140
static int device_count
Definition: linux-nvml.c:147
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
#define FEATURE_TEMP
Definition: linux-nvml.h:13
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int * features
Definition: linux-nvml.c:153
nvmlDevice_t handle
Definition: linux-nvml.c:399
#define FEATURE_POWER
Definition: linux-nvml.h:12
#define PAPI_ESYS
Definition: fpapi.h:108
char * name
Definition: iozone.c:23648
int temp
Definition: iozone.c:22158
long j
Definition: iozone.c:19135
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
const char * names[NUM_EVENTS]

Here is the call graph for this function:

Here is the caller graph for this function:

/* getClockSpeed: returns the value nvmlDeviceGetClockInfo reports for clock
 * domain `which_one` on device `dev`, widened to unsigned long long.
 * A failing NVML call is only logged through SUBDBG; the function then
 * returns whatever is in `ret`, which starts at 0. */
unsigned long long getClockSpeed ( nvmlDevice_t  dev,
nvmlClockType_t  which_one 
)

Definition at line 156 of file linux-nvml.c.

157 {
158  unsigned int ret = 0;
159  nvmlReturn_t bad;
160  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
161 
162  if ( NVML_SUCCESS != bad ) {
163  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
164  }
165 
166  return (unsigned long long)ret;
167 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/* getEccLocalErrors: returns one location-specific volatile ECC error count
 * for device `dev`.
 *   bits      - error class forwarded to nvmlDeviceGetDetailedEccErrors
 *   which_one - LOCAL_ECC_REGFILE, LOCAL_ECC_L1, LOCAL_ECC_L2 or LOCAL_ECC_MEM
 * Returns the selected counter, or (unsigned long long)-1 for an unknown
 * `which_one`. A failing NVML call is only logged; `counts` is
 * zero-initialised so that path yields 0 (like the scalar getters in this
 * file) instead of reading an indeterminate struct. */
unsigned long long getEccLocalErrors ( nvmlDevice_t  dev,
nvmlEccBitType_t  bits,
int  which_one 
)

Definition at line 170 of file linux-nvml.c.

171 {
172  nvmlEccErrorCounts_t counts = {0};
173 
174  nvmlReturn_t bad;
175  bad = (*nvmlDeviceGetDetailedEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
176 
177  if ( NVML_SUCCESS != bad ) {
178  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
179  }
180 
181 
182  switch ( which_one ) {
183  case LOCAL_ECC_REGFILE:
184  return counts.registerFile;
185  case LOCAL_ECC_L1:
186  return counts.l1Cache;
187  case LOCAL_ECC_L2:
188  return counts.l2Cache;
189  case LOCAL_ECC_MEM:
190  return counts.deviceMemory;
191  default:
192  ;
193  }
194  return (unsigned long long)-1;
195 }
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:26
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define LOCAL_ECC_L1
Definition: linux-nvml.h:24
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:23
#define LOCAL_ECC_L2
Definition: linux-nvml.h:25

Here is the caller graph for this function:

/* getFanSpeed: returns the value nvmlDeviceGetFanSpeed reports for `dev`,
 * widened to unsigned long long. A failing NVML call is only logged through
 * SUBDBG; `ret` starts at 0 and is returned as-is in that case. */
unsigned long long getFanSpeed ( nvmlDevice_t  dev)

Definition at line 198 of file linux-nvml.c.

199 {
200  unsigned int ret = 0;
201  nvmlReturn_t bad;
202  bad = (*nvmlDeviceGetFanSpeedPtr)( dev, &ret );
203 
204  if ( NVML_SUCCESS != bad ) {
205  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
206  }
207 
208 
209  return (unsigned long long)ret;
210 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/* getMaxClockSpeed: returns a clock value for domain `which_one` on `dev`,
 * widened to unsigned long long. A failing NVML call is only logged; `ret`
 * starts at 0 and is returned as-is in that case.
 *
 * NOTE(review): the body calls nvmlDeviceGetClockInfoPtr, exactly like
 * getClockSpeed, so as written it returns the same reading. Presumably a
 * max-clock query was intended -- confirm against the NVML API; no such
 * function pointer is resolved in linkCudaLibraries. */
unsigned long long getMaxClockSpeed ( nvmlDevice_t  dev,
nvmlClockType_t  which_one 
)

Definition at line 213 of file linux-nvml.c.

214 {
215  unsigned int ret = 0;
216  nvmlReturn_t bad;
217  bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
218 
219  if ( NVML_SUCCESS != bad ) {
220  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
221  }
222 
223 
224  return (unsigned long long) ret;
225 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/* getMemoryInfo: returns one field of the nvmlMemory_t record NVML reports
 * for device `dev`.
 *   which_one - MEMINFO_TOTAL_MEMORY (total), MEMINFO_UNALLOCED (free) or
 *               MEMINFO_ALLOCED (used)
 * Returns the selected field, or (unsigned long long)-1 for an unknown
 * `which_one`. A failing NVML call is only logged; `meminfo` is
 * zero-initialised so that path yields 0 instead of reading an indeterminate
 * struct.
 * The `case MEMINFO_TOTAL_MEMORY:` label (source line 239) had been dropped
 * from this listing, leaving `return meminfo.total;` outside any case; it is
 * restored here. */
unsigned long long getMemoryInfo ( nvmlDevice_t  dev,
int  which_one 
)

Definition at line 228 of file linux-nvml.c.

229 {
230  nvmlMemory_t meminfo = {0};
231  nvmlReturn_t bad;
232  bad = (*nvmlDeviceGetMemoryInfoPtr)( dev, &meminfo );
233 
234  if ( NVML_SUCCESS != bad ) {
235  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
236  }
237 
238  switch (which_one) {
239  case MEMINFO_TOTAL_MEMORY:
240  return meminfo.total;
241  case MEMINFO_UNALLOCED:
242  return meminfo.free;
243  case MEMINFO_ALLOCED:
244  return meminfo.used;
245  default:
246  ;
247  }
248  return (unsigned long long)-1;
249 }
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:19
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:21
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:20

Here is the caller graph for this function:

/* getPowerUsage: returns the value nvmlDeviceGetPowerUsage reports for `dev`,
 * widened to unsigned long long. A failing NVML call is only logged through
 * SUBDBG. `power` now starts at 0, matching the other scalar getters in this
 * file, so the failure path no longer returns an indeterminate value. */
unsigned long long getPowerUsage ( nvmlDevice_t  dev)

Definition at line 308 of file linux-nvml.c.

309 {
310  unsigned int power = 0;
311  nvmlReturn_t bad;
312  bad = (*nvmlDeviceGetPowerUsagePtr)( dev, &power );
313 
314  if ( NVML_SUCCESS != bad ) {
315  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
316  }
317 
318 
319  return (unsigned long long) power;
320 }
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/* getPState: converts the nvmlPstates_t reported for `dev` into a number.
 * The switch is entered at the label matching `state` and deliberately falls
 * through every lower label, incrementing `ret` once per label, so
 * NVML_PSTATE_<N> yields N (NVML_PSTATE_0 -> 0, NVML_PSTATE_15 -> 15).
 * NVML_PSTATE_UNKNOWN or any other value returns (unsigned long long)-1.
 * A failing NVML call is only logged; `state` starts as NVML_PSTATE_15, so if
 * the call leaves it untouched the result is 15. */
unsigned long long getPState ( nvmlDevice_t  dev)

Definition at line 252 of file linux-nvml.c.

253 {
254  unsigned int ret = 0;
255  nvmlPstates_t state = NVML_PSTATE_15;
256  nvmlReturn_t bad;
257  bad = (*nvmlDeviceGetPerformanceStatePtr)( dev, &state );
258 
259  if ( NVML_SUCCESS != bad ) {
260  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
261  }
262 
263 
/* Intentional fall-through chain: no `break` until NVML_PSTATE_0. */
264  switch ( state ) {
265  case NVML_PSTATE_15:
266  ret++;
267  case NVML_PSTATE_14:
268  ret++;
269  case NVML_PSTATE_13:
270  ret++;
271  case NVML_PSTATE_12:
272  ret++;
273  case NVML_PSTATE_11:
274  ret++;
275  case NVML_PSTATE_10:
276  ret++;
277  case NVML_PSTATE_9:
278  ret++;
279  case NVML_PSTATE_8:
280  ret++;
281  case NVML_PSTATE_7:
282  ret++;
283  case NVML_PSTATE_6:
284  ret++;
285  case NVML_PSTATE_5:
286  ret++;
287  case NVML_PSTATE_4:
288  ret++;
289  case NVML_PSTATE_3:
290  ret++;
291  case NVML_PSTATE_2:
292  ret++;
293  case NVML_PSTATE_1:
294  ret++;
295  case NVML_PSTATE_0:
296  break;
297  case NVML_PSTATE_UNKNOWN:
298  default:
299  /* This should never happen?
300  * The API docs just state Unknown performance state... */
301  return (unsigned long long) -1;
302  }
303 
304  return (unsigned long long)ret;
305 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
child_idents[x-1] state
Definition: iozone.c:21341

Here is the caller graph for this function:

/* getTemperature: returns the NVML_TEMPERATURE_GPU sensor reading that
 * nvmlDeviceGetTemperature reports for `dev`, widened to unsigned long long.
 * A failing NVML call is only logged through SUBDBG; `ret` starts at 0 and is
 * returned as-is in that case. */
unsigned long long getTemperature ( nvmlDevice_t  dev)

Definition at line 323 of file linux-nvml.c.

324 {
325  unsigned int ret = 0;
326  nvmlReturn_t bad;
327  bad = (*nvmlDeviceGetTemperaturePtr)( dev, NVML_TEMPERATURE_GPU, &ret );
328 
329  if ( NVML_SUCCESS != bad ) {
330  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
331  }
332 
333 
334  return (unsigned long long)ret;
335 }
long long ret
Definition: iozone.c:1346
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/* getTotalEccErrors: returns the volatile (NVML_VOLATILE_ECC) total ECC error
 * count of class `bits` that nvmlDeviceGetTotalEccErrors reports for `dev`.
 * A failing NVML call is only logged through SUBDBG; `counts` starts at 0 and
 * is returned as-is in that case. */
unsigned long long getTotalEccErrors ( nvmlDevice_t  dev,
nvmlEccBitType_t  bits 
)

Definition at line 338 of file linux-nvml.c.

339 {
340  unsigned long long counts = 0;
341  nvmlReturn_t bad;
342  bad = (*nvmlDeviceGetTotalEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
343 
344  if ( NVML_SUCCESS != bad ) {
345  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
346  }
347 
348 
349  return counts;
350 }
#define SUBDBG(format, args...)
Definition: papi_debug.h:63

Here is the caller graph for this function:

/* getUtilization: returns one field of the nvmlUtilization_t record NVML
 * reports for device `dev`.
 *   which_one - GPU_UTILIZATION (util.gpu) or MEMORY_UTILIZATION (util.memory)
 * Returns the selected field widened to unsigned long long, or
 * (unsigned long long)-1 for an unknown `which_one`. A failing NVML call is
 * only logged; `util` is zero-initialised so that path yields 0 instead of
 * reading an indeterminate struct. */
unsigned long long getUtilization ( nvmlDevice_t  dev,
int  which_one 
)

Definition at line 356 of file linux-nvml.c.

357 {
358  nvmlUtilization_t util = {0};
359  nvmlReturn_t bad;
360  bad = (*nvmlDeviceGetUtilizationRatesPtr)( dev, &util );
361 
362  if ( NVML_SUCCESS != bad ) {
363  SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
364  }
365 
366 
367  switch (which_one) {
368  case GPU_UTILIZATION:
369  return (unsigned long long) util.gpu;
370  case MEMORY_UTILIZATION:
371  return (unsigned long long) util.memory;
372  default:
373  ;
374  }
375 
376  return (unsigned long long) -1;
377 }
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:29
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define GPU_UTILIZATION
Definition: linux-nvml.h:28

Here is the caller graph for this function:

/* linkCudaLibraries: opens libcuda.so, libcudart.so and libnvidia-ml.so with
 * dlopen(RTLD_NOW | RTLD_GLOBAL) and resolves, via dlsym, every CUDA / NVML
 * entry point this component calls through a function pointer.
 *
 * Returns PAPI_OK when every library and symbol was found. On the first
 * failure it copies an explanatory message into
 * _nvml_vector.cmp_info.disabled_reason and returns PAPI_ENOSUPP. The same
 * happens up front when _dl_non_dynamic_init is non-NULL.
 *
 * NOTE(review): symbol lookup failure is detected with dlerror() != NULL
 * after each dlsym; dlerror() is not called beforehand to clear an error left
 * over from earlier code -- confirm that cannot occur.
 * NOTE(review): no dlclose appears on the failure paths of this function;
 * check whether the caller releases dl1/dl2/dl3. */
static int linkCudaLibraries ( )
static

Definition at line 987 of file linux-nvml.c.

988 {
989  /* Attempt to guess if we were statically linked to libc, if so bail */
990  if ( _dl_non_dynamic_init != NULL ) {
991  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
992  return PAPI_ENOSUPP;
993  }
994 
995  /* Need to link in the cuda libraries, if not found disable the component */
/* --- CUDA driver library: cuInit --- */
996  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
997  if (!dl1)
998  {
999  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
1000  return ( PAPI_ENOSUPP );
1001  }
1002  cuInitPtr = dlsym(dl1, "cuInit");
1003  if (dlerror() != NULL)
1004  {
1005  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
1006  return ( PAPI_ENOSUPP );
1007  }
1008 
/* --- CUDA runtime library: device enumeration and PCI bus id lookup --- */
1009  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
1010  if (!dl2)
1011  {
1012  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
1013  return ( PAPI_ENOSUPP );
1014  }
1015  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
1016  if (dlerror() != NULL)
1017  {
1018  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
1019  return ( PAPI_ENOSUPP );
1020  }
1021  cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
1022  if (dlerror() != NULL)
1023  {
1024  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.",PAPI_MAX_STR_LEN);
1025  return ( PAPI_ENOSUPP );
1026  }
1027  cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
1028  if (dlerror() != NULL)
1029  {
1030  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.",PAPI_MAX_STR_LEN);
1031  return ( PAPI_ENOSUPP );
1032  }
1033 
/* --- NVML library: every nvml* entry point used by this component --- */
1034  dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
1035  if (!dl3)
1036  {
1037  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.",PAPI_MAX_STR_LEN);
1038  return ( PAPI_ENOSUPP );
1039  }
1040  nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
1041  if (dlerror() != NULL)
1042  {
1043  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.",PAPI_MAX_STR_LEN);
1044  return ( PAPI_ENOSUPP );
1045  }
1046  nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
1047  if (dlerror() != NULL)
1048  {
1049  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.",PAPI_MAX_STR_LEN);
1050  return ( PAPI_ENOSUPP );
1051  }
1052  nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
1053  if (dlerror() != NULL)
1054  {
1055  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.",PAPI_MAX_STR_LEN);
1056  return ( PAPI_ENOSUPP );
1057  }
1058  nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
1059  if (dlerror() != NULL)
1060  {
1061  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.",PAPI_MAX_STR_LEN);
1062  return ( PAPI_ENOSUPP );
1063  }
1064  nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
1065  if (dlerror() != NULL)
1066  {
1067  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.",PAPI_MAX_STR_LEN);
1068  return ( PAPI_ENOSUPP );
1069  }
1070  nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
1071  if (dlerror() != NULL)
1072  {
1073  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.",PAPI_MAX_STR_LEN);
1074  return ( PAPI_ENOSUPP );
1075  }
1076  nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
1077  if (dlerror() != NULL)
1078  {
1079  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.",PAPI_MAX_STR_LEN);
1080  return ( PAPI_ENOSUPP );
1081  }
1082  nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
1083  if (dlerror() != NULL)
1084  {
1085  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.",PAPI_MAX_STR_LEN);
1086  return ( PAPI_ENOSUPP );
1087  }
1088  nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
1089  if (dlerror() != NULL)
1090  {
1091  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.",PAPI_MAX_STR_LEN);
1092  return ( PAPI_ENOSUPP );
1093  }
1094  nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
1095  if (dlerror() != NULL)
1096  {
1097  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.",PAPI_MAX_STR_LEN);
1098  return ( PAPI_ENOSUPP );
1099  }
1100  nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
1101  if (dlerror() != NULL)
1102  {
1103  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.",PAPI_MAX_STR_LEN);
1104  return ( PAPI_ENOSUPP );
1105  }
1106  nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
1107  if (dlerror() != NULL)
1108  {
1109  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.",PAPI_MAX_STR_LEN);
1110  return ( PAPI_ENOSUPP );
1111  }
1112  nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
1113  if (dlerror() != NULL)
1114  {
1115  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.",PAPI_MAX_STR_LEN);
1116  return ( PAPI_ENOSUPP );
1117  }
1118  nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
1119  if (dlerror() != NULL)
1120  {
1121  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.",PAPI_MAX_STR_LEN);
1122  return ( PAPI_ENOSUPP );
1123  }
1124  nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
1125  if (dlerror() != NULL)
1126  {
1127  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.",PAPI_MAX_STR_LEN);
1128  return ( PAPI_ENOSUPP );
1129  }
1130  nvmlInitPtr = dlsym(dl3, "nvmlInit");
1131  if (dlerror() != NULL)
1132  {
1133  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.",PAPI_MAX_STR_LEN);
1134  return ( PAPI_ENOSUPP );
1135  }
1136  nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
1137  if (dlerror() != NULL)
1138  {
1139  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.",PAPI_MAX_STR_LEN);
1140  return ( PAPI_ENOSUPP );
1141  }
1142  nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
1143  if (dlerror() != NULL)
1144  {
1145  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.",PAPI_MAX_STR_LEN);
1146  return ( PAPI_ENOSUPP );
1147  }
1148 
1149  return ( PAPI_OK );
1150 }
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1490
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:632
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:41
#define PAPI_ENOSUPP
Definition: fpapi.h:123

Here is the caller graph for this function:

/* nvml_hardware_reset: intentionally a no-op. The body holds only comments;
 * the loop that would clear the volatile ECC counters on each device is left
 * disabled because, per the note below, those NVML calls need root/admin
 * access. */
static void nvml_hardware_reset ( )
static

Definition at line 380 of file linux-nvml.c.

381 {
382  /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while
383  * possible to implement a reset on the ECC counters, we pass */
384  /*
385  int i;
386  for ( i=0; i < device_count; i++ )
387  nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC );
388  */
389 }

Here is the caller graph for this function:

switch ( entry->  type)

Definition at line 416 of file linux-nvml.c.

416  {
417  case FEATURE_CLOCK_INFO:
419  (nvmlClockType_t)entry->options.clock );
420  break;
423  (nvmlEccBitType_t)entry->options.ecc_opts.bits,
424  (int)entry->options.ecc_opts.which_one);
425  break;
426  case FEATURE_FAN_SPEED:
427  *value = getFanSpeed( handle );
428  break;
429  case FEATURE_MAX_CLOCK:
431  (nvmlClockType_t)entry->options.clock );
432  break;
433  case FEATURE_MEMORY_INFO:
435  (int)entry->options.which_one );
436  break;
437  case FEATURE_PERF_STATES:
438  *value = getPState( handle );
439  break;
440  case FEATURE_POWER:
441  *value = getPowerUsage( handle );
442  break;
443  case FEATURE_TEMP:
445  break;
448  (nvmlEccBitType_t)entry->options.ecc_opts.bits );
449  break;
450  case FEATURE_UTILIZATION:
452  (int)entry->options.which_one );
453  break;
454  default:
455  return PAPI_EINVAL;
456  }
unsigned long long getPState(nvmlDevice_t dev)
Definition: linux-nvml.c:252
unsigned long long getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
Definition: linux-nvml.c:338
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
unsigned long long getPowerUsage(nvmlDevice_t dev)
Definition: linux-nvml.c:308
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
unsigned long long getMemoryInfo(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:228
struct cache_ent * entry
Definition: libasync.c:1170
return PAPI_EINVAL
Definition: linux-nvml.c:408
void double value
Definition: iozone.c:18781
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
unsigned long long getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
Definition: linux-nvml.c:170
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
#define FEATURE_TEMP
Definition: linux-nvml.h:13
nvmlDevice_t handle
Definition: linux-nvml.c:399
#define FEATURE_POWER
Definition: linux-nvml.h:12
unsigned long long getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:156
unsigned long long getTemperature(nvmlDevice_t dev)
Definition: linux-nvml.c:323
unsigned long long getFanSpeed(nvmlDevice_t dev)
Definition: linux-nvml.c:198
unsigned long long getUtilization(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:356
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
unsigned long long getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:213

Here is the call graph for this function:

Variable Documentation

void(* _dl_non_dynamic_init)(void)

Holds control flags. Usually there is one of these per event set. Usually this is out-of-band configuration of the hardware.

Copy of counts; holds results when stopped.

Definition at line 39 of file linux-nvml.c.

131 {
132  int num_events;
133  int which_counter[NVML_MAX_COUNTERS];
134  long long counter[NVML_MAX_COUNTERS];
static int num_events
#define NVML_MAX_COUNTERS
nvml_control_state_t
Definition: linux-nvml.c:135
papi_vector_t _nvml_vector

Vector that points to entry points for our component

Definition at line 1490 of file linux-nvml.c.

(*) cudaGetDevicePtr cudaIdx) = -1

Definition at line 400 of file linux-nvml.c.

int device_count = 0
static

Number of devices detected at component_init time

Definition at line 147 of file linux-nvml.c.

nvmlDevice_t* devices =NULL
static

Definition at line 152 of file linux-nvml.c.

entry = &nvml_native_table[which_one]

Definition at line 402 of file linux-nvml.c.

int* features =NULL
static

Definition at line 153 of file linux-nvml.c.

handle = devices[cudaIdx]

Definition at line 399 of file linux-nvml.c.

int
static
Initial value:

Code that reads event values.

Definition at line 397 of file linux-nvml.c.

int num_events = 0
static

Number of events in the table.

Definition at line 150 of file linux-nvml.c.

nvml_control_state_t

Definition at line 135 of file linux-nvml.c.

nvml_native_event_entry_t* nvml_native_table =NULL
static

This table contains the native events

Definition at line 144 of file linux-nvml.c.

return PAPI_EINVAL

Definition at line 408 of file linux-nvml.c.

return PAPI_OK

Definition at line 458 of file linux-nvml.c.

* value = (long long) -1

Definition at line 403 of file linux-nvml.c.