PAPI  5.4.1.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-cuda.c File Reference

This implements a PAPI component that enables PAPI-C to access hardware monitoring counters for NVIDIA CUDA GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Data Structures

struct  papicuda_context_t
 
struct  papicuda_name_desc_t
 
struct  papicuda_device_desc_t
 
struct  papicuda_control_t
 
struct  papicuda_active_cucontext_t
 

Macros

#define PAPICUDA_MAX_COUNTERS   512
 
#define CHECK_CU_ERROR(err, cufunc)   if( (err) != CUDA_SUCCESS ) { PAPIERROR( "CUDA Driver API function failed '%s'", cufunc ); return -1; }
 
#define CHECK_CUPTI_ERROR(err, cuptifunc)   if( (err) != CUPTI_SUCCESS ) { PAPIERROR( "CUPTI API function failed '%s'", cuptifunc ); return -1; }
 
#define CHECK_PRINT_EVAL(err, str, eval)   if( (err) ) { PAPIERROR( "%s", str ); eval; }
 
#define CUDAAPI   __attribute__((weak))
 
#define CUDARTAPI   __attribute__((weak))
 
#define CUPTIAPI   __attribute__((weak))
 
#define CHECK_DL_STATUS(err, str)   if( err ) { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
 

Functions

static int papicuda_list_all_events (papicuda_context_t *gctxt)
 
static int papicuda_init_thread (hwd_context_t *ctx)
 
static int papicuda_init_component (int cidx)
 
static int papicuda_init_control_state (hwd_control_state_t *ctrl)
 
static int papicuda_update_control_state (hwd_control_state_t *ctrl, NativeInfo_t *nativeInfo, int nativeCount, hwd_context_t *ctx)
 
static int papicuda_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
static int papicuda_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
static int papicuda_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **events, int flags)
 
int papicuda_shutdown_thread (hwd_context_t *ctx)
 
static int papicuda_shutdown_component (void)
 
static int papicuda_ctrl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
static int papicuda_set_domain (hwd_control_state_t *ctrl, int domain)
 
static int papicuda_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
static int papicuda_cleanup_eventset (hwd_control_state_t *ctrl)
 
static int papicuda_ntv_enum_events (unsigned int *EventCode, int modifier)
 
static int papicuda_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
static int papicuda_ntv_code_to_descr (unsigned int EventCode, char *name, int len)
 

Variables

static void * dl1 = NULL
 
static void * dl2 = NULL
 
static void * dl3 = NULL
 
papi_vector_t _cuda_vector
 
static papicuda_context_t * global_papicuda_context = NULL
 
static papicuda_control_t * global_papicuda_control = NULL
 
void(* _dl_non_dynamic_init )(cudaError_t CUDARTAPI cudaFree void)
 

Detailed Description

Author
Asim YarKhan yarkh.nosp@m.an@i.nosp@m.cl.ut.nosp@m.k.ed.nosp@m.u (updated in 2015 for multiple CUDA contexts/devices)
Heike Jagode (in collaboration with Robert Dietrich, TU Dresden) jagod.nosp@m.e@ee.nosp@m.cs.ut.nosp@m.k.ed.nosp@m.u

Definition in file linux-cuda.c.

Macro Definition Documentation

#define CHECK_CU_ERROR (   err,
  cufunc 
)    if( (err) != CUDA_SUCCESS ) { PAPIERROR( "CUDA Driver API function failed '%s'", cufunc ); return -1; }

Definition at line 88 of file linux-cuda.c.

#define CHECK_CUPTI_ERROR (   err,
  cuptifunc 
)    if( (err) != CUPTI_SUCCESS ) { PAPIERROR( "CUPTI API function failed '%s'", cuptifunc ); return -1; }

Definition at line 91 of file linux-cuda.c.

#define CHECK_DL_STATUS (   err,
  str 
)    if( err ) { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
#define CHECK_PRINT_EVAL (   err,
  str,
  eval 
)    if( (err) ) { PAPIERROR( "%s", str ); eval; }

Definition at line 94 of file linux-cuda.c.

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define CUPTIAPI   __attribute__((weak))
#define PAPICUDA_MAX_COUNTERS   512

Definition at line 27 of file linux-cuda.c.

Function Documentation

static int papicuda_cleanup_eventset ( hwd_control_state_t * ctrl)
static

Definition at line 821 of file linux-cuda.c.

822 {
823  SUBDBG( "Entering\n" );
824  ( void ) ctrl;
826  papicuda_active_cucontext_t *currctrl;
827  int cuContextIdx, gg;
828  CUptiResult cuptiErr;
829  CUcontext saveCtx, tmpCtx;
830 
831  SUBDBG( "Switch to each context and disable CUDA eventgroups\n" );
832  /* Save current cuda context and restore later */
833  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
834  /* Switch to each context and destroy CUDA eventgroups */
835  for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
836  currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
837  /* Switch to this device / cuda context */
838  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ), "cuCtxPushCurrent" );
839  for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
840  /* Destroy the eventGroups; it also frees the perfmon hardware on the GPU */
841  cuptiErr = ( *cuptiEventGroupDestroyPtr )( currctrl->eventGroup[gg] );
842  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDestroy" );
843  }
844  currctrl->numEventGroups = 0;
845  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ), "cuCtxPopCurrent" );
846  }
847  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ), "cuCtxPushCurrent" );
848  /* Record that there are no active contexts or events */
849  gctrl->activeEventCount = 0;
850  return ( PAPI_OK );
851 }
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.c:88
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define CHECK_CUPTI_ERROR(err, cuptifunc)
Definition: linux-cuda.c:91
int countOfActiveCUContexts
Definition: linux-cuda.c:57
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
CUpti_EventGroup eventGroup[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:70
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:58
static int papicuda_ctrl ( hwd_context_t * ctx,
int  code,
_papi_int_option_t * option 
)
static

This function sets various options in the component. It does nothing in the CUDA component.

Parameters
[in] ctx – hardware context
[in] code – valid are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT
[in] option – options to be set

Definition at line 745 of file linux-cuda.c.

746 {
747  SUBDBG( "Entering\n" );
748  ( void ) ctx;
749  ( void ) code;
750  ( void ) option;
751  return ( PAPI_OK );
752 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int papicuda_init_component ( int  cidx)
static

Initialize hardware counters, set up the function vector table, and get hardware information. This routine is called when the PAPI process is initialized (i.e., by PAPI_library_init).

Definition at line 396 of file linux-cuda.c.

397 {
398  SUBDBG( "Entering with cidx: %d\n", cidx );
399  int err;
400 
401  /* link in all the cuda libraries and resolve the symbols we need to use */
402  if( papicuda_linkCudaLibraries() != PAPI_OK ) {
403  PAPIERROR( "Dynamic link of CUDA libraries failed, component will be disabled.\n" );
404  return ( PAPI_ENOSUPP );
405  }
406 
407  /* Create the structure */
410 
411  /* Get list of all native CUDA events supported */
413  CHECK_PRINT_EVAL( err!=0, "ERROR: Could not get a list of CUDA/CUPTI events", return( PAPI_ENOSUPP ) );
414 
415  /* Export some information */
420 
421  //SUBDBG( "Exiting PAPI_OK\n" );
422  return ( PAPI_OK );
423 }
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:82
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
static int papicuda_list_all_events(papicuda_context_t *gctxt)
Definition: linux-cuda.c:250
static int cidx
Definition: event_info.c:40
uint32_t availEventSize
Definition: linux-cuda.c:33
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
void PAPIERROR(char *format,...)
papi_vector_t _cuda_vector
Definition: linux-cuda.c:79
#define papi_calloc(a, b)
Definition: papi_memory.h:37

Here is the call graph for this function:

static int papicuda_init_control_state ( hwd_control_state_t * ctrl)
static

Set up a counter control state. In general, a control state holds the hardware info for an EventSet.

Definition at line 430 of file linux-cuda.c.

431 {
432  SUBDBG( "Entering\n" );
433  ( void ) ctrl;
435 
436  CHECK_PRINT_EVAL( !gctxt, "Error: The PAPI CUDA component needs to be initialized first", return( PAPI_ENOINIT ) );
437  /* If no events were found during the initial component initialization, return error */
439  strncpy( _cuda_vector.cmp_info.disabled_reason, "ERROR CUDA: No events exist", PAPI_MAX_STR_LEN );
440  return ( PAPI_EMISC );
441  }
442  /* If it does not exist, create the global structure to hold CUDA contexts and active events */
443  if ( !global_papicuda_control ) {
447  }
448  return PAPI_OK;
449 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:82
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
#define PAPI_EMISC
Definition: papi.h:265
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
int countOfActiveCUContexts
Definition: linux-cuda.c:57
uint32_t availEventSize
Definition: linux-cuda.c:33
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _cuda_vector
Definition: linux-cuda.c:79
#define PAPI_ENOINIT
Definition: papi.h:267
#define PAPI_MAX_STR_LEN
Definition: papi.h:463
#define papi_calloc(a, b)
Definition: papi_memory.h:37
static int papicuda_init_thread ( hwd_context_t * ctx)
static

Definition at line 375 of file linux-cuda.c.

376 {
377  ( void ) ctx;
378  SUBDBG( "Entering\n" );
379 
380  return PAPI_OK;
381 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int papicuda_list_all_events ( papicuda_context_t * gctxt)
static

Definition at line 250 of file linux-cuda.c.

251 {
252  SUBDBG( "Entering\n" );
253  CUptiResult cuptiErr;
254  CUresult cuErr;
255  unsigned int deviceNum;
256  uint32_t domainNum, eventNum;
257  papicuda_device_desc_t *mydevice;
258  char tmpStr[PAPI_MIN_STR_LEN];
259  tmpStr[PAPI_MIN_STR_LEN-1]='\0';
260  size_t tmpSizeBytes;
261  int ii;
262 
263  /* How many gpgpu devices do we have? */
264  cuErr = ( *cuDeviceGetCountPtr )( &gctxt->deviceCount );
265  if ( cuErr==CUDA_ERROR_NOT_INITIALIZED ) {
266  /* If CUDA is not initialized, initialize CUDA and retry the device list */
267  /* This is required for some of the PAPI tools, that do not call the init functions */
268  CHECK_CU_ERROR( ( *cuInitPtr )( 0 ), "cuInit" );
269  cuErr = ( *cuDeviceGetCountPtr )( &gctxt->deviceCount );
270  }
271  CHECK_CU_ERROR( cuErr, "cuDeviceGetCount" );
272  CHECK_PRINT_EVAL( gctxt->deviceCount==0, "ERROR CUDA: Could not find any CUDA devices", return( PAPI_ENOSUPP ) );
273  SUBDBG( "Found %d devices\n", gctxt->deviceCount );
274 
275  /* allocate memory for device information */
277  CHECK_PRINT_EVAL( !gctxt->deviceArray, "ERROR CUDA: Could not allocate memory for CUDA device structure", return( PAPI_ENOSUPP ) );
278 
279  /* For each device, get domains and domain-events counts */
280  gctxt->availEventSize = 0;
281  for( deviceNum = 0; deviceNum < ( uint )gctxt->deviceCount; deviceNum++ ) {
282  mydevice = &gctxt->deviceArray[deviceNum];
283  /* Get device id for each device */
284  CHECK_CU_ERROR( ( *cuDeviceGetPtr )( &mydevice->cuDev, deviceNum ), "cuDeviceGet" );
285  /* Get device name */
286  CHECK_CU_ERROR( ( *cuDeviceGetNamePtr )( mydevice->deviceName, PAPI_MIN_STR_LEN-1, mydevice->cuDev ), "cuDeviceGetName" );
287  mydevice->deviceName[PAPI_MIN_STR_LEN-1]='\0';
288  /* Get max num domains for each device */
289  CHECK_CUPTI_ERROR( ( *cuptiDeviceGetNumEventDomainsPtr )( mydevice->cuDev, &mydevice->maxDomains ), "cuptiDeviceGetNumEventDomains" );
290  /* Allocate space to hold domain IDs */
291  mydevice->domainIDArray = ( CUpti_EventDomainID * ) papi_calloc( mydevice->maxDomains, sizeof( CUpti_EventDomainID ) );
292  CHECK_PRINT_EVAL( !mydevice->domainIDArray, "ERROR CUDA: Could not allocate memory for CUDA device domains", return( PAPI_ENOMEM ) );
293  /* Put domain ids into allocated space */
294  size_t domainarraysize = mydevice->maxDomains * sizeof( CUpti_EventDomainID );
295  CHECK_CUPTI_ERROR( ( *cuptiDeviceEnumEventDomainsPtr )( mydevice->cuDev, &domainarraysize, mydevice->domainIDArray ), "cuptiDeviceEnumEventDomains" );
296  /* Allocate space to hold domain event counts */
297  mydevice->domainIDNumEvents = ( uint32_t * ) papi_calloc( mydevice->maxDomains, sizeof( uint32_t ) );
298  CHECK_PRINT_EVAL( !mydevice->domainIDNumEvents, "ERROR CUDA: Could not allocate memory for domain event counts", return( PAPI_ENOMEM ) );
299  /* For each domain, get event counts in domainNumEvents[] */
300  for ( domainNum=0; domainNum < mydevice->maxDomains; domainNum++ ) {
301  CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
302  /* Get num events in domain */
303  //SUBDBG( "Device %d:%d calling cuptiEventDomainGetNumEventsPtr with domainID %d \n", deviceNum, mydevice->cuDev, domainID );
304  CHECK_CUPTI_ERROR( ( *cuptiEventDomainGetNumEventsPtr ) ( domainID, &mydevice->domainIDNumEvents[domainNum] ), "cuptiEventDomainGetNumEvents" );
305  /* Keep track of overall number of events */
306  gctxt->availEventSize += mydevice->domainIDNumEvents[domainNum];
307  }
308  }
309 
310  /* Allocate space for all events and descriptors */
311  gctxt->availEventIDArray = ( CUpti_EventID * ) papi_calloc( gctxt->availEventSize, sizeof( CUpti_EventID ) );
312  CHECK_PRINT_EVAL( !gctxt->availEventIDArray, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
313  gctxt->availEventDeviceNum = ( int * ) papi_calloc( gctxt->availEventSize, sizeof( int ) );
314  CHECK_PRINT_EVAL( !gctxt->availEventDeviceNum, "ERROR CUDA: Could not allocate memory", return( PAPI_ENOMEM ) );
316  CHECK_PRINT_EVAL( !gctxt->availEventDesc, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
317  /* Record the events and descriptions */
318  int idxEventArray = 0;
319  for( deviceNum = 0; deviceNum < ( uint )gctxt->deviceCount; deviceNum++ ) {
320  mydevice = &gctxt->deviceArray[deviceNum];
321  //SUBDBG( "For device %d %d maxdomains %d \n", deviceNum, mydevice->cuDev, mydevice->maxDomains );
322  /* Get and store event IDs, names, descriptions into the large arrays allocated */
323  for ( domainNum=0; domainNum < mydevice->maxDomains; domainNum++ ) {
324  /* Get domain id */
325  CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
326  uint32_t domainNumEvents = mydevice->domainIDNumEvents[domainNum];
327  SUBDBG( "For device %d domain %d %d numEvents %d\n", mydevice->cuDev, domainNum, domainID, domainNumEvents );
328  /* Allocate temp space for eventIDs for this domain */
329  CUpti_EventID *domainEventIDArray = ( CUpti_EventID * ) papi_calloc( domainNumEvents, sizeof( CUpti_EventID ) );
330  CHECK_PRINT_EVAL( !domainEventIDArray, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
331  /* Load the domain eventIDs in temp space */
332  size_t domainEventArraySize = domainNumEvents * sizeof( CUpti_EventID );
333  cuptiErr = ( *cuptiEventDomainEnumEventsPtr ) ( domainID, &domainEventArraySize, domainEventIDArray );
334  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventDomainEnumEvents" );
335  /* For each event, get and store name and description */
336  for ( eventNum=0; eventNum<domainNumEvents; eventNum++ ) {
337  /* Record the event IDs in native event array */
338  CUpti_EventID myeventID = domainEventIDArray[eventNum];
339  gctxt->availEventIDArray[idxEventArray] = myeventID;
340  gctxt->availEventDeviceNum[idxEventArray] = deviceNum;
341  /* Get event name */
342  tmpSizeBytes = PAPI_MIN_STR_LEN-1 * sizeof( char );
343  cuptiErr = ( *cuptiEventGetAttributePtr ) ( myeventID, CUPTI_EVENT_ATTR_NAME, &tmpSizeBytes, tmpStr ) ;
344  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGetAttribute" );
345  /* Save a full path for the event, filling spaces with underscores */
346  //snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "%s:%d:%s", mydevice->deviceName, deviceNum, tmpStr );
347  snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "device:%d:%s", deviceNum, tmpStr );
348  gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN-1] = '\0';
349  char *nameTmpPtr = gctxt->availEventDesc[idxEventArray].name;
350  for ( ii = 0; ii < ( int )strlen( nameTmpPtr ); ii++ ) if ( nameTmpPtr[ii] == ' ' ) nameTmpPtr[ii] = '_';
351  /* Save description in the native event array */
352  tmpSizeBytes = PAPI_2MAX_STR_LEN-1 * sizeof( char );
353  cuptiErr = ( *cuptiEventGetAttributePtr ) ( myeventID, CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, gctxt->availEventDesc[idxEventArray].description );
354  CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGetAttribute" );
355  gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN-1] = '\0';
356  // SUBDBG( "Event ID:%d Name:%s Desc:%s\n", gctxt->availEventIDArray[idxEventArray], gctxt->availEventDesc[idxEventArray].name, gctxt->availEventDesc[idxEventArray].description );
357  /* Increment index past events in this domain to start of next domain */
358  idxEventArray++;
359  }
360  papi_free ( domainEventIDArray );
361  }
362  }
363  /* return 0 if everything went OK */
364  return 0;
365 }
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.c:88
uint32_t * domainIDNumEvents
Definition: linux-cuda.c:52
#define papi_free(a)
Definition: papi_memory.h:35
CUpti_EventID * availEventIDArray
Definition: linux-cuda.c:34
#define PAPI_ENOSUPP
Definition: papi.h:269
int * availEventDeviceNum
Definition: linux-cuda.c:35
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:36
#define PAPI_2MAX_STR_LEN
Definition: papi.h:464
CUpti_EventDomainID * domainIDArray
Definition: linux-cuda.c:51
#define CHECK_CUPTI_ERROR(err, cuptifunc)
Definition: linux-cuda.c:91
uint32_t availEventSize
Definition: linux-cuda.c:33
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
unsigned uint
Definition: perfmon.c:40
#define PAPI_ENOMEM
Definition: papi.h:252
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:32
int
Definition: iozone.c:18528
#define PAPI_MIN_STR_LEN
Definition: papi.h:462
char deviceName[PAPI_MIN_STR_LEN]
Definition: linux-cuda.c:49
#define papi_calloc(a, b)
Definition: papi_memory.h:37

Here is the caller graph for this function:

static int papicuda_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
)
static

Takes a native event code and passes back the event description

Parameters
EventCode – is the native event code
descr – is a pointer for the description to be copied to
len – is the size of the descr string

Definition at line 905 of file linux-cuda.c.

906 {
907  //SUBDBG( "Entering\n" );
908  unsigned int index = EventCode;
910  if ( index < gctxt->availEventSize ) {
911  strncpy( name, gctxt->availEventDesc[index].description, len );
912  } else {
913  return ( PAPI_EINVAL );
914  }
915  return ( PAPI_OK );
916 }
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:82
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:36
char * name
Definition: iozone.c:23648
static int papicuda_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)
static

Takes a native event code and passes back the name

Parameters
EventCode – is the native event code
name – is a pointer for the name to be copied to
len – is the size of the name string

Definition at line 885 of file linux-cuda.c.

886 {
887  //SUBDBG( "Entering EventCode %d\n", EventCode );
888  unsigned int index = EventCode;
890  if ( index < gctxt->availEventSize ) {
891  strncpy( name, gctxt->availEventDesc[index].name, len );
892  } else {
893  return ( PAPI_EINVAL );
894  }
895  //SUBDBG( "EventCode %d: Exit %s\n", EventCode, name );
896  return ( PAPI_OK );
897 }
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:82
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:36
char * name
Definition: iozone.c:23648
static int papicuda_ntv_enum_events ( unsigned int * EventCode,
int  modifier 
)
static

Enumerate Native Events.

Parameters
EventCode – is the event of interest
modifier – is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS

Definition at line 858 of file linux-cuda.c.

859 {
860  //SUBDBG( "Entering\n" );
861  switch( modifier ) {
862  case PAPI_ENUM_FIRST:
863  *EventCode = 0;
864  return ( PAPI_OK );
865  break;
866  case PAPI_ENUM_EVENTS:
867  if( *EventCode < global_papicuda_context->availEventSize - 1 ) {
868  *EventCode = *EventCode + 1;
869  return ( PAPI_OK );
870  } else
871  return ( PAPI_ENOEVNT );
872  break;
873  default:
874  return ( PAPI_EINVAL );
875  }
876  return ( PAPI_OK );
877 }
#define PAPI_ENOEVNT
Definition: papi.h:258
return PAPI_OK
Definition: linux-nvml.c:458
return PAPI_EINVAL
Definition: linux-nvml.c:408
static int papicuda_read ( hwd_context_t * ctx,
hwd_control_state_t * ctrl,
long long **  events,
int  flags 
)
static

Triggered by PAPI_read(). For CUDA component, switch to each context, read all the eventgroups, and put the values in the correct places.

Definition at line 642 of file linux-cuda.c.

643 {
644  SUBDBG( "Entering\n" );
645  ( void ) ctx;
646  ( void ) ctrl;
647  ( void ) flags;
650  papicuda_active_cucontext_t *currctrl;
651  int cuContextIdx, gg, ii, jj;
652  CUcontext saveCtx, tmpCtx;
653  CUptiResult cuptiErr;
654  size_t readEventValueBufferSize = sizeof( uint64_t )*PAPICUDA_MAX_COUNTERS;
655  uint64_t readEventValueBuffer[PAPICUDA_MAX_COUNTERS];
656  size_t readEventIDArraySize = sizeof( CUpti_EventID )*PAPICUDA_MAX_COUNTERS;
657  CUpti_EventID readEventIDArray[PAPICUDA_MAX_COUNTERS];
658  size_t numEventIDsRead;
659 
660  SUBDBG( "Switch to each context and read CUDA eventgroups\n" );
661  // SUBDBG( "Save initial CUDA context\n" );
662  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
663  /* Switch to each context and read CUDA eventgroups */
664  for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
665  currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
666  // SUBDBG( "Switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
667  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ), "cuCtxPushCurrent" );
668  for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
669  // SUBDBG( "Read from context %d eventgroup %d\n", cuContextIdx, gg );
670  cuptiErr = ( *cuptiEventGroupReadAllEventsPtr )( currctrl->eventGroup[gg], CUPTI_EVENT_READ_FLAG_NONE, &readEventValueBufferSize, readEventValueBuffer, &readEventIDArraySize, readEventIDArray, &numEventIDsRead );
671  CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupReadAllEvents: Could not read from CUPTI eventgroup", return( PAPI_EMISC ) );
672  /* Match read values against active events by scanning activeEvents array and matching associated availEventIDs */
673  for( ii = 0; ii < ( int )numEventIDsRead; ii++ ) {
674  for( jj = 0; jj < gctrl->activeEventCount; jj++ ) {
675  int eventIndex = gctrl->activeEventIndex[jj];
676  if ( gctrl->activeEventContextIdx[jj]==cuContextIdx && gctxt->availEventIDArray[eventIndex]==readEventIDArray[ii] ) {
677  gctrl->activeEventValues[jj] += ( long long )readEventValueBuffer[ii];
678  SUBDBG( "Matched read-eventID %d:%d value %ld activeEvent %d value %lld \n", jj, (int)readEventIDArray[ii], readEventValueBuffer[ii], eventIndex, gctrl->activeEventValues[jj] );
679  break;
680  }
681  }
682  }
683  }
684  CUresult cuErr = ( *cuCtxPopCurrentPtr ) ( &tmpCtx );
685  if ( cuErr != CUDA_SUCCESS ) PAPIERROR ( "Error popping context %d\n", cuErr );
686  CHECK_CU_ERROR( cuErr, "cuCtxPopCurrent" );
687  }
688  //SUBDBG( "Restore original context\n" );
689  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ), "cuCtxPushCurrent" );
690  *events = gctrl->activeEventValues;
691  return ( PAPI_OK );
692 }
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.c:88
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:82
long long flags
Definition: iozone.c:12330
#define PAPICUDA_MAX_COUNTERS
Definition: linux-cuda.c:27
CUpti_EventID * availEventIDArray
Definition: linux-cuda.c:34
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
char events[MAX_EVENTS][BUFSIZ]
int activeEventContextIdx[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:61
#define PAPI_EMISC
Definition: papi.h:265
int countOfActiveCUContexts
Definition: linux-cuda.c:57
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
long long
Definition: iozone.c:19827
void PAPIERROR(char *format,...)
CUpti_EventGroup eventGroup[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:70
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
int
Definition: iozone.c:18528
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:58
int activeEventIndex[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:60

Here is the call graph for this function:

static int papicuda_reset ( hwd_context_t * ctx,
hwd_control_state_t * ctrl 
)
static

Triggered by PAPI_reset() but only if the EventSet is currently running. If the eventset is not currently running, then the saved value in the EventSet is set to zero without calling this routine.

Definition at line 783 of file linux-cuda.c.

784 {
785  SUBDBG( "Entering\n" );
786  ( void ) ctx;
787  ( void ) ctrl;
789  papicuda_active_cucontext_t *currctrl;
790  int cuContextIdx, gg, ii;
791  CUptiResult cuptiErr;
792  CUcontext saveCtx, tmpCtx;
793 
794  //SUBDBG( "Reset all active event values\n" );
795  for ( ii=0; ii<gctrl->activeEventCount; ii++ )
796  gctrl->activeEventValues[ii] = 0;
797  // SUBDBG( "Save initial CUDA context and restore later\n" );
798  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
799  // SUBDBG( "Switch to each context and reset CUDA eventgroups\n" );
800  for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
801  currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
802  //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
803  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ), "cuCtxPushCurrent" );
804  for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
805  // SUBDBG( "Reset events in eventgroup\n" );
806  cuptiErr = ( *cuptiEventGroupResetAllEventsPtr )( currctrl->eventGroup[gg] );
807  CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupResetAllEvents: Could not reset the event groups", return( PAPI_EMISC ) );
808  SUBDBG( "For papicuda context %d on device %d event group %d was enabled and reset\n", cuContextIdx, currctrl->deviceNum, gg );
809  }
810  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ), "cuCtxPopCurrent" );
811  }
812  // SUBDBG( "Restore original context\n" );
813  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ), "cuCtxPushCurrent" );
814  return ( PAPI_OK );
815 }
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.c:88
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
#define PAPI_EMISC
Definition: papi.h:265
int countOfActiveCUContexts
Definition: linux-cuda.c:57
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
CUpti_EventGroup eventGroup[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:70
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:58
static int papicuda_set_domain ( hwd_control_state_t * ctrl,
int  domain 
)
static

Definition at line 765 of file linux-cuda.c.

766 {
767  SUBDBG( "Entering\n" );
768  ( void ) ctrl;
769  if ( ( PAPI_DOM_USER & domain ) ||
770  ( PAPI_DOM_KERNEL & domain ) ||
771  ( PAPI_DOM_OTHER & domain ) )
772  return ( PAPI_OK );
773  else
774  return ( PAPI_EINVAL );
775  return ( PAPI_OK );
776 }
#define PAPI_DOM_KERNEL
Definition: papi.h:298
return PAPI_OK
Definition: linux-nvml.c:458
#define PAPI_DOM_USER
Definition: papi.h:296
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_DOM_OTHER
Definition: papi.h:299
static int papicuda_shutdown_component ( void  )
static

Triggered by PAPI_shutdown() and frees memory allocated in the CUDA component.

Definition at line 704 of file linux-cuda.c.

705 {
706  SUBDBG( "Entering\n" );
709  int deviceNum, cuContextIdx;
710  /* Free context */
711  if ( gctxt ) {
712  for( deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++ ) {
713  papicuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum];
714  papi_free( mydevice->domainIDArray );
715  papi_free( mydevice->domainIDNumEvents );
716  }
717  papi_free( gctxt->availEventIDArray );
718  papi_free( gctxt->availEventDeviceNum );
719  papi_free( gctxt->availEventDesc );
720  papi_free( gctxt->deviceArray );
721  papi_free( gctxt );
722  global_papicuda_context = gctxt = NULL;
723  }
724  /* Free control */
725  if ( gctrl ) {
726  for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ )
727  if ( gctrl->arrayOfActiveCUContexts[cuContextIdx]!=NULL )
728  papi_free( gctrl->arrayOfActiveCUContexts[cuContextIdx] );
729  papi_free( gctrl );
730  global_papicuda_control = gctrl = NULL;
731  }
732  // close the dynamic libraries needed by this component (opened in the init substrate call)
733  dlclose( dl1 );
734  dlclose( dl2 );
735  dlclose( dl3 );
736  return ( PAPI_OK );
737 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:82
uint32_t * domainIDNumEvents
Definition: linux-cuda.c:52
#define papi_free(a)
Definition: papi_memory.h:35
CUpti_EventID * availEventIDArray
Definition: linux-cuda.c:34
int * availEventDeviceNum
Definition: linux-cuda.c:35
return PAPI_OK
Definition: linux-nvml.c:458
static void * dl1
Definition: linux-cuda.c:74
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:36
static void * dl2
Definition: linux-cuda.c:75
CUpti_EventDomainID * domainIDArray
Definition: linux-cuda.c:51
int countOfActiveCUContexts
Definition: linux-cuda.c:57
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static void * dl3
Definition: linux-cuda.c:76
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:32
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:58
int papicuda_shutdown_thread ( hwd_context_t ctx)

Called at thread shutdown. Does nothing in the CUDA component.

Definition at line 695 of file linux-cuda.c.

696 { /* Per-thread shutdown hook: logs entry and reports success; nothing is released here. */
697  SUBDBG( "Entering\n" );
698  ( void ) ctx; /* parameter is intentionally unused */
699 
700  return ( PAPI_OK );
701 }
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int papicuda_start ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)
static

Triggered by PAPI_start(). For CUDA component, switch to each context and start all eventgroups.

Definition at line 565 of file linux-cuda.c.

566 { /* Zeroes the accumulated event values, then enables and resets every CUPTI event group of every tracked CUDA context. Returns PAPI_OK, PAPI_EMISC on a CUPTI failure, or -1 on a CUDA driver failure. */
567  SUBDBG( "Entering\n" );
568  ( void ) ctx; /* both parameters are unused; all state is reached through gctrl */
569  ( void ) ctrl; /* NOTE(review): gctrl's declaration (original line 570) is not part of this listing; presumably it aliases global_papicuda_control -- confirm */
571  //papicuda_context_t *gctxt = global_papicuda_context;
572  papicuda_active_cucontext_t *currctrl;
573  int cuContextIdx, gg, ii;
574  CUptiResult cuptiErr;
575  CUcontext saveCtx, tmpCtx; /* saveCtx: the caller's context, pushed back at the end; tmpCtx: only receives each popped context */
576 
577  //SUBDBG( "Reset all active event values\n" );
578  for ( ii=0; ii<gctrl->activeEventCount; ii++ )
579  gctrl->activeEventValues[ii] = 0;
580 
581  // SUBDBG( "Switch to each context and enable CUDA eventgroups associated with that context\n" );
582  /* Save current cuda context */
583  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" ); /* the macro returns -1 when the call fails */
584  /* Switch to each context and enable CUDA eventgroups */
585  for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) { /* NOTE(review): every error return inside this loop exits without pushing saveCtx back, so the caller's context is not restored on those paths */
586  currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
587  //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
588  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ), "cuCtxPushCurrent" );
589  for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
590  // SUBDBG( "Enable event group\n" );
591  cuptiErr = ( *cuptiEventGroupEnablePtr )( currctrl->eventGroup[gg] );
592  CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupEnable: Could not enable one of the event groups", return( PAPI_EMISC ) );
593  // SUBDBG( "Reset events in eventgroup\n" );
594  cuptiErr = ( *cuptiEventGroupResetAllEventsPtr )( currctrl->eventGroup[gg] ); /* the group is reset after it has been enabled, not before */
595  CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupResetAllEvents: Could not reset the event groups", return( PAPI_EMISC ) );
596  SUBDBG( "For papicuda context %d on device %d event group %d was enabled and reset\n", cuContextIdx, currctrl->deviceNum, gg );
597  }
598  // SUBDBG( "Pop temp context\n" );
599  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ), "cuCtxPopCurrent" ); /* undo the push made at the top of this iteration */
600  }
601  //SUBDBG( "Restore original context\n" );
602  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ), "cuCtxPushCurrent" );
603  return ( PAPI_OK );
604 }
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.c:88
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
#define PAPI_EMISC
Definition: papi.h:265
int countOfActiveCUContexts
Definition: linux-cuda.c:57
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
CUpti_EventGroup eventGroup[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:70
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:58
static int papicuda_stop ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)
static

Triggered by PAPI_stop()

Definition at line 607 of file linux-cuda.c.

608 { /* Disables every CUPTI event group of every tracked CUDA context. Returns PAPI_OK, PAPI_EMISC on a CUPTI failure, or -1 on a CUDA driver failure. */
609  SUBDBG( "Entering to disable all CUPTI eventgroups\n" );
610  ( void ) ctx; /* both parameters are unused; all state is reached through gctrl */
611  ( void ) ctrl; /* NOTE(review): gctrl's declaration (original line 612) is not part of this listing; presumably it aliases global_papicuda_control -- confirm */
613  papicuda_active_cucontext_t *currctrl;
614  int cuContextIdx, gg;
615  CUptiResult cuptiErr;
616  CUcontext saveCtx, tmpCtx; /* saveCtx: the caller's context, pushed back at the end; tmpCtx: only receives each popped context */
617 
618  // SUBDBG( "Save initial CUDA context\n" );
619  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" ); /* the macro returns -1 when the call fails */
620  // SUBDBG( "Switch to each context and disable CUDA eventgroups\n" );
621  for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) { /* NOTE(review): every error return inside this loop exits without pushing saveCtx back, so the caller's context is not restored on those paths */
622  currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
623  //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
624  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ), "cuCtxPushCurrent" );
625  for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
626  // SUBDBG( "Disable events in eventgroup\n" );
627  cuptiErr = ( *cuptiEventGroupDisablePtr )( currctrl->eventGroup[gg] );
628  CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupDisable: Could not disable the event groups", return( PAPI_EMISC ) ); /* the first failure aborts; later groups and contexts are left untouched */
629  SUBDBG( "For papicuda context %d on device %d event group %d was disabled\n", cuContextIdx, currctrl->deviceNum, gg );
630  }
631  CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ), "cuCtxPopCurrent" ); /* undo the push made at the top of this iteration */
632  }
633  //SUBDBG( "Restore original context\n" );
634  CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ), "cuCtxPushCurrent" );
635  return ( PAPI_OK );
636 }
#define CHECK_CU_ERROR(err, cufunc)
Definition: linux-cuda.c:88
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
#define PAPI_EMISC
Definition: papi.h:265
int countOfActiveCUContexts
Definition: linux-cuda.c:57
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
CUpti_EventGroup eventGroup[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:70
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:58
static int papicuda_update_control_state ( hwd_control_state_t ctrl,
NativeInfo_t nativeInfo,
int  nativeCount,
hwd_context_t ctx 
)
static

Triggered by eventset operations like add or remove. For CUDA, this needs to be called multiple times, once from each separate CUDA context, with the events to be measured from that context. For each context, eventgroups are created for the events.

Definition at line 456 of file linux-cuda.c.

457 { /* Registers the events in nativeInfo[0..nativeCount) against the CUDA context current on the calling thread, creating a tracking entry for that context and CUPTI event groups as needed. Returns PAPI_OK, PAPI_EMISC, PAPI_EINVAL, PAPI_ENOMEM, or -1 when cuptiEventGroupCreate fails. */
458  /* Note: NativeInfo_t is defined in papi_internal.h */
459  SUBDBG( "Entering with nativeCount %d\n", nativeCount );
460  ( void ) ctx; /* both parameters are unused; all state is reached through gctxt and gctrl */
461  ( void ) ctrl; /* NOTE(review): the declarations of gctxt and gctrl (original lines 462-463) are not part of this listing; presumably they alias global_papicuda_context and global_papicuda_control -- confirm */
464  papicuda_active_cucontext_t *currctrl;
465  int currDeviceNum, currContextIdx, cuContextIdx;
466  CUcontext currCuCtx;
467  int index, ii, jj;
468 
469  if ( nativeCount == 0 ) {
470  /* Does nativeCount=0 imply that the component is being reset!? */
471  /* gctrl->activeEventCount = 0; */
472  } else {
473  /* nativecount>0 so we need to process the events */
474  // SUBDBG( "There are currently %d contexts\n", gctrl->countOfActiveCUContexts );
475 
476  /* Get/query some device and context specific information */
477  CHECK_PRINT_EVAL( ( *cudaGetDevicePtr )( &currDeviceNum )!=CUDA_SUCCESS, "cudaGetDevice: CUDA device MUST be set before adding events", return( PAPI_EMISC ) );
478  CHECK_PRINT_EVAL( ( *cudaFreePtr )( NULL )!=CUDA_SUCCESS, "cudaFree: Failed to free in this CUDA context", return( PAPI_EMISC ) ); /* presumably called only to make the CUDA runtime initialize its context before it is queried below -- confirm */
479  CHECK_PRINT_EVAL( ( *cuCtxGetCurrentPtr )( &currCuCtx )!=CUDA_SUCCESS, "cuCtxGetCurrent: CUDA context MUST be initialized before adding events", return ( PAPI_EMISC ) );
480 
481  /* Find current context/control, creating it if does not exist */
482  for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ )
483  if ( gctrl->arrayOfActiveCUContexts[cuContextIdx]->context == currCuCtx ) break;
484  CHECK_PRINT_EVAL( cuContextIdx==PAPICUDA_MAX_COUNTERS, "Exceeded hardcoded maximum number of contexts (PAPICUDA_MAX_COUNTERS)", return( PAPI_EMISC ) ); /* after the search cuContextIdx is at most countOfActiveCUContexts, so this fires only when the array is full and the context was not found */
485  if ( cuContextIdx==gctrl->countOfActiveCUContexts ) { /* no match: append a new entry for the current context */
486  gctrl->arrayOfActiveCUContexts[cuContextIdx] = papi_calloc( 1, sizeof( papicuda_active_cucontext_t ) ); /* presumably zero-filled like calloc, so numEventGroups would start at 0 -- confirm */
487  CHECK_PRINT_EVAL( ( gctrl->arrayOfActiveCUContexts[cuContextIdx]==NULL ), "Memory allocation for new active context failed", return( PAPI_ENOMEM ) ) ;
488  gctrl->arrayOfActiveCUContexts[cuContextIdx]->context = currCuCtx;
489  gctrl->arrayOfActiveCUContexts[cuContextIdx]->deviceNum = currDeviceNum;
490  gctrl->countOfActiveCUContexts++;
491  SUBDBG( "Added a new context ... now %d\n", gctrl->countOfActiveCUContexts );
492  }
493  currContextIdx = cuContextIdx;
494  currctrl = gctrl->arrayOfActiveCUContexts[currContextIdx];
495  /* At this point, currCuCtx is at index cuContextIdx in the arrayOfActiveCUContexts array */
496 
497  /* For each event, check if it is already added. If not, try to add it to the current context.
498  Try each existing eventgroup. If none will accept this event, create a new event group. If the new event group will not accept it either... fail */
499  /* For each event */
500  for( ii = 0; ii < nativeCount; ii++ ) {
501  index = nativeInfo[ii].ni_event; /* Get the PAPI event index from the user */
502  /* Check to see if event is already in some context */
503  SUBDBG( "Searching %d active events to see if event %d %s is already in some context\n", gctrl->activeEventCount, index, gctxt->availEventDesc[index].name ); /* NOTE(review): availEventDesc[index] is read here before the index < availEventSize test further down */
504  int eventAlreadyAdded=0;
505  for( jj = 0; jj < gctrl->activeEventCount; jj++ ) {
506  if ( gctrl->activeEventIndex[jj] == index ) {
507  eventAlreadyAdded=1;
508  break;
509  }
510  }
511 
512  /* If event was not found in any context.. try to insert it into current context */
513  if ( !eventAlreadyAdded ) { /* an event that is already active is skipped entirely: nothing in nativeInfo[ii] is written for it */
514  SUBDBG( "Need to add event %d %s to the current context\n", index, gctxt->availEventDesc[index].name );
515  /* Make sure that the device number for the event matches the device for this context */
516  CHECK_PRINT_EVAL( (currDeviceNum!=gctxt->availEventDeviceNum[index]), "Current CUDA device cannot use this event", return( PAPI_EINVAL ) ); /* NOTE(review): availEventDeviceNum[index] is also read before the range test on the next statement */
517  /* if this event index corresponds to something from availEventIDArray */
518  if ( index < ( int )gctxt->availEventSize ) { /* NOTE(review): when this test fails no CUPTI call is made, yet the event is still recorded as active after this block */
519  /* lookup cuptieventid for this event index */
520  CUpti_EventID cuptieventid = gctxt->availEventIDArray[index];
521  CUpti_EventGroup cuptieventgroup;
522  int addstatus=!CUPTI_SUCCESS, gg; /* starts as "not success" so that a context with zero event groups falls through to the create-a-group path */
523  SUBDBG( "Event %s is going to be added to current context %d having %d eventgroups\n", gctxt->availEventDesc[index].name, currContextIdx, currctrl->numEventGroups );
524  /* For each existing eventgroup, try to insert this event */
525  for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
526  cuptieventgroup = currctrl->eventGroup[gg];
527  addstatus = ( *cuptiEventGroupAddEventPtr )( cuptieventgroup, cuptieventid );
528  if ( addstatus==CUPTI_SUCCESS ) {
529  SUBDBG( "Event %s successfully added to current eventgroup %d:%d\n", gctxt->availEventDesc[index].name, currContextIdx, gg );
530  break;
531  }
532  }
533  /* If the event could not be added to any earlier eventgroup, create a new one and try again. Fail if this does not succeed */
534  if ( addstatus!=CUPTI_SUCCESS ) { /* the loop ran to completion, so gg == numEventGroups: the first unused eventGroup slot */
535  //SUBDBG( "Event %s needs a new eventgroup\n", gctxt->availEventDesc[index].name );
536  CHECK_PRINT_EVAL( ( gg>PAPICUDA_MAX_COUNTERS-1 ), "For current CUDA device, could not add event (no more eventgroups can be added)", return( PAPI_EMISC ) ); /* keeps gg inside eventGroup[PAPICUDA_MAX_COUNTERS] */
537  //SUBDBG( "gg %d context %d %p\n", gg, currctrl->context, currctrl->context );
538  CHECK_CUPTI_ERROR( ( *cuptiEventGroupCreatePtr )( currctrl->context, &currctrl->eventGroup[gg], 0 ), "cuptiEventGroupCreate" ); /* the macro returns -1, not a PAPI error code, on failure */
539  cuptieventgroup = currctrl->eventGroup[gg];
540  currctrl->numEventGroups++; /* NOTE(review): counted before the add below is attempted, so a failed add leaves the newly created group registered */
541  addstatus = ( *cuptiEventGroupAddEventPtr )( cuptieventgroup, cuptieventid );
542  CHECK_PRINT_EVAL( ( addstatus!=CUPTI_SUCCESS ), "cuptiEventGroupAddEvent: Could not add event (event may not match CUDA context)", return( PAPI_EMISC ) );
543  SUBDBG( "Event %s successfully added to new eventgroup %d:%d\n", gctxt->availEventDesc[index].name, currContextIdx, gg );
544  }
545  }
546 
547  /* Record index of this active event back into the nativeInfo structure */
548  nativeInfo[ii].ni_position = gctrl->activeEventCount; /* NOTE(review): written before the capacity check on the next statement */
549  /* record added event at the higher level */
550  CHECK_PRINT_EVAL( ( gctrl->activeEventCount==PAPICUDA_MAX_COUNTERS-1 ), "Exceeded maximum num of events (PAPI_MAX_COUNTERS)", return( PAPI_EMISC ) ); /* NOTE(review): fires while one slot is still free, so the last array element is never used; the message names PAPI_MAX_COUNTERS although the limit tested is PAPICUDA_MAX_COUNTERS */
551  gctrl->activeEventIndex[gctrl->activeEventCount] = index;
552  gctrl->activeEventContextIdx[gctrl->activeEventCount] = currContextIdx;
553  gctrl->activeEventValues[gctrl->activeEventCount] = 0;
554  gctrl->activeEventCount++;
555 
556  }
557  }
558  }
559  return ( PAPI_OK );
560 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:85
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:82
#define PAPICUDA_MAX_COUNTERS
Definition: linux-cuda.c:27
CUpti_EventID * availEventIDArray
Definition: linux-cuda.c:34
int * availEventDeviceNum
Definition: linux-cuda.c:35
return PAPI_OK
Definition: linux-nvml.c:458
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:408
#define CHECK_PRINT_EVAL(err, str, eval)
Definition: linux-cuda.c:94
int activeEventContextIdx[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:61
#define PAPI_EMISC
Definition: papi.h:265
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:36
#define CHECK_CUPTI_ERROR(err, cuptifunc)
Definition: linux-cuda.c:91
int countOfActiveCUContexts
Definition: linux-cuda.c:57
uint32_t availEventSize
Definition: linux-cuda.c:33
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
CUpti_EventGroup eventGroup[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:70
#define PAPI_ENOMEM
Definition: papi.h:252
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:58
int activeEventIndex[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:60
#define papi_calloc(a, b)
Definition: papi_memory.h:37

Variable Documentation

papi_vector_t _cuda_vector

Vector that points to entry points for the component

Definition at line 79 of file linux-cuda.c.

void( * _dl_non_dynamic_init)(cudaError_t CUDARTAPI cudaFree void)

Definition at line 112 of file linux-cuda.c.

184  { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
185 
186 static int papicuda_linkCudaLibraries()
187 { /* Opens libcuda.so, libcudart.so and libcupti.so with dlopen and resolves each needed entry point into its file-level function pointer. Returns PAPI_OK, or PAPI_ENOSUPP with _cuda_vector.cmp_info.disabled_reason filled in. */
188  /* Attempt to guess if we were statically linked to libc, if so bail */
189  if( _dl_non_dynamic_init != NULL ) {
190  strncpy( _cuda_vector.cmp_info.disabled_reason, "The cuda component does not support statically linking to libc.", PAPI_MAX_STR_LEN );
191  return PAPI_ENOSUPP;
192  }
193  /* Need to link in the cuda libraries, if not found disable the component */
194  dl1 = dlopen( "libcuda.so", RTLD_NOW | RTLD_GLOBAL ); /* CUDA driver API; the handle is kept in file-level dl1 */
195  CHECK_DL_STATUS( !dl1 , "CUDA library libcuda.so not found." ); /* on a true condition the macro copies the message into disabled_reason and returns PAPI_ENOSUPP */
196  cuCtxGetCurrentPtr = dlsym( dl1, "cuCtxGetCurrent" );
197  CHECK_DL_STATUS( dlerror()!=NULL , "CUDA function cuCtxGetCurrent not found." ); /* each lookup is validated through dlerror(), not by testing the returned pointer; NOTE(review): dlerror() is not called before the dlsym to clear an older error, so a stale error could presumably be misreported -- confirm */
198  cuDeviceGetPtr = dlsym( dl1, "cuDeviceGet" );
199  CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGet not found." );
200  cuDeviceGetCountPtr = dlsym( dl1, "cuDeviceGetCount" );
201  CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGetCount not found." );
202  cuDeviceGetNamePtr = dlsym( dl1, "cuDeviceGetName" );
203  CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGetName not found." );
204  cuInitPtr = dlsym( dl1, "cuInit" );
205  CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuInit not found." );
206  cuCtxPopCurrentPtr = dlsym( dl1, "cuCtxPopCurrent" );
207  CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuCtxPopCurrent not found." );
208  cuCtxPushCurrentPtr = dlsym( dl1, "cuCtxPushCurrent" );
209  CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuCtxPushCurrent not found." );
210 
211  dl2 = dlopen( "libcudart.so", RTLD_NOW | RTLD_GLOBAL ); /* CUDA runtime API; NOTE(review): from here on, a failure returns without dlclose-ing the handles opened earlier in this function */
212  CHECK_DL_STATUS( !dl2, "CUDA runtime library libcudart.so not found." );
213  cudaGetDevicePtr = dlsym( dl2, "cudaGetDevice" );
214  CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaGetDevice not found." );
215  cudaSetDevicePtr = dlsym( dl2, "cudaSetDevice" );
216  CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaSetDevice not found." );
217  cudaFreePtr = dlsym( dl2, "cudaFree" );
218  CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaFree not found." );
219 
220  dl3 = dlopen( "libcupti.so", RTLD_NOW | RTLD_GLOBAL ); /* CUPTI event API */
221  CHECK_DL_STATUS( !dl3, "CUDA runtime library libcupti.so not found." ); /* NOTE(review): the message says "CUDA runtime library" although the library being opened is libcupti.so */
222  cuptiDeviceEnumEventDomainsPtr = dlsym( dl3, "cuptiDeviceEnumEventDomains" );
223  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiDeviceEnumEventDomains not found." );
224  cuptiDeviceGetNumEventDomainsPtr = dlsym( dl3, "cuptiDeviceGetNumEventDomains" );
225  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiDeviceGetNumEventDomains not found." );
226  cuptiEventDomainEnumEventsPtr = dlsym( dl3, "cuptiEventDomainEnumEvents" );
227  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventDomainEnumEvents not found." );
228  cuptiEventDomainGetNumEventsPtr = dlsym( dl3, "cuptiEventDomainGetNumEvents" );
229  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventDomainGetNumEvents not found." );
230  cuptiEventGetAttributePtr = dlsym( dl3, "cuptiEventGetAttribute" );
231  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGetAttribute not found." );
232  cuptiEventGroupAddEventPtr = dlsym( dl3, "cuptiEventGroupAddEvent" );
233  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupAddEvent not found." );
234  cuptiEventGroupCreatePtr = dlsym( dl3, "cuptiEventGroupCreate" );
235  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupCreate not found." );
236  cuptiEventGroupDestroyPtr = dlsym( dl3, "cuptiEventGroupDestroy" );
237  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupDestroy not found." );
238  cuptiEventGroupDisablePtr = dlsym( dl3, "cuptiEventGroupDisable" );
239  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupDisable not found." );
240  cuptiEventGroupEnablePtr = dlsym( dl3, "cuptiEventGroupEnable" );
241  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupEnable not found." );
242  cuptiEventGroupReadAllEventsPtr = dlsym( dl3, "cuptiEventGroupReadAllEvents" );
243  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupReadAllEvents not found." );
244  cuptiEventGroupResetAllEventsPtr = dlsym( dl3, "cuptiEventGroupResetAllEvents" );
245  CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupResetAllEvents not found." );
246  return ( PAPI_OK );
247 }
#define PAPI_ENOSUPP
Definition: papi.h:269
return PAPI_OK
Definition: linux-nvml.c:458
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
static void * dl1
Definition: linux-cuda.c:74
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:633
static void * dl2
Definition: linux-cuda.c:75
static void * dl3
Definition: linux-cuda.c:76
papi_vector_t _cuda_vector
Definition: linux-cuda.c:79
#define PAPI_MAX_STR_LEN
Definition: papi.h:463
#define CHECK_DL_STATUS(err, str)
void(* _dl_non_dynamic_init)(cudaError_t CUDARTAPI cudaFree void)
Definition: linux-cuda.c:112
void* dl1 = NULL
static

Definition at line 74 of file linux-cuda.c.

void* dl2 = NULL
static

Definition at line 75 of file linux-cuda.c.

void* dl3 = NULL
static

Definition at line 76 of file linux-cuda.c.

papicuda_context_t* global_papicuda_context = NULL
static

Definition at line 82 of file linux-cuda.c.

papicuda_control_t* global_papicuda_control = NULL
static

Definition at line 85 of file linux-cuda.c.