linux-cuda.c File Reference

This implements a PAPI component that enables PAPI-C to access hardware monitoring counters for NVIDIA CUDA GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Data Structures

struct  papicuda_context_t
struct  papicuda_name_desc_t
struct  papicuda_device_desc_t
struct  papicuda_control_t
struct  papicuda_active_cucontext_t

Defines

#define PAPICUDA_MAX_COUNTERS   512
#define CHECK_CU_ERROR(err, cufunc)   if( (err) != CUDA_SUCCESS ) { PAPIERROR( "CUDA Driver API function failed '%s'", cufunc ); return -1; }
#define CHECK_CUPTI_ERROR(err, cuptifunc)   if( (err) != CUPTI_SUCCESS ) { PAPIERROR( "CUPTI API function failed '%s'", cuptifunc ); return -1; }
#define CHECK_PRINT_EVAL(err, str, eval)   if( (err) ) { PAPIERROR( "%s", str ); eval; }
#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define CUPTIAPI   __attribute__((weak))
#define CHECK_DL_STATUS(err, str)   if( err ) { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }

Functions

static int papicuda_list_all_events (papicuda_context_t *gctxt)
static int papicuda_init_thread (hwd_context_t *ctx)
static int papicuda_init_component (int cidx)
static int papicuda_init_control_state (hwd_control_state_t *ctrl)
static int papicuda_update_control_state (hwd_control_state_t *ctrl, NativeInfo_t *nativeInfo, int nativeCount, hwd_context_t *ctx)
static int papicuda_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int papicuda_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int papicuda_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **events, int flags)
int papicuda_shutdown_thread (hwd_context_t *ctx)
static int papicuda_shutdown_component (void)
static int papicuda_ctrl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
static int papicuda_set_domain (hwd_control_state_t *ctrl, int domain)
static int papicuda_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int papicuda_cleanup_eventset (hwd_control_state_t *ctrl)
static int papicuda_ntv_enum_events (unsigned int *EventCode, int modifier)
static int papicuda_ntv_code_to_name (unsigned int EventCode, char *name, int len)
static int papicuda_ntv_code_to_descr (unsigned int EventCode, char *name, int len)

Variables

static void * dl1 = NULL
static void * dl2 = NULL
static void * dl3 = NULL
papi_vector_t _cuda_vector
static papicuda_context_t * global_papicuda_context = NULL
static papicuda_control_t * global_papicuda_control = NULL
void(* _dl_non_dynamic_init )(cudaError_t CUDARTAPI cudaFree void)

Detailed Description

Author:
Asim YarKhan yarkhan@icl.utk.edu (updated in 2015 for multiple CUDA contexts/devices)
Heike Jagode (in collaboration with Robert Dietrich, TU Dresden) jagode@eecs.utk.edu

Definition in file linux-cuda.c.


Define Documentation

#define CHECK_CU_ERROR ( err,
cufunc   )     if( (err) != CUDA_SUCCESS ) { PAPIERROR( "CUDA Driver API function failed '%s'", cufunc ); return -1; }

Definition at line 88 of file linux-cuda.c.

#define CHECK_CUPTI_ERROR ( err,
cuptifunc   )     if( (err) != CUPTI_SUCCESS ) { PAPIERROR( "CUPTI API function failed '%s'", cuptifunc ); return -1; }

Definition at line 91 of file linux-cuda.c.

#define CHECK_DL_STATUS ( err,
str   )     if( err ) { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
#define CHECK_PRINT_EVAL ( err,
str,
eval   )     if( (err) ) { PAPIERROR( "%s", str ); eval; }

Definition at line 94 of file linux-cuda.c.

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define CUPTIAPI   __attribute__((weak))
#define PAPICUDA_MAX_COUNTERS   512

Definition at line 27 of file linux-cuda.c.


Function Documentation

static int papicuda_cleanup_eventset ( hwd_control_state_t *  ctrl  )  [static]

Definition at line 822 of file linux-cuda.c.

00823 { /* Destroy every CUPTI event group of every active CUDA context and zero the active-event count. Returns PAPI_OK, or -1 (via the CHECK_* macros) on a CUDA/CUPTI failure. */
00824     SUBDBG( "Entering\n" );
00825     ( void ) ctrl; /* unused: all state is taken from global_papicuda_control */
00826     papicuda_control_t *gctrl = global_papicuda_control; /* NOTE(review): dereferenced below without a NULL check */
00827     papicuda_active_cucontext_t *currctrl;
00828     int cuContextIdx, gg;
00829     CUptiResult cuptiErr;
00830     CUcontext saveCtx, tmpCtx;
00831
00832     SUBDBG( "Switch to each context and disable CUDA eventgroups\n" );
00833     /* Pop the caller's current CUDA context so it can be pushed back at the end */
00834     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00835     /* Switch to each active context and destroy its CUDA eventgroups */
00836     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00837         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00838         /* Switch to this device / cuda context */
00839         /* NOTE(review): the CHECK_* macros inside this loop return -1 immediately, so on failure saveCtx is never pushed back */
00840         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00840         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00841             /* Destroy the eventGroups; it also frees the perfmon hardware on the GPU */
00842             cuptiErr = ( *cuptiEventGroupDestroyPtr )( currctrl->eventGroup[gg] );
00843             CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDestroy" );
00844         }
00845         currctrl->numEventGroups = 0; /* every group of this context has been destroyed */
00846         CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ),  "cuCtxPopCurrent" );
00847     }
00848     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00849     /* Record that there are no active events (countOfActiveCUContexts is not changed here) */
00850     gctrl->activeEventCount = 0;
00851     return ( PAPI_OK );
00852 }

static int papicuda_ctrl ( hwd_context_t *  ctx,
int  code,
_papi_int_option_t *  option 
) [static]

Hook for setting various options in the component. In the CUDA component it does nothing and always returns PAPI_OK.

Parameters:
[in] ctx -- hardware context
[in] code -- option code; valid values are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT
[in] option -- options to be set

Definition at line 746 of file linux-cuda.c.

00747 { /* No-op option hook: ignores every argument and reports success. */
00748     SUBDBG( "Entering\n" );
00749     ( void ) ctx;    /* unused */
00750     ( void ) code;   /* unused */
00751     ( void ) option; /* unused */
00752     return ( PAPI_OK );
00753 }

static int papicuda_init_component ( int  cidx  )  [static]

Initialize hardware counters, set up the function vector table and get hardware information. This routine is called when the PAPI process is initialized (i.e., by PAPI_library_init).

Definition at line 396 of file linux-cuda.c.

00397 { /* Link the CUDA libraries, enumerate all native events, and publish the event counts in _cuda_vector.cmp_info. Returns PAPI_OK, or PAPI_ENOSUPP if linking or enumeration fails. */
00398     SUBDBG( "Entering with cidx: %d\n", cidx );
00399     int err;
00400
00401     /* link in all the cuda libraries and resolve the symbols we need to use */
00402     if( papicuda_linkCudaLibraries() != PAPI_OK ) {
00403         SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
00404         SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
00405         return (PAPI_ENOSUPP);
00406     }
00407
00408     /* Create the global context structure once; later calls reuse it */
00409     if ( !global_papicuda_context )
00410         global_papicuda_context = ( papicuda_context_t* ) papi_calloc( 1, sizeof( papicuda_context_t ) );
00411
00412     /* Get list of all native CUDA events supported */
00413     /* NOTE(review): the papi_calloc result above is not checked; papicuda_list_all_events dereferences its argument */
00413     err = papicuda_list_all_events( global_papicuda_context );
00414     CHECK_PRINT_EVAL( err!=0, "ERROR: Could not get a list of CUDA/CUPTI events", return( PAPI_ENOSUPP ) );
00415
00416     /* Export some information: counter counts are all set to the number of native events */
00417     _cuda_vector.cmp_info.CmpIdx = cidx;
00418     _cuda_vector.cmp_info.num_native_events = global_papicuda_context->availEventSize;
00419     _cuda_vector.cmp_info.num_cntrs = _cuda_vector.cmp_info.num_native_events;
00420     _cuda_vector.cmp_info.num_mpx_cntrs = _cuda_vector.cmp_info.num_native_events;
00421
00422     //SUBDBG( "Exiting PAPI_OK\n" );
00423     return ( PAPI_OK );
00424 }

Here is the call graph for this function:

static int papicuda_init_control_state ( hwd_control_state_t *  ctrl  )  [static]

Setup a counter control state. In general a control state holds the hardware info for an EventSet.

Definition at line 431 of file linux-cuda.c.

00432 { /* Ensure the component is initialized and has events, then lazily allocate the global control structure. Returns PAPI_OK, PAPI_ENOINIT or PAPI_EMISC. */
00433     SUBDBG( "Entering\n" );
00434     ( void ) ctrl; /* unused: the control state is kept in global_papicuda_control */
00435     papicuda_context_t *gctxt = global_papicuda_context;
00436
00437     CHECK_PRINT_EVAL( !gctxt, "Error: The PAPI CUDA component needs to be initialized first", return( PAPI_ENOINIT ) );
00438     /* If no events were found during the initial component initialization, return error  */
00439     if( global_papicuda_context->availEventSize <= 0 ) {
00440         strncpy( _cuda_vector.cmp_info.disabled_reason, "ERROR CUDA: No events exist", PAPI_MAX_STR_LEN );
00441         return ( PAPI_EMISC );
00442     }
00443     /* If it does not exist, create the global structure to hold CUDA contexts and active events */
00444     if ( !global_papicuda_control ) {
00445         global_papicuda_control = ( papicuda_control_t* ) papi_calloc( 1, sizeof( papicuda_control_t ) ); /* NOTE(review): result is not checked before the writes below */
00446         global_papicuda_control->countOfActiveCUContexts = 0;
00447         global_papicuda_control->activeEventCount = 0;
00448     }
00449     return PAPI_OK;
00450 }

static int papicuda_init_thread ( hwd_context_t *  ctx  )  [static]

Definition at line 375 of file linux-cuda.c.

00376 { /* Per-thread init hook: nothing to set up, always returns PAPI_OK. */
00377     ( void ) ctx; /* unused */
00378     SUBDBG( "Entering\n" );
00379
00380     return PAPI_OK;
00381 }

static int papicuda_list_all_events ( papicuda_context_t *  gctxt  )  [static]

Definition at line 250 of file linux-cuda.c.

00251 { /* Fill gctxt with the device list and a flat table of every CUPTI event (ID, owning device, name, description) across all devices and domains. Returns 0 on success; -1 from the CHECK_CU/CUPTI macros, or PAPI_ENOSUPP / PAPI_ENOMEM, on failure. */
00252     SUBDBG( "Entering\n" );
00253     CUptiResult cuptiErr;
00254     CUresult cuErr;
00255     unsigned int deviceNum;
00256     uint32_t domainNum, eventNum;
00257     papicuda_device_desc_t *mydevice;
00258     char tmpStr[PAPI_MIN_STR_LEN];
00259     tmpStr[PAPI_MIN_STR_LEN-1]='\0'; /* pre-terminate: the name query below is given at most PAPI_MIN_STR_LEN-1 bytes */
00260     size_t tmpSizeBytes;
00261     int ii;
00262
00263     /* How many gpgpu devices do we have? */
00264     cuErr = ( *cuDeviceGetCountPtr )( &gctxt->deviceCount );
00265     if ( cuErr==CUDA_ERROR_NOT_INITIALIZED ) {
00266         /* If CUDA is not initialized, initialize CUDA and retry the device count */
00267         /* This is required for some of the PAPI tools, that do not call the init functions */
00268         CHECK_CU_ERROR( ( *cuInitPtr )( 0 ), "cuInit" );
00269         cuErr = ( *cuDeviceGetCountPtr )( &gctxt->deviceCount );
00270     }
00271     CHECK_CU_ERROR( cuErr, "cuDeviceGetCount" );
00272     CHECK_PRINT_EVAL( gctxt->deviceCount==0, "ERROR CUDA: Could not find any CUDA devices", return( PAPI_ENOSUPP ) );
00273     SUBDBG( "Found %d devices\n", gctxt->deviceCount );
00274
00275     /* allocate memory for device information */
00276     gctxt->deviceArray = ( papicuda_device_desc_t * ) papi_calloc( gctxt->deviceCount, sizeof( papicuda_device_desc_t ) );
00277     CHECK_PRINT_EVAL( !gctxt->deviceArray, "ERROR CUDA: Could not allocate memory for CUDA device structure", return( PAPI_ENOSUPP ) );
00278
00279     /* Pass 1: for each device, get domains and domain-events counts, summing the total into availEventSize */
00280     gctxt->availEventSize = 0;
00281     for( deviceNum = 0; deviceNum < ( uint )gctxt->deviceCount; deviceNum++ ) {
00282         mydevice = &gctxt->deviceArray[deviceNum];
00283         /* Get device id for each device */
00284         CHECK_CU_ERROR( ( *cuDeviceGetPtr )( &mydevice->cuDev, deviceNum ), "cuDeviceGet" );
00285         /* Get device name */
00286         CHECK_CU_ERROR( ( *cuDeviceGetNamePtr )( mydevice->deviceName, PAPI_MIN_STR_LEN-1, mydevice->cuDev ), "cuDeviceGetName" );
00287         mydevice->deviceName[PAPI_MIN_STR_LEN-1]='\0';
00288         /* Get max num domains for each device */
00289         CHECK_CUPTI_ERROR( ( *cuptiDeviceGetNumEventDomainsPtr )( mydevice->cuDev, &mydevice->maxDomains ), "cuptiDeviceGetNumEventDomains" );
00290         /* Allocate space to hold domain IDs */
00291         mydevice->domainIDArray = ( CUpti_EventDomainID * ) papi_calloc( mydevice->maxDomains, sizeof( CUpti_EventDomainID ) );
00292         CHECK_PRINT_EVAL( !mydevice->domainIDArray, "ERROR CUDA: Could not allocate memory for CUDA device domains", return( PAPI_ENOMEM ) );
00293         /* Put domain ids into allocated space */
00294         size_t domainarraysize = mydevice->maxDomains * sizeof( CUpti_EventDomainID );
00295         CHECK_CUPTI_ERROR( ( *cuptiDeviceEnumEventDomainsPtr )( mydevice->cuDev, &domainarraysize, mydevice->domainIDArray ), "cuptiDeviceEnumEventDomains" );
00296         /* Allocate space to hold domain event counts  */
00297         mydevice->domainIDNumEvents = ( uint32_t * ) papi_calloc( mydevice->maxDomains, sizeof( uint32_t ) );
00298         CHECK_PRINT_EVAL( !mydevice->domainIDNumEvents, "ERROR CUDA: Could not allocate memory for domain event counts", return( PAPI_ENOMEM ) );
00299         /* For each domain, get event counts in domainIDNumEvents[]  */
00300         for ( domainNum=0; domainNum < mydevice->maxDomains; domainNum++ ) {
00301             CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
00302             /* Get num events in domain */
00303             //SUBDBG( "Device %d:%d calling cuptiEventDomainGetNumEventsPtr with domainID %d \n", deviceNum, mydevice->cuDev, domainID );
00304             CHECK_CUPTI_ERROR(  ( *cuptiEventDomainGetNumEventsPtr ) ( domainID, &mydevice->domainIDNumEvents[domainNum] ), "cuptiEventDomainGetNumEvents" );
00305             /* Keep track of overall number of events */
00306             gctxt->availEventSize += mydevice->domainIDNumEvents[domainNum];
00307         }
00308     }
00309
00310     /* Allocate the flat per-event arrays (ID, device number, name/description), one slot per counted event */
00311     gctxt->availEventIDArray = ( CUpti_EventID * ) papi_calloc( gctxt->availEventSize, sizeof( CUpti_EventID ) );
00312     CHECK_PRINT_EVAL( !gctxt->availEventIDArray, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
00313     gctxt->availEventDeviceNum = ( int * ) papi_calloc( gctxt->availEventSize, sizeof( int ) );
00314     CHECK_PRINT_EVAL( !gctxt->availEventDeviceNum, "ERROR CUDA: Could not allocate memory", return( PAPI_ENOMEM ) );
00315     gctxt->availEventDesc = ( papicuda_name_desc_t * ) papi_calloc( gctxt->availEventSize, sizeof( papicuda_name_desc_t ) );
00316     CHECK_PRINT_EVAL( !gctxt->availEventDesc, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
00317     /* Pass 2: record the events and descriptions */
00318     int idxEventArray = 0; /* next free slot in the flat per-event arrays */
00319     for( deviceNum = 0; deviceNum < ( uint )gctxt->deviceCount; deviceNum++ ) {
00320         mydevice = &gctxt->deviceArray[deviceNum];
00321         //SUBDBG( "For device %d %d maxdomains %d \n", deviceNum, mydevice->cuDev, mydevice->maxDomains );
00322         /* Get and store event IDs, names, descriptions into the large arrays allocated */
00323         for ( domainNum=0; domainNum < mydevice->maxDomains; domainNum++ ) {
00324             /* Get domain id */
00325             CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
00326             uint32_t domainNumEvents = mydevice->domainIDNumEvents[domainNum];
00327             SUBDBG( "For device %d domain %d %d numEvents %d\n", mydevice->cuDev, domainNum, domainID, domainNumEvents );
00328             /* Allocate temp space for eventIDs for this domain */
00329             CUpti_EventID *domainEventIDArray = ( CUpti_EventID * ) papi_calloc( domainNumEvents, sizeof( CUpti_EventID ) );
00330             CHECK_PRINT_EVAL( !domainEventIDArray, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
00331             /* Load the domain eventIDs in temp space */
00332             size_t domainEventArraySize = domainNumEvents * sizeof( CUpti_EventID );
00333             cuptiErr = ( *cuptiEventDomainEnumEventsPtr )  ( domainID, &domainEventArraySize, domainEventIDArray );
00334             CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventDomainEnumEvents" ); /* NOTE(review): this and the later CHECK_CUPTI_ERROR returns skip the papi_free of domainEventIDArray below */
00335             /* For each event, get and store name and description */
00336             for ( eventNum=0; eventNum<domainNumEvents; eventNum++ ) {
00337                 /* Record the event IDs in native event array */
00338                 CUpti_EventID myeventID = domainEventIDArray[eventNum];
00339                 gctxt->availEventIDArray[idxEventArray] = myeventID;
00340                 gctxt->availEventDeviceNum[idxEventArray] = deviceNum;
00341                 /* Get event name */
00342                 tmpSizeBytes = PAPI_MIN_STR_LEN-1 * sizeof( char ); /* NOTE(review): parses as PAPI_MIN_STR_LEN - (1*sizeof(char)); gives the intended value only because sizeof(char) is 1 */
00343                 cuptiErr = ( *cuptiEventGetAttributePtr ) ( myeventID, CUPTI_EVENT_ATTR_NAME, &tmpSizeBytes, tmpStr ) ;
00344                 CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGetAttribute" );
00345                 /* Save the event name as "device:<deviceNum>:<name>", replacing spaces with underscores */
00346                 //snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "%s:%d:%s", mydevice->deviceName, deviceNum, tmpStr );
00347                 snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "device:%d:%s", deviceNum, tmpStr );
00348                 gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN-1] = '\0';
00349                 char *nameTmpPtr = gctxt->availEventDesc[idxEventArray].name;
00350                 for ( ii = 0; ii < ( int )strlen( nameTmpPtr ); ii++ ) if ( nameTmpPtr[ii] == ' ' ) nameTmpPtr[ii] = '_';
00351                 /* Save description in the native event array */
00352                 tmpSizeBytes = PAPI_2MAX_STR_LEN-1 * sizeof( char ); /* NOTE(review): same precedence quirk as for the name size above */
00353                 cuptiErr = ( *cuptiEventGetAttributePtr ) ( myeventID, CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, gctxt->availEventDesc[idxEventArray].description );
00354                 CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGetAttribute" );
00355                 gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN-1] = '\0';
00356                 // SUBDBG( "Event ID:%d Name:%s Desc:%s\n", gctxt->availEventIDArray[idxEventArray], gctxt->availEventDesc[idxEventArray].name, gctxt->availEventDesc[idxEventArray].description );
00357                 /* Advance to the next slot of the flat event arrays */
00358                 idxEventArray++;
00359             }
00360             papi_free ( domainEventIDArray );
00361         }
00362     }
00363     /* return 0 if everything went OK */
00364     return 0;
00365 }

Here is the caller graph for this function:

static int papicuda_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
) [static]

Takes a native event code and passes back the event description

Parameters:
EventCode is the native event code
name is a pointer to the buffer the description is copied to
len is the size of that buffer

Definition at line 906 of file linux-cuda.c.

00907 { /* Copy the description of native event EventCode into name (at most len bytes). Returns PAPI_OK, or PAPI_EINVAL if EventCode is not below availEventSize. */
00908     //SUBDBG( "Entering\n" );
00909     unsigned int index = EventCode; /* the event code is used directly as an index into availEventDesc */
00910     papicuda_context_t *gctxt = global_papicuda_context; /* NOTE(review): dereferenced without a NULL check */
00911     if ( index < gctxt->availEventSize ) {
00912         strncpy( name, gctxt->availEventDesc[index].description, len ); /* NOTE(review): strncpy leaves name unterminated if the description has len or more characters */
00913     } else {
00914         return ( PAPI_EINVAL );
00915     }
00916     return ( PAPI_OK );
00917 }

static int papicuda_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
) [static]

Takes a native event code and passes back the name

Parameters:
EventCode is the native event code
name is a pointer for the name to be copied to
len is the size of the name string

Definition at line 886 of file linux-cuda.c.

00887 { /* Copy the name of native event EventCode into name (at most len bytes). Returns PAPI_OK, or PAPI_EINVAL if EventCode is not below availEventSize. */
00888     //SUBDBG( "Entering EventCode %d\n", EventCode );
00889     unsigned int index = EventCode; /* the event code is used directly as an index into availEventDesc */
00890     papicuda_context_t *gctxt = global_papicuda_context; /* NOTE(review): dereferenced without a NULL check */
00891     if ( index < gctxt->availEventSize ) {
00892         strncpy( name, gctxt->availEventDesc[index].name, len ); /* NOTE(review): strncpy leaves name unterminated if the stored name has len or more characters */
00893     } else {
00894         return ( PAPI_EINVAL );
00895     }
00896     //SUBDBG( "EventCode %d: Exit %s\n", EventCode, name );
00897     return ( PAPI_OK );
00898 }

static int papicuda_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
) [static]

Enumerate Native Events.

Parameters:
EventCode is the event of interest
modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS

Definition at line 859 of file linux-cuda.c.

00860 { /* Step through native event codes 0 .. availEventSize-1. PAPI_ENUM_FIRST sets *EventCode to 0; PAPI_ENUM_EVENTS advances it by one or returns PAPI_ENOEVNT at the end; any other modifier returns PAPI_EINVAL. */
00861     //SUBDBG( "Entering\n" );
00862     switch( modifier ) {
00863     case PAPI_ENUM_FIRST:
00864         *EventCode = 0;
00865         return ( PAPI_OK );
00866         break; /* unreachable after the return above */
00867     case PAPI_ENUM_EVENTS:
00868         /* NOTE(review): if availEventSize can be 0 here, availEventSize - 1 compared against the unsigned *EventCode would act as a huge bound - confirm the field's type and range */
00868         if( *EventCode < global_papicuda_context->availEventSize - 1 ) {
00869             *EventCode = *EventCode + 1;
00870             return ( PAPI_OK );
00871         } else
00872             return ( PAPI_ENOEVNT );
00873         break; /* unreachable: both branches above return */
00874     default:
00875         return ( PAPI_EINVAL );
00876     }
00877     return ( PAPI_OK ); /* unreachable: every switch path returns */
00878 }

static int papicuda_read ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl,
long long **  events,
int  flags 
) [static]

Triggered by PAPI_read(). For CUDA component, switch to each context, read all the eventgroups, and put the values in the correct places.

Definition at line 643 of file linux-cuda.c.

00644 { /* Read every event group of every active CUDA context, add each value to the matching active event, and hand back the value array through *events. Returns PAPI_OK, PAPI_EMISC on a CUPTI read failure, or -1 from CHECK_CU_ERROR. */
00645     SUBDBG( "Entering\n" );
00646     ( void ) ctx;   /* unused */
00647     ( void ) ctrl;  /* unused: state is taken from the two globals below */
00648     ( void ) flags; /* unused */
00649     papicuda_control_t *gctrl = global_papicuda_control;
00650     papicuda_context_t *gctxt = global_papicuda_context;
00651     papicuda_active_cucontext_t *currctrl;
00652     int cuContextIdx, gg, ii, jj;
00653     CUcontext saveCtx, tmpCtx;
00654     CUptiResult cuptiErr;
00655     size_t readEventValueBufferSize = sizeof( uint64_t )*PAPICUDA_MAX_COUNTERS; /* NOTE(review): passed by pointer below and never reset per group - confirm whether CUPTI writes back the bytes used */
00656     uint64_t readEventValueBuffer[PAPICUDA_MAX_COUNTERS];
00657     size_t readEventIDArraySize = sizeof( CUpti_EventID )*PAPICUDA_MAX_COUNTERS; /* NOTE(review): same in/out concern as readEventValueBufferSize */
00658     CUpti_EventID readEventIDArray[PAPICUDA_MAX_COUNTERS];
00659     size_t numEventIDsRead;
00660
00661     SUBDBG( "Switch to each context and read CUDA eventgroups\n" );
00662     // SUBDBG( "Save initial CUDA context\n" );
00663     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00664     /* Switch to each context and read its CUDA eventgroups */
00665     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00666         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00667         // SUBDBG( "Switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
00668         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00669         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00670             // SUBDBG( "Read from context %d eventgroup %d\n", cuContextIdx, gg );
00671             cuptiErr = ( *cuptiEventGroupReadAllEventsPtr )( currctrl->eventGroup[gg], CUPTI_EVENT_READ_FLAG_NONE, &readEventValueBufferSize, readEventValueBuffer, &readEventIDArraySize, readEventIDArray, &numEventIDsRead );
00672             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupReadAllEvents: Could not read from CUPTI eventgroup", return( PAPI_EMISC ) ); /* NOTE(review): returns with this context still pushed and saveCtx not restored */
00673             /* Match read values against active events by scanning activeEvents array and matching associated availEventIDs  */
00674             for( ii = 0; ii < ( int )numEventIDsRead; ii++ ) {
00675                 for( jj = 0; jj < gctrl->activeEventCount; jj++ ) {
00676                     int eventIndex = gctrl->activeEventIndex[jj];
00677                     if ( gctrl->activeEventContextIdx[jj]==cuContextIdx && gctxt->availEventIDArray[eventIndex]==readEventIDArray[ii] ) {
00678                         gctrl->activeEventValues[jj] += ( long long )readEventValueBuffer[ii]; /* accumulated, not overwritten */
00679                         SUBDBG( "Matched read-eventID %d:%d value %ld activeEvent %d value %lld \n", jj, (int)readEventIDArray[ii], readEventValueBuffer[ii], eventIndex, gctrl->activeEventValues[jj] ); /* NOTE(review): %ld is used for a uint64_t argument */
00680                         break; /* only the first matching active event receives the value */
00681                     }
00682                 }
00683             }
00684         }
00685         CUresult cuErr = ( *cuCtxPopCurrentPtr ) ( &tmpCtx );
00686         if ( cuErr != CUDA_SUCCESS ) PAPIERROR ( "Error popping context %d\n", cuErr ); /* extra message carrying the numeric code; CHECK_CU_ERROR below reports again and returns */
00687         CHECK_CU_ERROR( cuErr,  "cuCtxPopCurrent" );
00688     }
00689     //SUBDBG( "Restore original context\n" );
00690     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00691     *events = gctrl->activeEventValues; /* caller gets a pointer into the global control structure, not a copy */
00692     return ( PAPI_OK );
00693 }

Here is the call graph for this function:

static int papicuda_reset ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
) [static]

Triggered by PAPI_reset() but only if the EventSet is currently running. If the eventset is not currently running, then the saved value in the EventSet is set to zero without calling this routine.

Definition at line 784 of file linux-cuda.c.

00785 { /* Zero the accumulated values of all active events and reset every event group in every active CUDA context. Returns PAPI_OK, PAPI_EMISC on a CUPTI reset failure, or -1 from CHECK_CU_ERROR. */
00786     SUBDBG( "Entering\n" );
00787     ( void ) ctx;  /* unused */
00788     ( void ) ctrl; /* unused: state is taken from global_papicuda_control */
00789     papicuda_control_t *gctrl = global_papicuda_control;
00790     papicuda_active_cucontext_t *currctrl;
00791     int cuContextIdx, gg, ii;
00792     CUptiResult cuptiErr;
00793     CUcontext saveCtx, tmpCtx;
00794
00795     //SUBDBG( "Reset all active event values\n" );
00796     for ( ii=0; ii<gctrl->activeEventCount; ii++ )
00797         gctrl->activeEventValues[ii] = 0;
00798     // SUBDBG( "Save initial CUDA context and restore later\n" );
00799     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00800     // SUBDBG( "Switch to each context and reset CUDA eventgroups\n" );
00801     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00802         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00803         //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
00804         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00805         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00806             // SUBDBG( "Reset events in eventgroup\n" );
00807             cuptiErr = ( *cuptiEventGroupResetAllEventsPtr )( currctrl->eventGroup[gg] );
00808             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupResetAllEvents: Could not reset the event groups", return( PAPI_EMISC ) ); /* NOTE(review): returns with this context still pushed and saveCtx not restored */
00809             SUBDBG( "For papicuda context %d on device %d event group %d was enabled and reset\n", cuContextIdx, currctrl->deviceNum, gg ); /* message says "enabled", but this function only resets */
00810         }
00811         CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ),  "cuCtxPopCurrent" );
00812     }
00813     // SUBDBG( "Restore original context\n" );
00814     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00815     return ( PAPI_OK );
00816 }

static int papicuda_set_domain ( hwd_control_state_t *  ctrl,
int  domain 
) [static]

Definition at line 766 of file linux-cuda.c.

00767 { /* Validate a counting domain: returns PAPI_OK if any of the USER, KERNEL or OTHER bits is set in domain, PAPI_EINVAL otherwise. Nothing is stored. */
00768     SUBDBG( "Entering\n" );
00769     ( void ) ctrl; /* unused */
00770     if ( ( PAPI_DOM_USER & domain ) ||
00771             ( PAPI_DOM_KERNEL & domain ) ||
00772             ( PAPI_DOM_OTHER & domain ) )
00773         return ( PAPI_OK );
00774     else
00775         return ( PAPI_EINVAL );
00776     return ( PAPI_OK ); /* unreachable: both branches above return */
00777 }

static int papicuda_shutdown_component ( void   )  [static]

Triggered by PAPI_shutdown() and frees memory allocated in the CUDA component.

Definition at line 705 of file linux-cuda.c.

00706 { /* Free the global context and control structures (and the arrays they own), clear the global pointers, and close the three dynamic library handles. Always returns PAPI_OK. */
00707     SUBDBG( "Entering\n" );
00708     papicuda_control_t *gctrl = global_papicuda_control;
00709     papicuda_context_t *gctxt = global_papicuda_context;
00710     int deviceNum, cuContextIdx;
00711     /* Free the context: per-device domain arrays first, then the flat event arrays and the device array */
00712     if ( gctxt ) {
00713         for( deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++ ) {
00714             papicuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum];
00715             papi_free( mydevice->domainIDArray );
00716             papi_free( mydevice->domainIDNumEvents );
00717         }
00718         papi_free( gctxt->availEventIDArray );
00719         papi_free( gctxt->availEventDeviceNum );
00720         papi_free( gctxt->availEventDesc );
00721         papi_free( gctxt->deviceArray );
00722         papi_free( gctxt );
00723         global_papicuda_context = gctxt = NULL;
00724     }
00725     /* Free the control structure and each active-context record it points to */
00726     if ( gctrl ) {
00727         for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ )
00728             if ( gctrl->arrayOfActiveCUContexts[cuContextIdx]!=NULL )
00729                 papi_free( gctrl->arrayOfActiveCUContexts[cuContextIdx] );
00730         papi_free( gctrl );
00731         global_papicuda_control = gctrl = NULL;
00732     }
00733     /* Close the dynamic libraries needed by this component. NOTE(review): dl1..dl3 start out NULL and are closed unconditionally - confirm they are always set before this runs */
00734     dlclose( dl1 );
00735     dlclose( dl2 );
00736     dlclose( dl3 );
00737     return ( PAPI_OK );
00738 }

int papicuda_shutdown_thread ( hwd_context_t *  ctx  ) 

Called at thread shutdown. Does nothing in the CUDA component.

Definition at line 696 of file linux-cuda.c.

00697 { /* Per-thread shutdown hook: nothing to release, always returns PAPI_OK. */
00698     SUBDBG( "Entering\n" );
00699     ( void ) ctx; /* unused */
00700
00701     return ( PAPI_OK );
00702 }

static int papicuda_start ( hwd_context_t *  ctx,
hwd_control_state_t *  ctrl 
) [static]

Triggered by PAPI_start(). For CUDA component, switch to each context and start all eventgroups.

Definition at line 566 of file linux-cuda.c.

00567 { /* Zero the accumulated values of all active events, then enable and reset every event group in every active CUDA context. Returns PAPI_OK, PAPI_EMISC on a CUPTI failure, or -1 from CHECK_CU_ERROR. */
00568     SUBDBG( "Entering\n" );
00569     ( void ) ctx;  /* unused */
00570     ( void ) ctrl; /* unused: state is taken from global_papicuda_control */
00571     papicuda_control_t *gctrl = global_papicuda_control;
00572     //papicuda_context_t *gctxt = global_papicuda_context;
00573     papicuda_active_cucontext_t *currctrl;
00574     int cuContextIdx, gg, ii;
00575     CUptiResult cuptiErr;
00576     CUcontext saveCtx, tmpCtx;
00577
00578     //SUBDBG( "Reset all active event values\n" );
00579     for ( ii=0; ii<gctrl->activeEventCount; ii++ )
00580         gctrl->activeEventValues[ii] = 0;
00581
00582     // SUBDBG( "Switch to each context and enable CUDA eventgroups associated with that context\n" );
00583     /* Pop the caller's current cuda context so it can be pushed back at the end */
00584     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00585     /* Switch to each context and enable CUDA eventgroups */
00586     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00587         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00588         //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
00589         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00590         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00591             // SUBDBG( "Enable event group\n" );
00592             cuptiErr = ( *cuptiEventGroupEnablePtr )( currctrl->eventGroup[gg] );
00593             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupEnable: Could not enable one of the event groups", return( PAPI_EMISC ) ); /* NOTE(review): returns with this context still pushed and saveCtx not restored */
00594             // SUBDBG( "Reset events in eventgroup\n" );
00595             cuptiErr = ( *cuptiEventGroupResetAllEventsPtr )( currctrl->eventGroup[gg] );
00596             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupResetAllEvents: Could not reset the event groups", return( PAPI_EMISC ) );
00597             SUBDBG( "For papicuda context %d on device %d event group %d was enabled and reset\n", cuContextIdx, currctrl->deviceNum, gg );
00598         }
00599         // SUBDBG( "Pop temp context\n" );
00600         CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ),  "cuCtxPopCurrent" );
00601     }
00602     //SUBDBG( "Restore original context\n" );
00603     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00604     return ( PAPI_OK );
00605 }

static int papicuda_stop ( hwd_context_t * ctx,
hwd_control_state_t * ctrl 
) [static]

Triggered by PAPI_stop()

Definition at line 608 of file linux-cuda.c.

00609 {
00610     /*
00611      * Disable every CUPTI eventgroup of every CUDA context recorded in
00612      * global_papicuda_control.  ctx and ctrl are ignored.
00613      *
00614      * The caller's current CUDA context is popped first and pushed back before
00615      * returning.  A failure part-way through still pops the temporary context
00616      * and restores the caller's context before the error is returned.
00617      *
00618      * Returns PAPI_OK on success, PAPI_EMISC if cuptiEventGroupDisable fails,
00619      * and -1 if a CUDA driver context call fails.
00620      */
00621     SUBDBG( "Entering to disable all CUPTI eventgroups\n" );
00622     ( void ) ctx;
00623     ( void ) ctrl;
00624     papicuda_control_t *gctrl = global_papicuda_control;
00625     papicuda_active_cucontext_t *currctrl;
00626     int cuContextIdx, gg;
00627     int retval = PAPI_OK;
00628     CUptiResult cuptiErr;
00629     CUcontext saveCtx, tmpCtx;
00630 
00631     /* Save initial CUDA context; nothing has been changed yet if this fails */
00632     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00633     /* Switch to each context and disable CUDA eventgroups, stopping at the first failure */
00634     for ( cuContextIdx=0; retval==PAPI_OK && cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00635         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00636         if ( ( *cuCtxPushCurrentPtr ) ( currctrl->context ) != CUDA_SUCCESS ) {
00637             PAPIERROR( "CUDA Driver API function failed '%s'", "cuCtxPushCurrent" );
00638             retval = -1;
00639             break;   /* nothing was pushed, so there is nothing to pop */
00640         }
00641         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00642             cuptiErr = ( *cuptiEventGroupDisablePtr )( currctrl->eventGroup[gg] );
00643             if ( cuptiErr!=CUPTI_SUCCESS ) {
00644                 PAPIERROR( "%s", "cuptiEventGroupDisable: Could not disable the event groups" );
00645                 retval = PAPI_EMISC;
00646                 break;
00647             }
00648             SUBDBG( "For papicuda context %d on device %d event group %d was disabled\n", cuContextIdx, currctrl->deviceNum, gg );
00649         }
00650         /* Always pop the context pushed above, even after a failed disable */
00651         if ( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ) != CUDA_SUCCESS ) {
00652             PAPIERROR( "CUDA Driver API function failed '%s'", "cuCtxPopCurrent" );
00653             if ( retval==PAPI_OK ) retval = -1;   /* keep the first error seen */
00654         }
00655     }
00656     /* Restore original context on both the success and the failure path */
00657     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00658     return ( retval );
00659 }

static int papicuda_update_control_state ( hwd_control_state_t * ctrl,
NativeInfo_t * nativeInfo,
int  nativeCount,
hwd_context_t * ctx 
) [static]

Triggered by eventset operations like add or remove. For CUDA, this must be called multiple times, once from each separate CUDA context, with the events to be measured from that context. For each context, eventgroups are created for the events.

Definition at line 457 of file linux-cuda.c.

00458 {
00459     /*
00460      * Register the native events in nativeInfo[0..nativeCount-1] with the CUDA
00461      * context that is current on the calling thread.  ctx and ctrl are ignored;
00462      * all state is kept in global_papicuda_control and global_papicuda_context.
00463      *
00464      * The current context is looked up in, or appended to, the array of active
00465      * contexts.  Each event that is not yet recorded as active is added to the
00466      * first CUPTI eventgroup of that context that accepts it (a new eventgroup
00467      * is created when none does) and is then appended to the active event list.
00468      *
00469      * Returns PAPI_OK on success (nativeCount==0 changes nothing), PAPI_EMISC,
00470      * PAPI_EINVAL or PAPI_ENOMEM for the failures reported below, and -1 when
00471      * cuptiEventGroupCreate fails.
00472      */
00473     /* Note: NativeInfo_t is defined in papi_internal.h */
00474     SUBDBG( "Entering with nativeCount %d\n", nativeCount );
00475     ( void ) ctx;
00476     ( void ) ctrl;
00477     papicuda_control_t *gctrl = global_papicuda_control;
00478     papicuda_context_t *gctxt = global_papicuda_context;
00479     papicuda_active_cucontext_t *currctrl;
00480     int currDeviceNum, currContextIdx, cuContextIdx;
00481     CUcontext currCuCtx;
00482     int index, ii, jj;
00483 
00484     if ( nativeCount == 0 ) {
00485         /* Does nativeCount=0 imply that the component is being reset!? */
00486         /* gctrl->activeEventCount = 0;  */
00487     } else {
00488         /* nativeCount>0 so we need to process the events */
00489 
00490         /* Get/query some device and context specific information  */
00491         CHECK_PRINT_EVAL( ( *cudaGetDevicePtr )( &currDeviceNum )!=CUDA_SUCCESS, "cudaGetDevice: CUDA device MUST be set before adding events", return( PAPI_EMISC ) );
00492         CHECK_PRINT_EVAL( ( *cudaFreePtr )( NULL )!=CUDA_SUCCESS, "cudaFree: Failed to free in this CUDA context", return( PAPI_EMISC ) );
00493         CHECK_PRINT_EVAL( ( *cuCtxGetCurrentPtr )( &currCuCtx )!=CUDA_SUCCESS, "cuCtxGetCurrent: CUDA context MUST be initialized before adding events", return ( PAPI_EMISC ) );
00494 
00495         /* Find current context/control, creating it if it does not exist */
00496         for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ )
00497             if ( gctrl->arrayOfActiveCUContexts[cuContextIdx]->context == currCuCtx ) break;
00498         CHECK_PRINT_EVAL( cuContextIdx==PAPICUDA_MAX_COUNTERS,  "Exceeded hardcoded maximum number of contexts (PAPICUDA_MAX_COUNTERS)", return( PAPI_EMISC ) );
00499         if ( cuContextIdx==gctrl->countOfActiveCUContexts ) {
00500             gctrl->arrayOfActiveCUContexts[cuContextIdx] = papi_calloc( 1, sizeof( papicuda_active_cucontext_t ) );
00501             CHECK_PRINT_EVAL( ( gctrl->arrayOfActiveCUContexts[cuContextIdx]==NULL ), "Memory allocation for new active context failed", return( PAPI_ENOMEM ) ) ;
00502             gctrl->arrayOfActiveCUContexts[cuContextIdx]->context = currCuCtx;
00503             gctrl->arrayOfActiveCUContexts[cuContextIdx]->deviceNum = currDeviceNum;
00504             gctrl->countOfActiveCUContexts++;
00505             SUBDBG( "Added a new context ... now %d\n", gctrl->countOfActiveCUContexts );
00506         }
00507         currContextIdx = cuContextIdx;
00508         currctrl = gctrl->arrayOfActiveCUContexts[currContextIdx];
00509         /* At this point, currCuCtx is at index cuContextIdx in the arrayOfActiveCUContexts array */
00510 
00511         /* For each event, check if it is already added.  If not, try to add it to the current context.
00512            Try each existing eventgroup.  If none will have this event, create a new event group.  If new event group will not have it... fail */
00513         for( ii = 0; ii < nativeCount; ii++ ) {
00514             index = nativeInfo[ii].ni_event; /* Get the PAPI event index from the user */
00515             /* Check to see if event is already in some context */
00516             SUBDBG( "Searching %d active events to see if event %d %s is already in some context\n", gctrl->activeEventCount, index, gctxt->availEventDesc[index].name );
00517             int eventAlreadyAdded=0;
00518             for( jj = 0; jj < gctrl->activeEventCount; jj++ ) {
00519                 if ( gctrl->activeEventIndex[jj] == index ) {
00520                     eventAlreadyAdded=1;
00521                     break;
00522                 }
00523             }
00524 
00525             /* If event was not found in any context.. try to insert it into current context */
00526             if ( !eventAlreadyAdded ) {
00527                 SUBDBG( "Need to add event %d %s to the current context\n", index, gctxt->availEventDesc[index].name );
00528                 /* Make sure that the device number for the event matches the device for this context */
00529                 CHECK_PRINT_EVAL( (currDeviceNum!=gctxt->availEventDeviceNum[index]), "Current CUDA device cannot use this event", return( PAPI_EINVAL ) );
00530                 /* Make sure the active event table has room before touching any CUPTI eventgroup,
00531                    so a full table cannot leave an event added to a group but never recorded */
00532                 CHECK_PRINT_EVAL( ( gctrl->activeEventCount==PAPICUDA_MAX_COUNTERS-1 ), "Exceeded maximum num of events (PAPICUDA_MAX_COUNTERS)", return( PAPI_EMISC ) );
00533                 /* if this event index corresponds to something from availEventIDArray */
00534                 if ( index < ( int )gctxt->availEventSize ) {
00535                     /* lookup cuptieventid for this event index */
00536                     CUpti_EventID cuptieventid = gctxt->availEventIDArray[index];
00537                     CUpti_EventGroup cuptieventgroup;
00538                     int addstatus=!CUPTI_SUCCESS, gg;
00539                     SUBDBG( "Event %s is going to be added to current context %d having %d eventgroups\n", gctxt->availEventDesc[index].name, currContextIdx, currctrl->numEventGroups );
00540                     /* For each existing eventgroup, try to insert this event */
00541                     for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00542                         cuptieventgroup = currctrl->eventGroup[gg];
00543                         addstatus = ( *cuptiEventGroupAddEventPtr )( cuptieventgroup, cuptieventid );
00544                         if ( addstatus==CUPTI_SUCCESS ) {
00545                             SUBDBG( "Event %s successfully added to current eventgroup %d:%d\n", gctxt->availEventDesc[index].name, currContextIdx, gg );
00546                             break;
00547                         }
00548                     }
00549                     /* If the event could not be added to any earlier eventgroup, create a new one and try again.  Fail if this does not succeed */
00550                     if ( addstatus!=CUPTI_SUCCESS ) {
00551                         CHECK_PRINT_EVAL( ( gg>PAPICUDA_MAX_COUNTERS-1 ), "For current CUDA device, could not add event (no more eventgroups can be added)", return( PAPI_EMISC ) );
00552                         CHECK_CUPTI_ERROR( ( *cuptiEventGroupCreatePtr )( currctrl->context, &currctrl->eventGroup[gg], 0 ), "cuptiEventGroupCreate" );
00553                         cuptieventgroup = currctrl->eventGroup[gg];
00554                         currctrl->numEventGroups++;
00555                         addstatus = ( *cuptiEventGroupAddEventPtr )( cuptieventgroup, cuptieventid );
00556                         CHECK_PRINT_EVAL( ( addstatus!=CUPTI_SUCCESS ), "cuptiEventGroupAddEvent: Could not add event (event may not match CUDA context)", return( PAPI_EMISC ) );
00557                         SUBDBG( "Event %s successfully added to new eventgroup %d:%d\n", gctxt->availEventDesc[index].name, currContextIdx, gg );
00558                     }
00559                 }
00560 
00561                 /* Record index of this active event back into the nativeInfo structure */
00562                 nativeInfo[ii].ni_position = gctrl->activeEventCount;
00563                 /* record added event at the higher level */
00564                 gctrl->activeEventIndex[gctrl->activeEventCount] = index;
00565                 gctrl->activeEventContextIdx[gctrl->activeEventCount] = currContextIdx;
00566                 gctrl->activeEventValues[gctrl->activeEventCount] = 0;
00567                 gctrl->activeEventCount++;
00568             }
00569         }
00570     }
00571     return ( PAPI_OK );
00572 }


Variable Documentation

Vector that points to entry points for the component

Definition at line 79 of file linux-cuda.c.

void( * _dl_non_dynamic_init)(void)

Definition at line 112 of file linux-cuda.c.

00184                                               { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
00185 
00186 static int papicuda_linkCudaLibraries()
00187 {
00188     /*
00189      * Load libcuda.so, libcudart.so and libcupti.so with dlopen and resolve the
00190      * CUDA driver, CUDA runtime and CUPTI entry points listed below into the
00191      * corresponding *Ptr function pointers.
00192      *
00193      * Returns PAPI_OK on success.  On the first missing library or symbol the
00194      * reason is copied into _cuda_vector.cmp_info.disabled_reason and
00195      * PAPI_ENOSUPP is returned.
00196      */
00197     /* Attempt to guess if we were statically linked to libc, if so bail */
00198     if( _dl_non_dynamic_init != NULL ) {
00199         strncpy( _cuda_vector.cmp_info.disabled_reason, "The cuda component does not support statically linking to libc.", PAPI_MAX_STR_LEN );
00200         return PAPI_ENOSUPP;
00201     }
00202     /* Need to link in the cuda libraries, if not found disable the component */
00203     dl1 = dlopen( "libcuda.so", RTLD_NOW | RTLD_GLOBAL );
00204     CHECK_DL_STATUS( !dl1 , "CUDA library libcuda.so not found." );
00205     dlerror();   /* clear any stale error: the checks below treat a non-NULL dlerror() as a missing symbol */
00206     cuCtxGetCurrentPtr = dlsym( dl1, "cuCtxGetCurrent" );
00207     CHECK_DL_STATUS( dlerror()!=NULL , "CUDA function cuCtxGetCurrent not found." );
00208     cuDeviceGetPtr = dlsym( dl1, "cuDeviceGet" );
00209     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGet not found." );
00210     cuDeviceGetCountPtr = dlsym( dl1, "cuDeviceGetCount" );
00211     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGetCount not found." );
00212     cuDeviceGetNamePtr = dlsym( dl1, "cuDeviceGetName" );
00213     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGetName not found." );
00214     cuInitPtr = dlsym( dl1, "cuInit" );
00215     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuInit not found." );
00216     cuCtxPopCurrentPtr = dlsym( dl1, "cuCtxPopCurrent" );
00217     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuCtxPopCurrent not found." );
00218     cuCtxPushCurrentPtr = dlsym( dl1, "cuCtxPushCurrent" );
00219     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuCtxPushCurrent not found." );
00220 
00221     dl2 = dlopen( "libcudart.so", RTLD_NOW | RTLD_GLOBAL );
00222     CHECK_DL_STATUS( !dl2, "CUDA runtime library libcudart.so not found." );
00223     dlerror();   /* clear any stale error before the dlsym checks */
00224     cudaGetDevicePtr = dlsym( dl2, "cudaGetDevice" );
00225     CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaGetDevice not found." );
00226     cudaSetDevicePtr = dlsym( dl2, "cudaSetDevice" );
00227     CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaSetDevice not found." );
00228     cudaFreePtr = dlsym( dl2, "cudaFree" );
00229     CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaFree not found." );
00230 
00231     dl3 = dlopen( "libcupti.so", RTLD_NOW | RTLD_GLOBAL );
00232     CHECK_DL_STATUS( !dl3, "CUPTI library libcupti.so not found." );
00233     dlerror();   /* clear any stale error before the dlsym checks */
00234     cuptiDeviceEnumEventDomainsPtr = dlsym( dl3, "cuptiDeviceEnumEventDomains" );
00235     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiDeviceEnumEventDomains not found." );
00236     cuptiDeviceGetNumEventDomainsPtr = dlsym( dl3, "cuptiDeviceGetNumEventDomains" );
00237     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiDeviceGetNumEventDomains not found." );
00238     cuptiEventDomainEnumEventsPtr = dlsym( dl3, "cuptiEventDomainEnumEvents" );
00239     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventDomainEnumEvents not found." );
00240     cuptiEventDomainGetNumEventsPtr = dlsym( dl3, "cuptiEventDomainGetNumEvents" );
00241     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventDomainGetNumEvents not found." );
00242     cuptiEventGetAttributePtr = dlsym( dl3, "cuptiEventGetAttribute" );
00243     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGetAttribute not found." );
00244     cuptiEventGroupAddEventPtr = dlsym( dl3, "cuptiEventGroupAddEvent" );
00245     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupAddEvent not found." );
00246     cuptiEventGroupCreatePtr = dlsym( dl3, "cuptiEventGroupCreate" );
00247     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupCreate not found." );
00248     cuptiEventGroupDestroyPtr = dlsym( dl3, "cuptiEventGroupDestroy" );
00249     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupDestroy not found." );
00250     cuptiEventGroupDisablePtr = dlsym( dl3, "cuptiEventGroupDisable" );
00251     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupDisable not found." );
00252     cuptiEventGroupEnablePtr = dlsym( dl3, "cuptiEventGroupEnable" );
00253     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupEnable not found." );
00254     cuptiEventGroupReadAllEventsPtr = dlsym( dl3, "cuptiEventGroupReadAllEvents" );
00255     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupReadAllEvents not found." );
00256     cuptiEventGroupResetAllEventsPtr = dlsym( dl3, "cuptiEventGroupResetAllEvents" );
00257     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupResetAllEvents not found." );
00258     return ( PAPI_OK );
00259 }

void* dl1 = NULL [static]

Definition at line 74 of file linux-cuda.c.

void* dl2 = NULL [static]

Definition at line 75 of file linux-cuda.c.

void* dl3 = NULL [static]

Definition at line 76 of file linux-cuda.c.

Definition at line 82 of file linux-cuda.c.

Definition at line 85 of file linux-cuda.c.


Generated on 26 Jan 2016 for PAPI by  doxygen 1.6.1