linux-cuda.c File Reference

This implements a PAPI component that enables PAPI-C to access hardware monitoring counters for NVIDIA CUDA GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Data Structures

struct  papicuda_context_t
struct  papicuda_name_desc_t
struct  papicuda_device_desc_t
struct  papicuda_control_t
struct  papicuda_active_cucontext_t

Defines

#define PAPICUDA_MAX_COUNTERS   512
#define CHECK_CU_ERROR(err, cufunc)   if( (err) != CUDA_SUCCESS ) { PAPIERROR( "CUDA Driver API function failed '%s'", cufunc ); return -1; }
#define CHECK_CUPTI_ERROR(err, cuptifunc)   if( (err) != CUPTI_SUCCESS ) { PAPIERROR( "CUPTI API function failed '%s'", cuptifunc ); return -1; }
#define CHECK_PRINT_EVAL(err, str, eval)   if( (err) ) { PAPIERROR( "%s", str ); eval; }
#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define CUPTIAPI   __attribute__((weak))
#define CHECK_DL_STATUS(err, str)   if( err ) { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }

Functions

static int papicuda_list_all_events (papicuda_context_t *gctxt)
static int papicuda_init_thread (hwd_context_t *ctx)
static int papicuda_init_component (int cidx)
static int papicuda_init_control_state (hwd_control_state_t *ctrl)
static int papicuda_update_control_state (hwd_control_state_t *ctrl, NativeInfo_t *nativeInfo, int nativeCount, hwd_context_t *ctx)
static int papicuda_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int papicuda_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int papicuda_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **events, int flags)
int papicuda_shutdown_thread (hwd_context_t *ctx)
static int papicuda_shutdown_component (void)
static int papicuda_ctrl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
static int papicuda_set_domain (hwd_control_state_t *ctrl, int domain)
static int papicuda_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int papicuda_cleanup_eventset (hwd_control_state_t *ctrl)
static int papicuda_ntv_enum_events (unsigned int *EventCode, int modifier)
static int papicuda_ntv_code_to_name (unsigned int EventCode, char *name, int len)
static int papicuda_ntv_code_to_descr (unsigned int EventCode, char *name, int len)

Variables

static void * dl1 = NULL
static void * dl2 = NULL
static void * dl3 = NULL
papi_vector_t _cuda_vector
static papicuda_context_t * global_papicuda_context = NULL
static papicuda_control_t * global_papicuda_control = NULL
void(* _dl_non_dynamic_init )(cudaError_t CUDARTAPI cudaFree void)

Detailed Description

Author:
Asim YarKhan yarkhan@icl.utk.edu (updated in 2015 for multiple CUDA contexts/devices)
Heike Jagode (in collaboration with Robert Dietrich, TU Dresden) jagode@eecs.utk.edu

Definition in file linux-cuda.c.


Define Documentation

#define CHECK_CU_ERROR ( err,
cufunc   )     if( (err) != CUDA_SUCCESS ) { PAPIERROR( "CUDA Driver API function failed '%s'", cufunc ); return -1; }

Definition at line 88 of file linux-cuda.c.

#define CHECK_CUPTI_ERROR ( err,
cuptifunc   )     if( (err) != CUPTI_SUCCESS ) { PAPIERROR( "CUPTI API function failed '%s'", cuptifunc ); return -1; }

Definition at line 91 of file linux-cuda.c.

#define CHECK_DL_STATUS ( err,
str   )     if( err ) { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
#define CHECK_PRINT_EVAL ( err,
str,
eval   )     if( (err) ) { PAPIERROR( "%s", str ); eval; }

Definition at line 94 of file linux-cuda.c.

#define CUDAAPI   __attribute__((weak))
#define CUDARTAPI   __attribute__((weak))
#define CUPTIAPI   __attribute__((weak))
#define PAPICUDA_MAX_COUNTERS   512

Definition at line 27 of file linux-cuda.c.


Function Documentation

static int papicuda_cleanup_eventset ( hwd_control_state_t * ctrl  )  [static]

Definition at line 828 of file linux-cuda.c.

00829 { /* Destroys every CUPTI event group of each active CUDA context and zeroes the active-event count. */
00830     SUBDBG( "Entering\n" );
00831     ( void ) ctrl; /* unused: all state comes from global_papicuda_control */
00832     papicuda_control_t *gctrl = global_papicuda_control;
00833     papicuda_active_cucontext_t *currctrl;
00834     int cuContextIdx, gg;
00835     CUptiResult cuptiErr;
00836     CUcontext saveCtx, tmpCtx;
00837 
00838     SUBDBG( "Switch to each context and disable CUDA eventgroups\n" );
00839     /* Pop the caller's current CUDA context into saveCtx; it is pushed back just before the PAPI_OK return. */
00840     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00841     /* Switch to each active context and destroy its CUDA eventgroups */
00842     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00843         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00844         /* Switch to this device / cuda context */
00845         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00846         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00847             /* Destroy the eventGroups; it also frees the perfmon hardware on the GPU */
00848             cuptiErr = ( *cuptiEventGroupDestroyPtr )( currctrl->eventGroup[gg] );
00849             CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDestroy" ); /* NOTE: returns -1 on failure, leaving saveCtx un-restored */
00850         }
00851         currctrl->numEventGroups = 0; /* this context no longer owns any event groups */
00852         CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ),  "cuCtxPopCurrent" );
00853     }
00854     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00855     /* Record that there are no active events (countOfActiveCUContexts is not modified here) */
00856     gctrl->activeEventCount = 0;
00857     return ( PAPI_OK );
00858 }

static int papicuda_ctrl ( hwd_context_t * ctx,
int  code,
_papi_int_option_t * option 
) [static]

This function sets various options in the component - Does nothing in the CUDA component.

Parameters:
[in] ctx -- hardware context
[in] code -- valid values are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT
[in] option -- options to be set

Definition at line 752 of file linux-cuda.c.

00753 { /* Option hook with no effect: every argument is ignored and PAPI_OK is returned. */
00754     SUBDBG( "Entering\n" );
00755     ( void ) ctx;    /* unused */
00756     ( void ) code;   /* unused */
00757     ( void ) option; /* unused */
00758     return ( PAPI_OK );
00759 }

static int papicuda_init_component ( int  cidx  )  [static]

Initialize hardware counters, set up the function vector table and get hardware information. This routine is called when the PAPI process is initialized (i.e., from PAPI_library_init).

Definition at line 402 of file linux-cuda.c.

00403 { /* Links the CUDA libraries, builds the global list of native events, and publishes the event counts in _cuda_vector. */
00404     SUBDBG( "Entering with cidx: %d\n", cidx );
00405     int err;
00406 
00407     /* link in all the cuda libraries and resolve the symbols we need to use */
00408     if( papicuda_linkCudaLibraries() != PAPI_OK ) {
00409         SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
00410         SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
00411         return (PAPI_ENOSUPP);
00412     }
00413 
00414     /* Create the global context structure on first use; it is reused on later calls */
00415     if ( !global_papicuda_context )
00416         global_papicuda_context = ( papicuda_context_t* ) papi_calloc( 1, sizeof( papicuda_context_t ) );
00417     CHECK_PRINT_EVAL( !global_papicuda_context, "ERROR CUDA: Could not allocate memory for CUDA context", return( PAPI_ENOMEM ) );
00418     /* Get list of all native CUDA events supported (dereferences the context, hence the check above) */
00419     err = papicuda_list_all_events( global_papicuda_context );
00420     if ( err!=0 ) return( err );
00421 
00422     /* Export some information: every native event is reported as a counter */
00423     _cuda_vector.cmp_info.CmpIdx = cidx;
00424     _cuda_vector.cmp_info.num_native_events = global_papicuda_context->availEventSize;
00425     _cuda_vector.cmp_info.num_cntrs = _cuda_vector.cmp_info.num_native_events;
00426     _cuda_vector.cmp_info.num_mpx_cntrs = _cuda_vector.cmp_info.num_native_events;
00427 
00428     //SUBDBG( "Exiting PAPI_OK\n" );
00429     return ( PAPI_OK );
00430 }

Here is the call graph for this function:

static int papicuda_init_control_state ( hwd_control_state_t * ctrl  )  [static]

Setup a counter control state. In general a control state holds the hardware info for an EventSet.

Definition at line 437 of file linux-cuda.c.

00438 { /* Verifies the component is initialized with at least one event, then lazily creates the global control structure. */
00439     SUBDBG( "Entering\n" );
00440     ( void ) ctrl; /* unused: the control state lives in global_papicuda_control */
00441     papicuda_context_t *gctxt = global_papicuda_context;
00442 
00443     CHECK_PRINT_EVAL( !gctxt, "Error: The PAPI CUDA component needs to be initialized first", return( PAPI_ENOINIT ) );
00444     /* If no events were found during the initial component initialization, return error  */
00445     if( gctxt->availEventSize <= 0 ) {
00446         strncpy( _cuda_vector.cmp_info.disabled_reason, "ERROR CUDA: No events exist", PAPI_MAX_STR_LEN );
00447         return ( PAPI_EMISC );
00448     }
00449     if ( !global_papicuda_control ) { /* first call: create the global structure holding CUDA contexts and active events */
00450         global_papicuda_control = ( papicuda_control_t* ) papi_calloc( 1, sizeof( papicuda_control_t ) );
00451         CHECK_PRINT_EVAL( !global_papicuda_control, "ERROR CUDA: Could not allocate memory for CUDA control structure", return( PAPI_ENOMEM ) );
00452         global_papicuda_control->countOfActiveCUContexts = 0;
00453         global_papicuda_control->activeEventCount = 0;
00454     }
00455     return PAPI_OK;
00456 }

static int papicuda_init_thread ( hwd_context_t * ctx  )  [static]

Definition at line 381 of file linux-cuda.c.

00382 { /* Per-thread init hook: performs no work and always returns PAPI_OK. */
00383     ( void ) ctx; /* unused */
00384     SUBDBG( "Entering\n" );
00385 
00386     return PAPI_OK;
00387 }

static int papicuda_list_all_events ( papicuda_context_t * gctxt  )  [static]

Definition at line 250 of file linux-cuda.c.

00251 {
00252     SUBDBG( "Entering\n" );
00253     CUptiResult cuptiErr;
00254     CUresult cuErr;
00255     unsigned int deviceNum;
00256     uint32_t domainNum, eventNum;
00257     papicuda_device_desc_t *mydevice;
00258     char tmpStr[PAPI_MIN_STR_LEN];
00259     tmpStr[PAPI_MIN_STR_LEN-1]='\0';
00260     size_t tmpSizeBytes;
00261     int ii;
00262 
00263     /* How many gpgpu devices do we have? */
00264     cuErr = ( *cuDeviceGetCountPtr )( &gctxt->deviceCount );
00265     if ( cuErr==CUDA_ERROR_NOT_INITIALIZED ) {
00266         /* If CUDA not initilaized, initialized CUDA and retry the device list */
00267         /* This is required for some of the PAPI tools, that do not call the init functions */
00268         if ( (( *cuInitPtr )( 0 )) != CUDA_SUCCESS ) {
00269             strncpy( _cuda_vector.cmp_info.disabled_reason, "CUDA cannot be found and initialized (cuInit failed).", PAPI_MAX_STR_LEN );
00270             return PAPI_ENOSUPP;
00271         }
00272         cuErr = ( *cuDeviceGetCountPtr )( &gctxt->deviceCount );
00273     }
00274     CHECK_CU_ERROR( cuErr, "cuDeviceGetCount" );
00275     if ( gctxt->deviceCount==0 ) {
00276         strncpy( _cuda_vector.cmp_info.disabled_reason, "CUDA initialized but no CUDA devices found.", PAPI_MAX_STR_LEN );
00277         return PAPI_ENOSUPP;
00278     }
00279     SUBDBG( "Found %d devices\n", gctxt->deviceCount );
00280 
00281     /* allocate memory for device information */
00282     gctxt->deviceArray = ( papicuda_device_desc_t * ) papi_calloc( gctxt->deviceCount, sizeof( papicuda_device_desc_t ) );
00283     CHECK_PRINT_EVAL( !gctxt->deviceArray, "ERROR CUDA: Could not allocate memory for CUDA device structure", return( PAPI_ENOSUPP ) );
00284 
00285     /* For each device, get domains and domain-events counts */
00286     gctxt->availEventSize = 0;
00287     for( deviceNum = 0; deviceNum < ( uint )gctxt->deviceCount; deviceNum++ ) {
00288         mydevice = &gctxt->deviceArray[deviceNum];
00289         /* Get device id for each device */
00290         CHECK_CU_ERROR( ( *cuDeviceGetPtr )( &mydevice->cuDev, deviceNum ), "cuDeviceGet" );
00291         /* Get device name */
00292         CHECK_CU_ERROR( ( *cuDeviceGetNamePtr )( mydevice->deviceName, PAPI_MIN_STR_LEN-1, mydevice->cuDev ), "cuDeviceGetName" );
00293         mydevice->deviceName[PAPI_MIN_STR_LEN-1]='\0';
00294         /* Get max num domains for each device */
00295         CHECK_CUPTI_ERROR( ( *cuptiDeviceGetNumEventDomainsPtr )( mydevice->cuDev, &mydevice->maxDomains ), "cuptiDeviceGetNumEventDomains" );
00296         /* Allocate space to hold domain IDs */
00297         mydevice->domainIDArray = ( CUpti_EventDomainID * ) papi_calloc( mydevice->maxDomains, sizeof( CUpti_EventDomainID ) );
00298         CHECK_PRINT_EVAL( !mydevice->domainIDArray, "ERROR CUDA: Could not allocate memory for CUDA device domains", return( PAPI_ENOMEM ) );
00299         /* Put domain ids into allocated space */
00300         size_t domainarraysize = mydevice->maxDomains * sizeof( CUpti_EventDomainID );
00301         CHECK_CUPTI_ERROR( ( *cuptiDeviceEnumEventDomainsPtr )( mydevice->cuDev, &domainarraysize, mydevice->domainIDArray ), "cuptiDeviceEnumEventDomains" );
00302         /* Allocate space to hold domain event counts  */
00303         mydevice->domainIDNumEvents = ( uint32_t * ) papi_calloc( mydevice->maxDomains, sizeof( uint32_t ) );
00304         CHECK_PRINT_EVAL( !mydevice->domainIDNumEvents, "ERROR CUDA: Could not allocate memory for domain event counts", return( PAPI_ENOMEM ) );
00305         /* For each domain, get event counts in domainNumEvents[]  */
00306         for ( domainNum=0; domainNum < mydevice->maxDomains; domainNum++ ) {
00307             CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
00308             /* Get num events in domain */
00309             //SUBDBG( "Device %d:%d calling cuptiEventDomainGetNumEventsPtr with domainID %d \n", deviceNum, mydevice->cuDev, domainID );
00310             CHECK_CUPTI_ERROR(  ( *cuptiEventDomainGetNumEventsPtr ) ( domainID, &mydevice->domainIDNumEvents[domainNum] ), "cuptiEventDomainGetNumEvents" );
00311             /* Keep track of overall number of events */
00312             gctxt->availEventSize += mydevice->domainIDNumEvents[domainNum];
00313         }
00314     }
00315 
00316     /* Allocate space for all events and descriptors */
00317     gctxt->availEventIDArray = ( CUpti_EventID * ) papi_calloc( gctxt->availEventSize, sizeof( CUpti_EventID ) );
00318     CHECK_PRINT_EVAL( !gctxt->availEventIDArray, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
00319     gctxt->availEventDeviceNum = ( int * ) papi_calloc( gctxt->availEventSize, sizeof( int ) );
00320     CHECK_PRINT_EVAL( !gctxt->availEventDeviceNum, "ERROR CUDA: Could not allocate memory", return( PAPI_ENOMEM ) );
00321     gctxt->availEventDesc = ( papicuda_name_desc_t * ) papi_calloc( gctxt->availEventSize, sizeof( papicuda_name_desc_t ) );
00322     CHECK_PRINT_EVAL( !gctxt->availEventDesc, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
00323     /* Record the events and descriptions */
00324     int idxEventArray = 0;
00325     for( deviceNum = 0; deviceNum < ( uint )gctxt->deviceCount; deviceNum++ ) {
00326         mydevice = &gctxt->deviceArray[deviceNum];
00327         //SUBDBG( "For device %d %d maxdomains %d \n", deviceNum, mydevice->cuDev, mydevice->maxDomains );
00328         /* Get and store event IDs, names, descriptions into the large arrays allocated */
00329         for ( domainNum=0; domainNum < mydevice->maxDomains; domainNum++ ) {
00330             /* Get domain id */
00331             CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
00332             uint32_t domainNumEvents = mydevice->domainIDNumEvents[domainNum];
00333             SUBDBG( "For device %d domain %d %d numEvents %d\n", mydevice->cuDev, domainNum, domainID, domainNumEvents );
00334             /* Allocate temp space for eventIDs for this domain */
00335             CUpti_EventID *domainEventIDArray = ( CUpti_EventID * ) papi_calloc( domainNumEvents, sizeof( CUpti_EventID ) );
00336             CHECK_PRINT_EVAL( !domainEventIDArray, "ERROR CUDA: Could not allocate memory for events", return( PAPI_ENOMEM ) );
00337             /* Load the domain eventIDs in temp space */
00338             size_t domainEventArraySize = domainNumEvents * sizeof( CUpti_EventID );
00339             cuptiErr = ( *cuptiEventDomainEnumEventsPtr )  ( domainID, &domainEventArraySize, domainEventIDArray );
00340             CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventDomainEnumEvents" );
00341             /* For each event, get and store name and description */
00342             for ( eventNum=0; eventNum<domainNumEvents; eventNum++ ) {
00343                 /* Record the event IDs in native event array */
00344                 CUpti_EventID myeventID = domainEventIDArray[eventNum];
00345                 gctxt->availEventIDArray[idxEventArray] = myeventID;
00346                 gctxt->availEventDeviceNum[idxEventArray] = deviceNum;
00347                 /* Get event name */
00348                 tmpSizeBytes = PAPI_MIN_STR_LEN-1 * sizeof( char );
00349                 cuptiErr = ( *cuptiEventGetAttributePtr ) ( myeventID, CUPTI_EVENT_ATTR_NAME, &tmpSizeBytes, tmpStr ) ;
00350                 CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGetAttribute" );
00351                 /* Save a full path for the event, filling spaces with underscores */
00352                 //snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "%s:%d:%s", mydevice->deviceName, deviceNum, tmpStr );
00353                 snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "device:%d:%s", deviceNum, tmpStr );
00354                 gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN-1] = '\0';
00355                 char *nameTmpPtr = gctxt->availEventDesc[idxEventArray].name;
00356                 for ( ii = 0; ii < ( int )strlen( nameTmpPtr ); ii++ ) if ( nameTmpPtr[ii] == ' ' ) nameTmpPtr[ii] = '_';
00357                 /* Save description in the native event array */
00358                 tmpSizeBytes = PAPI_2MAX_STR_LEN-1 * sizeof( char );
00359                 cuptiErr = ( *cuptiEventGetAttributePtr ) ( myeventID, CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, gctxt->availEventDesc[idxEventArray].description );
00360                 CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGetAttribute" );
00361                 gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN-1] = '\0';
00362                 // SUBDBG( "Event ID:%d Name:%s Desc:%s\n", gctxt->availEventIDArray[idxEventArray], gctxt->availEventDesc[idxEventArray].name, gctxt->availEventDesc[idxEventArray].description );
00363                 /* Increment index past events in this domain to start of next domain */
00364                 idxEventArray++;
00365             }
00366             papi_free ( domainEventIDArray );
00367         }
00368     }
00369     /* return 0 if everything went OK */
00370     return 0;
00371 }

Here is the caller graph for this function:

static int papicuda_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
) [static]

Takes a native event code and passes back the event description

Parameters:
EventCode is the native event code
name is a pointer to the buffer the description is copied to
len is the size of the name buffer

Definition at line 912 of file linux-cuda.c.

00913 {
00914     /* Copies the description of native event EventCode into name (len bytes); PAPI_EINVAL on a bad code or buffer. */
00915     unsigned int index = EventCode;
00916     papicuda_context_t *gctxt = global_papicuda_context;
00917     if ( !gctxt || !name || len <= 0 || index >= ( unsigned int )gctxt->availEventSize )
00918         return ( PAPI_EINVAL );
00919     /* strncpy does not terminate the destination on truncation, so reserve the last byte for '\0' */
00920     strncpy( name, gctxt->availEventDesc[index].description, len - 1 );
00921     name[len - 1] = '\0';
00922     return ( PAPI_OK );
00923 }

static int papicuda_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
) [static]

Takes a native event code and passes back the name

Parameters:
EventCode is the native event code
name is a pointer for the name to be copied to
len is the size of the name string

Definition at line 892 of file linux-cuda.c.

00893 {
00894     /* Copies the name of native event EventCode into name (len bytes); PAPI_EINVAL on a bad code or buffer. */
00895     unsigned int index = EventCode;
00896     papicuda_context_t *gctxt = global_papicuda_context;
00897     if ( !gctxt || !name || len <= 0 || index >= ( unsigned int )gctxt->availEventSize )
00898         return ( PAPI_EINVAL );
00899     /* strncpy does not terminate the destination on truncation, so reserve the last byte for '\0' */
00900     strncpy( name, gctxt->availEventDesc[index].name, len - 1 );
00901     name[len - 1] = '\0';
00902     //SUBDBG( "EventCode %d: Exit %s\n", EventCode, name );
00903     return ( PAPI_OK );
00904 }

static int papicuda_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
) [static]

Enumerate Native Events.

Parameters:
EventCode is the event of interest
modifier is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS

Definition at line 865 of file linux-cuda.c.

00866 {
00867     /* PAPI_ENUM_FIRST yields code 0; PAPI_ENUM_EVENTS advances *EventCode or returns PAPI_ENOEVNT at the end. */
00868     switch( modifier ) {
00869     case PAPI_ENUM_FIRST:
00870         *EventCode = 0;
00871         return ( PAPI_OK );
00872         break;
00873     case PAPI_ENUM_EVENTS:
00874         if( global_papicuda_context->availEventSize > 0 && *EventCode < ( unsigned int )( global_papicuda_context->availEventSize - 1 ) ) { /* the > 0 guard keeps "size - 1" from becoming a huge unsigned bound */
00875             *EventCode = *EventCode + 1;
00876             return ( PAPI_OK );
00877         } else
00878             return ( PAPI_ENOEVNT );
00879         break;
00880     default:
00881         return ( PAPI_EINVAL );
00882     }
00883     return ( PAPI_OK );
00884 }

static int papicuda_read ( hwd_context_t * ctx,
hwd_control_state_t * ctrl,
long long **  events,
int  flags 
) [static]

Triggered by PAPI_read(). For CUDA component, switch to each context, read all the eventgroups, and put the values in the correct places.

Definition at line 649 of file linux-cuda.c.

00650 { /* Reads every event group of each active CUDA context, adds the values to the matching active events, and returns the value array via *events. */
00651     SUBDBG( "Entering\n" );
00652     ( void ) ctx;
00653     ( void ) ctrl;
00654     ( void ) flags;
00655     papicuda_control_t *gctrl = global_papicuda_control;
00656     papicuda_context_t *gctxt = global_papicuda_context;
00657     papicuda_active_cucontext_t *currctrl;
00658     int cuContextIdx, gg, ii, jj;
00659     CUcontext saveCtx, tmpCtx;
00660     CUptiResult cuptiErr;
00661     size_t readEventValueBufferSize = sizeof( uint64_t )*PAPICUDA_MAX_COUNTERS;
00662     uint64_t readEventValueBuffer[PAPICUDA_MAX_COUNTERS];
00663     size_t readEventIDArraySize = sizeof( CUpti_EventID )*PAPICUDA_MAX_COUNTERS;
00664     CUpti_EventID readEventIDArray[PAPICUDA_MAX_COUNTERS];
00665     size_t numEventIDsRead;
00666 
00667     SUBDBG( "Switch to each context and read CUDA eventgroups\n" );
00668     /* Pop the caller's current CUDA context into saveCtx; it is pushed back before the PAPI_OK return */
00669     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00670     /* Switch to each context and read its CUDA eventgroups */
00671     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00672         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00673         // SUBDBG( "Switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
00674         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00675         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00676             readEventValueBufferSize = sizeof( readEventValueBuffer ); readEventIDArraySize = sizeof( readEventIDArray ); /* in/out sizes: CUPTI overwrites them with bytes written, so restore full capacity before each read */
00677             cuptiErr = ( *cuptiEventGroupReadAllEventsPtr )( currctrl->eventGroup[gg], CUPTI_EVENT_READ_FLAG_NONE, &readEventValueBufferSize, readEventValueBuffer, &readEventIDArraySize, readEventIDArray, &numEventIDsRead );
00678             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupReadAllEvents: Could not read from CUPTI eventgroup", return( PAPI_EMISC ) );
00679             /* Match read values against active events by scanning activeEvents array and matching associated availEventIDs  */
00680             for( ii = 0; ii < ( int )numEventIDsRead; ii++ ) {
00681                 for( jj = 0; jj < gctrl->activeEventCount; jj++ ) {
00682                     int eventIndex = gctrl->activeEventIndex[jj];
00683                     if ( gctrl->activeEventContextIdx[jj]==cuContextIdx && gctxt->availEventIDArray[eventIndex]==readEventIDArray[ii] ) {
00684                         gctrl->activeEventValues[jj] += ( long long )readEventValueBuffer[ii]; /* values accumulate across reads */
00685                         SUBDBG( "Matched read-eventID %d:%d value %llu activeEvent %d value %lld \n", jj, (int)readEventIDArray[ii], ( unsigned long long )readEventValueBuffer[ii], eventIndex, gctrl->activeEventValues[jj] );
00686                         break;
00687                     }
00688                 }
00689             }
00690         }
00691         CUresult cuErr = ( *cuCtxPopCurrentPtr ) ( &tmpCtx );
00692         if ( cuErr != CUDA_SUCCESS ) PAPIERROR ( "Error popping context %d\n", cuErr );
00693         CHECK_CU_ERROR( cuErr,  "cuCtxPopCurrent" );
00694     }
00695     //SUBDBG( "Restore original context\n" );
00696     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00697     *events = gctrl->activeEventValues;
00698     return ( PAPI_OK );
00699 }

Here is the call graph for this function:

static int papicuda_reset ( hwd_context_t * ctx,
hwd_control_state_t * ctrl 
) [static]

Triggered by PAPI_reset() but only if the EventSet is currently running. If the eventset is not currently running, then the saved value in the EventSet is set to zero without calling this routine.

Definition at line 790 of file linux-cuda.c.

00791 { /* Zeroes the stored values of all active events and resets every CUPTI event group in each active CUDA context. */
00792     SUBDBG( "Entering\n" );
00793     ( void ) ctx;  /* unused */
00794     ( void ) ctrl; /* unused: state comes from global_papicuda_control */
00795     papicuda_control_t *gctrl = global_papicuda_control;
00796     papicuda_active_cucontext_t *currctrl;
00797     int cuContextIdx, gg, ii;
00798     CUptiResult cuptiErr;
00799     CUcontext saveCtx, tmpCtx;
00800 
00801     /* Clear the values held for all active events */
00802     for ( ii=0; ii<gctrl->activeEventCount; ii++ )
00803         gctrl->activeEventValues[ii] = 0;
00804     /* Pop the caller's current CUDA context into saveCtx; it is pushed back before the PAPI_OK return */
00805     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00806     /* Switch to each active context and reset its CUDA eventgroups */
00807     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00808         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00809         //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
00810         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00811         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00812             // SUBDBG( "Reset events in eventgroup\n" );
00813             cuptiErr = ( *cuptiEventGroupResetAllEventsPtr )( currctrl->eventGroup[gg] );
00814             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupResetAllEvents: Could not reset the event groups", return( PAPI_EMISC ) );
00815             SUBDBG( "For papicuda context %d on device %d event group %d was enabled and reset\n", cuContextIdx, currctrl->deviceNum, gg ); /* NOTE: message says "enabled", but this loop only resets */
00816         }
00817         CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ),  "cuCtxPopCurrent" );
00818     }
00819     // SUBDBG( "Restore original context\n" );
00820     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00821     return ( PAPI_OK );
00822 }

static int papicuda_set_domain ( hwd_control_state_t * ctrl,
int  domain 
) [static]

Definition at line 772 of file linux-cuda.c.

00773 { /* Returns PAPI_OK if domain has any of the PAPI_DOM_USER, PAPI_DOM_KERNEL or PAPI_DOM_OTHER bits set, else PAPI_EINVAL. Stores nothing. */
00774     SUBDBG( "Entering\n" );
00775     ( void ) ctrl; /* unused */
00776     if ( ( PAPI_DOM_USER & domain ) ||
00777             ( PAPI_DOM_KERNEL & domain ) ||
00778             ( PAPI_DOM_OTHER & domain ) )
00779         return ( PAPI_OK );
00780     else
00781         return ( PAPI_EINVAL );
00782     return ( PAPI_OK ); /* unreachable: both branches above return */
00783 }

static int papicuda_shutdown_component ( void   )  [static]

Triggered by PAPI_shutdown() and frees memory allocated in the CUDA component.

Definition at line 711 of file linux-cuda.c.

00712 { /* Frees the global context and control structures and closes the three dynamically loaded libraries. Always returns PAPI_OK. */
00713     SUBDBG( "Entering\n" );
00714     papicuda_control_t *gctrl = global_papicuda_control;
00715     papicuda_context_t *gctxt = global_papicuda_context;
00716     int deviceNum, cuContextIdx;
00717     /* Free context  */
00718     if ( gctxt ) {
00719         for( deviceNum = 0; gctxt->deviceArray && deviceNum < gctxt->deviceCount; deviceNum++ ) { /* deviceArray is NULL if its allocation failed during init */
00720             papicuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum];
00721             papi_free( mydevice->domainIDArray );
00722             papi_free( mydevice->domainIDNumEvents );
00723         }
00724         papi_free( gctxt->availEventIDArray );
00725         papi_free( gctxt->availEventDeviceNum );
00726         papi_free( gctxt->availEventDesc );
00727         papi_free( gctxt->deviceArray );
00728         papi_free( gctxt );
00729         global_papicuda_context = gctxt = NULL;
00730     }
00731     /* Free control  */
00732     if ( gctrl ) {
00733         for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ )
00734             if ( gctrl->arrayOfActiveCUContexts[cuContextIdx]!=NULL )
00735                 papi_free( gctrl->arrayOfActiveCUContexts[cuContextIdx] );
00736         papi_free( gctrl );
00737         global_papicuda_control = gctrl = NULL;
00738     }
00739     // close the dynamic libraries needed by this component; handles start as NULL and are cleared so each is closed at most once
00740     if ( dl1 ) { dlclose( dl1 ); dl1 = NULL; }
00741     if ( dl2 ) { dlclose( dl2 ); dl2 = NULL; }
00742     if ( dl3 ) { dlclose( dl3 ); dl3 = NULL; }
00743     return ( PAPI_OK );
00744 }

int papicuda_shutdown_thread ( hwd_context_t * ctx  ) 

Called at thread shutdown. Does nothing in the CUDA component.

Definition at line 702 of file linux-cuda.c.

00703 { /* Per-thread shutdown hook: performs no work and always returns PAPI_OK. */
00704     SUBDBG( "Entering\n" );
00705     ( void ) ctx; /* unused */
00706 
00707     return ( PAPI_OK );
00708 }

static int papicuda_start ( hwd_context_t * ctx,
hwd_control_state_t * ctrl 
) [static]

Triggered by PAPI_start(). For CUDA component, switch to each context and start all eventgroups.

Definition at line 572 of file linux-cuda.c.

00573 { /* Zeroes the stored values of all active events, then enables and resets every CUPTI event group in each active CUDA context. */
00574     SUBDBG( "Entering\n" );
00575     ( void ) ctx;  /* unused */
00576     ( void ) ctrl; /* unused: state comes from global_papicuda_control */
00577     papicuda_control_t *gctrl = global_papicuda_control;
00578     //papicuda_context_t *gctxt = global_papicuda_context;
00579     papicuda_active_cucontext_t *currctrl;
00580     int cuContextIdx, gg, ii;
00581     CUptiResult cuptiErr;
00582     CUcontext saveCtx, tmpCtx;
00583 
00584     /* Clear the values held for all active events */
00585     for ( ii=0; ii<gctrl->activeEventCount; ii++ )
00586         gctrl->activeEventValues[ii] = 0;
00587 
00588     // SUBDBG( "Switch to each context and enable CUDA eventgroups associated with that context\n" );
00589     /* Pop the caller's current CUDA context into saveCtx; it is pushed back before the PAPI_OK return */
00590     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00591     /* Switch to each context and enable CUDA eventgroups */
00592     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00593         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00594         //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
00595         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00596         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00597             /* Enable the group first, then reset its events */
00598             cuptiErr = ( *cuptiEventGroupEnablePtr )( currctrl->eventGroup[gg] );
00599             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupEnable: Could not enable one of the event groups", return( PAPI_EMISC ) );
00600             // SUBDBG( "Reset events in eventgroup\n" );
00601             cuptiErr = ( *cuptiEventGroupResetAllEventsPtr )( currctrl->eventGroup[gg] );
00602             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupResetAllEvents: Could not reset the event groups", return( PAPI_EMISC ) );
00603             SUBDBG( "For papicuda context %d on device %d event group %d was enabled and reset\n", cuContextIdx, currctrl->deviceNum, gg );
00604         }
00605         /* Pop the context pushed at the top of this iteration */
00606         CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ),  "cuCtxPopCurrent" );
00607     }
00608     //SUBDBG( "Restore original context\n" );
00609     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00610     return ( PAPI_OK );
00611 }

static int papicuda_stop ( hwd_context_t ctx,
hwd_control_state_t ctrl 
) [static]

Triggered by PAPI_stop()

Definition at line 614 of file linux-cuda.c.

00615 {
00616     SUBDBG( "Entering to disable all CUPTI eventgroups\n" );
00617     ( void ) ctx;
00618     ( void ) ctrl;
00619     papicuda_control_t *gctrl = global_papicuda_control;
00620     papicuda_active_cucontext_t *currctrl;
00621     int cuContextIdx, gg;
00622     CUptiResult cuptiErr;
00623     CUcontext saveCtx, tmpCtx;
00624 
00625     // SUBDBG( "Save initial CUDA context\n" );
00626     CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &saveCtx ), "cuCtxPopCurrent" );
00627     // SUBDBG( "Switch to each context and disable CUDA eventgroups\n" );
00628     for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ ) {
00629         currctrl = gctrl->arrayOfActiveCUContexts[cuContextIdx];
00630         //SUBDBG( "Try to switch to context %d associated with device %d\n", cuContextIdx, currctrl->deviceNum );
00631         CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( currctrl->context ),  "cuCtxPushCurrent" );
00632         for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00633             // SUBDBG( "Disable events in eventgroup\n" );
00634             cuptiErr = ( *cuptiEventGroupDisablePtr )( currctrl->eventGroup[gg] );
00635             CHECK_PRINT_EVAL( ( cuptiErr!=CUPTI_SUCCESS ), "cuptiEventGroupDisable: Could not disable the event groups", return( PAPI_EMISC ) );
00636             SUBDBG( "For papicuda context %d on device %d event group %d was disabled\n", cuContextIdx, currctrl->deviceNum, gg );
00637         }
00638         CHECK_CU_ERROR( ( *cuCtxPopCurrentPtr ) ( &tmpCtx ),  "cuCtxPopCurrent" );
00639     }
00640     //SUBDBG( "Restore original context\n" );
00641     CHECK_CU_ERROR( ( *cuCtxPushCurrentPtr ) ( saveCtx ),  "cuCtxPushCurrent" );
00642     return ( PAPI_OK );
00643 }

static int papicuda_update_control_state ( hwd_control_state_t ctrl,
NativeInfo_t nativeInfo,
int  nativeCount,
hwd_context_t ctx 
) [static]

Triggered by eventset operations like add or remove. For CUDA, needs to be called multiple times from each separate CUDA context with the events to be measured from that context. For each context, create eventgroups for the events.

Definition at line 463 of file linux-cuda.c.

00464 {
00465     /* Note: NativeInfo_t is defined in papi_internal.h */
00466     SUBDBG( "Entering with nativeCount %d\n", nativeCount );
00467     ( void ) ctx;
00468     ( void ) ctrl;
00469     papicuda_control_t *gctrl = global_papicuda_control;
00470     papicuda_context_t *gctxt = global_papicuda_context;
00471     papicuda_active_cucontext_t *currctrl;
00472     int currDeviceNum, currContextIdx, cuContextIdx;
00473     CUcontext currCuCtx;
00474     int index, ii, jj;
00475 
00476     if ( nativeCount == 0 ) {
00477         /* Does nativeCount=0 implies that the component is being reset!? */
00478         /* gctrl->activeEventCount = 0;  */
00479     } else {
00480         /* nativecount>0 so we need to process the events */
00481         // SUBDBG( "There are currently %d contexts\n", gctrl->countOfActiveCUContexts );
00482 
00483         /* Get/query some device and context specific information  */
00484         CHECK_PRINT_EVAL( ( *cudaGetDevicePtr )( &currDeviceNum )!=cudaSuccess, "cudaGetDevice: CUDA device MUST be set before adding events", return( PAPI_EMISC ) );
00485         CHECK_PRINT_EVAL( ( *cudaFreePtr )( NULL )!=cudaSuccess, "cudaFree: Failed to free in this CUDA context", return( PAPI_EMISC ) );
00486         CHECK_PRINT_EVAL( ( *cuCtxGetCurrentPtr )( &currCuCtx )!=CUDA_SUCCESS, "cuCtxGetCurrent: CUDA context MUST be initialized before adding events", return ( PAPI_EMISC ) );
00487 
00488         /* Find current context/control, creating it if does not exist */
00489         for ( cuContextIdx=0; cuContextIdx<gctrl->countOfActiveCUContexts; cuContextIdx++ )
00490             if ( gctrl->arrayOfActiveCUContexts[cuContextIdx]->context == currCuCtx ) break;
00491         CHECK_PRINT_EVAL( cuContextIdx==PAPICUDA_MAX_COUNTERS,  "Exceeded hardcoded maximum number of contexts (PAPICUDA_MAX_COUNTERS)", return( PAPI_EMISC ) );
00492         if ( cuContextIdx==gctrl->countOfActiveCUContexts ) {
00493             gctrl->arrayOfActiveCUContexts[cuContextIdx] = papi_calloc( 1, sizeof( papicuda_active_cucontext_t ) );
00494             CHECK_PRINT_EVAL( ( gctrl->arrayOfActiveCUContexts[cuContextIdx]==NULL ), "Memory allocation for new active context failed", return( PAPI_ENOMEM ) ) ;
00495             gctrl->arrayOfActiveCUContexts[cuContextIdx]->context = currCuCtx;
00496             gctrl->arrayOfActiveCUContexts[cuContextIdx]->deviceNum = currDeviceNum;
00497             gctrl->countOfActiveCUContexts++;
00498             SUBDBG( "Added a new context ... now %d\n", gctrl->countOfActiveCUContexts );
00499         }
00500         currContextIdx = cuContextIdx;
00501         currctrl = gctrl->arrayOfActiveCUContexts[currContextIdx];
00502         /* At this point, currCuCtx is at index cuContextIdx in the arrayOfActiveCUContexts array */
00503 
00504         /* For each event, check if it is already added.  If not, try to added it to the current context.
00505            Try each existing eventgroup.  If none will have this event, create a new event group.  If new event group will not have it... fail */
00506         /* For each event */
00507         for( ii = 0; ii < nativeCount; ii++ ) {
00508             index = nativeInfo[ii].ni_event; /* Get the PAPI event index from the user */
00509             /* Check to see if event is already in some context */
00510             SUBDBG( "Searching %d active events to see if event %d %s is already in some context\n", gctrl->activeEventCount, index, gctxt->availEventDesc[index].name );
00511             int eventAlreadyAdded=0;
00512             for( jj = 0; jj < gctrl->activeEventCount; jj++ ) {
00513                 if ( gctrl->activeEventIndex[jj] == index ) {
00514                     eventAlreadyAdded=1;
00515                     break;
00516                 }
00517             }
00518 
00519             /* If event was not found in any context.. try to insert it into current context */
00520             if ( !eventAlreadyAdded ) {
00521                 SUBDBG( "Need to add event %d %s to the current context\n", index, gctxt->availEventDesc[index].name );
00522                 /* Make sure that the device number for the event matches the device for this context */
00523                 CHECK_PRINT_EVAL( (currDeviceNum!=gctxt->availEventDeviceNum[index]), "Current CUDA device cannot use this event", return( PAPI_EINVAL ) );
00524                 /* if this event index corresponds to something from availEventIDArray */
00525                 if ( index < ( int )gctxt->availEventSize ) {
00526                     /* lookup cuptieventid for this event index */
00527                     CUpti_EventID cuptieventid = gctxt->availEventIDArray[index];
00528                     CUpti_EventGroup cuptieventgroup;
00529                     int addstatus=!CUPTI_SUCCESS, gg;
00530                     SUBDBG( "Event %s is going to be added to current context %d having %d eventgroups\n", gctxt->availEventDesc[index].name, currContextIdx, currctrl->numEventGroups );
00531                     /* For each existing eventgroup, try to insert this event */
00532                     for ( gg=0; gg<currctrl->numEventGroups; gg++ ) {
00533                         cuptieventgroup = currctrl->eventGroup[gg];
00534                         addstatus = ( *cuptiEventGroupAddEventPtr )( cuptieventgroup, cuptieventid );
00535                         if ( addstatus==CUPTI_SUCCESS ) {
00536                             SUBDBG( "Event %s successfully added to current eventgroup %d:%d\n", gctxt->availEventDesc[index].name, currContextIdx, gg );
00537                             break;
00538                         }
00539                     }
00540                     /* If the event could not be added to any earlier eventgroup, create a new one and try again.  Fail if this does not succeed */
00541                     if ( addstatus!=CUPTI_SUCCESS ) {
00542                         //SUBDBG( "Event %s needs a new eventgroup\n", gctxt->availEventDesc[index].name );
00543                         CHECK_PRINT_EVAL( ( gg>PAPICUDA_MAX_COUNTERS-1 ), "For current CUDA device, could not add event (no more eventgroups can be added)", return( PAPI_EMISC ) );
00544                         //SUBDBG( "gg %d context %d %p\n", gg, currctrl->context, currctrl->context  );
00545                         CHECK_CUPTI_ERROR( ( *cuptiEventGroupCreatePtr )( currctrl->context, &currctrl->eventGroup[gg], 0 ), "cuptiEventGroupCreate" );
00546                         cuptieventgroup = currctrl->eventGroup[gg];
00547                         currctrl->numEventGroups++;
00548                         addstatus = ( *cuptiEventGroupAddEventPtr )( cuptieventgroup, cuptieventid );
00549                         CHECK_PRINT_EVAL( ( addstatus!=CUPTI_SUCCESS ), "cuptiEventGroupAddEvent: Could not add event (event may not match CUDA context)", return( PAPI_EMISC ) );
00550                         SUBDBG( "Event %s successfully added to new eventgroup %d:%d\n", gctxt->availEventDesc[index].name, currContextIdx, gg );
00551                     }
00552                 }
00553 
00554                 /* Record index of this active event back into the nativeInfo structure */
00555                 nativeInfo[ii].ni_position = gctrl->activeEventCount;
00556                 /* record added event at the higher level */
00557                 CHECK_PRINT_EVAL( ( gctrl->activeEventCount==PAPICUDA_MAX_COUNTERS-1 ), "Exceeded maximum num of events (PAPI_MAX_COUNTERS)", return( PAPI_EMISC ) );
00558                 gctrl->activeEventIndex[gctrl->activeEventCount] = index;
00559                 gctrl->activeEventContextIdx[gctrl->activeEventCount] = currContextIdx;
00560                 gctrl->activeEventValues[gctrl->activeEventCount] = 0;
00561                 gctrl->activeEventCount++;
00562 
00563             }
00564         }
00565     }
00566     return ( PAPI_OK );
00567 }


Variable Documentation

Vector that points to entry points for the component

Definition at line 79 of file linux-cuda.c.

void(* _dl_non_dynamic_init)(void)

Definition at line 112 of file linux-cuda.c.

00184                                               { strncpy( _cuda_vector.cmp_info.disabled_reason, str, PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
00185 
00186 static int papicuda_linkCudaLibraries()
00187 {
00188     /* Attempt to guess if we were statically linked to libc, if so bail */
00189     if( _dl_non_dynamic_init != NULL ) {
00190         strncpy( _cuda_vector.cmp_info.disabled_reason, "The cuda component does not support statically linking to libc.", PAPI_MAX_STR_LEN );
00191         return PAPI_ENOSUPP;
00192     }
00193     /* Need to link in the cuda libraries, if not found disable the component */
00194     dl1 = dlopen( "libcuda.so", RTLD_NOW | RTLD_GLOBAL );
00195     CHECK_DL_STATUS( !dl1 , "CUDA library libcuda.so not found." );
00196     cuCtxGetCurrentPtr = dlsym( dl1, "cuCtxGetCurrent" );
00197     CHECK_DL_STATUS( dlerror()!=NULL , "CUDA function cuCtxGetCurrent not found." );
00198     cuDeviceGetPtr = dlsym( dl1, "cuDeviceGet" );
00199     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGet not found." );
00200     cuDeviceGetCountPtr = dlsym( dl1, "cuDeviceGetCount" );
00201     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGetCount not found." );
00202     cuDeviceGetNamePtr = dlsym( dl1, "cuDeviceGetName" );
00203     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuDeviceGetName not found." );
00204     cuInitPtr = dlsym( dl1, "cuInit" );
00205     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuInit not found." );
00206     cuCtxPopCurrentPtr = dlsym( dl1, "cuCtxPopCurrent" );
00207     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuCtxPopCurrent not found." );
00208     cuCtxPushCurrentPtr = dlsym( dl1, "cuCtxPushCurrent" );
00209     CHECK_DL_STATUS( dlerror()!=NULL, "CUDA function cuCtxPushCurrent not found." );
00210 
00211     dl2 = dlopen( "libcudart.so", RTLD_NOW | RTLD_GLOBAL );
00212     CHECK_DL_STATUS( !dl2, "CUDA runtime library libcudart.so not found." );
00213     cudaGetDevicePtr = dlsym( dl2, "cudaGetDevice" );
00214     CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaGetDevice not found." );
00215     cudaSetDevicePtr = dlsym( dl2, "cudaSetDevice" );
00216     CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaSetDevice not found." );
00217     cudaFreePtr = dlsym( dl2, "cudaFree" );
00218     CHECK_DL_STATUS( dlerror()!=NULL, "CUDART function cudaFree not found." );
00219 
00220     dl3 = dlopen( "libcupti.so", RTLD_NOW | RTLD_GLOBAL );
00221     CHECK_DL_STATUS( !dl3, "CUDA runtime library libcupti.so not found." );
00222     cuptiDeviceEnumEventDomainsPtr = dlsym( dl3, "cuptiDeviceEnumEventDomains" );
00223     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiDeviceEnumEventDomains not found." );
00224     cuptiDeviceGetNumEventDomainsPtr = dlsym( dl3, "cuptiDeviceGetNumEventDomains" );
00225     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiDeviceGetNumEventDomains not found." );
00226     cuptiEventDomainEnumEventsPtr = dlsym( dl3, "cuptiEventDomainEnumEvents" );
00227     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventDomainEnumEvents not found." );
00228     cuptiEventDomainGetNumEventsPtr = dlsym( dl3, "cuptiEventDomainGetNumEvents" );
00229     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventDomainGetNumEvents not found." );
00230     cuptiEventGetAttributePtr = dlsym( dl3, "cuptiEventGetAttribute" );
00231     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGetAttribute not found." );
00232     cuptiEventGroupAddEventPtr = dlsym( dl3, "cuptiEventGroupAddEvent" );
00233     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupAddEvent not found." );
00234     cuptiEventGroupCreatePtr = dlsym( dl3, "cuptiEventGroupCreate" );
00235     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupCreate not found." );
00236     cuptiEventGroupDestroyPtr = dlsym( dl3, "cuptiEventGroupDestroy" );
00237     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupDestroy not found." );
00238     cuptiEventGroupDisablePtr = dlsym( dl3, "cuptiEventGroupDisable" );
00239     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupDisable not found." );
00240     cuptiEventGroupEnablePtr = dlsym( dl3, "cuptiEventGroupEnable" );
00241     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupEnable not found." );
00242     cuptiEventGroupReadAllEventsPtr = dlsym( dl3, "cuptiEventGroupReadAllEvents" );
00243     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupReadAllEvents not found." );
00244     cuptiEventGroupResetAllEventsPtr = dlsym( dl3, "cuptiEventGroupResetAllEvents" );
00245     CHECK_DL_STATUS( dlerror()!=NULL, "CUPTI function cuptiEventGroupResetAllEvents not found." );
00246     return ( PAPI_OK );
00247 }

void* dl1 = NULL [static]

Definition at line 74 of file linux-cuda.c.

void* dl2 = NULL [static]

Definition at line 75 of file linux-cuda.c.

void* dl3 = NULL [static]

Definition at line 76 of file linux-cuda.c.

Definition at line 82 of file linux-cuda.c.

Definition at line 85 of file linux-cuda.c.


Generated on 17 Nov 2016 for PAPI by  doxygen 1.6.1