PAPI  5.6.0.0
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
linux-cuda.c File Reference

This implements a PAPI component that enables PAPI-C to access hardware monitoring counters for NVIDIA CUDA GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Data Structures

struct  papicuda_context_t
 
struct  papicuda_name_desc_t
 
struct  papicuda_device_desc_t
 
struct  papicuda_control_t
 
struct  papicuda_active_cucontext_t
 

Macros

#define PAPICUDA_MAX_COUNTERS   512
 
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
 
#define CUDA_CALL(call, handleerror)
 
#define CU_CALL(call, handleerror)
 
#define CUPTI_CALL(call, handleerror)
 
#define BUF_SIZE   (32 * 1024)
 
#define ALIGN_SIZE   (8)
 
#define ALIGN_BUFFER(buffer, align)   (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer))
 
#define CUAPIWEAK   __attribute__( ( weak ) )
 
#define DECLARECUFUNC(funcname, funcsig)   CUresult CUAPIWEAK funcname funcsig; CUresult( *funcname##Ptr ) funcsig;
 
#define CUDAAPIWEAK   __attribute__( ( weak ) )
 
#define DECLARECUDAFUNC(funcname, funcsig)   cudaError_t CUDAAPIWEAK funcname funcsig; cudaError_t( *funcname##Ptr ) funcsig;
 
#define CUPTIAPIWEAK   __attribute__( ( weak ) )
 
#define DECLARECUPTIFUNC(funcname, funcsig)   CUptiResult CUPTIAPIWEAK funcname funcsig; CUptiResult( *funcname##Ptr ) funcsig;
 
#define DLSYM_AND_CHECK(dllib, name)   dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
 

Functions

static int papicuda_cleanup_eventset (hwd_control_state_t *ctrl)
 
static int papicuda_add_native_events (papicuda_context_t *gctxt)
 
static int papicuda_convert_metric_value_to_long_long (CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue)
 
static int papicuda_init_thread (hwd_context_t *ctx)
 
static int papicuda_init_component (int cidx)
 
static int papicuda_init_control_state (hwd_control_state_t *ctrl)
 
static int papicuda_update_control_state (hwd_control_state_t *ctrl, NativeInfo_t *nativeInfo, int nativeCount, hwd_context_t *ctx)
 
static int papicuda_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
static int papicuda_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **values, int flags)
 
static int papicuda_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
int papicuda_shutdown_thread (hwd_context_t *ctx)
 
static int papicuda_shutdown_component (void)
 
static int papicuda_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
 
static int papicuda_ctrl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
 
static int papicuda_set_domain (hwd_control_state_t *ctrl, int domain)
 
static int papicuda_ntv_enum_events (unsigned int *EventCode, int modifier)
 
static int papicuda_ntv_code_to_name (unsigned int EventCode, char *name, int len)
 
static int papicuda_ntv_code_to_descr (unsigned int EventCode, char *name, int len)
 

Variables

static void * dl1 = NULL
 
static void * dl2 = NULL
 
static void * dl3 = NULL
 
papi_vector_t _cuda_vector
 
static papicuda_context_t * global_papicuda_context = NULL
 
static papicuda_control_t * global_papicuda_control = NULL
 
void(* _dl_non_dynamic_init )(void)
 

Detailed Description

Author
Asim YarKhan yarkh.nosp@m.an@i.nosp@m.cl.ut.nosp@m.k.ed.nosp@m.u (updated in 2017 to support CUDA metrics)
Asim YarKhan yarkh.nosp@m.an@i.nosp@m.cl.ut.nosp@m.k.ed.nosp@m.u (updated in 2015 for multiple CUDA contexts/devices)
Heike Jagode (First version, in collaboration with Robert Dietrich, TU Dresden) jagod.nosp@m.e@ic.nosp@m.l.utk.nosp@m..edu

The open source software license for PAPI conforms to the BSD License template.

Definition in file linux-cuda.c.

Macro Definition Documentation

#define ALIGN_BUFFER (   buffer,
  align 
)    (((uintptr_t) (buffer) & ((align)-1)) ? ((buffer) + (align) - ((uintptr_t) (buffer) & ((align)-1))) : (buffer))

Definition at line 139 of file linux-cuda.c.

#define ALIGN_SIZE   (8)

Definition at line 138 of file linux-cuda.c.

#define BUF_SIZE   (32 * 1024)

Definition at line 137 of file linux-cuda.c.

#define CHECK_PRINT_EVAL (   checkcond,
  str,
  evalthis 
)
Value:
do { \
int _cond = (checkcond); \
if (_cond) { \
SUBDBG("error: condition %s failed: %s.\n", #checkcond, str); \
evalthis; \
} \
} while (0)
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
int
Definition: iozone.c:18528
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")

Definition at line 98 of file linux-cuda.c.

#define CU_CALL (   call,
  handleerror 
)
Value:
do { \
CUresult _status = (call); \
if (_status != CUDA_SUCCESS) { \
SUBDBG("error: function %s failed with error %d.\n", #call, _status); \
handleerror; \
} \
} while (0)
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")

Definition at line 116 of file linux-cuda.c.

#define CUAPIWEAK   __attribute__( ( weak ) )
#define CUDA_CALL (   call,
  handleerror 
)
Value:
do { \
cudaError_t _status = (call); \
if (_status != cudaSuccess) { \
SUBDBG("error: function %s failed with error %d.\n", #call, _status); \
handleerror; \
} \
} while (0)
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")

Definition at line 107 of file linux-cuda.c.

#define CUDAAPIWEAK   __attribute__( ( weak ) )
#define CUPTI_CALL (   call,
  handleerror 
)
Value:
do { \
CUptiResult _status = (call); \
if (_status != CUPTI_SUCCESS) { \
const char *errstr; \
(*cuptiGetResultStringPtr)(_status, &errstr); \
SUBDBG("error: function %s failed with error %s.\n", #call, errstr); \
handleerror; \
} \
} while (0)
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
if(gettimeofday(&tp,(struct timezone *) NULL)==-1) perror("gettimeofday")

Definition at line 126 of file linux-cuda.c.

#define CUPTIAPIWEAK   __attribute__( ( weak ) )
#define DECLARECUDAFUNC (   funcname,
  funcsig 
)    cudaError_t CUDAAPIWEAK funcname funcsig; cudaError_t( *funcname##Ptr ) funcsig;
#define DECLARECUFUNC (   funcname,
  funcsig 
)    CUresult CUAPIWEAK funcname funcsig; CUresult( *funcname##Ptr ) funcsig;
#define DECLARECUPTIFUNC (   funcname,
  funcsig 
)    CUptiResult CUPTIAPIWEAK funcname funcsig; CUptiResult( *funcname##Ptr ) funcsig;
#define DLSYM_AND_CHECK (   dllib,
  name 
)    dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
#define PAPICUDA_MAX_COUNTERS   512

Definition at line 27 of file linux-cuda.c.

Function Documentation

static int papicuda_add_native_events ( papicuda_context_t * gctxt)
static

Definition at line 297 of file linux-cuda.c.

298 {
299  SUBDBG("Entering\n");
300  CUresult cuErr;
301  int deviceNum;
302  uint32_t domainNum, eventNum;
303  papicuda_device_desc_t *mydevice;
304  char tmpStr[PAPI_MIN_STR_LEN];
305  tmpStr[PAPI_MIN_STR_LEN - 1] = '\0';
306  size_t tmpSizeBytes;
307  int ii;
308  uint32_t maxEventSize;
309 
310  /* How many CUDA devices do we have? */
311  cuErr = (*cuDeviceGetCountPtr) (&gctxt->deviceCount);
312  if(cuErr == CUDA_ERROR_NOT_INITIALIZED) {
313  /* If CUDA is not initialized, initialize CUDA and retry the device list */
314  /* This is required for some of the PAPI tools, that do not call the init functions */
315  if(((*cuInitPtr) (0)) != CUDA_SUCCESS) {
316  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA cannot be found and initialized (cuInit failed).", PAPI_MAX_STR_LEN);
317  return PAPI_ENOSUPP;
318  }
319  CU_CALL((*cuDeviceGetCountPtr) (&gctxt->deviceCount), return (PAPI_EMISC));
320  }
321 
322  if(gctxt->deviceCount == 0) {
323  strncpy(_cuda_vector.cmp_info.disabled_reason, "CUDA initialized but no CUDA devices found.", PAPI_MAX_STR_LEN);
324  return PAPI_ENOSUPP;
325  }
326  SUBDBG("Found %d devices\n", gctxt->deviceCount);
327 
328  /* allocate memory for device information */
330  CHECK_PRINT_EVAL(!gctxt->deviceArray, "ERROR CUDA: Could not allocate memory for CUDA device structure", return (PAPI_ENOMEM));
331 
332  /* For each device, get domains and domain-events counts */
333  maxEventSize = 0;
334  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) {
335  mydevice = &gctxt->deviceArray[deviceNum];
336  /* Get device id, name, numeventdomains for each device */
337  CU_CALL((*cuDeviceGetPtr) (&mydevice->cuDev, deviceNum), return (PAPI_EMISC));
338  CU_CALL((*cuDeviceGetNamePtr) (mydevice->deviceName, PAPI_MIN_STR_LEN - 1, mydevice->cuDev), return (PAPI_EMISC));
339  mydevice->deviceName[PAPI_MIN_STR_LEN - 1] = '\0';
340  CUPTI_CALL((*cuptiDeviceGetNumEventDomainsPtr) (mydevice->cuDev, &mydevice->maxDomains), return (PAPI_EMISC));
341  /* Allocate space to hold domain IDs */
342  mydevice->domainIDArray = (CUpti_EventDomainID *) papi_calloc(mydevice->maxDomains, sizeof(CUpti_EventDomainID));
343  CHECK_PRINT_EVAL(!mydevice->domainIDArray, "ERROR CUDA: Could not allocate memory for CUDA device domains", return (PAPI_ENOMEM));
344  /* Put domain ids into allocated space */
345  size_t domainarraysize = mydevice->maxDomains * sizeof(CUpti_EventDomainID);
346  CUPTI_CALL((*cuptiDeviceEnumEventDomainsPtr) (mydevice->cuDev, &domainarraysize, mydevice->domainIDArray), return (PAPI_EMISC));
347  /* Allocate space to hold domain event counts */
348  mydevice->domainIDNumEvents = (uint32_t *) papi_calloc(mydevice->maxDomains, sizeof(uint32_t));
349  CHECK_PRINT_EVAL(!mydevice->domainIDNumEvents, "ERROR CUDA: Could not allocate memory for domain event counts", return (PAPI_ENOMEM));
350  /* For each domain, get event counts in domainNumEvents[] */
351  for(domainNum = 0; domainNum < mydevice->maxDomains; domainNum++) {
352  CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
353  /* Get num events in domain */
354  // SUBDBG( "Device %d:%d calling cuptiEventDomainGetNumEventsPtr with domainID %d \n", deviceNum, mydevice->cuDev, domainID );
355  CUPTI_CALL((*cuptiEventDomainGetNumEventsPtr) (domainID, &mydevice->domainIDNumEvents[domainNum]), return (PAPI_EMISC));
356  /* Keep track of overall number of events */
357  maxEventSize += mydevice->domainIDNumEvents[domainNum];
358  }
359  }
360 
361  /* Create space for metrics */
362  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) {
363  uint32_t maxMetrics;
364  mydevice = &gctxt->deviceArray[deviceNum];
365  // CUPTI_CALL((*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics), return (PAPI_EMISC));
366  if ( (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics) != CUPTI_SUCCESS )
367  maxMetrics = 0;
368  maxEventSize += maxMetrics;
369  }
370 
371  /* Allocate space for all events and descriptors */
372  gctxt->availEventKind = (CUpti_ActivityKind *) papi_calloc(maxEventSize, sizeof(CUpti_ActivityKind));
373  CHECK_PRINT_EVAL(!gctxt->availEventKind, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
374  gctxt->availEventDeviceNum = (int *) papi_calloc(maxEventSize, sizeof(int));
375  CHECK_PRINT_EVAL(!gctxt->availEventDeviceNum, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
376  gctxt->availEventIDArray = (CUpti_EventID *) papi_calloc(maxEventSize, sizeof(CUpti_EventID));
377  CHECK_PRINT_EVAL(!gctxt->availEventIDArray, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
378  gctxt->availEventIsBeingMeasuredInEventset = (uint32_t *) papi_calloc(maxEventSize, sizeof(uint32_t));
379  CHECK_PRINT_EVAL(!gctxt->availEventIsBeingMeasuredInEventset, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
380  gctxt->availEventDesc = (papicuda_name_desc_t *) papi_calloc(maxEventSize, sizeof(papicuda_name_desc_t));
381  CHECK_PRINT_EVAL(!gctxt->availEventDesc, "ERROR CUDA: Could not allocate memory", return (PAPI_ENOMEM));
382 
383  /* Record the events and descriptions */
384  uint32_t idxEventArray = 0;
385  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) {
386  mydevice = &gctxt->deviceArray[deviceNum];
387  // SUBDBG( "For device %d %d maxdomains %d \n", deviceNum, mydevice->cuDev, mydevice->maxDomains );
388  /* Get and store event IDs, names, descriptions into the large arrays allocated */
389  for(domainNum = 0; domainNum < mydevice->maxDomains; domainNum++) {
390  /* Get domain id */
391  CUpti_EventDomainID domainID = mydevice->domainIDArray[domainNum];
392  uint32_t domainNumEvents = mydevice->domainIDNumEvents[domainNum];
393  // SUBDBG( "For device %d domain %d domainID %d numEvents %d\n", mydevice->cuDev, domainNum, domainID, domainNumEvents );
394  /* Allocate temp space for eventIDs for this domain */
395  CUpti_EventID *domainEventIDArray = (CUpti_EventID *) papi_calloc(domainNumEvents, sizeof(CUpti_EventID));
396  CHECK_PRINT_EVAL(!domainEventIDArray, "ERROR CUDA: Could not allocate memory for events", return (PAPI_ENOMEM));
397  /* Load the domain eventIDs in temp space */
398  size_t domainEventArraySize = domainNumEvents * sizeof(CUpti_EventID);
399  CUPTI_CALL((*cuptiEventDomainEnumEventsPtr) (domainID, &domainEventArraySize, domainEventIDArray), return (PAPI_EMISC));
400  /* For each event, get and store name and description */
401  for(eventNum = 0; eventNum < domainNumEvents; eventNum++) {
402  /* Record the event IDs in native event array */
403  CUpti_EventID myeventCuptiEventId = domainEventIDArray[eventNum];
404  gctxt->availEventKind[idxEventArray] = CUPTI_ACTIVITY_KIND_EVENT;
405  gctxt->availEventIDArray[idxEventArray] = myeventCuptiEventId;
406  gctxt->availEventDeviceNum[idxEventArray] = deviceNum;
407  /* Get event name */
408  tmpSizeBytes = PAPI_MIN_STR_LEN - 1 * sizeof(char);
409  CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, CUPTI_EVENT_ATTR_NAME, &tmpSizeBytes, tmpStr), return (PAPI_EMISC));
410  /* Save a full path for the event, filling spaces with underscores */
411  // snprintf( gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "%s:%d:%s", mydevice->deviceName, deviceNum, tmpStr );
412  snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "event:%s:device=%d", tmpStr, deviceNum);
413  gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0';
414  char *nameTmpPtr = gctxt->availEventDesc[idxEventArray].name;
415  for(ii = 0; ii < (int) strlen(nameTmpPtr); ii++)
416  if(nameTmpPtr[ii] == ' ')
417  nameTmpPtr[ii] = '_';
418  /* Save description in the native event array */
419  tmpSizeBytes = PAPI_2MAX_STR_LEN - 1 * sizeof(char);
420  CUPTI_CALL((*cuptiEventGetAttributePtr) (myeventCuptiEventId, CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &tmpSizeBytes, gctxt->availEventDesc[idxEventArray].description), return (PAPI_EMISC));
421  gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN - 1] = '\0';
422  // SUBDBG( "Event ID:%d Name:%s Desc:%s\n", gctxt->availEventIDArray[idxEventArray], gctxt->availEventDesc[idxEventArray].name, gctxt->availEventDesc[idxEventArray].description );
423  /* Increment index past events in this domain to start of next domain */
424  idxEventArray++;
425  }
426  papi_free(domainEventIDArray);
427  }
428  }
429 
430  /* Retrieve and store metric information for each device */
431  SUBDBG("Checking for metrics\n");
432  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) {
433  uint32_t maxMetrics, i;
434  CUpti_MetricID *metricIdList = NULL;
435  mydevice = &gctxt->deviceArray[deviceNum];
436  // CUPTI_CALL((*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics), return (PAPI_EMISC));
437  if ( (*cuptiDeviceGetNumMetricsPtr) (mydevice->cuDev, &maxMetrics) != CUPTI_SUCCESS ) {
438  maxMetrics = 0;
439  continue;
440  }
441  SUBDBG("Device %d: Checking each of the (maxMetrics) %d metrics\n", deviceNum, maxMetrics);
442  size_t size = maxMetrics * sizeof(CUpti_EventID);
443  metricIdList = (CUpti_MetricID *) papi_calloc(maxMetrics, sizeof(CUpti_EventID));
444  CHECK_PRINT_EVAL(metricIdList == NULL, "Out of memory", return (PAPI_ENOMEM));
445  CUPTI_CALL((*cuptiDeviceEnumMetricsPtr) (mydevice->cuDev, &size, metricIdList), return (PAPI_EMISC));
446  for(i = 0; i < maxMetrics; i++) {
447  gctxt->availEventIDArray[idxEventArray] = metricIdList[i];
448  gctxt->availEventKind[idxEventArray] = CUPTI_ACTIVITY_KIND_METRIC;
449  gctxt->availEventDeviceNum[idxEventArray] = deviceNum;
450  size = PAPI_MIN_STR_LEN;
451  CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricIdList[i], CUPTI_METRIC_ATTR_NAME, &size, (uint8_t *) tmpStr), return (PAPI_EMISC));
452  // FIXME SOMEDAY: For this release the nvlink metrics are not functioning so skip them
453  if(strstr(tmpStr, "nvlink")!=NULL) continue;
454  // FIXME SOMEDAY: For this release the nvlink metrics are not functioning so skip them
455  if(size >= PAPI_MIN_STR_LEN)
456  gctxt->availEventDesc[idxEventArray].name[PAPI_MIN_STR_LEN - 1] = '\0';
457  snprintf(gctxt->availEventDesc[idxEventArray].name, PAPI_MIN_STR_LEN, "metric:%s:device=%d", tmpStr, deviceNum);
458  size = PAPI_2MAX_STR_LEN;
459  CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricIdList[i], CUPTI_METRIC_ATTR_LONG_DESCRIPTION, &size, (uint8_t *) gctxt->availEventDesc[idxEventArray].description), return (PAPI_EMISC));
460  if(size >= PAPI_2MAX_STR_LEN)
461  gctxt->availEventDesc[idxEventArray].description[PAPI_2MAX_STR_LEN - 1] = '\0';
462  // SUBDBG( "For device %d availEvent[%d] %s\n", mydevice->cuDev, idxEventArray, gctxt->availEventDesc[idxEventArray].name);
463  idxEventArray++;
464  }
465  papi_free(metricIdList);
466  }
467  gctxt->availEventSize = idxEventArray;
468 
469  /* return 0 if everything went OK */
470  return 0;
471 }
uint32_t * domainIDNumEvents
Definition: linux-cuda.c:56
#define papi_free(a)
Definition: papi_memory.h:35
uint32_t * availEventIDArray
Definition: linux-cuda.c:38
#define PAPI_ENOSUPP
Definition: papi.h:271
int * availEventDeviceNum
Definition: linux-cuda.c:37
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:116
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:98
#define PAPI_EMISC
Definition: papi.h:267
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:40
#define PAPI_2MAX_STR_LEN
Definition: papi.h:466
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:636
CUpti_EventDomainID * domainIDArray
Definition: linux-cuda.c:55
int i
Definition: fileop.c:140
char *long long size
Definition: iozone.c:12023
uint32_t availEventSize
Definition: linux-cuda.c:35
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _cuda_vector
Definition: linux-cuda.c:89
#define PAPI_ENOMEM
Definition: papi.h:254
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:34
int
Definition: iozone.c:18528
#define PAPI_MIN_STR_LEN
Definition: papi.h:464
#define PAPI_MAX_STR_LEN
Definition: papi.h:465
char deviceName[PAPI_MIN_STR_LEN]
Definition: linux-cuda.c:53
return
Definition: iozone.c:22170
CUpti_ActivityKind * availEventKind
Definition: linux-cuda.c:36
uint32_t * availEventIsBeingMeasuredInEventset
Definition: linux-cuda.c:39
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:126

Here is the caller graph for this function:

static int papicuda_cleanup_eventset ( hwd_control_state_t * ctrl)
static

Definition at line 1033 of file linux-cuda.c.

1034 {
1035  SUBDBG("Entering\n");
1036  (void) ctrl;
1038  // papicuda_active_cucontext_t *currctrl;
1039  uint32_t cc;
1040  int saveDeviceNum;
1041 
1042  SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n");
1043  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC));
1044  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
1045  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1046  int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
1047  CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses;
1048  if(currDeviceNum != saveDeviceNum)
1049  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1050  else
1051  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1052  //CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) (currEventGroupPasses), return (PAPI_EMISC));
1053  (*cuptiEventGroupSetsDestroyPtr) (currEventGroupPasses);
1054  gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses = NULL;
1055  papi_free( gctrl->arrayOfActiveCUContexts[cc] );
1056  /* Pop the pushed context */
1057  if(currDeviceNum != saveDeviceNum)
1058  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
1059  }
1060  /* Record that there are no active contexts or events */
1061  gctrl->countOfActiveCUContexts = 0;
1062  gctrl->activeEventCount = 0;
1063  return (PAPI_OK);
1064 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
#define papi_free(a)
Definition: papi_memory.h:35
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:116
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
struct client_command cc
Definition: iozone.c:21326
#define PAPI_EMISC
Definition: papi.h:267
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:107
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
uint32_t activeEventCount
Definition: linux-cuda.c:63

Here is the caller graph for this function:

static int papicuda_convert_metric_value_to_long_long ( CUpti_MetricValue  metricValue,
CUpti_MetricValueKind  valueKind,
long long int * papiValue 
)
static

Definition at line 481 of file linux-cuda.c.

482 {
483  union {
484  long long ll;
485  double fp;
486  } tmpValue;
487 
488  SUBDBG("Try to convert the CUPTI metric value kind (index %d) to PAPI value (long long or double)\n", valueKind);
489  switch (valueKind) {
490  case CUPTI_METRIC_VALUE_KIND_DOUBLE:
491  SUBDBG("Metric double %f\n", metricValue.metricValueDouble);
492  tmpValue.ll = (long long)(metricValue.metricValueDouble);
493  //CHECK_PRINT_EVAL(tmpValue.fp - metricValue.metricValueDouble > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
494  break;
495  case CUPTI_METRIC_VALUE_KIND_UINT64:
496  SUBDBG("Metric uint64 = %llu\n", (unsigned long long) metricValue.metricValueUint64);
497  tmpValue.ll = (long long) (metricValue.metricValueUint64);
498  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueUint64 > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
499  break;
500  case CUPTI_METRIC_VALUE_KIND_INT64:
501  SUBDBG("Metric int64 = %lld\n", (long long) metricValue.metricValueInt64);
502  tmpValue.ll = (long long) (metricValue.metricValueInt64);
503  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueInt64 > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
504  break;
505  case CUPTI_METRIC_VALUE_KIND_PERCENT:
506  SUBDBG("Metric percent = %f%%\n", metricValue.metricValuePercent);
507  tmpValue.ll = (long long)(metricValue.metricValuePercent*100);
508  //CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValuePercent > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
509  break;
510  case CUPTI_METRIC_VALUE_KIND_THROUGHPUT:
511  SUBDBG("Metric throughput %llu bytes/sec\n", (unsigned long long) metricValue.metricValueThroughput);
512  tmpValue.ll = (long long) (metricValue.metricValueThroughput);
513  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueThroughput > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
514  break;
515  case CUPTI_METRIC_VALUE_KIND_UTILIZATION_LEVEL:
516  SUBDBG("Metric utilization level %u\n", (unsigned int) metricValue.metricValueUtilizationLevel);
517  tmpValue.ll = (long long) (metricValue.metricValueUtilizationLevel);
518  CHECK_PRINT_EVAL(tmpValue.ll - metricValue.metricValueUtilizationLevel > 1e-6, "Error converting metric\n", return (PAPI_EMISC));
519  break;
520  default:
521  CHECK_PRINT_EVAL(1, "ERROR: unsupported metric value kind", return (PAPI_EINVAL));
522  exit(-1);
523  }
524  *papiValue = tmpValue.ll;
525  return (PAPI_OK);
526 }
return PAPI_OK
Definition: linux-nvml.c:497
return PAPI_EINVAL
Definition: linux-nvml.c:436
static FILE * fp
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:98
#define PAPI_EMISC
Definition: papi.h:267
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
long long
Definition: iozone.c:19827
return
Definition: iozone.c:22170
void exit()

Here is the call graph for this function:

Here is the caller graph for this function:

static int papicuda_ctrl ( hwd_context_t * ctx,
int  code,
_papi_int_option_t * option 
)
static

Definition at line 1167 of file linux-cuda.c.

1168 {
1169  SUBDBG("Entering\n");
1170  (void) ctx;
1171  (void) code;
1172  (void) option;
1173  return (PAPI_OK);
1174 }
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int papicuda_init_component ( int  cidx)
static

Definition at line 557 of file linux-cuda.c.

558 {
559  SUBDBG("Entering with cidx: %d\n", cidx);
560  int rv;
561 
562  /* link in all the cuda libraries and resolve the symbols we need to use */
563  if(papicuda_linkCudaLibraries() != PAPI_OK) {
564  SUBDBG("Dynamic link of CUDA libraries failed, component will be disabled.\n");
565  SUBDBG("See disable reason in papi_component_avail output for more details.\n");
566  return (PAPI_ENOSUPP);
567  }
568 
569  /* Create the structure */
572 
573  /* Get list of all native CUDA events supported */
575  if(rv != 0)
576  return (rv);
577 
578  /* Export some information */
583 
584  return (PAPI_OK);
585 }
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:92
#define PAPI_ENOSUPP
Definition: papi.h:271
return PAPI_OK
Definition: linux-nvml.c:497
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
static int cidx
uint32_t availEventSize
Definition: linux-cuda.c:35
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _cuda_vector
Definition: linux-cuda.c:89
static int papicuda_add_native_events(papicuda_context_t *gctxt)
Definition: linux-cuda.c:297
#define papi_calloc(a, b)
Definition: papi_memory.h:37

Here is the call graph for this function:

static int papicuda_init_control_state ( hwd_control_state_t * ctrl)
static

Definition at line 592 of file linux-cuda.c.

593 {
594  SUBDBG("Entering\n");
595  (void) ctrl;
597 
598  CHECK_PRINT_EVAL(!gctxt, "Error: The PAPI CUDA component needs to be initialized first", return (PAPI_ENOINIT));
599  /* If no events were found during the initial component initialization, return error */
601  strncpy(_cuda_vector.cmp_info.disabled_reason, "ERROR CUDA: No events exist", PAPI_MAX_STR_LEN);
602  return (PAPI_EMISC);
603  }
604  /* If it does not exist, create the global structure to hold CUDA contexts and active events */
609  }
610  return PAPI_OK;
611 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:92
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:98
#define PAPI_EMISC
Definition: papi.h:267
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:636
uint32_t availEventSize
Definition: linux-cuda.c:35
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
papi_vector_t _cuda_vector
Definition: linux-cuda.c:89
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
#define PAPI_ENOINIT
Definition: papi.h:269
#define PAPI_MAX_STR_LEN
Definition: papi.h:465
uint32_t activeEventCount
Definition: linux-cuda.c:63
#define papi_calloc(a, b)
Definition: papi_memory.h:37
static int papicuda_init_thread ( hwd_context_t * ctx)
static

Definition at line 536 of file linux-cuda.c.

537 {
538  (void) ctx;
539  SUBDBG("Entering\n");
540  return PAPI_OK;
541 }
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int papicuda_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
)
static

Definition at line 1249 of file linux-cuda.c.

1250 {
1251  // SUBDBG( "Entering\n" );
1252  unsigned int index = EventCode;
1254  if(index < gctxt->availEventSize) {
1255  strncpy(name, gctxt->availEventDesc[index].description, len);
1256  } else {
1257  return (PAPI_EINVAL);
1258  }
1259  return (PAPI_OK);
1260 }
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:92
return PAPI_OK
Definition: linux-nvml.c:497
return PAPI_EINVAL
Definition: linux-nvml.c:436
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:40
char * name
Definition: iozone.c:23648
static int papicuda_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)
static

Definition at line 1229 of file linux-cuda.c.

1230 {
1231  // SUBDBG( "Entering EventCode %d\n", EventCode );
1232  unsigned int index = EventCode;
1234  if(index < gctxt->availEventSize) {
1235  strncpy(name, gctxt->availEventDesc[index].name, len);
1236  } else {
1237  return (PAPI_EINVAL);
1238  }
1239  // SUBDBG( "Exit: EventCode %d: Name %s\n", EventCode, name );
1240  return (PAPI_OK);
1241 }
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:92
return PAPI_OK
Definition: linux-nvml.c:497
return PAPI_EINVAL
Definition: linux-nvml.c:436
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:40
char * name
Definition: iozone.c:23648
static int papicuda_ntv_enum_events ( unsigned int * EventCode,
int  modifier 
)
static

Definition at line 1202 of file linux-cuda.c.

1203 {
1204  // SUBDBG( "Entering (get next event after %u)\n", *EventCode );
1205  switch (modifier) {
1206  case PAPI_ENUM_FIRST:
1207  *EventCode = 0;
1208  return (PAPI_OK);
1209  break;
1210  case PAPI_ENUM_EVENTS:
1211  if(*EventCode < global_papicuda_context->availEventSize - 1) {
1212  *EventCode = *EventCode + 1;
1213  return (PAPI_OK);
1214  } else
1215  return (PAPI_ENOEVNT);
1216  break;
1217  default:
1218  return (PAPI_EINVAL);
1219  }
1220  return (PAPI_OK);
1221 }
#define PAPI_ENOEVNT
Definition: papi.h:260
return PAPI_OK
Definition: linux-nvml.c:497
return PAPI_EINVAL
Definition: linux-nvml.c:436
static int papicuda_read ( hwd_context_t ctx,
hwd_control_state_t ctrl,
long long **  values,
int  flags 
)
static

Definition at line 831 of file linux-cuda.c.

832 {
     /*
      * Read the counters of every active CUDA context and publish them to PAPI.
      *
      * For each active context: make it current, synchronize it, read every
      * event of every event group (summing over the group's instances), scale
      * the sums to the total instance count of the domain, then fold the results
      * into gctrl->activeEventValues -- plain events are accumulated, metrics are
      * computed through cuptiMetricGetValue using the time elapsed since the
      * start timestamp.
      *
      * values: on success *values points at gctrl->activeEventValues.
      * ctx, ctrl and flags are unused.
      * Returns PAPI_OK, PAPI_ENOMEM when an allocation fails, PAPI_EINVAL /
      * PAPI_EMISC when a CUDA/CUPTI call or metric lookup fails, or the error
      * returned by papicuda_convert_metric_value_to_long_long.
      *
      * NOTE(review): the declarations of gctxt and gctrl (listing lines 837-838)
      * are not shown in this listing; presumably they alias
      * global_papicuda_context and global_papicuda_control -- confirm.
      */
833  SUBDBG("Entering\n");
834  (void) ctx;
835  (void) ctrl;
836  (void) flags;
839  uint32_t gg, ii, jj, ee, instanceK, cc, rr, ss;
840  int saveDeviceNum;
841  size_t eventIdsSize = PAPICUDA_MAX_COUNTERS * sizeof(CUpti_EventID);
842  uint64_t readEventValueBuffer[PAPICUDA_MAX_COUNTERS];
843  CUpti_EventID readEventIDArray[PAPICUDA_MAX_COUNTERS];
844 
845  // Get read time stamp
846  CUPTI_CALL((*cuptiGetTimestampPtr) (&gctrl->cuptiReadTimestampNs), return (PAPI_EMISC));
847  uint64_t durationNs = gctrl->cuptiReadTimestampNs - gctrl->cuptiStartTimestampNs;
849 
850  SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n");
851  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC));
852  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
853  int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
854  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
855  SUBDBG("Set to device %d cuCtx %p \n", currDeviceNum, currCuCtx);
856  if(currDeviceNum != saveDeviceNum)
857  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC));
858  else
859  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC));
860 
861  size_t numEventIDsRead = 0;
862  CU_CALL((*cuCtxSynchronizePtr) (), return (PAPI_EMISC));
863  CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses;
864  uint32_t numEvents, numInstances, numTotalInstances;
865  size_t sizeofuint32num = sizeof(uint32_t);
866  CUpti_EventDomainID groupDomainID;
867  size_t groupDomainIDSize = sizeof(groupDomainID);
868  CUdevice cudevice = gctxt->deviceArray[currDeviceNum].cuDev;
869 
870  /* Since we accumulate the eventValues in a buffer, it needs to be cleared for each context */
871  for(ee = 0; ee < PAPICUDA_MAX_COUNTERS; ee++)
872  readEventValueBuffer[ee] = 0;
873 
874  for (ss=0; ss<currEventGroupPasses->numSets; ss++) {
875  CUpti_EventGroupSet groupset = currEventGroupPasses->sets[ss];
876  SUBDBG("Read events in this context\n");
877  for(gg = 0; gg < groupset.numEventGroups; gg++) {
878  CUpti_EventGroup group = groupset.eventGroups[gg];
879  CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_EVENT_DOMAIN_ID, &groupDomainIDSize, &groupDomainID), return (PAPI_EMISC));
880  CUPTI_CALL((*cuptiDeviceGetEventDomainAttributePtr) (cudevice, groupDomainID, CUPTI_EVENT_DOMAIN_ATTR_TOTAL_INSTANCE_COUNT, &sizeofuint32num, &numTotalInstances), return (PAPI_EMISC));
881  CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_INSTANCE_COUNT, &sizeofuint32num, &numInstances), return (PAPI_EMISC));
882  CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_NUM_EVENTS, &sizeofuint32num, &numEvents), return (PAPI_EMISC));
883  eventIdsSize = PAPICUDA_MAX_COUNTERS * sizeof(CUpti_EventID);
884  CUpti_EventID eventIds[PAPICUDA_MAX_COUNTERS];
885  CUPTI_CALL((*cuptiEventGroupGetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_EVENTS, &eventIdsSize, eventIds), return (PAPI_EMISC));
886  SUBDBG("Context %d eventgroup %d domain numTotalInstaces %u numInstances %u numEvents %u\n", cc, gg, numTotalInstances, numInstances, numEvents);
887  size_t valuesSize = sizeof(uint64_t) * numInstances;
888  uint64_t *values = (uint64_t *) papi_malloc(valuesSize);
889  CHECK_PRINT_EVAL(values == NULL, "Out of memory", return (PAPI_ENOMEM));
890  /* For each event, read all values and normalize */
891  for(ee = 0; ee < numEvents; ee++) {
892  CUPTI_CALL((*cuptiEventGroupReadEventPtr) (group, CUPTI_EVENT_READ_FLAG_NONE, eventIds[ee], &valuesSize, values), return (PAPI_EMISC));
893  // sum collect event values from all instances
894  uint64_t valuesum = 0;
895  for(instanceK = 0; instanceK < numInstances; instanceK++)
896  valuesum += values[instanceK];
897  // It seems that the same event can occur multiple times in eventIds, so we need to accumulate values in older valueBuffers if needed
898  // Scan thru readEvents looking for a match, break if found, if not found, increment numEventIDsRead
899  for(rr = 0; rr < numEventIDsRead; rr++)
900  if(readEventIDArray[rr] == eventIds[ee])
901  break;
902  /* If the event was not found, increment the numEventIDsRead */
903  if(rr == numEventIDsRead)
904  numEventIDsRead++;
905  readEventIDArray[rr] = eventIds[ee];
906  readEventValueBuffer[rr] += valuesum;
907  size_t tmpStrSize = PAPI_MIN_STR_LEN - 1 * sizeof(char);
908  char tmpStr[PAPI_MIN_STR_LEN];
909  CUPTI_CALL((*cuptiEventGetAttributePtr) (eventIds[ee], CUPTI_EVENT_ATTR_NAME, &tmpStrSize, tmpStr), return (PAPI_EMISC));
     /* rr indexes the accumulation arrays, so the id printed for slot rr comes from readEventIDArray (eventIds[rr] may be an uninitialized entry) */
910  SUBDBG("Read context %d eventgroup %d numEventIDsRead %lu device %d event %d/%d %d name %s value %lu (rr %d id %d val %lu) \n", cc, gg, numEventIDsRead, currDeviceNum, ee, numEvents, eventIds[ee], tmpStr, valuesum, rr,
911  readEventIDArray[rr], readEventValueBuffer[rr]);
912  }
913  papi_free(values);
914  }
915  }
916 
917  // normalize the event values to represent the total number of domain instances on the device
     /* Scale each slot that was actually read (index ii); indexing with numEventIDsRead would leave every read value unscaled and touch the slot one past the last one used */
918  for(ii = 0; ii < numEventIDsRead; ii++)
919  readEventValueBuffer[ii] = (readEventValueBuffer[ii] * numTotalInstances) / numInstances;
920 
921  /* For this pushed device and context, figure out the event and metric values and record them into the arrays */
922  SUBDBG("For this device and context, match read values against active events by scanning activeEvents array and matching associated availEventIDs\n");
923  for(jj = 0; jj < gctrl->activeEventCount; jj++) {
924  int index = gctrl->activeEventIndex[jj];
925  /* If the device/context does not match the current context, move to next */
926  if(gctxt->availEventDeviceNum[index] != currDeviceNum)
927  continue;
928  uint32_t eventId = gctxt->availEventIDArray[index];
929  switch (gctxt->availEventKind[index]) {
930  case CUPTI_ACTIVITY_KIND_EVENT:
931  SUBDBG("Searching for activeEvent %s eventId %u\n", gctxt->availEventDesc[index].name, eventId);
932  for(ii = 0; ii < numEventIDsRead; ii++) {
933  SUBDBG("Look at readEventIDArray[%u/%zu] with id %u\n", ii, numEventIDsRead, readEventIDArray[ii]);
934  if(readEventIDArray[ii] == eventId) {
935  gctrl->activeEventValues[jj] += (long long) readEventValueBuffer[ii];
936  SUBDBG("Matched read-eventID %d:%d eventName %s value %ld activeEvent %d value %lld \n", jj, (int) eventId, gctxt->availEventDesc[index].name, readEventValueBuffer[ii], index, gctrl->activeEventValues[jj]);
937  break;
938  }
939  }
940  break;
941 
942  case CUPTI_ACTIVITY_KIND_METRIC:
943  SUBDBG("For the metric, find list of events required to calculate this metric value\n");
944  CUpti_MetricID metricId = gctxt->availEventIDArray[index];
945  int metricDeviceNum = gctxt->availEventDeviceNum[index];
946  CUdevice cudevice = gctxt->deviceArray[metricDeviceNum].cuDev;
947  uint32_t numEvents, ee;
948  CUPTI_CALL((*cuptiMetricGetNumEventsPtr) (metricId, &numEvents), return (PAPI_EINVAL));
949  SUBDBG("Metric %s needs %d events\n", gctxt->availEventDesc[index].name, numEvents);
950  size_t eventIdArraySizeBytes = numEvents * sizeof(CUpti_EventID);
951  CUpti_EventID *eventIdArray = papi_malloc(eventIdArraySizeBytes);
952  CHECK_PRINT_EVAL(eventIdArray == NULL, "Malloc failed", return (PAPI_ENOMEM));
953  size_t eventValueArraySizeBytes = numEvents * sizeof(uint64_t);
954  uint64_t *eventValueArray = papi_malloc(eventValueArraySizeBytes);
955  CHECK_PRINT_EVAL(eventValueArray == NULL, "Malloc failed", return (PAPI_ENOMEM));
956  CUPTI_CALL((*cuptiMetricEnumEventsPtr) (metricId, &eventIdArraySizeBytes, eventIdArray), return (PAPI_EINVAL));
957  // Match metrics for the users events
958  for(ee = 0; ee < numEvents; ee++) {
959  for(ii = 0; ii < numEventIDsRead; ii++) {
960  if(eventIdArray[ee] == readEventIDArray[ii]) {
961  SUBDBG("Matched metric %s, found %d/%d events with eventId %d\n", gctxt->availEventDesc[index].name, ee, numEvents, readEventIDArray[ii]);
962  eventValueArray[ee] = readEventValueBuffer[ii];
963  break;
964  }
965  }
966  CHECK_PRINT_EVAL(ii == numEventIDsRead, "Could not find required event for metric", return (PAPI_EINVAL));
967  }
968 
969  // Use CUPTI to calculate a metric. Return all metric values mapped into long long values.
970  CUpti_MetricValue metricValue;
971  CUpti_MetricValueKind valueKind;
972  size_t valueKindSize = sizeof(valueKind);
973  CUPTI_CALL((*cuptiMetricGetAttributePtr) (metricId, CUPTI_METRIC_ATTR_VALUE_KIND, &valueKindSize, &valueKind), return (PAPI_EMISC));
974  CUPTI_CALL((*cuptiMetricGetValuePtr) (cudevice, metricId, eventIdArraySizeBytes, eventIdArray, eventValueArraySizeBytes, eventValueArray, durationNs, &metricValue), return (PAPI_EMISC));
975  int retval = papicuda_convert_metric_value_to_long_long(metricValue, valueKind, &(gctrl->activeEventValues[jj]));
     /* Release the scratch arrays before inspecting retval so a failed conversion does not leak them */
976  papi_free(eventIdArray);
977  papi_free(eventValueArray);
978  if(retval != PAPI_OK)
979  return (retval);
980  break;
981 
982  default:
983  SUBDBG("Not handled");
984  break;
985  }
986  }
987 
988  /* Pop the pushed context */
989  if(currDeviceNum != saveDeviceNum)
990  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
991  }
992  *values = gctrl->activeEventValues;
993  return (PAPI_OK);
994 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
uint64_t cuptiStartTimestampNs
Definition: linux-cuda.c:66
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:92
long long flags
Definition: iozone.c:12330
#define PAPICUDA_MAX_COUNTERS
Definition: linux-cuda.c:27
#define papi_free(a)
Definition: papi_memory.h:35
uint32_t * availEventIDArray
Definition: linux-cuda.c:38
#define papi_malloc(a)
Definition: papi_memory.h:34
int * availEventDeviceNum
Definition: linux-cuda.c:37
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:116
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:436
static int papicuda_convert_metric_value_to_long_long(CUpti_MetricValue metricValue, CUpti_MetricValueKind valueKind, long long int *papiValue)
Definition: linux-cuda.c:481
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:98
struct client_command cc
Definition: iozone.c:21326
#define PAPI_EMISC
Definition: papi.h:267
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:40
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
long long
Definition: iozone.c:19827
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:107
#define PAPI_ENOMEM
Definition: papi.h:254
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:34
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:65
#define PAPI_MIN_STR_LEN
Definition: papi.h:464
return
Definition: iozone.c:22170
CUpti_ActivityKind * availEventKind
Definition: linux-cuda.c:36
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
ssize_t retval
Definition: libasync.c:338
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
int activeEventIndex[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:64
uint32_t activeEventCount
Definition: linux-cuda.c:63
uint64_t cuptiReadTimestampNs
Definition: linux-cuda.c:67
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:126

Here is the call graph for this function:

static int papicuda_reset ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)
static

Definition at line 1125 of file linux-cuda.c.

1126 {
     /*
      * Zero the accumulated PAPI values and reset the CUPTI counters of every
      * active context.
      * ctx and ctrl are unused.
      * Returns PAPI_OK, or PAPI_EMISC as soon as any CUDA/CUPTI call fails.
      * NOTE(review): the declaration of gctrl (listing line 1129) is not shown
      * in this listing; presumably it aliases global_papicuda_control -- confirm.
      */
1127  (void) ctx;
1128  (void) ctrl;
1130  uint32_t gg, ii, cc, ss;
1131  int saveDeviceNum;
1132 
1133  SUBDBG("Reset all active event values\n");
1134  for(ii = 0; ii < gctrl->activeEventCount; ii++)
1135  gctrl->activeEventValues[ii] = 0;
1136 
1137  SUBDBG("Save current context, then switch to each active device/context and reset\n");
1138  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC));
1139  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
1140  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1141  int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
     /* A context on another device is pushed (and popped below); one on the current device is just made current */
1142  if(currDeviceNum != saveDeviceNum)
1143  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1144  else
1145  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1146  CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses;
1147  for (ss=0; ss<currEventGroupPasses->numSets; ss++) {
1148  CUpti_EventGroupSet groupset = currEventGroupPasses->sets[ss];
1149  for(gg = 0; gg < groupset.numEventGroups; gg++) {
1150  CUpti_EventGroup group = groupset.eventGroups[gg];
1151  CUPTI_CALL((*cuptiEventGroupResetAllEventsPtr) (group), return (PAPI_EMISC));
1152  }
     /* After its groups are reset, the whole set is enabled again */
1153  CUPTI_CALL((*cuptiEventGroupSetEnablePtr) (&groupset), return (PAPI_EMISC));
1154  }
1155  if(currDeviceNum != saveDeviceNum)
1156  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
1157  }
1158  return (PAPI_OK);
1159 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:116
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
struct client_command cc
Definition: iozone.c:21326
#define PAPI_EMISC
Definition: papi.h:267
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:107
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:65
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
uint32_t activeEventCount
Definition: linux-cuda.c:63
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:126
static int papicuda_set_domain ( hwd_control_state_t ctrl,
int  domain 
)
static

Definition at line 1186 of file linux-cuda.c.

1187 {
     /*
      * Validate a requested counting domain.
      * ctrl is unused. Returns PAPI_OK when domain has at least one bit in
      * common with PAPI_DOM_USER, PAPI_DOM_KERNEL, PAPI_DOM_OTHER or
      * PAPI_DOM_ALL, and PAPI_EINVAL otherwise. No state is changed.
      */
1188  SUBDBG("Entering\n");
1189  (void) ctrl;
1190  if((PAPI_DOM_USER & domain) || (PAPI_DOM_KERNEL & domain) || (PAPI_DOM_OTHER & domain) || (PAPI_DOM_ALL & domain))
1191  return (PAPI_OK);
1192  else
1193  return (PAPI_EINVAL);
1195 }
#define PAPI_DOM_KERNEL
Definition: papi.h:300
#define PAPI_DOM_ALL
Definition: papi.h:303
return PAPI_OK
Definition: linux-nvml.c:497
#define PAPI_DOM_USER
Definition: papi.h:298
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:436
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_DOM_OTHER
Definition: papi.h:301
static int papicuda_shutdown_component ( void  )
static

Definition at line 1077 of file linux-cuda.c.

1078 {
     /*
      * Release everything the component allocated and close its dynamic
      * libraries: the per-device arrays and event tables reachable from gctxt,
      * each active-context record reachable from gctrl, both structures
      * themselves (the global pointers are reset to NULL), then the dl1, dl2
      * and dl3 handles.
      * Returns PAPI_OK, or PAPI_EMISC if disabling kernel replay mode fails
      * (only when built with PAPICUDA_KERNEL_REPLAY_MODE).
      * NOTE(review): listing lines 1080-1081 (presumably the gctxt/gctrl
      * declarations) and 1092/1094 are not shown in this listing -- confirm
      * against the full source.
      */
1079  SUBDBG("Entering\n");
1082  int deviceNum;
1083  uint32_t cc;
1084  /* Free context */
1085  if(gctxt) {
1086  for(deviceNum = 0; deviceNum < gctxt->deviceCount; deviceNum++) {
1087  papicuda_device_desc_t *mydevice = &gctxt->deviceArray[deviceNum];
1088  papi_free(mydevice->domainIDArray);
1089  papi_free(mydevice->domainIDNumEvents);
1090  }
1091  papi_free(gctxt->availEventIDArray);
1093  papi_free(gctxt->availEventKind);
1095  papi_free(gctxt->availEventDesc);
1096  papi_free(gctxt->deviceArray);
1097  papi_free(gctxt);
1098  global_papicuda_context = gctxt = NULL;
1099  }
1100  /* Free control */
1101  if(gctrl) {
1102  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
     /* Skip empty slots first: the replay-mode code below dereferences the slot, so the NULL test must come before it */
     if(gctrl->arrayOfActiveCUContexts[cc] == NULL)
     continue;
1103 #ifdef PAPICUDA_KERNEL_REPLAY_MODE
1104  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1105  CUPTI_CALL((*cuptiDisableKernelReplayModePtr) (currCuCtx), return (PAPI_EMISC));
1106 #endif
1108  papi_free(gctrl->arrayOfActiveCUContexts[cc]);
1109  }
1110  papi_free(gctrl);
1111  global_papicuda_control = gctrl = NULL;
1112  }
1113  // close the dynamic libraries needed by this component (opened in the init substrate call)
1114  dlclose(dl1);
1115  dlclose(dl2);
1116  dlclose(dl3);
1117  return (PAPI_OK);
1118 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:92
uint32_t * domainIDNumEvents
Definition: linux-cuda.c:56
#define papi_free(a)
Definition: papi_memory.h:35
uint32_t * availEventIDArray
Definition: linux-cuda.c:38
int * availEventDeviceNum
Definition: linux-cuda.c:37
return PAPI_OK
Definition: linux-nvml.c:497
static void * dl1
Definition: linux-cuda.c:84
struct client_command cc
Definition: iozone.c:21326
#define PAPI_EMISC
Definition: papi.h:267
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:40
static void * dl2
Definition: linux-cuda.c:85
CUpti_EventDomainID * domainIDArray
Definition: linux-cuda.c:55
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static void * dl3
Definition: linux-cuda.c:86
struct papicuda_device_desc * deviceArray
Definition: linux-cuda.c:34
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
CUpti_ActivityKind * availEventKind
Definition: linux-cuda.c:36
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
uint32_t * availEventIsBeingMeasuredInEventset
Definition: linux-cuda.c:39
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:126
int papicuda_shutdown_thread ( hwd_context_t ctx)

Definition at line 1068 of file linux-cuda.c.

1069 {
     /* Per-thread shutdown hook: there is nothing to release here, so it only logs entry and returns PAPI_OK. ctx is unused. */
1070  SUBDBG("Entering\n");
1071  (void) ctx;
1072 
1073  return (PAPI_OK);
1074 }
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
static int papicuda_start ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)
static

Definition at line 786 of file linux-cuda.c.

787 {
     /*
      * Begin counting: zero the accumulated PAPI values, record the CUPTI start
      * timestamp, and enable every event group set of every active context.
      * ctx and ctrl are unused.
      * Returns PAPI_OK, or PAPI_EMISC as soon as any CUDA/CUPTI call fails.
      * NOTE(review): the declaration of gctrl (listing line 791) is not shown
      * in this listing; presumably it aliases global_papicuda_control -- confirm.
      */
788  SUBDBG("Entering\n");
789  (void) ctx;
790  (void) ctrl;
792  // papicuda_context_t *gctxt = global_papicuda_context;
793  uint32_t ii, gg, cc, ss;
794  int saveDeviceNum = -1;
795 
796  SUBDBG("Reset all active event values\n");
797  for(ii = 0; ii < gctrl->activeEventCount; ii++)
798  gctrl->activeEventValues[ii] = 0;
799 
800  SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n");
801  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC));
     /* This timestamp is stored in gctrl->cuptiStartTimestampNs as the start of the measurement interval */
802  CUPTI_CALL((*cuptiGetTimestampPtr) (&gctrl->cuptiStartTimestampNs), return (PAPI_EMISC));
803  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
804  int eventDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
805  CUcontext eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
806  SUBDBG("Set to device %d cuCtx %p \n", eventDeviceNum, eventCuCtx);
807  // CUDA_CALL( (*cudaSetDevicePtr)(eventDeviceNum), return(PAPI_EMISC));
     /* Only a context on a device other than the current one is pushed (and popped below) */
808  if(eventDeviceNum != saveDeviceNum)
809  CU_CALL((*cuCtxPushCurrentPtr) (eventCuCtx), return (PAPI_EMISC));
810  CUpti_EventGroupSets *eventEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses;
811  for (ss=0; ss<eventEventGroupPasses->numSets; ss++) {
812  CUpti_EventGroupSet groupset = eventEventGroupPasses->sets[ss];
813  for(gg = 0; gg < groupset.numEventGroups; gg++) {
814  CUpti_EventGroup group = groupset.eventGroups[gg];
     /* Set the PROFILE_ALL_DOMAIN_INSTANCES attribute to 1 on each group before the set is enabled */
815  uint32_t one = 1;
816  CUPTI_CALL((*cuptiEventGroupSetAttributePtr) (group, CUPTI_EVENT_GROUP_ATTR_PROFILE_ALL_DOMAIN_INSTANCES, sizeof(uint32_t), &one), return (PAPI_EMISC));
817  }
818  CUPTI_CALL((*cuptiEventGroupSetEnablePtr) (&groupset), return (PAPI_EMISC));
819  }
820  if(eventDeviceNum != saveDeviceNum)
821  CU_CALL((*cuCtxPopCurrentPtr) (&eventCuCtx), return (PAPI_EMISC));
822  }
823 
824  return (PAPI_OK);
825 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
uint64_t cuptiStartTimestampNs
Definition: linux-cuda.c:66
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:116
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
struct client_command cc
Definition: iozone.c:21326
#define PAPI_EMISC
Definition: papi.h:267
int one
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:107
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:65
return
Definition: iozone.c:22170
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
uint32_t activeEventCount
Definition: linux-cuda.c:63
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:126
static int papicuda_stop ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)
static

Definition at line 997 of file linux-cuda.c.

998 {
     /*
      * Stop counting: disable every event group set of every active context.
      * ctx and ctrl are unused; accumulated values are left untouched.
      * Returns PAPI_OK, or PAPI_EMISC as soon as any CUDA/CUPTI call fails.
      * NOTE(review): the declaration of gctrl (listing line 1002) is not shown
      * in this listing; presumably it aliases global_papicuda_control -- confirm.
      */
999  SUBDBG("Entering\n");
1000  (void) ctx;
1001  (void) ctrl;
1003  uint32_t cc, ss;
1004  int saveDeviceNum;
1005 
1006  SUBDBG("Save current context, then switch to each active device/context and enable eventgroups\n");
1007  CUDA_CALL((*cudaGetDevicePtr) (&saveDeviceNum), return (PAPI_EMISC));
1008  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
1009  int currDeviceNum = gctrl->arrayOfActiveCUContexts[cc]->deviceNum;
1010  CUcontext currCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
1011  SUBDBG("Set to device %d cuCtx %p \n", currDeviceNum, currCuCtx);
     /* A context on another device is pushed (and popped below); one on the current device is just made current */
1012  if(currDeviceNum != saveDeviceNum)
1013  CU_CALL((*cuCtxPushCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1014  else
1015  CU_CALL((*cuCtxSetCurrentPtr) (currCuCtx), return (PAPI_EMISC));
1016  CUpti_EventGroupSets *currEventGroupPasses = gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses;
1017  for (ss=0; ss<currEventGroupPasses->numSets; ss++) {
1018  CUpti_EventGroupSet groupset = currEventGroupPasses->sets[ss];
1019  CUPTI_CALL((*cuptiEventGroupSetDisablePtr) (&groupset), return (PAPI_EMISC));
1020  }
1021  /* Pop the pushed context */
1022  if(currDeviceNum != saveDeviceNum)
1023  CU_CALL((*cuCtxPopCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
1024 
1025  }
1026  return (PAPI_OK);
1027 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:116
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
struct client_command cc
Definition: iozone.c:21326
#define PAPI_EMISC
Definition: papi.h:267
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:107
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:126
static int papicuda_update_control_state ( hwd_control_state_t ctrl,
NativeInfo_t nativeInfo,
int  nativeCount,
hwd_context_t ctx 
)
static

Definition at line 619 of file linux-cuda.c.

620 {
     /*
      * Register the native events requested by PAPI with the CUDA component.
      *
      * For each entry of nativeInfo that is not already being measured: find
      * (or create) the active-context record for the event's device, append
      * the event -- or, for a metric, every event the metric requires -- to
      * that context's conEvents list, record the event in gctrl's active-event
      * arrays (its position is written back to nativeInfo[ii].ni_position),
      * and rebuild the context's CUPTI event group sets.
      *
      * nativeCount: number of entries in nativeInfo; 0 returns PAPI_OK at once.
      * ctx is unused.
      * Returns PAPI_OK on success; PAPI_ENOMEM on allocation failure;
      * PAPI_EINVAL when a per-context limit (PAPICUDA_MAX_COUNTERS) is hit or a
      * metric query fails; PAPI_ECOMBO when the events need more than one pass;
      * PAPI_EMISC / PAPI_ECMP when another CUDA/CUPTI call fails.
      *
      * NOTE(review): listing lines 624-625 (presumably the gctxt/gctrl
      * declarations), 682 (presumably the allocation of the new context record)
      * and 767 are not shown in this listing -- confirm against the full source.
      */
621  SUBDBG("Entering with nativeCount %d\n", nativeCount);
622  (void) ctx;
623  // (void) ctrl;
626  int currDeviceNum;
627  CUcontext currCuCtx;
628  int eventContextIdx;
629  CUcontext eventCuCtx;
630  int index, ii;
631  uint32_t numEvents, ee, cc;
632 
633  /* Return if no events */
634  if(nativeCount == 0)
635  return (PAPI_OK);
636 
637  /* Get deviceNum, initialize context if needed via free, get context */
638  // CU_CALL( (*cuCtxGetCurrentPtr)(&currCuCtx), return(PAPI_EMISC));
639  CUDA_CALL((*cudaGetDevicePtr) (&currDeviceNum), return (PAPI_EMISC));
640  SUBDBG("currDeviceNum %d \n", currDeviceNum);
641  CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC));
642  CU_CALL((*cuCtxGetCurrentPtr) (&currCuCtx), return (PAPI_EMISC));
643  SUBDBG("currDeviceNum %d cuCtx %p \n", currDeviceNum, currCuCtx);
644 
645  /* Handle user request of events to be monitored */
646  for(ii = 0; ii < nativeCount; ii++) {
647  /* Get the PAPI event index from the user */
648  index = nativeInfo[ii].ni_event;
649 #ifdef DEBUG
650  char *eventName = gctxt->availEventDesc[index].name;
651 #endif
652  int eventDeviceNum = gctxt->availEventDeviceNum[index];
653 
654  /* if this event is already added continue to next ii, if not, mark it as being added */
655  if(gctxt->availEventIsBeingMeasuredInEventset[index] == 1) {
656  SUBDBG("Skipping event %s which is already added\n", eventName);
657  continue;
658  } else
659  gctxt->availEventIsBeingMeasuredInEventset[index] = 1;
660 
661  /* Find context/control in papicuda, creating it if does not exist */
662  for(cc = 0; cc < gctrl->countOfActiveCUContexts; cc++) {
663  CHECK_PRINT_EVAL(cc >= PAPICUDA_MAX_COUNTERS, "Exceeded hardcoded maximum number of contexts (PAPICUDA_MAX_COUNTERS)", return (PAPI_EMISC));
664  if(gctrl->arrayOfActiveCUContexts[cc]->deviceNum == eventDeviceNum) {
665  eventCuCtx = gctrl->arrayOfActiveCUContexts[cc]->cuCtx;
666  SUBDBG("Event %s device %d already has a cuCtx %p registered\n", eventName, eventDeviceNum, eventCuCtx);
667  if(eventCuCtx != currCuCtx)
668  CU_CALL((*cuCtxPushCurrentPtr) (eventCuCtx), return (PAPI_EMISC));
669  break;
670  }
671  }
672  // Create context if it does not exist
673  if(cc == gctrl->countOfActiveCUContexts) {
674  SUBDBG("Event %s device %d does not have a cuCtx registered yet...\n", eventName, eventDeviceNum);
675  if(currDeviceNum != eventDeviceNum) {
676  CUDA_CALL((*cudaSetDevicePtr) (eventDeviceNum), return (PAPI_EMISC));
677  CUDA_CALL((*cudaFreePtr) (NULL), return (PAPI_EMISC));
678  CU_CALL((*cuCtxGetCurrentPtr) (&eventCuCtx), return (PAPI_EMISC));
679  } else {
680  eventCuCtx = currCuCtx;
681  }
683  CHECK_PRINT_EVAL(gctrl->arrayOfActiveCUContexts[cc] == NULL, "Memory allocation for new active context failed", return (PAPI_ENOMEM));
684  gctrl->arrayOfActiveCUContexts[cc]->deviceNum = eventDeviceNum;
685  gctrl->arrayOfActiveCUContexts[cc]->cuCtx = eventCuCtx;
686  gctrl->arrayOfActiveCUContexts[cc]->eventGroupPasses = NULL;
687  gctrl->arrayOfActiveCUContexts[cc]->conMetricsCount = 0;
688  gctrl->arrayOfActiveCUContexts[cc]->conEventsCount = 0;
689  gctrl->countOfActiveCUContexts++;
690  SUBDBG("Added a new context deviceNum %d cuCtx %p ... now countOfActiveCUContexts is %d\n", eventDeviceNum, eventCuCtx, gctrl->countOfActiveCUContexts);
691  }
692  eventContextIdx = cc;
693 
694  papicuda_active_cucontext_t *eventctrl = gctrl->arrayOfActiveCUContexts[eventContextIdx];
695  switch (gctxt->availEventKind[index]) {
696  case CUPTI_ACTIVITY_KIND_METRIC:
697  SUBDBG("Need to add metric %d %s \n", index, eventName);
698  /* For the metric, find list of events required */
699  CUpti_MetricID metricId = gctxt->availEventIDArray[index];
700  CUPTI_CALL((*cuptiMetricGetNumEventsPtr) (metricId, &numEvents), return (PAPI_EINVAL));
701  size_t sizeBytes = numEvents * sizeof(CUpti_EventID);
702  CUpti_EventID *eventIdArray = papi_malloc(sizeBytes);
703  CHECK_PRINT_EVAL(eventIdArray == NULL, "Malloc failed", return (PAPI_ENOMEM));
704  CUPTI_CALL((*cuptiMetricEnumEventsPtr) (metricId, &sizeBytes, eventIdArray), return (PAPI_EINVAL));
705  SUBDBG("For metric %s, append the list of %d required events\n", eventName, numEvents);
706  for(ee = 0; ee < numEvents; ee++) {
707  eventctrl->conEvents[eventctrl->conEventsCount] = eventIdArray[ee];
708  eventctrl->conEventsCount++;
     /* The element just stored sits at conEventsCount - 1; indexing with conEventsCount would print the next, not yet written, slot */
709  SUBDBG("For metric %s, appended event %d - %d %d to this context (conEventsCount %d)\n", eventName, ee, eventIdArray[ee], eventctrl->conEvents[eventctrl->conEventsCount - 1], eventctrl->conEventsCount);
710  if (eventctrl->conEventsCount >= PAPICUDA_MAX_COUNTERS) {
711  SUBDBG("Num events (generated by metric) exceeded PAPICUDA_MAX_COUNTERS\n");
     /* The scratch list is not needed after this point; release it on the error path too */
     papi_free(eventIdArray);
712  return(PAPI_EINVAL);
713  }
714  }
     /* All required event IDs were copied into conEvents, so the scratch list can be released (it was previously leaked) */
     papi_free(eventIdArray);
715  eventctrl->conMetrics[eventctrl->conMetricsCount] = metricId;
716  eventctrl->conMetricsCount++;
717  if (eventctrl->conMetricsCount >= PAPICUDA_MAX_COUNTERS) {
718  SUBDBG("Num metrics exceeded PAPICUDA_MAX_COUNTERS\n");
719  return(PAPI_EINVAL);
720  }
721  break;
722 
723  case CUPTI_ACTIVITY_KIND_EVENT:
724  SUBDBG("Need to add event %d %s to the context\n", index, eventName);
725  /* lookup cuptieventid for this event index */
726  CUpti_EventID eventId = gctxt->availEventIDArray[index];
727  eventctrl->conEvents[eventctrl->conEventsCount] = eventId;
728  eventctrl->conEventsCount++;
729  break;
730 
731  default:
732  CHECK_PRINT_EVAL(1, "Unknown CUPTI measure", return (PAPI_EMISC));
733  break;
734  }
735 
736  if (eventctrl->conEventsCount >= PAPICUDA_MAX_COUNTERS) {
737  SUBDBG("Num events exceeded PAPICUDA_MAX_COUNTERS\n");
738  return(PAPI_EINVAL);
739  }
740 
741  /* Record index of this active event back into the nativeInfo structure */
742  nativeInfo[ii].ni_position = gctrl->activeEventCount;
743  /* record added event at the higher level */
744  CHECK_PRINT_EVAL(gctrl->activeEventCount == PAPICUDA_MAX_COUNTERS - 1, "Exceeded maximum num of events (PAPI_MAX_COUNTERS)", return (PAPI_EMISC));
745  gctrl->activeEventIndex[gctrl->activeEventCount] = index;
746  // gctrl->activeEventContextIdx[gctrl->activeEventCount] = eventContextIdx;
747  gctrl->activeEventValues[gctrl->activeEventCount] = 0;
748  gctrl->activeEventCount++;
749 
750  /* Create/recreate eventgrouppass structures for the added event and context */
751  SUBDBG("Create eventGroupPasses for context (destroy pre-existing) (nativeCount %d, conEventsCount %d) \n", gctrl->activeEventCount, eventctrl->conEventsCount);
752  if(eventctrl->conEventsCount > 0) {
753  // SUBDBG("Destroy previous eventGroupPasses for the context \n");
754  if(eventctrl->eventGroupPasses != NULL)
755  CUPTI_CALL((*cuptiEventGroupSetsDestroyPtr) (eventctrl->eventGroupPasses), return (PAPI_EMISC));
756  eventctrl->eventGroupPasses = NULL;
757  size_t sizeBytes = (eventctrl->conEventsCount) * sizeof(CUpti_EventID);
758  // SUBDBG("About to create eventGroupPasses for the context (sizeBytes %zu) \n", sizeBytes);
759 #ifdef PAPICUDA_KERNEL_REPLAY_MODE
760  CUPTI_CALL((*cuptiEnableKernelReplayModePtr) (eventCuCtx), return (PAPI_ECMP));
761  CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) (eventCuCtx, sizeBytes, eventctrl->conEvents, &eventctrl->eventGroupPasses), return (PAPI_ECMP));
762 #else
763  CUPTI_CALL((*cuptiSetEventCollectionModePtr)(eventCuCtx,CUPTI_EVENT_COLLECTION_MODE_KERNEL), return(PAPI_ECMP));
764  CUPTI_CALL((*cuptiEventGroupSetsCreatePtr) (eventCuCtx, sizeBytes, eventctrl->conEvents, &eventctrl->eventGroupPasses), return (PAPI_EMISC));
     /* Without kernel replay, an event combination that needs more than one set is rejected */
765  if (eventctrl->eventGroupPasses->numSets > 1) {
766  SUBDBG("Error occured: The combined CUPTI events require more than 1 pass... try different events\n");
768  return(PAPI_ECOMBO);
769  } else {
770  SUBDBG("Created eventGroupPasses for context total-events %d in-this-context %d passes-requied %d) \n", gctrl->activeEventCount, eventctrl->conEventsCount, eventctrl->eventGroupPasses->numSets);
771  }
772 
773 #endif
774  }
775 
776  if(eventCuCtx != currCuCtx)
777  CU_CALL((*cuCtxPopCurrentPtr) (&eventCuCtx), return (PAPI_EMISC));
778 
779  }
780  return (PAPI_OK);
781 }
static papicuda_control_t * global_papicuda_control
Definition: linux-cuda.c:95
CUpti_EventID conEvents[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:78
static papicuda_context_t * global_papicuda_context
Definition: linux-cuda.c:92
#define PAPICUDA_MAX_COUNTERS
Definition: linux-cuda.c:27
uint32_t * availEventIDArray
Definition: linux-cuda.c:38
#define papi_malloc(a)
Definition: papi_memory.h:34
CUpti_EventGroupSets * eventGroupPasses
Definition: linux-cuda.c:80
int * availEventDeviceNum
Definition: linux-cuda.c:37
#define CU_CALL(call, handleerror)
Definition: linux-cuda.c:116
return PAPI_OK
Definition: linux-nvml.c:497
void
Definition: iozone.c:18627
return PAPI_EINVAL
Definition: linux-nvml.c:436
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:98
struct client_command cc
Definition: iozone.c:21326
#define PAPI_EMISC
Definition: papi.h:267
struct papicuda_name_desc * availEventDesc
Definition: linux-cuda.c:40
static int papicuda_cleanup_eventset(hwd_control_state_t *ctrl)
Definition: linux-cuda.c:1033
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define CUDA_CALL(call, handleerror)
Definition: linux-cuda.c:107
#define PAPI_ECMP
Definition: papi.h:256
#define PAPI_ENOMEM
Definition: papi.h:254
uint32_t countOfActiveCUContexts
Definition: linux-cuda.c:61
long long activeEventValues[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:65
#define PAPI_ECOMBO
Definition: papi.h:277
return
Definition: iozone.c:22170
CUpti_EventID conMetrics[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:75
CUpti_ActivityKind * availEventKind
Definition: linux-cuda.c:36
struct papicuda_active_cucontext_s * arrayOfActiveCUContexts[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:62
int activeEventIndex[PAPICUDA_MAX_COUNTERS]
Definition: linux-cuda.c:64
uint32_t activeEventCount
Definition: linux-cuda.c:63
uint32_t * availEventIsBeingMeasuredInEventset
Definition: linux-cuda.c:39
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define CUPTI_CALL(call, handleerror)
Definition: linux-cuda.c:126

Here is the call graph for this function:

Variable Documentation

papi_vector_t _cuda_vector

Definition at line 89 of file linux-cuda.c.

void(* _dl_non_dynamic_init)(void)

Definition at line 160 of file linux-cuda.c.

229 {
230 #define DLSYM_AND_CHECK( dllib, name ) dlsym( dllib, name ); if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); }
231  /* Opens libcuda, libcudart and libcupti at run time and stores every entry point used by this component in its matching ...Ptr variable. Returns PAPI_OK when all lookups succeed; a failed symbol lookup records a disabled_reason and returns PAPI_ENOSUPP (see DLSYM_AND_CHECK). A failed dlopen is handed to CHECK_PRINT_EVAL with return(PAPI_ENOSUPP) as its action. */
232  /* Attempt to guess if we were statically linked to libc, if so bail */
233  if(_dl_non_dynamic_init != NULL) {
234  strncpy(_cuda_vector.cmp_info.disabled_reason, "The CUDA component does not support statically linking to libc.", PAPI_MAX_STR_LEN);
235  return PAPI_ENOSUPP;
236  }
237  /* Need to link in the cuda libraries, if not found disable the component */
238  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
239  CHECK_PRINT_EVAL(!dl1, "CUDA library libcuda.so not found.", return (PAPI_ENOSUPP));
240  cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxGetCurrent");
241  cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxSetCurrent");
242  cuDeviceGetPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGet");
243  cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetCount");
244  cuDeviceGetNamePtr = DLSYM_AND_CHECK(dl1, "cuDeviceGetName");
245  cuInitPtr = DLSYM_AND_CHECK(dl1, "cuInit");
246  cuCtxPopCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPopCurrent");
247  cuCtxPushCurrentPtr = DLSYM_AND_CHECK(dl1, "cuCtxPushCurrent");
248  cuCtxSynchronizePtr = DLSYM_AND_CHECK(dl1, "cuCtxSynchronize");
249  /* CUDA runtime API; this handle is opened with RTLD_NODELETE in addition to the flags used for the other two libraries */
250  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
251  CHECK_PRINT_EVAL(!dl2, "CUDA runtime library libcudart.so not found.", return (PAPI_ENOSUPP));
252  cudaGetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaGetDevice");
253  cudaSetDevicePtr = DLSYM_AND_CHECK(dl2, "cudaSetDevice");
254  cudaFreePtr = DLSYM_AND_CHECK(dl2, "cudaFree");
255  /* CUPTI: device, event-domain, event-group, metric and kernel-replay entry points */
256  dl3 = dlopen("libcupti.so", RTLD_NOW | RTLD_GLOBAL);
257  CHECK_PRINT_EVAL(!dl3, "CUDA profiling library libcupti.so not found.", return (PAPI_ENOSUPP));
258  /* The macro DLSYM_AND_CHECK results in the expansion example below */
259  /* cuptiDeviceEnumEventDomainsPtr = dlsym( dl3, "cuptiDeviceEnumEventDomains" ); */
260  /* if ( dlerror()!=NULL ) { strncpy( _cuda_vector.cmp_info.disabled_reason, "A CUDA required function was not found in dynamic libs", PAPI_MAX_STR_LEN ); return ( PAPI_ENOSUPP ); } */
261  cuptiDeviceEnumMetricsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceEnumMetrics");
262  cuptiDeviceGetEventDomainAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetEventDomainAttribute");
263  cuptiDeviceGetNumMetricsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetNumMetrics");
264  cuptiEventGroupGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupGetAttribute");
265  cuptiEventGroupReadEventPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupReadEvent");
266  cuptiEventGroupSetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetAttribute");
267  cuptiEventGroupSetDisablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetDisable");
268  cuptiEventGroupSetEnablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetEnable");
269  cuptiEventGroupSetsCreatePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetsCreate");
270  cuptiEventGroupSetsDestroyPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupSetsDestroy");
271  cuptiGetTimestampPtr = DLSYM_AND_CHECK(dl3, "cuptiGetTimestamp");
272  cuptiMetricEnumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricEnumEvents");
273  cuptiMetricGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetAttribute");
274  cuptiMetricGetNumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetNumEvents");
275  cuptiMetricGetValuePtr = DLSYM_AND_CHECK(dl3, "cuptiMetricGetValue");
276  cuptiSetEventCollectionModePtr = DLSYM_AND_CHECK(dl3, "cuptiSetEventCollectionMode");
277  cuptiDeviceEnumEventDomainsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceEnumEventDomains");
278  cuptiDeviceGetNumEventDomainsPtr = DLSYM_AND_CHECK(dl3, "cuptiDeviceGetNumEventDomains");
279  cuptiEventDomainEnumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainEnumEvents");
280  cuptiEventDomainGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainGetAttribute");
281  cuptiEventDomainGetNumEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventDomainGetNumEvents");
282  cuptiEventGetAttributePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGetAttribute");
283  cuptiEventGroupAddEventPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupAddEvent");
284  cuptiEventGroupCreatePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupCreate");
285  cuptiEventGroupDestroyPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupDestroy");
286  cuptiEventGroupDisablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupDisable");
287  cuptiEventGroupEnablePtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupEnable");
288  cuptiEventGroupReadAllEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupReadAllEvents");
289  cuptiEventGroupResetAllEventsPtr = DLSYM_AND_CHECK(dl3, "cuptiEventGroupResetAllEvents");
290  cuptiGetResultStringPtr = DLSYM_AND_CHECK(dl3, "cuptiGetResultString");
291  cuptiEnableKernelReplayModePtr = DLSYM_AND_CHECK(dl3, "cuptiEnableKernelReplayMode");
292  cuptiDisableKernelReplayModePtr = DLSYM_AND_CHECK(dl3, "cuptiDisableKernelReplayMode"); /* each pointer must bind the symbol of the same name */
293  return (PAPI_OK);
294 }
#define PAPI_ENOSUPP
Definition: papi.h:271
return PAPI_OK
Definition: linux-nvml.c:497
#define DLSYM_AND_CHECK(dllib, name)
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
static void * dl1
Definition: linux-cuda.c:84
#define CHECK_PRINT_EVAL(checkcond, str, evalthis)
Definition: linux-cuda.c:98
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:636
static void * dl2
Definition: linux-cuda.c:85
void(* _dl_non_dynamic_init)(void)
Definition: linux-cuda.c:160
static void * dl3
Definition: linux-cuda.c:86
papi_vector_t _cuda_vector
Definition: linux-cuda.c:89
#define PAPI_MAX_STR_LEN
Definition: papi.h:465
void* dl1 = NULL
static

Definition at line 84 of file linux-cuda.c.

void* dl2 = NULL
static

Definition at line 85 of file linux-cuda.c.

void* dl3 = NULL
static

Definition at line 86 of file linux-cuda.c.

papicuda_context_t* global_papicuda_context = NULL
static

Definition at line 92 of file linux-cuda.c.

papicuda_control_t* global_papicuda_control = NULL
static

Definition at line 95 of file linux-cuda.c.